Blame SOURCES/glibc-rh1380680-15.patch

147e83
From cddea07761373ce92dc75e8306212d71fa2043ba Mon Sep 17 00:00:00 2001
147e83
From: Stefan Liebler <stli@linux.vnet.ibm.com>
147e83
Date: Mon, 7 Nov 2016 17:18:39 +0100
147e83
Subject: [PATCH 15/17] S390: Fix utf32 to utf16 handling of low surrogates
147e83
 (disable cu42).
147e83
147e83
Upstream commit a42a95c43133d69b1108f582cffa0f6986a9c3da
147e83
147e83
According to the latest Unicode standard, a conversion from/to UTF-xx has
147e83
to report an error if the character value is in the range of a UTF-16 surrogate
147e83
(0xd800..0xdfff). See https://sourceware.org/ml/libc-help/2015-12/msg00015.html.
147e83
147e83
Thus the cu42 instruction, which converts from utf32 to utf16, has to be
147e83
disabled because it does not report an error in case of a value in range of
147e83
a low surrogate (0xdc00..0xdfff). The etf3eh variant is removed, and the C and
147e83
vector variants are adjusted to handle a value in the range of a utf16 low
147e83
surrogate correctly.
147e83
147e83
ChangeLog:
147e83
147e83
	* sysdeps/s390/utf16-utf32-z9.c: Disable cu42 instruction and report
147e83
	an error in case of a value in range of an utf16 low surrogate.
147e83
---
147e83
 sysdeps/s390/utf16-utf32-z9.c | 155 +++++++++++++++++-------------------------
147e83
 1 file changed, 62 insertions(+), 93 deletions(-)
147e83
147e83
diff --git a/sysdeps/s390/utf16-utf32-z9.c b/sysdeps/s390/utf16-utf32-z9.c
147e83
index 8d42ab8..5d2ac44 100644
147e83
--- a/sysdeps/s390/utf16-utf32-z9.c
147e83
+++ b/sysdeps/s390/utf16-utf32-z9.c
147e83
@@ -145,42 +145,6 @@ gconv_end (struct __gconv_step *data)
147e83
   free (data->__data);
147e83
 }
147e83
 
147e83
-/* The macro for the hardware loop.  This is used for both
147e83
-   directions.  */
147e83
-#define HARDWARE_CONVERT(INSTRUCTION)					\
147e83
-  {									\
147e83
-    register const unsigned char* pInput __asm__ ("8") = inptr;		\
147e83
-    register size_t inlen __asm__ ("9") = inend - inptr;		\
147e83
-    register unsigned char* pOutput __asm__ ("10") = outptr;		\
147e83
-    register size_t outlen __asm__("11") = outend - outptr;		\
147e83
-    unsigned long cc = 0;						\
147e83
-									\
147e83
-    __asm__ __volatile__ (".machine push       \n\t"			\
147e83
-			  ".machine \"z9-109\" \n\t"			\
147e83
-			  ".machinemode \"zarch_nohighgprs\"\n\t"	\
147e83
-			  "0: " INSTRUCTION "  \n\t"			\
147e83
-			  ".machine pop        \n\t"			\
147e83
-			  "   jo     0b        \n\t"			\
147e83
-			  "   ipm    %2        \n"			\
147e83
-			  : "+a" (pOutput), "+a" (pInput), "+d" (cc),	\
147e83
-			    "+d" (outlen), "+d" (inlen)			\
147e83
-			  :						\
147e83
-			  : "cc", "memory");				\
147e83
-									\
147e83
-    inptr = pInput;							\
147e83
-    outptr = pOutput;							\
147e83
-    cc >>= 28;								\
147e83
-									\
147e83
-    if (cc == 1)							\
147e83
-      {									\
147e83
-	result = __GCONV_FULL_OUTPUT;					\
147e83
-      }									\
147e83
-    else if (cc == 2)							\
147e83
-      {									\
147e83
-	result = __GCONV_ILLEGAL_INPUT;					\
147e83
-      }									\
147e83
-  }
147e83
-
147e83
 #define PREPARE_LOOP							\
147e83
   enum direction dir = ((struct utf16_data *) step->__data)->dir;	\
147e83
   int emit_bom = ((struct utf16_data *) step->__data)->emit_bom;	\
147e83
@@ -310,7 +274,7 @@ gconv_end (struct __gconv_step *data)
147e83
 		  "    slgr %[R_OUTLEN],%[R_TMP3]\n\t"			\
147e83
 		  /* Calculate remaining uint16_t values in loaded vrs.  */ \
147e83
 		  "12: lghi %[R_TMP2],16\n\t"				\
147e83
-		  "    sgr %[R_TMP2],%[R_TMP]\n\t"			\
147e83
+		  "    slgr %[R_TMP2],%[R_TMP]\n\t"			\
147e83
 		  "    srl %[R_TMP2],1\n\t"				\
147e83
 		  "    llh %[R_TMP],0(%[R_IN])\n\t"			\
147e83
 		  "    aghi %[R_OUTLEN],-4\n\t"				\
147e83
@@ -437,7 +401,7 @@ strong_alias (__from_utf16_loop_c_single, __from_utf16_loop_single)
147e83
     uint32_t c = get32 (inptr);						\
147e83
 									\
147e83
     if (__builtin_expect (c <= 0xd7ff, 1)				\
147e83
-	|| (c >=0xdc00 && c <= 0xffff))					\
147e83
+	|| (c > 0xdfff && c <= 0xffff))					\
147e83
       {									\
147e83
 	/* Two UTF-16 chars.  */					\
147e83
 	put16 (outptr, c);						\
147e83
@@ -475,29 +439,10 @@ strong_alias (__from_utf16_loop_c_single, __from_utf16_loop_single)
147e83
     inptr += 4;								\
147e83
   }
147e83
 
147e83
-#define BODY_TO_ETF3EH							\
147e83
-  {									\
147e83
-    HARDWARE_CONVERT ("cu42 %0, %1");					\
147e83
-									\
147e83
-    if (__glibc_likely (inptr == inend)					\
147e83
-	|| result == __GCONV_FULL_OUTPUT)				\
147e83
-      break;								\
147e83
-									\
147e83
-    if (inptr + 4 > inend)						\
147e83
-      {									\
147e83
-	result = __GCONV_INCOMPLETE_INPUT;				\
147e83
-	break;								\
147e83
-      }									\
147e83
-									\
147e83
-    STANDARD_TO_LOOP_ERR_HANDLER (4);					\
147e83
-  }
147e83
-
147e83
 #define BODY_TO_VX							\
147e83
   {									\
147e83
-    register const unsigned char* pInput asm ("8") = inptr;		\
147e83
-    register size_t inlen asm ("9") = inend - inptr;			\
147e83
-    register unsigned char* pOutput asm ("10") = outptr;		\
147e83
-    register size_t outlen asm("11") = outend - outptr;			\
147e83
+    size_t inlen = inend - inptr;					\
147e83
+    size_t outlen = outend - outptr;					\
147e83
     unsigned long tmp, tmp2, tmp3;					\
147e83
     asm volatile (".machine push\n\t"					\
147e83
 		  ".machine \"z13\"\n\t"				\
147e83
@@ -509,8 +454,8 @@ strong_alias (__from_utf16_loop_c_single, __from_utf16_loop_single)
147e83
 		  CONVERT_32BIT_SIZE_T ([R_OUTLEN])			\
147e83
 		  /* Loop which handles UTF-16 chars			\
147e83
 		     ch < 0xd800 || (ch > 0xdfff && ch < 0x10000).  */	\
147e83
-		  "0:  clgijl %[R_INLEN],32,20f\n\t"			\
147e83
-		  "    clgijl %[R_OUTLEN],16,20f\n\t"			\
147e83
+		  "0:  clgijl %[R_INLEN],32,2f\n\t"			\
147e83
+		  "    clgijl %[R_OUTLEN],16,2f\n\t"			\
147e83
 		  "1:  vlm %%v16,%%v17,0(%[R_IN])\n\t"			\
147e83
 		  "    lghi %[R_TMP2],0\n\t"				\
147e83
 		  /* Shorten to UTF-16.  */				\
147e83
@@ -526,9 +471,15 @@ strong_alias (__from_utf16_loop_c_single, __from_utf16_loop_single)
147e83
 		  "    aghi %[R_INLEN],-32\n\t"				\
147e83
 		  "    aghi %[R_OUTLEN],-16\n\t"			\
147e83
 		  "    la %[R_OUT],16(%[R_OUT])\n\t"			\
147e83
-		  "    clgijl %[R_INLEN],32,20f\n\t"			\
147e83
-		  "    clgijl %[R_OUTLEN],16,20f\n\t"			\
147e83
+		  "    clgijl %[R_INLEN],32,2f\n\t"			\
147e83
+		  "    clgijl %[R_OUTLEN],16,2f\n\t"			\
147e83
 		  "    j 1b\n\t"					\
147e83
+		  /* Calculate remaining uint32_t values in inptr.  */	\
147e83
+		  "2:  \n\t"						\
147e83
+		  "    clgije %[R_INLEN],0,99f\n\t"			\
147e83
+		  "    clgijl %[R_INLEN],4,92f\n\t"			\
147e83
+		  "    srlg %[R_TMP2],%[R_INLEN],2\n\t"			\
147e83
+		  "    j 20f\n\t"					\
147e83
 		  /* Setup to check for ch >= 0xd800 && ch <= 0xdfff	\
147e83
 		     and check for ch >= 0x10000. (v30, v31)  */	\
147e83
 		  "9:  .long 0xd800,0xdfff,0x10000,0x10000\n\t"		\
147e83
@@ -540,21 +491,59 @@ strong_alias (__from_utf16_loop_c_single, __from_utf16_loop_single)
147e83
 		  "    agr %[R_TMP],%[R_TMP2]\n\t"			\
147e83
 		  "    srlg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes.  */ \
147e83
 		  "    ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store.  */ \
147e83
-		  "    jl 20f\n\t"					\
147e83
+		  "    jl 12f\n\t"					\
147e83
 		  "    vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t"		\
147e83
 		  /* Update pointers.  */				\
147e83
 		  "    la %[R_IN],0(%[R_TMP],%[R_IN])\n\t"		\
147e83
 		  "    slgr %[R_INLEN],%[R_TMP]\n\t"			\
147e83
 		  "    la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t"		\
147e83
 		  "    slgr %[R_OUTLEN],%[R_TMP3]\n\t"			\
147e83
-		  /* Handles UTF16 surrogates with convert instruction.  */ \
147e83
-		  "20: cu42 %[R_OUT],%[R_IN]\n\t"			\
147e83
-		  "    jo 0b\n\t" /* Try vector implemenation again.  */ \
147e83
-		  "    lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1.  */ \
147e83
-		  "    lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2.  */ \
147e83
+		  /* Calculate remaining uint32_t values in vrs.  */	\
147e83
+		  "12: lghi %[R_TMP2],8\n\t"				\
147e83
+		  "    srlg %[R_TMP3],%[R_TMP3],1\n\t"			\
147e83
+		  "    slgr %[R_TMP2],%[R_TMP3]\n\t"			\
147e83
+		  /* Handle remaining UTF-32 characters.  */		\
147e83
+		  "20: l %[R_TMP],0(%[R_IN])\n\t"			\
147e83
+		  "    aghi %[R_INLEN],-4\n\t"				\
147e83
+		  /* Test if ch is 2byte UTF-16 char. */		\
147e83
+		  "    clfi %[R_TMP],0xffff\n\t"			\
147e83
+		  "    jh 21f\n\t"					\
147e83
+		  /* Handle 2 byte UTF16 char.  */			\
147e83
+		  "    lgr %[R_TMP3],%[R_TMP]\n\t"			\
147e83
+		  "    nilf %[R_TMP],0xf800\n\t"			\
147e83
+		  "    clfi %[R_TMP],0xd800\n\t"			\
147e83
+		  "    je 91f\n\t" /* Do not accept UTF-16 surrogates.  */ \
147e83
+		  "    slgfi %[R_OUTLEN],2\n\t"				\
147e83
+		  "    jl 90f \n\t"					\
147e83
+		  "    sth %[R_TMP3],0(%[R_OUT])\n\t"			\
147e83
+		  "    la %[R_IN],4(%[R_IN])\n\t"			\
147e83
+		  "    la %[R_OUT],2(%[R_OUT])\n\t"			\
147e83
+		  "    brctg %[R_TMP2],20b\n\t"				\
147e83
+		  "    j 0b\n\t" /* Switch to vx-loop.  */		\
147e83
+		  /* Test if ch is 4byte UTF-16 char. */		\
147e83
+		  "21: clfi %[R_TMP],0x10ffff\n\t"			\
147e83
+		  "    jh 91f\n\t" /* ch > 0x10ffff is not allowed!  */	\
147e83
+		  /* Handle 4 byte UTF16 char.  */			\
147e83
+		  "    slgfi %[R_OUTLEN],4\n\t"				\
147e83
+		  "    jl 90f \n\t"					\
147e83
+		  "    slfi %[R_TMP],0x10000\n\t" /* zabcd = uvwxy - 1.  */ \
147e83
+		  "    llilf %[R_TMP3],0xd800dc00\n\t"			\
147e83
+		  "    la %[R_IN],4(%[R_IN])\n\t"			\
147e83
+		  "    risbgn %[R_TMP3],%[R_TMP],38,47,6\n\t" /* High surrogate.  */ \
147e83
+		  "    risbgn %[R_TMP3],%[R_TMP],54,63,0\n\t" /* Low surrogate.  */ \
147e83
+		  "    st %[R_TMP3],0(%[R_OUT])\n\t"			\
147e83
+		  "    la %[R_OUT],4(%[R_OUT])\n\t"			\
147e83
+		  "    brctg %[R_TMP2],20b\n\t"				\
147e83
+		  "    j 0b\n\t" /* Switch to vx-loop.  */		\
147e83
+		  "92: lghi %[R_RES],%[RES_IN_FULL]\n\t"		\
147e83
+		  "    j 99f\n\t"					\
147e83
+		  "91: lghi %[R_RES],%[RES_IN_ILL]\n\t"			\
147e83
+		  "    j 99f\n\t"					\
147e83
+		  "90: lghi %[R_RES],%[RES_OUT_FULL]\n\t"		\
147e83
+		  "99: \n\t"						\
147e83
 		  ".machine pop"					\
147e83
-		  : /* outputs */ [R_IN] "+a" (pInput)			\
147e83
-		    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput)	\
147e83
+		  : /* outputs */ [R_IN] "+a" (inptr)			\
147e83
+		    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (outptr)	\
147e83
 		    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp)	\
147e83
 		    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3)	\
147e83
 		    , [R_RES] "+d" (result)				\
147e83
@@ -567,17 +556,10 @@ strong_alias (__from_utf16_loop_c_single, __from_utf16_loop_single)
147e83
 		    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
147e83
 		    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31")	\
147e83
 		  );							\
147e83
-    inptr = pInput;							\
147e83
-    outptr = pOutput;							\
147e83
-									\
147e83
     if (__glibc_likely (inptr == inend)					\
147e83
-	|| result == __GCONV_FULL_OUTPUT)				\
147e83
+	|| result != __GCONV_ILLEGAL_INPUT)				\
147e83
       break;								\
147e83
-    if (inptr + 4 > inend)						\
147e83
-      {									\
147e83
-	result = __GCONV_INCOMPLETE_INPUT;				\
147e83
-	break;								\
147e83
-      }									\
147e83
+									\
147e83
     STANDARD_TO_LOOP_ERR_HANDLER (4);					\
147e83
   }
147e83
 
147e83
@@ -590,15 +572,6 @@ strong_alias (__from_utf16_loop_c_single, __from_utf16_loop_single)
147e83
 #define BODY			BODY_TO_C
147e83
 #include <iconv/loop.c>
147e83
 
147e83
-/* Generate loop-function with hardware utf-convert instruction.  */
147e83
-#define MIN_NEEDED_INPUT	MIN_NEEDED_TO
147e83
-#define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
147e83
-#define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
147e83
-#define LOOPFCT			__to_utf16_loop_etf3eh
147e83
-#define LOOP_NEED_FLAGS
147e83
-#define BODY			BODY_TO_ETF3EH
147e83
-#include <iconv/loop.c>
147e83
-
147e83
 #if defined HAVE_S390_VX_ASM_SUPPORT
147e83
 /* Generate loop-function with hardware vector instructions.  */
147e83
 # define MIN_NEEDED_INPUT	MIN_NEEDED_TO
147e83
@@ -623,10 +596,6 @@ __to_utf16_loop_resolver (unsigned long int dl_hwcap)
147e83
     return __to_utf16_loop_vx;
147e83
   else
147e83
 #endif
147e83
-  if (dl_hwcap & HWCAP_S390_ZARCH && dl_hwcap & HWCAP_S390_HIGH_GPRS
147e83
-      && dl_hwcap & HWCAP_S390_ETF3EH)
147e83
-    return __to_utf16_loop_etf3eh;
147e83
-  else
147e83
     return __to_utf16_loop_c;
147e83
 }
147e83
 
147e83
-- 
147e83
1.8.3.1
147e83