arrfab / rpms / glibc

Forked from rpms/glibc 5 years ago
Clone

Blame SOURCES/glibc-rh1380680-11.patch

147e83
From ec79c95f95869a30d3f13c6229ebef1ad4931281 Mon Sep 17 00:00:00 2001
147e83
From: Stefan Liebler <stli@linux.vnet.ibm.com>
147e83
Date: Mon, 7 Nov 2016 16:20:45 +0100
147e83
Subject: [PATCH 11/17] S390: Optimize utf8-utf16 module.
147e83
147e83
Upstream commit 5bd11b19099b3f22d821515f9c93f1ecc1a7e15e
147e83
147e83
This patch reworks the s390 specific module to convert between utf8 and utf16.
147e83
Now ifunc is used to choose either the c or etf3eh (with convert utf instruction)
147e83
variants at runtime. Furthermore a new vector variant for z13 is introduced
147e83
which will be built and chosen if vector support is available at build / runtime.
147e83
147e83
In case of converting utf8 to utf16, the vector variant optimizes input of
147e83
1byte utf8 characters. The convert utf instruction is used if a multibyte utf8
147e83
character is found.
147e83
147e83
For the other direction utf16 to utf8, the cu21 instruction can't be re-enabled,
147e83
because it does not report an error, if the input-stream consists of a single
147e83
low surrogate utf16 char (e.g. 0xdc00). This applies to the newest z13, too.
147e83
Thus there is only the c or the new vector variant, which can handle 1..4 byte
147e83
utf8 characters.
147e83
147e83
The c variant from utf16 to utf8 has been fixed. If a high surrogate was at the
147e83
end of the input-buffer, then errno was set to EINVAL and the input-pointer
147e83
pointed just after the high surrogate. Now it points to the beginning of the
147e83
high surrogate.
147e83
147e83
This patch also fixes some whitespace errors. The c variant from utf8 to utf16
147e83
is now checking that tail-bytes start with 0b10... and the value is not in
147e83
range of a utf16 surrogate.
147e83
147e83
Furthermore, the etf3eh variants are handling the "UTF-xx//IGNORE" case now.
147e83
Before they ignored the ignore-case and always stopped at an error.
147e83
147e83
ChangeLog:
147e83
147e83
	* sysdeps/s390/s390-64/utf8-utf16-z9.c: Use ifunc to select c,
147e83
	etf3eh or new vector loop-variant.
147e83
---
147e83
 sysdeps/s390/s390-64/utf8-utf16-z9.c | 547 ++++++++++++++++++++++++++++-------
147e83
 1 file changed, 441 insertions(+), 106 deletions(-)
147e83
147e83
diff --git a/sysdeps/s390/s390-64/utf8-utf16-z9.c b/sysdeps/s390/s390-64/utf8-utf16-z9.c
147e83
index 590a149..b36ee9e 100644
147e83
--- a/sysdeps/s390/s390-64/utf8-utf16-z9.c
147e83
+++ b/sysdeps/s390/s390-64/utf8-utf16-z9.c
147e83
@@ -30,33 +30,27 @@
147e83
 #include <dl-procinfo.h>
147e83
 #include <gconv.h>
147e83
 
147e83
-/* UTF-16 big endian byte order mark.  */
147e83
-#define BOM_UTF16	0xfeff
147e83
+#if defined HAVE_S390_VX_GCC_SUPPORT
147e83
+# define ASM_CLOBBER_VR(NR) , NR
147e83
+#else
147e83
+# define ASM_CLOBBER_VR(NR)
147e83
+#endif
147e83
 
147e83
+/* Defines for skeleton.c.  */
147e83
 #define DEFINE_INIT		0
147e83
 #define DEFINE_FINI		0
147e83
 #define MIN_NEEDED_FROM		1
147e83
 #define MAX_NEEDED_FROM		4
147e83
 #define MIN_NEEDED_TO		2
147e83
 #define MAX_NEEDED_TO		4
147e83
-#define FROM_LOOP		from_utf8_loop
147e83
-#define TO_LOOP			to_utf8_loop
147e83
+#define FROM_LOOP		__from_utf8_loop
147e83
+#define TO_LOOP			__to_utf8_loop
147e83
 #define FROM_DIRECTION		(dir == from_utf8)
147e83
 #define ONE_DIRECTION           0
147e83
-#define PREPARE_LOOP							\
147e83
-  enum direction dir = ((struct utf8_data *) step->__data)->dir;	\
147e83
-  int emit_bom = ((struct utf8_data *) step->__data)->emit_bom;		\
147e83
-									\
147e83
-  if (emit_bom && !data->__internal_use					\
147e83
-      && data->__invocation_counter == 0)				\
147e83
-    {									\
147e83
-      /* Emit the UTF-16 Byte Order Mark.  */				\
147e83
-      if (__glibc_unlikely (outbuf + 2 > outend))			      \
147e83
-	return __GCONV_FULL_OUTPUT;					\
147e83
-									\
147e83
-      put16u (outbuf, BOM_UTF16);					\
147e83
-      outbuf += 2;							\
147e83
-    }
147e83
+
147e83
+
147e83
+/* UTF-16 big endian byte order mark.  */
147e83
+#define BOM_UTF16	0xfeff
147e83
 
147e83
 /* Direction of the transformation.  */
147e83
 enum direction
147e83
@@ -151,16 +145,16 @@ gconv_end (struct __gconv_step *data)
147e83
     register unsigned long long outlen __asm__("11") = outend - outptr;	\
147e83
     uint64_t cc = 0;							\
147e83
 									\
147e83
-    __asm__ volatile (".machine push       \n\t"			\
147e83
-		      ".machine \"z9-109\" \n\t"			\
147e83
-		      "0: " INSTRUCTION "  \n\t"			\
147e83
-		      ".machine pop        \n\t"			\
147e83
-		      "   jo     0b        \n\t"			\
147e83
-		      "   ipm    %2        \n"				\
147e83
-		      : "+a" (pOutput), "+a" (pInput), "+d" (cc),	\
147e83
-			"+d" (outlen), "+d" (inlen)			\
147e83
-		      :							\
147e83
-		      : "cc", "memory");				\
147e83
+    __asm__ __volatile__ (".machine push       \n\t"			\
147e83
+			  ".machine \"z9-109\" \n\t"			\
147e83
+			  "0: " INSTRUCTION "  \n\t"			\
147e83
+			  ".machine pop        \n\t"			\
147e83
+			  "   jo     0b        \n\t"			\
147e83
+			  "   ipm    %2        \n"			\
147e83
+			  : "+a" (pOutput), "+a" (pInput), "+d" (cc),	\
147e83
+			    "+d" (outlen), "+d" (inlen)			\
147e83
+			  :						\
147e83
+			  : "cc", "memory");				\
147e83
 									\
147e83
     inptr = pInput;							\
147e83
     outptr = pOutput;							\
147e83
@@ -169,50 +163,135 @@ gconv_end (struct __gconv_step *data)
147e83
     if (cc == 1)							\
147e83
       {									\
147e83
 	result = __GCONV_FULL_OUTPUT;					\
147e83
-	break;								\
147e83
       }									\
147e83
     else if (cc == 2)							\
147e83
       {									\
147e83
 	result = __GCONV_ILLEGAL_INPUT;					\
147e83
-	break;								\
147e83
       }									\
147e83
   }
147e83
 
147e83
+#define PREPARE_LOOP							\
147e83
+  enum direction dir = ((struct utf8_data *) step->__data)->dir;	\
147e83
+  int emit_bom = ((struct utf8_data *) step->__data)->emit_bom;		\
147e83
+									\
147e83
+  if (emit_bom && !data->__internal_use					\
147e83
+      && data->__invocation_counter == 0)				\
147e83
+    {									\
147e83
+      /* Emit the UTF-16 Byte Order Mark.  */				\
147e83
+      if (__glibc_unlikely (outbuf + 2 > outend))			\
147e83
+	return __GCONV_FULL_OUTPUT;					\
147e83
+									\
147e83
+      put16u (outbuf, BOM_UTF16);					\
147e83
+      outbuf += 2;							\
147e83
+    }
147e83
+
147e83
 /* Conversion function from UTF-8 to UTF-16.  */
147e83
+#define BODY_FROM_HW(ASM)						\
147e83
+  {									\
147e83
+    ASM;								\
147e83
+    if (__glibc_likely (inptr == inend)					\
147e83
+	|| result == __GCONV_FULL_OUTPUT)				\
147e83
+      break;								\
147e83
+									\
147e83
+    int i;								\
147e83
+    for (i = 1; inptr + i < inend && i < 5; ++i)			\
147e83
+      if ((inptr[i] & 0xc0) != 0x80)					\
147e83
+	break;								\
147e83
+									\
147e83
+    if (__glibc_likely (inptr + i == inend				\
147e83
+			&& result == __GCONV_EMPTY_INPUT))		\
147e83
+      {									\
147e83
+	result = __GCONV_INCOMPLETE_INPUT;				\
147e83
+	break;								\
147e83
+      }									\
147e83
+    STANDARD_FROM_LOOP_ERR_HANDLER (i);					\
147e83
+  }
147e83
+
147e83
+#define BODY_FROM_ETF3EH BODY_FROM_HW (HARDWARE_CONVERT ("cu12 %0, %1, 1"))
147e83
+
147e83
+#define HW_FROM_VX							\
147e83
+  {									\
147e83
+    register const unsigned char* pInput asm ("8") = inptr;		\
147e83
+    register size_t inlen asm ("9") = inend - inptr;			\
147e83
+    register unsigned char* pOutput asm ("10") = outptr;		\
147e83
+    register size_t outlen asm("11") = outend - outptr;			\
147e83
+    unsigned long tmp, tmp2, tmp3;					\
147e83
+    asm volatile (".machine push\n\t"					\
147e83
+		  ".machine \"z13\"\n\t"				\
147e83
+		  ".machinemode \"zarch_nohighgprs\"\n\t"		\
147e83
+		  "    vrepib %%v30,0x7f\n\t" /* For compare > 0x7f.  */ \
147e83
+		  "    vrepib %%v31,0x20\n\t"				\
147e83
+		  /* Loop which handles UTF-8 chars <=0x7f.  */		\
147e83
+		  "0:  clgijl %[R_INLEN],16,20f\n\t"			\
147e83
+		  "    clgijl %[R_OUTLEN],32,20f\n\t"			\
147e83
+		  "1:  vl %%v16,0(%[R_IN])\n\t"				\
147e83
+		  "    vstrcbs %%v17,%%v16,%%v30,%%v31\n\t"		\
147e83
+		  "    jno 10f\n\t" /* Jump away if not all bytes are 1byte \
147e83
+				       UTF8 chars.  */			\
147e83
+		  /* Enlarge to UTF-16.  */				\
147e83
+		  "    vuplhb %%v18,%%v16\n\t"				\
147e83
+		  "    la %[R_IN],16(%[R_IN])\n\t"			\
147e83
+		  "    vupllb %%v19,%%v16\n\t"				\
147e83
+		  "    aghi %[R_INLEN],-16\n\t"				\
147e83
+		  /* Store 32 bytes to buf_out.  */			\
147e83
+		  "    vstm %%v18,%%v19,0(%[R_OUT])\n\t"		\
147e83
+		  "    aghi %[R_OUTLEN],-32\n\t"			\
147e83
+		  "    la %[R_OUT],32(%[R_OUT])\n\t"			\
147e83
+		  "    clgijl %[R_INLEN],16,20f\n\t"			\
147e83
+		  "    clgijl %[R_OUTLEN],32,20f\n\t"			\
147e83
+		  "    j 1b\n\t"					\
147e83
+		  "10:\n\t"						\
147e83
+		  /* At least one byte is > 0x7f.			\
147e83
+		     Store the preceding 1-byte chars.  */		\
147e83
+		  "    vlgvb %[R_TMP],%%v17,7\n\t"			\
147e83
+		  "    sllk %[R_TMP2],%[R_TMP],1\n\t" /* Compute highest \
147e83
+							 index to store. */ \
147e83
+		  "    llgfr %[R_TMP3],%[R_TMP2]\n\t"			\
147e83
+		  "    ahi %[R_TMP2],-1\n\t"				\
147e83
+		  "    jl 20f\n\t"					\
147e83
+		  "    vuplhb %%v18,%%v16\n\t"				\
147e83
+		  "    vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t"		\
147e83
+		  "    ahi %[R_TMP2],-16\n\t"				\
147e83
+		  "    jl 11f\n\t"					\
147e83
+		  "    vupllb %%v19,%%v16\n\t"				\
147e83
+		  "    vstl %%v19,%[R_TMP2],16(%[R_OUT])\n\t"		\
147e83
+		  "11: \n\t" /* Update pointers.  */			\
147e83
+		  "    la %[R_IN],0(%[R_TMP],%[R_IN])\n\t"		\
147e83
+		  "    slgr %[R_INLEN],%[R_TMP]\n\t"			\
147e83
+		  "    la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t"		\
147e83
+		  "    slgr %[R_OUTLEN],%[R_TMP3]\n\t"			\
147e83
+		  /* Handle multibyte utf8-char with convert instruction. */ \
147e83
+		  "20: cu12 %[R_OUT],%[R_IN],1\n\t"			\
147e83
+		  "    jo 0b\n\t" /* Try vector implemenation again.  */ \
147e83
+		  "    lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1.  */ \
147e83
+		  "    lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2.  */ \
147e83
+		  ".machine pop"					\
147e83
+		  : /* outputs */ [R_IN] "+a" (pInput)			\
147e83
+		    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput)	\
147e83
+		    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp)	\
147e83
+		    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3)	\
147e83
+		    , [R_RES] "+d" (result)				\
147e83
+		  : /* inputs */					\
147e83
+		    [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT)		\
147e83
+		    , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT)		\
147e83
+		  : /* clobber list */ "memory", "cc"			\
147e83
+		    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
147e83
+		    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
147e83
+		    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31")	\
147e83
+		  );							\
147e83
+    inptr = pInput;							\
147e83
+    outptr = pOutput;							\
147e83
+  }
147e83
+#define BODY_FROM_VX BODY_FROM_HW (HW_FROM_VX)
147e83
+
147e83
 
147e83
-#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
147e83
-#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
147e83
-#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
147e83
-#define MAX_NEEDED_OUTPUT	MAX_NEEDED_TO
147e83
-#define LOOPFCT			FROM_LOOP
147e83
 /* The software implementation is based on the code in gconv_simple.c.  */
147e83
-#define BODY								\
147e83
+#define BODY_FROM_C							\
147e83
   {									\
147e83
-    if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH)				\
147e83
-      {									\
147e83
-	HARDWARE_CONVERT ("cu12 %0, %1, 1");				\
147e83
-									\
147e83
-	if (inptr != inend)						\
147e83
-	  {								\
147e83
-	    int i;							\
147e83
-	    for (i = 1; inptr + i < inend; ++i)				\
147e83
-	      if ((inptr[i] & 0xc0) != 0x80)				\
147e83
-		break;							\
147e83
-								\
147e83
-	    if (__glibc_likely (inptr + i == inend))			      \
147e83
-	      {								\
147e83
-		result = __GCONV_INCOMPLETE_INPUT;			\
147e83
-		break;							\
147e83
-	      }								\
147e83
-	    STANDARD_FROM_LOOP_ERR_HANDLER (i);				\
147e83
-	  }								\
147e83
-	continue;							\
147e83
-    }									\
147e83
-									\
147e83
     /* Next input byte.  */						\
147e83
     uint16_t ch = *inptr;						\
147e83
 									\
147e83
-    if (__glibc_likely (ch < 0x80))					      \
147e83
+    if (__glibc_likely (ch < 0x80))					\
147e83
       {									\
147e83
 	/* One byte sequence.  */					\
147e83
 	++inptr;							\
147e83
@@ -230,13 +309,13 @@ gconv_end (struct __gconv_step *data)
147e83
 	    cnt = 2;							\
147e83
 	    ch &= 0x1f;							\
147e83
 	  }								\
147e83
-        else if (__glibc_likely ((ch & 0xf0) == 0xe0))			      \
147e83
+	else if (__glibc_likely ((ch & 0xf0) == 0xe0))			\
147e83
 	  {								\
147e83
 	    /* We expect three bytes.  */				\
147e83
 	    cnt = 3;							\
147e83
 	    ch &= 0x0f;							\
147e83
 	  }								\
147e83
-	else if (__glibc_likely ((ch & 0xf8) == 0xf0))			      \
147e83
+	else if (__glibc_likely ((ch & 0xf8) == 0xf0))			\
147e83
 	  {								\
147e83
 	    /* We expect four bytes.  */				\
147e83
 	    cnt = 4;							\
147e83
@@ -257,7 +336,7 @@ gconv_end (struct __gconv_step *data)
147e83
 	    STANDARD_FROM_LOOP_ERR_HANDLER (i);				\
147e83
 	  }								\
147e83
 									\
147e83
-	if (__glibc_unlikely (inptr + cnt > inend))			      \
147e83
+	if (__glibc_unlikely (inptr + cnt > inend))			\
147e83
 	  {								\
147e83
 	    /* We don't have enough input.  But before we report	\
147e83
 	       that check that all the bytes are correct.  */		\
147e83
@@ -265,7 +344,7 @@ gconv_end (struct __gconv_step *data)
147e83
 	      if ((inptr[i] & 0xc0) != 0x80)				\
147e83
 		break;							\
147e83
 									\
147e83
-	    if (__glibc_likely (inptr + i == inend))			      \
147e83
+	    if (__glibc_likely (inptr + i == inend))			\
147e83
 	      {								\
147e83
 		result = __GCONV_INCOMPLETE_INPUT;			\
147e83
 		break;							\
147e83
@@ -280,23 +359,31 @@ gconv_end (struct __gconv_step *data)
147e83
 	       low) are needed.  */					\
147e83
 	    uint16_t zabcd, high, low;					\
147e83
 									\
147e83
-	    if (__glibc_unlikely (outptr + 4 > outend))			      \
147e83
+	    if (__glibc_unlikely (outptr + 4 > outend))			\
147e83
 	      {								\
147e83
 		/* Overflow in the output buffer.  */			\
147e83
 		result = __GCONV_FULL_OUTPUT;				\
147e83
 		break;							\
147e83
 	      }								\
147e83
 									\
147e83
+	    /* Check if tail-bytes >= 0x80, < 0xc0.  */			\
147e83
+	    for (i = 1; i < cnt; ++i)					\
147e83
+	      {								\
147e83
+		if ((inptr[i] & 0xc0) != 0x80)				\
147e83
+		  /* This is an illegal encoding.  */			\
147e83
+		  goto errout;						\
147e83
+	      }								\
147e83
+									\
147e83
 	    /* See Principles of Operations cu12.  */			\
147e83
 	    zabcd = (((inptr[0] & 0x7) << 2) |				\
147e83
-                     ((inptr[1] & 0x30) >> 4)) - 1;			\
147e83
+		     ((inptr[1] & 0x30) >> 4)) - 1;			\
147e83
 									\
147e83
 	    /* z-bit must be zero after subtracting 1.  */		\
147e83
 	    if (zabcd & 0x10)						\
147e83
 	      STANDARD_FROM_LOOP_ERR_HANDLER (4)			\
147e83
 									\
147e83
 	    high = (uint16_t)(0xd8 << 8);       /* high surrogate id */ \
147e83
-	    high |= zabcd << 6;	                        /* abcd bits */	\
147e83
+	    high |= zabcd << 6;                         /* abcd bits */	\
147e83
 	    high |= (inptr[1] & 0xf) << 2;              /* efgh bits */	\
147e83
 	    high |= (inptr[2] & 0x30) >> 4;               /* ij bits */	\
147e83
 									\
147e83
@@ -326,8 +413,19 @@ gconv_end (struct __gconv_step *data)
147e83
 		ch <<= 6;						\
147e83
 		ch |= byte & 0x3f;					\
147e83
 	      }								\
147e83
-	    inptr += cnt;						\
147e83
 									\
147e83
+	    /* If i < cnt, some trail byte was not >= 0x80, < 0xc0.	\
147e83
+	       If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
147e83
+	       have been represented with fewer than cnt bytes.  */	\
147e83
+	    if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)	\
147e83
+		/* Do not accept UTF-16 surrogates.  */			\
147e83
+		|| (ch >= 0xd800 && ch <= 0xdfff))			\
147e83
+	      {								\
147e83
+		/* This is an illegal encoding.  */			\
147e83
+		goto errout;						\
147e83
+	      }								\
147e83
+									\
147e83
+	    inptr += cnt;						\
147e83
 	  }								\
147e83
       }									\
147e83
     /* Now adjust the pointers and store the result.  */		\
147e83
@@ -335,43 +433,70 @@ gconv_end (struct __gconv_step *data)
147e83
     outptr += sizeof (uint16_t);					\
147e83
   }
147e83
 
147e83
+/* Generate loop-function with software implementation.  */
147e83
+#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
147e83
+#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
147e83
+#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
147e83
+#define MAX_NEEDED_OUTPUT	MAX_NEEDED_TO
147e83
+#define LOOPFCT			__from_utf8_loop_c
147e83
+#define LOOP_NEED_FLAGS
147e83
+#define BODY			BODY_FROM_C
147e83
+#include <iconv/loop.c>
147e83
+
147e83
+/* Generate loop-function with hardware utf-convert instruction.  */
147e83
+#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
147e83
+#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
147e83
+#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
147e83
+#define MAX_NEEDED_OUTPUT	MAX_NEEDED_TO
147e83
+#define LOOPFCT			__from_utf8_loop_etf3eh
147e83
 #define LOOP_NEED_FLAGS
147e83
+#define BODY			BODY_FROM_ETF3EH
147e83
 #include <iconv/loop.c>
147e83
 
147e83
+#if defined HAVE_S390_VX_ASM_SUPPORT
147e83
+/* Generate loop-function with hardware vector and utf-convert instructions.  */
147e83
+# define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
147e83
+# define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
147e83
+# define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
147e83
+# define MAX_NEEDED_OUTPUT	MAX_NEEDED_TO
147e83
+# define LOOPFCT		__from_utf8_loop_vx
147e83
+# define LOOP_NEED_FLAGS
147e83
+# define BODY			BODY_FROM_VX
147e83
+# include <iconv/loop.c>
147e83
+#endif
147e83
+
147e83
+
147e83
+/* Generate ifunc'ed loop function.  */
147e83
+__typeof(__from_utf8_loop_c)
147e83
+__attribute__ ((ifunc ("__from_utf8_loop_resolver")))
147e83
+__from_utf8_loop;
147e83
+
147e83
+static void *
147e83
+__from_utf8_loop_resolver (unsigned long int dl_hwcap)
147e83
+{
147e83
+#if defined HAVE_S390_VX_ASM_SUPPORT
147e83
+  if (dl_hwcap & HWCAP_S390_VX)
147e83
+    return __from_utf8_loop_vx;
147e83
+  else
147e83
+#endif
147e83
+  if (dl_hwcap & HWCAP_S390_ETF3EH)
147e83
+    return __from_utf8_loop_etf3eh;
147e83
+  else
147e83
+    return __from_utf8_loop_c;
147e83
+}
147e83
+
147e83
+strong_alias (__from_utf8_loop_c_single, __from_utf8_loop_single)
147e83
+
147e83
 /* Conversion from UTF-16 to UTF-8.  */
147e83
 
147e83
-#define MIN_NEEDED_INPUT	MIN_NEEDED_TO
147e83
-#define MAX_NEEDED_INPUT	MAX_NEEDED_TO
147e83
-#define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
147e83
-#define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
147e83
-#define LOOPFCT			TO_LOOP
147e83
 /* The software routine is based on the functionality of the S/390
147e83
    hardware instruction (cu21) as described in the Principles of
147e83
    Operation.  */
147e83
-#define BODY								\
147e83
+#define BODY_TO_C							\
147e83
   {									\
147e83
-    /* The hardware instruction currently fails to report an error for	\
147e83
-       isolated low surrogates so we have to disable the instruction	\
147e83
-       until this gets resolved.  */					\
147e83
-    if (0) /* (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) */			\
147e83
-      {									\
147e83
-	HARDWARE_CONVERT ("cu21 %0, %1, 1");				\
147e83
-	if (inptr != inend)						\
147e83
-	  {								\
147e83
-	    /* Check if the third byte is				\
147e83
-	       a valid start of a UTF-16 surrogate.  */			\
147e83
-	    if (inend - inptr == 3 && (inptr[3] & 0xfc) != 0xdc)	\
147e83
-	      STANDARD_TO_LOOP_ERR_HANDLER (3);				\
147e83
-									\
147e83
-	    result = __GCONV_INCOMPLETE_INPUT;				\
147e83
-	    break;							\
147e83
-	  }								\
147e83
-	continue;							\
147e83
-      }									\
147e83
-									\
147e83
     uint16_t c = get16 (inptr);						\
147e83
 									\
147e83
-    if (__glibc_likely (c <= 0x007f))					      \
147e83
+    if (__glibc_likely (c <= 0x007f))					\
147e83
       {									\
147e83
 	/* Single byte UTF-8 char.  */					\
147e83
 	*outptr = c & 0xff;						\
147e83
@@ -379,20 +504,20 @@ gconv_end (struct __gconv_step *data)
147e83
       }									\
147e83
     else if (c >= 0x0080 && c <= 0x07ff)				\
147e83
       {									\
147e83
-        /* Two byte UTF-8 char.  */					\
147e83
+	/* Two byte UTF-8 char.  */					\
147e83
 									\
147e83
-	if (__glibc_unlikely (outptr + 2 > outend))			      \
147e83
+	if (__glibc_unlikely (outptr + 2 > outend))			\
147e83
 	  {								\
147e83
 	    /* Overflow in the output buffer.  */			\
147e83
 	    result = __GCONV_FULL_OUTPUT;				\
147e83
 	    break;							\
147e83
 	  }								\
147e83
 									\
147e83
-        outptr[0] = 0xc0;						\
147e83
-        outptr[0] |= c >> 6;						\
147e83
+	outptr[0] = 0xc0;						\
147e83
+	outptr[0] |= c >> 6;						\
147e83
 									\
147e83
-        outptr[1] = 0x80;						\
147e83
-        outptr[1] |= c & 0x3f;						\
147e83
+	outptr[1] = 0x80;						\
147e83
+	outptr[1] |= c & 0x3f;						\
147e83
 									\
147e83
 	outptr += 2;							\
147e83
       }									\
147e83
@@ -400,7 +525,7 @@ gconv_end (struct __gconv_step *data)
147e83
       {									\
147e83
 	/* Three byte UTF-8 char.  */					\
147e83
 									\
147e83
-	if (__glibc_unlikely (outptr + 3 > outend))			      \
147e83
+	if (__glibc_unlikely (outptr + 3 > outend))			\
147e83
 	  {								\
147e83
 	    /* Overflow in the output buffer.  */			\
147e83
 	    result = __GCONV_FULL_OUTPUT;				\
147e83
@@ -419,22 +544,22 @@ gconv_end (struct __gconv_step *data)
147e83
       }									\
147e83
     else if (c >= 0xd800 && c <= 0xdbff)				\
147e83
       {									\
147e83
-        /* Four byte UTF-8 char.  */					\
147e83
+	/* Four byte UTF-8 char.  */					\
147e83
 	uint16_t low, uvwxy;						\
147e83
 									\
147e83
-	if (__glibc_unlikely (outptr + 4 > outend))			      \
147e83
+	if (__glibc_unlikely (outptr + 4 > outend))			\
147e83
 	  {								\
147e83
 	    /* Overflow in the output buffer.  */			\
147e83
 	    result = __GCONV_FULL_OUTPUT;				\
147e83
 	    break;							\
147e83
 	  }								\
147e83
-	inptr += 2;							\
147e83
-	if (__glibc_unlikely (inptr + 2 > inend))			      \
147e83
+	if (__glibc_unlikely (inptr + 4 > inend))			\
147e83
 	  {								\
147e83
 	    result = __GCONV_INCOMPLETE_INPUT;				\
147e83
 	    break;							\
147e83
 	  }								\
147e83
 									\
147e83
+	inptr += 2;							\
147e83
 	low = get16 (inptr);						\
147e83
 									\
147e83
 	if ((low & 0xfc00) != 0xdc00)					\
147e83
@@ -461,11 +586,221 @@ gconv_end (struct __gconv_step *data)
147e83
       }									\
147e83
     else								\
147e83
       {									\
147e83
-        STANDARD_TO_LOOP_ERR_HANDLER (2);				\
147e83
+	STANDARD_TO_LOOP_ERR_HANDLER (2);				\
147e83
       }									\
147e83
     inptr += 2;								\
147e83
   }
147e83
-#define LOOP_NEED_FLAGS
147e83
-#include <iconv/loop.c>
147e83
+
147e83
+#define BODY_TO_VX							\
147e83
+  {									\
147e83
+    size_t inlen  = inend - inptr;					\
147e83
+    size_t outlen  = outend - outptr;					\
147e83
+    unsigned long tmp, tmp2, tmp3;					\
147e83
+    asm volatile (".machine push\n\t"					\
147e83
+		  ".machine \"z13\"\n\t"				\
147e83
+		  ".machinemode \"zarch_nohighgprs\"\n\t"		\
147e83
+		  /* Setup to check for values <= 0x7f.  */		\
147e83
+		  "    larl %[R_TMP],9f\n\t"				\
147e83
+		  "    vlm %%v30,%%v31,0(%[R_TMP])\n\t"			\
147e83
+		  /* Loop which handles UTF-16 chars <=0x7f.  */	\
147e83
+		  "0:  clgijl %[R_INLEN],32,2f\n\t"			\
147e83
+		  "    clgijl %[R_OUTLEN],16,2f\n\t"			\
147e83
+		  "1:  vlm %%v16,%%v17,0(%[R_IN])\n\t"			\
147e83
+		  "    lghi %[R_TMP2],0\n\t"				\
147e83
+		  /* Check for > 1byte UTF-8 chars.  */			\
147e83
+		  "    vstrchs %%v19,%%v16,%%v30,%%v31\n\t"		\
147e83
+		  "    jno 10f\n\t" /* Jump away if not all bytes are 1byte \
147e83
+				       UTF8 chars.  */			\
147e83
+		  "    vstrchs %%v19,%%v17,%%v30,%%v31\n\t"		\
147e83
+		  "    jno 11f\n\t" /* Jump away if not all bytes are 1byte \
147e83
+				       UTF8 chars.  */			\
147e83
+		  /* Shorten to UTF-8.  */				\
147e83
+		  "    vpkh %%v18,%%v16,%%v17\n\t"			\
147e83
+		  "    la %[R_IN],32(%[R_IN])\n\t"			\
147e83
+		  "    aghi %[R_INLEN],-32\n\t"				\
147e83
+		  /* Store 16 bytes to buf_out.  */			\
147e83
+		  "    vst %%v18,0(%[R_OUT])\n\t"			\
147e83
+		  "    aghi %[R_OUTLEN],-16\n\t"			\
147e83
+		  "    la %[R_OUT],16(%[R_OUT])\n\t"			\
147e83
+		  "    clgijl %[R_INLEN],32,2f\n\t"			\
147e83
+		  "    clgijl %[R_OUTLEN],16,2f\n\t"			\
147e83
+		  "    j 1b\n\t"					\
147e83
+		  /* Setup to check for ch > 0x7f. (v30, v31)  */	\
147e83
+		  "9:  .short 0x7f,0x7f,0x0,0x0,0x0,0x0,0x0,0x0\n\t"	\
147e83
+		  "    .short 0x2000,0x2000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
147e83
+		  /* At least one byte is > 0x7f.			\
147e83
+		     Store the preceding 1-byte chars.  */		\
147e83
+		  "11: lghi %[R_TMP2],16\n\t" /* match was found in v17.  */ \
147e83
+		  "10:\n\t"						\
147e83
+		  "    vlgvb %[R_TMP],%%v19,7\n\t"			\
147e83
+		  /* Shorten to UTF-8.  */				\
147e83
+		  "    vpkh %%v18,%%v16,%%v17\n\t"			\
147e83
+		  "    ar %[R_TMP],%[R_TMP2]\n\t" /* Number of in bytes.  */ \
147e83
+		  "    srlg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes.  */ \
147e83
+		  "    ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store.  */ \
147e83
+		  "    jl 13f\n\t"					\
147e83
+		  "    vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t"		\
147e83
+		  /* Update pointers.  */				\
147e83
+		  "    la %[R_IN],0(%[R_TMP],%[R_IN])\n\t"		\
147e83
+		  "    slgr %[R_INLEN],%[R_TMP]\n\t"			\
147e83
+		  "    la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t"		\
147e83
+		  "    slgr %[R_OUTLEN],%[R_TMP3]\n\t"			\
147e83
+		  "13: \n\t"						\
147e83
+		  /* Calculate remaining uint16_t values in loaded vrs.  */ \
147e83
+		  "    lghi %[R_TMP2],16\n\t"				\
147e83
+		  "    slgr %[R_TMP2],%[R_TMP3]\n\t"			\
147e83
+		  "    llh %[R_TMP],0(%[R_IN])\n\t"			\
147e83
+		  "    aghi %[R_INLEN],-2\n\t"				\
147e83
+		  "    j 22f\n\t"					\
147e83
+		  /* Handle remaining bytes.  */			\
147e83
+		  "2:  \n\t"						\
147e83
+		  /* Zero, one or more bytes available?  */		\
147e83
+		  "    clgfi %[R_INLEN],1\n\t"				\
147e83
+		  "    locghie %[R_RES],%[RES_IN_FULL]\n\t" /* Only one byte.  */ \
147e83
+		  "    jle 99f\n\t" /* End if less than two bytes.  */	\
147e83
+		  /* Calculate remaining uint16_t values in inptr.  */	\
147e83
+		  "    srlg %[R_TMP2],%[R_INLEN],1\n\t"			\
147e83
+		  /* Handle multibyte utf8-char. */			\
147e83
+		  "20: llh %[R_TMP],0(%[R_IN])\n\t"			\
147e83
+		  "    aghi %[R_INLEN],-2\n\t"				\
147e83
+		  /* Test if ch is 1-byte UTF-8 char.  */		\
147e83
+		  "21: clijh %[R_TMP],0x7f,22f\n\t"			\
147e83
+		  /* Handle 1-byte UTF-8 char.  */			\
147e83
+		  "31: slgfi %[R_OUTLEN],1\n\t"				\
147e83
+		  "    jl 90f \n\t"					\
147e83
+		  "    stc %[R_TMP],0(%[R_OUT])\n\t"			\
147e83
+		  "    la %[R_IN],2(%[R_IN])\n\t"			\
147e83
+		  "    la %[R_OUT],1(%[R_OUT])\n\t"			\
147e83
+		  "    brctg %[R_TMP2],20b\n\t"				\
147e83
+		  "    j 0b\n\t" /* Switch to vx-loop.  */		\
147e83
+		  /* Test if ch is 2-byte UTF-8 char.  */		\
147e83
+		  "22: clfi %[R_TMP],0x7ff\n\t"				\
147e83
+		  "    jh 23f\n\t"					\
147e83
+		  /* Handle 2-byte UTF-8 char.  */			\
147e83
+		  "32: slgfi %[R_OUTLEN],2\n\t"				\
147e83
+		  "    jl 90f \n\t"					\
147e83
+		  "    llill %[R_TMP3],0xc080\n\t"			\
147e83
+		  "    la %[R_IN],2(%[R_IN])\n\t"			\
147e83
+		  "    risbgn %[R_TMP3],%[R_TMP],51,55,2\n\t" /* 1. byte.   */ \
147e83
+		  "    risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 2. byte.   */ \
147e83
+		  "    sth %[R_TMP3],0(%[R_OUT])\n\t"			\
147e83
+		  "    la %[R_OUT],2(%[R_OUT])\n\t"			\
147e83
+		  "    brctg %[R_TMP2],20b\n\t"				\
147e83
+		  "    j 0b\n\t" /* Switch to vx-loop.  */		\
147e83
+		  /* Test if ch is 3-byte UTF-8 char.  */		\
147e83
+		  "23: clfi %[R_TMP],0xd7ff\n\t"			\
147e83
+		  "    jh 24f\n\t"					\
147e83
+		  /* Handle 3-byte UTF-8 char.  */			\
147e83
+		  "33: slgfi %[R_OUTLEN],3\n\t"				\
147e83
+		  "    jl 90f \n\t"					\
147e83
+		  "    llilf %[R_TMP3],0xe08080\n\t"			\
147e83
+		  "    la %[R_IN],2(%[R_IN])\n\t"			\
147e83
+		  "    risbgn %[R_TMP3],%[R_TMP],44,47,4\n\t" /* 1. byte.  */ \
147e83
+		  "    risbgn %[R_TMP3],%[R_TMP],50,55,2\n\t" /* 2. byte.  */ \
147e83
+		  "    risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 3. byte.  */ \
147e83
+		  "    stcm %[R_TMP3],7,0(%[R_OUT])\n\t"		\
147e83
+		  "    la %[R_OUT],3(%[R_OUT])\n\t"			\
147e83
+		  "    brctg %[R_TMP2],20b\n\t"				\
147e83
+		  "    j 0b\n\t" /* Switch to vx-loop.  */		\
147e83
+		  /* Test if ch is 4-byte UTF-8 char.  */		\
147e83
+		  "24: clfi %[R_TMP],0xdfff\n\t"			\
147e83
+		  "    jh 33b\n\t" /* Handle this 3-byte UTF-8 char.  */ \
147e83
+		  "    clfi %[R_TMP],0xdbff\n\t"			\
147e83
+		  "    locghih %[R_RES],%[RES_IN_ILL]\n\t"		\
147e83
+		  "    jh 99f\n\t" /* Jump away if this is a low surrogate \
147e83
+				      without a preceding high surrogate.  */ \
147e83
+		  /* Handle 4-byte UTF-8 char.  */			\
147e83
+		  "34: slgfi %[R_OUTLEN],4\n\t"				\
147e83
+		  "    jl 90f \n\t"					\
147e83
+		  "    slgfi %[R_INLEN],2\n\t"				\
147e83
+		  "    locghil %[R_RES],%[RES_IN_FULL]\n\t"		\
147e83
+		  "    jl 99f\n\t" /* Jump away if low surrogate is missing.  */ \
147e83
+		  "    llilf %[R_TMP3],0xf0808080\n\t"			\
147e83
+		  "    aghi %[R_TMP],0x40\n\t"				\
147e83
+		  "    risbgn %[R_TMP3],%[R_TMP],37,39,16\n\t" /* 1. byte: uvw  */ \
147e83
+		  "    risbgn %[R_TMP3],%[R_TMP],42,43,14\n\t" /* 2. byte: xy  */ \
147e83
+		  "    risbgn %[R_TMP3],%[R_TMP],44,47,14\n\t" /* 2. byte: efgh  */ \
147e83
+		  "    risbgn %[R_TMP3],%[R_TMP],50,51,12\n\t" /* 3. byte: ij */ \
147e83
+		  "    llh %[R_TMP],2(%[R_IN])\n\t" /* Load low surrogate.  */ \
147e83
+		  "    risbgn %[R_TMP3],%[R_TMP],52,55,2\n\t" /* 3. byte: klmn  */ \
147e83
+		  "    risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 4. byte: opqrst  */ \
147e83
+		  "    nilf %[R_TMP],0xfc00\n\t"			\
147e83
+		  "    clfi %[R_TMP],0xdc00\n\t" /* Check if it starts with 0xdc00.  */ \
147e83
+		  "    locghine %[R_RES],%[RES_IN_ILL]\n\t"		\
147e83
+		  "    jne 99f\n\t" /* Jump away if low surrogate is invalid.  */ \
147e83
+		  "    st %[R_TMP3],0(%[R_OUT])\n\t"			\
147e83
+		  "    la %[R_IN],4(%[R_IN])\n\t"			\
147e83
+		  "    la %[R_OUT],4(%[R_OUT])\n\t"			\
147e83
+		  "    aghi %[R_TMP2],-2\n\t"				\
147e83
+		  "    jh 20b\n\t"					\
147e83
+		  "    j 0b\n\t" /* Switch to vx-loop.  */		\
147e83
+		  /* Exit with __GCONV_FULL_OUTPUT.  */			\
147e83
+		  "90: lghi %[R_RES],%[RES_OUT_FULL]\n\t"		\
147e83
+		  "99: \n\t"						\
147e83
+		  ".machine pop"					\
147e83
+		  : /* outputs */ [R_IN] "+a" (inptr)			\
147e83
+		    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (outptr)	\
147e83
+		    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp)	\
147e83
+		    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3)	\
147e83
+		    , [R_RES] "+d" (result)				\
147e83
+		  : /* inputs */					\
147e83
+		    [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT)		\
147e83
+		    , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT)		\
147e83
+		    , [RES_IN_FULL] "i" (__GCONV_INCOMPLETE_INPUT)	\
147e83
+		  : /* clobber list */ "memory", "cc"			\
147e83
+		    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
147e83
+		    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
147e83
+		    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31")	\
147e83
+		  );							\
147e83
+    if (__glibc_likely (inptr == inend)					\
147e83
+	|| result != __GCONV_ILLEGAL_INPUT)				\
147e83
+      break;								\
147e83
+									\
147e83
+    STANDARD_TO_LOOP_ERR_HANDLER (2);					\
147e83
+  }
147e83
+
147e83
+/* Generate loop-function with software implementation.  */
147e83
+#define MIN_NEEDED_INPUT	MIN_NEEDED_TO
147e83
+#define MAX_NEEDED_INPUT	MAX_NEEDED_TO
147e83
+#define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
147e83
+#define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
147e83
+#if defined HAVE_S390_VX_ASM_SUPPORT
147e83
+# define LOOPFCT		__to_utf8_loop_c
147e83
+# define BODY                   BODY_TO_C
147e83
+# define LOOP_NEED_FLAGS
147e83
+# include <iconv/loop.c>
147e83
+
147e83
+/* Generate loop-function with software implementation.  */
147e83
+# define MIN_NEEDED_INPUT	MIN_NEEDED_TO
147e83
+# define MAX_NEEDED_INPUT	MAX_NEEDED_TO
147e83
+# define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
147e83
+# define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
147e83
+# define LOOPFCT		__to_utf8_loop_vx
147e83
+# define BODY                   BODY_TO_VX
147e83
+# define LOOP_NEED_FLAGS
147e83
+# include <iconv/loop.c>
147e83
+
147e83
+/* Generate ifunc'ed loop function.  */
147e83
+__typeof(__to_utf8_loop_c)
147e83
+__attribute__ ((ifunc ("__to_utf8_loop_resolver")))
147e83
+__to_utf8_loop;
147e83
+
147e83
+static void *
147e83
+__to_utf8_loop_resolver (unsigned long int dl_hwcap)
147e83
+{
147e83
+  if (dl_hwcap & HWCAP_S390_VX)
147e83
+    return __to_utf8_loop_vx;
147e83
+  else
147e83
+    return __to_utf8_loop_c;
147e83
+}
147e83
+
147e83
+strong_alias (__to_utf8_loop_c_single, __to_utf8_loop_single)
147e83
+
147e83
+#else
147e83
+# define LOOPFCT		TO_LOOP
147e83
+# define BODY                   BODY_TO_C
147e83
+# define LOOP_NEED_FLAGS
147e83
+# include <iconv/loop.c>
147e83
+#endif /* !HAVE_S390_VX_ASM_SUPPORT  */
147e83
 
147e83
 #include <iconv/skeleton.c>
147e83
-- 
147e83
1.8.3.1
147e83