Blame SOURCES/glibc-rh1380680-12.patch

147e83
From c806cab89b52a644b5c563b8f1c8ae59abfc2c13 Mon Sep 17 00:00:00 2001
147e83
From: Stefan Liebler <stli@linux.vnet.ibm.com>
147e83
Date: Mon, 7 Nov 2016 16:22:17 +0100
147e83
Subject: [PATCH 12/17] S390: Optimize utf16-utf32 module.
147e83
147e83
Upstream commit 6896776c3c9c32fd22324e6de6737dd69ae73213
147e83
147e83
This patch reworks the s390 specific module to convert between utf16 and utf32.
147e83
Now ifunc is used to choose either the c or etf3eh (with convert utf
147e83
instruction) variants at runtime.
147e83
Furthermore, a new vector variant for z13 is introduced which will be built
147e83
and chosen if vector support is available at build / runtime.
147e83
147e83
In case of converting utf32 to utf16, the vector variant optimizes input of
147e83
2-byte utf16 characters. The convert utf instruction is used if a utf16
147e83
surrogate is found.
147e83
147e83
For the other direction utf16 to utf32, the cu24 instruction can't be re-
147e83
enabled, because it does not report an error, if the input-stream consists of
147e83
a single low surrogate utf16 char (e.g. 0xdc00). This applies to the newest z13,
147e83
too. Thus there is only the c or the new vector variant, which can handle utf16
147e83
surrogate characters.
147e83
147e83
This patch also fixes some whitespace errors. Furthermore, the etf3eh variant is
147e83
handling the "UTF-xx//IGNORE" case now. Previously, it ignored the ignore-case and
147e83
always stopped at an error.
147e83
147e83
ChangeLog:
147e83
147e83
	* sysdeps/s390/s390-64/utf16-utf32-z9.c: Use ifunc to select c,
147e83
	etf3eh or new vector loop-variant.
147e83
---
147e83
 sysdeps/s390/s390-64/utf16-utf32-z9.c | 471 +++++++++++++++++++++++++++-------
147e83
 1 file changed, 379 insertions(+), 92 deletions(-)
147e83
147e83
diff --git a/sysdeps/s390/s390-64/utf16-utf32-z9.c b/sysdeps/s390/s390-64/utf16-utf32-z9.c
147e83
index e6a033d..33594f1 100644
147e83
--- a/sysdeps/s390/s390-64/utf16-utf32-z9.c
147e83
+++ b/sysdeps/s390/s390-64/utf16-utf32-z9.c
147e83
@@ -30,47 +30,27 @@
147e83
 #include <dl-procinfo.h>
147e83
 #include <gconv.h>
147e83
 
147e83
+#if defined HAVE_S390_VX_GCC_SUPPORT
147e83
+# define ASM_CLOBBER_VR(NR) , NR
147e83
+#else
147e83
+# define ASM_CLOBBER_VR(NR)
147e83
+#endif
147e83
+
147e83
 /* UTF-32 big endian byte order mark.  */
147e83
 #define BOM_UTF32               0x0000feffu
147e83
 
147e83
 /* UTF-16 big endian byte order mark.  */
147e83
-#define BOM_UTF16	        0xfeff
147e83
+#define BOM_UTF16               0xfeff
147e83
 
147e83
 #define DEFINE_INIT		0
147e83
 #define DEFINE_FINI		0
147e83
 #define MIN_NEEDED_FROM		2
147e83
 #define MAX_NEEDED_FROM		4
147e83
 #define MIN_NEEDED_TO		4
147e83
-#define FROM_LOOP		from_utf16_loop
147e83
-#define TO_LOOP			to_utf16_loop
147e83
+#define FROM_LOOP		__from_utf16_loop
147e83
+#define TO_LOOP			__to_utf16_loop
147e83
 #define FROM_DIRECTION		(dir == from_utf16)
147e83
 #define ONE_DIRECTION           0
147e83
-#define PREPARE_LOOP							\
147e83
-  enum direction dir = ((struct utf16_data *) step->__data)->dir;	\
147e83
-  int emit_bom = ((struct utf16_data *) step->__data)->emit_bom;	\
147e83
-									\
147e83
-  if (emit_bom && !data->__internal_use					\
147e83
-      && data->__invocation_counter == 0)				\
147e83
-    {									\
147e83
-      if (dir == to_utf16)						\
147e83
-	{								\
147e83
-          /* Emit the UTF-16 Byte Order Mark.  */			\
147e83
-          if (__glibc_unlikely (outbuf + 2 > outend))			      \
147e83
-	    return __GCONV_FULL_OUTPUT;					\
147e83
-									\
147e83
-	  put16u (outbuf, BOM_UTF16);					\
147e83
-	  outbuf += 2;							\
147e83
-	}								\
147e83
-      else								\
147e83
-	{								\
147e83
-          /* Emit the UTF-32 Byte Order Mark.  */			\
147e83
-	  if (__glibc_unlikely (outbuf + 4 > outend))			      \
147e83
-	    return __GCONV_FULL_OUTPUT;					\
147e83
-									\
147e83
-	  put32u (outbuf, BOM_UTF32);					\
147e83
-	  outbuf += 4;							\
147e83
-	}								\
147e83
-    }
147e83
 
147e83
 /* Direction of the transformation.  */
147e83
 enum direction
147e83
@@ -169,16 +149,16 @@ gconv_end (struct __gconv_step *data)
147e83
     register unsigned long long outlen __asm__("11") = outend - outptr;	\
147e83
     uint64_t cc = 0;							\
147e83
 									\
147e83
-    __asm__ volatile (".machine push       \n\t"			\
147e83
-		      ".machine \"z9-109\" \n\t"			\
147e83
-		      "0: " INSTRUCTION "  \n\t"			\
147e83
-		      ".machine pop        \n\t"			\
147e83
-		      "   jo     0b        \n\t"			\
147e83
-		      "   ipm    %2        \n"				\
147e83
-		      : "+a" (pOutput), "+a" (pInput), "+d" (cc),	\
147e83
-		      "+d" (outlen), "+d" (inlen)			\
147e83
-		      :							\
147e83
-		      : "cc", "memory");				\
147e83
+    __asm__ __volatile__ (".machine push       \n\t"			\
147e83
+			  ".machine \"z9-109\" \n\t"			\
147e83
+			  "0: " INSTRUCTION "  \n\t"			\
147e83
+			  ".machine pop        \n\t"			\
147e83
+			  "   jo     0b        \n\t"			\
147e83
+			  "   ipm    %2        \n"			\
147e83
+			  : "+a" (pOutput), "+a" (pInput), "+d" (cc),	\
147e83
+			    "+d" (outlen), "+d" (inlen)			\
147e83
+			  :						\
147e83
+			  : "cc", "memory");				\
147e83
 									\
147e83
     inptr = pInput;							\
147e83
     outptr = pOutput;							\
147e83
@@ -187,44 +167,46 @@ gconv_end (struct __gconv_step *data)
147e83
     if (cc == 1)							\
147e83
       {									\
147e83
 	result = __GCONV_FULL_OUTPUT;					\
147e83
-	break;								\
147e83
       }									\
147e83
     else if (cc == 2)							\
147e83
       {									\
147e83
 	result = __GCONV_ILLEGAL_INPUT;					\
147e83
-	break;								\
147e83
       }									\
147e83
   }
147e83
 
147e83
+#define PREPARE_LOOP							\
147e83
+  enum direction dir = ((struct utf16_data *) step->__data)->dir;	\
147e83
+  int emit_bom = ((struct utf16_data *) step->__data)->emit_bom;	\
147e83
+									\
147e83
+  if (emit_bom && !data->__internal_use					\
147e83
+      && data->__invocation_counter == 0)				\
147e83
+    {									\
147e83
+      if (dir == to_utf16)						\
147e83
+	{								\
147e83
+	  /* Emit the UTF-16 Byte Order Mark.  */			\
147e83
+	  if (__glibc_unlikely (outbuf + 2 > outend))			\
147e83
+	    return __GCONV_FULL_OUTPUT;					\
147e83
+									\
147e83
+	  put16u (outbuf, BOM_UTF16);					\
147e83
+	  outbuf += 2;							\
147e83
+	}								\
147e83
+      else								\
147e83
+	{								\
147e83
+	  /* Emit the UTF-32 Byte Order Mark.  */			\
147e83
+	  if (__glibc_unlikely (outbuf + 4 > outend))			\
147e83
+	    return __GCONV_FULL_OUTPUT;					\
147e83
+									\
147e83
+	  put32u (outbuf, BOM_UTF32);					\
147e83
+	  outbuf += 4;							\
147e83
+	}								\
147e83
+    }
147e83
+
147e83
 /* Conversion function from UTF-16 to UTF-32 internal/BE.  */
147e83
 
147e83
-#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
147e83
-#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
147e83
-#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
147e83
-#define LOOPFCT			FROM_LOOP
147e83
 /* The software routine is copied from utf-16.c (minus bytes
147e83
    swapping).  */
147e83
-#define BODY								\
147e83
+#define BODY_FROM_C							\
147e83
   {									\
147e83
-    /* The hardware instruction currently fails to report an error for	\
147e83
-       isolated low surrogates so we have to disable the instruction	\
147e83
-       until this gets resolved.  */					\
147e83
-    if (0) /* (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) */			\
147e83
-      {									\
147e83
-	HARDWARE_CONVERT ("cu24 %0, %1, 1");				\
147e83
-	if (inptr != inend)						\
147e83
-	  {								\
147e83
-	    /* Check if the third byte is				\
147e83
-	       a valid start of a UTF-16 surrogate.  */			\
147e83
-	    if (inend - inptr == 3 && (inptr[3] & 0xfc) != 0xdc)	\
147e83
-	      STANDARD_FROM_LOOP_ERR_HANDLER (3);			\
147e83
-									\
147e83
-	    result = __GCONV_INCOMPLETE_INPUT;				\
147e83
-	    break;							\
147e83
-	  }								\
147e83
-	continue;							\
147e83
-      }									\
147e83
-									\
147e83
     uint16_t u1 = get16 (inptr);					\
147e83
 									\
147e83
     if (__builtin_expect (u1 < 0xd800, 1) || u1 > 0xdfff)		\
147e83
@@ -235,15 +217,15 @@ gconv_end (struct __gconv_step *data)
147e83
       }									\
147e83
     else								\
147e83
       {									\
147e83
-        /* An isolated low-surrogate was found.  This has to be         \
147e83
+	/* An isolated low-surrogate was found.  This has to be         \
147e83
 	   considered ill-formed.  */					\
147e83
-        if (__glibc_unlikely (u1 >= 0xdc00))				      \
147e83
+	if (__glibc_unlikely (u1 >= 0xdc00))				\
147e83
 	  {								\
147e83
 	    STANDARD_FROM_LOOP_ERR_HANDLER (2);				\
147e83
 	  }								\
147e83
 	/* It's a surrogate character.  At least the first word says	\
147e83
 	   it is.  */							\
147e83
-	if (__glibc_unlikely (inptr + 4 > inend))			      \
147e83
+	if (__glibc_unlikely (inptr + 4 > inend))			\
147e83
 	  {								\
147e83
 	    /* We don't have enough input for another complete input	\
147e83
 	       character.  */						\
147e83
@@ -266,48 +248,200 @@ gconv_end (struct __gconv_step *data)
147e83
       }									\
147e83
     outptr += 4;							\
147e83
   }
147e83
-#define LOOP_NEED_FLAGS
147e83
-#include <iconv/loop.c>
147e83
+
147e83
+#define BODY_FROM_VX							\
147e83
+  {									\
147e83
+    size_t inlen = inend - inptr;					\
147e83
+    size_t outlen = outend - outptr;					\
147e83
+    unsigned long tmp, tmp2, tmp3;					\
147e83
+    asm volatile (".machine push\n\t"					\
147e83
+		  ".machine \"z13\"\n\t"				\
147e83
+		  ".machinemode \"zarch_nohighgprs\"\n\t"		\
147e83
+		  /* Setup to check for surrogates.  */			\
147e83
+		  "    larl %[R_TMP],9f\n\t"				\
147e83
+		  "    vlm %%v30,%%v31,0(%[R_TMP])\n\t"			\
147e83
+		  /* Loop which handles UTF-16 chars <0xd800, >0xdfff.  */ \
147e83
+		  "0:  clgijl %[R_INLEN],16,2f\n\t"			\
147e83
+		  "    clgijl %[R_OUTLEN],32,2f\n\t"			\
147e83
+		  "1:  vl %%v16,0(%[R_IN])\n\t"				\
147e83
+		  /* Check for surrogate chars.  */			\
147e83
+		  "    vstrchs %%v19,%%v16,%%v30,%%v31\n\t"		\
147e83
+		  "    jno 10f\n\t"					\
147e83
+		  /* Enlarge to UTF-32.  */				\
147e83
+		  "    vuplhh %%v17,%%v16\n\t"				\
147e83
+		  "    la %[R_IN],16(%[R_IN])\n\t"			\
147e83
+		  "    vupllh %%v18,%%v16\n\t"				\
147e83
+		  "    aghi %[R_INLEN],-16\n\t"				\
147e83
+		  /* Store 32 bytes to buf_out.  */			\
147e83
+		  "    vstm %%v17,%%v18,0(%[R_OUT])\n\t"		\
147e83
+		  "    aghi %[R_OUTLEN],-32\n\t"			\
147e83
+		  "    la %[R_OUT],32(%[R_OUT])\n\t"			\
147e83
+		  "    clgijl %[R_INLEN],16,2f\n\t"			\
147e83
+		  "    clgijl %[R_OUTLEN],32,2f\n\t"			\
147e83
+		  "    j 1b\n\t"					\
147e83
+		  /* Setup to check for ch >= 0xd800 && ch <= 0xdfff. (v30, v31)  */ \
147e83
+		  "9:  .short 0xd800,0xdfff,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
147e83
+		  "    .short 0xa000,0xc000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
147e83
+		  /* At least on uint16_t is in range of surrogates.	\
147e83
+		     Store the preceding chars.  */			\
147e83
+		  "10: vlgvb %[R_TMP],%%v19,7\n\t"			\
147e83
+		  "    vuplhh %%v17,%%v16\n\t"				\
147e83
+		  "    sllg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes.  */ \
147e83
+		  "    ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store.  */ \
147e83
+		  "    jl 12f\n\t"					\
147e83
+		  "    vstl %%v17,%[R_TMP2],0(%[R_OUT])\n\t"		\
147e83
+		  "    vupllh %%v18,%%v16\n\t"				\
147e83
+		  "    ahi %[R_TMP2],-16\n\t"				\
147e83
+		  "    jl 11f\n\t"					\
147e83
+		  "    vstl %%v18,%[R_TMP2],16(%[R_OUT])\n\t"		\
147e83
+		  "11: \n\t" /* Update pointers.  */			\
147e83
+		  "    la %[R_IN],0(%[R_TMP],%[R_IN])\n\t"		\
147e83
+		  "    slgr %[R_INLEN],%[R_TMP]\n\t"			\
147e83
+		  "    la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t"		\
147e83
+		  "    slgr %[R_OUTLEN],%[R_TMP3]\n\t"			\
147e83
+		  /* Calculate remaining uint16_t values in loaded vrs.  */ \
147e83
+		  "12: lghi %[R_TMP2],16\n\t"				\
147e83
+		  "    sgr %[R_TMP2],%[R_TMP]\n\t"			\
147e83
+		  "    srl %[R_TMP2],1\n\t"				\
147e83
+		  "    llh %[R_TMP],0(%[R_IN])\n\t"			\
147e83
+		  "    aghi %[R_OUTLEN],-4\n\t"				\
147e83
+		  "    j 16f\n\t"					\
147e83
+		  /* Handle remaining bytes.  */			\
147e83
+		  "2:  \n\t"						\
147e83
+		  /* Zero, one or more bytes available?  */		\
147e83
+		  "    clgfi %[R_INLEN],1\n\t"				\
147e83
+		  "    je 97f\n\t" /* Only one byte available.  */	\
147e83
+		  "    jl 99f\n\t" /* End if no bytes available.  */	\
147e83
+		  /* Calculate remaining uint16_t values in inptr.  */	\
147e83
+		  "    srlg %[R_TMP2],%[R_INLEN],1\n\t"			\
147e83
+		  /* Handle remaining uint16_t values.  */		\
147e83
+		  "13: llh %[R_TMP],0(%[R_IN])\n\t"			\
147e83
+		  "    slgfi %[R_OUTLEN],4\n\t"				\
147e83
+		  "    jl 96f \n\t"					\
147e83
+		  "    clfi %[R_TMP],0xd800\n\t"			\
147e83
+		  "    jhe 15f\n\t"					\
147e83
+		  "14: st %[R_TMP],0(%[R_OUT])\n\t"			\
147e83
+		  "    la %[R_IN],2(%[R_IN])\n\t"			\
147e83
+		  "    aghi %[R_INLEN],-2\n\t"				\
147e83
+		  "    la %[R_OUT],4(%[R_OUT])\n\t"			\
147e83
+		  "    brctg %[R_TMP2],13b\n\t"				\
147e83
+		  "    j 0b\n\t" /* Switch to vx-loop.  */		\
147e83
+		  /* Handle UTF-16 surrogate pair.  */			\
147e83
+		  "15: clfi %[R_TMP],0xdfff\n\t"			\
147e83
+		  "    jh 14b\n\t" /* Jump away if ch > 0xdfff.  */	\
147e83
+		  "16: clfi %[R_TMP],0xdc00\n\t"			\
147e83
+		  "    jhe 98f\n\t" /* Jump away in case of low-surrogate.  */ \
147e83
+		  "    slgfi %[R_INLEN],4\n\t"				\
147e83
+		  "    jl 97f\n\t" /* Big enough input?  */		\
147e83
+		  "    llh %[R_TMP3],2(%[R_IN])\n\t" /* Load low surrogate.  */ \
147e83
+		  "    slfi %[R_TMP],0xd7c0\n\t"			\
147e83
+		  "    sll %[R_TMP],10\n\t"				\
147e83
+		  "    risbgn %[R_TMP],%[R_TMP3],54,63,0\n\t" /* Insert klmnopqrst.  */ \
147e83
+		  "    nilf %[R_TMP3],0xfc00\n\t"			\
147e83
+		  "    clfi %[R_TMP3],0xdc00\n\t" /* Check if it starts with 0xdc00.  */ \
147e83
+		  "    jne 98f\n\t"					\
147e83
+		  "    st %[R_TMP],0(%[R_OUT])\n\t"			\
147e83
+		  "    la %[R_IN],4(%[R_IN])\n\t"			\
147e83
+		  "    la %[R_OUT],4(%[R_OUT])\n\t"			\
147e83
+		  "    aghi %[R_TMP2],-2\n\t"				\
147e83
+		  "    jh 13b\n\t" /* Handle remaining uint16_t values.  */ \
147e83
+		  "    j 0b\n\t" /* Switch to vx-loop.  */		\
147e83
+		  "96: \n\t" /* Return full output.  */			\
147e83
+		  "    lghi %[R_RES],%[RES_OUT_FULL]\n\t"		\
147e83
+		  "    j 99f\n\t"					\
147e83
+		  "97: \n\t" /* Return incomplete input.  */		\
147e83
+		  "    lghi %[R_RES],%[RES_IN_FULL]\n\t"		\
147e83
+		  "    j 99f\n\t"					\
147e83
+		  "98:\n\t" /* Return Illegal character.  */		\
147e83
+		  "    lghi %[R_RES],%[RES_IN_ILL]\n\t"			\
147e83
+		  "99:\n\t"						\
147e83
+		  ".machine pop"					\
147e83
+		  : /* outputs */ [R_IN] "+a" (inptr)			\
147e83
+		    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (outptr)	\
147e83
+		    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp)	\
147e83
+		    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3)	\
147e83
+		    , [R_RES] "+d" (result)				\
147e83
+		  : /* inputs */					\
147e83
+		    [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT)		\
147e83
+		    , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT)		\
147e83
+		    , [RES_IN_FULL] "i" (__GCONV_INCOMPLETE_INPUT)	\
147e83
+		  : /* clobber list */ "memory", "cc"			\
147e83
+		    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
147e83
+		    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
147e83
+		    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31")	\
147e83
+		  );							\
147e83
+    if (__glibc_likely (inptr == inend)					\
147e83
+	|| result != __GCONV_ILLEGAL_INPUT)				\
147e83
+      break;								\
147e83
+									\
147e83
+    STANDARD_FROM_LOOP_ERR_HANDLER (2);					\
147e83
+  }
147e83
+
147e83
+
147e83
+/* Generate loop-function with software routing.  */
147e83
+#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
147e83
+#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
147e83
+#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
147e83
+#if defined HAVE_S390_VX_ASM_SUPPORT
147e83
+# define LOOPFCT		__from_utf16_loop_c
147e83
+# define LOOP_NEED_FLAGS
147e83
+# define BODY			BODY_FROM_C
147e83
+# include <iconv/loop.c>
147e83
+
147e83
+/* Generate loop-function with hardware vector instructions.  */
147e83
+# define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
147e83
+# define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
147e83
+# define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
147e83
+# define LOOPFCT		__from_utf16_loop_vx
147e83
+# define LOOP_NEED_FLAGS
147e83
+# define BODY			BODY_FROM_VX
147e83
+# include <iconv/loop.c>
147e83
+
147e83
+/* Generate ifunc'ed loop function.  */
147e83
+__typeof(__from_utf16_loop_c)
147e83
+__attribute__ ((ifunc ("__from_utf16_loop_resolver")))
147e83
+__from_utf16_loop;
147e83
+
147e83
+static void *
147e83
+__from_utf16_loop_resolver (unsigned long int dl_hwcap)
147e83
+{
147e83
+  if (dl_hwcap & HWCAP_S390_VX)
147e83
+    return __from_utf16_loop_vx;
147e83
+  else
147e83
+    return __from_utf16_loop_c;
147e83
+}
147e83
+
147e83
+strong_alias (__from_utf16_loop_c_single, __from_utf16_loop_single)
147e83
+#else
147e83
+# define LOOPFCT		FROM_LOOP
147e83
+# define LOOP_NEED_FLAGS
147e83
+# define BODY			BODY_FROM_C
147e83
+# include <iconv/loop.c>
147e83
+#endif
147e83
 
147e83
 /* Conversion from UTF-32 internal/BE to UTF-16.  */
147e83
 
147e83
-#define MIN_NEEDED_INPUT	MIN_NEEDED_TO
147e83
-#define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
147e83
-#define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
147e83
-#define LOOPFCT			TO_LOOP
147e83
 /* The software routine is copied from utf-16.c (minus bytes
147e83
    swapping).  */
147e83
-#define BODY								\
147e83
+#define BODY_TO_C							\
147e83
   {									\
147e83
-    if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH)				\
147e83
-      {									\
147e83
-	HARDWARE_CONVERT ("cu42 %0, %1");				\
147e83
-									\
147e83
-	if (inptr != inend)						\
147e83
-	  {								\
147e83
-	    result = __GCONV_INCOMPLETE_INPUT;				\
147e83
-	    break;							\
147e83
-	  }								\
147e83
-	continue;							\
147e83
-      }									\
147e83
-									\
147e83
     uint32_t c = get32 (inptr);						\
147e83
 									\
147e83
     if (__builtin_expect (c <= 0xd7ff, 1)				\
147e83
 	|| (c >=0xdc00 && c <= 0xffff))					\
147e83
       {									\
147e83
-        /* Two UTF-16 chars.  */					\
147e83
-        put16 (outptr, c);						\
147e83
+	/* Two UTF-16 chars.  */					\
147e83
+	put16 (outptr, c);						\
147e83
       }									\
147e83
     else if (__builtin_expect (c >= 0x10000, 1)				\
147e83
 	     && __builtin_expect (c <= 0x10ffff, 1))			\
147e83
       {									\
147e83
 	/* Four UTF-16 chars.  */					\
147e83
-        uint16_t zabcd = ((c & 0x1f0000) >> 16) - 1;			\
147e83
+	uint16_t zabcd = ((c & 0x1f0000) >> 16) - 1;			\
147e83
 	uint16_t out;							\
147e83
 									\
147e83
 	/* Generate a surrogate character.  */				\
147e83
-	if (__glibc_unlikely (outptr + 4 > outend))			      \
147e83
+	if (__glibc_unlikely (outptr + 4 > outend))			\
147e83
 	  {								\
147e83
 	    /* Overflow in the output buffer.  */			\
147e83
 	    result = __GCONV_FULL_OUTPUT;				\
147e83
@@ -326,12 +460,165 @@ gconv_end (struct __gconv_step *data)
147e83
       }									\
147e83
     else								\
147e83
       {									\
147e83
-        STANDARD_TO_LOOP_ERR_HANDLER (4);				\
147e83
+	STANDARD_TO_LOOP_ERR_HANDLER (4);				\
147e83
       }									\
147e83
     outptr += 2;							\
147e83
     inptr += 4;								\
147e83
   }
147e83
+
147e83
+#define BODY_TO_ETF3EH							\
147e83
+  {									\
147e83
+    HARDWARE_CONVERT ("cu42 %0, %1");					\
147e83
+									\
147e83
+    if (__glibc_likely (inptr == inend)					\
147e83
+	|| result == __GCONV_FULL_OUTPUT)				\
147e83
+      break;								\
147e83
+									\
147e83
+    if (inptr + 4 > inend)						\
147e83
+      {									\
147e83
+	result = __GCONV_INCOMPLETE_INPUT;				\
147e83
+	break;								\
147e83
+      }									\
147e83
+									\
147e83
+    STANDARD_TO_LOOP_ERR_HANDLER (4);					\
147e83
+  }
147e83
+
147e83
+#define BODY_TO_VX							\
147e83
+  {									\
147e83
+    register const unsigned char* pInput asm ("8") = inptr;		\
147e83
+    register size_t inlen asm ("9") = inend - inptr;			\
147e83
+    register unsigned char* pOutput asm ("10") = outptr;		\
147e83
+    register size_t outlen asm("11") = outend - outptr;			\
147e83
+    unsigned long tmp, tmp2, tmp3;					\
147e83
+    asm volatile (".machine push\n\t"					\
147e83
+		  ".machine \"z13\"\n\t"				\
147e83
+		  ".machinemode \"zarch_nohighgprs\"\n\t"		\
147e83
+		  /* Setup to check for surrogates.  */			\
147e83
+		  "    larl %[R_TMP],9f\n\t"				\
147e83
+		  "    vlm %%v30,%%v31,0(%[R_TMP])\n\t"			\
147e83
+		  /* Loop which handles UTF-16 chars			\
147e83
+		     ch < 0xd800 || (ch > 0xdfff && ch < 0x10000).  */	\
147e83
+		  "0:  clgijl %[R_INLEN],32,20f\n\t"			\
147e83
+		  "    clgijl %[R_OUTLEN],16,20f\n\t"			\
147e83
+		  "1:  vlm %%v16,%%v17,0(%[R_IN])\n\t"			\
147e83
+		  "    lghi %[R_TMP2],0\n\t"				\
147e83
+		  /* Shorten to UTF-16.  */				\
147e83
+		  "    vpkf %%v18,%%v16,%%v17\n\t"			\
147e83
+		  /* Check for surrogate chars.  */			\
147e83
+		  "    vstrcfs %%v19,%%v16,%%v30,%%v31\n\t"		\
147e83
+		  "    jno 10f\n\t"					\
147e83
+		  "    vstrcfs %%v19,%%v17,%%v30,%%v31\n\t"		\
147e83
+		  "    jno 11f\n\t"					\
147e83
+		  /* Store 16 bytes to buf_out.  */			\
147e83
+		  "    vst %%v18,0(%[R_OUT])\n\t"			\
147e83
+		  "    la %[R_IN],32(%[R_IN])\n\t"			\
147e83
+		  "    aghi %[R_INLEN],-32\n\t"				\
147e83
+		  "    aghi %[R_OUTLEN],-16\n\t"			\
147e83
+		  "    la %[R_OUT],16(%[R_OUT])\n\t"			\
147e83
+		  "    clgijl %[R_INLEN],32,20f\n\t"			\
147e83
+		  "    clgijl %[R_OUTLEN],16,20f\n\t"			\
147e83
+		  "    j 1b\n\t"					\
147e83
+		  /* Setup to check for ch >= 0xd800 && ch <= 0xdfff	\
147e83
+		     and check for ch >= 0x10000. (v30, v31)  */	\
147e83
+		  "9:  .long 0xd800,0xdfff,0x10000,0x10000\n\t"		\
147e83
+		  "    .long 0xa0000000,0xc0000000, 0xa0000000,0xa0000000\n\t" \
147e83
+		  /* At least on UTF32 char is in range of surrogates.	\
147e83
+		     Store the preceding characters.  */		\
147e83
+		  "11: ahi %[R_TMP2],16\n\t"				\
147e83
+		  "10: vlgvb %[R_TMP],%%v19,7\n\t"			\
147e83
+		  "    agr %[R_TMP],%[R_TMP2]\n\t"			\
147e83
+		  "    srlg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes.  */ \
147e83
+		  "    ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store.  */ \
147e83
+		  "    jl 20f\n\t"					\
147e83
+		  "    vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t"		\
147e83
+		  /* Update pointers.  */				\
147e83
+		  "    la %[R_IN],0(%[R_TMP],%[R_IN])\n\t"		\
147e83
+		  "    slgr %[R_INLEN],%[R_TMP]\n\t"			\
147e83
+		  "    la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t"		\
147e83
+		  "    slgr %[R_OUTLEN],%[R_TMP3]\n\t"			\
147e83
+		  /* Handles UTF16 surrogates with convert instruction.  */ \
147e83
+		  "20: cu42 %[R_OUT],%[R_IN]\n\t"			\
147e83
+		  "    jo 0b\n\t" /* Try vector implemenation again.  */ \
147e83
+		  "    lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1.  */ \
147e83
+		  "    lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2.  */ \
147e83
+		  ".machine pop"					\
147e83
+		  : /* outputs */ [R_IN] "+a" (pInput)			\
147e83
+		    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput)	\
147e83
+		    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp)	\
147e83
+		    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3)	\
147e83
+		    , [R_RES] "+d" (result)				\
147e83
+		  : /* inputs */					\
147e83
+		    [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT)		\
147e83
+		    , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT)		\
147e83
+		    , [RES_IN_FULL] "i" (__GCONV_INCOMPLETE_INPUT)	\
147e83
+		  : /* clobber list */ "memory", "cc"			\
147e83
+		    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
147e83
+		    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
147e83
+		    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31")	\
147e83
+		  );							\
147e83
+    inptr = pInput;							\
147e83
+    outptr = pOutput;							\
147e83
+									\
147e83
+    if (__glibc_likely (inptr == inend)					\
147e83
+	|| result == __GCONV_FULL_OUTPUT)				\
147e83
+      break;								\
147e83
+    if (inptr + 4 > inend)						\
147e83
+      {									\
147e83
+	result = __GCONV_INCOMPLETE_INPUT;				\
147e83
+	break;								\
147e83
+      }									\
147e83
+    STANDARD_TO_LOOP_ERR_HANDLER (4);					\
147e83
+  }
147e83
+
147e83
+/* Generate loop-function with software routing.  */
147e83
+#define MIN_NEEDED_INPUT	MIN_NEEDED_TO
147e83
+#define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
147e83
+#define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
147e83
+#define LOOPFCT			__to_utf16_loop_c
147e83
+#define LOOP_NEED_FLAGS
147e83
+#define BODY			BODY_TO_C
147e83
+#include <iconv/loop.c>
147e83
+
147e83
+/* Generate loop-function with hardware utf-convert instruction.  */
147e83
+#define MIN_NEEDED_INPUT	MIN_NEEDED_TO
147e83
+#define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
147e83
+#define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
147e83
+#define LOOPFCT			__to_utf16_loop_etf3eh
147e83
 #define LOOP_NEED_FLAGS
147e83
+#define BODY			BODY_TO_ETF3EH
147e83
 #include <iconv/loop.c>
147e83
 
147e83
+#if defined HAVE_S390_VX_ASM_SUPPORT
147e83
+/* Generate loop-function with hardware vector instructions.  */
147e83
+# define MIN_NEEDED_INPUT	MIN_NEEDED_TO
147e83
+# define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
147e83
+# define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
147e83
+# define LOOPFCT		__to_utf16_loop_vx
147e83
+# define LOOP_NEED_FLAGS
147e83
+# define BODY			BODY_TO_VX
147e83
+# include <iconv/loop.c>
147e83
+#endif
147e83
+
147e83
+/* Generate ifunc'ed loop function.  */
147e83
+__typeof(__to_utf16_loop_c)
147e83
+__attribute__ ((ifunc ("__to_utf16_loop_resolver")))
147e83
+__to_utf16_loop;
147e83
+
147e83
+static void *
147e83
+__to_utf16_loop_resolver (unsigned long int dl_hwcap)
147e83
+{
147e83
+#if defined HAVE_S390_VX_ASM_SUPPORT
147e83
+  if (dl_hwcap & HWCAP_S390_VX)
147e83
+    return __to_utf16_loop_vx;
147e83
+  else
147e83
+#endif
147e83
+  if (dl_hwcap & HWCAP_S390_ETF3EH)
147e83
+    return __to_utf16_loop_etf3eh;
147e83
+  else
147e83
+    return __to_utf16_loop_c;
147e83
+}
147e83
+
147e83
+strong_alias (__to_utf16_loop_c_single, __to_utf16_loop_single)
147e83
+
147e83
+
147e83
 #include <iconv/skeleton.c>
147e83
-- 
147e83
1.8.3.1
147e83