Blame SOURCES/glibc-rh1375235-8.patch

147e83
From ed26260ce20cd6c05e8c8526bd2bd9fadfee8d19 Mon Sep 17 00:00:00 2001
147e83
From: Stefan Liebler <stli@linux.vnet.ibm.com>
147e83
Date: Thu, 27 Jul 2017 10:53:59 +0200
147e83
Subject: [PATCH 08/10] S390: Use cu42 instruction for converting from utf32 to
147e83
 utf16.
147e83
147e83
upstream-commit 593e4da186906525e2a0bdc4d87601bd0c2625eb
147e83
147e83
This patch adds an ifunc variant to use the cu42 instruction on arch12 CPUs.
147e83
This new ifunc variant can be built if binutils support z13 vector
147e83
instructions.  At runtime, HWCAP_S390_VXE decides if we can use the
147e83
cu42 instruction.
147e83
147e83
ChangeLog:
147e83
147e83
	* sysdeps/s390/utf16-utf32-z9.c (__to_utf16_loop_vx_cu):
147e83
	Use vector and cu42 instruction.
147e83
	* sysdeps/s390/multiarch/utf16-utf32-z9.c:
147e83
	Add __to_utf16_loop_vx_cu in ifunc resolver.
147e83
---
147e83
 sysdeps/s390/multiarch/utf16-utf32-z9.c |   8 ++-
147e83
 sysdeps/s390/utf16-utf32-z9.c           | 107 +++++++++++++++++++++++++++++++-
147e83
 2 files changed, 111 insertions(+), 4 deletions(-)
147e83
147e83
diff --git a/sysdeps/s390/multiarch/utf16-utf32-z9.c b/sysdeps/s390/multiarch/utf16-utf32-z9.c
147e83
index 6e64169..ded3cc2 100644
147e83
--- a/sysdeps/s390/multiarch/utf16-utf32-z9.c
147e83
+++ b/sysdeps/s390/multiarch/utf16-utf32-z9.c
147e83
@@ -37,8 +37,10 @@ s390_libc_ifunc_expr (FROM_LOOP_DEFAULT, FROM_LOOP,
147e83
 		      : FROM_LOOP_DEFAULT);
147e83
 
147e83
 s390_libc_ifunc_expr (TO_LOOP_DEFAULT, TO_LOOP,
147e83
-		      (HAVE_TO_VX && (hwcap & HWCAP_S390_VX))
147e83
-		      ? TO_LOOP_VX
147e83
-		      : TO_LOOP_DEFAULT);
147e83
+		      (HAVE_TO_VX_CU && (hwcap & HWCAP_S390_VXE))
147e83
+		      ? TO_LOOP_VX_CU
147e83
+		      : (HAVE_TO_VX && (hwcap & HWCAP_S390_VX))
147e83
+			? TO_LOOP_VX
147e83
+			: TO_LOOP_DEFAULT);
147e83
 
147e83
 #include <iconv/skeleton.c>
147e83
diff --git a/sysdeps/s390/utf16-utf32-z9.c b/sysdeps/s390/utf16-utf32-z9.c
147e83
index 5a39d5d..b1728d6 100644
147e83
--- a/sysdeps/s390/utf16-utf32-z9.c
147e83
+++ b/sysdeps/s390/utf16-utf32-z9.c
147e83
@@ -40,9 +40,11 @@
147e83
 #if defined HAVE_S390_VX_ASM_SUPPORT && defined USE_MULTIARCH
147e83
 # define HAVE_FROM_VX		1
147e83
 # define HAVE_TO_VX		1
147e83
+# define HAVE_TO_VX_CU		1
147e83
 #else
147e83
 # define HAVE_FROM_VX		0
147e83
 # define HAVE_TO_VX		0
147e83
+# define HAVE_TO_VX_CU		0
147e83
 #endif
147e83
 
147e83
 #if defined HAVE_S390_VX_GCC_SUPPORT
147e83
@@ -471,7 +473,7 @@ gconv_end (struct __gconv_step *data)
147e83
 		  "    vlm %%v30,%%v31,0(%[R_TMP])\n\t"			\
147e83
 		  CONVERT_32BIT_SIZE_T ([R_INLEN])			\
147e83
 		  CONVERT_32BIT_SIZE_T ([R_OUTLEN])			\
147e83
-		  /* Loop which handles UTF-16 chars			\
147e83
+		  /* Loop which handles UTF-32 chars			\
147e83
 		     ch < 0xd800 || (ch > 0xdfff && ch < 0x10000).  */	\
147e83
 		  "0:  clgijl %[R_INLEN],32,2f\n\t"			\
147e83
 		  "    clgijl %[R_OUTLEN],16,2f\n\t"			\
147e83
@@ -595,6 +597,109 @@ gconv_end (struct __gconv_step *data)
147e83
 # define TO_LOOP_VX		NULL
147e83
 #endif /* HAVE_TO_VX != 1  */
147e83
 
147e83
+#if HAVE_TO_VX_CU == 1
147e83
+#define BODY_TO_VX_CU							\
147e83
+  {									\
147e83
+    register const unsigned char* pInput asm ("8") = inptr;		\
147e83
+    register size_t inlen asm ("9") = inend - inptr;			\
147e83
+    register unsigned char* pOutput asm ("10") = outptr;		\
147e83
+    register size_t outlen asm ("11") = outend - outptr;		\
147e83
+    unsigned long tmp, tmp2, tmp3;					\
147e83
+    asm volatile (".machine push\n\t"					\
147e83
+		  ".machine \"z13\"\n\t"				\
147e83
+		  ".machinemode \"zarch_nohighgprs\"\n\t"		\
147e83
+		  /* Setup to check for surrogates.  */			\
147e83
+		  "    larl %[R_TMP],9f\n\t"				\
147e83
+		  "    vlm %%v30,%%v31,0(%[R_TMP])\n\t"			\
147e83
+		  CONVERT_32BIT_SIZE_T ([R_INLEN])			\
147e83
+		  CONVERT_32BIT_SIZE_T ([R_OUTLEN])			\
147e83
+		  /* Loop which handles UTF-32 chars			\
147e83
+		     ch < 0xd800 || (ch > 0xdfff && ch < 0x10000).  */	\
147e83
+		  "0:  clgijl %[R_INLEN],32,20f\n\t"			\
147e83
+		  "    clgijl %[R_OUTLEN],16,20f\n\t"			\
147e83
+		  "1:  vlm %%v16,%%v17,0(%[R_IN])\n\t"			\
147e83
+		  "    lghi %[R_TMP2],0\n\t"				\
147e83
+		  /* Shorten to UTF-16.  */				\
147e83
+		  "    vpkf %%v18,%%v16,%%v17\n\t"			\
147e83
+		  /* Check for surrogate chars.  */			\
147e83
+		  "    vstrcfs %%v19,%%v16,%%v30,%%v31\n\t"		\
147e83
+		  "    jno 10f\n\t"					\
147e83
+		  "    vstrcfs %%v19,%%v17,%%v30,%%v31\n\t"		\
147e83
+		  "    jno 11f\n\t"					\
147e83
+		  /* Store 16 bytes to buf_out.  */			\
147e83
+		  "    vst %%v18,0(%[R_OUT])\n\t"			\
147e83
+		  "    la %[R_IN],32(%[R_IN])\n\t"			\
147e83
+		  "    aghi %[R_INLEN],-32\n\t"				\
147e83
+		  "    aghi %[R_OUTLEN],-16\n\t"			\
147e83
+		  "    la %[R_OUT],16(%[R_OUT])\n\t"			\
147e83
+		  "    clgijl %[R_INLEN],32,20f\n\t"			\
147e83
+		  "    clgijl %[R_OUTLEN],16,20f\n\t"			\
147e83
+		  "    j 1b\n\t"					\
147e83
+		  /* Setup to check for ch >= 0xd800 && ch <= 0xdfff	\
147e83
+		     and check for ch >= 0x10000. (v30, v31)  */	\
147e83
+		  "9:  .long 0xd800,0xdfff,0x10000,0x10000\n\t"		\
147e83
+		  "    .long 0xa0000000,0xc0000000, 0xa0000000,0xa0000000\n\t" \
147e83
+		  /* At least one UTF32 char is a surrogate or >= 0x10000. \
147e83
+		     Store the preceding characters.  */		\
147e83
+		  "11: ahi %[R_TMP2],16\n\t"				\
147e83
+		  "10: vlgvb %[R_TMP],%%v19,7\n\t"			\
147e83
+		  "    agr %[R_TMP],%[R_TMP2]\n\t"			\
147e83
+		  "    srlg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes.  */ \
147e83
+		  "    ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store.  */ \
147e83
+		  "    jl 20f\n\t"					\
147e83
+		  "    vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t"		\
147e83
+		  /* Update pointers.  */				\
147e83
+		  "    la %[R_IN],0(%[R_TMP],%[R_IN])\n\t"		\
147e83
+		  "    slgr %[R_INLEN],%[R_TMP]\n\t"			\
147e83
+		  "    la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t"		\
147e83
+		  "    slgr %[R_OUTLEN],%[R_TMP3]\n\t"			\
147e83
+		  /* Handles UTF16 surrogates with convert instruction.  */ \
147e83
+		  "20: cu42 %[R_OUT],%[R_IN]\n\t"			\
147e83
+		  "    jo 0b\n\t" /* Try vector implementation again.  */ \
147e83
+		  "    lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1.  */ \
147e83
+		  "    lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2.  */ \
147e83
+		  ".machine pop"					\
147e83
+		  : /* outputs */ [R_IN] "+a" (pInput)			\
147e83
+		    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput)	\
147e83
+		    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp)	\
147e83
+		    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3)	\
147e83
+		    , [R_RES] "+d" (result)				\
147e83
+		  : /* inputs */					\
147e83
+		    [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT)		\
147e83
+		    , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT)		\
147e83
+		  : /* clobber list */ "memory", "cc"			\
147e83
+		    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
147e83
+		    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
147e83
+		    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31")	\
147e83
+		  );							\
147e83
+    inptr = pInput;							\
147e83
+    outptr = pOutput;							\
147e83
+									\
147e83
+    if (__glibc_likely (inlen == 0)					\
147e83
+	|| result == __GCONV_FULL_OUTPUT)				\
147e83
+      break;								\
147e83
+    if (inlen < 4)							\
147e83
+      {									\
147e83
+	result = __GCONV_INCOMPLETE_INPUT;				\
147e83
+	break;								\
147e83
+      }									\
147e83
+									\
147e83
+    STANDARD_TO_LOOP_ERR_HANDLER (4);					\
147e83
+  }
147e83
+
147e83
+/* Generate loop-function with hardware vector and utf-convert instructions.  */
147e83
+# define MIN_NEEDED_INPUT	MIN_NEEDED_TO
147e83
+# define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
147e83
+# define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
147e83
+# define TO_LOOP_VX_CU		__to_utf16_loop_vx_cu
147e83
+# define LOOPFCT		TO_LOOP_VX_CU
147e83
+# define LOOP_NEED_FLAGS
147e83
+# define BODY			BODY_TO_VX_CU
147e83
+# include <iconv/loop.c>
147e83
+#else
147e83
+# define TO_LOOP_VX_CU		NULL
147e83
+#endif /* HAVE_TO_VX_CU != 1  */
147e83
+
147e83
 /* This file also exists in sysdeps/s390/multiarch/ which
147e83
    generates ifunc resolvers for FROM/TO_LOOP functions
147e83
    and includes iconv/skeleton.c afterwards.  */
147e83
-- 
147e83
1.8.3.1
147e83