Blame SOURCES/glibc-rh1240351-4.patch

147e83
    Backport of the following patch as a prerequisite for
147e83
    96d6fd6c4060d739abb1822e7ad633af749532b2:
147e83
    commit 69f13dbf06c6195de0ada8632271d58ca3cf55da
147e83
    Author: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
147e83
    Date:   Thu Sep 26 09:29:19 2013 -0500
147e83
    
147e83
        PowerPC: strcpy/stpcpy optimization for PPC64/POWER7
147e83
    
147e83
        This patch intends to unify both strcpy and stpcpy implementations
147e83
        for PPC64 and PPC64/POWER7. The idea of the default powerpc64 implementation
147e83
        is to provide both doubleword and word aligned memory access.
147e83
    
147e83
        For PPC64/POWER7 it also provides doubleword and word memory access,
147e83
        removes the branch hints, uses the cmpb instruction to compare
147e83
        doublewords/words, and adds an optimization for inputs of same alignment.
147e83
    
147e83
        ChangeLog:
147e83
    
147e83
            2013-10-04  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
147e83
    
147e83
            * sysdeps/powerpc/powerpc64/strcpy.S (strcpy): Add word load/store
147e83
            to provide a boost for large inputs with word alignment.
147e83
            * sysdeps/powerpc/powerpc64/stpcpy.S (__stpcpy): Rewrite
147e83
            implementation based on optimized PPC64 strcpy.
147e83
            * sysdeps/powerpc/powerpc64/power7/strcpy.S: New file: optimized
147e83
            strcpy for PPC64/POWER7 based on both doubleword and word load/store.
147e83
            * sysdeps/powerpc/powerpc64/power7/stpcpy.S: New file: optimized
147e83
            stpcpy for PPC64/POWER7 based on PPC64/POWER7 strcpy.
147e83
147e83
diff --git a/sysdeps/powerpc/powerpc64/power7/stpcpy.S b/sysdeps/powerpc/powerpc64/power7/stpcpy.S
147e83
new file mode 100644
147e83
index 0000000..727dd06
147e83
--- /dev/null
147e83
+++ b/sysdeps/powerpc/powerpc64/power7/stpcpy.S
147e83
@@ -0,0 +1,24 @@
147e83
+/* Optimized stpcpy implementation for PowerPC64/POWER7.
147e83
+   Copyright (C) 2013 Free Software Foundation, Inc.
147e83
+   This file is part of the GNU C Library.
147e83
+
147e83
+   The GNU C Library is free software; you can redistribute it and/or
147e83
+   modify it under the terms of the GNU Lesser General Public
147e83
+   License as published by the Free Software Foundation; either
147e83
+   version 2.1 of the License, or (at your option) any later version.
147e83
+
147e83
+   The GNU C Library is distributed in the hope that it will be useful,
147e83
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
147e83
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
147e83
+   Lesser General Public License for more details.
147e83
+
147e83
+   You should have received a copy of the GNU Lesser General Public
147e83
+   License along with the GNU C Library; if not, see
147e83
+   <http://www.gnu.org/licenses/>.  */
147e83
+
147e83
+#define USE_AS_STPCPY
147e83
+#include <sysdeps/powerpc/powerpc64/power7/strcpy.S>
147e83
+
147e83
+weak_alias (__stpcpy, stpcpy)
147e83
+libc_hidden_def (__stpcpy)
147e83
+libc_hidden_builtin_def (stpcpy)
147e83
diff --git a/sysdeps/powerpc/powerpc64/power7/strcpy.S b/sysdeps/powerpc/powerpc64/power7/strcpy.S
147e83
new file mode 100644
147e83
index 0000000..5c341a1
147e83
--- /dev/null
147e83
+++ b/sysdeps/powerpc/powerpc64/power7/strcpy.S
147e83
@@ -0,0 +1,274 @@
147e83
+/* Optimized strcpy/stpcpy implementation for PowerPC64/POWER7.
147e83
+   Copyright (C) 2013 Free Software Foundation, Inc.
147e83
+   This file is part of the GNU C Library.
147e83
+
147e83
+   The GNU C Library is free software; you can redistribute it and/or
147e83
+   modify it under the terms of the GNU Lesser General Public
147e83
+   License as published by the Free Software Foundation; either
147e83
+   version 2.1 of the License, or (at your option) any later version.
147e83
+
147e83
+   The GNU C Library is distributed in the hope that it will be useful,
147e83
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
147e83
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
147e83
+   Lesser General Public License for more details.
147e83
+
147e83
+   You should have received a copy of the GNU Lesser General Public
147e83
+   License along with the GNU C Library; if not, see
147e83
+   <http://www.gnu.org/licenses/>.  */
147e83
+
147e83
+#include <sysdep.h>
147e83
+
147e83
+/* Implements the function
147e83
+
147e83
+   char * [r3] strcpy (char *dest [r3], const char *src [r4])
147e83
+
147e83
+   or
147e83
+
147e83
+   char * [r3] stpcpy (char *dest [r3], const char *src [r4])
147e83
+
147e83
+   if USE_AS_STPCPY is defined. It tries to use aligned memory accesses
147e83
+   when possible using the following algorithm:
147e83
+
147e83
+   if (((((uintptr_t)dst & 0x7UL) == 0) && ((uintptr_t)src & 0x7UL) == 0))
147e83
+     goto aligned_doubleword_copy;
147e83
+   if (((((uintptr_t)dst & 0x3UL) == 0) && ((uintptr_t)src & 0x3UL) == 0))
147e83
+     goto aligned_word_copy;
147e83
+   if (((uintptr_t)dst & 0x7UL) == ((uintptr_t)src & 0x7UL))
147e83
+     goto same_alignment;
147e83
+   goto unaligned;
147e83
+
147e83
+   The aligned comparisons are made using the cmpb instruction.  */
147e83
+
147e83
+#ifdef USE_AS_STPCPY
147e83
+# define FUNC_NAME __stpcpy
147e83
+#else
147e83
+# define FUNC_NAME strcpy
147e83
+#endif
147e83
+
147e83
+	.machine  power7
147e83
+EALIGN (FUNC_NAME, 4, 0)
147e83
+	CALL_MCOUNT 2
147e83
+
147e83
+#define rTMP	r0
147e83
+#ifdef USE_AS_STPCPY
147e83
+#define rRTN	r3	/* pointer to previous word/doubleword in dest */
147e83
+#else
147e83
+#define rRTN	r12	/* pointer to previous word/doubleword in dest */
147e83
+#endif
147e83
+#define rSRC	r4	/* pointer to previous word/doubleword in src */
147e83
+#define rMASK	r5	/* mask 0xffffffff | 0xffffffffffffffff */
147e83
+#define rWORD	r6	/* current word from src */
147e83
+#define rALT	r7	/* alternate word from src */
147e83
+#define rRTNAL	r8	/* alignment of return pointer */
147e83
+#define rSRCAL	r9	/* alignment of source pointer */
147e83
+#define rALCNT	r10	/* bytes to read to reach 8 bytes alignment */
147e83
+#define rSUBAL	r11	/* doubleword minus unaligned displacement */
147e83
+
147e83
+#ifndef USE_AS_STPCPY
147e83
+/* Save the dst pointer to use as return value.  */
147e83
+	mr	rRTN, r3
147e83
+#endif
147e83
+	or	rTMP, rSRC, rRTN
147e83
+	clrldi.	rTMP, rTMP, 61
147e83
+	bne	L(check_word_alignment)
147e83
+	b	L(aligned_doubleword_copy)
147e83
+
147e83
+L(same_alignment):
147e83
+/* Src and dst with same alignment: align both to doubleword.  */
147e83
+	mr	rALCNT, rRTN
147e83
+	lbz	rWORD, 0(rSRC)
147e83
+	subfic	rSUBAL, rRTNAL, 8
147e83
+	addi	rRTN, rRTN, 1
147e83
+	addi	rSRC, rSRC, 1
147e83
+	cmpdi	cr7, rWORD, 0
147e83
+	stb	rWORD, 0(rALCNT)
147e83
+	beq	cr7, L(s2)
147e83
+
147e83
+	add	rALCNT, rALCNT, rSUBAL
147e83
+	subf	rALCNT, rRTN, rALCNT
147e83
+	addi	rALCNT, rALCNT, 1
147e83
+	mtctr	rALCNT
147e83
+	b	L(s1)
147e83
+
147e83
+	.align 4
147e83
+L(s0):
147e83
+	addi	rSRC, rSRC, 1
147e83
+	lbz	rWORD, -1(rSRC)
147e83
+	cmpdi	cr7, rWORD, 0
147e83
+	stb	rWORD, -1(rALCNT)
147e83
+	beqlr	cr7
147e83
+	mr	rRTN, rALCNT
147e83
+L(s1):
147e83
+	addi	rALCNT, rRTN,1
147e83
+	bdnz	L(s0)
147e83
+	b L(aligned_doubleword_copy)
147e83
+	.align 4
147e83
+L(s2):
147e83
+	mr	rRTN, rALCNT
147e83
+	blr
147e83
+
147e83
+/* For doubleword aligned memory, operate using doubleword load and stores.  */
147e83
+	.align 4
147e83
+L(aligned_doubleword_copy):
147e83
+	li	rMASK, 0
147e83
+	addi	rRTN, rRTN, -8
147e83
+	ld	rWORD, 0(rSRC)
147e83
+	b	L(g2)
147e83
+
147e83
+	.align 4
147e83
+L(g0):	ldu	rALT, 8(rSRC)
147e83
+	stdu	rWORD, 8(rRTN)
147e83
+	cmpb	rTMP, rALT, rMASK
147e83
+	cmpdi	rTMP, 0
147e83
+	bne	L(g1)
147e83
+	ldu	rWORD, 8(rSRC)
147e83
+	stdu	rALT, 8(rRTN)
147e83
+L(g2):	cmpb	rTMP, rWORD, rMASK
147e83
+	cmpdi	rTMP, 0		/* If rTMP is 0, no null's have been found.  */
147e83
+	beq	L(g0)
147e83
+
147e83
+	mr	rALT, rWORD
147e83
+/* We've hit the end of the string.  Do the rest byte-by-byte.  */
147e83
+L(g1):
147e83
+#ifdef __LITTLE_ENDIAN__
147e83
+	extrdi.	rTMP, rALT, 8, 56
147e83
+	stbu	rALT, 8(rRTN)
147e83
+	beqlr-
147e83
+	extrdi.	rTMP, rALT, 8, 48
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr-
147e83
+	extrdi.	rTMP, rALT, 8, 40
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr-
147e83
+	extrdi.	rTMP, rALT, 8, 32
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr-
147e83
+	extrdi.	rTMP, rALT, 8, 24
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr-
147e83
+	extrdi.	rTMP, rALT, 8, 16
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr-
147e83
+	extrdi.	rTMP, rALT, 8, 8
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr-
147e83
+	extrdi	rTMP, rALT, 8, 0
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+#else
147e83
+	extrdi.	rTMP, rALT, 8, 0
147e83
+	stbu	rTMP, 8(rRTN)
147e83
+	beqlr
147e83
+	extrdi.	rTMP, rALT, 8, 8
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr
147e83
+	extrdi.	rTMP, rALT, 8, 16
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr
147e83
+	extrdi.	rTMP, rALT, 8, 24
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr
147e83
+	extrdi.	rTMP, rALT, 8, 32
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr
147e83
+	extrdi.	rTMP, rALT, 8, 40
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr
147e83
+	extrdi.	rTMP, rALT, 8, 48
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr
147e83
+	stbu	rALT, 1(rRTN)
147e83
+#endif
147e83
+	blr
147e83
+
147e83
+L(check_word_alignment):
147e83
+	clrldi. rTMP, rTMP, 62
147e83
+	beq	L(aligned_word_copy)
147e83
+	rldicl	rRTNAL, rRTN, 0, 61
147e83
+	rldicl	rSRCAL, rSRC, 0, 61
147e83
+	cmpld	cr7, rSRCAL, rRTNAL
147e83
+	beq	cr7, L(same_alignment)
147e83
+	b	L(unaligned)
147e83
+
147e83
+/* For word aligned memory, operate using word load and stores.  */
147e83
+	.align	4
147e83
+L(aligned_word_copy):
147e83
+	li	rMASK, 0
147e83
+	addi	rRTN, rRTN, -4
147e83
+	lwz	rWORD, 0(rSRC)
147e83
+	b	L(g5)
147e83
+
147e83
+	.align	4
147e83
+L(g3):	lwzu	rALT, 4(rSRC)
147e83
+	stwu	rWORD, 4(rRTN)
147e83
+	cmpb	rTMP, rALT, rMASK
147e83
+	cmpwi	rTMP, 0
147e83
+	bne	L(g4)
147e83
+	lwzu	rWORD, 4(rSRC)
147e83
+	stwu	rALT, 4(rRTN)
147e83
+L(g5):	cmpb	rTMP, rWORD, rMASK
147e83
+	cmpwi	rTMP, 0		/* If rTMP is 0, no null in word.  */
147e83
+	beq	L(g3)
147e83
+
147e83
+	mr      rALT, rWORD
147e83
+/* We've hit the end of the string.  Do the rest byte-by-byte.  */
147e83
+L(g4):
147e83
+#ifdef __LITTLE_ENDIAN__
147e83
+	rlwinm.	rTMP, rALT, 0, 24, 31
147e83
+	stbu	rALT, 4(rRTN)
147e83
+	beqlr-
147e83
+	rlwinm.	rTMP, rALT, 24, 24, 31
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr-
147e83
+	rlwinm.	rTMP, rALT, 16, 24, 31
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr-
147e83
+	rlwinm	rTMP, rALT, 8, 24, 31
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+#else
147e83
+	rlwinm. rTMP, rALT, 8, 24, 31
147e83
+	stbu    rTMP, 4(rRTN)
147e83
+	beqlr
147e83
+	rlwinm. rTMP, rALT, 16, 24, 31
147e83
+	stbu    rTMP, 1(rRTN)
147e83
+	beqlr
147e83
+	rlwinm. rTMP, rALT, 24, 24, 31
147e83
+	stbu    rTMP, 1(rRTN)
147e83
+	beqlr
147e83
+	stbu    rALT, 1(rRTN)
147e83
+#endif
147e83
+	blr
147e83
+
147e83
+/* Oh well.  In this case, we just do a byte-by-byte copy.  */
147e83
+	.align	4
147e83
+L(unaligned):
147e83
+	lbz	rWORD, 0(rSRC)
147e83
+	addi	rRTN, rRTN, -1
147e83
+	cmpdi	rWORD, 0
147e83
+	beq	L(u2)
147e83
+
147e83
+	.align 	5
147e83
+L(u0):	lbzu	rALT, 1(rSRC)
147e83
+	stbu	rWORD, 1(rRTN)
147e83
+	cmpdi	rALT, 0
147e83
+	beq	L(u1)
147e83
+	lbzu	rWORD, 1(rSRC)
147e83
+	stbu	rALT, 1(rRTN)
147e83
+	cmpdi	rWORD, 0
147e83
+	beq	L(u2)
147e83
+	lbzu	rALT, 1(rSRC)
147e83
+	stbu	rWORD, 1(rRTN)
147e83
+	cmpdi	rALT, 0
147e83
+	beq	L(u1)
147e83
+	lbzu	rWORD, 1(rSRC)
147e83
+	stbu	rALT, 1(rRTN)
147e83
+	cmpdi	rWORD, 0
147e83
+	bne	L(u0)
147e83
+L(u2):	stbu	rWORD, 1(rRTN)
147e83
+	blr
147e83
+L(u1):	stbu	rALT, 1(rRTN)
147e83
+	blr
147e83
+END (FUNC_NAME)
147e83
+
147e83
+#ifndef USE_AS_STPCPY
147e83
+libc_hidden_builtin_def (strcpy)
147e83
+#endif
147e83
diff --git a/sysdeps/powerpc/powerpc64/stpcpy.S b/sysdeps/powerpc/powerpc64/stpcpy.S
147e83
index d795b61..09aa3be 100644
147e83
--- a/sysdeps/powerpc/powerpc64/stpcpy.S
147e83
+++ b/sysdeps/powerpc/powerpc64/stpcpy.S
147e83
@@ -1,5 +1,5 @@
147e83
 /* Optimized stpcpy implementation for PowerPC64.
147e83
-   Copyright (C) 1997, 1999, 2000, 2002, 2004 Free Software Foundation, Inc.
147e83
+   Copyright (C) 1997-2013 Free Software Foundation, Inc.
147e83
    This file is part of the GNU C Library.
147e83
 
147e83
    The GNU C Library is free software; you can redistribute it and/or
147e83
@@ -16,123 +16,9 @@
147e83
    License along with the GNU C Library; if not, see
147e83
    <http://www.gnu.org/licenses/>.  */
147e83
 
147e83
-#include <sysdep.h>
147e83
-#include <bp-sym.h>
147e83
-#include <bp-asm.h>
147e83
+#define USE_AS_STPCPY
147e83
+#include <sysdeps/powerpc/powerpc64/strcpy.S>
147e83
 
147e83
-/* See strlen.s for comments on how the end-of-string testing works.  */
147e83
-
147e83
-/* char * [r3] stpcpy (char *dest [r3], const char *src [r4])  */
147e83
-
147e83
-EALIGN (BP_SYM (__stpcpy), 4, 0)
147e83
-	CALL_MCOUNT 2
147e83
-
147e83
-#define rTMP	r0
147e83
-#define rRTN	r3
147e83
-#if __BOUNDED_POINTERS__
147e83
-# define rDEST	r4		/* pointer to previous word in dest */
147e83
-# define rSRC	r5		/* pointer to previous word in src */
147e83
-# define rLOW	r11
147e83
-# define rHIGH	r12
147e83
-#else
147e83
-# define rDEST	r3		/* pointer to previous word in dest */
147e83
-# define rSRC	r4		/* pointer to previous word in src */
147e83
-#endif
147e83
-#define rWORD	r6		/* current word from src */
147e83
-#define rFEFE	r7		/* 0xfefefeff */
147e83
-#define r7F7F	r8		/* 0x7f7f7f7f */
147e83
-#define rNEG	r9		/* ~(word in src | 0x7f7f7f7f) */
147e83
-#define rALT	r10		/* alternate word from src */
147e83
-
147e83
-	CHECK_BOUNDS_LOW (rSRC, rLOW, rHIGH)
147e83
-	CHECK_BOUNDS_LOW (rDEST, rLOW, rHIGH)
147e83
-	STORE_RETURN_BOUNDS (rLOW, rHIGH)
147e83
-
147e83
-	or	rTMP, rSRC, rDEST
147e83
-	clrldi.	rTMP, rTMP, 62
147e83
-	addi	rDEST, rDEST, -4
147e83
-	bne	L(unaligned)
147e83
-
147e83
-	lis	rFEFE, -0x101
147e83
-	lis	r7F7F, 0x7f7f
147e83
-	lwz	rWORD, 0(rSRC)
147e83
-	addi	rFEFE, rFEFE, -0x101
147e83
-	addi	r7F7F, r7F7F, 0x7f7f
147e83
-	b	L(g2)
147e83
-
147e83
-L(g0):	lwzu	rALT, 4(rSRC)
147e83
-	stwu	rWORD, 4(rDEST)
147e83
-	add	rTMP, rFEFE, rALT
147e83
-	nor	rNEG, r7F7F, rALT
147e83
-	and.	rTMP, rTMP, rNEG
147e83
-	bne-	L(g1)
147e83
-	lwzu	rWORD, 4(rSRC)
147e83
-	stwu	rALT, 4(rDEST)
147e83
-L(g2):	add	rTMP, rFEFE, rWORD
147e83
-	nor	rNEG, r7F7F, rWORD
147e83
-	and.	rTMP, rTMP, rNEG
147e83
-	beq+	L(g0)
147e83
-
147e83
-	mr	rALT, rWORD
147e83
-/* We've hit the end of the string.  Do the rest byte-by-byte.  */
147e83
-L(g1):
147e83
-#ifdef __LITTLE_ENDIAN__
147e83
-	rlwinm.	rTMP, rALT, 0, 24, 31
147e83
-	stbu	rALT, 4(rDEST)
147e83
-	beqlr-
147e83
-	rlwinm.	rTMP, rALT, 24, 24, 31
147e83
-	stbu	rTMP, 1(rDEST)
147e83
-	beqlr-
147e83
-	rlwinm.	rTMP, rALT, 16, 24, 31
147e83
-	stbu	rTMP, 1(rDEST)
147e83
-	beqlr-
147e83
-	rlwinm	rTMP, rALT, 8, 24, 31
147e83
-	stbu	rTMP, 1(rDEST)
147e83
-	blr
147e83
-#else
147e83
-	rlwinm.	rTMP, rALT, 8, 24, 31
147e83
-	stbu	rTMP, 4(rDEST)
147e83
-	beqlr-
147e83
-	rlwinm.	rTMP, rALT, 16, 24, 31
147e83
-	stbu	rTMP, 1(rDEST)
147e83
-	beqlr-
147e83
-	rlwinm.	rTMP, rALT, 24, 24, 31
147e83
-	stbu	rTMP, 1(rDEST)
147e83
-	beqlr-
147e83
-	stbu	rALT, 1(rDEST)
147e83
-	CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt)
147e83
-	STORE_RETURN_VALUE (rDEST)
147e83
-	blr
147e83
-#endif
147e83
-
147e83
-/* Oh well.  In this case, we just do a byte-by-byte copy.  */
147e83
-	.align 4
147e83
-	nop
147e83
-L(unaligned):
147e83
-	lbz	rWORD, 0(rSRC)
147e83
-	addi	rDEST, rDEST, 3
147e83
-	cmpwi	rWORD, 0
147e83
-	beq-	L(u2)
147e83
-
147e83
-L(u0):	lbzu	rALT, 1(rSRC)
147e83
-	stbu	rWORD, 1(rDEST)
147e83
-	cmpwi	rALT, 0
147e83
-	beq-	L(u1)
147e83
-	nop		/* Let 601 load start of loop.  */
147e83
-	lbzu	rWORD, 1(rSRC)
147e83
-	stbu	rALT, 1(rDEST)
147e83
-	cmpwi	rWORD, 0
147e83
-	bne+	L(u0)
147e83
-L(u2):	stbu	rWORD, 1(rDEST)
147e83
-	CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt)
147e83
-	STORE_RETURN_VALUE (rDEST)
147e83
-	blr
147e83
-L(u1):	stbu	rALT, 1(rDEST)
147e83
-	CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt)
147e83
-	STORE_RETURN_VALUE (rDEST)
147e83
-	blr
147e83
-END (BP_SYM (__stpcpy))
147e83
-
147e83
-weak_alias (BP_SYM (__stpcpy), BP_SYM (stpcpy))
147e83
+weak_alias (__stpcpy, stpcpy)
147e83
 libc_hidden_def (__stpcpy)
147e83
 libc_hidden_builtin_def (stpcpy)
147e83
diff --git a/sysdeps/powerpc/powerpc64/strcpy.S b/sysdeps/powerpc/powerpc64/strcpy.S
147e83
index 9434c27..793325d 100644
147e83
--- a/sysdeps/powerpc/powerpc64/strcpy.S
147e83
+++ b/sysdeps/powerpc/powerpc64/strcpy.S
147e83
@@ -1,5 +1,5 @@
147e83
 /* Optimized strcpy implementation for PowerPC64.
147e83
-   Copyright (C) 1997, 1999, 2000, 2002, 2003, 2011 Free Software Foundation, Inc.
147e83
+   Copyright (C) 1997-2013 Free Software Foundation, Inc.
147e83
    This file is part of the GNU C Library.
147e83
 
147e83
    The GNU C Library is free software; you can redistribute it and/or
147e83
@@ -17,52 +17,43 @@
147e83
    <http://www.gnu.org/licenses/>.  */
147e83
 
147e83
 #include <sysdep.h>
147e83
-#include <bp-sym.h>
147e83
-#include <bp-asm.h>
147e83
 
147e83
 /* See strlen.s for comments on how the end-of-string testing works.  */
147e83
 
147e83
 /* char * [r3] strcpy (char *dest [r3], const char *src [r4])  */
147e83
 
147e83
-EALIGN (BP_SYM (strcpy), 4, 0)
147e83
+#ifdef USE_AS_STPCPY
147e83
+# define FUNC_NAME __stpcpy
147e83
+#else
147e83
+# define FUNC_NAME strcpy
147e83
+#endif
147e83
+
147e83
+EALIGN (FUNC_NAME, 4, 0)
147e83
 	CALL_MCOUNT 2
147e83
 
147e83
 #define rTMP	r0
147e83
-#define rRTN	r3	/* incoming DEST arg preserved as result */
147e83
-/* Note.  The Bounded pointer support in this code is broken.  This code
147e83
-   was inherited from PPC32 and that support was never completed.
147e83
-   Current PPC gcc does not support -fbounds-check or -fbounded-pointers.
147e83
-   These artifacts are left in the code as a reminder in case we need
147e83
-   bounded pointer support in the future.  */
147e83
-#if __BOUNDED_POINTERS__
147e83
-# define rDEST	r4	/* pointer to previous word in dest */
147e83
-# define rSRC	r5	/* pointer to previous word in src */
147e83
-# define rLOW	r11
147e83
-# define rHIGH	r12
147e83
+#ifdef USE_AS_STPCPY
147e83
+#define rRTN    r3      /* pointer to previous word/doubleword in dest */
147e83
 #else
147e83
-# define rSRC	r4	/* pointer to previous word in src */
147e83
-# define rDEST	r5	/* pointer to previous word in dest */
147e83
+#define rRTN    r12     /* pointer to previous word/doubleword in dest */
147e83
 #endif
147e83
+#define rSRC	r4	/* pointer to previous word/doubleword in src */
147e83
 #define rWORD	r6	/* current word from src */
147e83
-#define rFEFE	r7	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
147e83
-#define r7F7F	r8	/* constant 0x7f7f7f7f7f7f7f7f */
147e83
-#define rNEG	r9	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
147e83
+#define rFEFE	r7	/* constant 0xfefefeff | 0xfefefefefefefeff */
147e83
+#define r7F7F	r8	/* constant 0x7f7f7f7f | 0x7f7f7f7f7f7f7f7f */
147e83
+#define rNEG	r9	/* ~(word in s1 | r7F7F) */
147e83
 #define rALT	r10	/* alternate word from src */
147e83
 
147e83
-	CHECK_BOUNDS_LOW (rSRC, rLOW, rHIGH)
147e83
-	CHECK_BOUNDS_LOW (rDEST, rLOW, rHIGH)
147e83
-	STORE_RETURN_BOUNDS (rLOW, rHIGH)
147e83
-
147e83
-	dcbt	0,rSRC
147e83
+#ifndef USE_AS_STPCPY
147e83
+/* Save the dst pointer to use as return value.  */
147e83
+	mr      rRTN, r3
147e83
+#endif
147e83
 	or	rTMP, rSRC, rRTN
147e83
 	clrldi.	rTMP, rTMP, 61
147e83
-#if __BOUNDED_POINTERS__
147e83
-	addi	rDEST, rDEST, -8
147e83
-#else
147e83
-	addi	rDEST, rRTN, -8
147e83
-#endif
147e83
-	dcbtst	0,rRTN
147e83
-	bne	L(unaligned)
147e83
+	bne	L(check_word_alignment)
147e83
+
147e83
+/* For doubleword aligned memory, operate using doubleword load and stores.  */
147e83
+	addi	rRTN, rRTN, -8
147e83
 
147e83
 	lis	rFEFE, -0x101
147e83
 	lis	r7F7F, 0x7f7f
147e83
@@ -75,13 +66,13 @@ EALIGN (BP_SYM (strcpy), 4, 0)
147e83
 	b	L(g2)
147e83
 
147e83
 L(g0):	ldu	rALT, 8(rSRC)
147e83
-	stdu	rWORD, 8(rDEST)
147e83
+	stdu	rWORD, 8(rRTN)
147e83
 	add	rTMP, rFEFE, rALT
147e83
 	nor	rNEG, r7F7F, rALT
147e83
 	and.	rTMP, rTMP, rNEG
147e83
 	bne-	L(g1)
147e83
 	ldu	rWORD, 8(rSRC)
147e83
-	stdu	rALT, 8(rDEST)
147e83
+	stdu	rALT, 8(rRTN)
147e83
 L(g2):	add	rTMP, rFEFE, rWORD
147e83
 	nor	rNEG, r7F7F, rWORD
147e83
 	and.	rTMP, rTMP, rNEG
147e83
@@ -92,80 +83,134 @@ L(g2):	add	rTMP, rFEFE, rWORD
147e83
 L(g1):
147e83
 #ifdef __LITTLE_ENDIAN__
147e83
 	extrdi.	rTMP, rALT, 8, 56
147e83
-	stb	rALT, 8(rDEST)
147e83
+	stbu	rALT, 8(rRTN)
147e83
 	beqlr-
147e83
 	extrdi.	rTMP, rALT, 8, 48
147e83
-	stb	rTMP, 9(rDEST)
147e83
+	stbu	rTMP, 1(rRTN)
147e83
 	beqlr-
147e83
 	extrdi.	rTMP, rALT, 8, 40
147e83
-	stb	rTMP, 10(rDEST)
147e83
+	stbu	rTMP, 1(rRTN)
147e83
 	beqlr-
147e83
 	extrdi.	rTMP, rALT, 8, 32
147e83
-	stb	rTMP, 11(rDEST)
147e83
+	stbu	rTMP, 1(rRTN)
147e83
 	beqlr-
147e83
 	extrdi.	rTMP, rALT, 8, 24
147e83
-	stb	rTMP, 12(rDEST)
147e83
+	stbu	rTMP, 1(rRTN)
147e83
 	beqlr-
147e83
 	extrdi.	rTMP, rALT, 8, 16
147e83
-	stb	rTMP, 13(rDEST)
147e83
+	stbu	rTMP, 1(rRTN)
147e83
 	beqlr-
147e83
 	extrdi.	rTMP, rALT, 8, 8
147e83
-	stb	rTMP, 14(rDEST)
147e83
+	stbu	rTMP, 1(rRTN)
147e83
 	beqlr-
147e83
 	extrdi	rTMP, rALT, 8, 0
147e83
-	stb	rTMP, 15(rDEST)
147e83
-	blr
147e83
+	stbu	rTMP, 1(rRTN)
147e83
 #else
147e83
 	extrdi.	rTMP, rALT, 8, 0
147e83
-	stb	rTMP, 8(rDEST)
147e83
+	stbu	rTMP, 8(rRTN)
147e83
 	beqlr-
147e83
 	extrdi.	rTMP, rALT, 8, 8
147e83
-	stb	rTMP, 9(rDEST)
147e83
+	stbu	rTMP, 1(rRTN)
147e83
 	beqlr-
147e83
 	extrdi.	rTMP, rALT, 8, 16
147e83
-	stb	rTMP, 10(rDEST)
147e83
+	stbu	rTMP, 1(rRTN)
147e83
 	beqlr-
147e83
 	extrdi.	rTMP, rALT, 8, 24
147e83
-	stb	rTMP, 11(rDEST)
147e83
+	stbu	rTMP, 1(rRTN)
147e83
 	beqlr-
147e83
 	extrdi.	rTMP, rALT, 8, 32
147e83
-	stb	rTMP, 12(rDEST)
147e83
-	beqlr-
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr
147e83
 	extrdi.	rTMP, rALT, 8, 40
147e83
-	stb	rTMP, 13(rDEST)
147e83
+	stbu	rTMP, 1(rRTN)
147e83
 	beqlr-
147e83
 	extrdi.	rTMP, rALT, 8, 48
147e83
-	stb	rTMP, 14(rDEST)
147e83
+	stbu	rTMP, 1(rRTN)
147e83
 	beqlr-
147e83
-	stb	rALT, 15(rDEST)
147e83
-	/* GKM FIXME: check high bound.  */
147e83
+	stbu	rALT, 1(rRTN)
147e83
+#endif
147e83
 	blr
147e83
+
147e83
+L(check_word_alignment):
147e83
+	clrldi. rTMP, rTMP, 62
147e83
+	bne     L(unaligned)
147e83
+
147e83
+/* For word aligned memory, operate using word load and stores.  */
147e83
+	addi	rRTN, rRTN, -4
147e83
+
147e83
+	lis	rFEFE, -0x101
147e83
+	lis	r7F7F, 0x7f7f
147e83
+	lwz	rWORD, 0(rSRC)
147e83
+	addi	rFEFE, rFEFE, -0x101
147e83
+	addi	r7F7F, r7F7F, 0x7f7f
147e83
+	b	L(g5)
147e83
+
147e83
+L(g3):	lwzu	rALT, 4(rSRC)
147e83
+	stwu	rWORD, 4(rRTN)
147e83
+	add	rTMP, rFEFE, rALT
147e83
+	nor	rNEG, r7F7F, rALT
147e83
+	and.	rTMP, rTMP, rNEG
147e83
+	bne-	L(g4)
147e83
+	lwzu	rWORD, 4(rSRC)
147e83
+	stwu	rALT, 4(rRTN)
147e83
+L(g5):	add	rTMP, rFEFE, rWORD
147e83
+	nor	rNEG, r7F7F, rWORD
147e83
+	and.	rTMP, rTMP, rNEG
147e83
+	beq+	L(g3)
147e83
+
147e83
+	mr	rALT, rWORD
147e83
+/* We've hit the end of the string.  Do the rest byte-by-byte.  */
147e83
+L(g4):
147e83
+#ifdef __LITTLE_ENDIAN__
147e83
+	rlwinm.	rTMP, rALT, 0, 24, 31
147e83
+	stbu	rALT, 4(rRTN)
147e83
+	beqlr-
147e83
+	rlwinm.	rTMP, rALT, 24, 24, 31
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr-
147e83
+	rlwinm.	rTMP, rALT, 16, 24, 31
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr-
147e83
+	rlwinm	rTMP, rALT, 8, 24, 31
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+#else
147e83
+	rlwinm.	rTMP, rALT, 8, 24, 31
147e83
+	stbu	rTMP, 4(rRTN)
147e83
+	beqlr-
147e83
+	rlwinm.	rTMP, rALT, 16, 24, 31
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr-
147e83
+	rlwinm.	rTMP, rALT, 24, 24, 31
147e83
+	stbu	rTMP, 1(rRTN)
147e83
+	beqlr-
147e83
+	stbu	rALT, 1(rRTN)
147e83
 #endif
147e83
+	blr
147e83
 
147e83
 /* Oh well.  In this case, we just do a byte-by-byte copy.  */
147e83
 	.align 4
147e83
 	nop
147e83
 L(unaligned):
147e83
 	lbz	rWORD, 0(rSRC)
147e83
-	addi	rDEST, rRTN, -1
147e83
+	addi	rRTN, rRTN, -1
147e83
 	cmpwi	rWORD, 0
147e83
 	beq-	L(u2)
147e83
 
147e83
 L(u0):	lbzu	rALT, 1(rSRC)
147e83
-	stbu	rWORD, 1(rDEST)
147e83
+	stbu	rWORD, 1(rRTN)
147e83
 	cmpwi	rALT, 0
147e83
 	beq-	L(u1)
147e83
 	nop		/* Let 601 load start of loop.  */
147e83
 	lbzu	rWORD, 1(rSRC)
147e83
-	stbu	rALT, 1(rDEST)
147e83
+	stbu	rALT, 1(rRTN)
147e83
 	cmpwi	rWORD, 0
147e83
 	bne+	L(u0)
147e83
-L(u2):	stb	rWORD, 1(rDEST)
147e83
-	/* GKM FIXME: check high bound.  */
147e83
+L(u2):	stbu	rWORD, 1(rRTN)
147e83
 	blr
147e83
-L(u1):	stb	rALT, 1(rDEST)
147e83
-	/* GKM FIXME: check high bound.  */
147e83
+L(u1):	stbu	rALT, 1(rRTN)
147e83
 	blr
147e83
+END (FUNC_NAME)
147e83
 
147e83
-END (BP_SYM (strcpy))
147e83
+#ifndef USE_AS_STPCPY
147e83
 libc_hidden_builtin_def (strcpy)
147e83
+#endif