summary refs log tree commit diff
path: root/packages/glibc/2.17/0055-glibc-ppc64le-33.patch
diff options
context:
space:
mode:
Diffstat (limited to 'packages/glibc/2.17/0055-glibc-ppc64le-33.patch')
-rw-r--r-- packages/glibc/2.17/0055-glibc-ppc64le-33.patch 1255
1 files changed, 1255 insertions, 0 deletions
diff --git a/packages/glibc/2.17/0055-glibc-ppc64le-33.patch b/packages/glibc/2.17/0055-glibc-ppc64le-33.patch
new file mode 100644
index 0000000..9da11df
--- /dev/null
+++ b/packages/glibc/2.17/0055-glibc-ppc64le-33.patch
@@ -0,0 +1,1255 @@
+# commit 466b03933234017473c12dd1d92bda5e7fe49df7
+# Author: Alan Modra <amodra@gmail.com>
+# Date: Sat Aug 17 18:48:36 2013 +0930
+#
+# PowerPC LE memchr and memrchr
+# http://sourceware.org/ml/libc-alpha/2013-08/msg00105.html
+#
+# Like strnlen, memchr and memrchr had a number of defects fixed by this
+# patch as well as adding little-endian support. The first one I
+# noticed was that the entry to the main loop needlessly checked for
+# "are we done yet?" when we know the size is large enough that we can't
+# be done. The second defect I noticed was that the main loop count was
+# wrong, which in turn meant that the small loop needed to handle an
+# extra word. Thirdly, there is nothing to say that the string can't
+# wrap around zero, except of course that we'd normally hit a segfault
+# on trying to read from address zero. Fixing that simplified a number
+# of places:
+#
+# - /* Are we done already? */
+# - addi r9,r8,8
+# - cmpld r9,r7
+# - bge L(null)
+#
+# becomes
+#
+# + cmpld r8,r7
+# + beqlr
+#
+# However, the exit gets an extra test because I first test for being on
+# the last word and, if so, whether the byte offset is within the range.
+# Overall, the change is a win.
+#
+# Lastly, memrchr used the wrong cache hint.
+#
+# * sysdeps/powerpc/powerpc64/power7/memchr.S: Replace rlwimi with
+# insrdi. Make better use of reg selection to speed exit slightly.
+# Schedule entry path a little better. Remove useless "are we done"
+# checks on entry to main loop. Handle wrapping around zero address.
+# Correct main loop count. Handle single left-over word from main
+# loop inline rather than by using loop_small. Remove extra word
+# case in loop_small caused by wrong loop count. Add little-endian
+# support.
+# * sysdeps/powerpc/powerpc32/power7/memchr.S: Likewise.
+# * sysdeps/powerpc/powerpc64/power7/memrchr.S: Likewise. Use proper
+# cache hint.
+# * sysdeps/powerpc/powerpc32/power7/memrchr.S: Likewise.
+# * sysdeps/powerpc/powerpc64/power7/rawmemchr.S: Add little-endian
+# support. Avoid rlwimi.
+# * sysdeps/powerpc/powerpc32/power7/rawmemchr.S: Likewise.
+#
+diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memchr.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memchr.S
+--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memchr.S 2014-05-29 13:09:17.000000000 -0500
++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memchr.S 2014-05-29 13:13:37.000000000 -0500
+@@ -1,5 +1,5 @@
+ /* Optimized memchr implementation for PowerPC32/POWER7 using cmpb insn.
+- Copyright (C) 2010-2012 Free Software Foundation, Inc.
++ Copyright (C) 2010-2014 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+@@ -18,116 +18,118 @@
+ <http://www.gnu.org/licenses/>. */
+
+ #include <sysdep.h>
+-#include <bp-sym.h>
+-#include <bp-asm.h>
+
+ /* int [r3] memchr (char *s [r3], int byte [r4], int size [r5]) */
+ .machine power7
+-ENTRY (BP_SYM (__memchr))
++ENTRY (__memchr)
+ CALL_MCOUNT
+ dcbt 0,r3
+ clrrwi r8,r3,2
+- rlwimi r4,r4,8,16,23
+- rlwimi r4,r4,16,0,15
++ insrwi r4,r4,8,16 /* Replicate byte to word. */
+ add r7,r3,r5 /* Calculate the last acceptable address. */
++ insrwi r4,r4,16,0
+ cmplwi r5,16
++ li r9, -1
++ rlwinm r6,r3,3,27,28 /* Calculate padding. */
++ addi r7,r7,-1
++#ifdef __LITTLE_ENDIAN__
++ slw r9,r9,r6
++#else
++ srw r9,r9,r6
++#endif
+ ble L(small_range)
+
+- cmplw cr7,r3,r7 /* Compare the starting address (r3) with the
+- ending address (r7). If (r3 >= r7), the size
+- passed in is zero or negative. */
+- ble cr7,L(proceed)
+-
+- li r7,-1 /* Artificially set our ending address (r7)
+- such that we will exit early. */
+-L(proceed):
+- rlwinm r6,r3,3,27,28 /* Calculate padding. */
+- cmpli cr6,r6,0 /* cr6 == Do we have padding? */
+ lwz r12,0(r8) /* Load word from memory. */
+- cmpb r10,r12,r4 /* Check for BYTE's in WORD1. */
+- beq cr6,L(proceed_no_padding)
+- slw r10,r10,r6
+- srw r10,r10,r6
+-L(proceed_no_padding):
+- cmplwi cr7,r10,0 /* If r10 == 0, no BYTEs have been found. */
++ cmpb r3,r12,r4 /* Check for BYTEs in WORD1. */
++ and r3,r3,r9
++ clrlwi r5,r7,30 /* Byte count - 1 in last word. */
++ clrrwi r7,r7,2 /* Address of last word. */
++ cmplwi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */
+ bne cr7,L(done)
+
+- /* Are we done already? */
+- addi r9,r8,4
+- cmplw cr6,r9,r7
+- bge cr6,L(null)
+-
+ mtcrf 0x01,r8
+ /* Are we now aligned to a doubleword boundary? If so, skip to
+ the main loop. Otherwise, go through the alignment code. */
+-
+ bt 29,L(loop_setup)
+
+ /* Handle WORD2 of pair. */
+ lwzu r12,4(r8)
+- cmpb r10,r12,r4
+- cmplwi cr7,r10,0
++ cmpb r3,r12,r4
++ cmplwi cr7,r3,0
+ bne cr7,L(done)
+
+- /* Are we done already? */
+- addi r9,r8,4
+- cmplw cr6,r9,r7
+- bge cr6,L(null)
+-
+ L(loop_setup):
+- sub r5,r7,r9
+- srwi r6,r5,3 /* Number of loop iterations. */
++ /* The last word we want to read in the loop below is the one
++ containing the last byte of the string, ie. the word at
++ (s + size - 1) & ~3, or r7. The first word read is at
++ r8 + 4, we read 2 * cnt words, so the last word read will
++ be at r8 + 4 + 8 * cnt - 4. Solving for cnt gives
++ cnt = (r7 - r8) / 8 */
++ sub r6,r7,r8
++ srwi r6,r6,3 /* Number of loop iterations. */
+ mtctr r6 /* Setup the counter. */
+- b L(loop)
+- /* Main loop to look for BYTE backwards in the string. Since
+- it's a small loop (< 8 instructions), align it to 32-bytes. */
+- .p2align 5
++
++ /* Main loop to look for BYTE in the string. Since
++ it's a small loop (8 instructions), align it to 32-bytes. */
++ .align 5
+ L(loop):
+ /* Load two words, compare and merge in a
+ single register for speed. This is an attempt
+ to speed up the byte-checking process for bigger strings. */
+ lwz r12,4(r8)
+ lwzu r11,8(r8)
+- cmpb r10,r12,r4
++ cmpb r3,r12,r4
+ cmpb r9,r11,r4
+- or r5,r9,r10 /* Merge everything in one word. */
+- cmplwi cr7,r5,0
++ or r6,r9,r3 /* Merge everything in one word. */
++ cmplwi cr7,r6,0
+ bne cr7,L(found)
+ bdnz L(loop)
+
+- /* We're here because the counter reached 0, and that means we
+- didn't have any matches for BYTE in the whole range. */
+- subi r11,r7,4
+- cmplw cr6,r8,r11
+- blt cr6,L(loop_small)
+- b L(null)
++ /* We may have one more word to read. */
++ cmplw r8,r7
++ beqlr
+
++ lwzu r12,4(r8)
++ cmpb r3,r12,r4
++ cmplwi cr6,r3,0
++ bne cr6,L(done)
++ blr
++
++ .align 4
++L(found):
+ /* OK, one (or both) of the words contains BYTE. Check
+ the first word and decrement the address in case the first
+ word really contains BYTE. */
+- .align 4
+-L(found):
+- cmplwi cr6,r10,0
++ cmplwi cr6,r3,0
+ addi r8,r8,-4
+ bne cr6,L(done)
+
+ /* BYTE must be in the second word. Adjust the address
+- again and move the result of cmpb to r10 so we can calculate the
++ again and move the result of cmpb to r3 so we can calculate the
+ pointer. */
+
+- mr r10,r9
++ mr r3,r9
+ addi r8,r8,4
+
+- /* r10 has the output of the cmpb instruction, that is, it contains
++ /* r3 has the output of the cmpb instruction, that is, it contains
+ 0xff in the same position as BYTE in the original
+ word from the string. Use that to calculate the pointer.
+ We need to make sure BYTE is *before* the end of the range. */
+ L(done):
+- cntlzw r0,r10 /* Count leading zeroes before the match. */
+- srwi r0,r0,3 /* Convert leading zeroes to bytes. */
++#ifdef __LITTLE_ENDIAN__
++ addi r0,r3,-1
++ andc r0,r0,r3
++ popcntw r0,r0 /* Count trailing zeros. */
++#else
++ cntlzw r0,r3 /* Count leading zeros before the match. */
++#endif
++ cmplw r8,r7 /* Are we on the last word? */
++ srwi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
+ add r3,r8,r0
+- cmplw r3,r7
+- bge L(null)
++ cmplw cr7,r0,r5 /* If on the last word, check byte offset. */
++ bnelr
++ blelr cr7
++ li r3,0
+ blr
+
+ .align 4
+@@ -139,69 +141,44 @@
+ .align 4
+ L(small_range):
+ cmplwi r5,0
+- rlwinm r6,r3,3,27,28 /* Calculate padding. */
+- beq L(null) /* This branch is for the cmplwi r5,0 above */
++ beq L(null)
+ lwz r12,0(r8) /* Load word from memory. */
+- cmplwi cr6,r6,0 /* cr6 == Do we have padding? */
+- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
+- beq cr6,L(small_no_padding)
+- slw r10,r10,r6
+- srw r10,r10,r6
+-L(small_no_padding):
+- cmplwi cr7,r10,0
++ cmpb r3,r12,r4 /* Check for BYTE in WORD1. */
++ and r3,r3,r9
++ cmplwi cr7,r3,0
++ clrlwi r5,r7,30 /* Byte count - 1 in last word. */
++ clrrwi r7,r7,2 /* Address of last word. */
++ cmplw r8,r7 /* Are we done already? */
+ bne cr7,L(done)
++ beqlr
+
+- /* Are we done already? */
+- addi r9,r8,4
+- cmplw r9,r7
+- bge L(null)
+-
+-L(loop_small): /* loop_small has been unrolled. */
+ lwzu r12,4(r8)
+- cmpb r10,r12,r4
+- addi r9,r8,4
+- cmplwi cr6,r10,0
+- cmplw r9,r7
++ cmpb r3,r12,r4
++ cmplwi cr6,r3,0
++ cmplw r8,r7
+ bne cr6,L(done)
+- bge L(null)
++ beqlr
+
+ lwzu r12,4(r8)
+- cmpb r10,r12,r4
+- addi r9,r8,4
+- cmplwi cr6,r10,0
+- cmplw r9,r7
++ cmpb r3,r12,r4
++ cmplwi cr6,r3,0
++ cmplw r8,r7
+ bne cr6,L(done)
+- bge L(null)
++ beqlr
+
+ lwzu r12,4(r8)
+- cmpb r10,r12,r4
+- addi r9,r8,4
+- cmplwi cr6,r10,0
+- cmplw r9,r7
++ cmpb r3,r12,r4
++ cmplwi cr6,r3,0
++ cmplw r8,r7
+ bne cr6,L(done)
+- bge L(null)
++ beqlr
+
+ lwzu r12,4(r8)
+- cmpb r10,r12,r4
+- addi r9,r8,4
+- cmplwi cr6,r10,0
+- cmplw r9,r7
++ cmpb r3,r12,r4
++ cmplwi cr6,r3,0
+ bne cr6,L(done)
+- bge L(null)
+-
+- /* For most cases we will never get here. Under some combinations of
+- padding + length there is a leftover word that still needs to be
+- checked. */
+- lwzu r12,4(r8)
+- cmpb r10,r12,r4
+- addi r9,r8,4
+- cmplwi cr6,r10,0
+- bne cr6,L(done)
+-
+- /* save a branch and exit directly */
+- li r3,0
+ blr
+
+-END (BP_SYM (__memchr))
+-weak_alias (BP_SYM (__memchr), BP_SYM(memchr))
++END (__memchr)
++weak_alias (__memchr, memchr)
+ libc_hidden_builtin_def (memchr)
+diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memrchr.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memrchr.S
+--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memrchr.S 2014-05-29 13:09:17.000000000 -0500
++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memrchr.S 2014-05-29 13:13:47.000000000 -0500
+@@ -1,5 +1,5 @@
+ /* Optimized memrchr implementation for PowerPC32/POWER7 using cmpb insn.
+- Copyright (C) 2010 Free Software Foundation, Inc.
++ Copyright (C) 2010-2014 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+@@ -18,124 +18,136 @@
+ <http://www.gnu.org/licenses/>. */
+
+ #include <sysdep.h>
+-#include <bp-sym.h>
+-#include <bp-asm.h>
+
+ /* int [r3] memrchr (char *s [r3], int byte [r4], int size [r5]) */
+ .machine power7
+-ENTRY (BP_SYM (__memrchr))
++ENTRY (__memrchr)
+ CALL_MCOUNT
+- dcbt 0,r3
+- mr r7,r3
+- add r3,r7,r5 /* Calculate the last acceptable address. */
+- cmplw cr7,r3,r7 /* Is the address equal or less than r3? */
++ add r7,r3,r5 /* Calculate the last acceptable address. */
++ neg r0,r7
++ addi r7,r7,-1
++ mr r10,r3
++ clrrwi r6,r7,7
++ li r9,3<<5
++ dcbt r9,r6,16 /* Stream hint, decreasing addresses. */
+
+ /* Replicate BYTE to word. */
+- rlwimi r4,r4,8,16,23
+- rlwimi r4,r4,16,0,15
+- bge cr7,L(proceed)
+-
+- li r3,-1 /* Make r11 the biggest if r4 <= 0. */
+-L(proceed):
++ insrwi r4,r4,8,16
++ insrwi r4,r4,16,0
+ li r6,-4
+- addi r9,r3,-1
+- clrrwi r8,r9,2
+- addi r8,r8,4
+- neg r0,r3
++ li r9,-1
+ rlwinm r0,r0,3,27,28 /* Calculate padding. */
+-
++ clrrwi r8,r7,2
++ srw r9,r9,r0
+ cmplwi r5,16
++ clrrwi r0,r10,2
+ ble L(small_range)
+
+- lwbrx r12,r8,r6 /* Load reversed word from memory. */
+- cmpb r10,r12,r4 /* Check for BYTE in WORD1. */
+- slw r10,r10,r0
+- srw r10,r10,r0
+- cmplwi cr7,r10,0 /* If r10 == 0, no BYTE's have been found. */
++#ifdef __LITTLE_ENDIAN__
++ lwzx r12,0,r8
++#else
++ lwbrx r12,0,r8 /* Load reversed word from memory. */
++#endif
++ cmpb r3,r12,r4 /* Check for BYTE in WORD1. */
++ and r3,r3,r9
++ cmplwi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */
+ bne cr7,L(done)
+
+- /* Are we done already? */
+- addi r9,r8,-4
+- cmplw cr6,r9,r7
+- ble cr6,L(null)
+-
+ mtcrf 0x01,r8
+ /* Are we now aligned to a doubleword boundary? If so, skip to
+ the main loop. Otherwise, go through the alignment code. */
+- mr r8,r9
+- bt 29,L(loop_setup)
++ bf 29,L(loop_setup)
+
+ /* Handle WORD2 of pair. */
++#ifdef __LITTLE_ENDIAN__
++ lwzx r12,r8,r6
++#else
+ lwbrx r12,r8,r6
+- cmpb r10,r12,r4
+- cmplwi cr7,r10,0
+- bne cr7,L(done)
+-
+- /* Are we done already? */
++#endif
+ addi r8,r8,-4
+- cmplw cr6,r8,r7
+- ble cr6,L(null)
++ cmpb r3,r12,r4
++ cmplwi cr7,r3,0
++ bne cr7,L(done)
+
+ L(loop_setup):
+- li r0,-8
+- sub r5,r8,r7
+- srwi r9,r5,3 /* Number of loop iterations. */
++ /* The last word we want to read in the loop below is the one
++ containing the first byte of the string, ie. the word at
++ s & ~3, or r0. The first word read is at r8 - 4, we
++ read 2 * cnt words, so the last word read will be at
++ r8 - 4 - 8 * cnt + 4. Solving for cnt gives
++ cnt = (r8 - r0) / 8 */
++ sub r5,r8,r0
++ addi r8,r8,-4
++ srwi r9,r5,3 /* Number of loop iterations. */
+ mtctr r9 /* Setup the counter. */
+- b L(loop)
+- /* Main loop to look for BYTE backwards in the string. Since it's a
+- small loop (< 8 instructions), align it to 32-bytes. */
+- .p2align 5
++
++ /* Main loop to look for BYTE backwards in the string.
++ FIXME: Investigate whether 32 byte align helps with this
++ 9 instruction loop. */
++ .align 5
+ L(loop):
+ /* Load two words, compare and merge in a
+ single register for speed. This is an attempt
+ to speed up the byte-checking process for bigger strings. */
+
+- lwbrx r12,r8,r6
+- lwbrx r11,r8,r0
+- addi r8,r8,-4
+- cmpb r10,r12,r4
++#ifdef __LITTLE_ENDIAN__
++ lwzx r12,0,r8
++ lwzx r11,r8,r6
++#else
++ lwbrx r12,0,r8
++ lwbrx r11,r8,r6
++#endif
++ cmpb r3,r12,r4
+ cmpb r9,r11,r4
+- or r5,r9,r10 /* Merge everything in one word. */
++ or r5,r9,r3 /* Merge everything in one word. */
+ cmplwi cr7,r5,0
+ bne cr7,L(found)
+- addi r8,r8,-4
++ addi r8,r8,-8
+ bdnz L(loop)
+- /* We're here because the counter reached 0, and that means we
+- didn't have any matches for BYTE in the whole range. Just return
+- the original range. */
+- addi r9,r8,4
+- cmplw cr6,r9,r7
+- bgt cr6,L(loop_small)
+- b L(null)
+
+- /* OK, one (or both) of the words contains BYTE. Check
+- the first word and decrement the address in case the first
+- word really contains BYTE. */
++ /* We may have one more word to read. */
++ cmplw r8,r0
++ bnelr
++
++#ifdef __LITTLE_ENDIAN__
++ lwzx r12,0,r8
++#else
++ lwbrx r12,0,r8
++#endif
++ cmpb r3,r12,r4
++ cmplwi cr7,r3,0
++ bne cr7,L(done)
++ blr
++
+ .align 4
+ L(found):
+- cmplwi cr6,r10,0
+- addi r8,r8,4
++ /* OK, one (or both) of the words contains BYTE. Check
++ the first word. */
++ cmplwi cr6,r3,0
+ bne cr6,L(done)
+
+ /* BYTE must be in the second word. Adjust the address
+- again and move the result of cmpb to r10 so we can calculate the
++ again and move the result of cmpb to r3 so we can calculate the
+ pointer. */
+
+- mr r10,r9
++ mr r3,r9
+ addi r8,r8,-4
+
+- /* r10 has the output of the cmpb instruction, that is, it contains
++ /* r3 has the output of the cmpb instruction, that is, it contains
+ 0xff in the same position as BYTE in the original
+ word from the string. Use that to calculate the pointer.
+ We need to make sure BYTE is *before* the end of the
+ range. */
+ L(done):
+- cntlzw r0,r10 /* Count leading zeroes before the match. */
+- srwi r6,r0,3 /* Convert leading zeroes to bytes. */
+- addi r0,r6,1
++ cntlzw r9,r3 /* Count leading zeros before the match. */
++ cmplw r8,r0 /* Are we on the last word? */
++ srwi r6,r9,3 /* Convert leading zeros to bytes. */
++ addi r0,r6,-3
+ sub r3,r8,r0
+- cmplw r3,r7
+- blt L(null)
++ cmplw cr7,r3,r10
++ bnelr
++ bgelr cr7
++ li r3,0
+ blr
+
+ .align 4
+@@ -149,29 +161,36 @@
+ cmplwi r5,0
+ beq L(null)
+
+- lwbrx r12,r8,r6 /* Load reversed word from memory. */
+- cmpb r10,r12,r4 /* Check for null bytes in WORD1. */
+- slw r10,r10,r0
+- srw r10,r10,r0
+- cmplwi cr7,r10,0
++#ifdef __LITTLE_ENDIAN__
++ lwzx r12,0,r8
++#else
++ lwbrx r12,0,r8 /* Load reversed word from memory. */
++#endif
++ cmpb r3,r12,r4 /* Check for BYTE in WORD1. */
++ and r3,r3,r9
++ cmplwi cr7,r3,0
+ bne cr7,L(done)
+
++ /* Are we done already? */
++ cmplw r8,r0
+ addi r8,r8,-4
+- cmplw r8,r7
+- ble L(null)
+- b L(loop_small)
++ beqlr
+
+- .p2align 5
++ .align 5
+ L(loop_small):
+- lwbrx r12,r8,r6
+- cmpb r10,r12,r4
+- cmplwi cr6,r10,0
+- bne cr6,L(done)
++#ifdef __LITTLE_ENDIAN__
++ lwzx r12,0,r8
++#else
++ lwbrx r12,0,r8
++#endif
++ cmpb r3,r12,r4
++ cmplw r8,r0
++ cmplwi cr7,r3,0
++ bne cr7,L(done)
+ addi r8,r8,-4
+- cmplw r8,r7
+- ble L(null)
+- b L(loop_small)
++ bne L(loop_small)
++ blr
+
+-END (BP_SYM (__memrchr))
+-weak_alias (BP_SYM (__memrchr), BP_SYM(memrchr))
++END (__memrchr)
++weak_alias (__memrchr, memrchr)
+ libc_hidden_builtin_def (memrchr)
+diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/rawmemchr.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/rawmemchr.S
+--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/rawmemchr.S 2014-05-29 13:09:17.000000000 -0500
++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/rawmemchr.S 2014-05-29 13:09:19.000000000 -0500
+@@ -29,16 +29,21 @@
+ clrrwi r8,r3,2 /* Align the address to word boundary. */
+
+ /* Replicate byte to word. */
+- rlwimi r4,r4,8,16,23
+- rlwimi r4,r4,16,0,15
++ rldimi r4,r4,8,48
++ rldimi r4,r4,16,32
+
+ /* Now r4 has a word of c bytes. */
+
+ rlwinm r6,r3,3,27,28 /* Calculate padding. */
+ lwz r12,0(r8) /* Load word from memory. */
+ cmpb r5,r12,r4 /* Compare each byte against c byte. */
++#ifdef __LITTLE_ENDIAN__
++ srw r5,r5,r6
++ slw r5,r5,r6
++#else
+ slw r5,r5,r6 /* Move left to discard ignored bits. */
+ srw r5,r5,r6 /* Bring the bits back as zeros. */
++#endif
+ cmpwi cr7,r5,0 /* If r5 == 0, no c bytes have been found. */
+ bne cr7,L(done)
+
+@@ -92,8 +97,14 @@
+ word from the string. Use that fact to find out what is
+ the position of the byte inside the string. */
+ L(done):
++#ifdef __LITTLE_ENDIAN__
++ addi r0,r5,-1
++ andc r0,r0,r5
++ popcntw r0,r0
++#else
+ cntlzw r0,r5 /* Count leading zeros before the match. */
+- srwi r0,r0,3 /* Convert leading zeroes to bytes. */
++#endif
++ srwi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
+ add r3,r8,r0 /* Return address of the matching char. */
+ blr
+ END (BP_SYM (__rawmemchr))
+diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memchr.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memchr.S
+--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memchr.S 2014-05-29 13:09:17.000000000 -0500
++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memchr.S 2014-05-29 13:13:57.000000000 -0500
+@@ -1,5 +1,5 @@
+ /* Optimized memchr implementation for PowerPC64/POWER7 using cmpb insn.
+- Copyright (C) 2010-2012 Free Software Foundation, Inc.
++ Copyright (C) 2010-2014 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+@@ -18,118 +18,119 @@
+ <http://www.gnu.org/licenses/>. */
+
+ #include <sysdep.h>
+-#include <bp-sym.h>
+-#include <bp-asm.h>
+
+ /* int [r3] memchr (char *s [r3], int byte [r4], int size [r5]) */
+ .machine power7
+-ENTRY (BP_SYM (__memchr))
+- CALL_MCOUNT 2
++ENTRY (__memchr)
++ CALL_MCOUNT 3
+ dcbt 0,r3
+ clrrdi r8,r3,3
+- rlwimi r4,r4,8,16,23
+- rlwimi r4,r4,16,0,15
++ insrdi r4,r4,8,48
+ add r7,r3,r5 /* Calculate the last acceptable address. */
++ insrdi r4,r4,16,32
+ cmpldi r5,32
++ li r9, -1
++ rlwinm r6,r3,3,26,28 /* Calculate padding. */
+ insrdi r4,r4,32,0
++ addi r7,r7,-1
++#ifdef __LITTLE_ENDIAN__
++ sld r9,r9,r6
++#else
++ srd r9,r9,r6
++#endif
+ ble L(small_range)
+
+- cmpld cr7,r3,r7 /* Compare the starting address (r3) with the
+- ending address (r7). If (r3 >= r7),
+- the size passed in was zero or negative. */
+- ble cr7,L(proceed)
+-
+- li r7,-1 /* Artificially set our ending address (r7)
+- such that we will exit early. */
+-
+-L(proceed):
+- rlwinm r6,r3,3,26,28 /* Calculate padding. */
+- cmpldi cr6,r6,0 /* cr6 == Do we have padding? */
+ ld r12,0(r8) /* Load doubleword from memory. */
+- cmpb r10,r12,r4 /* Check for BYTEs in DWORD1. */
+- beq cr6,L(proceed_no_padding)
+- sld r10,r10,r6
+- srd r10,r10,r6
+-L(proceed_no_padding):
+- cmpldi cr7,r10,0 /* Does r10 indicate we got a hit? */
++ cmpb r3,r12,r4 /* Check for BYTEs in DWORD1. */
++ and r3,r3,r9
++ clrldi r5,r7,61 /* Byte count - 1 in last dword. */
++ clrrdi r7,r7,3 /* Address of last doubleword. */
++ cmpldi cr7,r3,0 /* Does r3 indicate we got a hit? */
+ bne cr7,L(done)
+
+- /* See if we are at the last acceptable address yet. */
+- addi r9,r8,8
+- cmpld cr6,r9,r7
+- bge cr6,L(null)
+-
+ mtcrf 0x01,r8
+ /* Are we now aligned to a quadword boundary? If so, skip to
+ the main loop. Otherwise, go through the alignment code. */
+-
+ bt 28,L(loop_setup)
+
+ /* Handle DWORD2 of pair. */
+ ldu r12,8(r8)
+- cmpb r10,r12,r4
+- cmpldi cr7,r10,0
++ cmpb r3,r12,r4
++ cmpldi cr7,r3,0
+ bne cr7,L(done)
+
+- /* Are we done already? */
+- addi r9,r8,8
+- cmpld cr6,r9,r7
+- bge cr6,L(null)
+-
+ L(loop_setup):
+- sub r5,r7,r9
+- srdi r6,r5,4 /* Number of loop iterations. */
++ /* The last dword we want to read in the loop below is the one
++ containing the last byte of the string, ie. the dword at
++ (s + size - 1) & ~7, or r7. The first dword read is at
++ r8 + 8, we read 2 * cnt dwords, so the last dword read will
++ be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives
++ cnt = (r7 - r8) / 16 */
++ sub r6,r7,r8
++ srdi r6,r6,4 /* Number of loop iterations. */
+ mtctr r6 /* Setup the counter. */
+- b L(loop)
+- /* Main loop to look for BYTE backwards in the string. Since
+- it's a small loop (< 8 instructions), align it to 32-bytes. */
+- .p2align 5
++
++ /* Main loop to look for BYTE in the string. Since
++ it's a small loop (8 instructions), align it to 32-bytes. */
++ .align 5
+ L(loop):
+ /* Load two doublewords, compare and merge in a
+ single register for speed. This is an attempt
+ to speed up the byte-checking process for bigger strings. */
+ ld r12,8(r8)
+ ldu r11,16(r8)
+- cmpb r10,r12,r4
++ cmpb r3,r12,r4
+ cmpb r9,r11,r4
+- or r5,r9,r10 /* Merge everything in one doubleword. */
+- cmpldi cr7,r5,0
++ or r6,r9,r3 /* Merge everything in one doubleword. */
++ cmpldi cr7,r6,0
+ bne cr7,L(found)
+ bdnz L(loop)
+
+- /* We're here because the counter reached 0, and that means we
+- didn't have any matches for BYTE in the whole range. */
+- subi r11,r7,8
+- cmpld cr6,r8,r11
+- blt cr6,L(loop_small)
+- b L(null)
++ /* We may have one more dword to read. */
++ cmpld r8,r7
++ beqlr
+
++ ldu r12,8(r8)
++ cmpb r3,r12,r4
++ cmpldi cr6,r3,0
++ bne cr6,L(done)
++ blr
++
++ .align 4
++L(found):
+ /* OK, one (or both) of the doublewords contains BYTE. Check
+ the first doubleword and decrement the address in case the first
+ doubleword really contains BYTE. */
+- .align 4
+-L(found):
+- cmpldi cr6,r10,0
++ cmpldi cr6,r3,0
+ addi r8,r8,-8
+ bne cr6,L(done)
+
+ /* BYTE must be in the second doubleword. Adjust the address
+- again and move the result of cmpb to r10 so we can calculate the
++ again and move the result of cmpb to r3 so we can calculate the
+ pointer. */
+
+- mr r10,r9
++ mr r3,r9
+ addi r8,r8,8
+
+- /* r10 has the output of the cmpb instruction, that is, it contains
++ /* r3 has the output of the cmpb instruction, that is, it contains
+ 0xff in the same position as BYTE in the original
+ doubleword from the string. Use that to calculate the pointer.
+ We need to make sure BYTE is *before* the end of the range. */
+ L(done):
+- cntlzd r0,r10 /* Count leading zeroes before the match. */
+- srdi r0,r0,3 /* Convert leading zeroes to bytes. */
++#ifdef __LITTLE_ENDIAN__
++ addi r0,r3,-1
++ andc r0,r0,r3
++ popcntd r0,r0 /* Count trailing zeros. */
++#else
++ cntlzd r0,r3 /* Count leading zeros before the match. */
++#endif
++ cmpld r8,r7 /* Are we on the last dword? */
++ srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
+ add r3,r8,r0
+- cmpld r3,r7
+- bge L(null)
++ cmpld cr7,r0,r5 /* If on the last dword, check byte offset. */
++ bnelr
++ blelr cr7
++ li r3,0
+ blr
+
+ .align 4
+@@ -141,67 +142,44 @@
+ .align 4
+ L(small_range):
+ cmpldi r5,0
+- rlwinm r6,r3,3,26,28 /* Calculate padding. */
+- beq L(null) /* This branch is for the cmpldi r5,0 above. */
++ beq L(null)
+ ld r12,0(r8) /* Load word from memory. */
+- cmpldi cr6,r6,0 /* cr6 == Do we have padding? */
+- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
+- /* If no padding, skip the shifts. */
+- beq cr6,L(small_no_padding)
+- sld r10,r10,r6
+- srd r10,r10,r6
+-L(small_no_padding):
+- cmpldi cr7,r10,0
++ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */
++ and r3,r3,r9
++ cmpldi cr7,r3,0
++ clrldi r5,r7,61 /* Byte count - 1 in last dword. */
++ clrrdi r7,r7,3 /* Address of last doubleword. */
++ cmpld r8,r7 /* Are we done already? */
+ bne cr7,L(done)
+-
+- /* Are we done already? */
+- addi r9,r8,8
+- cmpld r9,r7
+- bge L(null)
+- /* If we're not done, drop through into loop_small. */
+-
+-L(loop_small): /* loop_small has been unrolled. */
+- ldu r12,8(r8)
+- cmpb r10,r12,r4
+- addi r9,r8,8
+- cmpldi cr6,r10,0
+- cmpld r9,r7
+- bne cr6,L(done) /* Found something. */
+- bge L(null) /* Hit end of string (length). */
++ beqlr
+
+ ldu r12,8(r8)
+- cmpb r10,r12,r4
+- addi r9,r8,8
+- cmpldi cr6,r10,0
+- cmpld r9,r7
++ cmpb r3,r12,r4
++ cmpldi cr6,r3,0
++ cmpld r8,r7
+ bne cr6,L(done) /* Found something. */
+- bge L(null)
++ beqlr /* Hit end of string (length). */
+
+ ldu r12,8(r8)
+- subi r11,r7,8
+- cmpb r10,r12,r4
+- cmpldi cr6,r10,0
+- ori r2,r2,0 /* Force a dispatch group. */
++ cmpb r3,r12,r4
++ cmpldi cr6,r3,0
++ cmpld r8,r7
+ bne cr6,L(done)
++ beqlr
+
+- cmpld r8,r11 /* At end of range? */
+- bge L(null)
+-
+- /* For most cases we will never get here. Under some combinations of
+- padding + length there is a leftover double that still needs to be
+- checked. */
+- ldu r12,8(r8)
+- cmpb r10,r12,r4
+- addi r9,r8,8
+- cmpldi cr6,r10,0
+- cmpld r9,r7
+- bne cr6,L(done) /* Found something. */
++ ldu r12,8(r8)
++ cmpb r3,r12,r4
++ cmpldi cr6,r3,0
++ cmpld r8,r7
++ bne cr6,L(done)
++ beqlr
+
+- /* Save a branch and exit directly. */
+- li r3,0
++ ldu r12,8(r8)
++ cmpb r3,r12,r4
++ cmpldi cr6,r3,0
++ bne cr6,L(done)
+ blr
+
+-
+-END (BP_SYM (__memchr))
+-weak_alias (BP_SYM (__memchr), BP_SYM(memchr))
++END (__memchr)
++weak_alias (__memchr, memchr)
+ libc_hidden_builtin_def (memchr)
+diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memrchr.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memrchr.S
+--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memrchr.S 2014-05-29 13:09:17.000000000 -0500
++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memrchr.S 2014-05-29 13:14:06.000000000 -0500
+@@ -1,5 +1,5 @@
+ /* Optimized memrchr implementation for PowerPC64/POWER7 using cmpb insn.
+- Copyright (C) 2010 Free Software Foundation, Inc.
++ Copyright (C) 2010-2014 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+@@ -18,125 +18,137 @@
+ <http://www.gnu.org/licenses/>. */
+
+ #include <sysdep.h>
+-#include <bp-sym.h>
+-#include <bp-asm.h>
+
+ /* int [r3] memrchr (char *s [r3], int byte [r4], int size [r5]) */
+ .machine power7
+-ENTRY (BP_SYM (__memrchr))
+- CALL_MCOUNT
+- dcbt 0,r3
+- mr r7,r3
+- add r3,r7,r5 /* Calculate the last acceptable address. */
+- cmpld cr7,r3,r7 /* Is the address equal or less than r3? */
++ENTRY (__memrchr)
++ CALL_MCOUNT 3
++ add r7,r3,r5 /* Calculate the last acceptable address. */
++ neg r0,r7
++ addi r7,r7,-1
++ mr r10,r3
++ clrrdi r6,r7,7
++ li r9,3<<5
++ dcbt r9,r6,8 /* Stream hint, decreasing addresses. */
+
+ /* Replicate BYTE to doubleword. */
+- rlwimi r4,r4,8,16,23
+- rlwimi r4,r4,16,0,15
++ insrdi r4,r4,8,48
++ insrdi r4,r4,16,32
+ insrdi r4,r4,32,0
+- bge cr7,L(proceed)
+-
+- li r3,-1 /* Make r11 the biggest if r4 <= 0. */
+-L(proceed):
+ li r6,-8
+- addi r9,r3,-1
+- clrrdi r8,r9,3
+- addi r8,r8,8
+- neg r0,r3
++ li r9,-1
+ rlwinm r0,r0,3,26,28 /* Calculate padding. */
+-
++ clrrdi r8,r7,3
++ srd r9,r9,r0
+ cmpldi r5,32
++ clrrdi r0,r10,3
+ ble L(small_range)
+
+- ldbrx r12,r8,r6 /* Load reversed doubleword from memory. */
+- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
+- sld r10,r10,r0
+- srd r10,r10,r0
+- cmpldi cr7,r10,0 /* If r10 == 0, no BYTE's have been found. */
++#ifdef __LITTLE_ENDIAN__
++ ldx r12,0,r8
++#else
++ ldbrx r12,0,r8 /* Load reversed doubleword from memory. */
++#endif
++ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */
++ and r3,r3,r9
++ cmpldi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */
+ bne cr7,L(done)
+
+- /* Are we done already? */
+- addi r9,r8,-8
+- cmpld cr6,r9,r7
+- ble cr6,L(null)
+-
+ mtcrf 0x01,r8
+- /* Are we now aligned to a doubleword boundary? If so, skip to
++ /* Are we now aligned to a quadword boundary? If so, skip to
+ the main loop. Otherwise, go through the alignment code. */
+- mr r8,r9
+- bt 28,L(loop_setup)
++ bf 28,L(loop_setup)
+
+ /* Handle DWORD2 of pair. */
++#ifdef __LITTLE_ENDIAN__
++ ldx r12,r8,r6
++#else
+ ldbrx r12,r8,r6
+- cmpb r10,r12,r4
+- cmpldi cr7,r10,0
+- bne cr7,L(done)
+-
+- /* Are we done already. */
++#endif
+ addi r8,r8,-8
+- cmpld cr6,r8,r7
+- ble cr6,L(null)
++ cmpb r3,r12,r4
++ cmpldi cr7,r3,0
++ bne cr7,L(done)
+
+ L(loop_setup):
+- li r0,-16
+- sub r5,r8,r7
+- srdi r9,r5,4 /* Number of loop iterations. */
++ /* The last dword we want to read in the loop below is the one
++ containing the first byte of the string, ie. the dword at
++ s & ~7, or r0. The first dword read is at r8 - 8, we
++ read 2 * cnt dwords, so the last dword read will be at
++ r8 - 8 - 16 * cnt + 8. Solving for cnt gives
++ cnt = (r8 - r0) / 16 */
++ sub r5,r8,r0
++ addi r8,r8,-8
++ srdi r9,r5,4 /* Number of loop iterations. */
+ mtctr r9 /* Setup the counter. */
+- b L(loop)
+- /* Main loop to look for BYTE backwards in the string. Since it's a
+- small loop (< 8 instructions), align it to 32-bytes. */
+- .p2align 5
++
++ /* Main loop to look for BYTE backwards in the string.
++ FIXME: Investigate whether 32 byte align helps with this
++ 9 instruction loop. */
++ .align 5
+ L(loop):
+ /* Load two doublewords, compare and merge in a
+ single register for speed. This is an attempt
+ to speed up the byte-checking process for bigger strings. */
+
+- ldbrx r12,r8,r6
+- ldbrx r11,r8,r0
+- addi r8,r8,-8
+- cmpb r10,r12,r4
++#ifdef __LITTLE_ENDIAN__
++ ldx r12,0,r8
++ ldx r11,r8,r6
++#else
++ ldbrx r12,0,r8
++ ldbrx r11,r8,r6
++#endif
++ cmpb r3,r12,r4
+ cmpb r9,r11,r4
+- or r5,r9,r10 /* Merge everything in one doubleword. */
++ or r5,r9,r3 /* Merge everything in one doubleword. */
+ cmpldi cr7,r5,0
+ bne cr7,L(found)
+- addi r8,r8,-8
++ addi r8,r8,-16
+ bdnz L(loop)
+- /* We're here because the counter reached 0, and that means we
+- didn't have any matches for BYTE in the whole range. Just return
+- the original range. */
+- addi r9,r8,8
+- cmpld cr6,r9,r7
+- bgt cr6,L(loop_small)
+- b L(null)
+-
+- /* OK, one (or both) of the words contains BYTE. Check
+- the first word and decrement the address in case the first
+- word really contains BYTE. */
++
++ /* We may have one more dword to read. */
++ cmpld r8,r0
++ bnelr
++
++#ifdef __LITTLE_ENDIAN__
++ ldx r12,0,r8
++#else
++ ldbrx r12,0,r8
++#endif
++ cmpb r3,r12,r4
++ cmpldi cr7,r3,0
++ bne cr7,L(done)
++ blr
++
+ .align 4
+ L(found):
+- cmpldi cr6,r10,0
+- addi r8,r8,8
++ /* OK, one (or both) of the dwords contains BYTE. Check
++ the first dword. */
++ cmpldi cr6,r3,0
+ bne cr6,L(done)
+
+ /* BYTE must be in the second word. Adjust the address
+- again and move the result of cmpb to r10 so we can calculate the
++ again and move the result of cmpb to r3 so we can calculate the
+ pointer. */
+
+- mr r10,r9
++ mr r3,r9
+ addi r8,r8,-8
+
+- /* r10 has the output of the cmpb instruction, that is, it contains
+- 0xff in the same position as the BYTE in the original
++ /* r3 has the output of the cmpb instruction, that is, it contains
++ 0xff in the same position as BYTE in the original
+ word from the string. Use that to calculate the pointer.
+ We need to make sure BYTE is *before* the end of the
+ range. */
+ L(done):
+- cntlzd r0,r10 /* Count leading zeroes before the match. */
+- srdi r6,r0,3 /* Convert leading zeroes to bytes. */
+- addi r0,r6,1
++ cntlzd r9,r3 /* Count leading zeros before the match. */
++ cmpld r8,r0 /* Are we on the last dword? */
++ srdi r6,r9,3 /* Convert leading zeros to bytes. */
++ addi r0,r6,-7
+ sub r3,r8,r0
+- cmpld r3,r7
+- blt L(null)
++ cmpld cr7,r3,r10
++ bnelr
++ bgelr cr7
++ li r3,0
+ blr
+
+ .align 4
+@@ -150,30 +162,36 @@
+ cmpldi r5,0
+ beq L(null)
+
+- ldbrx r12,r8,r6 /* Load reversed doubleword from memory. */
+- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
+- sld r10,r10,r0
+- srd r10,r10,r0
+- cmpldi cr7,r10,0
++#ifdef __LITTLE_ENDIAN__
++ ldx r12,0,r8
++#else
++ ldbrx r12,0,r8 /* Load reversed doubleword from memory. */
++#endif
++ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */
++ and r3,r3,r9
++ cmpldi cr7,r3,0
+ bne cr7,L(done)
+
+ /* Are we done already? */
++ cmpld r8,r0
+ addi r8,r8,-8
+- cmpld r8,r7
+- ble L(null)
+- b L(loop_small)
++ beqlr
+
+- .p2align 5
++ .align 5
+ L(loop_small):
+- ldbrx r12,r8,r6
+- cmpb r10,r12,r4
+- cmpldi cr6,r10,0
+- bne cr6,L(done)
++#ifdef __LITTLE_ENDIAN__
++ ldx r12,0,r8
++#else
++ ldbrx r12,0,r8
++#endif
++ cmpb r3,r12,r4
++ cmpld r8,r0
++ cmpldi cr7,r3,0
++ bne cr7,L(done)
+ addi r8,r8,-8
+- cmpld r8,r7
+- ble L(null)
+- b L(loop_small)
++ bne L(loop_small)
++ blr
+
+-END (BP_SYM (__memrchr))
+-weak_alias (BP_SYM (__memrchr), BP_SYM(memrchr))
++END (__memrchr)
++weak_alias (__memrchr, memrchr)
+ libc_hidden_builtin_def (memrchr)
+diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/rawmemchr.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/rawmemchr.S
+--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/rawmemchr.S 2014-05-29 13:09:17.000000000 -0500
++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/rawmemchr.S 2014-05-29 13:09:19.000000000 -0500
+@@ -29,8 +29,8 @@
+ clrrdi r8,r3,3 /* Align the address to doubleword boundary. */
+
+ /* Replicate byte to doubleword. */
+- rlwimi r4,r4,8,16,23
+- rlwimi r4,r4,16,0,15
++ insrdi r4,r4,8,48
++ insrdi r4,r4,16,32
+ insrdi r4,r4,32,0
+
+ /* Now r4 has a doubleword of c bytes. */
+@@ -38,8 +38,13 @@
+ rlwinm r6,r3,3,26,28 /* Calculate padding. */
+ ld r12,0(r8) /* Load doubleword from memory. */
+ cmpb r5,r12,r4 /* Compare each byte against c byte. */
++#ifdef __LITTLE_ENDIAN__
++ srd r5,r5,r6
++ sld r5,r5,r6
++#else
+ sld r5,r5,r6 /* Move left to discard ignored bits. */
+ srd r5,r5,r6 /* Bring the bits back as zeros. */
++#endif
+ cmpdi cr7,r5,0 /* If r5 == 0, no c bytes have been found. */
+ bne cr7,L(done)
+
+@@ -93,8 +98,14 @@
+ doubleword from the string. Use that fact to find out what is
+ the position of the byte inside the string. */
+ L(done):
++#ifdef __LITTLE_ENDIAN__
++ addi r0,r5,-1
++ andc r0,r0,r5
++ popcntd r0,r0 /* Count trailing zeros. */
++#else
+ cntlzd r0,r5 /* Count leading zeros before the match. */
+- srdi r0,r0,3 /* Convert leading zeroes to bytes. */
++#endif
++ srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
+ add r3,r8,r0 /* Return address of the matching char. */
+ blr
+ END (BP_SYM (__rawmemchr))