From 798904409cfb7e6b481a290b776b7f178c9036bf Mon Sep 17 00:00:00 2001 From: messense Date: Thu, 13 May 2021 11:35:09 +0800 Subject: Add ppc64le patches for glibc 2.17 from CentOS git diff --git a/packages/glibc/2.17/0019-glibc-rh731833-misc-5.patch b/packages/glibc/2.17/0019-glibc-rh731833-misc-5.patch new file mode 100644 index 0000000..ad46fe5 --- /dev/null +++ b/packages/glibc/2.17/0019-glibc-rh731833-misc-5.patch @@ -0,0 +1,31 @@ +From 920e759ea4f48ca9c8b4dba6dfe5c88d27033121 Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Mon, 17 Jun 2013 15:50:53 -0500 +Subject: [PATCH 36/42] PowerPC: Reserve TCB space for EBB framework + +This patch reserves four pointer to be used in future Event-Based +Branch framework for PowerPC. +(cherry picked from commit e55a9b256d53c7fc5145e3e4d338d3741b23e232) +--- + nptl/sysdeps/powerpc/tls.h | 5 +++++ + 2 files changed, 10 insertions(+) + +diff --git glibc-2.17-c758a686/nptl/sysdeps/powerpc/tls.h glibc-2.17-c758a686/nptl/sysdeps/powerpc/tls.h +index 4c09eec..611c773 100644 +--- glibc-2.17-c758a686/nptl/sysdeps/powerpc/tls.h ++++ glibc-2.17-c758a686/nptl/sysdeps/powerpc/tls.h +@@ -61,6 +61,11 @@ typedef union dtv + are private. */ + typedef struct + { ++ /* Reservation for the Event-Based Branching ABI. */ ++ uintptr_t ebb_handler; ++ uintptr_t ebb_ctx_pointer; ++ uintptr_t ebb_reserved1; ++ uintptr_t ebb_reserved2; + uintptr_t pointer_guard; + uintptr_t stack_guard; + dtv_t *dtv; +-- +1.7.11.7 + diff --git a/packages/glibc/2.17/0020-glibc-rh731833-libm-3.patch b/packages/glibc/2.17/0020-glibc-rh731833-libm-3.patch new file mode 100644 index 0000000..89fe7a2 --- /dev/null +++ b/packages/glibc/2.17/0020-glibc-rh731833-libm-3.patch @@ -0,0 +1,269 @@ +From c00f26c0eaba5a9680aac0f98de4b6e385a8cb82 Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Fri, 8 Mar 2013 11:07:15 -0300 +Subject: [PATCH 18/42] PowerPC: unify math_ldbl.h implementations + +This patch removes redudant definition from PowerPC specific +math_ldbl, using the definitions from ieee754 math_ldbl.h. +(backported from commit edf66e57fc2bac083ecc9756a5fe47f9041ed3bb) +--- + sysdeps/ieee754/ldbl-128ibm/math_ldbl.h | 10 +- + sysdeps/powerpc/Implies | 1 + + sysdeps/powerpc/fpu/math_ldbl.h | 171 ++------------------------------ + sysdeps/unix/sysv/linux/powerpc/Implies | 4 - + 5 files changed, 34 insertions(+), 168 deletions(-) + delete mode 100644 sysdeps/unix/sysv/linux/powerpc/Implies + +diff --git glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h +index be9ac71..1cce1fc 100644 +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h +@@ -125,7 +125,7 @@ ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64) + /* Handy utility functions to pack/unpack/cononicalize and find the nearbyint + of long double implemented as double double. 
*/ + static inline long double +-ldbl_pack (double a, double aa) ++default_ldbl_pack (double a, double aa) + { + union ibm_extended_long_double u; + u.dd[0] = a; +@@ -134,7 +134,7 @@ ldbl_pack (double a, double aa) + } + + static inline void +-ldbl_unpack (long double l, double *a, double *aa) ++default_ldbl_unpack (long double l, double *a, double *aa) + { + union ibm_extended_long_double u; + u.d = l; +@@ -142,6 +142,12 @@ ldbl_unpack (long double l, double *a, double *aa) + *aa = u.dd[1]; + } + ++#ifndef ldbl_pack ++# define ldbl_pack default_ldbl_pack ++#endif ++#ifndef ldbl_unpack ++# define ldbl_unpack default_ldbl_unpack ++#endif + + /* Convert a finite long double to canonical form. + Does not handle +/-Inf properly. */ +diff --git glibc-2.17-c758a686/sysdeps/powerpc/Implies glibc-2.17-c758a686/sysdeps/powerpc/Implies +index 7ccf9a7..78dba95 100644 +--- glibc-2.17-c758a686/sysdeps/powerpc/Implies ++++ glibc-2.17-c758a686/sysdeps/powerpc/Implies +@@ -1,4 +1,5 @@ + # On PowerPC we use the IBM extended long double format. + ieee754/ldbl-128ibm ++ieee754/ldbl-opt + ieee754/dbl-64 + ieee754/flt-32 +diff --git glibc-2.17-c758a686/sysdeps/powerpc/fpu/math_ldbl.h glibc-2.17-c758a686/sysdeps/powerpc/fpu/math_ldbl.h +index 6cd6d0b..36378c0 100644 +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/math_ldbl.h ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/math_ldbl.h +@@ -2,132 +2,12 @@ + #error "Never use directly; include instead." + #endif + +-#include +-#include +- +-static inline void +-ldbl_extract_mantissa (int64_t *hi64, u_int64_t *lo64, int *exp, long double x) +-{ +- /* We have 105 bits of mantissa plus one implicit digit. Since +- 106 bits are representable we use the first implicit digit for +- the number before the decimal point and the second implicit bit +- as bit 53 of the mantissa. */ +- unsigned long long hi, lo; +- int ediff; +- union ibm_extended_long_double eldbl; +- eldbl.d = x; +- *exp = eldbl.ieee.exponent - IBM_EXTENDED_LONG_DOUBLE_BIAS; +- +- lo = ((long long)eldbl.ieee.mantissa2 << 32) | eldbl.ieee.mantissa3; +- hi = ((long long)eldbl.ieee.mantissa0 << 32) | eldbl.ieee.mantissa1; +- /* If the lower double is not a denomal or zero then set the hidden +- 53rd bit. */ +- if (eldbl.ieee.exponent2 > 0x001) +- { +- lo |= (1ULL << 52); +- lo = lo << 7; /* pre-shift lo to match ieee854. */ +- /* The lower double is normalized separately from the upper. We +- may need to adjust the lower manitissa to reflect this. */ +- ediff = eldbl.ieee.exponent - eldbl.ieee.exponent2; +- if (ediff > 53) +- lo = lo >> (ediff-53); +- } +- hi |= (1ULL << 52); +- +- if ((eldbl.ieee.negative != eldbl.ieee.negative2) +- && ((eldbl.ieee.exponent2 != 0) && (lo != 0LL))) +- { +- hi--; +- lo = (1ULL << 60) - lo; +- if (hi < (1ULL << 52)) +- { +- /* we have a borrow from the hidden bit, so shift left 1. */ +- hi = (hi << 1) | (lo >> 59); +- lo = 0xfffffffffffffffLL & (lo << 1); +- *exp = *exp - 1; +- } +- } +- *lo64 = (hi << 60) | lo; +- *hi64 = hi >> 4; +-} +- +-static inline long double +-ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64) +-{ +- union ibm_extended_long_double u; +- unsigned long hidden2, lzcount; +- unsigned long long hi, lo; +- +- u.ieee.negative = sign; +- u.ieee.negative2 = sign; +- u.ieee.exponent = exp + IBM_EXTENDED_LONG_DOUBLE_BIAS; +- u.ieee.exponent2 = exp-53 + IBM_EXTENDED_LONG_DOUBLE_BIAS; +- /* Expect 113 bits (112 bits + hidden) right justified in two longs. 
+- The low order 53 bits (52 + hidden) go into the lower double */ +- lo = (lo64 >> 7)& ((1ULL << 53) - 1); +- hidden2 = (lo64 >> 59) & 1ULL; +- /* The high order 53 bits (52 + hidden) go into the upper double */ +- hi = (lo64 >> 60) & ((1ULL << 11) - 1); +- hi |= (hi64 << 4); +- +- if (lo != 0LL) +- { +- /* hidden2 bit of low double controls rounding of the high double. +- If hidden2 is '1' then round up hi and adjust lo (2nd mantissa) +- plus change the sign of the low double to compensate. */ +- if (hidden2) +- { +- hi++; +- u.ieee.negative2 = !sign; +- lo = (1ULL << 53) - lo; +- } +- /* The hidden bit of the lo mantissa is zero so we need to +- normalize the it for the low double. Shift it left until the +- hidden bit is '1' then adjust the 2nd exponent accordingly. */ +- +- if (sizeof (lo) == sizeof (long)) +- lzcount = __builtin_clzl (lo); +- else if ((lo >> 32) != 0) +- lzcount = __builtin_clzl ((long) (lo >> 32)); +- else +- lzcount = __builtin_clzl ((long) lo) + 32; +- lzcount = lzcount - 11; +- if (lzcount > 0) +- { +- int expnt2 = u.ieee.exponent2 - lzcount; +- if (expnt2 >= 1) +- { +- /* Not denormal. Normalize and set low exponent. */ +- lo = lo << lzcount; +- u.ieee.exponent2 = expnt2; +- } +- else +- { +- /* Is denormal. */ +- lo = lo << (lzcount + expnt2); +- u.ieee.exponent2 = 0; +- } +- } +- } +- else +- { +- u.ieee.negative2 = 0; +- u.ieee.exponent2 = 0; +- } +- +- u.ieee.mantissa3 = lo & ((1ULL << 32) - 1); +- u.ieee.mantissa2 = (lo >> 32) & ((1ULL << 20) - 1); +- u.ieee.mantissa1 = hi & ((1ULL << 32) - 1); +- u.ieee.mantissa0 = (hi >> 32) & ((1ULL << 20) - 1); +- return u.d; +-} +- +-/* gcc generates disgusting code to pack and unpack long doubles. +- This tells gcc that pack/unpack is really a nop. We use fr1/fr2 +- because those are the regs used to pass/return a single +- long double arg. */ ++/* GCC does not optimize the default ldbl_pack code to not spill register ++ in the stack. The following optimization tells gcc that pack/unpack ++ is really a nop. We use fr1/fr2 because those are the regs used to ++ pass/return a single long double arg. */ + static inline long double +-ldbl_pack (double a, double aa) ++ldbl_pack_ppc (double a, double aa) + { + register long double x __asm__ ("fr1"); + register double xh __asm__ ("fr1"); +@@ -139,7 +19,7 @@ ldbl_pack (double a, double aa) + } + + static inline void +-ldbl_unpack (long double l, double *a, double *aa) ++ldbl_unpack_ppc (long double l, double *a, double *aa) + { + register long double x __asm__ ("fr1"); + register double xh __asm__ ("fr1"); +@@ -150,40 +30,7 @@ ldbl_unpack (long double l, double *a, double *aa) + *aa = xl; + } + ++#define ldbl_pack ldbl_pack_ppc ++#define ldbl_unpack ldbl_unpack_ppc + +-/* Convert a finite long double to canonical form. +- Does not handle +/-Inf properly. */ +-static inline void +-ldbl_canonicalize (double *a, double *aa) +-{ +- double xh, xl; +- +- xh = *a + *aa; +- xl = (*a - xh) + *aa; +- *a = xh; +- *aa = xl; +-} +- +-/* Simple inline nearbyint (double) function . +- Only works in the default rounding mode +- but is useful in long double rounding functions. 
*/ +-static inline double +-ldbl_nearbyint (double a) +-{ +- double two52 = 0x10000000000000LL; +- +- if (__builtin_expect ((__builtin_fabs (a) < two52), 1)) +- { +- if (__builtin_expect ((a > 0.0), 1)) +- { +- a += two52; +- a -= two52; +- } +- else if (__builtin_expect ((a < 0.0), 1)) +- { +- a = two52 - a; +- a = -(a - two52); +- } +- } +- return a; +-} ++#include +diff --git glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/Implies glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/Implies +deleted file mode 100644 +index ff27cdb..0000000 +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/Implies ++++ /dev/null +@@ -1,4 +0,0 @@ +-# Make sure these routines come before ldbl-opt. +-ieee754/ldbl-128ibm +-# These supply the ABI compatibility for when long double was double. +-ieee754/ldbl-opt +-- +1.7.11.7 + diff --git a/packages/glibc/2.17/0021-glibc-rh731833-libm-4.patch b/packages/glibc/2.17/0021-glibc-rh731833-libm-4.patch new file mode 100644 index 0000000..a513494 --- /dev/null +++ b/packages/glibc/2.17/0021-glibc-rh731833-libm-4.patch @@ -0,0 +1,53 @@ +Combination of the following two commits: + +From 45045c44fabde9152ab1a0b4ed06419a3621f535 Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Thu, 21 Mar 2013 14:15:45 -0300 +Subject: [PATCH 20/42] PowerPC: fix sqrtl ABI issue + +This patch fixes a sqrtl ABI issue when building for powerpc64. +(cherry picked from commit b5784d95bb94eda59b08aca735406908e209f638) + +From dad835a11f370afd2dae4bac554fa64fac5a8c6e Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Tue, 26 Mar 2013 10:01:57 -0300 +Subject: [PATCH 21/42] PowerPC: fix libm ABI issue for llroundl (cherry + picked from commit + fce14d4e9c6e08ad8c825fe88d8cbdac5c739565) + +diff -pruN glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llround.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llround.c +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llround.c 2012-12-25 08:32:13.000000000 +0530 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llround.c 2013-08-06 17:45:56.719534470 +0530 +@@ -17,6 +17,7 @@ + . 
*/ + + #include ++#include + + /* I think that what this routine is supposed to do is round a value + to the nearest integer, with values exactly on the boundary rounded +@@ -47,3 +48,6 @@ weak_alias (__llround, llround) + strong_alias (__llround, __llroundl) + weak_alias (__llround, llroundl) + #endif ++#if LONG_DOUBLE_COMPAT (libm, GLIBC_2_1) ++compat_symbol (libm, __llround, llroundl, GLIBC_2_1); ++#endif +diff -pruN glibc-2.17-c758a686/sysdeps/powerpc/fpu/w_sqrt.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/w_sqrt.c +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/w_sqrt.c 2012-12-25 08:32:13.000000000 +0530 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/w_sqrt.c 2013-08-06 17:45:53.459534613 +0530 +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + + double + __sqrt (double x) /* wrapper sqrt */ +@@ -42,3 +43,6 @@ weak_alias (__sqrt, sqrt) + #ifdef NO_LONG_DOUBLE + strong_alias (__sqrt, __sqrtl) weak_alias (__sqrt, sqrtl) + #endif ++#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) ++compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0); ++#endif diff --git a/packages/glibc/2.17/0022-glibc-powerpc-ldbl_high.patch b/packages/glibc/2.17/0022-glibc-powerpc-ldbl_high.patch new file mode 100644 index 0000000..68faeee --- /dev/null +++ b/packages/glibc/2.17/0022-glibc-powerpc-ldbl_high.patch @@ -0,0 +1,13 @@ +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h 2014-05-27 19:59:00.000000000 -0500 +@@ -190,6 +190,9 @@ + # define ldbl_unpack default_ldbl_unpack + #endif + ++/* Extract high double. */ ++#define ldbl_high(x) ((double) x) ++ + /* Convert a finite long double to canonical form. + Does not handle +/-Inf properly. */ + static inline void diff --git a/packages/glibc/2.17/0023-glibc-ppc64le-01.patch b/packages/glibc/2.17/0023-glibc-ppc64le-01.patch new file mode 100644 index 0000000..e2a86d7 --- /dev/null +++ b/packages/glibc/2.17/0023-glibc-ppc64le-01.patch @@ -0,0 +1,83 @@ +# commit 1695c7737655241e1773bdddc93e82c22d8d1584 +# Author: Adhemerval Zanella +# Date: Tue Feb 4 09:48:47 2014 -0200 +# +# abilist-pattern configurability +# +# This patch creates implicit rules to match the abifiles if +# abilist-pattern is defined in the architecture Makefile. This allows +# machine specific Makefiles to define different abifiles names +# (for instance *-le.abilist for powerpc64le). +# +diff -urN glibc-2.17-c758a686/Makerules glibc-2.17-c758a686/Makerules +--- glibc-2.17-c758a686/Makerules 2014-06-02 15:29:42.000000000 +0000 ++++ glibc-2.17-c758a686/Makerules 2014-06-02 15:25:21.000000000 +0000 +@@ -1152,6 +1152,14 @@ + LC_ALL=C $(OBJDUMP) --dynamic-syms $< > $@T + mv -f $@T $@ + ++# A sysdeps/.../Makefile can set abilist-pattern to something like ++# %-foo.abilist to look for libc-foo.abilist instead of libc.abilist. ++# This makes sense if multiple ABIs can be most cleanly supported by a ++# configuration without using separate sysdeps directories for each. 
++ifdef abilist-pattern ++vpath $(abilist-pattern) $(+sysdep_dirs) ++endif ++ + vpath %.abilist $(+sysdep_dirs) + + # The .PRECIOUS rule prevents the files built by an implicit rule whose +@@ -1161,18 +1169,42 @@ + .PRECIOUS: %.symlist + generated += $(extra-libs:=.symlist) + ++ifdef abilist-pattern ++check-abi-%: $(common-objpfx)config.make $(abilist-pattern) $(objpfx)%.symlist ++ $(check-abi-pattern) ++check-abi-%: $(common-objpfx)config.make $(abilist-pattern) \ ++ $(common-objpfx)%.symlist ++ $(check-abi-pattern) ++endif + check-abi-%: $(common-objpfx)config.make %.abilist $(objpfx)%.symlist + $(check-abi) + check-abi-%: $(common-objpfx)config.make %.abilist $(common-objpfx)%.symlist + $(check-abi) ++define check-abi-pattern ++ diff -p -U 0 $(filter $(abilist-pattern),$^) $(filter %.symlist,$^) ++endef + define check-abi + diff -p -U 0 $(filter %.abilist,$^) $(filter %.symlist,$^) + endef + ++ifdef abilist-pattern ++update-abi-%: $(objpfx)%.symlist $(abilist-pattern) ++ $(update-abi-pattern) ++update-abi-%: $(common-objpfx)%.symlist $(abilist-pattern) ++ $(update-abi-pattern) ++endif + update-abi-%: $(objpfx)%.symlist %.abilist + $(update-abi) + update-abi-%: $(common-objpfx)%.symlist %.abilist + $(update-abi) ++define update-abi-pattern ++@if cmp -s $^ 2> /dev/null; \ ++ then \ ++ echo '+++ $(filter $(abilist-pattern),$^) is unchanged'; \ ++ else cp -f $^; \ ++ echo '*** Now check $(filter $(abilist-pattern),$^) changes for correctness ***'; \ ++ fi ++endef + define update-abi + @if cmp -s $^ 2> /dev/null; \ + then \ +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/Makefile glibc-2.17-c758a686/sysdeps/powerpc/Makefile +--- glibc-2.17-c758a686/sysdeps/powerpc/Makefile 2014-06-02 15:29:42.000000000 +0000 ++++ glibc-2.17-c758a686/sysdeps/powerpc/Makefile 2014-06-02 15:25:21.000000000 +0000 +@@ -27,3 +27,7 @@ + sysdep_headers += sys/platform/ppc.h + tests += test-gettimebase + endif ++ ++ifneq (,$(filter %le,$(config-machine))) ++abilist-pattern = %-le.abilist ++endif diff --git a/packages/glibc/2.17/0024-glibc-ppc64le-02.patch b/packages/glibc/2.17/0024-glibc-ppc64le-02.patch new file mode 100644 index 0000000..3878826 --- /dev/null +++ b/packages/glibc/2.17/0024-glibc-ppc64le-02.patch @@ -0,0 +1,3197 @@ +# co`mmit c01603f763003cec55234ac757c7a934652caa55 +# Author: Adhemerval Zanella +# Date: Tue Feb 4 09:49:34 2014 -0200 +# +# PowerPC: powerpc64le abilist for 2.17 +# +# This patch is the abifiles for powerpc64le based on GLIBC 2.17. 
+# +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/ld-le.abilist glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/ld-le.abilist +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/ld-le.abilist 1970-01-01 00:00:00.000000000 +0000 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/ld-le.abilist 2014-06-02 15:22:40.000000000 +0000 +@@ -0,0 +1,11 @@ ++GLIBC_2.17 ++ GLIBC_2.17 A ++ __libc_memalign F ++ __libc_stack_end D 0x8 ++ __tls_get_addr F ++ _dl_mcount F ++ _r_debug D 0x28 ++ calloc F ++ free F ++ malloc F ++ realloc F +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libBrokenLocale-le.abilist glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libBrokenLocale-le.abilist +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libBrokenLocale-le.abilist 1970-01-01 00:00:00.000000000 +0000 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libBrokenLocale-le.abilist 2014-06-02 15:22:40.000000000 +0000 +@@ -0,0 +1,3 @@ ++GLIBC_2.17 ++ GLIBC_2.17 A ++ __ctype_get_mb_cur_max F +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libanl-le.abilist glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libanl-le.abilist +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libanl-le.abilist 1970-01-01 00:00:00.000000000 +0000 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libanl-le.abilist 2014-06-02 15:22:40.000000000 +0000 +@@ -0,0 +1,6 @@ ++GLIBC_2.17 ++ GLIBC_2.17 A ++ gai_cancel F ++ gai_error F ++ gai_suspend F ++ getaddrinfo_a F +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libc-le.abilist glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libc-le.abilist +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libc-le.abilist 1970-01-01 00:00:00.000000000 +0000 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libc-le.abilist 2014-06-02 15:22:40.000000000 +0000 +@@ -0,0 +1,2168 @@ ++GLIBC_2.17 ++ GLIBC_2.17 A ++ _Exit F ++ _IO_2_1_stderr_ D 0xe0 ++ _IO_2_1_stdin_ D 0xe0 ++ _IO_2_1_stdout_ D 0xe0 ++ _IO_adjust_column F ++ _IO_adjust_wcolumn F ++ _IO_default_doallocate F ++ _IO_default_finish F ++ _IO_default_pbackfail F ++ _IO_default_uflow F ++ _IO_default_xsgetn F ++ _IO_default_xsputn F ++ _IO_do_write F ++ _IO_doallocbuf F ++ _IO_fclose F ++ _IO_fdopen F ++ _IO_feof F ++ _IO_ferror F ++ _IO_fflush F ++ _IO_fgetpos F ++ _IO_fgetpos64 F ++ _IO_fgets F ++ _IO_file_attach F ++ _IO_file_close F ++ _IO_file_close_it F ++ _IO_file_doallocate F ++ _IO_file_finish F ++ _IO_file_fopen F ++ _IO_file_init F ++ _IO_file_jumps D 0xa8 ++ _IO_file_open F ++ _IO_file_overflow F ++ _IO_file_read F ++ _IO_file_seek F ++ _IO_file_seekoff F ++ _IO_file_setbuf F ++ _IO_file_stat F ++ _IO_file_sync F ++ _IO_file_underflow F ++ _IO_file_write F ++ _IO_file_xsputn F ++ _IO_flockfile F ++ _IO_flush_all F ++ _IO_flush_all_linebuffered F ++ _IO_fopen F ++ _IO_fprintf F ++ _IO_fputs F ++ _IO_fread F ++ _IO_free_backup_area F ++ _IO_free_wbackup_area F ++ _IO_fsetpos F ++ _IO_fsetpos64 F ++ _IO_ftell F ++ _IO_ftrylockfile F ++ _IO_funlockfile F ++ _IO_fwrite F ++ _IO_getc F ++ _IO_getline F ++ _IO_getline_info F ++ _IO_gets F ++ _IO_init F ++ _IO_init_marker F ++ _IO_init_wmarker F ++ _IO_iter_begin F ++ _IO_iter_end F ++ _IO_iter_file F ++ _IO_iter_next F ++ _IO_least_wmarker F ++ 
_IO_link_in F ++ _IO_list_all D 0x8 ++ _IO_list_lock F ++ _IO_list_resetlock F ++ _IO_list_unlock F ++ _IO_marker_delta F ++ _IO_marker_difference F ++ _IO_padn F ++ _IO_peekc_locked F ++ _IO_popen F ++ _IO_printf F ++ _IO_proc_close F ++ _IO_proc_open F ++ _IO_putc F ++ _IO_puts F ++ _IO_remove_marker F ++ _IO_seekmark F ++ _IO_seekoff F ++ _IO_seekpos F ++ _IO_seekwmark F ++ _IO_setb F ++ _IO_setbuffer F ++ _IO_setvbuf F ++ _IO_sgetn F ++ _IO_sprintf F ++ _IO_sputbackc F ++ _IO_sputbackwc F ++ _IO_sscanf F ++ _IO_str_init_readonly F ++ _IO_str_init_static F ++ _IO_str_overflow F ++ _IO_str_pbackfail F ++ _IO_str_seekoff F ++ _IO_str_underflow F ++ _IO_sungetc F ++ _IO_sungetwc F ++ _IO_switch_to_get_mode F ++ _IO_switch_to_main_wget_area F ++ _IO_switch_to_wbackup_area F ++ _IO_switch_to_wget_mode F ++ _IO_un_link F ++ _IO_ungetc F ++ _IO_unsave_markers F ++ _IO_unsave_wmarkers F ++ _IO_vfprintf F ++ _IO_vfscanf F ++ _IO_vsprintf F ++ _IO_wdefault_doallocate F ++ _IO_wdefault_finish F ++ _IO_wdefault_pbackfail F ++ _IO_wdefault_uflow F ++ _IO_wdefault_xsgetn F ++ _IO_wdefault_xsputn F ++ _IO_wdo_write F ++ _IO_wdoallocbuf F ++ _IO_wfile_jumps D 0xa8 ++ _IO_wfile_overflow F ++ _IO_wfile_seekoff F ++ _IO_wfile_sync F ++ _IO_wfile_underflow F ++ _IO_wfile_xsputn F ++ _IO_wmarker_delta F ++ _IO_wsetb F ++ __adjtimex F ++ __after_morecore_hook D 0x8 ++ __argz_count F ++ __argz_next F ++ __argz_stringify F ++ __asprintf F ++ __asprintf_chk F ++ __assert F ++ __assert_fail F ++ __assert_perror_fail F ++ __backtrace F ++ __backtrace_symbols F ++ __backtrace_symbols_fd F ++ __bsd_getpgrp F ++ __bzero F ++ __check_rhosts_file D 0x4 ++ __chk_fail F ++ __clone F ++ __close F ++ __cmsg_nxthdr F ++ __confstr_chk F ++ __connect F ++ __ctype_b_loc F ++ __ctype_get_mb_cur_max F ++ __ctype_tolower_loc F ++ __ctype_toupper_loc F ++ __curbrk D 0x8 ++ __cxa_at_quick_exit F ++ __cxa_atexit F ++ __cxa_finalize F ++ __cyg_profile_func_enter F ++ __cyg_profile_func_exit F ++ __daylight D 0x4 ++ __dcgettext F ++ __default_morecore F ++ __dgettext F ++ __dprintf_chk F ++ __dup2 F ++ __duplocale F ++ __endmntent F ++ __environ D 0x8 ++ __errno_location F ++ __fbufsize F ++ __fcntl F ++ __fdelt_chk F ++ __fdelt_warn F ++ __ffs F ++ __fgets_chk F ++ __fgets_unlocked_chk F ++ __fgetws_chk F ++ __fgetws_unlocked_chk F ++ __finite F ++ __finitef F ++ __finitel F ++ __flbf F ++ __fork F ++ __fpending F ++ __fprintf_chk F ++ __fpu_control D 0x4 ++ __fpurge F ++ __fread_chk F ++ __fread_unlocked_chk F ++ __freadable F ++ __freading F ++ __free_hook D 0x8 ++ __freelocale F ++ __fsetlocking F ++ __fwprintf_chk F ++ __fwritable F ++ __fwriting F ++ __fxstat F ++ __fxstat64 F ++ __fxstatat F ++ __fxstatat64 F ++ __getauxval F ++ __getcwd_chk F ++ __getdelim F ++ __getdomainname_chk F ++ __getgroups_chk F ++ __gethostname_chk F ++ __getlogin_r_chk F ++ __getmntent_r F ++ __getpagesize F ++ __getpgid F ++ __getpid F ++ __gets_chk F ++ __gettimeofday F ++ __getwd_chk F ++ __gmtime_r F ++ __h_errno_location F ++ __isalnum_l F ++ __isalpha_l F ++ __isascii_l F ++ __isblank_l F ++ __iscntrl_l F ++ __isctype F ++ __isdigit_l F ++ __isgraph_l F ++ __isinf F ++ __isinff F ++ __isinfl F ++ __islower_l F ++ __isnan F ++ __isnanf F ++ __isnanl F ++ __isoc99_fscanf F ++ __isoc99_fwscanf F ++ __isoc99_scanf F ++ __isoc99_sscanf F ++ __isoc99_swscanf F ++ __isoc99_vfscanf F ++ __isoc99_vfwscanf F ++ __isoc99_vscanf F ++ __isoc99_vsscanf F ++ __isoc99_vswscanf F ++ __isoc99_vwscanf F ++ __isoc99_wscanf F ++ __isprint_l F ++ __ispunct_l F ++ 
__isspace_l F ++ __isupper_l F ++ __iswalnum_l F ++ __iswalpha_l F ++ __iswblank_l F ++ __iswcntrl_l F ++ __iswctype F ++ __iswctype_l F ++ __iswdigit_l F ++ __iswgraph_l F ++ __iswlower_l F ++ __iswprint_l F ++ __iswpunct_l F ++ __iswspace_l F ++ __iswupper_l F ++ __iswxdigit_l F ++ __isxdigit_l F ++ __ivaliduser F ++ __key_decryptsession_pk_LOCAL D 0x8 ++ __key_encryptsession_pk_LOCAL D 0x8 ++ __key_gendes_LOCAL D 0x8 ++ __libc_allocate_rtsig F ++ __libc_calloc F ++ __libc_current_sigrtmax F ++ __libc_current_sigrtmin F ++ __libc_free F ++ __libc_freeres F ++ __libc_init_first F ++ __libc_mallinfo F ++ __libc_malloc F ++ __libc_mallopt F ++ __libc_memalign F ++ __libc_pvalloc F ++ __libc_realloc F ++ __libc_sa_len F ++ __libc_start_main F ++ __libc_valloc F ++ __longjmp_chk F ++ __lseek F ++ __lxstat F ++ __lxstat64 F ++ __malloc_hook D 0x8 ++ __malloc_initialize_hook D 0x8 ++ __mbrlen F ++ __mbrtowc F ++ __mbsnrtowcs_chk F ++ __mbsrtowcs_chk F ++ __mbstowcs_chk F ++ __memalign_hook D 0x8 ++ __memcpy_chk F ++ __memmove_chk F ++ __mempcpy F ++ __mempcpy_chk F ++ __mempcpy_small F ++ __memset_chk F ++ __monstartup F ++ __morecore D 0x8 ++ __nanosleep F ++ __newlocale F ++ __nl_langinfo_l F ++ __nldbl__IO_fprintf F ++ __nldbl__IO_printf F ++ __nldbl__IO_sprintf F ++ __nldbl__IO_sscanf F ++ __nldbl__IO_vfprintf F ++ __nldbl__IO_vfscanf F ++ __nldbl__IO_vsprintf F ++ __nldbl___asprintf F ++ __nldbl___asprintf_chk F ++ __nldbl___dprintf_chk F ++ __nldbl___fprintf_chk F ++ __nldbl___fwprintf_chk F ++ __nldbl___isoc99_fscanf F ++ __nldbl___isoc99_fwscanf F ++ __nldbl___isoc99_scanf F ++ __nldbl___isoc99_sscanf F ++ __nldbl___isoc99_swscanf F ++ __nldbl___isoc99_vfscanf F ++ __nldbl___isoc99_vfwscanf F ++ __nldbl___isoc99_vscanf F ++ __nldbl___isoc99_vsscanf F ++ __nldbl___isoc99_vswscanf F ++ __nldbl___isoc99_vwscanf F ++ __nldbl___isoc99_wscanf F ++ __nldbl___obstack_printf_chk F ++ __nldbl___obstack_vprintf_chk F ++ __nldbl___printf_chk F ++ __nldbl___printf_fp F ++ __nldbl___snprintf_chk F ++ __nldbl___sprintf_chk F ++ __nldbl___strfmon_l F ++ __nldbl___swprintf_chk F ++ __nldbl___syslog_chk F ++ __nldbl___vasprintf_chk F ++ __nldbl___vdprintf_chk F ++ __nldbl___vfprintf_chk F ++ __nldbl___vfscanf F ++ __nldbl___vfwprintf_chk F ++ __nldbl___vprintf_chk F ++ __nldbl___vsnprintf F ++ __nldbl___vsnprintf_chk F ++ __nldbl___vsprintf_chk F ++ __nldbl___vsscanf F ++ __nldbl___vstrfmon F ++ __nldbl___vstrfmon_l F ++ __nldbl___vswprintf_chk F ++ __nldbl___vsyslog_chk F ++ __nldbl___vwprintf_chk F ++ __nldbl___wprintf_chk F ++ __nldbl_asprintf F ++ __nldbl_dprintf F ++ __nldbl_fprintf F ++ __nldbl_fscanf F ++ __nldbl_fwprintf F ++ __nldbl_fwscanf F ++ __nldbl_obstack_printf F ++ __nldbl_obstack_vprintf F ++ __nldbl_printf F ++ __nldbl_printf_size F ++ __nldbl_scanf F ++ __nldbl_snprintf F ++ __nldbl_sprintf F ++ __nldbl_sscanf F ++ __nldbl_strfmon F ++ __nldbl_strfmon_l F ++ __nldbl_swprintf F ++ __nldbl_swscanf F ++ __nldbl_syslog F ++ __nldbl_vasprintf F ++ __nldbl_vdprintf F ++ __nldbl_vfprintf F ++ __nldbl_vfscanf F ++ __nldbl_vfwprintf F ++ __nldbl_vfwscanf F ++ __nldbl_vprintf F ++ __nldbl_vscanf F ++ __nldbl_vsnprintf F ++ __nldbl_vsprintf F ++ __nldbl_vsscanf F ++ __nldbl_vswprintf F ++ __nldbl_vswscanf F ++ __nldbl_vsyslog F ++ __nldbl_vwprintf F ++ __nldbl_vwscanf F ++ __nldbl_wprintf F ++ __nldbl_wscanf F ++ __nss_configure_lookup F ++ __nss_database_lookup F ++ __nss_group_lookup F ++ __nss_hostname_digits_dots F ++ __nss_hosts_lookup F ++ __nss_next F ++ __nss_passwd_lookup F ++ 
__obstack_printf_chk F ++ __obstack_vprintf_chk F ++ __open F ++ __open64 F ++ __open64_2 F ++ __open_2 F ++ __openat64_2 F ++ __openat_2 F ++ __overflow F ++ __pipe F ++ __poll F ++ __poll_chk F ++ __posix_getopt F ++ __ppc_get_timebase_freq F ++ __ppoll_chk F ++ __pread64 F ++ __pread64_chk F ++ __pread_chk F ++ __printf_chk F ++ __printf_fp F ++ __profile_frequency F ++ __progname D 0x8 ++ __progname_full D 0x8 ++ __ptsname_r_chk F ++ __pwrite64 F ++ __rawmemchr F ++ __rcmd_errstr D 0x8 ++ __read F ++ __read_chk F ++ __readlink_chk F ++ __readlinkat_chk F ++ __realloc_hook D 0x8 ++ __realpath_chk F ++ __recv_chk F ++ __recvfrom_chk F ++ __register_atfork F ++ __res_init F ++ __res_nclose F ++ __res_ninit F ++ __res_randomid F ++ __res_state F ++ __rpc_thread_createerr F ++ __rpc_thread_svc_fdset F ++ __rpc_thread_svc_max_pollfd F ++ __rpc_thread_svc_pollfd F ++ __sbrk F ++ __sched_cpualloc F ++ __sched_cpucount F ++ __sched_cpufree F ++ __sched_get_priority_max F ++ __sched_get_priority_min F ++ __sched_getparam F ++ __sched_getscheduler F ++ __sched_setscheduler F ++ __sched_yield F ++ __select F ++ __send F ++ __setmntent F ++ __setpgid F ++ __sigaction F ++ __sigaddset F ++ __sigdelset F ++ __sigismember F ++ __signbit F ++ __signbitf F ++ __signbitl F ++ __sigpause F ++ __sigsetjmp F ++ __sigsuspend F ++ __snprintf_chk F ++ __sprintf_chk F ++ __stack_chk_fail F ++ __statfs F ++ __stpcpy F ++ __stpcpy_chk F ++ __stpcpy_small F ++ __stpncpy F ++ __stpncpy_chk F ++ __strcasecmp F ++ __strcasecmp_l F ++ __strcasestr F ++ __strcat_chk F ++ __strcoll_l F ++ __strcpy_chk F ++ __strcpy_small F ++ __strcspn_c1 F ++ __strcspn_c2 F ++ __strcspn_c3 F ++ __strdup F ++ __strerror_r F ++ __strfmon_l F ++ __strftime_l F ++ __strncasecmp_l F ++ __strncat_chk F ++ __strncpy_chk F ++ __strndup F ++ __strpbrk_c2 F ++ __strpbrk_c3 F ++ __strsep_1c F ++ __strsep_2c F ++ __strsep_3c F ++ __strsep_g F ++ __strspn_c1 F ++ __strspn_c2 F ++ __strspn_c3 F ++ __strtod_internal F ++ __strtod_l F ++ __strtof_internal F ++ __strtof_l F ++ __strtok_r F ++ __strtok_r_1c F ++ __strtol_internal F ++ __strtol_l F ++ __strtold_internal F ++ __strtold_l F ++ __strtoll_internal F ++ __strtoll_l F ++ __strtoul_internal F ++ __strtoul_l F ++ __strtoull_internal F ++ __strtoull_l F ++ __strverscmp F ++ __strxfrm_l F ++ __swprintf_chk F ++ __sysconf F ++ __sysctl F ++ __syslog_chk F ++ __sysv_signal F ++ __timezone D 0x8 ++ __toascii_l F ++ __tolower_l F ++ __toupper_l F ++ __towctrans F ++ __towctrans_l F ++ __towlower_l F ++ __towupper_l F ++ __ttyname_r_chk F ++ __tzname D 0x10 ++ __uflow F ++ __underflow F ++ __uselocale F ++ __vasprintf_chk F ++ __vdprintf_chk F ++ __vfork F ++ __vfprintf_chk F ++ __vfscanf F ++ __vfwprintf_chk F ++ __vprintf_chk F ++ __vsnprintf F ++ __vsnprintf_chk F ++ __vsprintf_chk F ++ __vsscanf F ++ __vswprintf_chk F ++ __vsyslog_chk F ++ __vwprintf_chk F ++ __wait F ++ __waitpid F ++ __wcpcpy_chk F ++ __wcpncpy_chk F ++ __wcrtomb_chk F ++ __wcscasecmp_l F ++ __wcscat_chk F ++ __wcscoll_l F ++ __wcscpy_chk F ++ __wcsftime_l F ++ __wcsncasecmp_l F ++ __wcsncat_chk F ++ __wcsncpy_chk F ++ __wcsnrtombs_chk F ++ __wcsrtombs_chk F ++ __wcstod_internal F ++ __wcstod_l F ++ __wcstof_internal F ++ __wcstof_l F ++ __wcstol_internal F ++ __wcstol_l F ++ __wcstold_internal F ++ __wcstold_l F ++ __wcstoll_internal F ++ __wcstoll_l F ++ __wcstombs_chk F ++ __wcstoul_internal F ++ __wcstoul_l F ++ __wcstoull_internal F ++ __wcstoull_l F ++ __wcsxfrm_l F ++ __wctomb_chk F ++ __wctrans_l F ++ __wctype_l F ++ 
__wmemcpy_chk F ++ __wmemmove_chk F ++ __wmempcpy_chk F ++ __wmemset_chk F ++ __woverflow F ++ __wprintf_chk F ++ __write F ++ __wuflow F ++ __wunderflow F ++ __xmknod F ++ __xmknodat F ++ __xpg_basename F ++ __xpg_sigpause F ++ __xpg_strerror_r F ++ __xstat F ++ __xstat64 F ++ _authenticate F ++ _dl_mcount_wrapper F ++ _dl_mcount_wrapper_check F ++ _environ D 0x8 ++ _exit F ++ _flushlbf F ++ _libc_intl_domainname D 0x5 ++ _longjmp F ++ _mcleanup F ++ _mcount F ++ _nl_default_dirname D 0x12 ++ _nl_domain_bindings D 0x8 ++ _nl_msg_cat_cntr D 0x4 ++ _null_auth D 0x18 ++ _obstack_allocated_p F ++ _obstack_begin F ++ _obstack_begin_1 F ++ _obstack_free F ++ _obstack_memory_used F ++ _obstack_newchunk F ++ _res D 0x238 ++ _res_hconf D 0x48 ++ _rpc_dtablesize F ++ _seterr_reply F ++ _setjmp F ++ _sys_errlist D 0x438 ++ _sys_nerr D 0x4 ++ _sys_siglist D 0x208 ++ _tolower F ++ _toupper F ++ a64l F ++ abort F ++ abs F ++ accept F ++ accept4 F ++ access F ++ acct F ++ addmntent F ++ addseverity F ++ adjtime F ++ adjtimex F ++ advance F ++ alarm F ++ aligned_alloc F ++ alphasort F ++ alphasort64 F ++ argp_err_exit_status D 0x4 ++ argp_error F ++ argp_failure F ++ argp_help F ++ argp_parse F ++ argp_program_bug_address D 0x8 ++ argp_program_version D 0x8 ++ argp_program_version_hook D 0x8 ++ argp_state_help F ++ argp_usage F ++ argz_add F ++ argz_add_sep F ++ argz_append F ++ argz_count F ++ argz_create F ++ argz_create_sep F ++ argz_delete F ++ argz_extract F ++ argz_insert F ++ argz_next F ++ argz_replace F ++ argz_stringify F ++ asctime F ++ asctime_r F ++ asprintf F ++ atof F ++ atoi F ++ atol F ++ atoll F ++ authdes_create F ++ authdes_getucred F ++ authdes_pk_create F ++ authnone_create F ++ authunix_create F ++ authunix_create_default F ++ backtrace F ++ backtrace_symbols F ++ backtrace_symbols_fd F ++ basename F ++ bcmp F ++ bcopy F ++ bdflush F ++ bind F ++ bind_textdomain_codeset F ++ bindresvport F ++ bindtextdomain F ++ brk F ++ bsd_signal F ++ bsearch F ++ btowc F ++ bzero F ++ c16rtomb F ++ c32rtomb F ++ calloc F ++ callrpc F ++ canonicalize_file_name F ++ capget F ++ capset F ++ catclose F ++ catgets F ++ catopen F ++ cbc_crypt F ++ cfgetispeed F ++ cfgetospeed F ++ cfmakeraw F ++ cfree F ++ cfsetispeed F ++ cfsetospeed F ++ cfsetspeed F ++ chdir F ++ chflags F ++ chmod F ++ chown F ++ chroot F ++ clearenv F ++ clearerr F ++ clearerr_unlocked F ++ clnt_broadcast F ++ clnt_create F ++ clnt_pcreateerror F ++ clnt_perrno F ++ clnt_perror F ++ clnt_spcreateerror F ++ clnt_sperrno F ++ clnt_sperror F ++ clntraw_create F ++ clnttcp_create F ++ clntudp_bufcreate F ++ clntudp_create F ++ clntunix_create F ++ clock F ++ clock_adjtime F ++ clock_getcpuclockid F ++ clock_getres F ++ clock_gettime F ++ clock_nanosleep F ++ clock_settime F ++ clone F ++ close F ++ closedir F ++ closelog F ++ confstr F ++ connect F ++ copysign F ++ copysignf F ++ copysignl F ++ creat F ++ creat64 F ++ create_module F ++ ctermid F ++ ctime F ++ ctime_r F ++ cuserid F ++ daemon F ++ daylight D 0x4 ++ dcgettext F ++ dcngettext F ++ delete_module F ++ des_setparity F ++ dgettext F ++ difftime F ++ dirfd F ++ dirname F ++ div F ++ dl_iterate_phdr F ++ dngettext F ++ dprintf F ++ drand48 F ++ drand48_r F ++ dup F ++ dup2 F ++ dup3 F ++ duplocale F ++ dysize F ++ eaccess F ++ ecb_crypt F ++ ecvt F ++ ecvt_r F ++ endaliasent F ++ endfsent F ++ endgrent F ++ endhostent F ++ endmntent F ++ endnetent F ++ endnetgrent F ++ endprotoent F ++ endpwent F ++ endrpcent F ++ endservent F ++ endsgent F ++ endspent F ++ endttyent F ++ 
endusershell F ++ endutent F ++ endutxent F ++ environ D 0x8 ++ envz_add F ++ envz_entry F ++ envz_get F ++ envz_merge F ++ envz_remove F ++ envz_strip F ++ epoll_create F ++ epoll_create1 F ++ epoll_ctl F ++ epoll_pwait F ++ epoll_wait F ++ erand48 F ++ erand48_r F ++ err F ++ error F ++ error_at_line F ++ error_message_count D 0x4 ++ error_one_per_line D 0x4 ++ error_print_progname D 0x8 ++ errx F ++ ether_aton F ++ ether_aton_r F ++ ether_hostton F ++ ether_line F ++ ether_ntoa F ++ ether_ntoa_r F ++ ether_ntohost F ++ euidaccess F ++ eventfd F ++ eventfd_read F ++ eventfd_write F ++ execl F ++ execle F ++ execlp F ++ execv F ++ execve F ++ execvp F ++ execvpe F ++ exit F ++ faccessat F ++ fallocate F ++ fallocate64 F ++ fanotify_init F ++ fanotify_mark F ++ fattach F ++ fchdir F ++ fchflags F ++ fchmod F ++ fchmodat F ++ fchown F ++ fchownat F ++ fclose F ++ fcloseall F ++ fcntl F ++ fcvt F ++ fcvt_r F ++ fdatasync F ++ fdetach F ++ fdopen F ++ fdopendir F ++ feof F ++ feof_unlocked F ++ ferror F ++ ferror_unlocked F ++ fexecve F ++ fflush F ++ fflush_unlocked F ++ ffs F ++ ffsl F ++ ffsll F ++ fgetc F ++ fgetc_unlocked F ++ fgetgrent F ++ fgetgrent_r F ++ fgetpos F ++ fgetpos64 F ++ fgetpwent F ++ fgetpwent_r F ++ fgets F ++ fgets_unlocked F ++ fgetsgent F ++ fgetsgent_r F ++ fgetspent F ++ fgetspent_r F ++ fgetwc F ++ fgetwc_unlocked F ++ fgetws F ++ fgetws_unlocked F ++ fgetxattr F ++ fileno F ++ fileno_unlocked F ++ finite F ++ finitef F ++ finitel F ++ flistxattr F ++ flock F ++ flockfile F ++ fmemopen F ++ fmtmsg F ++ fnmatch F ++ fopen F ++ fopen64 F ++ fopencookie F ++ fork F ++ fpathconf F ++ fprintf F ++ fputc F ++ fputc_unlocked F ++ fputs F ++ fputs_unlocked F ++ fputwc F ++ fputwc_unlocked F ++ fputws F ++ fputws_unlocked F ++ fread F ++ fread_unlocked F ++ free F ++ freeaddrinfo F ++ freeifaddrs F ++ freelocale F ++ fremovexattr F ++ freopen F ++ freopen64 F ++ frexp F ++ frexpf F ++ frexpl F ++ fscanf F ++ fseek F ++ fseeko F ++ fseeko64 F ++ fsetpos F ++ fsetpos64 F ++ fsetxattr F ++ fstatfs F ++ fstatfs64 F ++ fstatvfs F ++ fstatvfs64 F ++ fsync F ++ ftell F ++ ftello F ++ ftello64 F ++ ftime F ++ ftok F ++ ftruncate F ++ ftruncate64 F ++ ftrylockfile F ++ fts_children F ++ fts_close F ++ fts_open F ++ fts_read F ++ fts_set F ++ ftw F ++ ftw64 F ++ funlockfile F ++ futimens F ++ futimes F ++ futimesat F ++ fwide F ++ fwprintf F ++ fwrite F ++ fwrite_unlocked F ++ fwscanf F ++ gai_strerror F ++ gcvt F ++ get_avphys_pages F ++ get_current_dir_name F ++ get_kernel_syms F ++ get_myaddress F ++ get_nprocs F ++ get_nprocs_conf F ++ get_phys_pages F ++ getaddrinfo F ++ getaliasbyname F ++ getaliasbyname_r F ++ getaliasent F ++ getaliasent_r F ++ getauxval F ++ getc F ++ getc_unlocked F ++ getchar F ++ getchar_unlocked F ++ getcontext F ++ getcwd F ++ getdate F ++ getdate_err D 0x4 ++ getdate_r F ++ getdelim F ++ getdirentries F ++ getdirentries64 F ++ getdomainname F ++ getdtablesize F ++ getegid F ++ getenv F ++ geteuid F ++ getfsent F ++ getfsfile F ++ getfsspec F ++ getgid F ++ getgrent F ++ getgrent_r F ++ getgrgid F ++ getgrgid_r F ++ getgrnam F ++ getgrnam_r F ++ getgrouplist F ++ getgroups F ++ gethostbyaddr F ++ gethostbyaddr_r F ++ gethostbyname F ++ gethostbyname2 F ++ gethostbyname2_r F ++ gethostbyname_r F ++ gethostent F ++ gethostent_r F ++ gethostid F ++ gethostname F ++ getifaddrs F ++ getipv4sourcefilter F ++ getitimer F ++ getline F ++ getloadavg F ++ getlogin F ++ getlogin_r F ++ getmntent F ++ getmntent_r F ++ getmsg F ++ getnameinfo F ++ getnetbyaddr F ++ 
getnetbyaddr_r F ++ getnetbyname F ++ getnetbyname_r F ++ getnetent F ++ getnetent_r F ++ getnetgrent F ++ getnetgrent_r F ++ getnetname F ++ getopt F ++ getopt_long F ++ getopt_long_only F ++ getpagesize F ++ getpass F ++ getpeername F ++ getpgid F ++ getpgrp F ++ getpid F ++ getpmsg F ++ getppid F ++ getpriority F ++ getprotobyname F ++ getprotobyname_r F ++ getprotobynumber F ++ getprotobynumber_r F ++ getprotoent F ++ getprotoent_r F ++ getpt F ++ getpublickey F ++ getpw F ++ getpwent F ++ getpwent_r F ++ getpwnam F ++ getpwnam_r F ++ getpwuid F ++ getpwuid_r F ++ getresgid F ++ getresuid F ++ getrlimit F ++ getrlimit64 F ++ getrpcbyname F ++ getrpcbyname_r F ++ getrpcbynumber F ++ getrpcbynumber_r F ++ getrpcent F ++ getrpcent_r F ++ getrpcport F ++ getrusage F ++ gets F ++ getsecretkey F ++ getservbyname F ++ getservbyname_r F ++ getservbyport F ++ getservbyport_r F ++ getservent F ++ getservent_r F ++ getsgent F ++ getsgent_r F ++ getsgnam F ++ getsgnam_r F ++ getsid F ++ getsockname F ++ getsockopt F ++ getsourcefilter F ++ getspent F ++ getspent_r F ++ getspnam F ++ getspnam_r F ++ getsubopt F ++ gettext F ++ gettimeofday F ++ getttyent F ++ getttynam F ++ getuid F ++ getusershell F ++ getutent F ++ getutent_r F ++ getutid F ++ getutid_r F ++ getutline F ++ getutline_r F ++ getutmp F ++ getutmpx F ++ getutxent F ++ getutxid F ++ getutxline F ++ getw F ++ getwc F ++ getwc_unlocked F ++ getwchar F ++ getwchar_unlocked F ++ getwd F ++ getxattr F ++ glob F ++ glob64 F ++ glob_pattern_p F ++ globfree F ++ globfree64 F ++ gmtime F ++ gmtime_r F ++ gnu_dev_major F ++ gnu_dev_makedev F ++ gnu_dev_minor F ++ gnu_get_libc_release F ++ gnu_get_libc_version F ++ grantpt F ++ group_member F ++ gsignal F ++ gtty F ++ h_errlist D 0x28 ++ h_nerr D 0x4 ++ hasmntopt F ++ hcreate F ++ hcreate_r F ++ hdestroy F ++ hdestroy_r F ++ herror F ++ host2netname F ++ hsearch F ++ hsearch_r F ++ hstrerror F ++ htonl F ++ htons F ++ iconv F ++ iconv_close F ++ iconv_open F ++ if_freenameindex F ++ if_indextoname F ++ if_nameindex F ++ if_nametoindex F ++ imaxabs F ++ imaxdiv F ++ in6addr_any D 0x10 ++ in6addr_loopback D 0x10 ++ index F ++ inet6_opt_append F ++ inet6_opt_find F ++ inet6_opt_finish F ++ inet6_opt_get_val F ++ inet6_opt_init F ++ inet6_opt_next F ++ inet6_opt_set_val F ++ inet6_option_alloc F ++ inet6_option_append F ++ inet6_option_find F ++ inet6_option_init F ++ inet6_option_next F ++ inet6_option_space F ++ inet6_rth_add F ++ inet6_rth_getaddr F ++ inet6_rth_init F ++ inet6_rth_reverse F ++ inet6_rth_segments F ++ inet6_rth_space F ++ inet_addr F ++ inet_aton F ++ inet_lnaof F ++ inet_makeaddr F ++ inet_netof F ++ inet_network F ++ inet_nsap_addr F ++ inet_nsap_ntoa F ++ inet_ntoa F ++ inet_ntop F ++ inet_pton F ++ init_module F ++ initgroups F ++ initstate F ++ initstate_r F ++ innetgr F ++ inotify_add_watch F ++ inotify_init F ++ inotify_init1 F ++ inotify_rm_watch F ++ insque F ++ ioctl F ++ iruserok F ++ iruserok_af F ++ isalnum F ++ isalnum_l F ++ isalpha F ++ isalpha_l F ++ isascii F ++ isastream F ++ isatty F ++ isblank F ++ isblank_l F ++ iscntrl F ++ iscntrl_l F ++ isctype F ++ isdigit F ++ isdigit_l F ++ isfdtype F ++ isgraph F ++ isgraph_l F ++ isinf F ++ isinff F ++ isinfl F ++ islower F ++ islower_l F ++ isnan F ++ isnanf F ++ isnanl F ++ isprint F ++ isprint_l F ++ ispunct F ++ ispunct_l F ++ isspace F ++ isspace_l F ++ isupper F ++ isupper_l F ++ iswalnum F ++ iswalnum_l F ++ iswalpha F ++ iswalpha_l F ++ iswblank F ++ iswblank_l F ++ iswcntrl F ++ iswcntrl_l F ++ iswctype F ++ 
iswctype_l F ++ iswdigit F ++ iswdigit_l F ++ iswgraph F ++ iswgraph_l F ++ iswlower F ++ iswlower_l F ++ iswprint F ++ iswprint_l F ++ iswpunct F ++ iswpunct_l F ++ iswspace F ++ iswspace_l F ++ iswupper F ++ iswupper_l F ++ iswxdigit F ++ iswxdigit_l F ++ isxdigit F ++ isxdigit_l F ++ jrand48 F ++ jrand48_r F ++ key_decryptsession F ++ key_decryptsession_pk F ++ key_encryptsession F ++ key_encryptsession_pk F ++ key_gendes F ++ key_get_conv F ++ key_secretkey_is_set F ++ key_setnet F ++ key_setsecret F ++ kill F ++ killpg F ++ klogctl F ++ l64a F ++ labs F ++ lchmod F ++ lchown F ++ lckpwdf F ++ lcong48 F ++ lcong48_r F ++ ldexp F ++ ldexpf F ++ ldexpl F ++ ldiv F ++ lfind F ++ lgetxattr F ++ link F ++ linkat F ++ listen F ++ listxattr F ++ llabs F ++ lldiv F ++ llistxattr F ++ llseek F ++ loc1 D 0x8 ++ loc2 D 0x8 ++ localeconv F ++ localtime F ++ localtime_r F ++ lockf F ++ lockf64 F ++ locs D 0x8 ++ longjmp F ++ lrand48 F ++ lrand48_r F ++ lremovexattr F ++ lsearch F ++ lseek F ++ lseek64 F ++ lsetxattr F ++ lutimes F ++ madvise F ++ makecontext F ++ mallinfo F ++ malloc F ++ malloc_get_state F ++ malloc_info F ++ malloc_set_state F ++ malloc_stats F ++ malloc_trim F ++ malloc_usable_size F ++ mallopt F ++ mallwatch D 0x8 ++ mblen F ++ mbrlen F ++ mbrtoc16 F ++ mbrtoc32 F ++ mbrtowc F ++ mbsinit F ++ mbsnrtowcs F ++ mbsrtowcs F ++ mbstowcs F ++ mbtowc F ++ mcheck F ++ mcheck_check_all F ++ mcheck_pedantic F ++ memalign F ++ memccpy F ++ memchr F ++ memcmp F ++ memcpy F ++ memfrob F ++ memmem F ++ memmove F ++ mempcpy F ++ memrchr F ++ memset F ++ mincore F ++ mkdir F ++ mkdirat F ++ mkdtemp F ++ mkfifo F ++ mkfifoat F ++ mkostemp F ++ mkostemp64 F ++ mkostemps F ++ mkostemps64 F ++ mkstemp F ++ mkstemp64 F ++ mkstemps F ++ mkstemps64 F ++ mktemp F ++ mktime F ++ mlock F ++ mlockall F ++ mmap F ++ mmap64 F ++ modf F ++ modff F ++ modfl F ++ moncontrol F ++ monstartup F ++ mount F ++ mprobe F ++ mprotect F ++ mrand48 F ++ mrand48_r F ++ mremap F ++ msgctl F ++ msgget F ++ msgrcv F ++ msgsnd F ++ msync F ++ mtrace F ++ munlock F ++ munlockall F ++ munmap F ++ muntrace F ++ name_to_handle_at F ++ nanosleep F ++ netname2host F ++ netname2user F ++ newlocale F ++ nfsservctl F ++ nftw F ++ nftw64 F ++ ngettext F ++ nice F ++ nl_langinfo F ++ nl_langinfo_l F ++ nrand48 F ++ nrand48_r F ++ ntohl F ++ ntohs F ++ ntp_adjtime F ++ ntp_gettime F ++ ntp_gettimex F ++ obstack_alloc_failed_handler D 0x8 ++ obstack_exit_failure D 0x4 ++ obstack_free F ++ obstack_printf F ++ obstack_vprintf F ++ on_exit F ++ open F ++ open64 F ++ open_by_handle_at F ++ open_memstream F ++ open_wmemstream F ++ openat F ++ openat64 F ++ opendir F ++ openlog F ++ optarg D 0x8 ++ opterr D 0x4 ++ optind D 0x4 ++ optopt D 0x4 ++ parse_printf_format F ++ passwd2des F ++ pathconf F ++ pause F ++ pclose F ++ perror F ++ personality F ++ pipe F ++ pipe2 F ++ pivot_root F ++ pmap_getmaps F ++ pmap_getport F ++ pmap_rmtcall F ++ pmap_set F ++ pmap_unset F ++ poll F ++ popen F ++ posix_fadvise F ++ posix_fadvise64 F ++ posix_fallocate F ++ posix_fallocate64 F ++ posix_madvise F ++ posix_memalign F ++ posix_openpt F ++ posix_spawn F ++ posix_spawn_file_actions_addclose F ++ posix_spawn_file_actions_adddup2 F ++ posix_spawn_file_actions_addopen F ++ posix_spawn_file_actions_destroy F ++ posix_spawn_file_actions_init F ++ posix_spawnattr_destroy F ++ posix_spawnattr_getflags F ++ posix_spawnattr_getpgroup F ++ posix_spawnattr_getschedparam F ++ posix_spawnattr_getschedpolicy F ++ posix_spawnattr_getsigdefault F ++ 
posix_spawnattr_getsigmask F ++ posix_spawnattr_init F ++ posix_spawnattr_setflags F ++ posix_spawnattr_setpgroup F ++ posix_spawnattr_setschedparam F ++ posix_spawnattr_setschedpolicy F ++ posix_spawnattr_setsigdefault F ++ posix_spawnattr_setsigmask F ++ posix_spawnp F ++ ppoll F ++ prctl F ++ pread F ++ pread64 F ++ preadv F ++ preadv64 F ++ printf F ++ printf_size F ++ printf_size_info F ++ prlimit F ++ prlimit64 F ++ process_vm_readv F ++ process_vm_writev F ++ profil F ++ program_invocation_name D 0x8 ++ program_invocation_short_name D 0x8 ++ pselect F ++ psiginfo F ++ psignal F ++ pthread_attr_destroy F ++ pthread_attr_getdetachstate F ++ pthread_attr_getinheritsched F ++ pthread_attr_getschedparam F ++ pthread_attr_getschedpolicy F ++ pthread_attr_getscope F ++ pthread_attr_init F ++ pthread_attr_setdetachstate F ++ pthread_attr_setinheritsched F ++ pthread_attr_setschedparam F ++ pthread_attr_setschedpolicy F ++ pthread_attr_setscope F ++ pthread_cond_broadcast F ++ pthread_cond_destroy F ++ pthread_cond_init F ++ pthread_cond_signal F ++ pthread_cond_timedwait F ++ pthread_cond_wait F ++ pthread_condattr_destroy F ++ pthread_condattr_init F ++ pthread_equal F ++ pthread_exit F ++ pthread_getschedparam F ++ pthread_mutex_destroy F ++ pthread_mutex_init F ++ pthread_mutex_lock F ++ pthread_mutex_unlock F ++ pthread_self F ++ pthread_setcancelstate F ++ pthread_setcanceltype F ++ pthread_setschedparam F ++ ptrace F ++ ptsname F ++ ptsname_r F ++ putc F ++ putc_unlocked F ++ putchar F ++ putchar_unlocked F ++ putenv F ++ putgrent F ++ putmsg F ++ putpmsg F ++ putpwent F ++ puts F ++ putsgent F ++ putspent F ++ pututline F ++ pututxline F ++ putw F ++ putwc F ++ putwc_unlocked F ++ putwchar F ++ putwchar_unlocked F ++ pvalloc F ++ pwrite F ++ pwrite64 F ++ pwritev F ++ pwritev64 F ++ qecvt F ++ qecvt_r F ++ qfcvt F ++ qfcvt_r F ++ qgcvt F ++ qsort F ++ qsort_r F ++ query_module F ++ quick_exit F ++ quotactl F ++ raise F ++ rand F ++ rand_r F ++ random F ++ random_r F ++ rawmemchr F ++ rcmd F ++ rcmd_af F ++ re_comp F ++ re_compile_fastmap F ++ re_compile_pattern F ++ re_exec F ++ re_match F ++ re_match_2 F ++ re_search F ++ re_search_2 F ++ re_set_registers F ++ re_set_syntax F ++ re_syntax_options D 0x8 ++ read F ++ readahead F ++ readdir F ++ readdir64 F ++ readdir64_r F ++ readdir_r F ++ readlink F ++ readlinkat F ++ readv F ++ realloc F ++ realpath F ++ reboot F ++ recv F ++ recvfrom F ++ recvmmsg F ++ recvmsg F ++ regcomp F ++ regerror F ++ regexec F ++ regfree F ++ register_printf_function F ++ register_printf_modifier F ++ register_printf_specifier F ++ register_printf_type F ++ registerrpc F ++ remap_file_pages F ++ remove F ++ removexattr F ++ remque F ++ rename F ++ renameat F ++ revoke F ++ rewind F ++ rewinddir F ++ rexec F ++ rexec_af F ++ rexecoptions D 0x4 ++ rindex F ++ rmdir F ++ rpc_createerr D 0x20 ++ rpmatch F ++ rresvport F ++ rresvport_af F ++ rtime F ++ ruserok F ++ ruserok_af F ++ ruserpass F ++ sbrk F ++ scalbn F ++ scalbnf F ++ scalbnl F ++ scandir F ++ scandir64 F ++ scandirat F ++ scandirat64 F ++ scanf F ++ sched_get_priority_max F ++ sched_get_priority_min F ++ sched_getaffinity F ++ sched_getcpu F ++ sched_getparam F ++ sched_getscheduler F ++ sched_rr_get_interval F ++ sched_setaffinity F ++ sched_setparam F ++ sched_setscheduler F ++ sched_yield F ++ secure_getenv F ++ seed48 F ++ seed48_r F ++ seekdir F ++ select F ++ semctl F ++ semget F ++ semop F ++ semtimedop F ++ send F ++ sendfile F ++ sendfile64 F ++ sendmmsg F ++ sendmsg F ++ sendto F ++ 
setaliasent F ++ setbuf F ++ setbuffer F ++ setcontext F ++ setdomainname F ++ setegid F ++ setenv F ++ seteuid F ++ setfsent F ++ setfsgid F ++ setfsuid F ++ setgid F ++ setgrent F ++ setgroups F ++ sethostent F ++ sethostid F ++ sethostname F ++ setipv4sourcefilter F ++ setitimer F ++ setjmp F ++ setlinebuf F ++ setlocale F ++ setlogin F ++ setlogmask F ++ setmntent F ++ setnetent F ++ setnetgrent F ++ setns F ++ setpgid F ++ setpgrp F ++ setpriority F ++ setprotoent F ++ setpwent F ++ setregid F ++ setresgid F ++ setresuid F ++ setreuid F ++ setrlimit F ++ setrlimit64 F ++ setrpcent F ++ setservent F ++ setsgent F ++ setsid F ++ setsockopt F ++ setsourcefilter F ++ setspent F ++ setstate F ++ setstate_r F ++ settimeofday F ++ setttyent F ++ setuid F ++ setusershell F ++ setutent F ++ setutxent F ++ setvbuf F ++ setxattr F ++ sgetsgent F ++ sgetsgent_r F ++ sgetspent F ++ sgetspent_r F ++ shmat F ++ shmctl F ++ shmdt F ++ shmget F ++ shutdown F ++ sigaction F ++ sigaddset F ++ sigaltstack F ++ sigandset F ++ sigblock F ++ sigdelset F ++ sigemptyset F ++ sigfillset F ++ siggetmask F ++ sighold F ++ sigignore F ++ siginterrupt F ++ sigisemptyset F ++ sigismember F ++ siglongjmp F ++ signal F ++ signalfd F ++ sigorset F ++ sigpause F ++ sigpending F ++ sigprocmask F ++ sigqueue F ++ sigrelse F ++ sigreturn F ++ sigset F ++ sigsetmask F ++ sigstack F ++ sigsuspend F ++ sigtimedwait F ++ sigvec F ++ sigwait F ++ sigwaitinfo F ++ sleep F ++ snprintf F ++ sockatmark F ++ socket F ++ socketpair F ++ splice F ++ sprintf F ++ sprofil F ++ srand F ++ srand48 F ++ srand48_r F ++ srandom F ++ srandom_r F ++ sscanf F ++ ssignal F ++ sstk F ++ statfs F ++ statfs64 F ++ statvfs F ++ statvfs64 F ++ stderr D 0x8 ++ stdin D 0x8 ++ stdout D 0x8 ++ step F ++ stime F ++ stpcpy F ++ stpncpy F ++ strcasecmp F ++ strcasecmp_l F ++ strcasestr F ++ strcat F ++ strchr F ++ strchrnul F ++ strcmp F ++ strcoll F ++ strcoll_l F ++ strcpy F ++ strcspn F ++ strdup F ++ strerror F ++ strerror_l F ++ strerror_r F ++ strfmon F ++ strfmon_l F ++ strfry F ++ strftime F ++ strftime_l F ++ strlen F ++ strncasecmp F ++ strncasecmp_l F ++ strncat F ++ strncmp F ++ strncpy F ++ strndup F ++ strnlen F ++ strpbrk F ++ strptime F ++ strptime_l F ++ strrchr F ++ strsep F ++ strsignal F ++ strspn F ++ strstr F ++ strtod F ++ strtod_l F ++ strtof F ++ strtof_l F ++ strtoimax F ++ strtok F ++ strtok_r F ++ strtol F ++ strtol_l F ++ strtold F ++ strtold_l F ++ strtoll F ++ strtoll_l F ++ strtoq F ++ strtoul F ++ strtoul_l F ++ strtoull F ++ strtoull_l F ++ strtoumax F ++ strtouq F ++ strverscmp F ++ strxfrm F ++ strxfrm_l F ++ stty F ++ svc_exit F ++ svc_fdset D 0x80 ++ svc_getreq F ++ svc_getreq_common F ++ svc_getreq_poll F ++ svc_getreqset F ++ svc_max_pollfd D 0x4 ++ svc_pollfd D 0x8 ++ svc_register F ++ svc_run F ++ svc_sendreply F ++ svc_unregister F ++ svcauthdes_stats D 0x18 ++ svcerr_auth F ++ svcerr_decode F ++ svcerr_noproc F ++ svcerr_noprog F ++ svcerr_progvers F ++ svcerr_systemerr F ++ svcerr_weakauth F ++ svcfd_create F ++ svcraw_create F ++ svctcp_create F ++ svcudp_bufcreate F ++ svcudp_create F ++ svcudp_enablecache F ++ svcunix_create F ++ svcunixfd_create F ++ swab F ++ swapcontext F ++ swapoff F ++ swapon F ++ swprintf F ++ swscanf F ++ symlink F ++ symlinkat F ++ sync F ++ sync_file_range F ++ syncfs F ++ sys_errlist D 0x438 ++ sys_nerr D 0x4 ++ sys_sigabbrev D 0x208 ++ sys_siglist D 0x208 ++ syscall F ++ sysconf F ++ sysctl F ++ sysinfo F ++ syslog F ++ system F ++ sysv_signal F ++ tcdrain F ++ tcflow F ++ tcflush 
F ++ tcgetattr F ++ tcgetpgrp F ++ tcgetsid F ++ tcsendbreak F ++ tcsetattr F ++ tcsetpgrp F ++ tdelete F ++ tdestroy F ++ tee F ++ telldir F ++ tempnam F ++ textdomain F ++ tfind F ++ time F ++ timegm F ++ timelocal F ++ timerfd_create F ++ timerfd_gettime F ++ timerfd_settime F ++ times F ++ timespec_get F ++ timezone D 0x8 ++ tmpfile F ++ tmpfile64 F ++ tmpnam F ++ tmpnam_r F ++ toascii F ++ tolower F ++ tolower_l F ++ toupper F ++ toupper_l F ++ towctrans F ++ towctrans_l F ++ towlower F ++ towlower_l F ++ towupper F ++ towupper_l F ++ tr_break F ++ truncate F ++ truncate64 F ++ tsearch F ++ ttyname F ++ ttyname_r F ++ ttyslot F ++ twalk F ++ tzname D 0x10 ++ tzset F ++ ualarm F ++ ulckpwdf F ++ ulimit F ++ umask F ++ umount F ++ umount2 F ++ uname F ++ ungetc F ++ ungetwc F ++ unlink F ++ unlinkat F ++ unlockpt F ++ unsetenv F ++ unshare F ++ updwtmp F ++ updwtmpx F ++ uselib F ++ uselocale F ++ user2netname F ++ usleep F ++ ustat F ++ utime F ++ utimensat F ++ utimes F ++ utmpname F ++ utmpxname F ++ valloc F ++ vasprintf F ++ vdprintf F ++ verr F ++ verrx F ++ versionsort F ++ versionsort64 F ++ vfork F ++ vfprintf F ++ vfscanf F ++ vfwprintf F ++ vfwscanf F ++ vhangup F ++ vlimit F ++ vmsplice F ++ vprintf F ++ vscanf F ++ vsnprintf F ++ vsprintf F ++ vsscanf F ++ vswprintf F ++ vswscanf F ++ vsyslog F ++ vtimes F ++ vwarn F ++ vwarnx F ++ vwprintf F ++ vwscanf F ++ wait F ++ wait3 F ++ wait4 F ++ waitid F ++ waitpid F ++ warn F ++ warnx F ++ wcpcpy F ++ wcpncpy F ++ wcrtomb F ++ wcscasecmp F ++ wcscasecmp_l F ++ wcscat F ++ wcschr F ++ wcschrnul F ++ wcscmp F ++ wcscoll F ++ wcscoll_l F ++ wcscpy F ++ wcscspn F ++ wcsdup F ++ wcsftime F ++ wcsftime_l F ++ wcslen F ++ wcsncasecmp F ++ wcsncasecmp_l F ++ wcsncat F ++ wcsncmp F ++ wcsncpy F ++ wcsnlen F ++ wcsnrtombs F ++ wcspbrk F ++ wcsrchr F ++ wcsrtombs F ++ wcsspn F ++ wcsstr F ++ wcstod F ++ wcstod_l F ++ wcstof F ++ wcstof_l F ++ wcstoimax F ++ wcstok F ++ wcstol F ++ wcstol_l F ++ wcstold F ++ wcstold_l F ++ wcstoll F ++ wcstoll_l F ++ wcstombs F ++ wcstoq F ++ wcstoul F ++ wcstoul_l F ++ wcstoull F ++ wcstoull_l F ++ wcstoumax F ++ wcstouq F ++ wcswcs F ++ wcswidth F ++ wcsxfrm F ++ wcsxfrm_l F ++ wctob F ++ wctomb F ++ wctrans F ++ wctrans_l F ++ wctype F ++ wctype_l F ++ wcwidth F ++ wmemchr F ++ wmemcmp F ++ wmemcpy F ++ wmemmove F ++ wmempcpy F ++ wmemset F ++ wordexp F ++ wordfree F ++ wprintf F ++ write F ++ writev F ++ wscanf F ++ xdecrypt F ++ xdr_accepted_reply F ++ xdr_array F ++ xdr_authdes_cred F ++ xdr_authdes_verf F ++ xdr_authunix_parms F ++ xdr_bool F ++ xdr_bytes F ++ xdr_callhdr F ++ xdr_callmsg F ++ xdr_char F ++ xdr_cryptkeyarg F ++ xdr_cryptkeyarg2 F ++ xdr_cryptkeyres F ++ xdr_des_block F ++ xdr_double F ++ xdr_enum F ++ xdr_float F ++ xdr_free F ++ xdr_getcredres F ++ xdr_hyper F ++ xdr_int F ++ xdr_int16_t F ++ xdr_int32_t F ++ xdr_int64_t F ++ xdr_int8_t F ++ xdr_key_netstarg F ++ xdr_key_netstres F ++ xdr_keybuf F ++ xdr_keystatus F ++ xdr_long F ++ xdr_longlong_t F ++ xdr_netnamestr F ++ xdr_netobj F ++ xdr_opaque F ++ xdr_opaque_auth F ++ xdr_pmap F ++ xdr_pmaplist F ++ xdr_pointer F ++ xdr_quad_t F ++ xdr_reference F ++ xdr_rejected_reply F ++ xdr_replymsg F ++ xdr_rmtcall_args F ++ xdr_rmtcallres F ++ xdr_short F ++ xdr_sizeof F ++ xdr_string F ++ xdr_u_char F ++ xdr_u_hyper F ++ xdr_u_int F ++ xdr_u_long F ++ xdr_u_longlong_t F ++ xdr_u_quad_t F ++ xdr_u_short F ++ xdr_uint16_t F ++ xdr_uint32_t F ++ xdr_uint64_t F ++ xdr_uint8_t F ++ xdr_union F ++ xdr_unixcred F ++ xdr_vector F ++ xdr_void F 
++ xdr_wrapstring F ++ xdrmem_create F ++ xdrrec_create F ++ xdrrec_endofrecord F ++ xdrrec_eof F ++ xdrrec_skiprecord F ++ xdrstdio_create F ++ xencrypt F ++ xprt_register F ++ xprt_unregister F +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libcrypt-le.abilist glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libcrypt-le.abilist +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libcrypt-le.abilist 1970-01-01 00:00:00.000000000 +0000 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libcrypt-le.abilist 2014-06-02 15:22:40.000000000 +0000 +@@ -0,0 +1,9 @@ ++GLIBC_2.17 ++ GLIBC_2.17 A ++ crypt F ++ crypt_r F ++ encrypt F ++ encrypt_r F ++ fcrypt F ++ setkey F ++ setkey_r F +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libdl-le.abilist glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libdl-le.abilist +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libdl-le.abilist 1970-01-01 00:00:00.000000000 +0000 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libdl-le.abilist 2014-06-02 15:22:40.000000000 +0000 +@@ -0,0 +1,11 @@ ++GLIBC_2.17 ++ GLIBC_2.17 A ++ dladdr F ++ dladdr1 F ++ dlclose F ++ dlerror F ++ dlinfo F ++ dlmopen F ++ dlopen F ++ dlsym F ++ dlvsym F +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libm-le.abilist glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libm-le.abilist +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libm-le.abilist 1970-01-01 00:00:00.000000000 +0000 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libm-le.abilist 2014-06-02 15:22:40.000000000 +0000 +@@ -0,0 +1,402 @@ ++GLIBC_2.17 ++ GLIBC_2.17 A ++ _LIB_VERSION D 0x4 ++ __acos_finite F ++ __acosf_finite F ++ __acosh_finite F ++ __acoshf_finite F ++ __acoshl_finite F ++ __acosl_finite F ++ __asin_finite F ++ __asinf_finite F ++ __asinl_finite F ++ __atan2_finite F ++ __atan2f_finite F ++ __atan2l_finite F ++ __atanh_finite F ++ __atanhf_finite F ++ __atanhl_finite F ++ __clog10 F ++ __clog10f F ++ __clog10l F ++ __cosh_finite F ++ __coshf_finite F ++ __coshl_finite F ++ __exp10_finite F ++ __exp10f_finite F ++ __exp10l_finite F ++ __exp2_finite F ++ __exp2f_finite F ++ __exp2l_finite F ++ __exp_finite F ++ __expf_finite F ++ __expl_finite F ++ __fe_dfl_env D 0x8 ++ __fe_enabled_env D 0x8 ++ __fe_nomask_env F ++ __fe_nonieee_env D 0x8 ++ __finite F ++ __finitef F ++ __finitel F ++ __fmod_finite F ++ __fmodf_finite F ++ __fmodl_finite F ++ __fpclassify F ++ __fpclassifyf F ++ __fpclassifyl F ++ __gamma_r_finite F ++ __gammaf_r_finite F ++ __gammal_r_finite F ++ __hypot_finite F ++ __hypotf_finite F ++ __hypotl_finite F ++ __j0_finite F ++ __j0f_finite F ++ __j0l_finite F ++ __j1_finite F ++ __j1f_finite F ++ __j1l_finite F ++ __jn_finite F ++ __jnf_finite F ++ __jnl_finite F ++ __lgamma_r_finite F ++ __lgammaf_r_finite F ++ __lgammal_r_finite F ++ __log10_finite F ++ __log10f_finite F ++ __log10l_finite F ++ __log2_finite F ++ __log2f_finite F ++ __log2l_finite F ++ __log_finite F ++ __logf_finite F ++ __logl_finite F ++ __nldbl_nexttowardf F ++ __pow_finite F ++ __powf_finite F ++ __powl_finite F ++ __remainder_finite F ++ __remainderf_finite F ++ __remainderl_finite F ++ __scalb_finite F ++ __scalbf_finite F ++ __scalbl_finite F ++ __signbit F ++ __signbitf F ++ __signbitl F ++ __sinh_finite F ++ __sinhf_finite F ++ __sinhl_finite F 
++ __sqrt_finite F ++ __sqrtf_finite F ++ __sqrtl_finite F ++ __y0_finite F ++ __y0f_finite F ++ __y0l_finite F ++ __y1_finite F ++ __y1f_finite F ++ __y1l_finite F ++ __yn_finite F ++ __ynf_finite F ++ __ynl_finite F ++ acos F ++ acosf F ++ acosh F ++ acoshf F ++ acoshl F ++ acosl F ++ asin F ++ asinf F ++ asinh F ++ asinhf F ++ asinhl F ++ asinl F ++ atan F ++ atan2 F ++ atan2f F ++ atan2l F ++ atanf F ++ atanh F ++ atanhf F ++ atanhl F ++ atanl F ++ cabs F ++ cabsf F ++ cabsl F ++ cacos F ++ cacosf F ++ cacosh F ++ cacoshf F ++ cacoshl F ++ cacosl F ++ carg F ++ cargf F ++ cargl F ++ casin F ++ casinf F ++ casinh F ++ casinhf F ++ casinhl F ++ casinl F ++ catan F ++ catanf F ++ catanh F ++ catanhf F ++ catanhl F ++ catanl F ++ cbrt F ++ cbrtf F ++ cbrtl F ++ ccos F ++ ccosf F ++ ccosh F ++ ccoshf F ++ ccoshl F ++ ccosl F ++ ceil F ++ ceilf F ++ ceill F ++ cexp F ++ cexpf F ++ cexpl F ++ cimag F ++ cimagf F ++ cimagl F ++ clog F ++ clog10 F ++ clog10f F ++ clog10l F ++ clogf F ++ clogl F ++ conj F ++ conjf F ++ conjl F ++ copysign F ++ copysignf F ++ copysignl F ++ cos F ++ cosf F ++ cosh F ++ coshf F ++ coshl F ++ cosl F ++ cpow F ++ cpowf F ++ cpowl F ++ cproj F ++ cprojf F ++ cprojl F ++ creal F ++ crealf F ++ creall F ++ csin F ++ csinf F ++ csinh F ++ csinhf F ++ csinhl F ++ csinl F ++ csqrt F ++ csqrtf F ++ csqrtl F ++ ctan F ++ ctanf F ++ ctanh F ++ ctanhf F ++ ctanhl F ++ ctanl F ++ drem F ++ dremf F ++ dreml F ++ erf F ++ erfc F ++ erfcf F ++ erfcl F ++ erff F ++ erfl F ++ exp F ++ exp10 F ++ exp10f F ++ exp10l F ++ exp2 F ++ exp2f F ++ exp2l F ++ expf F ++ expl F ++ expm1 F ++ expm1f F ++ expm1l F ++ fabs F ++ fabsf F ++ fabsl F ++ fdim F ++ fdimf F ++ fdiml F ++ feclearexcept F ++ fedisableexcept F ++ feenableexcept F ++ fegetenv F ++ fegetexcept F ++ fegetexceptflag F ++ fegetround F ++ feholdexcept F ++ feraiseexcept F ++ fesetenv F ++ fesetexceptflag F ++ fesetround F ++ fetestexcept F ++ feupdateenv F ++ finite F ++ finitef F ++ finitel F ++ floor F ++ floorf F ++ floorl F ++ fma F ++ fmaf F ++ fmal F ++ fmax F ++ fmaxf F ++ fmaxl F ++ fmin F ++ fminf F ++ fminl F ++ fmod F ++ fmodf F ++ fmodl F ++ frexp F ++ frexpf F ++ frexpl F ++ gamma F ++ gammaf F ++ gammal F ++ hypot F ++ hypotf F ++ hypotl F ++ ilogb F ++ ilogbf F ++ ilogbl F ++ j0 F ++ j0f F ++ j0l F ++ j1 F ++ j1f F ++ j1l F ++ jn F ++ jnf F ++ jnl F ++ ldexp F ++ ldexpf F ++ ldexpl F ++ lgamma F ++ lgamma_r F ++ lgammaf F ++ lgammaf_r F ++ lgammal F ++ lgammal_r F ++ llrint F ++ llrintf F ++ llrintl F ++ llround F ++ llroundf F ++ llroundl F ++ log F ++ log10 F ++ log10f F ++ log10l F ++ log1p F ++ log1pf F ++ log1pl F ++ log2 F ++ log2f F ++ log2l F ++ logb F ++ logbf F ++ logbl F ++ logf F ++ logl F ++ lrint F ++ lrintf F ++ lrintl F ++ lround F ++ lroundf F ++ lroundl F ++ matherr F ++ modf F ++ modff F ++ modfl F ++ nan F ++ nanf F ++ nanl F ++ nearbyint F ++ nearbyintf F ++ nearbyintl F ++ nextafter F ++ nextafterf F ++ nextafterl F ++ nexttoward F ++ nexttowardf F ++ nexttowardl F ++ pow F ++ pow10 F ++ pow10f F ++ pow10l F ++ powf F ++ powl F ++ remainder F ++ remainderf F ++ remainderl F ++ remquo F ++ remquof F ++ remquol F ++ rint F ++ rintf F ++ rintl F ++ round F ++ roundf F ++ roundl F ++ scalb F ++ scalbf F ++ scalbl F ++ scalbln F ++ scalblnf F ++ scalblnl F ++ scalbn F ++ scalbnf F ++ scalbnl F ++ signgam D 0x4 ++ significand F ++ significandf F ++ significandl F ++ sin F ++ sincos F ++ sincosf F ++ sincosl F ++ sinf F ++ sinh F ++ sinhf F ++ sinhl F ++ sinl F ++ sqrt F ++ sqrtf F ++ sqrtl F ++ 
tan F ++ tanf F ++ tanh F ++ tanhf F ++ tanhl F ++ tanl F ++ tgamma F ++ tgammaf F ++ tgammal F ++ trunc F ++ truncf F ++ truncl F ++ y0 F ++ y0f F ++ y0l F ++ y1 F ++ y1f F ++ y1l F ++ yn F ++ ynf F ++ ynl F +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libnsl-le.abilist glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libnsl-le.abilist +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libnsl-le.abilist 1970-01-01 00:00:00.000000000 +0000 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libnsl-le.abilist 2014-06-02 15:22:40.000000000 +0000 +@@ -0,0 +1,123 @@ ++GLIBC_2.17 ++ GLIBC_2.17 A ++ __free_fdresult F ++ __nis_default_access F ++ __nis_default_group F ++ __nis_default_owner F ++ __nis_default_ttl F ++ __nis_finddirectory F ++ __nis_hash F ++ __nisbind_connect F ++ __nisbind_create F ++ __nisbind_destroy F ++ __nisbind_next F ++ __yp_check F ++ nis_add F ++ nis_add_entry F ++ nis_addmember F ++ nis_checkpoint F ++ nis_clone_directory F ++ nis_clone_object F ++ nis_clone_result F ++ nis_creategroup F ++ nis_destroy_object F ++ nis_destroygroup F ++ nis_dir_cmp F ++ nis_domain_of F ++ nis_domain_of_r F ++ nis_first_entry F ++ nis_free_directory F ++ nis_free_object F ++ nis_free_request F ++ nis_freenames F ++ nis_freeresult F ++ nis_freeservlist F ++ nis_freetags F ++ nis_getnames F ++ nis_getservlist F ++ nis_ismember F ++ nis_leaf_of F ++ nis_leaf_of_r F ++ nis_lerror F ++ nis_list F ++ nis_local_directory F ++ nis_local_group F ++ nis_local_host F ++ nis_local_principal F ++ nis_lookup F ++ nis_mkdir F ++ nis_modify F ++ nis_modify_entry F ++ nis_name_of F ++ nis_name_of_r F ++ nis_next_entry F ++ nis_perror F ++ nis_ping F ++ nis_print_directory F ++ nis_print_entry F ++ nis_print_group F ++ nis_print_group_entry F ++ nis_print_link F ++ nis_print_object F ++ nis_print_result F ++ nis_print_rights F ++ nis_print_table F ++ nis_read_obj F ++ nis_remove F ++ nis_remove_entry F ++ nis_removemember F ++ nis_rmdir F ++ nis_servstate F ++ nis_sperrno F ++ nis_sperror F ++ nis_sperror_r F ++ nis_stats F ++ nis_verifygroup F ++ nis_write_obj F ++ readColdStartFile F ++ writeColdStartFile F ++ xdr_cback_data F ++ xdr_domainname F ++ xdr_keydat F ++ xdr_mapname F ++ xdr_obj_p F ++ xdr_peername F ++ xdr_valdat F ++ xdr_yp_buf F ++ xdr_ypall F ++ xdr_ypbind_binding F ++ xdr_ypbind_resp F ++ xdr_ypbind_resptype F ++ xdr_ypbind_setdom F ++ xdr_ypdelete_args F ++ xdr_ypmap_parms F ++ xdr_ypmaplist F ++ xdr_yppush_status F ++ xdr_yppushresp_xfr F ++ xdr_ypreq_key F ++ xdr_ypreq_nokey F ++ xdr_ypreq_xfr F ++ xdr_ypresp_all F ++ xdr_ypresp_key_val F ++ xdr_ypresp_maplist F ++ xdr_ypresp_master F ++ xdr_ypresp_order F ++ xdr_ypresp_val F ++ xdr_ypresp_xfr F ++ xdr_ypstat F ++ xdr_ypupdate_args F ++ xdr_ypxfrstat F ++ yp_all F ++ yp_bind F ++ yp_first F ++ yp_get_default_domain F ++ yp_maplist F ++ yp_master F ++ yp_match F ++ yp_next F ++ yp_order F ++ yp_unbind F ++ yp_update F ++ ypbinderr_string F ++ yperr_string F ++ ypprot_err F +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libpthread-le.abilist glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libpthread-le.abilist +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libpthread-le.abilist 1970-01-01 00:00:00.000000000 +0000 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libpthread-le.abilist 2014-06-02 15:22:40.000000000 +0000 +@@ -0,0 +1,224 @@ ++GLIBC_2.17 
++ GLIBC_2.17 A ++ _IO_flockfile F ++ _IO_ftrylockfile F ++ _IO_funlockfile F ++ __close F ++ __connect F ++ __errno_location F ++ __fcntl F ++ __fork F ++ __h_errno_location F ++ __libc_allocate_rtsig F ++ __libc_current_sigrtmax F ++ __libc_current_sigrtmin F ++ __lseek F ++ __nanosleep F ++ __open F ++ __open64 F ++ __pread64 F ++ __pthread_cleanup_routine F ++ __pthread_getspecific F ++ __pthread_key_create F ++ __pthread_mutex_destroy F ++ __pthread_mutex_init F ++ __pthread_mutex_lock F ++ __pthread_mutex_trylock F ++ __pthread_mutex_unlock F ++ __pthread_mutexattr_destroy F ++ __pthread_mutexattr_init F ++ __pthread_mutexattr_settype F ++ __pthread_once F ++ __pthread_register_cancel F ++ __pthread_register_cancel_defer F ++ __pthread_rwlock_destroy F ++ __pthread_rwlock_init F ++ __pthread_rwlock_rdlock F ++ __pthread_rwlock_tryrdlock F ++ __pthread_rwlock_trywrlock F ++ __pthread_rwlock_unlock F ++ __pthread_rwlock_wrlock F ++ __pthread_setspecific F ++ __pthread_unregister_cancel F ++ __pthread_unregister_cancel_restore F ++ __pthread_unwind_next F ++ __pwrite64 F ++ __read F ++ __res_state F ++ __send F ++ __sigaction F ++ __vfork F ++ __wait F ++ __write F ++ _pthread_cleanup_pop F ++ _pthread_cleanup_pop_restore F ++ _pthread_cleanup_push F ++ _pthread_cleanup_push_defer F ++ accept F ++ close F ++ connect F ++ fcntl F ++ flockfile F ++ fork F ++ fsync F ++ ftrylockfile F ++ funlockfile F ++ longjmp F ++ lseek F ++ lseek64 F ++ msync F ++ nanosleep F ++ open F ++ open64 F ++ pause F ++ pread F ++ pread64 F ++ pthread_attr_destroy F ++ pthread_attr_getaffinity_np F ++ pthread_attr_getdetachstate F ++ pthread_attr_getguardsize F ++ pthread_attr_getinheritsched F ++ pthread_attr_getschedparam F ++ pthread_attr_getschedpolicy F ++ pthread_attr_getscope F ++ pthread_attr_getstack F ++ pthread_attr_getstackaddr F ++ pthread_attr_getstacksize F ++ pthread_attr_init F ++ pthread_attr_setaffinity_np F ++ pthread_attr_setdetachstate F ++ pthread_attr_setguardsize F ++ pthread_attr_setinheritsched F ++ pthread_attr_setschedparam F ++ pthread_attr_setschedpolicy F ++ pthread_attr_setscope F ++ pthread_attr_setstack F ++ pthread_attr_setstackaddr F ++ pthread_attr_setstacksize F ++ pthread_barrier_destroy F ++ pthread_barrier_init F ++ pthread_barrier_wait F ++ pthread_barrierattr_destroy F ++ pthread_barrierattr_getpshared F ++ pthread_barrierattr_init F ++ pthread_barrierattr_setpshared F ++ pthread_cancel F ++ pthread_cond_broadcast F ++ pthread_cond_destroy F ++ pthread_cond_init F ++ pthread_cond_signal F ++ pthread_cond_timedwait F ++ pthread_cond_wait F ++ pthread_condattr_destroy F ++ pthread_condattr_getclock F ++ pthread_condattr_getpshared F ++ pthread_condattr_init F ++ pthread_condattr_setclock F ++ pthread_condattr_setpshared F ++ pthread_create F ++ pthread_detach F ++ pthread_equal F ++ pthread_exit F ++ pthread_getaffinity_np F ++ pthread_getattr_np F ++ pthread_getconcurrency F ++ pthread_getcpuclockid F ++ pthread_getname_np F ++ pthread_getschedparam F ++ pthread_getspecific F ++ pthread_join F ++ pthread_key_create F ++ pthread_key_delete F ++ pthread_kill F ++ pthread_kill_other_threads_np F ++ pthread_mutex_consistent F ++ pthread_mutex_consistent_np F ++ pthread_mutex_destroy F ++ pthread_mutex_getprioceiling F ++ pthread_mutex_init F ++ pthread_mutex_lock F ++ pthread_mutex_setprioceiling F ++ pthread_mutex_timedlock F ++ pthread_mutex_trylock F ++ pthread_mutex_unlock F ++ pthread_mutexattr_destroy F ++ pthread_mutexattr_getkind_np F ++ 
pthread_mutexattr_getprioceiling F ++ pthread_mutexattr_getprotocol F ++ pthread_mutexattr_getpshared F ++ pthread_mutexattr_getrobust F ++ pthread_mutexattr_getrobust_np F ++ pthread_mutexattr_gettype F ++ pthread_mutexattr_init F ++ pthread_mutexattr_setkind_np F ++ pthread_mutexattr_setprioceiling F ++ pthread_mutexattr_setprotocol F ++ pthread_mutexattr_setpshared F ++ pthread_mutexattr_setrobust F ++ pthread_mutexattr_setrobust_np F ++ pthread_mutexattr_settype F ++ pthread_once F ++ pthread_rwlock_destroy F ++ pthread_rwlock_init F ++ pthread_rwlock_rdlock F ++ pthread_rwlock_timedrdlock F ++ pthread_rwlock_timedwrlock F ++ pthread_rwlock_tryrdlock F ++ pthread_rwlock_trywrlock F ++ pthread_rwlock_unlock F ++ pthread_rwlock_wrlock F ++ pthread_rwlockattr_destroy F ++ pthread_rwlockattr_getkind_np F ++ pthread_rwlockattr_getpshared F ++ pthread_rwlockattr_init F ++ pthread_rwlockattr_setkind_np F ++ pthread_rwlockattr_setpshared F ++ pthread_self F ++ pthread_setaffinity_np F ++ pthread_setcancelstate F ++ pthread_setcanceltype F ++ pthread_setconcurrency F ++ pthread_setname_np F ++ pthread_setschedparam F ++ pthread_setschedprio F ++ pthread_setspecific F ++ pthread_sigmask F ++ pthread_sigqueue F ++ pthread_spin_destroy F ++ pthread_spin_init F ++ pthread_spin_lock F ++ pthread_spin_trylock F ++ pthread_spin_unlock F ++ pthread_testcancel F ++ pthread_timedjoin_np F ++ pthread_tryjoin_np F ++ pthread_yield F ++ pwrite F ++ pwrite64 F ++ raise F ++ read F ++ recv F ++ recvfrom F ++ recvmsg F ++ sem_close F ++ sem_destroy F ++ sem_getvalue F ++ sem_init F ++ sem_open F ++ sem_post F ++ sem_timedwait F ++ sem_trywait F ++ sem_unlink F ++ sem_wait F ++ send F ++ sendmsg F ++ sendto F ++ sigaction F ++ siglongjmp F ++ sigwait F ++ system F ++ tcdrain F ++ vfork F ++ wait F ++ waitpid F ++ write F +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libresolv-le.abilist glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libresolv-le.abilist +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libresolv-le.abilist 1970-01-01 00:00:00.000000000 +0000 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libresolv-le.abilist 2014-06-02 15:22:40.000000000 +0000 +@@ -0,0 +1,93 @@ ++GLIBC_2.17 ++ GLIBC_2.17 A ++ __b64_ntop F ++ __b64_pton F ++ __dn_comp F ++ __dn_count_labels F ++ __dn_expand F ++ __dn_skipname F ++ __fp_nquery F ++ __fp_query F ++ __fp_resstat F ++ __hostalias F ++ __loc_aton F ++ __loc_ntoa F ++ __p_cdname F ++ __p_cdnname F ++ __p_class F ++ __p_class_syms D 0xa8 ++ __p_fqname F ++ __p_fqnname F ++ __p_option F ++ __p_query F ++ __p_rcode F ++ __p_secstodate F ++ __p_time F ++ __p_type F ++ __p_type_syms D 0x450 ++ __putlong F ++ __putshort F ++ __res_close F ++ __res_dnok F ++ __res_hnok F ++ __res_hostalias F ++ __res_isourserver F ++ __res_mailok F ++ __res_mkquery F ++ __res_nameinquery F ++ __res_nmkquery F ++ __res_nquery F ++ __res_nquerydomain F ++ __res_nsearch F ++ __res_nsend F ++ __res_ownok F ++ __res_queriesmatch F ++ __res_query F ++ __res_querydomain F ++ __res_search F ++ __res_send F ++ __sym_ntop F ++ __sym_ntos F ++ __sym_ston F ++ _gethtbyaddr F ++ _gethtbyname F ++ _gethtbyname2 F ++ _gethtent F ++ _getlong F ++ _getshort F ++ _res_opcodes D 0x80 ++ _sethtent F ++ inet_net_ntop F ++ inet_net_pton F ++ inet_neta F ++ ns_datetosecs F ++ ns_format_ttl F ++ ns_get16 F ++ ns_get32 F ++ ns_initparse F ++ ns_makecanon F ++ ns_msg_getflag F ++ ns_name_compress F ++ ns_name_ntol F ++ 
ns_name_ntop F ++ ns_name_pack F ++ ns_name_pton F ++ ns_name_rollback F ++ ns_name_skip F ++ ns_name_uncompress F ++ ns_name_unpack F ++ ns_parse_ttl F ++ ns_parserr F ++ ns_put16 F ++ ns_put32 F ++ ns_samedomain F ++ ns_samename F ++ ns_skiprr F ++ ns_sprintrr F ++ ns_sprintrrf F ++ ns_subdomain F ++ res_gethostbyaddr F ++ res_gethostbyname F ++ res_gethostbyname2 F ++ res_send_setqhook F ++ res_send_setrhook F +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/librt-le.abilist glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/librt-le.abilist +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/librt-le.abilist 1970-01-01 00:00:00.000000000 +0000 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/librt-le.abilist 2014-06-02 15:22:40.000000000 +0000 +@@ -0,0 +1,37 @@ ++GLIBC_2.17 ++ GLIBC_2.17 A ++ __mq_open_2 F ++ aio_cancel F ++ aio_cancel64 F ++ aio_error F ++ aio_error64 F ++ aio_fsync F ++ aio_fsync64 F ++ aio_init F ++ aio_read F ++ aio_read64 F ++ aio_return F ++ aio_return64 F ++ aio_suspend F ++ aio_suspend64 F ++ aio_write F ++ aio_write64 F ++ lio_listio F ++ lio_listio64 F ++ mq_close F ++ mq_getattr F ++ mq_notify F ++ mq_open F ++ mq_receive F ++ mq_send F ++ mq_setattr F ++ mq_timedreceive F ++ mq_timedsend F ++ mq_unlink F ++ shm_open F ++ shm_unlink F ++ timer_create F ++ timer_delete F ++ timer_getoverrun F ++ timer_gettime F ++ timer_settime F +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libthread_db-le.abilist glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libthread_db-le.abilist +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libthread_db-le.abilist 1970-01-01 00:00:00.000000000 +0000 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libthread_db-le.abilist 2014-06-02 15:22:40.000000000 +0000 +@@ -0,0 +1,42 @@ ++GLIBC_2.17 ++ GLIBC_2.17 A ++ td_init F ++ td_log F ++ td_symbol_list F ++ td_ta_clear_event F ++ td_ta_delete F ++ td_ta_enable_stats F ++ td_ta_event_addr F ++ td_ta_event_getmsg F ++ td_ta_get_nthreads F ++ td_ta_get_ph F ++ td_ta_get_stats F ++ td_ta_map_id2thr F ++ td_ta_map_lwp2thr F ++ td_ta_new F ++ td_ta_reset_stats F ++ td_ta_set_event F ++ td_ta_setconcurrency F ++ td_ta_thr_iter F ++ td_ta_tsd_iter F ++ td_thr_clear_event F ++ td_thr_dbresume F ++ td_thr_dbsuspend F ++ td_thr_event_enable F ++ td_thr_event_getmsg F ++ td_thr_get_info F ++ td_thr_getfpregs F ++ td_thr_getgregs F ++ td_thr_getxregs F ++ td_thr_getxregsize F ++ td_thr_set_event F ++ td_thr_setfpregs F ++ td_thr_setgregs F ++ td_thr_setprio F ++ td_thr_setsigpending F ++ td_thr_setxregs F ++ td_thr_sigsetmask F ++ td_thr_tls_get_addr F ++ td_thr_tlsbase F ++ td_thr_tsd F ++ td_thr_validate F +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libutil-le.abilist glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libutil-le.abilist +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libutil-le.abilist 1970-01-01 00:00:00.000000000 +0000 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/nptl/libutil-le.abilist 2014-06-02 15:22:40.000000000 +0000 +@@ -0,0 +1,8 @@ ++GLIBC_2.17 ++ GLIBC_2.17 A ++ forkpty F ++ login F ++ login_tty F ++ logout F ++ logwtmp F ++ openpty F diff --git a/packages/glibc/2.17/0025-glibc-ppc64le-03.patch b/packages/glibc/2.17/0025-glibc-ppc64le-03.patch new file mode 100644 index 0000000..40b6e71 --- 
/dev/null +++ b/packages/glibc/2.17/0025-glibc-ppc64le-03.patch @@ -0,0 +1,1617 @@ +# commit 9e54314bb06aace405553552f7e7b7d8c172968c +# Author: Joseph Myers +# Date: Thu Jun 6 19:02:09 2013 +0000 +# +# Update miscellaneous scripts from upstream. +# +diff -urN glibc-2.17-c758a686/scripts/config.guess glibc-2.17-c758a686/scripts/config.guess +--- glibc-2.17-c758a686/scripts/config.guess 2014-05-26 15:59:45.000000000 -0500 ++++ glibc-2.17-c758a686/scripts/config.guess 2014-05-26 16:01:00.000000000 -0500 +@@ -1,14 +1,12 @@ + #! /bin/sh + # Attempt to guess a canonical system name. +-# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +-# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +-# 2011, 2012 Free Software Foundation, Inc. ++# Copyright 1992-2013 Free Software Foundation, Inc. + +-timestamp='2012-09-25' ++timestamp='2013-11-29' + + # This file is free software; you can redistribute it and/or modify it + # under the terms of the GNU General Public License as published by +-# the Free Software Foundation; either version 2 of the License, or ++# the Free Software Foundation; either version 3 of the License, or + # (at your option) any later version. + # + # This program is distributed in the hope that it will be useful, but +@@ -22,19 +20,17 @@ + # As a special exception to the GNU General Public License, if you + # distribute this file as part of a program that contains a + # configuration script generated by Autoconf, you may include it under +-# the same distribution terms that you use for the rest of that program. +- +- +-# Originally written by Per Bothner. Please send patches (context +-# diff format) to and include a ChangeLog +-# entry. ++# the same distribution terms that you use for the rest of that ++# program. This Exception is an additional permission under section 7 ++# of the GNU General Public License, version 3 ("GPLv3"). + # +-# This script attempts to guess a canonical system name similar to +-# config.sub. If it succeeds, it prints the system name on stdout, and +-# exits with 0. Otherwise, it exits with 1. ++# Originally written by Per Bothner. + # + # You can get the latest version of this script from: + # http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD ++# ++# Please send patches with a ChangeLog entry to config-patches@gnu.org. ++ + + me=`echo "$0" | sed -e 's,.*/,,'` + +@@ -54,9 +50,7 @@ + GNU config.guess ($timestamp) + + Originally written by Per Bothner. +-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 +-Free Software Foundation, Inc. ++Copyright 1992-2013 Free Software Foundation, Inc. + + This is free software; see the source for copying conditions. There is NO + warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." +@@ -138,6 +132,27 @@ + UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown + UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + ++case "${UNAME_SYSTEM}" in ++Linux|GNU|GNU/*) ++ # If the system lacks a compiler, then just pick glibc. ++ # We could probably try harder. ++ LIBC=gnu ++ ++ eval $set_cc_for_build ++ cat <<-EOF > $dummy.c ++ #include ++ #if defined(__UCLIBC__) ++ LIBC=uclibc ++ #elif defined(__dietlibc__) ++ LIBC=dietlibc ++ #else ++ LIBC=gnu ++ #endif ++ EOF ++ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` ++ ;; ++esac ++ + # Note: order is significant - the case branches are not exclusive. 
+ + case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in +@@ -859,21 +874,21 @@ + exit ;; + *:GNU:*:*) + # the GNU system +- echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` ++ echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland +- echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu ++ echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} + exit ;; + i*86:Minix:*:*) + echo ${UNAME_MACHINE}-pc-minix + exit ;; + aarch64:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in +@@ -886,59 +901,54 @@ + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 +- if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi +- echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} ++ if test "$?" = 0 ; then LIBC="gnulibc1" ; fi ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} ++ exit ;; ++ arc:Linux:*:* | arceb:Linux:*:*) ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + arm*:Linux:*:*) + eval $set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + else + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then +- echo ${UNAME_MACHINE}-unknown-linux-gnueabi ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi + else +- echo ${UNAME_MACHINE}-unknown-linux-gnueabihf ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf + fi + fi + exit ;; + avr32*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + cris:Linux:*:*) +- echo ${UNAME_MACHINE}-axis-linux-gnu ++ echo ${UNAME_MACHINE}-axis-linux-${LIBC} + exit ;; + crisv32:Linux:*:*) +- echo ${UNAME_MACHINE}-axis-linux-gnu ++ echo ${UNAME_MACHINE}-axis-linux-${LIBC} + exit ;; + frv:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + hexagon:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + i*86:Linux:*:*) +- LIBC=gnu +- eval $set_cc_for_build +- sed 's/^ //' << EOF >$dummy.c +- #ifdef __dietlibc__ +- LIBC=dietlibc +- #endif +-EOF +- eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` +- echo "${UNAME_MACHINE}-pc-linux-${LIBC}" ++ echo ${UNAME_MACHINE}-pc-linux-${LIBC} + exit ;; + ia64:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + m32r*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + m68*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + mips:Linux:*:* | mips64:Linux:*:*) + eval $set_cc_for_build +@@ -957,54 +967,63 @@ + #endif + EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` +- test x"${CPU}" 
!= x && { echo "${CPU}-unknown-linux-gnu"; exit; } ++ test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; } + ;; ++ or1k:Linux:*:*) ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} ++ exit ;; + or32:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + padre:Linux:*:*) +- echo sparc-unknown-linux-gnu ++ echo sparc-unknown-linux-${LIBC} + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) +- echo hppa64-unknown-linux-gnu ++ echo hppa64-unknown-linux-${LIBC} + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in +- PA7*) echo hppa1.1-unknown-linux-gnu ;; +- PA8*) echo hppa2.0-unknown-linux-gnu ;; +- *) echo hppa-unknown-linux-gnu ;; ++ PA7*) echo hppa1.1-unknown-linux-${LIBC} ;; ++ PA8*) echo hppa2.0-unknown-linux-${LIBC} ;; ++ *) echo hppa-unknown-linux-${LIBC} ;; + esac + exit ;; + ppc64:Linux:*:*) +- echo powerpc64-unknown-linux-gnu ++ echo powerpc64-unknown-linux-${LIBC} + exit ;; + ppc:Linux:*:*) +- echo powerpc-unknown-linux-gnu ++ echo powerpc-unknown-linux-${LIBC} ++ exit ;; ++ ppc64le:Linux:*:*) ++ echo powerpc64le-unknown-linux-${LIBC} ++ exit ;; ++ ppcle:Linux:*:*) ++ echo powerpcle-unknown-linux-${LIBC} + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) +- echo ${UNAME_MACHINE}-ibm-linux ++ echo ${UNAME_MACHINE}-ibm-linux-${LIBC} + exit ;; + sh64*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + sh*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + tile*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + vax:Linux:*:*) +- echo ${UNAME_MACHINE}-dec-linux-gnu ++ echo ${UNAME_MACHINE}-dec-linux-${LIBC} + exit ;; + x86_64:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + xtensa*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. +@@ -1237,19 +1256,31 @@ + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown +- case $UNAME_PROCESSOR in +- i386) +- eval $set_cc_for_build +- if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then +- if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ +- (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ +- grep IS_64BIT_ARCH >/dev/null +- then +- UNAME_PROCESSOR="x86_64" +- fi +- fi ;; +- unknown) UNAME_PROCESSOR=powerpc ;; +- esac ++ eval $set_cc_for_build ++ if test "$UNAME_PROCESSOR" = unknown ; then ++ UNAME_PROCESSOR=powerpc ++ fi ++ if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then ++ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then ++ if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ ++ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ ++ grep IS_64BIT_ARCH >/dev/null ++ then ++ case $UNAME_PROCESSOR in ++ i386) UNAME_PROCESSOR=x86_64 ;; ++ powerpc) UNAME_PROCESSOR=powerpc64 ;; ++ esac ++ fi ++ fi ++ elif test "$UNAME_PROCESSOR" = i386 ; then ++ # Avoid executing cc on OS X 10.9, as it ships with a stub ++ # that puts up a graphical alert prompting to install ++ # developer tools. 
Any system running Mac OS X 10.7 or ++ # later (Darwin 11 and later) is required to have a 64-bit ++ # processor. This is not true of the ARM version of Darwin ++ # that Apple uses in portable devices. ++ UNAME_PROCESSOR=x86_64 ++ fi + echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) +diff -urN glibc-2.17-c758a686/scripts/config.sub glibc-2.17-c758a686/scripts/config.sub +--- glibc-2.17-c758a686/scripts/config.sub 2014-05-26 15:59:45.000000000 -0500 ++++ glibc-2.17-c758a686/scripts/config.sub 2014-05-26 16:00:52.000000000 -0500 +@@ -1,24 +1,18 @@ + #! /bin/sh + # Configuration validation subroutine script. +-# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +-# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +-# 2011, 2012 Free Software Foundation, Inc. +- +-timestamp='2012-08-18' +- +-# This file is (in principle) common to ALL GNU software. +-# The presence of a machine in this file suggests that SOME GNU software +-# can handle that machine. It does not imply ALL GNU software can. +-# +-# This file is free software; you can redistribute it and/or modify +-# it under the terms of the GNU General Public License as published by +-# the Free Software Foundation; either version 2 of the License, or ++# Copyright 1992-2013 Free Software Foundation, Inc. ++ ++timestamp='2013-10-01' ++ ++# This file is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or + # (at your option) any later version. + # +-# This program is distributed in the hope that it will be useful, +-# but WITHOUT ANY WARRANTY; without even the implied warranty of +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-# GNU General Public License for more details. ++# This program is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# General Public License for more details. + # + # You should have received a copy of the GNU General Public License + # along with this program; if not, see . +@@ -26,11 +20,12 @@ + # As a special exception to the GNU General Public License, if you + # distribute this file as part of a program that contains a + # configuration script generated by Autoconf, you may include it under +-# the same distribution terms that you use for the rest of that program. ++# the same distribution terms that you use for the rest of that ++# program. This Exception is an additional permission under section 7 ++# of the GNU General Public License, version 3 ("GPLv3"). + + +-# Please send patches to . Submit a context +-# diff and a properly formatted GNU ChangeLog entry. ++# Please send patches with a ChangeLog entry to config-patches@gnu.org. + # + # Configuration subroutine to validate and canonicalize a configuration type. + # Supply the specified configuration type as an argument. +@@ -73,9 +68,7 @@ + version="\ + GNU config.sub ($timestamp) + +-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 +-Free Software Foundation, Inc. ++Copyright 1992-2013 Free Software Foundation, Inc. + + This is free software; see the source for copying conditions. There is NO + warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." 
+@@ -156,7 +149,7 @@ + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ +- -apple | -axis | -knuth | -cray | -microblaze) ++ -apple | -axis | -knuth | -cray | -microblaze*) + os= + basic_machine=$1 + ;; +@@ -259,10 +252,12 @@ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ + | am33_2.0 \ +- | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ +- | be32 | be64 \ ++ | arc | arceb \ ++ | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ ++ | avr | avr32 \ ++ | be32 | be64 \ + | bfin \ +- | c4x | clipper \ ++ | c4x | c8051 | clipper \ + | d10v | d30v | dlx | dsp16xx \ + | epiphany \ + | fido | fr30 | frv \ +@@ -270,10 +265,11 @@ + | hexagon \ + | i370 | i860 | i960 | ia64 \ + | ip2k | iq2000 \ ++ | k1om \ + | le32 | le64 \ + | lm32 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ +- | maxq | mb | microblaze | mcore | mep | metag \ ++ | maxq | mb | microblaze | microblazeel | mcore | mep | metag \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64el \ +@@ -291,16 +287,17 @@ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | mipsisa64sr71k | mipsisa64sr71kel \ ++ | mipsr5900 | mipsr5900el \ + | mipstx39 | mipstx39el \ + | mn10200 | mn10300 \ + | moxie \ + | mt \ + | msp430 \ + | nds32 | nds32le | nds32be \ +- | nios | nios2 \ ++ | nios | nios2 | nios2eb | nios2el \ + | ns16k | ns32k \ + | open8 \ +- | or32 \ ++ | or1k | or32 \ + | pdp10 | pdp11 | pj | pjl \ + | powerpc | powerpc64 | powerpc64le | powerpcle \ + | pyramid \ +@@ -328,7 +325,7 @@ + c6x) + basic_machine=tic6x-unknown + ;; +- m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip) ++ m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip) + basic_machine=$basic_machine-unknown + os=-none + ;; +@@ -370,13 +367,13 @@ + | aarch64-* | aarch64_be-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ + | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ +- | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ ++ | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \ + | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ + | avr-* | avr32-* \ + | be32-* | be64-* \ + | bfin-* | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c4x-* \ +- | clipper-* | craynv-* | cydra-* \ ++ | c8051-* | clipper-* | craynv-* | cydra-* \ + | d10v-* | d30v-* | dlx-* \ + | elxsi-* \ + | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ +@@ -385,11 +382,13 @@ + | hexagon-* \ + | i*86-* | i860-* | i960-* | ia64-* \ + | ip2k-* | iq2000-* \ ++ | k1om-* \ + | le32-* | le64-* \ + | lm32-* \ + | m32c-* | m32r-* | m32rle-* \ + | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ +- | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \ ++ | m88110-* | m88k-* | maxq-* | mcore-* | metag-* \ ++ | microblaze-* | microblazeel-* \ + | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ + | mips16-* \ + | mips64-* | mips64el-* \ +@@ -407,12 +406,13 @@ + | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64sb1-* | mipsisa64sb1el-* \ + | mipsisa64sr71k-* | mipsisa64sr71kel-* \ ++ | mipsr5900-* | mipsr5900el-* \ + | mipstx39-* | mipstx39el-* \ + | mmix-* \ + | mt-* \ + | msp430-* \ + | nds32-* | nds32le-* | nds32be-* \ +- | nios-* | nios2-* \ ++ | nios-* | nios2-* | 
nios2eb-* | nios2el-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ + | open8-* \ + | orion-* \ +@@ -788,7 +788,7 @@ + basic_machine=ns32k-utek + os=-sysv + ;; +- microblaze) ++ microblaze*) + basic_machine=microblaze-xilinx + ;; + mingw64) +@@ -796,7 +796,7 @@ + os=-mingw64 + ;; + mingw32) +- basic_machine=i386-pc ++ basic_machine=i686-pc + os=-mingw32 + ;; + mingw32ce) +@@ -832,7 +832,7 @@ + basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` + ;; + msys) +- basic_machine=i386-pc ++ basic_machine=i686-pc + os=-msys + ;; + mvs) +@@ -1023,7 +1023,11 @@ + basic_machine=i586-unknown + os=-pw32 + ;; +- rdos) ++ rdos | rdos64) ++ basic_machine=x86_64-pc ++ os=-rdos ++ ;; ++ rdos32) + basic_machine=i386-pc + os=-rdos + ;; +@@ -1350,7 +1354,7 @@ + -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ +- | -sym* | -kopensolaris* \ ++ | -sym* | -kopensolaris* | -plan9* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ + | -aos* | -aros* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ +@@ -1496,9 +1500,6 @@ + -aros*) + os=-aros + ;; +- -kaos*) +- os=-kaos +- ;; + -zvmoe) + os=-zvmoe + ;; +@@ -1547,6 +1548,9 @@ + c4x-* | tic4x-*) + os=-coff + ;; ++ c8051-*) ++ os=-elf ++ ;; + hexagon-*) + os=-elf + ;; +@@ -1590,6 +1594,9 @@ + mips*-*) + os=-elf + ;; ++ or1k-*) ++ os=-elf ++ ;; + or32-*) + os=-coff + ;; +diff -urN glibc-2.17-c758a686/scripts/install-sh glibc-2.17-c758a686/scripts/install-sh +--- glibc-2.17-c758a686/scripts/install-sh 2014-05-26 15:59:45.000000000 -0500 ++++ glibc-2.17-c758a686/scripts/install-sh 2014-05-26 16:00:34.000000000 -0500 +@@ -1,250 +1,527 @@ +-#! /bin/sh +-# ++#!/bin/sh + # install - install a program, script, or datafile +-# This comes from X11R5 (mit/util/scripts/install.sh). ++ ++scriptversion=2011-11-20.07; # UTC ++ ++# This originates from X11R5 (mit/util/scripts/install.sh), which was ++# later released in X11R6 (xc/config/util/install.sh) with the ++# following copyright and license. ++# ++# Copyright (C) 1994 X Consortium ++# ++# Permission is hereby granted, free of charge, to any person obtaining a copy ++# of this software and associated documentation files (the "Software"), to ++# deal in the Software without restriction, including without limitation the ++# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++# sell copies of the Software, and to permit persons to whom the Software is ++# furnished to do so, subject to the following conditions: ++# ++# The above copyright notice and this permission notice shall be included in ++# all copies or substantial portions of the Software. ++# ++# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN ++# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- ++# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++# ++# Except as contained in this notice, the name of the X Consortium shall not ++# be used in advertising or otherwise to promote the sale, use or other deal- ++# ings in this Software without prior written authorization from the X Consor- ++# tium. 
+ # +-# Copyright 1991 by the Massachusetts Institute of Technology + # +-# Permission to use, copy, modify, distribute, and sell this software and its +-# documentation for any purpose is hereby granted without fee, provided that +-# the above copyright notice appear in all copies and that both that +-# copyright notice and this permission notice appear in supporting +-# documentation, and that the name of M.I.T. not be used in advertising or +-# publicity pertaining to distribution of the software without specific, +-# written prior permission. M.I.T. makes no representations about the +-# suitability of this software for any purpose. It is provided "as is" +-# without express or implied warranty. ++# FSF changes to this file are in the public domain. + # + # Calling this script install-sh is preferred over install.sh, to prevent +-# `make' implicit rules from creating a file called install from it ++# 'make' implicit rules from creating a file called install from it + # when there is no Makefile. + # + # This script is compatible with the BSD install script, but was written +-# from scratch. It can only install one file at a time, a restriction +-# shared with many OS's install programs. ++# from scratch. + ++nl=' ++' ++IFS=" "" $nl" + + # set DOITPROG to echo to test this script + + # Don't use :- since 4.3BSD and earlier shells don't like it. +-doit="${DOITPROG-}" +- +- +-# put in absolute paths if you don't have them in your path; or use env. vars. +- +-mvprog="${MVPROG-mv}" +-cpprog="${CPPROG-cp}" +-chmodprog="${CHMODPROG-chmod}" +-chownprog="${CHOWNPROG-chown}" +-chgrpprog="${CHGRPPROG-chgrp}" +-stripprog="${STRIPPROG-strip}" +-rmprog="${RMPROG-rm}" +-mkdirprog="${MKDIRPROG-mkdir}" +- +-transformbasename="" +-transform_arg="" +-instcmd="$mvprog" +-chmodcmd="$chmodprog 0755" +-chowncmd="" +-chgrpcmd="" +-stripcmd="" +-rmcmd="$rmprog -f" +-mvcmd="$mvprog" +-src="" +-dst="" +-dir_arg="" +- +-while [ x"$1" != x ]; do +- case $1 in +- -c) instcmd="$cpprog" +- shift +- continue;; +- +- -d) dir_arg=true +- shift +- continue;; +- +- -m) chmodcmd="$chmodprog $2" +- shift +- shift +- continue;; +- +- -o) chowncmd="$chownprog $2" +- shift +- shift +- continue;; +- +- -g) chgrpcmd="$chgrpprog $2" +- shift +- shift +- continue;; +- +- -s) stripcmd="$stripprog" +- shift +- continue;; +- +- -t=*) transformarg=`echo $1 | sed 's/-t=//'` +- shift +- continue;; +- +- -b=*) transformbasename=`echo $1 | sed 's/-b=//'` +- shift +- continue;; +- +- *) if [ x"$src" = x ] +- then +- src=$1 +- else +- # this colon is to work around a 386BSD /bin/sh bug +- : +- dst=$1 +- fi +- shift +- continue;; +- esac +-done +- +-if [ x"$src" = x ] +-then +- echo "install: no input file specified" +- exit 1 ++doit=${DOITPROG-} ++if test -z "$doit"; then ++ doit_exec=exec + else +- true ++ doit_exec=$doit + fi + +-if [ x"$dir_arg" != x ]; then +- dst=$src +- src="" +- +- if [ -d $dst ]; then +- instcmd=: +- else +- instcmd=mkdir +- fi +-else ++# Put in absolute file names if you don't have them in your path; ++# or use environment vars. + +-# Waiting for this to be detected by the "$instcmd $src $dsttmp" command +-# might cause directories to be created, which would be especially bad +-# if $src (and thus $dsttmp) contains '*'. 
+- +- if [ -f $src -o -d $src ] +- then +- true +- else +- echo "install: $src does not exist" +- exit 1 +- fi +- +- if [ x"$dst" = x ] +- then +- echo "install: no destination specified" +- exit 1 +- else +- true +- fi +- +-# If destination is a directory, append the input filename; if your system +-# does not like double slashes in filenames, you may need to add some logic +- +- if [ -d $dst ] +- then +- dst="$dst"/`basename $src` +- else +- true +- fi +-fi +- +-## this sed command emulates the dirname command +-dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` ++chgrpprog=${CHGRPPROG-chgrp} ++chmodprog=${CHMODPROG-chmod} ++chownprog=${CHOWNPROG-chown} ++cmpprog=${CMPPROG-cmp} ++cpprog=${CPPROG-cp} ++mkdirprog=${MKDIRPROG-mkdir} ++mvprog=${MVPROG-mv} ++rmprog=${RMPROG-rm} ++stripprog=${STRIPPROG-strip} ++ ++posix_glob='?' ++initialize_posix_glob=' ++ test "$posix_glob" != "?" || { ++ if (set -f) 2>/dev/null; then ++ posix_glob= ++ else ++ posix_glob=: ++ fi ++ } ++' + +-# Make sure that the destination directory exists. +-# this part is taken from Noah Friedman's mkinstalldirs script ++posix_mkdir= + +-# Skip lots of stat calls in the usual case. +-if [ ! -d "$dstdir" ]; then +-defaultIFS=' +-' +-IFS="${IFS-${defaultIFS}}" ++# Desired mode of installed file. ++mode=0755 + +-oIFS="${IFS}" +-# Some sh's can't handle IFS=/ for some reason. +-IFS='%' +-set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` +-IFS="${oIFS}" +- +-pathcomp='' +- +-while [ $# -ne 0 ] ; do +- pathcomp="${pathcomp}${1}" +- shift +- +- if [ ! -d "${pathcomp}" ] ; +- then +- $mkdirprog "${pathcomp}" +- else +- true +- fi ++chgrpcmd= ++chmodcmd=$chmodprog ++chowncmd= ++mvcmd=$mvprog ++rmcmd="$rmprog -f" ++stripcmd= + +- pathcomp="${pathcomp}/" ++src= ++dst= ++dir_arg= ++dst_arg= ++ ++copy_on_change=false ++no_target_directory= ++ ++usage="\ ++Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE ++ or: $0 [OPTION]... SRCFILES... DIRECTORY ++ or: $0 [OPTION]... -t DIRECTORY SRCFILES... ++ or: $0 [OPTION]... -d DIRECTORIES... ++ ++In the 1st form, copy SRCFILE to DSTFILE. ++In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. ++In the 4th, create DIRECTORIES. ++ ++Options: ++ --help display this help and exit. ++ --version display version info and exit. ++ ++ -c (ignored) ++ -C install only if different (preserve the last data modification time) ++ -d create directories instead of installing files. ++ -g GROUP $chgrpprog installed files to GROUP. ++ -m MODE $chmodprog installed files to MODE. ++ -o USER $chownprog installed files to USER. ++ -s $stripprog installed files. ++ -t DIRECTORY install into DIRECTORY. ++ -T report an error if DSTFILE is a directory. ++ ++Environment variables override the default commands: ++ CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG ++ RMPROG STRIPPROG ++" ++ ++while test $# -ne 0; do ++ case $1 in ++ -c) ;; ++ ++ -C) copy_on_change=true;; ++ ++ -d) dir_arg=true;; ++ ++ -g) chgrpcmd="$chgrpprog $2" ++ shift;; ++ ++ --help) echo "$usage"; exit $?;; ++ ++ -m) mode=$2 ++ case $mode in ++ *' '* | *' '* | *' ++'* | *'*'* | *'?'* | *'['*) ++ echo "$0: invalid mode: $mode" >&2 ++ exit 1;; ++ esac ++ shift;; ++ ++ -o) chowncmd="$chownprog $2" ++ shift;; ++ ++ -s) stripcmd=$stripprog;; ++ ++ -t) dst_arg=$2 ++ # Protect names problematic for 'test' and other utilities. 
++ case $dst_arg in ++ -* | [=\(\)!]) dst_arg=./$dst_arg;; ++ esac ++ shift;; ++ ++ -T) no_target_directory=true;; ++ ++ --version) echo "$0 $scriptversion"; exit $?;; ++ ++ --) shift ++ break;; ++ ++ -*) echo "$0: invalid option: $1" >&2 ++ exit 1;; ++ ++ *) break;; ++ esac ++ shift + done ++ ++if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then ++ # When -d is used, all remaining arguments are directories to create. ++ # When -t is used, the destination is already specified. ++ # Otherwise, the last argument is the destination. Remove it from $@. ++ for arg ++ do ++ if test -n "$dst_arg"; then ++ # $@ is not empty: it contains at least $arg. ++ set fnord "$@" "$dst_arg" ++ shift # fnord ++ fi ++ shift # arg ++ dst_arg=$arg ++ # Protect names problematic for 'test' and other utilities. ++ case $dst_arg in ++ -* | [=\(\)!]) dst_arg=./$dst_arg;; ++ esac ++ done + fi + +-if [ x"$dir_arg" != x ] +-then +- $doit $instcmd $dst && +- +- if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi && +- if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi && +- if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi && +- if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi +-else ++if test $# -eq 0; then ++ if test -z "$dir_arg"; then ++ echo "$0: no input file specified." >&2 ++ exit 1 ++ fi ++ # It's OK to call 'install-sh -d' without argument. ++ # This can happen when creating conditional directories. ++ exit 0 ++fi + +-# If we're going to rename the final executable, determine the name now. ++if test -z "$dir_arg"; then ++ do_exit='(exit $ret); exit $ret' ++ trap "ret=129; $do_exit" 1 ++ trap "ret=130; $do_exit" 2 ++ trap "ret=141; $do_exit" 13 ++ trap "ret=143; $do_exit" 15 ++ ++ # Set umask so as not to create temps with too-generous modes. ++ # However, 'strip' requires both read and write access to temps. ++ case $mode in ++ # Optimize common cases. ++ *644) cp_umask=133;; ++ *755) cp_umask=22;; ++ ++ *[0-7]) ++ if test -z "$stripcmd"; then ++ u_plus_rw= ++ else ++ u_plus_rw='% 200' ++ fi ++ cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; ++ *) ++ if test -z "$stripcmd"; then ++ u_plus_rw= ++ else ++ u_plus_rw=,u+rw ++ fi ++ cp_umask=$mode$u_plus_rw;; ++ esac ++fi + +- if [ x"$transformarg" = x ] +- then +- dstfile=`basename $dst` ++for src ++do ++ # Protect names problematic for 'test' and other utilities. ++ case $src in ++ -* | [=\(\)!]) src=./$src;; ++ esac ++ ++ if test -n "$dir_arg"; then ++ dst=$src ++ dstdir=$dst ++ test -d "$dstdir" ++ dstdir_status=$? ++ else ++ ++ # Waiting for this to be detected by the "$cpprog $src $dsttmp" command ++ # might cause directories to be created, which would be especially bad ++ # if $src (and thus $dsttmp) contains '*'. ++ if test ! -f "$src" && test ! -d "$src"; then ++ echo "$0: $src does not exist." >&2 ++ exit 1 ++ fi ++ ++ if test -z "$dst_arg"; then ++ echo "$0: no destination specified." >&2 ++ exit 1 ++ fi ++ dst=$dst_arg ++ ++ # If destination is a directory, append the input filename; won't work ++ # if double slashes aren't ignored. ++ if test -d "$dst"; then ++ if test -n "$no_target_directory"; then ++ echo "$0: $dst_arg: Is a directory" >&2 ++ exit 1 ++ fi ++ dstdir=$dst ++ dst=$dstdir/`basename "$src"` ++ dstdir_status=0 ++ else ++ # Prefer dirname, but fall back on a substitute if dirname fails. 
++ dstdir=` ++ (dirname "$dst") 2>/dev/null || ++ expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ ++ X"$dst" : 'X\(//\)[^/]' \| \ ++ X"$dst" : 'X\(//\)$' \| \ ++ X"$dst" : 'X\(/\)' \| . 2>/dev/null || ++ echo X"$dst" | ++ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ ++ s//\1/ ++ q ++ } ++ /^X\(\/\/\)[^/].*/{ ++ s//\1/ ++ q ++ } ++ /^X\(\/\/\)$/{ ++ s//\1/ ++ q ++ } ++ /^X\(\/\).*/{ ++ s//\1/ ++ q ++ } ++ s/.*/./; q' ++ ` ++ ++ test -d "$dstdir" ++ dstdir_status=$? ++ fi ++ fi ++ ++ obsolete_mkdir_used=false ++ ++ if test $dstdir_status != 0; then ++ case $posix_mkdir in ++ '') ++ # Create intermediate dirs using mode 755 as modified by the umask. ++ # This is like FreeBSD 'install' as of 1997-10-28. ++ umask=`umask` ++ case $stripcmd.$umask in ++ # Optimize common cases. ++ *[2367][2367]) mkdir_umask=$umask;; ++ .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; ++ ++ *[0-7]) ++ mkdir_umask=`expr $umask + 22 \ ++ - $umask % 100 % 40 + $umask % 20 \ ++ - $umask % 10 % 4 + $umask % 2 ++ `;; ++ *) mkdir_umask=$umask,go-w;; ++ esac ++ ++ # With -d, create the new directory with the user-specified mode. ++ # Otherwise, rely on $mkdir_umask. ++ if test -n "$dir_arg"; then ++ mkdir_mode=-m$mode + else +- dstfile=`basename $dst $transformbasename | +- sed $transformarg`$transformbasename ++ mkdir_mode= + fi + +-# don't allow the sed command to completely eliminate the filename ++ posix_mkdir=false ++ case $umask in ++ *[123567][0-7][0-7]) ++ # POSIX mkdir -p sets u+wx bits regardless of umask, which ++ # is incompatible with FreeBSD 'install' when (umask & 300) != 0. ++ ;; ++ *) ++ tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ ++ trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 ++ ++ if (umask $mkdir_umask && ++ exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 ++ then ++ if test -z "$dir_arg" || { ++ # Check for POSIX incompatibilities with -m. ++ # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or ++ # other-writable bit of parent directory when it shouldn't. ++ # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. ++ ls_ld_tmpdir=`ls -ld "$tmpdir"` ++ case $ls_ld_tmpdir in ++ d????-?r-*) different_mode=700;; ++ d????-?--*) different_mode=755;; ++ *) false;; ++ esac && ++ $mkdirprog -m$different_mode -p -- "$tmpdir" && { ++ ls_ld_tmpdir_1=`ls -ld "$tmpdir"` ++ test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" ++ } ++ } ++ then posix_mkdir=: ++ fi ++ rmdir "$tmpdir/d" "$tmpdir" ++ else ++ # Remove any dirs left behind by ancient mkdir implementations. ++ rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null ++ fi ++ trap '' 0;; ++ esac;; ++ esac + +- if [ x"$dstfile" = x ] +- then +- dstfile=`basename $dst` ++ if ++ $posix_mkdir && ( ++ umask $mkdir_umask && ++ $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" ++ ) ++ then : ++ else ++ ++ # The umask is ridiculous, or mkdir does not conform to POSIX, ++ # or it failed possibly due to a race condition. Create the ++ # directory the slow way, step by step, checking for races as we go. ++ ++ case $dstdir in ++ /*) prefix='/';; ++ [-=\(\)!]*) prefix='./';; ++ *) prefix='';; ++ esac ++ ++ eval "$initialize_posix_glob" ++ ++ oIFS=$IFS ++ IFS=/ ++ $posix_glob set -f ++ set fnord $dstdir ++ shift ++ $posix_glob set +f ++ IFS=$oIFS ++ ++ prefixes= ++ ++ for d ++ do ++ test X"$d" = X && continue ++ ++ prefix=$prefix$d ++ if test -d "$prefix"; then ++ prefixes= + else +- true ++ if $posix_mkdir; then ++ (umask=$mkdir_umask && ++ $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break ++ # Don't fail if two instances are running concurrently. 
++ test -d "$prefix" || exit 1 ++ else ++ case $prefix in ++ *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; ++ *) qprefix=$prefix;; ++ esac ++ prefixes="$prefixes '$qprefix'" ++ fi + fi ++ prefix=$prefix/ ++ done + +-# Make a temp file name in the proper directory. +- +- dsttmp=$dstdir/#inst.$$# +- +-# Move or copy the file name to the temp name +- +- $doit $instcmd $src $dsttmp && +- +- trap "rm -f ${dsttmp}" 0 && +- +-# and set any options; do chmod last to preserve setuid bits +- +-# If any of these fail, we abort the whole thing. If we want to +-# ignore errors from any of these, just make sure not to ignore +-# errors from the above "$doit $instcmd $src $dsttmp" command. +- +- if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi && +- if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi && +- if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi && +- if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi && +- +-# Now rename the file to the real destination. +- +- $doit $rmcmd -f $dstdir/$dstfile && +- $doit $mvcmd $dsttmp $dstdir/$dstfile +- +-fi && ++ if test -n "$prefixes"; then ++ # Don't fail if two instances are running concurrently. ++ (umask $mkdir_umask && ++ eval "\$doit_exec \$mkdirprog $prefixes") || ++ test -d "$dstdir" || exit 1 ++ obsolete_mkdir_used=true ++ fi ++ fi ++ fi ++ ++ if test -n "$dir_arg"; then ++ { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && ++ { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && ++ { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || ++ test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 ++ else ++ ++ # Make a couple of temp file names in the proper directory. ++ dsttmp=$dstdir/_inst.$$_ ++ rmtmp=$dstdir/_rm.$$_ ++ ++ # Trap to clean up those temp files at exit. ++ trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 ++ ++ # Copy the file name to the temp name. ++ (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && ++ ++ # and set any options; do chmod last to preserve setuid bits. ++ # ++ # If any of these fail, we abort the whole thing. If we want to ++ # ignore errors from any of these, just make sure not to ignore ++ # errors from the above "$doit $cpprog $src $dsttmp" command. ++ # ++ { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && ++ { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && ++ { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && ++ { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && ++ ++ # If -C, don't bother to copy if it wouldn't change the file. ++ if $copy_on_change && ++ old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && ++ new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && ++ ++ eval "$initialize_posix_glob" && ++ $posix_glob set -f && ++ set X $old && old=:$2:$4:$5:$6 && ++ set X $new && new=:$2:$4:$5:$6 && ++ $posix_glob set +f && ++ ++ test "$old" = "$new" && ++ $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 ++ then ++ rm -f "$dsttmp" ++ else ++ # Rename the file to the real destination. ++ $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || ++ ++ # The rename failed, perhaps because mv can't rename something else ++ # to itself, or perhaps because mv is so ancient that it does not ++ # support -f. ++ { ++ # Now remove or move aside any old file at destination location. ++ # We try this two ways since rm can't unlink itself on some ++ # systems and the destination file might be busy for other ++ # reasons. 
In this case, the final cleanup might fail but the new ++ # file should still install successfully. ++ { ++ test ! -f "$dst" || ++ $doit $rmcmd -f "$dst" 2>/dev/null || ++ { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && ++ { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } ++ } || ++ { echo "$0: cannot unlink or rename $dst" >&2 ++ (exit 1); exit 1 ++ } ++ } && ++ ++ # Now rename the file to the real destination. ++ $doit $mvcmd "$dsttmp" "$dst" ++ } ++ fi || exit 1 + ++ trap '' 0 ++ fi ++done + +-exit 0 ++# Local variables: ++# eval: (add-hook 'write-file-hooks 'time-stamp) ++# time-stamp-start: "scriptversion=" ++# time-stamp-format: "%:y-%02m-%02d.%02H" ++# time-stamp-time-zone: "UTC" ++# time-stamp-end: "; # UTC" ++# End: +diff -urN glibc-2.17-c758a686/scripts/mkinstalldirs glibc-2.17-c758a686/scripts/mkinstalldirs +--- glibc-2.17-c758a686/scripts/mkinstalldirs 2014-05-26 15:59:45.000000000 -0500 ++++ glibc-2.17-c758a686/scripts/mkinstalldirs 2014-05-26 16:00:34.000000000 -0500 +@@ -1,38 +1,162 @@ + #! /bin/sh + # mkinstalldirs --- make directory hierarchy +-# Author: Noah Friedman +-# Created: 1993-05-16 +-# Public domain + ++scriptversion=2009-04-28.21; # UTC ++ ++# Original author: Noah Friedman ++# Created: 1993-05-16 ++# Public domain. ++# ++# This file is maintained in Automake, please report ++# bugs to or send patches to ++# . ++ ++nl=' ++' ++IFS=" "" $nl" + errstatus=0 ++dirmode= ++ ++usage="\ ++Usage: mkinstalldirs [-h] [--help] [--version] [-m MODE] DIR ... ++ ++Create each directory DIR (with mode MODE, if specified), including all ++leading file name components. ++ ++Report bugs to ." ++ ++# process command line arguments ++while test $# -gt 0 ; do ++ case $1 in ++ -h | --help | --h*) # -h for help ++ echo "$usage" ++ exit $? ++ ;; ++ -m) # -m PERM arg ++ shift ++ test $# -eq 0 && { echo "$usage" 1>&2; exit 1; } ++ dirmode=$1 ++ shift ++ ;; ++ --version) ++ echo "$0 $scriptversion" ++ exit $? ++ ;; ++ --) # stop option processing ++ shift ++ break ++ ;; ++ -*) # unknown option ++ echo "$usage" 1>&2 ++ exit 1 ++ ;; ++ *) # first non-opt arg ++ break ++ ;; ++ esac ++done + + for file + do +- set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'` +- shift ++ if test -d "$file"; then ++ shift ++ else ++ break ++ fi ++done ++ ++case $# in ++ 0) exit 0 ;; ++esac ++ ++# Solaris 8's mkdir -p isn't thread-safe. If you mkdir -p a/b and ++# mkdir -p a/c at the same time, both will detect that a is missing, ++# one will create a, then the other will try to create a and die with ++# a "File exists" error. This is a problem when calling mkinstalldirs ++# from a parallel make. We use --version in the probe to restrict ++# ourselves to GNU mkdir, which is thread-safe. ++case $dirmode in ++ '') ++ if mkdir -p --version . >/dev/null 2>&1 && test ! -d ./--version; then ++ echo "mkdir -p -- $*" ++ exec mkdir -p -- "$@" ++ else ++ # On NextStep and OpenStep, the 'mkdir' command does not ++ # recognize any option. It will interpret all options as ++ # directories to create, and then abort because '.' already ++ # exists. ++ test -d ./-p && rmdir ./-p ++ test -d ./--version && rmdir ./--version ++ fi ++ ;; ++ *) ++ if mkdir -m "$dirmode" -p --version . >/dev/null 2>&1 && ++ test ! -d ./--version; then ++ echo "mkdir -m $dirmode -p -- $*" ++ exec mkdir -m "$dirmode" -p -- "$@" ++ else ++ # Clean up after NextStep and OpenStep mkdir. 
++ for d in ./-m ./-p ./--version "./$dirmode"; ++ do ++ test -d $d && rmdir $d ++ done ++ fi ++ ;; ++esac + +- pathcomp= +- for d +- do +- pathcomp="$pathcomp$d" +- case "$pathcomp" in +- -* ) pathcomp=./$pathcomp ;; +- esac +- +- if test ! -d "$pathcomp"; then +- echo "mkdir $pathcomp" 1>&2 +- +- mkdir "$pathcomp" || lasterr=$? +- +- if test ! -d "$pathcomp"; then +- errstatus=$lasterr +- fi +- fi ++for file ++do ++ case $file in ++ /*) pathcomp=/ ;; ++ *) pathcomp= ;; ++ esac ++ oIFS=$IFS ++ IFS=/ ++ set fnord $file ++ shift ++ IFS=$oIFS ++ ++ for d ++ do ++ test "x$d" = x && continue ++ ++ pathcomp=$pathcomp$d ++ case $pathcomp in ++ -*) pathcomp=./$pathcomp ;; ++ esac ++ ++ if test ! -d "$pathcomp"; then ++ echo "mkdir $pathcomp" ++ ++ mkdir "$pathcomp" || lasterr=$? ++ ++ if test ! -d "$pathcomp"; then ++ errstatus=$lasterr ++ else ++ if test ! -z "$dirmode"; then ++ echo "chmod $dirmode $pathcomp" ++ lasterr= ++ chmod "$dirmode" "$pathcomp" || lasterr=$? ++ ++ if test ! -z "$lasterr"; then ++ errstatus=$lasterr ++ fi ++ fi ++ fi ++ fi + +- pathcomp="$pathcomp/" +- done ++ pathcomp=$pathcomp/ ++ done + done + + exit $errstatus + +-# mkinstalldirs ends here ++# Local Variables: ++# mode: shell-script ++# sh-indentation: 2 ++# eval: (add-hook 'write-file-hooks 'time-stamp) ++# time-stamp-start: "scriptversion=" ++# time-stamp-format: "%:y-%02m-%02d.%02H" ++# time-stamp-time-zone: "UTC" ++# time-stamp-end: "; # UTC" ++# End: +diff -urN glibc-2.17-c758a686/scripts/move-if-change glibc-2.17-c758a686/scripts/move-if-change +--- glibc-2.17-c758a686/scripts/move-if-change 2014-05-26 15:59:45.000000000 -0500 ++++ glibc-2.17-c758a686/scripts/move-if-change 2014-05-26 16:00:34.000000000 -0500 +@@ -1,17 +1,83 @@ + #!/bin/sh + # Like mv $1 $2, but if the files are the same, just delete $1. +-# Status is 0 if $2 is changed, 1 otherwise. +-if +-test -r $2 +-then +-if +-cmp -s $1 $2 +-then +-echo $2 is unchanged +-rm -f $1 ++# Status is zero if successful, nonzero otherwise. ++ ++VERSION='2012-01-06 07:23'; # UTC ++# The definition above must lie within the first 8 lines in order ++# for the Emacs time-stamp write hook (at end) to update it. ++# If you change this file with Emacs, please let the write hook ++# do its job. Otherwise, update this string manually. ++ ++# Copyright (C) 2002-2013 Free Software Foundation, Inc. ++ ++# This program is free software: you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation, either version 3 of the License, or ++# (at your option) any later version. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++ ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++usage="usage: $0 SOURCE DEST" ++ ++help="$usage ++ or: $0 OPTION ++If SOURCE is different than DEST, then move it to DEST; else remove SOURCE. ++ ++ --help display this help and exit ++ --version output version information and exit ++ ++The variable CMPPROG can be used to specify an alternative to 'cmp'. ++ ++Report bugs to ." ++ ++version=`expr "$VERSION" : '\([^ ]*\)'` ++version="move-if-change (gnulib) $version ++Copyright (C) 2011 Free Software Foundation, Inc. 
++License GPLv3+: GNU GPL version 3 or later ++This is free software: you are free to change and redistribute it. ++There is NO WARRANTY, to the extent permitted by law." ++ ++cmpprog=${CMPPROG-cmp} ++ ++for arg ++do ++ case $arg in ++ --help | --hel | --he | --h) ++ exec echo "$help" ;; ++ --version | --versio | --versi | --vers | --ver | --ve | --v) ++ exec echo "$version" ;; ++ --) ++ shift ++ break ;; ++ -*) ++ echo "$0: invalid option: $arg" >&2 ++ exit 1 ;; ++ *) ++ break ;; ++ esac ++done ++ ++test $# -eq 2 || { echo "$0: $usage" >&2; exit 1; } ++ ++if test -r "$2" && $cmpprog -- "$1" "$2" >/dev/null; then ++ rm -f -- "$1" + else +-mv -f $1 $2 +-fi +-else +-mv -f $1 $2 ++ if mv -f -- "$1" "$2"; then :; else ++ # Ignore failure due to a concurrent move-if-change. ++ test -r "$2" && $cmpprog -- "$1" "$2" >/dev/null && rm -f -- "$1" ++ fi + fi ++ ++## Local Variables: ++## eval: (add-hook 'write-file-hooks 'time-stamp) ++## time-stamp-start: "VERSION='" ++## time-stamp-format: "%:y-%02m-%02d %02H:%02M" ++## time-stamp-time-zone: "UTC" ++## time-stamp-end: "'; # UTC" ++## End: diff --git a/packages/glibc/2.17/0026-glibc-ppc64le-04.patch b/packages/glibc/2.17/0026-glibc-ppc64le-04.patch new file mode 100644 index 0000000..8f6840b --- /dev/null +++ b/packages/glibc/2.17/0026-glibc-ppc64le-04.patch @@ -0,0 +1,676 @@ +# commit 9605ca6c085a749f29b6866a3e00bce1ba1a2698 +# Author: Alan Modra +# Date: Sat Aug 17 18:12:56 2013 +0930 +# +# IBM long double mechanical changes to support little-endian +# http://sourceware.org/ml/libc-alpha/2013-07/msg00001.html +# +# This patch starts the process of supporting powerpc64 little-endian +# long double in glibc. IBM long double is an array of two ieee +# doubles, so making union ibm_extended_long_double reflect this fact is +# the correct way to access fields of the doubles. +# +# * sysdeps/ieee754/ldbl-128ibm/ieee754.h +# (union ibm_extended_long_double): Define as an array of ieee754_double. +# (IBM_EXTENDED_LONG_DOUBLE_BIAS): Delete. +# * sysdeps/ieee754/ldbl-128ibm/printf_fphex.c: Update all references +# to ibm_extended_long_double and IBM_EXTENDED_LONG_DOUBLE_BIAS. +# * sysdeps/ieee754/ldbl-128ibm/e_exp10l.c: Likewise. +# * sysdeps/ieee754/ldbl-128ibm/e_expl.c: Likewise. +# * sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c: Likewise. +# * sysdeps/ieee754/ldbl-128ibm/math_ldbl.h: Likewise. +# * sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c: Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c: Likewise. +# * sysdeps/ieee754/ldbl-128ibm/strtold_l.c: Likewise. +# * sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c: Likewise. 
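[Editorial note, not part of the patch] The commit message above describes IBM long double as an array of two IEEE doubles. Below is a minimal stand-alone sketch of that view; the union and function names (ibm_ldbl_sketch, pack_ldbl, unpack_ldbl) are invented for illustration, and the stated semantics assume a target whose long double really is the IBM double-double format, such as powerpc64 with the default 128-bit long double.

#include <stdio.h>

/* Sketch of the "two doubles" view: the long double value is hi + lo,
   where hi is the value rounded to double precision and lo carries the
   extra precision.  Packing and unpacking are then plain array accesses
   rather than big-endian-only bit-field surgery.  */
union ibm_ldbl_sketch
{
  long double ld;
  double dd[2];        /* dd[0] = high double, dd[1] = low double.  */
};

static long double
pack_ldbl (double hi, double lo)
{
  union ibm_ldbl_sketch u;
  u.dd[0] = hi;
  u.dd[1] = lo;
  return u.ld;
}

static void
unpack_ldbl (long double x, double *hi, double *lo)
{
  union ibm_ldbl_sketch u;
  u.ld = x;
  *hi = u.dd[0];
  *lo = u.dd[1];
}

int
main (void)
{
  double hi, lo;
  unpack_ldbl (pack_ldbl (1.0, 0x1p-60), &hi, &lo);
  printf ("high=%a low=%a\n", hi, lo);
  return 0;
}

The patch's default_ldbl_pack and default_ldbl_unpack are essentially this, expressed through union ieee754_double so the same union also reaches the sign, exponent and mantissa fields of each half.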
+# +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_exp10l.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_exp10l.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_exp10l.c 2014-05-26 21:08:01.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_exp10l.c 2014-05-26 21:08:10.000000000 -0500 +@@ -36,9 +36,9 @@ + else if (arg > LDBL_MAX_10_EXP + 1) + return LDBL_MAX * LDBL_MAX; + +- u.d = arg; +- arg_high = u.dd[0]; +- arg_low = u.dd[1]; ++ u.ld = arg; ++ arg_high = u.d[0].d; ++ arg_low = u.d[1].d; + exp_high = arg_high * log10_high; + exp_low = arg_high * log10_low + arg_low * M_LN10l; + return __ieee754_expl (exp_high) * __ieee754_expl (exp_low); +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_expl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_expl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_expl.c 2014-05-26 21:08:01.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_expl.c 2014-05-26 21:08:10.000000000 -0500 +@@ -162,39 +162,39 @@ + x = x + xl; + + /* Compute ex2 = 2^n_0 e^(argtable[tval1]) e^(argtable[tval2]). */ +- ex2_u.d = __expl_table[T_EXPL_RES1 + tval1] +- * __expl_table[T_EXPL_RES2 + tval2]; ++ ex2_u.ld = (__expl_table[T_EXPL_RES1 + tval1] ++ * __expl_table[T_EXPL_RES2 + tval2]); + n_i = (int)n; + /* 'unsafe' is 1 iff n_1 != 0. */ + unsafe = fabsl(n_i) >= -LDBL_MIN_EXP - 1; +- ex2_u.ieee.exponent += n_i >> unsafe; ++ ex2_u.d[0].ieee.exponent += n_i >> unsafe; + /* Fortunately, there are no subnormal lowpart doubles in + __expl_table, only normal values and zeros. + But after scaling it can be subnormal. */ +- exponent2 = ex2_u.ieee.exponent2 + (n_i >> unsafe); +- if (ex2_u.ieee.exponent2 == 0) +- /* assert ((ex2_u.ieee.mantissa2|ex2_u.ieee.mantissa3) == 0) */; ++ exponent2 = ex2_u.d[1].ieee.exponent + (n_i >> unsafe); ++ if (ex2_u.d[1].ieee.exponent == 0) ++ /* assert ((ex2_u.d[1].ieee.mantissa0|ex2_u.d[1].ieee.mantissa1) == 0) */; + else if (exponent2 > 0) +- ex2_u.ieee.exponent2 = exponent2; ++ ex2_u.d[1].ieee.exponent = exponent2; + else if (exponent2 <= -54) + { +- ex2_u.ieee.exponent2 = 0; +- ex2_u.ieee.mantissa2 = 0; +- ex2_u.ieee.mantissa3 = 0; ++ ex2_u.d[1].ieee.exponent = 0; ++ ex2_u.d[1].ieee.mantissa0 = 0; ++ ex2_u.d[1].ieee.mantissa1 = 0; + } + else + { + static const double + two54 = 1.80143985094819840000e+16, /* 4350000000000000 */ + twom54 = 5.55111512312578270212e-17; /* 3C90000000000000 */ +- ex2_u.dd[1] *= two54; +- ex2_u.ieee.exponent2 += n_i >> unsafe; +- ex2_u.dd[1] *= twom54; ++ ex2_u.d[1].d *= two54; ++ ex2_u.d[1].ieee.exponent += n_i >> unsafe; ++ ex2_u.d[1].d *= twom54; + } + + /* Compute scale = 2^n_1. */ +- scale_u.d = 1.0L; +- scale_u.ieee.exponent += n_i - (n_i >> unsafe); ++ scale_u.ld = 1.0L; ++ scale_u.d[0].ieee.exponent += n_i - (n_i >> unsafe); + + /* Approximate e^x2 - 1, using a seventh-degree polynomial, + with maximum error in [-2^-16-2^-53,2^-16+2^-53] +@@ -204,7 +204,7 @@ + /* Return result. */ + fesetenv (&oldenv); + +- result = x22 * ex2_u.d + ex2_u.d; ++ result = x22 * ex2_u.ld + ex2_u.ld; + + /* Now we can test whether the result is ultimate or if we are unsure. 
+ In the later case we should probably call a mpn based routine to give +@@ -238,7 +238,7 @@ + if (!unsafe) + return result; + else +- return result * scale_u.d; ++ return result * scale_u.ld; + } + /* Exceptional cases: */ + else if (isless (x, himark)) +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ieee754.h glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ieee754.h +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ieee754.h 2014-05-26 21:08:01.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ieee754.h 2014-05-26 21:08:10.000000000 -0500 +@@ -180,29 +180,9 @@ + + union ibm_extended_long_double + { +- long double d; +- double dd[2]; +- +- /* This is the IBM extended format long double. */ +- struct +- { /* Big endian. There is no other. */ +- +- unsigned int negative:1; +- unsigned int exponent:11; +- /* Together Mantissa0-3 comprise the mantissa. */ +- unsigned int mantissa0:20; +- unsigned int mantissa1:32; +- +- unsigned int negative2:1; +- unsigned int exponent2:11; +- /* There is an implied 1 here? */ +- /* Together these comprise the mantissa. */ +- unsigned int mantissa2:20; +- unsigned int mantissa3:32; +- } ieee; +- }; +- +-#define IBM_EXTENDED_LONG_DOUBLE_BIAS 0x3ff /* Added to exponent. */ ++ long double ld; ++ union ieee754_double d[2]; ++ }; + + __END_DECLS + +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c 2014-05-26 21:08:01.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c 2014-05-26 21:08:10.000000000 -0500 +@@ -36,22 +36,22 @@ + union ibm_extended_long_double u; + unsigned long long hi, lo; + int ediff; +- u.d = value; ++ u.ld = value; + +- *is_neg = u.ieee.negative; +- *expt = (int) u.ieee.exponent - IBM_EXTENDED_LONG_DOUBLE_BIAS; ++ *is_neg = u.d[0].ieee.negative; ++ *expt = (int) u.d[0].ieee.exponent - IEEE754_DOUBLE_BIAS; + +- lo = ((long long) u.ieee.mantissa2 << 32) | u.ieee.mantissa3; +- hi = ((long long) u.ieee.mantissa0 << 32) | u.ieee.mantissa1; ++ lo = ((long long) u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1; ++ hi = ((long long) u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1; + /* If the lower double is not a denomal or zero then set the hidden + 53rd bit. */ +- if (u.ieee.exponent2 > 0) ++ if (u.d[1].ieee.exponent > 0) + { + lo |= 1LL << 52; + + /* The lower double is normalized separately from the upper. We may + need to adjust the lower manitissa to reflect this. */ +- ediff = u.ieee.exponent - u.ieee.exponent2; ++ ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent; + if (ediff > 53) + lo = lo >> (ediff-53); + } +@@ -59,8 +59,8 @@ + difference between the long double and the rounded high double + value. This is indicated by a differnce between the signs of the + high and low doubles. */ +- if ((u.ieee.negative != u.ieee.negative2) +- && ((u.ieee.exponent2 != 0) && (lo != 0L))) ++ if ((u.d[0].ieee.negative != u.d[1].ieee.negative) ++ && ((u.d[1].ieee.exponent != 0) && (lo != 0L))) + { + lo = (1ULL << 53) - lo; + if (hi == 0LL) +@@ -92,7 +92,7 @@ + #define NUM_LEADING_ZEROS (BITS_PER_MP_LIMB \ + - (LDBL_MANT_DIG - ((N - 1) * BITS_PER_MP_LIMB))) + +- if (u.ieee.exponent == 0) ++ if (u.d[0].ieee.exponent == 0) + { + /* A biased exponent of zero is a special case. + Either it is a zero or it is a denormal number. 
*/ +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h 2014-05-26 21:08:01.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h 2014-05-26 21:08:10.000000000 -0500 +@@ -14,28 +14,28 @@ + as bit 53 of the mantissa. */ + uint64_t hi, lo; + int ediff; +- union ibm_extended_long_double eldbl; +- eldbl.d = x; +- *exp = eldbl.ieee.exponent - IBM_EXTENDED_LONG_DOUBLE_BIAS; ++ union ibm_extended_long_double u; ++ u.ld = x; ++ *exp = u.d[0].ieee.exponent - IEEE754_DOUBLE_BIAS; + +- lo = ((int64_t)eldbl.ieee.mantissa2 << 32) | eldbl.ieee.mantissa3; +- hi = ((int64_t)eldbl.ieee.mantissa0 << 32) | eldbl.ieee.mantissa1; ++ lo = ((uint64_t)u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1; ++ hi = ((uint64_t)u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1; + /* If the lower double is not a denomal or zero then set the hidden + 53rd bit. */ +- if (eldbl.ieee.exponent2 > 0x001) ++ if (u.d[1].ieee.exponent > 0x001) + { + lo |= (1ULL << 52); + lo = lo << 7; /* pre-shift lo to match ieee854. */ + /* The lower double is normalized separately from the upper. We + may need to adjust the lower manitissa to reflect this. */ +- ediff = eldbl.ieee.exponent - eldbl.ieee.exponent2; ++ ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent; + if (ediff > 53) + lo = lo >> (ediff-53); + hi |= (1ULL << 52); + } + +- if ((eldbl.ieee.negative != eldbl.ieee.negative2) +- && ((eldbl.ieee.exponent2 != 0) && (lo != 0LL))) ++ if ((u.d[0].ieee.negative != u.d[1].ieee.negative) ++ && ((u.d[1].ieee.exponent != 0) && (lo != 0LL))) + { + hi--; + lo = (1ULL << 60) - lo; +@@ -58,10 +58,10 @@ + unsigned long hidden2, lzcount; + unsigned long long hi, lo; + +- u.ieee.negative = sign; +- u.ieee.negative2 = sign; +- u.ieee.exponent = exp + IBM_EXTENDED_LONG_DOUBLE_BIAS; +- u.ieee.exponent2 = exp-53 + IBM_EXTENDED_LONG_DOUBLE_BIAS; ++ u.d[0].ieee.negative = sign; ++ u.d[1].ieee.negative = sign; ++ u.d[0].ieee.exponent = exp + IEEE754_DOUBLE_BIAS; ++ u.d[1].ieee.exponent = exp-53 + IEEE754_DOUBLE_BIAS; + /* Expect 113 bits (112 bits + hidden) right justified in two longs. + The low order 53 bits (52 + hidden) go into the lower double */ + lo = (lo64 >> 7)& ((1ULL << 53) - 1); +@@ -78,7 +78,7 @@ + if (hidden2) + { + hi++; +- u.ieee.negative2 = !sign; ++ u.d[1].ieee.negative = !sign; + lo = (1ULL << 53) - lo; + } + /* The hidden bit of the lo mantissa is zero so we need to +@@ -94,32 +94,32 @@ + lzcount = lzcount - 11; + if (lzcount > 0) + { +- int expnt2 = u.ieee.exponent2 - lzcount; ++ int expnt2 = u.d[1].ieee.exponent - lzcount; + if (expnt2 >= 1) + { + /* Not denormal. Normalize and set low exponent. */ + lo = lo << lzcount; +- u.ieee.exponent2 = expnt2; ++ u.d[1].ieee.exponent = expnt2; + } + else + { + /* Is denormal. 
*/ + lo = lo << (lzcount + expnt2); +- u.ieee.exponent2 = 0; ++ u.d[1].ieee.exponent = 0; + } + } + } + else + { +- u.ieee.negative2 = 0; +- u.ieee.exponent2 = 0; ++ u.d[1].ieee.negative = 0; ++ u.d[1].ieee.exponent = 0; + } + +- u.ieee.mantissa3 = lo & ((1ULL << 32) - 1); +- u.ieee.mantissa2 = (lo >> 32) & ((1ULL << 20) - 1); +- u.ieee.mantissa1 = hi & ((1ULL << 32) - 1); +- u.ieee.mantissa0 = (hi >> 32) & ((1ULL << 20) - 1); +- return u.d; ++ u.d[1].ieee.mantissa1 = lo & ((1ULL << 32) - 1); ++ u.d[1].ieee.mantissa0 = (lo >> 32) & ((1ULL << 20) - 1); ++ u.d[0].ieee.mantissa1 = hi & ((1ULL << 32) - 1); ++ u.d[0].ieee.mantissa0 = (hi >> 32) & ((1ULL << 20) - 1); ++ return u.ld; + } + + /* Handy utility functions to pack/unpack/cononicalize and find the nearbyint +@@ -128,18 +128,18 @@ + default_ldbl_pack (double a, double aa) + { + union ibm_extended_long_double u; +- u.dd[0] = a; +- u.dd[1] = aa; +- return u.d; ++ u.d[0].d = a; ++ u.d[1].d = aa; ++ return u.ld; + } + + static inline void + default_ldbl_unpack (long double l, double *a, double *aa) + { + union ibm_extended_long_double u; +- u.d = l; +- *a = u.dd[0]; +- *aa = u.dd[1]; ++ u.ld = l; ++ *a = u.d[0].d; ++ *aa = u.d[1].d; + } + + #ifndef ldbl_pack +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c 2014-05-26 21:08:01.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c 2014-05-26 21:08:10.000000000 -0500 +@@ -34,11 +34,11 @@ + unsigned long long hi, lo; + int exponent2; + +- u.ieee.negative = sign; +- u.ieee.negative2 = sign; +- u.ieee.exponent = expt + IBM_EXTENDED_LONG_DOUBLE_BIAS; +- u.ieee.exponent2 = 0; +- exponent2 = expt - 53 + IBM_EXTENDED_LONG_DOUBLE_BIAS; ++ u.d[0].ieee.negative = sign; ++ u.d[1].ieee.negative = sign; ++ u.d[0].ieee.exponent = expt + IEEE754_DOUBLE_BIAS; ++ u.d[1].ieee.exponent = 0; ++ exponent2 = expt - 53 + IEEE754_DOUBLE_BIAS; + + #if BITS_PER_MP_LIMB == 32 + /* The low order 53 bits (52 + hidden) go into the lower double */ +@@ -74,15 +74,15 @@ + else + lzcount = lzcount + 42; + +- if (lzcount > u.ieee.exponent) ++ if (lzcount > u.d[0].ieee.exponent) + { +- lzcount = u.ieee.exponent; +- u.ieee.exponent = 0; ++ lzcount = u.d[0].ieee.exponent; ++ u.d[0].ieee.exponent = 0; + exponent2 -= lzcount; + } + else + { +- u.ieee.exponent -= (lzcount - 1); ++ u.d[0].ieee.exponent -= (lzcount - 1); + exponent2 -= (lzcount - 1); + } + +@@ -112,9 +112,9 @@ + { + if ((hi & (1LL << 53)) != 0) + hi -= 1LL << 52; +- u.ieee.exponent++; ++ u.d[0].ieee.exponent++; + } +- u.ieee.negative2 = !sign; ++ u.d[1].ieee.negative = !sign; + lo = (1LL << 53) - lo; + } + +@@ -135,17 +135,17 @@ + exponent2 = exponent2 - lzcount; + } + if (exponent2 > 0) +- u.ieee.exponent2 = exponent2; ++ u.d[1].ieee.exponent = exponent2; + else + lo >>= 1 - exponent2; + } + else +- u.ieee.negative2 = 0; ++ u.d[1].ieee.negative = 0; + +- u.ieee.mantissa3 = lo & 0xffffffffLL; +- u.ieee.mantissa2 = (lo >> 32) & 0xfffff; +- u.ieee.mantissa1 = hi & 0xffffffffLL; +- u.ieee.mantissa0 = (hi >> 32) & ((1LL << (LDBL_MANT_DIG - 86)) - 1); ++ u.d[1].ieee.mantissa1 = lo & 0xffffffffLL; ++ u.d[1].ieee.mantissa0 = (lo >> 32) & 0xfffff; ++ u.d[0].ieee.mantissa1 = hi & 0xffffffffLL; ++ u.d[0].ieee.mantissa0 = (hi >> 32) & ((1LL << (LDBL_MANT_DIG - 86)) - 1); + +- return u.d; ++ return u.ld; + } +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c 
glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c 2014-05-26 21:08:01.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c 2014-05-26 21:08:10.000000000 -0500 +@@ -27,31 +27,31 @@ + unsigned long long int num0, num1; \ + unsigned long long hi, lo; \ + int ediff; \ +- union ibm_extended_long_double eldbl; \ +- eldbl.d = fpnum.ldbl.d; \ ++ union ibm_extended_long_double u; \ ++ u.ld = fpnum.ldbl.d; \ + \ + assert (sizeof (long double) == 16); \ + \ +- lo = ((long long)eldbl.ieee.mantissa2 << 32) | eldbl.ieee.mantissa3; \ +- hi = ((long long)eldbl.ieee.mantissa0 << 32) | eldbl.ieee.mantissa1; \ ++ lo = ((long long)u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1; \ ++ hi = ((long long)u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1; \ + lo <<= 7; /* pre-shift lo to match ieee854. */ \ + /* If the lower double is not a denomal or zero then set the hidden \ + 53rd bit. */ \ +- if (eldbl.ieee.exponent2 != 0) \ ++ if (u.d[1].ieee.exponent != 0) \ + lo |= (1ULL << (52 + 7)); \ + else \ + lo <<= 1; \ + /* The lower double is normalized separately from the upper. We \ + may need to adjust the lower manitissa to reflect this. */ \ +- ediff = eldbl.ieee.exponent - eldbl.ieee.exponent2; \ ++ ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent; \ + if (ediff > 53 + 63) \ + lo = 0; \ + else if (ediff > 53) \ + lo = lo >> (ediff - 53); \ +- else if (eldbl.ieee.exponent2 == 0 && ediff < 53) \ ++ else if (u.d[1].ieee.exponent == 0 && ediff < 53) \ + lo = lo << (53 - ediff); \ +- if (eldbl.ieee.negative != eldbl.ieee.negative2 \ +- && (eldbl.ieee.exponent2 != 0 || lo != 0L)) \ ++ if (u.d[0].ieee.negative != u.d[1].ieee.negative \ ++ && (u.d[1].ieee.exponent != 0 || lo != 0L)) \ + { \ + lo = (1ULL << 60) - lo; \ + if (hi == 0L) \ +@@ -59,7 +59,7 @@ + /* we have a borrow from the hidden bit, so shift left 1. */ \ + hi = 0xffffffffffffeLL | (lo >> 59); \ + lo = 0xfffffffffffffffLL & (lo << 1); \ +- eldbl.ieee.exponent--; \ ++ u.d[0].ieee.exponent--; \ + } \ + else \ + hi--; \ +@@ -110,9 +110,9 @@ + *--wnumstr = L'0'; \ + } \ + \ +- leading = eldbl.ieee.exponent == 0 ? '0' : '1'; \ ++ leading = u.d[0].ieee.exponent == 0 ? '0' : '1'; \ + \ +- exponent = eldbl.ieee.exponent; \ ++ exponent = u.d[0].ieee.exponent; \ + \ + if (exponent == 0) \ + { \ +@@ -122,18 +122,18 @@ + { \ + /* This is a denormalized number. 
*/ \ + expnegative = 1; \ +- exponent = IBM_EXTENDED_LONG_DOUBLE_BIAS - 1; \ ++ exponent = IEEE754_DOUBLE_BIAS - 1; \ + } \ + } \ +- else if (exponent >= IBM_EXTENDED_LONG_DOUBLE_BIAS) \ ++ else if (exponent >= IEEE754_DOUBLE_BIAS) \ + { \ + expnegative = 0; \ +- exponent -= IBM_EXTENDED_LONG_DOUBLE_BIAS; \ ++ exponent -= IEEE754_DOUBLE_BIAS; \ + } \ + else \ + { \ + expnegative = 1; \ +- exponent = -(exponent - IBM_EXTENDED_LONG_DOUBLE_BIAS); \ ++ exponent = -(exponent - IEEE754_DOUBLE_BIAS); \ + } \ + } while (0) + +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c 2014-05-26 21:08:01.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c 2014-05-26 21:08:10.000000000 -0500 +@@ -33,11 +33,11 @@ + fenv_t env; + static const long double TWO52 = 4503599627370496.0L; + union ibm_extended_long_double u; +- u.d = x; ++ u.ld = x; + +- if (fabs (u.dd[0]) < TWO52) ++ if (fabs (u.d[0].d) < TWO52) + { +- double high = u.dd[0]; ++ double high = u.d[0].d; + feholdexcept (&env); + if (high > 0.0) + { +@@ -51,11 +51,11 @@ + high += TWO52; + if (high == 0.0) high = -0.0; + } +- u.dd[0] = high; +- u.dd[1] = 0.0; ++ u.d[0].d = high; ++ u.d[1].d = 0.0; + fesetenv (&env); + } +- else if (fabs (u.dd[1]) < TWO52 && u.dd[1] != 0.0) ++ else if (fabs (u.d[1].d) < TWO52 && u.d[1].d != 0.0) + { + double high, low, tau; + /* In this case we have to round the low double and handle any +@@ -64,55 +64,55 @@ + may already be rounded and the low double may have the + opposite sign to compensate. */ + feholdexcept (&env); +- if (u.dd[0] > 0.0) ++ if (u.d[0].d > 0.0) + { +- if (u.dd[1] > 0.0) ++ if (u.d[1].d > 0.0) + { + /* If the high/low doubles are the same sign then simply + round the low double. */ +- high = u.dd[0]; +- low = u.dd[1]; ++ high = u.d[0].d; ++ low = u.d[1].d; + } +- else if (u.dd[1] < 0.0) ++ else if (u.d[1].d < 0.0) + { + /* Else the high double is pre rounded and we need to + adjust for that. */ + +- tau = __nextafter (u.dd[0], 0.0); +- tau = (u.dd[0] - tau) * 2.0; +- high = u.dd[0] - tau; +- low = u.dd[1] + tau; ++ tau = __nextafter (u.d[0].d, 0.0); ++ tau = (u.d[0].d - tau) * 2.0; ++ high = u.d[0].d - tau; ++ low = u.d[1].d + tau; + } + low += TWO52; + low -= TWO52; + } +- else if (u.dd[0] < 0.0) ++ else if (u.d[0].d < 0.0) + { +- if (u.dd[1] < 0.0) ++ if (u.d[1].d < 0.0) + { + /* If the high/low doubles are the same sign then simply + round the low double. */ +- high = u.dd[0]; +- low = u.dd[1]; ++ high = u.d[0].d; ++ low = u.d[1].d; + } +- else if (u.dd[1] > 0.0) ++ else if (u.d[1].d > 0.0) + { + /* Else the high double is pre rounded and we need to + adjust for that. 
*/ +- tau = __nextafter (u.dd[0], 0.0); +- tau = (u.dd[0] - tau) * 2.0; +- high = u.dd[0] - tau; +- low = u.dd[1] + tau; ++ tau = __nextafter (u.d[0].d, 0.0); ++ tau = (u.d[0].d - tau) * 2.0; ++ high = u.d[0].d - tau; ++ low = u.d[1].d + tau; + } + low = TWO52 - low; + low = -(low - TWO52); + } +- u.dd[0] = high + low; +- u.dd[1] = high - u.dd[0] + low; ++ u.d[0].d = high + low; ++ u.d[1].d = high - u.d[0].d + low; + fesetenv (&env); + } + +- return u.d; ++ return u.ld; + } + + long_double_symbol (libm, __nearbyintl, nearbyintl); +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/strtold_l.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/strtold_l.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/strtold_l.c 2014-05-26 21:08:01.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/strtold_l.c 2014-05-26 21:12:01.000000000 -0500 +@@ -43,12 +43,11 @@ + #define FLOAT_HUGE_VAL HUGE_VALL + # define SET_MANTISSA(flt, mant) \ + do { union ibm_extended_long_double u; \ +- u.d = (flt); \ +- if ((mant & 0xfffffffffffffULL) == 0) \ +- mant = 0x8000000000000ULL; \ +- u.ieee.mantissa0 = ((mant) >> 32) & 0xfffff; \ +- u.ieee.mantissa1 = (mant) & 0xffffffff; \ +- (flt) = u.d; \ ++ u.ld = (flt); \ ++ u.d[0].ieee_nan.mantissa0 = (mant) >> 32; \ ++ u.d[0].ieee_nan.mantissa1 = (mant); \ ++ if ((u.d[0].ieee.mantissa0 | u.d[0].ieee.mantissa1) != 0) \ ++ (flt) = u.ld; \ + } while (0) + + #include +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c 2014-05-26 21:08:01.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c 2014-05-26 21:08:10.000000000 -0500 +@@ -89,23 +89,23 @@ + double vals[12]; + SET_RESTORE_ROUND (FE_TONEAREST); + union ibm_extended_long_double xu, yu; +- xu.d = x; +- yu.d = y; +- if (fabs (xu.dd[1]) < 0x1p-500) +- xu.dd[1] = 0.0; +- if (fabs (yu.dd[1]) < 0x1p-500) +- yu.dd[1] = 0.0; +- mul_split (&vals[1], &vals[0], xu.dd[0], xu.dd[0]); +- mul_split (&vals[3], &vals[2], xu.dd[0], xu.dd[1]); ++ xu.ld = x; ++ yu.ld = y; ++ if (fabs (xu.d[1].d) < 0x1p-500) ++ xu.d[1].d = 0.0; ++ if (fabs (yu.d[1].d) < 0x1p-500) ++ yu.d[1].d = 0.0; ++ mul_split (&vals[1], &vals[0], xu.d[0].d, xu.d[0].d); ++ mul_split (&vals[3], &vals[2], xu.d[0].d, xu.d[1].d); + vals[2] *= 2.0; + vals[3] *= 2.0; +- mul_split (&vals[5], &vals[4], xu.dd[1], xu.dd[1]); +- mul_split (&vals[7], &vals[6], yu.dd[0], yu.dd[0]); +- mul_split (&vals[9], &vals[8], yu.dd[0], yu.dd[1]); ++ mul_split (&vals[5], &vals[4], xu.d[1].d, xu.d[1].d); ++ mul_split (&vals[7], &vals[6], yu.d[0].d, yu.d[0].d); ++ mul_split (&vals[9], &vals[8], yu.d[0].d, yu.d[1].d); + vals[8] *= 2.0; + vals[9] *= 2.0; +- mul_split (&vals[11], &vals[10], yu.dd[1], yu.dd[1]); +- if (xu.dd[0] >= 0.75) ++ mul_split (&vals[11], &vals[10], yu.d[1].d, yu.d[1].d); ++ if (xu.d[0].d >= 0.75) + vals[1] -= 1.0; + else + { diff --git a/packages/glibc/2.17/0027-glibc-ppc64le-05.patch b/packages/glibc/2.17/0027-glibc-ppc64le-05.patch new file mode 100644 index 0000000..d089fda --- /dev/null +++ b/packages/glibc/2.17/0027-glibc-ppc64le-05.patch @@ -0,0 +1,486 @@ +# commit 4cf69995e26e16005d4e3843ad4d18c75cf21a04 +# Author: Alan Modra +# Date: Sat Aug 17 18:19:44 2013 +0930 +# +# Fix for [BZ #15680] IBM long double inaccuracy +# http://sourceware.org/ml/libc-alpha/2013-06/msg00919.html +# +# I discovered a number of places where denormals and other corner cases +# were being handled wrongly. 
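[Editorial note, not part of the patch] Several of the fixes enumerated in the notes below turn on one IEEE-754 corner case: a biased exponent field of zero marks a denormal (subnormal) value, which must be treated as if the exponent were 1 and carries no implicit leading mantissa bit. A minimal sketch of that decoding rule for a single IEEE double follows; the function name decode_double and the chosen test value are illustrative only.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Extract the unbiased exponent and significand of an IEEE double,
   honouring the denormal rule: an exponent field of 0 behaves like 1
   and contributes no implicit bit.  */
static void
decode_double (double x, int *exp, uint64_t *mant)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);

  int biased = (bits >> 52) & 0x7ff;
  uint64_t frac = bits & (((uint64_t) 1 << 52) - 1);

  if (biased != 0)
    {
      *exp = biased - 1023;            /* normal: implicit bit present.  */
      *mant = frac | ((uint64_t) 1 << 52);
    }
  else
    {
      *exp = 1 - 1023;                 /* denormal: acts like exponent 1.  */
      *mant = frac;                    /* no implicit bit.  */
    }
}

int
main (void)
{
  int e;
  uint64_t m;
  decode_double (0x1p-1050, &e, &m);   /* a subnormal double.  */
  printf ("exp=%d mant=%#llx\n", e, (unsigned long long) m);
  return 0;
}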
+# +# - printf_fphex.c: Testing for the low double exponent being zero is +# unnecessary. If the difference in exponents is less than 53 then the +# high double exponent must be nearing the low end of its range, and the +# low double exponent hit rock bottom. +# +# - ldbl2mpn.c: A denormal (ie. exponent of zero) value is treated as +# if the exponent was one, so shift mantissa left by one. Code handling +# normalisation of the low double mantissa lacked a test for shift count +# greater than bits in type being shifted, and lacked anything to handle +# the case where the difference in exponents is less than 53 as in +# printf_fphex.c. +# +# - math_ldbl.h (ldbl_extract_mantissa): Same as above, but worse, with +# code testing for exponent > 1 for some reason, probably a typo for >= 1. +# +# - math_ldbl.h (ldbl_insert_mantissa): Round the high double as per +# mpn2ldbl.c (hi is odd or explicit mantissas non-zero) so that the +# number we return won't change when applying ldbl_canonicalize(). +# Add missing overflow checks and normalisation of high mantissa. +# Correct misleading comment: "The hidden bit of the lo mantissa is +# zero" is not always true as can be seen from the code rounding the hi +# mantissa. Also by inspection, lzcount can never be less than zero so +# remove that test. Lastly, masking bitfields to their widths can be +# left to the compiler. +# +# - mpn2ldbl.c: The overflow checks here on rounding of high double were +# just plain wrong. Incrementing the exponent must be accompanied by a +# shift right of the mantissa to keep the value unchanged. Above notes +# for ldbl_insert_mantissa are also relevant. +# +# [BZ #15680] +# * sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c: Comment fix. +# * sysdeps/ieee754/ldbl-128ibm/printf_fphex.c +# (PRINT_FPHEX_LONG_DOUBLE): Tidy code by moving -53 into ediff +# calculation. Remove unnecessary test for denormal exponent. +# * sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c (__mpn_extract_long_double): +# Correct handling of denormals. Avoid undefined shift behaviour. +# Correct normalisation of low mantissa when low double is denormal. +# * sysdeps/ieee754/ldbl-128ibm/math_ldbl.h +# (ldbl_extract_mantissa): Likewise. Comment. Use uint64_t* for hi64. +# (ldbl_insert_mantissa): Make both hi64 and lo64 parms uint64_t. +# Correct normalisation of low mantissa. Test for overflow of high +# mantissa and normalise. +# (ldbl_nearbyint): Use more readable constant for two52. +# * sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c +# (__mpn_construct_long_double): Fix test for overflow of high +# mantissa and correct normalisation. Avoid undefined shift. +# +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c 2014-05-27 19:13:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c 2014-05-27 19:14:45.000000000 -0500 +@@ -243,7 +243,7 @@ + We split the 113 bits of the mantissa into 5 24bit integers + stored in a double array. */ + /* Make the IBM extended format 105 bit mantissa look like the ieee854 112 +- bit mantissa so the next operatation will give the correct result. */ ++ bit mantissa so the next operation will give the correct result. 
*/ + ldbl_extract_mantissa (&ixd, &lxd, &exp, x); + exp = exp - 23; + /* This is faster than doing this in floating point, because we +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c 2014-05-27 19:13:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c 2014-05-27 19:14:45.000000000 -0500 +@@ -36,6 +36,7 @@ + union ibm_extended_long_double u; + unsigned long long hi, lo; + int ediff; ++ + u.ld = value; + + *is_neg = u.d[0].ieee.negative; +@@ -43,27 +44,36 @@ + + lo = ((long long) u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1; + hi = ((long long) u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1; +- /* If the lower double is not a denomal or zero then set the hidden ++ ++ /* If the lower double is not a denormal or zero then set the hidden + 53rd bit. */ +- if (u.d[1].ieee.exponent > 0) +- { +- lo |= 1LL << 52; ++ if (u.d[1].ieee.exponent != 0) ++ lo |= 1ULL << 52; ++ else ++ lo = lo << 1; + +- /* The lower double is normalized separately from the upper. We may +- need to adjust the lower manitissa to reflect this. */ +- ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent; +- if (ediff > 53) +- lo = lo >> (ediff-53); ++ /* The lower double is normalized separately from the upper. We may ++ need to adjust the lower manitissa to reflect this. */ ++ ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent - 53; ++ if (ediff > 0) ++ { ++ if (ediff < 64) ++ lo = lo >> ediff; ++ else ++ lo = 0; + } ++ else if (ediff < 0) ++ lo = lo << -ediff; ++ + /* The high double may be rounded and the low double reflects the + difference between the long double and the rounded high double + value. This is indicated by a differnce between the signs of the + high and low doubles. */ +- if ((u.d[0].ieee.negative != u.d[1].ieee.negative) +- && ((u.d[1].ieee.exponent != 0) && (lo != 0L))) ++ if (u.d[0].ieee.negative != u.d[1].ieee.negative ++ && lo != 0) + { + lo = (1ULL << 53) - lo; +- if (hi == 0LL) ++ if (hi == 0) + { + /* we have a borrow from the hidden bit, so shift left 1. */ + hi = 0x0ffffffffffffeLL | (lo >> 51); +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h 2014-05-27 19:13:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h 2014-05-27 19:51:13.000000000 -0500 +@@ -13,77 +13,118 @@ + the number before the decimal point and the second implicit bit + as bit 53 of the mantissa. */ + uint64_t hi, lo; +- int ediff; + union ibm_extended_long_double u; ++ + u.ld = x; + *exp = u.d[0].ieee.exponent - IEEE754_DOUBLE_BIAS; + + lo = ((uint64_t)u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1; + hi = ((uint64_t)u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1; +- /* If the lower double is not a denomal or zero then set the hidden +- 53rd bit. */ +- if (u.d[1].ieee.exponent > 0x001) +- { +- lo |= (1ULL << 52); +- lo = lo << 7; /* pre-shift lo to match ieee854. */ +- /* The lower double is normalized separately from the upper. We +- may need to adjust the lower manitissa to reflect this. 
*/ +- ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent; +- if (ediff > 53) +- lo = lo >> (ediff-53); +- hi |= (1ULL << 52); +- } + +- if ((u.d[0].ieee.negative != u.d[1].ieee.negative) +- && ((u.d[1].ieee.exponent != 0) && (lo != 0LL))) ++ if (u.d[0].ieee.exponent != 0) + { +- hi--; +- lo = (1ULL << 60) - lo; +- if (hi < (1ULL << 52)) ++ int ediff; ++ ++ /* If not a denormal or zero then we have an implicit 53rd bit. */ ++ hi |= (uint64_t) 1 << 52; ++ ++ if (u.d[1].ieee.exponent != 0) ++ lo |= (uint64_t) 1 << 52; ++ else ++ /* A denormal is to be interpreted as having a biased exponent ++ of 1. */ ++ lo = lo << 1; ++ ++ /* We are going to shift 4 bits out of hi later, because we only ++ want 48 bits in *hi64. That means we want 60 bits in lo, but ++ we currently only have 53. Shift the value up. */ ++ lo = lo << 7; ++ ++ /* The lower double is normalized separately from the upper. ++ We may need to adjust the lower mantissa to reflect this. ++ The difference between the exponents can be larger than 53 ++ when the low double is much less than 1ULP of the upper ++ (in which case there are significant bits, all 0's or all ++ 1's, between the two significands). The difference between ++ the exponents can be less than 53 when the upper double ++ exponent is nearing its minimum value (in which case the low ++ double is denormal ie. has an exponent of zero). */ ++ ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent - 53; ++ if (ediff > 0) + { +- /* we have a borrow from the hidden bit, so shift left 1. */ +- hi = (hi << 1) | (lo >> 59); +- lo = 0xfffffffffffffffLL & (lo << 1); +- *exp = *exp - 1; ++ if (ediff < 64) ++ lo = lo >> ediff; ++ else ++ lo = 0; ++ } ++ else if (ediff < 0) ++ lo = lo << -ediff; ++ ++ if (u.d[0].ieee.negative != u.d[1].ieee.negative ++ && lo != 0) ++ { ++ hi--; ++ lo = ((uint64_t) 1 << 60) - lo; ++ if (hi < (uint64_t) 1 << 52) ++ { ++ /* We have a borrow from the hidden bit, so shift left 1. */ ++ hi = (hi << 1) | (lo >> 59); ++ lo = (((uint64_t) 1 << 60) - 1) & (lo << 1); ++ *exp = *exp - 1; ++ } + } + } ++ else ++ /* If the larger magnitude double is denormal then the smaller ++ one must be zero. */ ++ hi = hi << 1; ++ + *lo64 = (hi << 60) | lo; + *hi64 = hi >> 4; + } + + static inline long double +-ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64) ++ldbl_insert_mantissa (int sign, int exp, int64_t hi64, uint64_t lo64) + { + union ibm_extended_long_double u; +- unsigned long hidden2, lzcount; +- unsigned long long hi, lo; ++ int expnt2; ++ uint64_t hi, lo; + + u.d[0].ieee.negative = sign; + u.d[1].ieee.negative = sign; + u.d[0].ieee.exponent = exp + IEEE754_DOUBLE_BIAS; +- u.d[1].ieee.exponent = exp-53 + IEEE754_DOUBLE_BIAS; ++ u.d[1].ieee.exponent = 0; ++ expnt2 = exp - 53 + IEEE754_DOUBLE_BIAS; ++ + /* Expect 113 bits (112 bits + hidden) right justified in two longs. + The low order 53 bits (52 + hidden) go into the lower double */ +- lo = (lo64 >> 7)& ((1ULL << 53) - 1); +- hidden2 = (lo64 >> 59) & 1ULL; ++ lo = (lo64 >> 7) & (((uint64_t) 1 << 53) - 1); + /* The high order 53 bits (52 + hidden) go into the upper double */ +- hi = (lo64 >> 60) & ((1ULL << 11) - 1); +- hi |= (hi64 << 4); ++ hi = lo64 >> 60; ++ hi |= hi64 << 4; + +- if (lo != 0LL) ++ if (lo != 0) + { +- /* hidden2 bit of low double controls rounding of the high double. +- If hidden2 is '1' then round up hi and adjust lo (2nd mantissa) ++ int lzcount; ++ ++ /* hidden bit of low double controls rounding of the high double. 
++ If hidden is '1' and either the explicit mantissa is non-zero ++ or hi is odd, then round up hi and adjust lo (2nd mantissa) + plus change the sign of the low double to compensate. */ +- if (hidden2) ++ if ((lo & ((uint64_t) 1 << 52)) != 0 ++ && ((hi & 1) != 0 || (lo & (((uint64_t) 1 << 52) - 1)) != 0)) + { + hi++; ++ if ((hi & ((uint64_t) 1 << 53)) != 0) ++ { ++ hi = hi >> 1; ++ u.d[0].ieee.exponent++; ++ } + u.d[1].ieee.negative = !sign; +- lo = (1ULL << 53) - lo; ++ lo = ((uint64_t) 1 << 53) - lo; + } +- /* The hidden bit of the lo mantissa is zero so we need to +- normalize the it for the low double. Shift it left until the +- hidden bit is '1' then adjust the 2nd exponent accordingly. */ ++ /* Normalize the low double. Shift the mantissa left until ++ the hidden bit is '1' and adjust the exponent accordingly. */ + + if (sizeof (lo) == sizeof (long)) + lzcount = __builtin_clzl (lo); +@@ -91,34 +132,30 @@ + lzcount = __builtin_clzl ((long) (lo >> 32)); + else + lzcount = __builtin_clzl ((long) lo) + 32; +- lzcount = lzcount - 11; +- if (lzcount > 0) ++ lzcount = lzcount - (64 - 53); ++ lo <<= lzcount; ++ expnt2 -= lzcount; ++ ++ if (expnt2 >= 1) ++ /* Not denormal. */ ++ u.d[1].ieee.exponent = expnt2; ++ else + { +- int expnt2 = u.d[1].ieee.exponent - lzcount; +- if (expnt2 >= 1) +- { +- /* Not denormal. Normalize and set low exponent. */ +- lo = lo << lzcount; +- u.d[1].ieee.exponent = expnt2; +- } ++ /* Is denormal. Note that biased exponent of 0 is treated ++ as if it was 1, hence the extra shift. */ ++ if (expnt2 > -53) ++ lo >>= 1 - expnt2; + else +- { +- /* Is denormal. */ +- lo = lo << (lzcount + expnt2); +- u.d[1].ieee.exponent = 0; +- } ++ lo = 0; + } + } + else +- { +- u.d[1].ieee.negative = 0; +- u.d[1].ieee.exponent = 0; +- } ++ u.d[1].ieee.negative = 0; + +- u.d[1].ieee.mantissa1 = lo & ((1ULL << 32) - 1); +- u.d[1].ieee.mantissa0 = (lo >> 32) & ((1ULL << 20) - 1); +- u.d[0].ieee.mantissa1 = hi & ((1ULL << 32) - 1); +- u.d[0].ieee.mantissa0 = (hi >> 32) & ((1ULL << 20) - 1); ++ u.d[1].ieee.mantissa1 = lo; ++ u.d[1].ieee.mantissa0 = lo >> 32; ++ u.d[0].ieee.mantissa1 = hi; ++ u.d[0].ieee.mantissa0 = hi >> 32; + return u.ld; + } + +@@ -133,6 +170,10 @@ + return u.ld; + } + ++/* To suit our callers we return *hi64 and *lo64 as if they came from ++ an ieee854 112 bit mantissa, that is, 48 bits in *hi64 (plus one ++ implicit bit) and 64 bits in *lo64. */ ++ + static inline void + default_ldbl_unpack (long double l, double *a, double *aa) + { +@@ -162,13 +203,13 @@ + *aa = xl; + } + +-/* Simple inline nearbyint (double) function . ++/* Simple inline nearbyint (double) function. + Only works in the default rounding mode + but is useful in long double rounding functions. 
*/ + static inline double + ldbl_nearbyint (double a) + { +- double two52 = 0x10000000000000LL; ++ double two52 = 0x1p52; + + if (__builtin_expect ((__builtin_fabs (a) < two52), 1)) + { +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c 2014-05-27 19:13:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c 2014-05-27 19:14:45.000000000 -0500 +@@ -70,9 +70,9 @@ + else + lzcount = __builtin_clzl ((long) val) + 32; + if (hi) +- lzcount = lzcount - 11; ++ lzcount = lzcount - (64 - 53); + else +- lzcount = lzcount + 42; ++ lzcount = lzcount + 53 - (64 - 53); + + if (lzcount > u.d[0].ieee.exponent) + { +@@ -98,29 +98,27 @@ + } + } + +- if (lo != 0L) ++ if (lo != 0) + { +- /* hidden2 bit of low double controls rounding of the high double. +- If hidden2 is '1' and either the explicit mantissa is non-zero ++ /* hidden bit of low double controls rounding of the high double. ++ If hidden is '1' and either the explicit mantissa is non-zero + or hi is odd, then round up hi and adjust lo (2nd mantissa) + plus change the sign of the low double to compensate. */ + if ((lo & (1LL << 52)) != 0 +- && ((hi & 1) != 0 || (lo & ((1LL << 52) - 1)))) ++ && ((hi & 1) != 0 || (lo & ((1LL << 52) - 1)) != 0)) + { + hi++; +- if ((hi & ((1LL << 52) - 1)) == 0) ++ if ((hi & (1LL << 53)) != 0) + { +- if ((hi & (1LL << 53)) != 0) +- hi -= 1LL << 52; ++ hi >>= 1; + u.d[0].ieee.exponent++; + } + u.d[1].ieee.negative = !sign; + lo = (1LL << 53) - lo; + } + +- /* The hidden bit of the lo mantissa is zero so we need to normalize +- it for the low double. Shift it left until the hidden bit is '1' +- then adjust the 2nd exponent accordingly. */ ++ /* Normalize the low double. Shift the mantissa left until ++ the hidden bit is '1' and adjust the exponent accordingly. */ + + if (sizeof (lo) == sizeof (long)) + lzcount = __builtin_clzl (lo); +@@ -128,24 +126,24 @@ + lzcount = __builtin_clzl ((long) (lo >> 32)); + else + lzcount = __builtin_clzl ((long) lo) + 32; +- lzcount = lzcount - 11; +- if (lzcount > 0) +- { +- lo = lo << lzcount; +- exponent2 = exponent2 - lzcount; +- } ++ lzcount = lzcount - (64 - 53); ++ lo <<= lzcount; ++ exponent2 -= lzcount; ++ + if (exponent2 > 0) + u.d[1].ieee.exponent = exponent2; +- else ++ else if (exponent2 > -53) + lo >>= 1 - exponent2; ++ else ++ lo = 0; + } + else + u.d[1].ieee.negative = 0; + +- u.d[1].ieee.mantissa1 = lo & 0xffffffffLL; +- u.d[1].ieee.mantissa0 = (lo >> 32) & 0xfffff; +- u.d[0].ieee.mantissa1 = hi & 0xffffffffLL; +- u.d[0].ieee.mantissa0 = (hi >> 32) & ((1LL << (LDBL_MANT_DIG - 86)) - 1); ++ u.d[1].ieee.mantissa1 = lo; ++ u.d[1].ieee.mantissa0 = lo >> 32; ++ u.d[0].ieee.mantissa1 = hi; ++ u.d[0].ieee.mantissa0 = hi >> 32; + + return u.ld; + } +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c 2014-05-27 19:13:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c 2014-05-27 19:14:45.000000000 -0500 +@@ -43,15 +43,15 @@ + lo <<= 1; \ + /* The lower double is normalized separately from the upper. We \ + may need to adjust the lower manitissa to reflect this. 
*/ \ +- ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent; \ +- if (ediff > 53 + 63) \ ++ ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent - 53; \ ++ if (ediff > 63) \ + lo = 0; \ +- else if (ediff > 53) \ +- lo = lo >> (ediff - 53); \ +- else if (u.d[1].ieee.exponent == 0 && ediff < 53) \ +- lo = lo << (53 - ediff); \ ++ else if (ediff > 0) \ ++ lo = lo >> ediff; \ ++ else if (ediff < 0) \ ++ lo = lo << -ediff; \ + if (u.d[0].ieee.negative != u.d[1].ieee.negative \ +- && (u.d[1].ieee.exponent != 0 || lo != 0L)) \ ++ && lo != 0) \ + { \ + lo = (1ULL << 60) - lo; \ + if (hi == 0L) \ diff --git a/packages/glibc/2.17/0028-glibc-ppc64le-06.patch b/packages/glibc/2.17/0028-glibc-ppc64le-06.patch new file mode 100644 index 0000000..fe7cf61 --- /dev/null +++ b/packages/glibc/2.17/0028-glibc-ppc64le-06.patch @@ -0,0 +1,652 @@ +# commit 1b6adf888de14675bc3207578dcb7132ed5f8ecc +# Author: Alan Modra +# Date: Sat Aug 17 18:21:58 2013 +0930 +# +# PowerPC floating point little-endian [1 of 15] +# http://sourceware.org/ml/libc-alpha/2013-08/msg00081.html +# +# This is the first of a series of patches to ban ieee854_long_double +# and the ieee854_long_double macros when using IBM long double. union +# ieee854_long_double just isn't correct for IBM long double, especially +# when little-endian, and pretending it is OK has allowed a number of +# bugs to remain undetected in sysdeps/ieee754/ldbl-128ibm/. +# +# This changes the few places in generic code that use it. +# +# * stdio-common/printf_size.c (__printf_size): Don't use +# union ieee854_long_double in fpnum union. +# * stdio-common/printf_fphex.c (__printf_fphex): Likewise. Use +# signbit macro to retrieve sign from long double. +# * stdio-common/printf_fp.c (___printf_fp): Use signbit macro to +# retrieve sign from long double. +# * sysdeps/ieee754/ldbl-128ibm/printf_fphex.c: Adjust for fpnum change. +# * sysdeps/ieee754/ldbl-128/printf_fphex.c: Likewise. +# * sysdeps/ieee754/ldbl-96/printf_fphex.c: Likewise. +# * sysdeps/x86_64/fpu/printf_fphex.c: Likewise. +# * math/test-misc.c (main): Don't use union ieee854_long_double. +# ports/ +# * sysdeps/ia64/fpu/printf_fphex.c: Adjust for fpnum change. 
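[Editorial note, not part of the patch] The conversions listed in the commit message above share one pattern: rather than reading sign and class information through union ieee854_long_double, whose layout does not match IBM long double (especially little-endian), generic code now asks the <math.h> type-generic macros. A short sketch of that style, independent of glibc internals, is below; the sample values are arbitrary.

#include <math.h>
#include <stdio.h>

/* Classify a long double without looking at its representation, in the
   style the patch switches the printf helpers to: signbit, isnan, isinf
   and fpclassify work for any long double format, including IBM
   double-double, so no format-specific union is needed.  */
static void
describe (long double x)
{
  const char *kind = isnan (x) ? "nan"
                     : isinf (x) ? "inf"
                     : fpclassify (x) == FP_SUBNORMAL ? "subnormal"
                     : fpclassify (x) == FP_ZERO ? "zero"
                     : "normal";

  printf ("%La: %s%s\n", x, signbit (x) ? "negative " : "", kind);
}

int
main (void)
{
  describe (-0.0625L);
  describe (0.0L);
  describe (-INFINITY);
  return 0;
}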
+# +diff -urN glibc-2.17-c758a686/math/test-misc.c glibc-2.17-c758a686/math/test-misc.c +--- glibc-2.17-c758a686/math/test-misc.c 2014-05-27 19:53:22.000000000 -0500 ++++ glibc-2.17-c758a686/math/test-misc.c 2014-05-27 19:53:45.000000000 -0500 +@@ -721,300 +721,161 @@ + + #ifndef NO_LONG_DOUBLE + { +- union ieee854_long_double v1; +- union ieee854_long_double v2; +- long double ld; ++ long double v1, v2; + +- v1.d = ld = LDBL_MIN; +- if (fpclassify (ld) != FP_NORMAL) ++ v1 = LDBL_MIN; ++ if (fpclassify (v1) != FP_NORMAL) + { +- printf ("fpclassify (LDBL_MIN) failed: %d\n", fpclassify (ld)); ++ printf ("fpclassify (LDBL_MIN) failed: %d (%La)\n", ++ fpclassify (v1), v1); + result = 1; + } +- ld = nextafterl (ld, LDBL_MIN / 2.0); +- if (fpclassify (ld) != FP_SUBNORMAL) ++ v2 = nextafterl (v1, LDBL_MIN / 2.0); ++ if (fpclassify (v2) != FP_SUBNORMAL) + { + printf ("fpclassify (LDBL_MIN-epsilon) failed: %d (%La)\n", +- fpclassify (ld), ld); ++ fpclassify (v2), v2); + result = 1; + } +- v2.d = ld = nextafterl (ld, LDBL_MIN); +- if (fpclassify (ld) != FP_NORMAL) ++ v2 = nextafterl (v2, LDBL_MIN); ++ if (fpclassify (v2) != FP_NORMAL) + { + printf ("fpclassify (LDBL_MIN-epsilon+epsilon) failed: %d (%La)\n", +- fpclassify (ld), ld); ++ fpclassify (v2), v2); + result = 1; + } + +- if (v1.ieee.mantissa0 != v2.ieee.mantissa0) ++ if (v1 != v2) + { +- printf ("LDBL_MIN: mantissa0 differs: %8x vs %8x\n", +- v1.ieee.mantissa0, v2.ieee.mantissa0); +- result = 1; +- } +- if (v1.ieee.mantissa1 != v2.ieee.mantissa1) +- { +- printf ("LDBL_MIN: mantissa1 differs: %8x vs %8x\n", +- v1.ieee.mantissa1, v2.ieee.mantissa1); +- result = 1; +- } +- if (v1.ieee.exponent != v2.ieee.exponent) +- { +- printf ("LDBL_MIN: exponent differs: %4x vs %4x\n", +- v1.ieee.exponent, v2.ieee.exponent); +- result = 1; +- } +- if (v1.ieee.negative != v2.ieee.negative) +- { +- printf ("LDBL_MIN: negative differs: %d vs %d\n", +- v1.ieee.negative, v2.ieee.negative); ++ printf ("LDBL_MIN-epsilon+epsilon != LDBL_MIN: %La vs %La\n", v2, v1); + result = 1; + } + +- v1.d = ld = -LDBL_MIN; +- if (fpclassify (ld) != FP_NORMAL) ++ v1 = -LDBL_MIN; ++ if (fpclassify (v1) != FP_NORMAL) + { +- printf ("fpclassify (-LDBL_MIN) failed: %d\n", fpclassify (ld)); ++ printf ("fpclassify (-LDBL_MIN) failed: %d (%La)\n", ++ fpclassify (v1), v1); + result = 1; + } +- ld = nextafterl (ld, -LDBL_MIN / 2.0); +- if (fpclassify (ld) != FP_SUBNORMAL) ++ v2 = nextafterl (v1, -LDBL_MIN / 2.0); ++ if (fpclassify (v2) != FP_SUBNORMAL) + { + printf ("fpclassify (-LDBL_MIN-epsilon) failed: %d (%La)\n", +- fpclassify (ld), ld); ++ fpclassify (v2), v2); + result = 1; + } +- v2.d = ld = nextafterl (ld, -LDBL_MIN); +- if (fpclassify (ld) != FP_NORMAL) ++ v2 = nextafterl (v2, -LDBL_MIN); ++ if (fpclassify (v2) != FP_NORMAL) + { + printf ("fpclassify (-LDBL_MIN-epsilon+epsilon) failed: %d (%La)\n", +- fpclassify (ld), ld); ++ fpclassify (v2), v2); + result = 1; + } + +- if (v1.ieee.mantissa0 != v2.ieee.mantissa0) ++ if (v1 != v2) + { +- printf ("-LDBL_MIN: mantissa0 differs: %8x vs %8x\n", +- v1.ieee.mantissa0, v2.ieee.mantissa0); +- result = 1; +- } +- if (v1.ieee.mantissa1 != v2.ieee.mantissa1) +- { +- printf ("-LDBL_MIN: mantissa1 differs: %8x vs %8x\n", +- v1.ieee.mantissa1, v2.ieee.mantissa1); +- result = 1; +- } +- if (v1.ieee.exponent != v2.ieee.exponent) +- { +- printf ("-LDBL_MIN: exponent differs: %4x vs %4x\n", +- v1.ieee.exponent, v2.ieee.exponent); +- result = 1; +- } +- if (v1.ieee.negative != v2.ieee.negative) +- { +- printf ("-LDBL_MIN: negative differs: %d vs 
%d\n", +- v1.ieee.negative, v2.ieee.negative); ++ printf ("-LDBL_MIN-epsilon+epsilon != -LDBL_MIN: %La vs %La\n", v2, v1); + result = 1; + } + +- ld = LDBL_MAX; +- if (fpclassify (ld) != FP_NORMAL) ++ v1 = LDBL_MAX; ++ if (fpclassify (v1) != FP_NORMAL) + { +- printf ("fpclassify (LDBL_MAX) failed: %d\n", fpclassify (ld)); ++ printf ("fpclassify (LDBL_MAX) failed: %d (%La)\n", ++ fpclassify (v1), v1); + result = 1; + } +- ld = nextafterl (ld, INFINITY); +- if (fpclassify (ld) != FP_INFINITE) ++ v2 = nextafterl (v1, INFINITY); ++ if (fpclassify (v2) != FP_INFINITE) + { +- printf ("fpclassify (LDBL_MAX+epsilon) failed: %d\n", fpclassify (ld)); ++ printf ("fpclassify (LDBL_MAX+epsilon) failed: %d (%La)\n", ++ fpclassify (v2), v2); + result = 1; + } + +- ld = -LDBL_MAX; +- if (fpclassify (ld) != FP_NORMAL) ++ v1 = -LDBL_MAX; ++ if (fpclassify (v1) != FP_NORMAL) + { +- printf ("fpclassify (-LDBL_MAX) failed: %d\n", fpclassify (ld)); ++ printf ("fpclassify (-LDBL_MAX) failed: %d (%La)\n", ++ fpclassify (v1), v1); + result = 1; + } +- ld = nextafterl (ld, -INFINITY); +- if (fpclassify (ld) != FP_INFINITE) ++ v2 = nextafterl (v1, -INFINITY); ++ if (fpclassify (v2) != FP_INFINITE) + { +- printf ("fpclassify (-LDBL_MAX-epsilon) failed: %d\n", +- fpclassify (ld)); ++ printf ("fpclassify (-LDBL_MAX-epsilon) failed: %d (%La)\n", ++ fpclassify (v2), v2); + result = 1; + } + +- v1.d = ld = 0.0625; +- ld = nextafterl (ld, 0.0); +- v2.d = ld = nextafterl (ld, 1.0); ++ v1 = 0.0625; ++ v2 = nextafterl (v1, 0.0); ++ v2 = nextafterl (v2, 1.0); + +- if (v1.ieee.mantissa0 != v2.ieee.mantissa0) +- { +- printf ("0.0625L down: mantissa0 differs: %8x vs %8x\n", +- v1.ieee.mantissa0, v2.ieee.mantissa0); +- result = 1; +- } +- if (v1.ieee.mantissa1 != v2.ieee.mantissa1) +- { +- printf ("0.0625L down: mantissa1 differs: %8x vs %8x\n", +- v1.ieee.mantissa1, v2.ieee.mantissa1); +- result = 1; +- } +- if (v1.ieee.exponent != v2.ieee.exponent) +- { +- printf ("0.0625L down: exponent differs: %4x vs %4x\n", +- v1.ieee.exponent, v2.ieee.exponent); +- result = 1; +- } +- if (v1.ieee.negative != v2.ieee.negative) ++ if (v1 != v2) + { +- printf ("0.0625L down: negative differs: %d vs %d\n", +- v1.ieee.negative, v2.ieee.negative); ++ printf ("0.0625L-epsilon+epsilon != 0.0625L: %La vs %La\n", v2, v1); + result = 1; + } + +- v1.d = ld = 0.0625; +- ld = nextafterl (ld, 1.0); +- v2.d = ld = nextafterl (ld, 0.0); ++ v1 = 0.0625; ++ v2 = nextafterl (v1, 1.0); ++ v2 = nextafterl (v2, 0.0); + +- if (v1.ieee.mantissa0 != v2.ieee.mantissa0) +- { +- printf ("0.0625L up: mantissa0 differs: %8x vs %8x\n", +- v1.ieee.mantissa0, v2.ieee.mantissa0); +- result = 1; +- } +- if (v1.ieee.mantissa1 != v2.ieee.mantissa1) +- { +- printf ("0.0625L up: mantissa1 differs: %8x vs %8x\n", +- v1.ieee.mantissa1, v2.ieee.mantissa1); +- result = 1; +- } +- if (v1.ieee.exponent != v2.ieee.exponent) ++ if (v1 != v2) + { +- printf ("0.0625L up: exponent differs: %4x vs %4x\n", +- v1.ieee.exponent, v2.ieee.exponent); +- result = 1; +- } +- if (v1.ieee.negative != v2.ieee.negative) +- { +- printf ("0.0625L up: negative differs: %d vs %d\n", +- v1.ieee.negative, v2.ieee.negative); ++ printf ("0.0625L+epsilon-epsilon != 0.0625L: %La vs %La\n", v2, v1); + result = 1; + } + +- v1.d = ld = -0.0625; +- ld = nextafterl (ld, 0.0); +- v2.d = ld = nextafterl (ld, -1.0); ++ v1 = -0.0625; ++ v2 = nextafterl (v1, 0.0); ++ v2 = nextafterl (v2, -1.0); + +- if (v1.ieee.mantissa0 != v2.ieee.mantissa0) +- { +- printf ("-0.0625L up: mantissa0 differs: %8x vs %8x\n", +- 
v1.ieee.mantissa0, v2.ieee.mantissa0); +- result = 1; +- } +- if (v1.ieee.mantissa1 != v2.ieee.mantissa1) +- { +- printf ("-0.0625L up: mantissa1 differs: %8x vs %8x\n", +- v1.ieee.mantissa1, v2.ieee.mantissa1); +- result = 1; +- } +- if (v1.ieee.exponent != v2.ieee.exponent) ++ if (v1 != v2) + { +- printf ("-0.0625L up: exponent differs: %4x vs %4x\n", +- v1.ieee.exponent, v2.ieee.exponent); +- result = 1; +- } +- if (v1.ieee.negative != v2.ieee.negative) +- { +- printf ("-0.0625L up: negative differs: %d vs %d\n", +- v1.ieee.negative, v2.ieee.negative); ++ printf ("-0.0625L+epsilon-epsilon != -0.0625L: %La vs %La\n", v2, v1); + result = 1; + } + +- v1.d = ld = -0.0625; +- ld = nextafterl (ld, -1.0); +- v2.d = ld = nextafterl (ld, 0.0); ++ v1 = -0.0625; ++ v2 = nextafterl (v1, -1.0); ++ v2 = nextafterl (v2, 0.0); + +- if (v1.ieee.mantissa0 != v2.ieee.mantissa0) +- { +- printf ("-0.0625L down: mantissa0 differs: %8x vs %8x\n", +- v1.ieee.mantissa0, v2.ieee.mantissa0); +- result = 1; +- } +- if (v1.ieee.mantissa1 != v2.ieee.mantissa1) ++ if (v1 != v2) + { +- printf ("-0.0625L down: mantissa1 differs: %8x vs %8x\n", +- v1.ieee.mantissa1, v2.ieee.mantissa1); +- result = 1; +- } +- if (v1.ieee.exponent != v2.ieee.exponent) +- { +- printf ("-0.0625L down: exponent differs: %4x vs %4x\n", +- v1.ieee.exponent, v2.ieee.exponent); +- result = 1; +- } +- if (v1.ieee.negative != v2.ieee.negative) +- { +- printf ("-0.0625L down: negative differs: %d vs %d\n", +- v1.ieee.negative, v2.ieee.negative); ++ printf ("-0.0625L-epsilon+epsilon != -0.0625L: %La vs %La\n", v2, v1); + result = 1; + } + +- v1.d = ld = 0.0; +- ld = nextafterl (ld, 1.0); +- v2.d = nextafterl (ld, -1.0); ++ v1 = 0.0; ++ v2 = nextafterl (v1, 1.0); ++ v2 = nextafterl (v2, -1.0); + +- if (v1.ieee.mantissa0 != v2.ieee.mantissa0) +- { +- printf ("0.0L up: mantissa0 differs: %8x vs %8x\n", +- v1.ieee.mantissa0, v2.ieee.mantissa0); +- result = 1; +- } +- if (v1.ieee.mantissa1 != v2.ieee.mantissa1) +- { +- printf ("0.0L up: mantissa1 differs: %8x vs %8x\n", +- v1.ieee.mantissa1, v2.ieee.mantissa1); +- result = 1; +- } +- if (v1.ieee.exponent != v2.ieee.exponent) ++ if (v1 != v2) + { +- printf ("0.0L up: exponent differs: %4x vs %4x\n", +- v1.ieee.exponent, v2.ieee.exponent); ++ printf ("0.0+epsilon-epsilon != 0.0L: %La vs %La\n", v2, v1); + result = 1; + } +- if (0 != v2.ieee.negative) ++ if (signbit (v2)) + { +- printf ("0.0L up: negative differs: 0 vs %d\n", +- v2.ieee.negative); ++ printf ("0.0+epsilon-epsilon is negative\n"); + result = 1; + } + +- v1.d = ld = 0.0; +- ld = nextafterl (ld, -1.0); +- v2.d = nextafterl (ld, 1.0); ++ v1 = 0.0; ++ v2 = nextafterl (v1, -1.0); ++ v2 = nextafterl (v2, 1.0); + +- if (v1.ieee.mantissa0 != v2.ieee.mantissa0) +- { +- printf ("0.0L down: mantissa0 differs: %8x vs %8x\n", +- v1.ieee.mantissa0, v2.ieee.mantissa0); +- result = 1; +- } +- if (v1.ieee.mantissa1 != v2.ieee.mantissa1) +- { +- printf ("0.0L down: mantissa1 differs: %8x vs %8x\n", +- v1.ieee.mantissa1, v2.ieee.mantissa1); +- result = 1; +- } +- if (v1.ieee.exponent != v2.ieee.exponent) ++ if (v1 != v2) + { +- printf ("0.0L down: exponent differs: %4x vs %4x\n", +- v1.ieee.exponent, v2.ieee.exponent); ++ printf ("0.0-epsilon+epsilon != 0.0L: %La vs %La\n", v2, v1); + result = 1; + } +- if (1 != v2.ieee.negative) ++ if (!signbit (v2)) + { +- printf ("0.0L down: negative differs: 1 vs %d\n", +- v2.ieee.negative); ++ printf ("0.0-epsilon+epsilon is positive\n"); + result = 1; + } + +diff -urN 
glibc-2.17-c758a686/ports/sysdeps/ia64/fpu/printf_fphex.c glibc-2.17-c758a686/ports/sysdeps/ia64/fpu/printf_fphex.c +--- glibc-2.17-c758a686/ports/sysdeps/ia64/fpu/printf_fphex.c 2014-05-27 19:53:21.000000000 -0500 ++++ glibc-2.17-c758a686/ports/sysdeps/ia64/fpu/printf_fphex.c 2014-05-27 19:53:45.000000000 -0500 +@@ -25,9 +25,11 @@ + /* The "strange" 80 bit format on ia64 has an explicit \ + leading digit in the 64 bit mantissa. */ \ + unsigned long long int num; \ ++ union ieee854_long_double u; \ ++ u.d = fpnum.ldbl; \ + \ +- num = (((unsigned long long int) fpnum.ldbl.ieee.mantissa0) << 32 \ +- | fpnum.ldbl.ieee.mantissa1); \ ++ num = (((unsigned long long int) u.ieee.mantissa0) << 32 \ ++ | u.ieee.mantissa1); \ + \ + zero_mantissa = num == 0; \ + \ +@@ -49,8 +51,8 @@ + \ + /* We have 3 bits from the mantissa in the leading nibble. \ + Therefore we are here using `IEEE854_LONG_DOUBLE_BIAS + 3'. */ \ +- exponent = fpnum.ldbl.ieee.exponent; \ +- \ ++ exponent = u.ieee.exponent; \ ++ \ + if (exponent == 0) \ + { \ + if (zero_mantissa) \ +diff -urN glibc-2.17-c758a686/stdio-common/printf_fp.c glibc-2.17-c758a686/stdio-common/printf_fp.c +--- glibc-2.17-c758a686/stdio-common/printf_fp.c 2014-05-27 19:53:22.000000000 -0500 ++++ glibc-2.17-c758a686/stdio-common/printf_fp.c 2014-05-27 19:53:45.000000000 -0500 +@@ -335,8 +335,7 @@ + int res; + if (__isnanl (fpnum.ldbl)) + { +- union ieee854_long_double u = { .d = fpnum.ldbl }; +- is_neg = u.ieee.negative != 0; ++ is_neg = signbit (fpnum.ldbl); + if (isupper (info->spec)) + { + special = "NAN"; +diff -urN glibc-2.17-c758a686/stdio-common/printf_fphex.c glibc-2.17-c758a686/stdio-common/printf_fphex.c +--- glibc-2.17-c758a686/stdio-common/printf_fphex.c 2014-05-27 19:53:22.000000000 -0500 ++++ glibc-2.17-c758a686/stdio-common/printf_fphex.c 2014-05-27 19:53:45.000000000 -0500 +@@ -93,7 +93,7 @@ + union + { + union ieee754_double dbl; +- union ieee854_long_double ldbl; ++ long double ldbl; + } + fpnum; + +@@ -162,12 +162,11 @@ + #ifndef __NO_LONG_DOUBLE_MATH + if (info->is_long_double && sizeof (long double) > sizeof (double)) + { +- fpnum.ldbl.d = *(const long double *) args[0]; ++ fpnum.ldbl = *(const long double *) args[0]; + + /* Check for special values: not a number or infinity. */ +- if (__isnanl (fpnum.ldbl.d)) ++ if (__isnanl (fpnum.ldbl)) + { +- negative = fpnum.ldbl.ieee.negative != 0; + if (isupper (info->spec)) + { + special = "NAN"; +@@ -181,8 +180,7 @@ + } + else + { +- int res = __isinfl (fpnum.ldbl.d); +- if (res) ++ if (__isinfl (fpnum.ldbl)) + { + if (isupper (info->spec)) + { +@@ -194,11 +192,9 @@ + special = "inf"; + wspecial = L"inf"; + } +- negative = res < 0; + } +- else +- negative = signbit (fpnum.ldbl.d); + } ++ negative = signbit (fpnum.ldbl); + } + else + #endif /* no long double */ +diff -urN glibc-2.17-c758a686/stdio-common/printf_size.c glibc-2.17-c758a686/stdio-common/printf_size.c +--- glibc-2.17-c758a686/stdio-common/printf_size.c 2014-05-27 19:53:22.000000000 -0500 ++++ glibc-2.17-c758a686/stdio-common/printf_size.c 2014-05-27 19:53:45.000000000 -0500 +@@ -103,7 +103,7 @@ + union + { + union ieee754_double dbl; +- union ieee854_long_double ldbl; ++ long double ldbl; + } + fpnum; + const void *ptr = &fpnum; +@@ -123,25 +123,25 @@ + #ifndef __NO_LONG_DOUBLE_MATH + if (info->is_long_double && sizeof (long double) > sizeof (double)) + { +- fpnum.ldbl.d = *(const long double *) args[0]; ++ fpnum.ldbl = *(const long double *) args[0]; + + /* Check for special values: not a number or infinity. 
*/ +- if (__isnanl (fpnum.ldbl.d)) ++ if (__isnanl (fpnum.ldbl)) + { + special = "nan"; + wspecial = L"nan"; + // fpnum_sign = 0; Already zero + } +- else if ((res = __isinfl (fpnum.ldbl.d))) ++ else if ((res = __isinfl (fpnum.ldbl))) + { + fpnum_sign = res; + special = "inf"; + wspecial = L"inf"; + } + else +- while (fpnum.ldbl.d >= divisor && tag[1] != '\0') ++ while (fpnum.ldbl >= divisor && tag[1] != '\0') + { +- fpnum.ldbl.d /= divisor; ++ fpnum.ldbl /= divisor; + ++tag; + } + } +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128/printf_fphex.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128/printf_fphex.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128/printf_fphex.c 2014-05-27 19:53:20.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128/printf_fphex.c 2014-05-27 19:53:45.000000000 -0500 +@@ -24,13 +24,15 @@ + digits we use only the implicit digits for the number before \ + the decimal point. */ \ + unsigned long long int num0, num1; \ ++ union ieee854_long_double u; \ ++ u.d = fpnum.ldbl; \ + \ + assert (sizeof (long double) == 16); \ + \ +- num0 = (((unsigned long long int) fpnum.ldbl.ieee.mantissa0) << 32 \ +- | fpnum.ldbl.ieee.mantissa1); \ +- num1 = (((unsigned long long int) fpnum.ldbl.ieee.mantissa2) << 32 \ +- | fpnum.ldbl.ieee.mantissa3); \ ++ num0 = (((unsigned long long int) u.ieee.mantissa0) << 32 \ ++ | u.ieee.mantissa1); \ ++ num1 = (((unsigned long long int) u.ieee.mantissa2) << 32 \ ++ | u.ieee.mantissa3); \ + \ + zero_mantissa = (num0|num1) == 0; \ + \ +@@ -75,9 +77,9 @@ + *--wnumstr = L'0'; \ + } \ + \ +- leading = fpnum.ldbl.ieee.exponent == 0 ? '0' : '1'; \ ++ leading = u.ieee.exponent == 0 ? '0' : '1'; \ + \ +- exponent = fpnum.ldbl.ieee.exponent; \ ++ exponent = u.ieee.exponent; \ + \ + if (exponent == 0) \ + { \ +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c 2014-05-27 19:53:20.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c 2014-05-27 19:53:45.000000000 -0500 +@@ -28,14 +28,14 @@ + unsigned long long hi, lo; \ + int ediff; \ + union ibm_extended_long_double u; \ +- u.ld = fpnum.ldbl.d; \ ++ u.ld = fpnum.ldbl; \ + \ + assert (sizeof (long double) == 16); \ + \ + lo = ((long long)u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1; \ + hi = ((long long)u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1; \ + lo <<= 7; /* pre-shift lo to match ieee854. */ \ +- /* If the lower double is not a denomal or zero then set the hidden \ ++ /* If the lower double is not a denormal or zero then set the hidden \ + 53rd bit. */ \ + if (u.d[1].ieee.exponent != 0) \ + lo |= (1ULL << (52 + 7)); \ +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-96/printf_fphex.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-96/printf_fphex.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-96/printf_fphex.c 2014-05-27 19:53:20.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-96/printf_fphex.c 2014-05-27 19:53:45.000000000 -0500 +@@ -25,11 +25,13 @@ + /* The "strange" 80 bit format on ix86 and m68k has an explicit \ + leading digit in the 64 bit mantissa. 
*/ \ + unsigned long long int num; \ ++ union ieee854_long_double u; \ ++ u.d = fpnum.ldbl; \ + \ + assert (sizeof (long double) == 12); \ + \ +- num = (((unsigned long long int) fpnum.ldbl.ieee.mantissa0) << 32 \ +- | fpnum.ldbl.ieee.mantissa1); \ ++ num = (((unsigned long long int) u.ieee.mantissa0) << 32 \ ++ | u.ieee.mantissa1); \ + \ + zero_mantissa = num == 0; \ + \ +@@ -62,7 +64,7 @@ + \ + /* We have 3 bits from the mantissa in the leading nibble. \ + Therefore we are here using `IEEE854_LONG_DOUBLE_BIAS + 3'. */ \ +- exponent = fpnum.ldbl.ieee.exponent; \ ++ exponent = u.ieee.exponent; \ + \ + if (exponent == 0) \ + { \ +diff -urN glibc-2.17-c758a686/sysdeps/x86_64/fpu/printf_fphex.c glibc-2.17-c758a686/sysdeps/x86_64/fpu/printf_fphex.c +--- glibc-2.17-c758a686/sysdeps/x86_64/fpu/printf_fphex.c 2014-05-27 19:53:20.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/x86_64/fpu/printf_fphex.c 2014-05-27 19:53:45.000000000 -0500 +@@ -25,10 +25,11 @@ + /* The "strange" 80 bit format on ix86 and m68k has an explicit \ + leading digit in the 64 bit mantissa. */ \ + unsigned long long int num; \ ++ union ieee854_long_double u; \ ++ u.d = fpnum.ldbl; \ + \ +- \ +- num = (((unsigned long long int) fpnum.ldbl.ieee.mantissa0) << 32 \ +- | fpnum.ldbl.ieee.mantissa1); \ ++ num = (((unsigned long long int) u.ieee.mantissa0) << 32 \ ++ | u.ieee.mantissa1); \ + \ + zero_mantissa = num == 0; \ + \ +@@ -61,7 +62,7 @@ + \ + /* We have 3 bits from the mantissa in the leading nibble. \ + Therefore we are here using `IEEE854_LONG_DOUBLE_BIAS + 3'. */ \ +- exponent = fpnum.ldbl.ieee.exponent; \ ++ exponent = u.ieee.exponent; \ + \ + if (exponent == 0) \ + { \ diff --git a/packages/glibc/2.17/0029-glibc-ppc64le-07.patch b/packages/glibc/2.17/0029-glibc-ppc64le-07.patch new file mode 100644 index 0000000..120576e --- /dev/null +++ b/packages/glibc/2.17/0029-glibc-ppc64le-07.patch @@ -0,0 +1,651 @@ +# commit 4ebd120cd983c8d2ac7a234884b3ac6805d82973 +# Author: Alan Modra +# Date: Sat Aug 17 18:24:05 2013 +0930 +# +# PowerPC floating point little-endian [2 of 15] +# http://sourceware.org/ml/libc-alpha/2013-08/msg00082.html +# +# This patch replaces occurrences of GET_LDOUBLE_* and SET_LDOUBLE_* +# macros, and union ieee854_long_double_shape_type in ldbl-128ibm/, +# and a stray one in the 32-bit fpu support. These files have no +# significant changes apart from rewriting the long double bit access. +# +# * sysdeps/ieee754/ldbl-128ibm/math_ldbl.h (ldbl_high): Define. +# * sysdeps/ieee754/ldbl-128ibm/e_acoshl.c (__ieee754_acoshl): Rewrite +# all uses of ieee854 long double macros and unions. +# * sysdeps/ieee754/ldbl-128ibm/e_acosl.c (__ieee754_acosl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/e_asinl.c (__ieee754_asinl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/e_atanhl.c (__ieee754_atanhl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/e_coshl.c (__ieee754_coshl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/e_log2l.c (__ieee754_log2l): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c (__ieee754_rem_pio2l): +# Likewise. +# * sysdeps/ieee754/ldbl-128ibm/e_sinhl.c (__ieee754_sinhl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/k_cosl.c (__kernel_cosl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/k_sincosl.c (__kernel_sincosl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/k_sinl.c (__kernel_sinl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_asinhl.c (__asinhl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_atanl.c (__atanl): Likewise. +# Simplify sign and nan test too. 
+# * sysdeps/ieee754/ldbl-128ibm/s_cosl.c (__cosl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_fabsl.c (__fabsl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_finitel.c (___finitel): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c (___fpclassifyl): +# Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_isnanl.c (___isnanl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_issignalingl.c (__issignalingl): +# Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_logbl.c (__logbl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_signbitl.c (___signbitl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_sincosl.c (__sincosl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_sinl.c (__sinl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_tanl.c (__tanl): Likewise. +# * sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c (__logbl): Likewise. +# +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c 2014-05-27 19:59:00.000000000 -0500 +@@ -36,8 +36,12 @@ + { + long double t; + int64_t hx; +- u_int64_t lx; +- GET_LDOUBLE_WORDS64(hx,lx,x); ++ uint64_t lx; ++ double xhi, xlo; ++ ++ ldbl_unpack (x, &xhi, &xlo); ++ EXTRACT_WORDS64 (hx, xhi); ++ EXTRACT_WORDS64 (lx, xlo); + if(hx<0x3ff0000000000000LL) { /* x < 1 */ + return (x-x)/(x-x); + } else if(hx >=0x41b0000000000000LL) { /* x > 2**28 */ +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_acosl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_acosl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_acosl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_acosl.c 2014-05-27 19:59:00.000000000 -0500 +@@ -151,26 +151,25 @@ + long double + __ieee754_acosl (long double x) + { +- long double z, r, w, p, q, s, t, f2; +- ieee854_long_double_shape_type u; ++ long double a, z, r, w, p, q, s, t, f2; + +- u.value = __builtin_fabsl (x); +- if (u.value == 1.0L) ++ a = __builtin_fabsl (x); ++ if (a == 1.0L) + { + if (x > 0.0L) + return 0.0; /* acos(1) = 0 */ + else + return (2.0 * pio2_hi) + (2.0 * pio2_lo); /* acos(-1)= pi */ + } +- else if (u.value > 1.0L) ++ else if (a > 1.0L) + { + return (x - x) / (x - x); /* acos(|x| > 1) is NaN */ + } +- if (u.value < 0.5L) ++ if (a < 0.5L) + { +- if (u.value < 6.938893903907228e-18L) /* |x| < 2**-57 */ ++ if (a < 6.938893903907228e-18L) /* |x| < 2**-57 */ + return pio2_hi + pio2_lo; +- if (u.value < 0.4375L) ++ if (a < 0.4375L) + { + /* Arcsine of x. */ + z = x * x; +@@ -199,7 +198,7 @@ + return z; + } + /* .4375 <= |x| < .5 */ +- t = u.value - 0.4375L; ++ t = a - 0.4375L; + p = ((((((((((P10 * t + + P9) * t + + P8) * t +@@ -230,9 +229,9 @@ + r = acosr4375 + r; + return r; + } +- else if (u.value < 0.625L) ++ else if (a < 0.625L) + { +- t = u.value - 0.5625L; ++ t = a - 0.5625L; + p = ((((((((((rS10 * t + + rS9) * t + + rS8) * t +@@ -264,7 +263,9 @@ + } + else + { /* |x| >= .625 */ +- z = (one - u.value) * 0.5; ++ double shi, slo; ++ ++ z = (one - a) * 0.5; + s = __ieee754_sqrtl (z); + /* Compute an extended precision square root from + the Newton iteration s -> 0.5 * (s + z / s). +@@ -273,12 +274,11 @@ + Express s = f1 + f2 where f1 * f1 is exactly representable. + w = (z - s^2)/2s = (z - f1^2 - 2 f1 f2 - f2^2)/2s . + s + w has extended precision. 
*/ +- u.value = s; +- u.parts32.w2 = 0; +- u.parts32.w3 = 0; +- f2 = s - u.value; +- w = z - u.value * u.value; +- w = w - 2.0 * u.value * f2; ++ ldbl_unpack (s, &shi, &slo); ++ a = shi; ++ f2 = slo; ++ w = z - a * a; ++ w = w - 2.0 * a * f2; + w = w - f2 * f2; + w = w / (2.0 * s); + /* Arcsine of s. */ +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_asinl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_asinl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_asinl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_asinl.c 2014-05-27 19:59:00.000000000 -0500 +@@ -131,19 +131,18 @@ + long double + __ieee754_asinl (long double x) + { +- long double t, w, p, q, c, r, s; ++ long double a, t, w, p, q, c, r, s; + int flag; +- ieee854_long_double_shape_type u; + + flag = 0; +- u.value = __builtin_fabsl (x); +- if (u.value == 1.0L) /* |x|>= 1 */ ++ a = __builtin_fabsl (x); ++ if (a == 1.0L) /* |x|>= 1 */ + return x * pio2_hi + x * pio2_lo; /* asin(1)=+-pi/2 with inexact */ +- else if (u.value >= 1.0L) ++ else if (a >= 1.0L) + return (x - x) / (x - x); /* asin(|x|>1) is NaN */ +- else if (u.value < 0.5L) ++ else if (a < 0.5L) + { +- if (u.value < 6.938893903907228e-18L) /* |x| < 2**-57 */ ++ if (a < 6.938893903907228e-18L) /* |x| < 2**-57 */ + { + if (huge + x > one) + return x; /* return x with inexact if x!=0 */ +@@ -155,9 +154,9 @@ + flag = 1; + } + } +- else if (u.value < 0.625L) ++ else if (a < 0.625L) + { +- t = u.value - 0.5625; ++ t = a - 0.5625; + p = ((((((((((rS10 * t + + rS9) * t + + rS8) * t +@@ -190,7 +189,7 @@ + else + { + /* 1 > |x| >= 0.625 */ +- w = one - u.value; ++ w = one - a; + t = w * 0.5; + } + +@@ -223,17 +222,14 @@ + } + + s = __ieee754_sqrtl (t); +- if (u.value > 0.975L) ++ if (a > 0.975L) + { + w = p / q; + t = pio2_hi - (2.0 * (s + s * w) - pio2_lo); + } + else + { +- u.value = s; +- u.parts32.w3 = 0; +- u.parts32.w2 = 0; +- w = u.value; ++ w = ldbl_high (s); + c = (t - w * w) / (s + w); + r = p / q; + p = 2.0 * s * r - (pio2_lo - 2.0 * c); +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_atanhl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_atanhl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_atanhl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_atanhl.c 2014-05-27 19:59:00.000000000 -0500 +@@ -40,8 +40,10 @@ + { + long double t; + int64_t hx,ix; +- u_int64_t lx __attribute__ ((unused)); +- GET_LDOUBLE_WORDS64(hx,lx,x); ++ double xhi; ++ ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (hx, xhi); + ix = hx&0x7fffffffffffffffLL; + if (ix >= 0x3ff0000000000000LL) { /* |x|>=1 */ + if (ix > 0x3ff0000000000000LL) +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_coshl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_coshl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_coshl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_coshl.c 2014-05-27 19:59:00.000000000 -0500 +@@ -41,9 +41,11 @@ + { + long double t,w; + int64_t ix; ++ double xhi; + + /* High word of |x|. 
*/ +- GET_LDOUBLE_MSW64(ix,x); ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (ix, xhi); + ix &= 0x7fffffffffffffffLL; + + /* x is INF or NaN */ +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_log2l.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_log2l.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_log2l.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_log2l.c 2014-05-27 19:59:00.000000000 -0500 +@@ -177,11 +177,13 @@ + long double z; + long double y; + int e; +- int64_t hx, lx; ++ int64_t hx; ++ double xhi; + + /* Test for domain */ +- GET_LDOUBLE_WORDS64 (hx, lx, x); +- if (((hx & 0x7fffffffffffffffLL) | (lx & 0x7fffffffffffffffLL)) == 0) ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (hx, xhi); ++ if ((hx & 0x7fffffffffffffffLL) == 0) + return (-1.0L / (x - x)); + if (hx < 0) + return (x - x) / (x - x); +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c 2014-05-27 19:59:00.000000000 -0500 +@@ -200,10 +200,11 @@ + double tx[8]; + int exp; + int64_t n, ix, hx, ixd; +- u_int64_t lx __attribute__ ((unused)); + u_int64_t lxd; ++ double xhi; + +- GET_LDOUBLE_WORDS64 (hx, lx, x); ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (hx, xhi); + ix = hx & 0x7fffffffffffffffLL; + if (ix <= 0x3fe921fb54442d10LL) /* x in <-pi/4, pi/4> */ + { +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_sinhl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_sinhl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_sinhl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_sinhl.c 2014-05-27 19:59:00.000000000 -0500 +@@ -38,9 +38,11 @@ + { + long double t,w,h; + int64_t ix,jx; ++ double xhi; + + /* High word of |x|. 
*/ +- GET_LDOUBLE_MSW64(jx,x); ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (jx, xhi); + ix = jx&0x7fffffffffffffffLL; + + /* x is INF or NaN */ +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_cosl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_cosl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_cosl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_cosl.c 2014-05-27 19:59:00.000000000 -0500 +@@ -81,8 +81,11 @@ + { + long double h, l, z, sin_l, cos_l_m1; + int64_t ix; +- u_int32_t tix, hix, index; +- GET_LDOUBLE_MSW64 (ix, x); ++ uint32_t tix, hix, index; ++ double xhi, hhi; ++ ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (ix, xhi); + tix = ((u_int64_t)ix) >> 32; + tix &= ~0x80000000; /* tix = |x|'s high 32 bits */ + if (tix < 0x3fc30000) /* |x| < 0.1484375 */ +@@ -136,7 +139,8 @@ + case 2: index = (hix - 0x3fc30000) >> 14; break; + } + */ +- SET_LDOUBLE_WORDS64(h, ((u_int64_t)hix) << 32, 0); ++ INSERT_WORDS64 (hhi, ((uint64_t)hix) << 32); ++ h = hhi; + l = y - (h - x); + z = l * l; + sin_l = l*(ONE+z*(SSIN1+z*(SSIN2+z*(SSIN3+z*(SSIN4+z*SSIN5))))); +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_sincosl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_sincosl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_sincosl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_sincosl.c 2014-05-27 19:59:00.000000000 -0500 +@@ -100,9 +100,12 @@ + { + long double h, l, z, sin_l, cos_l_m1; + int64_t ix; +- u_int32_t tix, hix, index; +- GET_LDOUBLE_MSW64 (ix, x); +- tix = ((u_int64_t)ix) >> 32; ++ uint32_t tix, hix, index; ++ double xhi, hhi; ++ ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (ix, xhi); ++ tix = ((uint64_t)ix) >> 32; + tix &= ~0x80000000; /* tix = |x|'s high 32 bits */ + if (tix < 0x3fc30000) /* |x| < 0.1484375 */ + { +@@ -164,7 +167,8 @@ + case 2: index = (hix - 0x3fc30000) >> 14; break; + } + */ +- SET_LDOUBLE_WORDS64(h, ((u_int64_t)hix) << 32, 0); ++ INSERT_WORDS64 (hhi, ((uint64_t)hix) << 32); ++ h = hhi; + if (iy) + l = y - (h - x); + else +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_sinl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_sinl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_sinl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_sinl.c 2014-05-27 19:59:00.000000000 -0500 +@@ -82,7 +82,10 @@ + long double h, l, z, sin_l, cos_l_m1; + int64_t ix; + u_int32_t tix, hix, index; +- GET_LDOUBLE_MSW64 (ix, x); ++ double xhi, hhi; ++ ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (ix, xhi); + tix = ((u_int64_t)ix) >> 32; + tix &= ~0x80000000; /* tix = |x|'s high 32 bits */ + if (tix < 0x3fc30000) /* |x| < 0.1484375 */ +@@ -132,7 +135,8 @@ + case 2: index = (hix - 0x3fc30000) >> 14; break; + } + */ +- SET_LDOUBLE_WORDS64(h, ((u_int64_t)hix) << 32, 0); ++ INSERT_WORDS64 (hhi, ((uint64_t)hix) << 32); ++ h = hhi; + if (iy) + l = (ix < 0 ? 
-y : y) - (h - x); + else +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_asinhl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_asinhl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_asinhl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_asinhl.c 2014-05-27 19:59:00.000000000 -0500 +@@ -38,7 +38,10 @@ + { + long double t,w; + int64_t hx,ix; +- GET_LDOUBLE_MSW64(hx,x); ++ double xhi; ++ ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (hx, xhi); + ix = hx&0x7fffffffffffffffLL; + if(ix>=0x7ff0000000000000LL) return x+x; /* x is inf or NaN */ + if(ix< 0x3e20000000000000LL) { /* |x|<2**-29 */ +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_atanl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_atanl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_atanl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_atanl.c 2014-05-27 19:59:00.000000000 -0500 +@@ -173,23 +173,20 @@ + long double + __atanl (long double x) + { +- int k, sign; ++ int32_t k, sign, lx; + long double t, u, p, q; +- ieee854_long_double_shape_type s; ++ double xhi; + +- s.value = x; +- k = s.parts32.w0; +- if (k & 0x80000000) +- sign = 1; +- else +- sign = 0; ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS (k, lx, xhi); ++ sign = k & 0x80000000; + + /* Check for IEEE special cases. */ + k &= 0x7fffffff; + if (k >= 0x7ff00000) + { + /* NaN. */ +- if ((k & 0xfffff) | s.parts32.w1 ) ++ if (((k - 0x7ff00000) | lx) != 0) + return (x + x); + + /* Infinity. */ +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_cosl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_cosl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_cosl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_cosl.c 2014-05-27 19:59:00.000000000 -0500 +@@ -53,9 +53,11 @@ + { + long double y[2],z=0.0L; + int64_t n, ix; ++ double xhi; + + /* High word of x. 
*/ +- GET_LDOUBLE_MSW64(ix,x); ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (ix, xhi); + + /* |x| ~< pi/4 */ + ix &= 0x7fffffffffffffffLL; +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_fabsl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_fabsl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_fabsl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_fabsl.c 2014-05-27 19:59:00.000000000 -0500 +@@ -29,10 +29,16 @@ + long double __fabsl(long double x) + { + u_int64_t hx, lx; +- GET_LDOUBLE_WORDS64(hx,lx,x); ++ double xhi, xlo; ++ ++ ldbl_unpack (x, &xhi, &xlo); ++ EXTRACT_WORDS64 (hx, xhi); ++ EXTRACT_WORDS64 (lx, xlo); + lx = lx ^ ( hx & 0x8000000000000000LL ); + hx = hx & 0x7fffffffffffffffLL; +- SET_LDOUBLE_WORDS64(x,hx,lx); ++ INSERT_WORDS64 (xhi, hx); ++ INSERT_WORDS64 (xlo, lx); ++ x = ldbl_pack (xhi, xlo); + return x; + } + long_double_symbol (libm, __fabsl, fabsl); +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_finitel.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_finitel.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_finitel.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_finitel.c 2014-05-27 19:59:00.000000000 -0500 +@@ -29,10 +29,14 @@ + int + ___finitel (long double x) + { +- int64_t hx; +- GET_LDOUBLE_MSW64(hx,x); +- return (int)((u_int64_t)((hx&0x7fffffffffffffffLL) +- -0x7ff0000000000000LL)>>63); ++ uint64_t hx; ++ double xhi; ++ ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (hx, xhi); ++ hx &= 0x7fffffffffffffffLL; ++ hx -= 0x7ff0000000000000LL; ++ return hx >> 63; + } + hidden_ver (___finitel, __finitel) + weak_alias (___finitel, ____finitel) +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c 2014-05-27 19:59:00.000000000 -0500 +@@ -46,8 +46,10 @@ + { + u_int64_t hx, lx; + int retval = FP_NORMAL; ++ double xhi, xlo; + +- GET_LDOUBLE_WORDS64 (hx, lx, x); ++ ldbl_unpack (x, &xhi, &xlo); ++ EXTRACT_WORDS64 (hx, xhi); + if ((hx & 0x7ff0000000000000ULL) == 0x7ff0000000000000ULL) { + /* +/-NaN or +/-Inf */ + if (hx & 0x000fffffffffffffULL) { +@@ -65,6 +67,7 @@ + retval = FP_NORMAL; + } else { + if ((hx & 0x7ff0000000000000ULL) == 0x0360000000000000ULL) { ++ EXTRACT_WORDS64 (lx, xlo); + if ((lx & 0x7fffffffffffffff) /* lower is non-zero */ + && ((lx^hx) & 0x8000000000000000ULL)) { /* and sign differs */ + /* +/- denormal */ +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_isnanl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_isnanl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_isnanl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_isnanl.c 2014-05-27 19:59:00.000000000 -0500 +@@ -29,12 +29,14 @@ + int + ___isnanl (long double x) + { +- int64_t hx; +- int64_t lx __attribute__ ((unused)); +- GET_LDOUBLE_WORDS64(hx,lx,x); +- hx &= 0x7fffffffffffffffLL; +- hx = 0x7ff0000000000000LL - hx; +- return (int)((u_int64_t)hx>>63); ++ uint64_t hx; ++ double xhi; ++ ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (hx, xhi); ++ hx &= 0x7fffffffffffffffLL; ++ hx = 0x7ff0000000000000LL - hx; ++ return (int) (hx >> 63); + } + hidden_ver (___isnanl, __isnanl) + #ifndef IS_IN_libm +diff -urN 
glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_logbl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_logbl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_logbl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_logbl.c 2014-05-27 19:59:19.000000000 -0500 +@@ -27,9 +27,10 @@ + __logbl (long double x) + { + int64_t hx, rhx; +- int64_t lx __attribute__ ((unused)); ++ double xhi; + +- GET_LDOUBLE_WORDS64 (hx, lx, x); ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (hx, xhi); + hx &= 0x7fffffffffffffffLL; /* high |x| */ + if (hx == 0) + return -1.0 / fabs (x); +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_signbitl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_signbitl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_signbitl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_signbitl.c 2014-05-27 19:59:19.000000000 -0500 +@@ -25,8 +25,10 @@ + ___signbitl (long double x) + { + int64_t e; ++ double xhi; + +- GET_LDOUBLE_MSW64 (e, x); ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (e, xhi); + return e < 0; + } + #ifdef IS_IN_libm +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_sincosl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_sincosl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_sincosl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_sincosl.c 2014-05-27 19:59:19.000000000 -0500 +@@ -27,9 +27,11 @@ + __sincosl (long double x, long double *sinx, long double *cosx) + { + int64_t ix; ++ double xhi; + + /* High word of x. */ +- GET_LDOUBLE_MSW64 (ix, x); ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (ix, xhi); + + /* |x| ~< pi/4 */ + ix &= 0x7fffffffffffffffLL; +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_sinl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_sinl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_sinl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_sinl.c 2014-05-27 19:59:19.000000000 -0500 +@@ -53,9 +53,11 @@ + { + long double y[2],z=0.0L; + int64_t n, ix; ++ double xhi; + + /* High word of x. */ +- GET_LDOUBLE_MSW64(ix,x); ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (ix, xhi); + + /* |x| ~< pi/4 */ + ix &= 0x7fffffffffffffffLL; +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_tanl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_tanl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_tanl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_tanl.c 2014-05-27 19:59:19.000000000 -0500 +@@ -53,9 +53,11 @@ + { + long double y[2],z=0.0L; + int64_t n, ix; ++ double xhi; + + /* High word of x. */ +- GET_LDOUBLE_MSW64(ix,x); ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (ix, xhi); + + /* |x| ~< pi/4 */ + ix &= 0x7fffffffffffffffLL; +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c 2014-05-27 19:58:07.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c 2014-05-27 19:59:19.000000000 -0500 +@@ -35,14 +35,14 @@ + long double + __logbl (long double x) + { +- double xh, xl; ++ double xh; + double ret; + + if (__builtin_expect (x == 0.0L, 0)) + /* Raise FE_DIVBYZERO and return -HUGE_VAL[LF]. 
*/ + return -1.0L / __builtin_fabsl (x); + +- ldbl_unpack (x, &xh, &xl); ++ xh = ldbl_high (x); + /* ret = x & 0x7ff0000000000000; */ + asm ( + "xxland %x0,%x1,%x2\n" +@@ -58,9 +58,9 @@ + { + /* POSIX specifies that denormal number is treated as + though it were normalized. */ +- int64_t lx, hx; ++ int64_t hx; + +- GET_LDOUBLE_WORDS64 (hx, lx, x); ++ EXTRACT_WORDS64 (hx, xh); + return (long double) (-1023 - (__builtin_clzll (hx) - 12)); + } + /* Test to avoid logb_downward (0.0) == -0.0. */ diff --git a/packages/glibc/2.17/0030-glibc-ppc64le-08.patch b/packages/glibc/2.17/0030-glibc-ppc64le-08.patch new file mode 100644 index 0000000..5f79844 --- /dev/null +++ b/packages/glibc/2.17/0030-glibc-ppc64le-08.patch @@ -0,0 +1,1235 @@ +# commit 765714cafcad7e6168518c61111f07bd955a9fee +# Author: Alan Modra +# Date: Sat Aug 17 18:24:58 2013 +0930 +# +# PowerPC floating point little-endian [3 of 15] +# http://sourceware.org/ml/libc-alpha/2013-08/msg00083.html +# +# Further replacement of ieee854 macros and unions. These files also +# have some optimisations for comparison against 0.0L, infinity and nan. +# Since the ABI specifies that the high double of an IBM long double +# pair is the value rounded to double, a high double of 0.0 means the +# low double must also be 0.0. The ABI also says that infinity and +# nan are encoded in the high double, with the low double unspecified. +# This means that tests for 0.0L, +/-Infinity and +/-NaN need only check +# the high double. +# +# * sysdeps/ieee754/ldbl-128ibm/e_atan2l.c (__ieee754_atan2l): Rewrite +# all uses of ieee854 long double macros and unions. Simplify tests +# for long doubles that are fully specified by the high double. +# * sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c (__ieee754_gammal_r): +# Likewise. +# * sysdeps/ieee754/ldbl-128ibm/e_ilogbl.c (__ieee754_ilogbl): Likewise. +# Remove dead code too. +# * sysdeps/ieee754/ldbl-128ibm/e_jnl.c (__ieee754_jnl): Likewise. +# (__ieee754_ynl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/e_log10l.c (__ieee754_log10l): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/e_logl.c (__ieee754_logl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/e_powl.c (__ieee754_powl): Likewise. +# Remove dead code too. +# * sysdeps/ieee754/ldbl-128ibm/k_tanl.c (__kernel_tanl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_expm1l.c (__expm1l): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_frexpl.c (__frexpl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_isinf_nsl.c (__isinf_nsl): Likewise. +# Simplify. +# * sysdeps/ieee754/ldbl-128ibm/s_isinfl.c (___isinfl): Likewise. +# Simplify. +# * sysdeps/ieee754/ldbl-128ibm/s_log1pl.c (__log1pl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_modfl.c (__modfl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c (__nextafterl): Likewise. +# Comment on variable precision. +# * sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c (__nexttoward): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c (__nexttowardf): +# Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_remquol.c (__remquol): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_scalblnl.c (__scalblnl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_scalbnl.c (__scalbnl): Likewise. +# * sysdeps/ieee754/ldbl-128ibm/s_tanhl.c (__tanhl): Likewise. +# * sysdeps/powerpc/fpu/libm-test-ulps: Adjust tan_towardzero ulps. 
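+#
+# A minimal sketch (not part of this patch) of the classification idea
+# described above: because 0.0L, +/-Inf and NaN are fully determined by
+# the high double of an IBM long double pair, a test needs only its
+# bits.  This assumes the ldbl-128ibm layout with the high double stored
+# first in the pair; the helper name and the memcpy-based bit extraction
+# are illustrative only, not glibc internals:
+#
+#   #include <stdint.h>
+#   #include <string.h>
+#
+#   /* Classify an IBM double-double by inspecting its high double only.  */
+#   static int
+#   ibm128_is_zero_inf_or_nan (long double x)
+#   {
+#     double hi;
+#     uint64_t bits;
+#     memcpy (&hi, &x, sizeof hi);      /* high double of the pair */
+#     memcpy (&bits, &hi, sizeof bits);
+#     bits &= 0x7fffffffffffffffULL;    /* clear the sign bit */
+#     if (bits == 0)
+#       return 1;                       /* +-0.0L: the low double must be 0 too */
+#     if (bits >= 0x7ff0000000000000ULL)
+#       return 2;                       /* +-Inf or NaN */
+#     return 0;                         /* finite, non-zero */
+#   }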
+# +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c 2014-05-27 23:05:55.000000000 -0500 +@@ -56,11 +56,15 @@ + { + long double z; + int64_t k,m,hx,hy,ix,iy; +- u_int64_t lx,ly; ++ uint64_t lx; ++ double xhi, xlo, yhi; + +- GET_LDOUBLE_WORDS64(hx,lx,x); ++ ldbl_unpack (x, &xhi, &xlo); ++ EXTRACT_WORDS64 (hx, xhi); ++ EXTRACT_WORDS64 (lx, xlo); + ix = hx&0x7fffffffffffffffLL; +- GET_LDOUBLE_WORDS64(hy,ly,y); ++ yhi = ldbl_high (y); ++ EXTRACT_WORDS64 (hy, yhi); + iy = hy&0x7fffffffffffffffLL; + if(((ix)>0x7ff0000000000000LL)|| + ((iy)>0x7ff0000000000000LL)) /* x or y is NaN */ +@@ -70,7 +74,7 @@ + m = ((hy>>63)&1)|((hx>>62)&2); /* 2*sign(x)+sign(y) */ + + /* when y = 0 */ +- if((iy|(ly&0x7fffffffffffffffLL))==0) { ++ if(iy==0) { + switch(m) { + case 0: + case 1: return y; /* atan(+-0,+anything)=+-0 */ +@@ -79,7 +83,7 @@ + } + } + /* when x = 0 */ +- if((ix|(lx&0x7fffffffffffffff))==0) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny; ++ if(ix==0) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny; + + /* when x is INF */ + if(ix==0x7ff0000000000000LL) { +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c 2014-05-27 23:05:55.000000000 -0500 +@@ -29,11 +29,12 @@ + and the exp function. But due to the required boundary + conditions we must check some values separately. */ + int64_t hx; +- u_int64_t lx; ++ double xhi; + +- GET_LDOUBLE_WORDS64 (hx, lx, x); ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (hx, xhi); + +- if (((hx | lx) & 0x7fffffffffffffffLL) == 0) ++ if ((hx & 0x7fffffffffffffffLL) == 0) + { + /* Return value for x == 0 is Inf with divide by zero exception. */ + *signgamp = 0; +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_ilogbl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_ilogbl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_ilogbl.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_ilogbl.c 2014-05-27 23:05:55.000000000 -0500 +@@ -31,26 +31,24 @@ + + int __ieee754_ilogbl(long double x) + { +- int64_t hx,lx; ++ int64_t hx; + int ix; ++ double xhi; + +- GET_LDOUBLE_WORDS64(hx,lx,x); ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (hx, xhi); + hx &= 0x7fffffffffffffffLL; + if(hx <= 0x0010000000000000LL) { +- if((hx|(lx&0x7fffffffffffffffLL))==0) ++ if(hx==0) + return FP_ILOGB0; /* ilogbl(0) = FP_ILOGB0 */ + else /* subnormal x */ +- if(hx==0) { +- for (ix = -1043; lx>0; lx<<=1) ix -=1; +- } else { +- for (ix = -1022, hx<<=11; hx>0; hx<<=1) ix -=1; +- } ++ for (ix = -1022, hx<<=11; hx>0; hx<<=1) ix -=1; + return ix; + } + else if (hx<0x7ff0000000000000LL) return (hx>>52)-0x3ff; + else if (FP_ILOGBNAN != INT_MAX) { + /* ISO C99 requires ilogbl(+-Inf) == INT_MAX. 
*/ +- if (((hx^0x7ff0000000000000LL)|lx) == 0) ++ if (hx==0x7ff0000000000000LL) + return INT_MAX; + } + return FP_ILOGBNAN; +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_jnl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_jnl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_jnl.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_jnl.c 2014-05-27 23:05:55.000000000 -0500 +@@ -70,26 +70,25 @@ + long double + __ieee754_jnl (int n, long double x) + { +- u_int32_t se; ++ uint32_t se, lx; + int32_t i, ix, sgn; + long double a, b, temp, di; + long double z, w; +- ieee854_long_double_shape_type u; ++ double xhi; + + + /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x) + * Thus, J(-n,x) = J(n,-x) + */ + +- u.value = x; +- se = u.parts32.w0; ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS (se, lx, xhi); + ix = se & 0x7fffffff; + + /* if J(n,NaN) is NaN */ + if (ix >= 0x7ff00000) + { +- if ((u.parts32.w0 & 0xfffff) | u.parts32.w1 +- | (u.parts32.w2 & 0x7fffffff) | u.parts32.w3) ++ if (((ix - 0x7ff00000) | lx) != 0) + return x + x; + } + +@@ -298,21 +297,20 @@ + long double + __ieee754_ynl (int n, long double x) + { +- u_int32_t se; ++ uint32_t se, lx; + int32_t i, ix; + int32_t sign; + long double a, b, temp; +- ieee854_long_double_shape_type u; ++ double xhi; + +- u.value = x; +- se = u.parts32.w0; ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS (se, lx, xhi); + ix = se & 0x7fffffff; + + /* if Y(n,NaN) is NaN */ + if (ix >= 0x7ff00000) + { +- if ((u.parts32.w0 & 0xfffff) | u.parts32.w1 +- | (u.parts32.w2 & 0x7fffffff) | u.parts32.w3) ++ if (((ix - 0x7ff00000) | lx) != 0) + return x + x; + } + if (x <= 0.0L) +@@ -377,14 +375,16 @@ + a = __ieee754_y0l (x); + b = __ieee754_y1l (x); + /* quit if b is -inf */ +- u.value = b; +- se = u.parts32.w0 & 0xfff00000; ++ xhi = ldbl_high (b); ++ GET_HIGH_WORD (se, xhi); ++ se &= 0xfff00000; + for (i = 1; i < n && se != 0xfff00000; i++) + { + temp = b; + b = ((long double) (i + i) / x) * b - a; +- u.value = b; +- se = u.parts32.w0 & 0xfff00000; ++ xhi = ldbl_high (b); ++ GET_HIGH_WORD (se, xhi); ++ se &= 0xfff00000; + a = temp; + } + } +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_log10l.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_log10l.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_log10l.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_log10l.c 2014-05-27 23:05:55.000000000 -0500 +@@ -182,11 +182,13 @@ + long double z; + long double y; + int e; +- int64_t hx, lx; ++ int64_t hx; ++ double xhi; + + /* Test for domain */ +- GET_LDOUBLE_WORDS64 (hx, lx, x); +- if (((hx & 0x7fffffffffffffffLL) | (lx & 0x7fffffffffffffffLL)) == 0) ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (hx, xhi); ++ if ((hx & 0x7fffffffffffffffLL) == 0) + return (-1.0L / (x - x)); + if (hx < 0) + return (x - x) / (x - x); +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_logl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_logl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_logl.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_logl.c 2014-05-27 23:05:55.000000000 -0500 +@@ -185,18 +185,20 @@ + long double + __ieee754_logl(long double x) + { +- long double z, y, w; +- ieee854_long_double_shape_type u, t; ++ long double z, y, w, t; + unsigned int m; + int k, e; ++ double xhi; ++ uint32_t hx, lx; + +- u.value = x; +- m = u.parts32.w0; ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS (hx, lx, xhi); ++ m = 
hx; + + /* Check for IEEE special cases. */ + k = m & 0x7fffffff; + /* log(0) = -infinity. */ +- if ((k | u.parts32.w1 | (u.parts32.w2 & 0x7fffffff) | u.parts32.w3) == 0) ++ if ((k | lx) == 0) + { + return -0.5L / ZERO; + } +@@ -216,7 +218,7 @@ + { + z = x - 1.0L; + k = 64; +- t.value = 1.0L; ++ t = 1.0L; + e = 0; + } + else +@@ -233,10 +235,8 @@ + k = (m - 0xff000) >> 13; + /* t is the argument 0.5 + (k+26)/128 + of the nearest item to u in the lookup table. */ +- t.parts32.w0 = 0x3ff00000 + (k << 13); +- t.parts32.w1 = 0; +- t.parts32.w2 = 0; +- t.parts32.w3 = 0; ++ INSERT_WORDS (xhi, 0x3ff00000 + (k << 13), 0); ++ t = xhi; + w0 += 0x100000; + e -= 1; + k += 64; +@@ -244,17 +244,15 @@ + else + { + k = (m - 0xfe000) >> 14; +- t.parts32.w0 = 0x3fe00000 + (k << 14); +- t.parts32.w1 = 0; +- t.parts32.w2 = 0; +- t.parts32.w3 = 0; ++ INSERT_WORDS (xhi, 0x3fe00000 + (k << 14), 0); ++ t = xhi; + } +- u.value = __scalbnl (u.value, ((int) ((w0 - u.parts32.w0) * 2)) >> 21); ++ x = __scalbnl (x, ((int) ((w0 - hx) * 2)) >> 21); + /* log(u) = log( t u/t ) = log(t) + log(u/t) + log(t) is tabulated in the lookup table. + Express log(u/t) = log(1+z), where z = u/t - 1 = (u-t)/t. + cf. Cody & Waite. */ +- z = (u.value - t.value) / t.value; ++ z = (x - t) / t; + } + /* Series expansion of log(1+z). */ + w = z * z; +@@ -275,7 +273,7 @@ + y += e * ln2b; /* Base 2 exponent offset times ln(2). */ + y += z; + y += logtbl[k-26]; /* log(t) - (t-1) */ +- y += (t.value - 1.0L); ++ y += (t - 1.0L); + y += e * ln2a; + return y; + } +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_powl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_powl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_powl.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_powl.c 2014-05-27 23:05:55.000000000 -0500 +@@ -151,37 +151,32 @@ + long double y1, t1, t2, r, s, t, u, v, w; + long double s2, s_h, s_l, t_h, t_l, ay; + int32_t i, j, k, yisint, n; +- u_int32_t ix, iy; +- int32_t hx, hy; +- ieee854_long_double_shape_type o, p, q; ++ uint32_t ix, iy; ++ int32_t hx, hy, hax; ++ double ohi, xhi, xlo, yhi, ylo; ++ uint32_t lx, ly, lj; + +- p.value = x; +- hx = p.parts32.w0; ++ ldbl_unpack (x, &xhi, &xlo); ++ EXTRACT_WORDS (hx, lx, xhi); + ix = hx & 0x7fffffff; + +- q.value = y; +- hy = q.parts32.w0; ++ ldbl_unpack (y, &yhi, &ylo); ++ EXTRACT_WORDS (hy, ly, yhi); + iy = hy & 0x7fffffff; + +- + /* y==zero: x**0 = 1 */ +- if ((iy | q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0) ++ if ((iy | ly) == 0) + return one; + + /* 1.0**y = 1; -1.0**+-Inf = 1 */ + if (x == one) + return one; +- if (x == -1.0L && iy == 0x7ff00000 +- && (q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0) ++ if (x == -1.0L && ((iy - 0x7ff00000) | ly) == 0) + return one; + + /* +-NaN return x+y */ +- if ((ix > 0x7ff00000) +- || ((ix == 0x7ff00000) +- && ((p.parts32.w1 | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) != 0)) +- || (iy > 0x7ff00000) +- || ((iy == 0x7ff00000) +- && ((q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) != 0))) ++ if ((ix >= 0x7ff00000 && ((ix - 0x7ff00000) | lx) != 0) ++ || (iy >= 0x7ff00000 && ((iy - 0x7ff00000) | ly) != 0)) + return x + y; + + /* determine if y is an odd int when x < 0 +@@ -192,7 +187,10 @@ + yisint = 0; + if (hx < 0) + { +- if ((q.parts32.w2 & 0x7fffffff) >= 0x43400000) /* Low part >= 2^53 */ ++ uint32_t low_ye; ++ ++ GET_HIGH_WORD (low_ye, ylo); ++ if ((low_ye & 0x7fffffff) >= 0x43400000) /* Low part >= 2^53 */ + yisint = 2; /* even 
integer y */ + else if (iy >= 0x3ff00000) /* 1.0 */ + { +@@ -207,42 +205,43 @@ + } + } + ++ ax = fabsl (x); ++ + /* special value of y */ +- if ((q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0) ++ if (ly == 0) + { +- if (iy == 0x7ff00000 && q.parts32.w1 == 0) /* y is +-inf */ ++ if (iy == 0x7ff00000) /* y is +-inf */ + { +- if (((ix - 0x3ff00000) | p.parts32.w1 +- | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) == 0) +- return y - y; /* inf**+-1 is NaN */ +- else if (ix > 0x3ff00000 || fabsl (x) > 1.0L) ++ if (ax > one) + /* (|x|>1)**+-inf = inf,0 */ + return (hy >= 0) ? y : zero; + else + /* (|x|<1)**-,+inf = inf,0 */ + return (hy < 0) ? -y : zero; + } +- if (iy == 0x3ff00000) +- { /* y is +-1 */ +- if (hy < 0) +- return one / x; +- else +- return x; +- } +- if (hy == 0x40000000) +- return x * x; /* y is 2 */ +- if (hy == 0x3fe00000) +- { /* y is 0.5 */ +- if (hx >= 0) /* x >= +0 */ +- return __ieee754_sqrtl (x); ++ if (ylo == 0.0) ++ { ++ if (iy == 0x3ff00000) ++ { /* y is +-1 */ ++ if (hy < 0) ++ return one / x; ++ else ++ return x; ++ } ++ if (hy == 0x40000000) ++ return x * x; /* y is 2 */ ++ if (hy == 0x3fe00000) ++ { /* y is 0.5 */ ++ if (hx >= 0) /* x >= +0 */ ++ return __ieee754_sqrtl (x); ++ } + } + } + +- ax = fabsl (x); + /* special value of x */ +- if ((p.parts32.w1 | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) == 0) ++ if (lx == 0) + { +- if (ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000) ++ if (ix == 0x7ff00000 || ix == 0 || (ix == 0x3ff00000 && xlo == 0.0)) + { + z = ax; /*x is +-0,+-inf,+-1 */ + if (hy < 0) +@@ -294,8 +293,8 @@ + { + ax *= two113; + n -= 113; +- o.value = ax; +- ix = o.parts32.w0; ++ ohi = ldbl_high (ax); ++ GET_HIGH_WORD (ix, ohi); + } + n += ((ix) >> 20) - 0x3ff; + j = ix & 0x000fffff; +@@ -312,26 +311,19 @@ + ix -= 0x00100000; + } + +- o.value = ax; +- o.value = __scalbnl (o.value, ((int) ((ix - o.parts32.w0) * 2)) >> 21); +- ax = o.value; ++ ohi = ldbl_high (ax); ++ GET_HIGH_WORD (hax, ohi); ++ ax = __scalbnl (ax, ((int) ((ix - hax) * 2)) >> 21); + + /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ + u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */ + v = one / (ax + bp[k]); + s = u * v; +- s_h = s; ++ s_h = ldbl_high (s); + +- o.value = s_h; +- o.parts32.w3 = 0; +- o.parts32.w2 = 0; +- s_h = o.value; + /* t_h=ax+bp[k] High */ + t_h = ax + bp[k]; +- o.value = t_h; +- o.parts32.w3 = 0; +- o.parts32.w2 = 0; +- t_h = o.value; ++ t_h = ldbl_high (t_h); + t_l = ax - (t_h - bp[k]); + s_l = v * ((u - s_h * t_h) - s_h * t_l); + /* compute log(ax) */ +@@ -342,30 +334,21 @@ + r += s_l * (s_h + s); + s2 = s_h * s_h; + t_h = 3.0 + s2 + r; +- o.value = t_h; +- o.parts32.w3 = 0; +- o.parts32.w2 = 0; +- t_h = o.value; ++ t_h = ldbl_high (t_h); + t_l = r - ((t_h - 3.0) - s2); + /* u+v = s*(1+...) */ + u = s_h * t_h; + v = s_l * t_h + t_l * s; + /* 2/(3log2)*(s+...) 
*/ + p_h = u + v; +- o.value = p_h; +- o.parts32.w3 = 0; +- o.parts32.w2 = 0; +- p_h = o.value; ++ p_h = ldbl_high (p_h); + p_l = v - (p_h - u); + z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */ + z_l = cp_l * p_h + p_l * cp + dp_l[k]; + /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */ + t = (long double) n; + t1 = (((z_h + z_l) + dp_h[k]) + t); +- o.value = t1; +- o.parts32.w3 = 0; +- o.parts32.w2 = 0; +- t1 = o.value; ++ t1 = ldbl_high (t1); + t2 = z_l - (((t1 - t) - dp_h[k]) - z_h); + + /* s (sign of result -ve**odd) = -1 else = 1 */ +@@ -374,21 +357,16 @@ + s = -one; /* (-ve)**(odd int) */ + + /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ +- y1 = y; +- o.value = y1; +- o.parts32.w3 = 0; +- o.parts32.w2 = 0; +- y1 = o.value; ++ y1 = ldbl_high (y); + p_l = (y - y1) * t1 + y * t2; + p_h = y1 * t1; + z = p_l + p_h; +- o.value = z; +- j = o.parts32.w0; ++ ohi = ldbl_high (z); ++ EXTRACT_WORDS (j, lj, ohi); + if (j >= 0x40d00000) /* z >= 16384 */ + { + /* if z > 16384 */ +- if (((j - 0x40d00000) | o.parts32.w1 +- | (o.parts32.w2 & 0x7fffffff) | o.parts32.w3) != 0) ++ if (((j - 0x40d00000) | lj) != 0) + return s * huge * huge; /* overflow */ + else + { +@@ -399,8 +377,7 @@ + else if ((j & 0x7fffffff) >= 0x40d01b90) /* z <= -16495 */ + { + /* z < -16495 */ +- if (((j - 0xc0d01bc0) | o.parts32.w1 +- | (o.parts32.w2 & 0x7fffffff) | o.parts32.w3) != 0) ++ if (((j - 0xc0d01bc0) | lj) != 0) + return s * tiny * tiny; /* underflow */ + else + { +@@ -419,10 +396,7 @@ + p_h -= t; + } + t = p_l + p_h; +- o.value = t; +- o.parts32.w3 = 0; +- o.parts32.w2 = 0; +- t = o.value; ++ t = ldbl_high (t); + u = t * lg2_h; + v = (p_l - (t - p_h)) * lg2 + t * lg2_l; + z = u + v; +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_tanl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_tanl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_tanl.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_tanl.c 2014-05-27 23:05:55.000000000 -0500 +@@ -85,17 +85,17 @@ + __kernel_tanl (long double x, long double y, int iy) + { + long double z, r, v, w, s; +- int32_t ix, sign; +- ieee854_long_double_shape_type u, u1; ++ int32_t ix, sign, hx, lx; ++ double xhi; + +- u.value = x; +- ix = u.parts32.w0 & 0x7fffffff; ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS (hx, lx, xhi); ++ ix = hx & 0x7fffffff; + if (ix < 0x3c600000) /* x < 2**-57 */ + { +- if ((int) x == 0) +- { /* generate inexact */ +- if ((ix | u.parts32.w1 | (u.parts32.w2 & 0x7fffffff) | u.parts32.w3 +- | (iy + 1)) == 0) ++ if ((int) x == 0) /* generate inexact */ ++ { ++ if ((ix | lx | (iy + 1)) == 0) + return one / fabs (x); + else + return (iy == 1) ? 
x : -one / x; +@@ -103,7 +103,7 @@ + } + if (ix >= 0x3fe59420) /* |x| >= 0.6743316650390625 */ + { +- if ((u.parts32.w0 & 0x80000000) != 0) ++ if ((hx & 0x80000000) != 0) + { + x = -x; + y = -y; +@@ -139,15 +139,13 @@ + { /* if allow error up to 2 ulp, + simply return -1.0/(x+r) here */ + /* compute -1.0/(x+r) accurately */ +- u1.value = w; +- u1.parts32.w2 = 0; +- u1.parts32.w3 = 0; +- v = r - (u1.value - x); /* u1+v = r+x */ ++ long double u1, z1; ++ ++ u1 = ldbl_high (w); ++ v = r - (u1 - x); /* u1+v = r+x */ + z = -1.0 / w; +- u.value = z; +- u.parts32.w2 = 0; +- u.parts32.w3 = 0; +- s = 1.0 + u.value * u1.value; +- return u.value + z * (s + u.value * v); ++ z1 = ldbl_high (z); ++ s = 1.0 + z1 * u1; ++ return z1 + z * (s + z1 * v); + } + } +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_expm1l.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_expm1l.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_expm1l.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_expm1l.c 2014-05-27 23:05:55.000000000 -0500 +@@ -92,19 +92,19 @@ + __expm1l (long double x) + { + long double px, qx, xx; +- int32_t ix, sign; +- ieee854_long_double_shape_type u; ++ int32_t ix, lx, sign; + int k; ++ double xhi; + + /* Detect infinity and NaN. */ +- u.value = x; +- ix = u.parts32.w0; ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS (ix, lx, xhi); + sign = ix & 0x80000000; + ix &= 0x7fffffff; + if (ix >= 0x7ff00000) + { + /* Infinity. */ +- if (((ix & 0xfffff) | u.parts32.w1 | (u.parts32.w2&0x7fffffff) | u.parts32.w3) == 0) ++ if (((ix - 0x7ff00000) | lx) == 0) + { + if (sign) + return -1.0L; +@@ -116,7 +116,7 @@ + } + + /* expm1(+- 0) = +- 0. */ +- if ((ix == 0) && (u.parts32.w1 | (u.parts32.w2&0x7fffffff) | u.parts32.w3) == 0) ++ if ((ix | lx) == 0) + return x; + + /* Overflow. */ +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_frexpl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_frexpl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_frexpl.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_frexpl.c 2014-05-27 23:05:55.000000000 -0500 +@@ -36,16 +36,21 @@ + + long double __frexpl(long double x, int *eptr) + { +- u_int64_t hx, lx, ix, ixl; ++ uint64_t hx, lx, ix, ixl; + int64_t explo; +- GET_LDOUBLE_WORDS64(hx,lx,x); ++ double xhi, xlo; ++ ++ ldbl_unpack (x, &xhi, &xlo); ++ EXTRACT_WORDS64 (hx, xhi); ++ EXTRACT_WORDS64 (lx, xlo); + ixl = 0x7fffffffffffffffULL&lx; + ix = 0x7fffffffffffffffULL&hx; + *eptr = 0; +- if(ix>=0x7ff0000000000000ULL||((ix|ixl)==0)) return x; /* 0,inf,nan */ ++ if(ix>=0x7ff0000000000000ULL||ix==0) return x; /* 0,inf,nan */ + if (ix<0x0010000000000000ULL) { /* subnormal */ + x *= two107; +- GET_LDOUBLE_MSW64(hx,x); ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (hx, xhi); + ix = hx&0x7fffffffffffffffULL; + *eptr = -107; + } +@@ -54,7 +59,7 @@ + if (ixl != 0ULL) { + explo = (ixl>>52) - (ix>>52) + 0x3fe; + if ((ixl&0x7ff0000000000000ULL) == 0LL) { +- /* the lower double is a denomal so we need to correct its ++ /* the lower double is a denormal so we need to correct its + mantissa and perhaps its exponent. 
*/ + int cnt; + +@@ -73,7 +78,9 @@ + lx = 0ULL; + + hx = (hx&0x800fffffffffffffULL) | 0x3fe0000000000000ULL; +- SET_LDOUBLE_WORDS64(x,hx,lx); ++ INSERT_WORDS64 (xhi, hx); ++ INSERT_WORDS64 (xlo, lx); ++ x = ldbl_pack (xhi, xlo); + return x; + } + #ifdef IS_IN_libm +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_isinf_nsl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_isinf_nsl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_isinf_nsl.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_isinf_nsl.c 2014-05-27 23:05:55.000000000 -0500 +@@ -1,6 +1,7 @@ + /* + * __isinf_nsl(x) returns != 0 if x is ±inf, else 0; + * no branching! ++ * slightly dodgy in relying on signed shift right copying sign bit + */ + + #include +@@ -9,8 +10,14 @@ + int + __isinf_nsl (long double x) + { +- int64_t hx,lx; +- GET_LDOUBLE_WORDS64(hx,lx,x); +- return !((lx & 0x7fffffffffffffffLL) +- | ((hx & 0x7fffffffffffffffLL) ^ 0x7ff0000000000000LL)); ++ double xhi; ++ int64_t hx, mask; ++ ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (hx, xhi); ++ ++ mask = (hx & 0x7fffffffffffffffLL) ^ 0x7ff0000000000000LL; ++ mask |= -mask; ++ mask >>= 63; ++ return ~mask; + } +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_isinfl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_isinfl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_isinfl.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_isinfl.c 2014-05-27 23:05:55.000000000 -0500 +@@ -11,6 +11,7 @@ + /* + * isinfl(x) returns 1 if x is inf, -1 if x is -inf, else 0; + * no branching! ++ * slightly dodgy in relying on signed shift right copying sign bit + */ + + #include +@@ -20,12 +21,16 @@ + int + ___isinfl (long double x) + { +- int64_t hx,lx; +- GET_LDOUBLE_WORDS64(hx,lx,x); +- lx = (lx & 0x7fffffffffffffffLL); +- lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7ff0000000000000LL; +- lx |= -lx; +- return ~(lx >> 63) & (hx >> 62); ++ double xhi; ++ int64_t hx, mask; ++ ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (hx, xhi); ++ ++ mask = (hx & 0x7fffffffffffffffLL) ^ 0x7ff0000000000000LL; ++ mask |= -mask; ++ mask >>= 63; ++ return ~mask & (hx >> 62); + } + hidden_ver (___isinfl, __isinfl) + #ifndef IS_IN_libm +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_log1pl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_log1pl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_log1pl.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_log1pl.c 2014-05-27 23:05:55.000000000 -0500 +@@ -126,19 +126,18 @@ + __log1pl (long double xm1) + { + long double x, y, z, r, s; +- ieee854_long_double_shape_type u; +- int32_t hx; ++ double xhi; ++ int32_t hx, lx; + int e; + + /* Test for NaN or infinity input. */ +- u.value = xm1; +- hx = u.parts32.w0; ++ xhi = ldbl_high (xm1); ++ EXTRACT_WORDS (hx, lx, xhi); + if (hx >= 0x7ff00000) + return xm1; + + /* log1p(+- 0) = +- 0. 
*/ +- if (((hx & 0x7fffffff) == 0) +- && (u.parts32.w1 | (u.parts32.w2 & 0x7fffffff) | u.parts32.w3) == 0) ++ if (((hx & 0x7fffffff) | lx) == 0) + return xm1; + + x = xm1 + 1.0L; +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_modfl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_modfl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_modfl.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_modfl.c 2014-05-27 23:05:55.000000000 -0500 +@@ -37,43 +37,54 @@ + { + int64_t i0,i1,j0; + u_int64_t i; +- GET_LDOUBLE_WORDS64(i0,i1,x); ++ double xhi, xlo; ++ ++ ldbl_unpack (x, &xhi, &xlo); ++ EXTRACT_WORDS64 (i0, xhi); ++ EXTRACT_WORDS64 (i1, xlo); + i1 &= 0x000fffffffffffffLL; + j0 = ((i0>>52)&0x7ff)-0x3ff; /* exponent of x */ + if(j0<52) { /* integer part in high x */ + if(j0<0) { /* |x|<1 */ + /* *iptr = +-0 */ +- SET_LDOUBLE_WORDS64(*iptr,i0&0x8000000000000000ULL,0); ++ INSERT_WORDS64 (xhi, i0&0x8000000000000000ULL); ++ *iptr = xhi; + return x; + } else { + i = (0x000fffffffffffffLL)>>j0; + if(((i0&i)|(i1&0x7fffffffffffffffLL))==0) { /* x is integral */ + *iptr = x; + /* return +-0 */ +- SET_LDOUBLE_WORDS64(x,i0&0x8000000000000000ULL,0); ++ INSERT_WORDS64 (xhi, i0&0x8000000000000000ULL); ++ x = xhi; + return x; + } else { +- SET_LDOUBLE_WORDS64(*iptr,i0&(~i),0); ++ INSERT_WORDS64 (xhi, i0&(~i)); ++ *iptr = xhi; + return x - *iptr; + } + } + } else if (j0>103) { /* no fraction part */ + *iptr = x*one; + /* We must handle NaNs separately. */ +- if (j0 == 0x400 && ((i0 & 0x000fffffffffffffLL) | i1)) ++ if ((i0 & 0x7fffffffffffffffLL) > 0x7ff0000000000000LL) + return x*one; + /* return +-0 */ +- SET_LDOUBLE_WORDS64(x,i0&0x8000000000000000ULL,0); ++ INSERT_WORDS64 (xhi, i0&0x8000000000000000ULL); ++ x = xhi; + return x; + } else { /* fraction part in low x */ + i = -1ULL>>(j0-52); + if((i1&i)==0) { /* x is integral */ + *iptr = x; + /* return +-0 */ +- SET_LDOUBLE_WORDS64(x,i0&0x8000000000000000ULL,0); ++ INSERT_WORDS64 (xhi, i0&0x8000000000000000ULL); ++ x = xhi; + return x; + } else { +- SET_LDOUBLE_WORDS64(*iptr,i0,i1&(~i)); ++ INSERT_WORDS64 (xhi, i0); ++ INSERT_WORDS64 (xlo, i1&(~i)); ++ *iptr = ldbl_pack (xhi, xlo); + return x - *iptr; + } + } +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c 2014-05-27 23:05:55.000000000 -0500 +@@ -30,27 +30,28 @@ + + long double __nextafterl(long double x, long double y) + { +- int64_t hx,hy,ihx,ihy,ilx; +- u_int64_t lx; +- u_int64_t ly __attribute__ ((unused)); ++ int64_t hx,hy,ihx,ihy; ++ uint64_t lx; ++ double xhi, xlo, yhi; + +- GET_LDOUBLE_WORDS64(hx,lx,x); +- GET_LDOUBLE_WORDS64(hy,ly,y); ++ ldbl_unpack (x, &xhi, &xlo); ++ EXTRACT_WORDS64 (hx, xhi); ++ EXTRACT_WORDS64 (lx, xlo); ++ yhi = ldbl_high (y); ++ EXTRACT_WORDS64 (hy, yhi); + ihx = hx&0x7fffffffffffffffLL; /* |hx| */ +- ilx = lx&0x7fffffffffffffffLL; /* |lx| */ + ihy = hy&0x7fffffffffffffffLL; /* |hy| */ + +- if((((ihx&0x7ff0000000000000LL)==0x7ff0000000000000LL)&& +- ((ihx&0x000fffffffffffffLL)!=0)) || /* x is nan */ +- (((ihy&0x7ff0000000000000LL)==0x7ff0000000000000LL)&& +- ((ihy&0x000fffffffffffffLL)!=0))) /* y is nan */ ++ if((ihx>0x7ff0000000000000LL) || /* x is nan */ ++ (ihy>0x7ff0000000000000LL)) /* y is nan */ + return x+y; /* signal the nan */ + if(x==y) + return y; 
/* x=y, return y */ +- if(ihx == 0 && ilx == 0) { /* x == 0 */ +- long double u; ++ if(ihx == 0) { /* x == 0 */ ++ long double u; /* return +-minsubnormal */ + hy = (hy & 0x8000000000000000ULL) | 1; +- SET_LDOUBLE_WORDS64(x,hy,0ULL);/* return +-minsubnormal */ ++ INSERT_WORDS64 (yhi, hy); ++ x = yhi; + u = math_opt_barrier (x); + u = u * u; + math_force_eval (u); /* raise underflow flag */ +@@ -59,10 +60,16 @@ + + long double u; + if(x > y) { /* x > y, x -= ulp */ ++ /* This isn't the largest magnitude correctly rounded ++ long double as you can see from the lowest mantissa ++ bit being zero. It is however the largest magnitude ++ long double with a 106 bit mantissa, and nextafterl ++ is insane with variable precision. So to make ++ nextafterl sane we assume 106 bit precision. */ + if((hx==0xffefffffffffffffLL)&&(lx==0xfc8ffffffffffffeLL)) + return x+x; /* overflow, return -inf */ + if (hx >= 0x7ff0000000000000LL) { +- SET_LDOUBLE_WORDS64(u,0x7fefffffffffffffLL,0x7c8ffffffffffffeLL); ++ u = 0x1.fffffffffffff7ffffffffffff8p+1023L; + return u; + } + if(ihx <= 0x0360000000000000LL) { /* x <= LDBL_MIN */ +@@ -77,16 +84,19 @@ + return x; + } + if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */ +- SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL),0ULL); ++ INSERT_WORDS64 (yhi, hx & (0x7ffLL<<52)); ++ u = yhi; + u *= 0x1.0000000000000p-105L; +- } else +- SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL)-0x0690000000000000LL,0ULL); ++ } else { ++ INSERT_WORDS64 (yhi, (hx & (0x7ffLL<<52))-(0x069LL<<52)); ++ u = yhi; ++ } + return x - u; + } else { /* x < y, x += ulp */ + if((hx==0x7fefffffffffffffLL)&&(lx==0x7c8ffffffffffffeLL)) + return x+x; /* overflow, return +inf */ +- if ((u_int64_t) hx >= 0xfff0000000000000ULL) { +- SET_LDOUBLE_WORDS64(u,0xffefffffffffffffLL,0xfc8ffffffffffffeLL); ++ if ((uint64_t) hx >= 0xfff0000000000000ULL) { ++ u = -0x1.fffffffffffff7ffffffffffff8p+1023L; + return u; + } + if(ihx <= 0x0360000000000000LL) { /* x <= LDBL_MIN */ +@@ -103,10 +113,13 @@ + return x; + } + if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */ +- SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL),0ULL); ++ INSERT_WORDS64 (yhi, hx & (0x7ffLL<<52)); ++ u = yhi; + u *= 0x1.0000000000000p-105L; +- } else +- SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL)-0x0690000000000000LL,0ULL); ++ } else { ++ INSERT_WORDS64 (yhi, (hx & (0x7ffLL<<52))-(0x069LL<<52)); ++ u = yhi; ++ } + return x + u; + } + } +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c 2014-05-27 23:10:26.000000000 -0500 +@@ -34,23 +34,23 @@ + { + int32_t hx,ix; + int64_t hy,iy; +- u_int32_t lx; +- u_int64_t ly,uly; ++ uint32_t lx; ++ double yhi; ++ + + EXTRACT_WORDS(hx,lx,x); +- GET_LDOUBLE_WORDS64(hy,ly,y); ++ yhi = ldbl_high (y); ++ EXTRACT_WORDS64(hy,yhi); + ix = hx&0x7fffffff; /* |x| */ + iy = hy&0x7fffffffffffffffLL; /* |y| */ +- uly = ly&0x7fffffffffffffffLL; /* |y| */ + + if(((ix>=0x7ff00000)&&((ix-0x7ff00000)|lx)!=0) || /* x is nan */ +- ((iy>=0x7ff0000000000000LL)&&((iy-0x7ff0000000000000LL)|uly)!=0)) +- /* y is nan */ ++ iy>0x7ff0000000000000LL) /* y is nan */ + return x+y; + if((long double) x==y) return y; /* x=y, return y */ + if((ix|lx)==0) { /* x == 0 */ + double u; +- INSERT_WORDS(x,(u_int32_t)((hy>>32)&0x80000000),1);/* return +-minsub */ ++ 
INSERT_WORDS(x,(uint32_t)((hy>>32)&0x80000000),1);/* return +-minsub */ + u = math_opt_barrier (x); + u = u * u; + math_force_eval (u); /* raise underflow flag */ +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c 2014-05-27 23:05:55.000000000 -0500 +@@ -27,16 +27,16 @@ + { + int32_t hx,ix; + int64_t hy,iy; +- u_int64_t ly, uly; ++ double yhi; + + GET_FLOAT_WORD(hx,x); +- GET_LDOUBLE_WORDS64(hy,ly,y); ++ yhi = ldbl_high (y); ++ EXTRACT_WORDS64 (hy, yhi); + ix = hx&0x7fffffff; /* |x| */ + iy = hy&0x7fffffffffffffffLL; /* |y| */ +- uly = ly&0x7fffffffffffffffLL; /* |y| */ + + if((ix>0x7f800000) || /* x is nan */ +- ((iy>=0x7ff0000000000000LL)&&((iy-0x7ff0000000000000LL)|uly)!=0)) ++ (iy>0x7ff0000000000000LL)) + /* y is nan */ + return x+y; + if((long double) x==y) return y; /* x=y, return y */ +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_remquol.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_remquol.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_remquol.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_remquol.c 2014-05-27 23:05:55.000000000 -0500 +@@ -33,20 +33,24 @@ + int64_t hx,hy; + u_int64_t sx,lx,ly,qs; + int cquo; ++ double xhi, xlo, yhi, ylo; + +- GET_LDOUBLE_WORDS64 (hx, lx, x); +- GET_LDOUBLE_WORDS64 (hy, ly, y); ++ ldbl_unpack (x, &xhi, &xlo); ++ EXTRACT_WORDS64 (hx, xhi); ++ EXTRACT_WORDS64 (lx, xlo); ++ ldbl_unpack (y, &yhi, &ylo); ++ EXTRACT_WORDS64 (hy, yhi); ++ EXTRACT_WORDS64 (ly, ylo); + sx = hx & 0x8000000000000000ULL; + qs = sx ^ (hy & 0x8000000000000000ULL); + hy &= 0x7fffffffffffffffLL; + hx &= 0x7fffffffffffffffLL; + + /* Purge off exception values. 
*/ +- if ((hy | (ly & 0x7fffffffffffffff)) == 0) ++ if (hy == 0) + return (x * y) / (x * y); /* y = 0 */ + if ((hx >= 0x7ff0000000000000LL) /* x not finite */ +- || ((hy >= 0x7ff0000000000000LL) /* y is NaN */ +- && (((hy - 0x7ff0000000000000LL) | ly) != 0))) ++ || (hy > 0x7ff0000000000000LL)) /* y is NaN */ + return (x * y) / (x * y); + + if (hy <= 0x7fbfffffffffffffLL) +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_scalblnl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_scalblnl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_scalblnl.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_scalblnl.c 2014-05-27 23:15:30.000000000 -0500 +@@ -41,11 +41,15 @@ + { + int64_t k,l,hx,lx; + union { int64_t i; double d; } u; +- GET_LDOUBLE_WORDS64(hx,lx,x); ++ double xhi, xlo; ++ ++ ldbl_unpack (x, &xhi, &xlo); ++ EXTRACT_WORDS64 (hx, xhi); ++ EXTRACT_WORDS64 (lx, xlo); + k = (hx>>52)&0x7ff; /* extract exponent */ + l = (lx>>52)&0x7ff; + if (k==0) { /* 0 or subnormal x */ +- if (((hx|lx)&0x7fffffffffffffffULL)==0) return x; /* +-0 */ ++ if ((hx&0x7fffffffffffffffULL)==0) return x; /* +-0 */ + u.i = hx; + u.d *= two54; + hx = u.i; +@@ -61,7 +65,9 @@ + if (k > 0) { /* normal result */ + hx = (hx&0x800fffffffffffffULL)|(k<<52); + if ((lx & 0x7fffffffffffffffULL) == 0) { /* low part +-0 */ +- SET_LDOUBLE_WORDS64(x,hx,lx); ++ INSERT_WORDS64 (xhi, hx); ++ INSERT_WORDS64 (xlo, lx); ++ x = ldbl_pack (xhi, xlo); + return x; + } + if (l == 0) { /* low part subnormal */ +@@ -81,14 +87,19 @@ + u.d *= twom54; + lx = u.i; + } +- SET_LDOUBLE_WORDS64(x,hx,lx); ++ INSERT_WORDS64 (xhi, hx); ++ INSERT_WORDS64 (xlo, lx); ++ x = ldbl_pack (xhi, xlo); + return x; + } + if (k <= -54) + return tiny*__copysignl(tiny,x); /*underflow*/ + k += 54; /* subnormal result */ + lx &= 0x8000000000000000ULL; +- SET_LDOUBLE_WORDS64(x,(hx&0x800fffffffffffffULL)|(k<<52),lx); ++ hx &= 0x800fffffffffffffULL; ++ INSERT_WORDS64 (xhi, hx|(k<<52)); ++ INSERT_WORDS64 (xlo, lx); ++ x = ldbl_pack (xhi, xlo); + return x*twolm54; + } + long_double_symbol (libm, __scalblnl, scalblnl); +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_scalbnl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_scalbnl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_scalbnl.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_scalbnl.c 2014-05-27 23:16:25.000000000 -0500 +@@ -41,11 +41,15 @@ + { + int64_t k,l,hx,lx; + union { int64_t i; double d; } u; +- GET_LDOUBLE_WORDS64(hx,lx,x); ++ double xhi, xlo; ++ ++ ldbl_unpack (x, &xhi, &xlo); ++ EXTRACT_WORDS64 (hx, xhi); ++ EXTRACT_WORDS64 (lx, xlo); + k = (hx>>52)&0x7ff; /* extract exponent */ + l = (lx>>52)&0x7ff; + if (k==0) { /* 0 or subnormal x */ +- if (((hx|lx)&0x7fffffffffffffffULL)==0) return x; /* +-0 */ ++ if ((hx&0x7fffffffffffffffULL)==0) return x; /* +-0 */ + u.i = hx; + u.d *= two54; + hx = u.i; +@@ -61,7 +65,9 @@ + if (k > 0) { /* normal result */ + hx = (hx&0x800fffffffffffffULL)|(k<<52); + if ((lx & 0x7fffffffffffffffULL) == 0) { /* low part +-0 */ +- SET_LDOUBLE_WORDS64(x,hx,lx); ++ INSERT_WORDS64 (xhi, hx); ++ INSERT_WORDS64 (xlo, lx); ++ x = ldbl_pack (xhi, xlo); + return x; + } + if (l == 0) { /* low part subnormal */ +@@ -81,14 +87,19 @@ + u.d *= twom54; + lx = u.i; + } +- SET_LDOUBLE_WORDS64(x,hx,lx); ++ INSERT_WORDS64 (xhi, hx); ++ INSERT_WORDS64 (xlo, lx); ++ x = ldbl_pack (xhi, xlo); + return x; + } + if (k <= -54) + return tiny*__copysignl(tiny,x); /*underflow*/ + k += 
54; /* subnormal result */ + lx &= 0x8000000000000000ULL; +- SET_LDOUBLE_WORDS64(x,(hx&0x800fffffffffffffULL)|(k<<52),lx); ++ hx &= 0x800fffffffffffffULL; ++ INSERT_WORDS64 (xhi, hx|(k<<52)); ++ INSERT_WORDS64 (xlo, lx); ++ x = ldbl_pack (xhi, xlo); + return x*twolm54; + } + #ifdef IS_IN_libm +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_tanhl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_tanhl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_tanhl.c 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_tanhl.c 2014-05-27 23:05:55.000000000 -0500 +@@ -47,10 +47,12 @@ + long double __tanhl(long double x) + { + long double t,z; +- int64_t jx,ix,lx; ++ int64_t jx,ix; ++ double xhi; + + /* High word of |x|. */ +- GET_LDOUBLE_WORDS64(jx,lx,x); ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (jx, xhi); + ix = jx&0x7fffffffffffffffLL; + + /* x is INF or NaN */ +@@ -61,7 +63,7 @@ + + /* |x| < 22 */ + if (ix < 0x4036000000000000LL) { /* |x|<22 */ +- if ((ix | (lx&0x7fffffffffffffffLL)) == 0) ++ if (ix == 0) + return x; /* x == +-0 */ + if (ix<0x3c60000000000000LL) /* |x|<2**-57 */ + return x*(one+x); /* tanh(small) = small */ +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/libm-test-ulps glibc-2.17-c758a686/sysdeps/powerpc/fpu/libm-test-ulps +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/libm-test-ulps 2014-05-27 23:05:51.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/libm-test-ulps 2014-05-27 23:08:26.000000000 -0500 +@@ -2641,6 +2641,9 @@ + ifloat: 1 + ildouble: 2 + ldouble: 2 ++Test "tan_towardzero (2)": ++ildouble: 1 ++ldouble: 1 + Test "tan_towardzero (3) == -0.1425465430742778052956354105339134932261": + float: 1 + ifloat: 1 diff --git a/packages/glibc/2.17/0031-glibc-ppc64le-09.patch b/packages/glibc/2.17/0031-glibc-ppc64le-09.patch new file mode 100644 index 0000000..969d3a1 --- /dev/null +++ b/packages/glibc/2.17/0031-glibc-ppc64le-09.patch @@ -0,0 +1,567 @@ +# commit 650ef4bd7976e36831cba22d838b567d3b5f6e8f +# Author: Alan Modra +# Date: Sat Aug 17 18:25:51 2013 +0930 +# +# PowerPC floating point little-endian [4 of 15] +# http://sourceware.org/ml/libc-alpha/2013-08/msg00084.html +# +# Another batch of ieee854 macros and union replacement. These four +# files also have bugs fixed with this patch. The fact that the two +# doubles in an IBM long double may have different signs means that +# negation and absolute value operations can't just twiddle one sign bit +# as you can with ieee864 style extended double. fmodl, remainderl, +# erfl and erfcl all had errors of this type. erfl also returned +1 for +# large magnitude negative input where it should return -1. The hypotl +# error is innocuous since the value adjusted twice is only used as a +# flag. The e_hypotl.c tests for large "a" and small "b" are mutually +# exclusive because we've already exited when x/y > 2**120. That allows +# some further small simplifications. +# +# [BZ #15734], [BZ #15735] +# * sysdeps/ieee754/ldbl-128ibm/e_fmodl.c (__ieee754_fmodl): Rewrite +# all uses of ieee875 long double macros and unions. Simplify test +# for 0.0L. Correct |x|<|y| and |x|=|y| test. Use +# ldbl_extract_mantissa value for ix,iy exponents. Properly +# normalize after ldbl_extract_mantissa, and don't add hidden bit +# already handled. Don't treat low word of ieee854 mantissa like +# low word of IBM long double and mask off bit when testing for +# zero. 
+# * sysdeps/ieee754/ldbl-128ibm/e_hypotl.c (__ieee754_hypotl): Rewrite +# all uses of ieee875 long double macros and unions. Simplify tests +# for 0.0L and inf. Correct double adjustment of k. Delete dead code +# adjusting ha,hb. Simplify code setting kld. Delete two600 and +# two1022, instead use their values. Recognise that tests for large +# "a" and small "b" are mutually exclusive. Rename vars. Comment. +# * sysdeps/ieee754/ldbl-128ibm/e_remainderl.c (__ieee754_remainderl): +# Rewrite all uses of ieee875 long double macros and unions. Simplify +# test for 0.0L and nan. Correct negation. +# * sysdeps/ieee754/ldbl-128ibm/s_erfl.c (__erfl): Rewrite all uses of +# ieee875 long double macros and unions. Correct output for large +# magnitude x. Correct absolute value calculation. +# (__erfcl): Likewise. +# * math/libm-test.inc: Add tests for errors discovered in IBM long +# double versions of fmodl, remainderl, erfl and erfcl. +# +diff -urN glibc-2.17-c758a686/math/libm-test.inc glibc-2.17-c758a686/math/libm-test.inc +--- glibc-2.17-c758a686/math/libm-test.inc 2014-05-27 20:02:29.000000000 -0500 ++++ glibc-2.17-c758a686/math/libm-test.inc 2014-05-27 20:09:59.000000000 -0500 +@@ -4040,6 +4040,10 @@ + TEST_f_f (erf, 2.0L, 0.995322265018952734162069256367252929L); + TEST_f_f (erf, 4.125L, 0.999999994576599200434933994687765914L); + TEST_f_f (erf, 27.0L, 1.0L); ++#if defined TEST_LDOUBLE && LDBL_MANT_DIG >= 54 ++ /* The input is not exactly representable as a double. */ ++ TEST_f_f (erf, -0x1.fffffffffffff8p-2L, -0.5204998778130465132916303345518417673509L); ++#endif + + END (erf); + } +@@ -4071,6 +4075,10 @@ + TEST_f_f (erfc, 0x1.ffa002p+2L, 1.233585992097580296336099501489175967033e-29L); + TEST_f_f (erfc, 0x1.ffffc8p+2L, 1.122671365033056305522366683719541099329e-29L); + #ifdef TEST_LDOUBLE ++# if LDBL_MANT_DIG >= 54 ++ /* The input is not exactly representable as a double. */ ++ TEST_f_f (erfc, -0x1.fffffffffffff8p-2L, 1.52049987781304651329163033455184176735L); ++# endif + /* The result can only be represented in long double. 
*/ + # if LDBL_MIN_10_EXP < -319 + TEST_f_f (erfc, 27.0L, 0.523704892378925568501606768284954709e-318L); +@@ -5634,6 +5642,13 @@ + #if defined TEST_LDOUBLE && LDBL_MIN_EXP <= -16381 + TEST_ff_f (fmod, 0x0.fffffffffffffffep-16382L, 0x1p-16445L, plus_zero); + #endif ++#if defined TEST_LDOUBLE && LDBL_MANT_DIG >= 56 ++ TEST_ff_f (fmod, -0x1.00000000000004p+0L, 0x1.fffffffffffff8p-1L, -0x1p-53L); ++ TEST_ff_f (fmod, 0x1.fffffffffffffap-1L, 0x1.fffffffffffff8p-1L, 0x1p-56L); ++ TEST_ff_f (fmod, -0x1.fffffffffffffap-1L, 0x1.fffffffffffff8p-1L, -0x1p-56L); ++ TEST_ff_f (fmod, 0x1.fffffffffffffap-1L, -0x1.fffffffffffff8p-1L, 0x1p-56L); ++ TEST_ff_f (fmod, -0x1.fffffffffffffap-1L, -0x1.fffffffffffff8p-1L, -0x1p-56L); ++#endif + + END (fmod); + } +@@ -8642,6 +8657,9 @@ + TEST_ff_f (remainder, -1.625, -1.0, 0.375); + TEST_ff_f (remainder, 5.0, 2.0, 1.0); + TEST_ff_f (remainder, 3.0, 2.0, -1.0); ++#if defined TEST_LDOUBLE && LDBL_MANT_DIG >= 56 ++ TEST_ff_f (remainder, -0x1.80000000000002p1L, 2.0, 0x1.fffffffffffff8p-1L); ++#endif + + END (remainder); + } +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_fmodl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_fmodl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_fmodl.c 2014-05-27 20:02:27.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_fmodl.c 2014-05-27 20:04:08.000000000 -0500 +@@ -27,76 +27,83 @@ + long double + __ieee754_fmodl (long double x, long double y) + { +- int64_t n,hx,hy,hz,ix,iy,sx, i; +- u_int64_t lx,ly,lz; +- int temp; +- +- GET_LDOUBLE_WORDS64(hx,lx,x); +- GET_LDOUBLE_WORDS64(hy,ly,y); ++ int64_t hx, hy, hz, sx, sy; ++ uint64_t lx, ly, lz; ++ int n, ix, iy; ++ double xhi, xlo, yhi, ylo; ++ ++ ldbl_unpack (x, &xhi, &xlo); ++ EXTRACT_WORDS64 (hx, xhi); ++ EXTRACT_WORDS64 (lx, xlo); ++ ldbl_unpack (y, &yhi, &ylo); ++ EXTRACT_WORDS64 (hy, yhi); ++ EXTRACT_WORDS64 (ly, ylo); + sx = hx&0x8000000000000000ULL; /* sign of x */ +- hx ^=sx; /* |x| */ +- hy &= 0x7fffffffffffffffLL; /* |y| */ ++ hx ^= sx; /* |x| */ ++ sy = hy&0x8000000000000000ULL; /* sign of y */ ++ hy ^= sy; /* |y| */ + + /* purge off exception values */ +- if(__builtin_expect((hy|(ly&0x7fffffffffffffff))==0 || ++ if(__builtin_expect(hy==0 || + (hx>=0x7ff0000000000000LL)|| /* y=0,or x not finite */ + (hy>0x7ff0000000000000LL),0)) /* or y is NaN */ + return (x*y)/(x*y); +- if(__builtin_expect(hx<=hy,0)) { +- if((hx>63]; /* |x|=|y| return x*0*/ ++ if (__builtin_expect (hx <= hy, 0)) ++ { ++ /* If |x| < |y| return x. */ ++ if (hx < hy) ++ return x; ++ /* At this point the absolute value of the high doubles of ++ x and y must be equal. */ ++ /* If the low double of y is the same sign as the high ++ double of y (ie. the low double increases |y|)... */ ++ if (((ly ^ sy) & 0x8000000000000000LL) == 0 ++ /* ... then a different sign low double to high double ++ for x or same sign but lower magnitude... */ ++ && (int64_t) (lx ^ sx) < (int64_t) (ly ^ sy)) ++ /* ... means |x| < |y|. */ ++ return x; ++ /* If the low double of x differs in sign to the high ++ double of x (ie. the low double decreases |x|)... */ ++ if (((lx ^ sx) & 0x8000000000000000LL) != 0 ++ /* ... then a different sign low double to high double ++ for y with lower magnitude (we've already caught ++ the same sign for y case above)... */ ++ && (int64_t) (lx ^ sx) > (int64_t) (ly ^ sy)) ++ /* ... means |x| < |y|. */ ++ return x; ++ /* If |x| == |y| return x*0. 
*/ ++ if ((lx ^ sx) == (ly ^ sy)) ++ return Zero[(uint64_t) sx >> 63]; + } + +- /* determine ix = ilogb(x) */ +- if(__builtin_expect(hx<0x0010000000000000LL,0)) { /* subnormal x */ +- if(hx==0) { +- for (ix = -1043, i=lx; i>0; i<<=1) ix -=1; +- } else { +- for (ix = -1022, i=(hx<<11); i>0; i<<=1) ix -=1; +- } +- } else ix = (hx>>52)-0x3ff; +- +- /* determine iy = ilogb(y) */ +- if(__builtin_expect(hy<0x0010000000000000LL,0)) { /* subnormal y */ +- if(hy==0) { +- for (iy = -1043, i=ly; i>0; i<<=1) iy -=1; +- } else { +- for (iy = -1022, i=(hy<<11); i>0; i<<=1) iy -=1; +- } +- } else iy = (hy>>52)-0x3ff; +- + /* Make the IBM extended format 105 bit mantissa look like the ieee854 112 + bit mantissa so the following operations will give the correct + result. */ +- ldbl_extract_mantissa(&hx, &lx, &temp, x); +- ldbl_extract_mantissa(&hy, &ly, &temp, y); ++ ldbl_extract_mantissa(&hx, &lx, &ix, x); ++ ldbl_extract_mantissa(&hy, &ly, &iy, y); + +- /* set up {hx,lx}, {hy,ly} and align y to x */ +- if(__builtin_expect(ix >= -1022, 1)) +- hx = 0x0001000000000000LL|(0x0000ffffffffffffLL&hx); +- else { /* subnormal x, shift x to normal */ +- n = -1022-ix; +- if(n<=63) { +- hx = (hx<>(64-n)); +- lx <<= n; +- } else { +- hx = lx<<(n-64); +- lx = 0; +- } +- } +- if(__builtin_expect(iy >= -1022, 1)) +- hy = 0x0001000000000000LL|(0x0000ffffffffffffLL&hy); +- else { /* subnormal y, shift y to normal */ +- n = -1022-iy; +- if(n<=63) { +- hy = (hy<>(64-n)); +- ly <<= n; +- } else { +- hy = ly<<(n-64); +- ly = 0; +- } +- } ++ if (__builtin_expect (ix == -IEEE754_DOUBLE_BIAS, 0)) ++ { ++ /* subnormal x, shift x to normal. */ ++ while ((hx & (1LL << 48)) == 0) ++ { ++ hx = (hx << 1) | (lx >> 63); ++ lx = lx << 1; ++ ix -= 1; ++ } ++ } ++ ++ if (__builtin_expect (iy == -IEEE754_DOUBLE_BIAS, 0)) ++ { ++ /* subnormal y, shift y to normal. 
*/ ++ while ((hy & (1LL << 48)) == 0) ++ { ++ hy = (hy << 1) | (ly >> 63); ++ ly = ly << 1; ++ iy -= 1; ++ } ++ } + + /* fix point fmod */ + n = ix - iy; +@@ -104,7 +111,7 @@ + hz=hx-hy;lz=lx-ly; if(lx>63); lx = lx+lx;} + else { +- if((hz|(lz&0x7fffffffffffffff))==0) /* return sign(x)*0 */ ++ if((hz|lz)==0) /* return sign(x)*0 */ + return Zero[(u_int64_t)sx>>63]; + hx = hz+hz+(lz>>63); lx = lz+lz; + } +@@ -113,7 +120,7 @@ + if(hz>=0) {hx=hz;lx=lz;} + + /* convert back to floating value and restore the sign */ +- if((hx|(lx&0x7fffffffffffffff))==0) /* return sign(x)*0 */ ++ if((hx|lx)==0) /* return sign(x)*0 */ + return Zero[(u_int64_t)sx>>63]; + while(hx<0x0001000000000000LL) { /* normalize x */ + hx = hx+hx+(lx>>63); lx = lx+lx; +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_hypotl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_hypotl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_hypotl.c 2014-05-27 20:02:27.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_hypotl.c 2014-05-27 20:04:08.000000000 -0500 +@@ -45,76 +45,84 @@ + #include + #include + +-static const long double two600 = 0x1.0p+600L; +-static const long double two1022 = 0x1.0p+1022L; +- + long double + __ieee754_hypotl(long double x, long double y) + { +- long double a,b,t1,t2,y1,y2,w,kld; ++ long double a,b,a1,a2,b1,b2,w,kld; + int64_t j,k,ha,hb; ++ double xhi, yhi, hi, lo; + +- GET_LDOUBLE_MSW64(ha,x); ++ xhi = ldbl_high (x); ++ EXTRACT_WORDS64 (ha, xhi); ++ yhi = ldbl_high (y); ++ EXTRACT_WORDS64 (hb, yhi); + ha &= 0x7fffffffffffffffLL; +- GET_LDOUBLE_MSW64(hb,y); + hb &= 0x7fffffffffffffffLL; + if(hb > ha) {a=y;b=x;j=ha; ha=hb;hb=j;} else {a=x;b=y;} + a = fabsl(a); /* a <- |a| */ + b = fabsl(b); /* b <- |b| */ +- if((ha-hb)>0x780000000000000LL) {return a+b;} /* x/y > 2**120 */ ++ if((ha-hb)>0x0780000000000000LL) {return a+b;} /* x/y > 2**120 */ + k=0; + kld = 1.0L; + if(ha > 0x5f30000000000000LL) { /* a>2**500 */ + if(ha >= 0x7ff0000000000000LL) { /* Inf or NaN */ +- u_int64_t low; + w = a+b; /* for sNaN */ +- GET_LDOUBLE_LSW64(low,a); +- if(((ha&0xfffffffffffffLL)|(low&0x7fffffffffffffffLL))==0) ++ if(ha == 0x7ff0000000000000LL) + w = a; +- GET_LDOUBLE_LSW64(low,b); +- if(((hb^0x7ff0000000000000LL)|(low&0x7fffffffffffffffLL))==0) ++ if(hb == 0x7ff0000000000000LL) + w = b; + return w; + } + /* scale a and b by 2**-600 */ +- ha -= 0x2580000000000000LL; hb -= 0x2580000000000000LL; k += 600; +- a /= two600; +- b /= two600; +- k += 600; +- kld = two600; ++ a *= 0x1p-600L; ++ b *= 0x1p-600L; ++ k = 600; ++ kld = 0x1p+600L; + } +- if(hb < 0x23d0000000000000LL) { /* b < 2**-450 */ ++ else if(hb < 0x23d0000000000000LL) { /* b < 2**-450 */ + if(hb <= 0x000fffffffffffffLL) { /* subnormal b or 0 */ +- u_int64_t low; +- GET_LDOUBLE_LSW64(low,b); +- if((hb|(low&0x7fffffffffffffffLL))==0) return a; +- t1=two1022; /* t1=2^1022 */ +- b *= t1; +- a *= t1; +- k -= 1022; +- kld = kld / two1022; ++ if(hb==0) return a; ++ a *= 0x1p+1022L; ++ b *= 0x1p+1022L; ++ k = -1022; ++ kld = 0x1p-1022L; + } else { /* scale a and b by 2^600 */ +- ha += 0x2580000000000000LL; /* a *= 2^600 */ +- hb += 0x2580000000000000LL; /* b *= 2^600 */ +- k -= 600; +- a *= two600; +- b *= two600; +- kld = kld / two600; ++ a *= 0x1p+600L; ++ b *= 0x1p+600L; ++ k = -600; ++ kld = 0x1p-600L; + } + } + /* medium size a and b */ + w = a-b; + if (w>b) { +- SET_LDOUBLE_WORDS64(t1,ha,0); +- t2 = a-t1; +- w = __ieee754_sqrtl(t1*t1-(b*(-b)-t2*(a+t1))); ++ ldbl_unpack (a, &hi, &lo); ++ a1 = hi; ++ a2 = lo; ++ /* a*a + b*b ++ = 
(a1+a2)*a + b*b ++ = a1*a + a2*a + b*b ++ = a1*(a1+a2) + a2*a + b*b ++ = a1*a1 + a1*a2 + a2*a + b*b ++ = a1*a1 + a2*(a+a1) + b*b */ ++ w = __ieee754_sqrtl(a1*a1-(b*(-b)-a2*(a+a1))); + } else { + a = a+a; +- SET_LDOUBLE_WORDS64(y1,hb,0); +- y2 = b - y1; +- SET_LDOUBLE_WORDS64(t1,ha+0x0010000000000000LL,0); +- t2 = a - t1; +- w = __ieee754_sqrtl(t1*y1-(w*(-w)-(t1*y2+t2*b))); ++ ldbl_unpack (b, &hi, &lo); ++ b1 = hi; ++ b2 = lo; ++ ldbl_unpack (a, &hi, &lo); ++ a1 = hi; ++ a2 = lo; ++ /* a*a + b*b ++ = a*a + (a-b)*(a-b) - (a-b)*(a-b) + b*b ++ = a*a + w*w - (a*a - 2*a*b + b*b) + b*b ++ = w*w + 2*a*b ++ = w*w + (a1+a2)*b ++ = w*w + a1*b + a2*b ++ = w*w + a1*(b1+b2) + a2*b ++ = w*w + a1*b1 + a1*b2 + a2*b */ ++ w = __ieee754_sqrtl(a1*b1-(w*(-w)-(a1*b2+a2*b))); + } + if(k!=0) + return w*kld; +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_remainderl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_remainderl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_remainderl.c 2014-05-27 20:02:27.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_remainderl.c 2014-05-27 20:04:08.000000000 -0500 +@@ -33,18 +33,22 @@ + int64_t hx,hp; + u_int64_t sx,lx,lp; + long double p_half; ++ double xhi, xlo, phi, plo; + +- GET_LDOUBLE_WORDS64(hx,lx,x); +- GET_LDOUBLE_WORDS64(hp,lp,p); ++ ldbl_unpack (x, &xhi, &xlo); ++ EXTRACT_WORDS64 (hx, xhi); ++ EXTRACT_WORDS64 (lx, xlo); ++ ldbl_unpack (p, &phi, &plo); ++ EXTRACT_WORDS64 (hp, phi); ++ EXTRACT_WORDS64 (lp, plo); + sx = hx&0x8000000000000000ULL; + hp &= 0x7fffffffffffffffLL; + hx &= 0x7fffffffffffffffLL; + + /* purge off exception values */ +- if((hp|(lp&0x7fffffffffffffff))==0) return (x*p)/(x*p); /* p = 0 */ ++ if(hp==0) return (x*p)/(x*p); /* p = 0 */ + if((hx>=0x7ff0000000000000LL)|| /* x not finite */ +- ((hp>=0x7ff0000000000000LL)&& /* p is NaN */ +- (((hp-0x7ff0000000000000LL)|lp)!=0))) ++ (hp>0x7ff0000000000000LL)) /* p is NaN */ + return (x*p)/(x*p); + + +@@ -64,8 +68,8 @@ + if(x>=p_half) x -= p; + } + } +- GET_LDOUBLE_MSW64(hx,x); +- SET_LDOUBLE_MSW64(x,hx^sx); ++ if (sx) ++ x = -x; + return x; + } + strong_alias (__ieee754_remainderl, __remainderl_finite) +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_erfl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_erfl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_erfl.c 2014-05-27 20:02:27.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_erfl.c 2014-05-27 20:04:08.000000000 -0500 +@@ -760,16 +760,16 @@ + __erfl (long double x) + { + long double a, y, z; +- int32_t i, ix, sign; +- ieee854_long_double_shape_type u; ++ int32_t i, ix, hx; ++ double xhi; + +- u.value = x; +- sign = u.parts32.w0; +- ix = sign & 0x7fffffff; ++ xhi = ldbl_high (x); ++ GET_HIGH_WORD (hx, xhi); ++ ix = hx & 0x7fffffff; + + if (ix >= 0x7ff00000) + { /* erf(nan)=nan */ +- i = ((sign & 0xfff00000) >> 31) << 1; ++ i = ((uint32_t) hx >> 31) << 1; + return (long double) (1 - i) + one / x; /* erf(+-inf)=+-1 */ + } + +@@ -778,7 +778,7 @@ + if (ix >= 0x4039A0DE) + { + /* __erfcl (x) underflows if x > 25.6283 */ +- if (sign) ++ if ((hx & 0x80000000) == 0) + return one-tiny; + else + return tiny-one; +@@ -789,8 +789,9 @@ + return (one - y); + } + } +- u.parts32.w0 = ix; +- a = u.value; ++ a = x; ++ if ((hx & 0x80000000) != 0) ++ a = -a; + z = x * x; + if (ix < 0x3fec0000) /* a < 0.875 */ + { +@@ -814,7 +815,7 @@ + y = erf_const + neval (a, TN2, NTN2) / deval (a, TD2, NTD2); + } + +- if (sign & 0x80000000) /* x < 0 */ ++ if (hx & 0x80000000) /* x < 0 */ + y = 
-y; + return( y ); + } +@@ -824,18 +825,18 @@ + __erfcl (long double x) + { + long double y, z, p, r; +- int32_t i, ix, sign; +- ieee854_long_double_shape_type u; +- +- u.value = x; +- sign = u.parts32.w0; +- ix = sign & 0x7fffffff; +- u.parts32.w0 = ix; ++ int32_t i, ix; ++ uint32_t hx; ++ double xhi; ++ ++ xhi = ldbl_high (x); ++ GET_HIGH_WORD (hx, xhi); ++ ix = hx & 0x7fffffff; + + if (ix >= 0x7ff00000) + { /* erfc(nan)=nan */ + /* erfc(+-inf)=0,2 */ +- return (long double) (((u_int32_t) sign >> 31) << 1) + one / x; ++ return (long double) ((hx >> 31) << 1) + one / x; + } + + if (ix < 0x3fd00000) /* |x| <1/4 */ +@@ -846,7 +847,8 @@ + } + if (ix < 0x3ff40000) /* 1.25 */ + { +- x = u.value; ++ if ((hx & 0x80000000) != 0) ++ x = -x; + i = 8.0 * x; + switch (i) + { +@@ -891,7 +893,7 @@ + y += C20a; + break; + } +- if (sign & 0x80000000) ++ if (hx & 0x80000000) + y = 2.0L - y; + return y; + } +@@ -899,10 +901,11 @@ + if (ix < 0x405ac000) + { + /* x < -9 */ +- if ((ix >= 0x40220000) && (sign & 0x80000000)) ++ if (hx >= 0xc0220000) + return two - tiny; + +- x = fabsl (x); ++ if ((hx & 0x80000000) != 0) ++ x = -x; + z = one / (x * x); + i = 8.0 / x; + switch (i) +@@ -933,21 +936,17 @@ + p = neval (z, RNr8, NRNr8) / deval (z, RDr8, NRDr8); + break; + } +- u.value = x; +- u.parts32.w3 = 0; +- u.parts32.w2 = 0; +- u.parts32.w1 &= 0xf8000000; +- z = u.value; ++ z = (float) x; + r = __ieee754_expl (-z * z - 0.5625) * + __ieee754_expl ((z - x) * (z + x) + p); +- if ((sign & 0x80000000) == 0) ++ if ((hx & 0x80000000) == 0) + return r / x; + else + return two - r / x; + } + else + { +- if ((sign & 0x80000000) == 0) ++ if ((hx & 0x80000000) == 0) + return tiny * tiny; + else + return two - tiny; diff --git a/packages/glibc/2.17/0032-glibc-ppc64le-10.patch b/packages/glibc/2.17/0032-glibc-ppc64le-10.patch new file mode 100644 index 0000000..ae80fe6 --- /dev/null +++ b/packages/glibc/2.17/0032-glibc-ppc64le-10.patch @@ -0,0 +1,91 @@ +# commit 32c301dfc9b786453e59b61fe4a821a89e1a206b +# Author: Alan Modra +# Date: Sat Aug 17 18:26:39 2013 +0930 +# +# PowerPC floating point little-endian [5 of 15] +# http://sourceware.org/ml/libc-alpha/2013-08/msg00085.html +# +# Rid ourselves of ieee854. +# +# * sysdeps/ieee754/ldbl-128ibm/ieee754.h (union ieee854_long_double): +# Delete. +# (IEEE854_LONG_DOUBLE_BIAS): Delete. +# * sysdeps/ieee754/ldbl-128ibm/math_ldbl.h: Don't include ieee854 +# version of math_ldbl.h. +# +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ieee754.h glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ieee754.h +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ieee754.h 2014-05-27 22:10:43.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ieee754.h 2014-05-27 22:11:10.000000000 -0500 +@@ -112,61 +112,6 @@ + #define IEEE754_DOUBLE_BIAS 0x3ff /* Added to exponent. */ + + +-union ieee854_long_double +- { +- long double d; +- +- /* This is the IEEE 854 quad-precision format. */ +- struct +- { +-#if __BYTE_ORDER == __BIG_ENDIAN +- unsigned int negative:1; +- unsigned int exponent:15; +- /* Together these comprise the mantissa. */ +- unsigned int mantissa0:16; +- unsigned int mantissa1:32; +- unsigned int mantissa2:32; +- unsigned int mantissa3:32; +-#endif /* Big endian. */ +-#if __BYTE_ORDER == __LITTLE_ENDIAN +- /* Together these comprise the mantissa. */ +- unsigned int mantissa3:32; +- unsigned int mantissa2:32; +- unsigned int mantissa1:32; +- unsigned int mantissa0:16; +- unsigned int exponent:15; +- unsigned int negative:1; +-#endif /* Little endian. 
*/ +- } ieee; +- +- /* This format makes it easier to see if a NaN is a signalling NaN. */ +- struct +- { +-#if __BYTE_ORDER == __BIG_ENDIAN +- unsigned int negative:1; +- unsigned int exponent:15; +- unsigned int quiet_nan:1; +- /* Together these comprise the mantissa. */ +- unsigned int mantissa0:15; +- unsigned int mantissa1:32; +- unsigned int mantissa2:32; +- unsigned int mantissa3:32; +-#else +- /* Together these comprise the mantissa. */ +- unsigned int mantissa3:32; +- unsigned int mantissa2:32; +- unsigned int mantissa1:32; +- unsigned int mantissa0:15; +- unsigned int quiet_nan:1; +- unsigned int exponent:15; +- unsigned int negative:1; +-#endif +- } ieee_nan; +- }; +- +-#define IEEE854_LONG_DOUBLE_BIAS 0x3fff /* Added to exponent. */ +- +- + /* IBM extended format for long double. + + Each long double is made up of two IEEE doubles. The value of the +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h 2014-05-27 22:10:43.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h 2014-05-27 22:11:10.000000000 -0500 +@@ -2,7 +2,6 @@ + #error "Never use directly; include instead." + #endif + +-#include + #include + + static inline void diff --git a/packages/glibc/2.17/0033-glibc-ppc64le-11.patch b/packages/glibc/2.17/0033-glibc-ppc64le-11.patch new file mode 100644 index 0000000..ec12797 --- /dev/null +++ b/packages/glibc/2.17/0033-glibc-ppc64le-11.patch @@ -0,0 +1,113 @@ +# commit 62a728aeff93507ce5975f245a5f1d2046fb4503 +# Author: Alan Modra +# Date: Sat Aug 17 18:27:19 2013 +0930 +# +# PowerPC floating point little-endian [6 of 15] +# http://sourceware.org/ml/libc-alpha/2013-07/msg00197.html +# +# A rewrite to make this code correct for little-endian. +# +# * sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c (mynumber): Replace +# union 32-bit int array member with 64-bit int array. +# (t515, tm256): Double rather than long double. +# (__ieee754_sqrtl): Rewrite using 64-bit arithmetic. +# +diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c +--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c 2014-05-27 22:20:12.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c 2014-05-27 22:21:39.000000000 -0500 +@@ -34,15 +34,13 @@ + + #include + +-typedef unsigned int int4; +-typedef union {int4 i[4]; long double x; double d[2]; } mynumber; ++typedef union {int64_t i[2]; long double x; double d[2]; } mynumber; + +-static const mynumber +- t512 = {{0x5ff00000, 0x00000000, 0x00000000, 0x00000000 }}, /* 2^512 */ +- tm256 = {{0x2ff00000, 0x00000000, 0x00000000, 0x00000000 }}; /* 2^-256 */ + static const double +-two54 = 1.80143985094819840000e+16, /* 0x4350000000000000 */ +-twom54 = 5.55111512312578270212e-17; /* 0x3C90000000000000 */ ++ t512 = 0x1p512, ++ tm256 = 0x1p-256, ++ two54 = 0x1p54, /* 0x4350000000000000 */ ++ twom54 = 0x1p-54; /* 0x3C90000000000000 */ + + /*********************************************************************/ + /* An ultimate sqrt routine. 
Given an IEEE double machine number x */ +@@ -54,56 +52,53 @@ + static const long double big = 134217728.0, big1 = 134217729.0; + long double t,s,i; + mynumber a,c; +- int4 k, l, m; +- int n; ++ uint64_t k, l; ++ int64_t m, n; + double d; + + a.x=x; +- k=a.i[0] & 0x7fffffff; ++ k=a.i[0] & INT64_C(0x7fffffffffffffff); + /*----------------- 2^-1022 <= | x |< 2^1024 -----------------*/ +- if (k>0x000fffff && k<0x7ff00000) { ++ if (k>INT64_C(0x000fffff00000000) && k> 21; +- m = (a.i[2] >> 20) & 0x7ff; ++ l = (k&INT64_C(0x001fffffffffffff))|INT64_C(0x3fe0000000000000); ++ if ((a.i[1] & INT64_C(0x7fffffffffffffff)) != 0) { ++ n = (int64_t) ((l - k) * 2) >> 53; ++ m = (a.i[1] >> 52) & 0x7ff; + if (m == 0) { + a.d[1] *= two54; +- m = ((a.i[2] >> 20) & 0x7ff) - 54; ++ m = ((a.i[1] >> 52) & 0x7ff) - 54; + } + m += n; +- if ((int) m > 0) +- a.i[2] = (a.i[2] & 0x800fffff) | (m << 20); +- else if ((int) m <= -54) { +- a.i[2] &= 0x80000000; +- a.i[3] = 0; ++ if (m > 0) ++ a.i[1] = (a.i[1] & INT64_C(0x800fffffffffffff)) | (m << 52); ++ else if (m <= -54) { ++ a.i[1] &= INT64_C(0x8000000000000000); + } else { + m += 54; +- a.i[2] = (a.i[2] & 0x800fffff) | (m << 20); ++ a.i[1] = (a.i[1] & INT64_C(0x800fffffffffffff)) | (m << 52); + a.d[1] *= twom54; + } + } + a.i[0] = l; + s = a.x; + d = __ieee754_sqrt (a.d[0]); +- c.i[0] = 0x20000000+((k&0x7fe00000)>>1); ++ c.i[0] = INT64_C(0x2000000000000000)+((k&INT64_C(0x7fe0000000000000))>>1); + c.i[1] = 0; +- c.i[2] = 0; +- c.i[3] = 0; + i = d; + t = 0.5L * (i + s / i); + i = 0.5L * (t + s / t); + return c.x * i; + } + else { +- if (k>=0x7ff00000) { +- if (a.i[0] == 0xfff00000 && a.i[1] == 0) ++ if (k>=INT64_C(0x7ff0000000000000)) { ++ if (a.i[0] == INT64_C(0xfff0000000000000)) + return (big1-big1)/(big-big); /* sqrt (-Inf) = NaN. */ + return x; /* sqrt (NaN) = NaN, sqrt (+Inf) = +Inf. */ + } + if (x == 0) return x; + if (x < 0) return (big1-big1)/(big-big); +- return tm256.x*__ieee754_sqrtl(x*t512.x); ++ return tm256*__ieee754_sqrtl(x*t512); + } + } + strong_alias (__ieee754_sqrtl, __sqrtl_finite) diff --git a/packages/glibc/2.17/0034-glibc-ppc64le-12.patch b/packages/glibc/2.17/0034-glibc-ppc64le-12.patch new file mode 100644 index 0000000..5e2d378 --- /dev/null +++ b/packages/glibc/2.17/0034-glibc-ppc64le-12.patch @@ -0,0 +1,75 @@ +# commit 2ca85d2bbbaa60b9c83bf1f57a2801c84e0a3625 +# Author: Anton Blanchard +# Date: Sat Aug 17 18:28:06 2013 +0930 +# +# PowerPC floating point little-endian [7 of 15] +# http://sourceware.org/ml/libc-alpha/2013-08/msg00086.html +# +# * sysdeps/powerpc/bits/mathinline.h (__signbitf): Use builtin. +# (__signbit): Likewise. Correct for little-endian. +# (__signbitl): Call __signbit. +# (lrint): Correct for little-endian. +# (lrintf): Call lrint. 
+# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/bits/mathinline.h glibc-2.17-c758a686/sysdeps/powerpc/fpu/bits/mathinline.h +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/bits/mathinline.h 2014-05-27 22:28:12.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/bits/mathinline.h 2014-05-27 22:28:37.000000000 -0500 +@@ -62,21 +62,28 @@ + __MATH_INLINE int + __NTH (__signbitf (float __x)) + { ++#if __GNUC_PREREQ (4, 0) ++ return __builtin_signbitf (__x); ++#else + __extension__ union { float __f; int __i; } __u = { __f: __x }; + return __u.__i < 0; ++#endif + } + __MATH_INLINE int + __NTH (__signbit (double __x)) + { +- __extension__ union { double __d; int __i[2]; } __u = { __d: __x }; +- return __u.__i[0] < 0; ++#if __GNUC_PREREQ (4, 0) ++ return __builtin_signbit (__x); ++#else ++ __extension__ union { double __d; long long __i; } __u = { __d: __x }; ++ return __u.__i < 0; ++#endif + } + # ifdef __LONG_DOUBLE_128__ + __MATH_INLINE int + __NTH (__signbitl (long double __x)) + { +- __extension__ union { long double __d; int __i[4]; } __u = { __d: __x }; +- return __u.__i[0] < 0; ++ return __signbit ((double) __x); + } + # endif + # endif +@@ -93,22 +100,17 @@ + { + union { + double __d; +- int __ll[2]; ++ long long __ll; + } __u; + __asm__ ("fctiw %0,%1" : "=f"(__u.__d) : "f"(__x)); +- return __u.__ll[1]; ++ return __u.__ll; + } + + __MATH_INLINE long int lrintf (float __x) __THROW; + __MATH_INLINE long int + __NTH (lrintf (float __x)) + { +- union { +- double __d; +- int __ll[2]; +- } __u; +- __asm__ ("fctiw %0,%1" : "=f"(__u.__d) : "f"(__x)); +- return __u.__ll[1]; ++ return lrint ((double) __x); + } + # endif + diff --git a/packages/glibc/2.17/0035-glibc-ppc64le-13.patch b/packages/glibc/2.17/0035-glibc-ppc64le-13.patch new file mode 100644 index 0000000..52830a1 --- /dev/null +++ b/packages/glibc/2.17/0035-glibc-ppc64le-13.patch @@ -0,0 +1,283 @@ +# commit 4a28b3ca4bc52d9a3ac0d9edb53d3de510e1b77c +# Author: Anton Blanchard +# Date: Sat Aug 17 18:28:55 2013 +0930 +# +# PowerPC floating point little-endian [8 of 15] +# http://sourceware.org/ml/libc-alpha/2013-07/msg00199.html +# +# Corrects floating-point environment code for little-endian. +# +# * sysdeps/powerpc/fpu/fenv_libc.h (fenv_union_t): Replace int +# array with long long. +# * sysdeps/powerpc/fpu/e_sqrt.c (__slow_ieee754_sqrt): Adjust. +# * sysdeps/powerpc/fpu/e_sqrtf.c (__slow_ieee754_sqrtf): Adjust. +# * sysdeps/powerpc/fpu/fclrexcpt.c (__feclearexcept): Adjust. +# * sysdeps/powerpc/fpu/fedisblxcpt.c (fedisableexcept): Adjust. +# * sysdeps/powerpc/fpu/feenablxcpt.c (feenableexcept): Adjust. +# * sysdeps/powerpc/fpu/fegetexcept.c (__fegetexcept): Adjust. +# * sysdeps/powerpc/fpu/feholdexcpt.c (feholdexcept): Adjust. +# * sysdeps/powerpc/fpu/fesetenv.c (__fesetenv): Adjust. +# * sysdeps/powerpc/fpu/feupdateenv.c (__feupdateenv): Adjust. +# * sysdeps/powerpc/fpu/fgetexcptflg.c (__fegetexceptflag): Adjust. +# * sysdeps/powerpc/fpu/fraiseexcpt.c (__feraiseexcept): Adjust. +# * sysdeps/powerpc/fpu/fsetexcptflg.c (__fesetexceptflag): Adjust. +# * sysdeps/powerpc/fpu/ftestexcept.c (fetestexcept): Adjust. 
+# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/e_sqrt.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/e_sqrt.c +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/e_sqrt.c 2014-05-27 22:31:42.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/e_sqrt.c 2014-05-27 22:31:43.000000000 -0500 +@@ -145,7 +145,7 @@ + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; +- if ((u.l[1] & FE_INVALID) == 0) ++ if ((u.l & FE_INVALID) == 0) + #endif + feraiseexcept (FE_INVALID); + x = a_nan.value; +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/e_sqrtf.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/e_sqrtf.c +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/e_sqrtf.c 2014-05-27 22:31:42.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/e_sqrtf.c 2014-05-27 22:31:43.000000000 -0500 +@@ -121,7 +121,7 @@ + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; +- if ((u.l[1] & FE_INVALID) == 0) ++ if ((u.l & FE_INVALID) == 0) + #endif + feraiseexcept (FE_INVALID); + x = a_nan.value; +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/fclrexcpt.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/fclrexcpt.c +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/fclrexcpt.c 2014-05-27 22:31:42.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/fclrexcpt.c 2014-05-27 22:31:43.000000000 -0500 +@@ -28,8 +28,8 @@ + u.fenv = fegetenv_register (); + + /* Clear the relevant bits. */ +- u.l[1] = u.l[1] & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID) +- | (excepts & FPSCR_STICKY_BITS)); ++ u.l = u.l & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID) ++ | (excepts & FPSCR_STICKY_BITS)); + + /* Put the new state in effect. */ + fesetenv_register (u.fenv); +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/fedisblxcpt.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/fedisblxcpt.c +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/fedisblxcpt.c 2014-05-27 22:31:42.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/fedisblxcpt.c 2014-05-27 22:31:43.000000000 -0500 +@@ -32,15 +32,15 @@ + + fe.fenv = fegetenv_register (); + if (excepts & FE_INEXACT) +- fe.l[1] &= ~(1 << (31 - FPSCR_XE)); ++ fe.l &= ~(1 << (31 - FPSCR_XE)); + if (excepts & FE_DIVBYZERO) +- fe.l[1] &= ~(1 << (31 - FPSCR_ZE)); ++ fe.l &= ~(1 << (31 - FPSCR_ZE)); + if (excepts & FE_UNDERFLOW) +- fe.l[1] &= ~(1 << (31 - FPSCR_UE)); ++ fe.l &= ~(1 << (31 - FPSCR_UE)); + if (excepts & FE_OVERFLOW) +- fe.l[1] &= ~(1 << (31 - FPSCR_OE)); ++ fe.l &= ~(1 << (31 - FPSCR_OE)); + if (excepts & FE_INVALID) +- fe.l[1] &= ~(1 << (31 - FPSCR_VE)); ++ fe.l &= ~(1 << (31 - FPSCR_VE)); + fesetenv_register (fe.fenv); + + new = __fegetexcept (); +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/feenablxcpt.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/feenablxcpt.c +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/feenablxcpt.c 2014-05-27 22:31:42.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/feenablxcpt.c 2014-05-27 22:31:43.000000000 -0500 +@@ -32,15 +32,15 @@ + + fe.fenv = fegetenv_register (); + if (excepts & FE_INEXACT) +- fe.l[1] |= (1 << (31 - FPSCR_XE)); ++ fe.l |= (1 << (31 - FPSCR_XE)); + if (excepts & FE_DIVBYZERO) +- fe.l[1] |= (1 << (31 - FPSCR_ZE)); ++ fe.l |= (1 << (31 - FPSCR_ZE)); + if (excepts & FE_UNDERFLOW) +- fe.l[1] |= (1 << (31 - FPSCR_UE)); ++ fe.l |= (1 << (31 - FPSCR_UE)); + if (excepts & FE_OVERFLOW) +- fe.l[1] |= (1 << (31 - FPSCR_OE)); ++ fe.l |= (1 << (31 - FPSCR_OE)); + if (excepts & FE_INVALID) +- fe.l[1] |= (1 << (31 - FPSCR_VE)); ++ fe.l |= (1 << (31 - FPSCR_VE)); 
+ fesetenv_register (fe.fenv); + + new = __fegetexcept (); +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/fegetexcept.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/fegetexcept.c +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/fegetexcept.c 2014-05-27 22:31:42.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/fegetexcept.c 2014-05-27 22:31:43.000000000 -0500 +@@ -27,15 +27,15 @@ + + fe.fenv = fegetenv_register (); + +- if (fe.l[1] & (1 << (31 - FPSCR_XE))) ++ if (fe.l & (1 << (31 - FPSCR_XE))) + result |= FE_INEXACT; +- if (fe.l[1] & (1 << (31 - FPSCR_ZE))) ++ if (fe.l & (1 << (31 - FPSCR_ZE))) + result |= FE_DIVBYZERO; +- if (fe.l[1] & (1 << (31 - FPSCR_UE))) ++ if (fe.l & (1 << (31 - FPSCR_UE))) + result |= FE_UNDERFLOW; +- if (fe.l[1] & (1 << (31 - FPSCR_OE))) ++ if (fe.l & (1 << (31 - FPSCR_OE))) + result |= FE_OVERFLOW; +- if (fe.l[1] & (1 << (31 - FPSCR_VE))) ++ if (fe.l & (1 << (31 - FPSCR_VE))) + result |= FE_INVALID; + + return result; +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/feholdexcpt.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/feholdexcpt.c +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/feholdexcpt.c 2014-05-27 22:31:42.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/feholdexcpt.c 2014-05-27 22:33:09.000000000 -0500 +@@ -30,13 +30,12 @@ + + /* Clear everything except for the rounding modes and non-IEEE arithmetic + flag. */ +- new.l[1] = old.l[1] & 7; +- new.l[0] = old.l[0]; ++ new.l = old.l & 0xffffffff00000007LL; + + /* If the old env had any eabled exceptions, then mask SIGFPE in the + MSR FE0/FE1 bits. This may allow the FPU to run faster because it + always takes the default action and can not generate SIGFPE. */ +- if ((old.l[1] & _FPU_MASK_ALL) != 0) ++ if ((old.l & _FPU_MASK_ALL) != 0) + (void)__fe_mask_env (); + + /* Put the new state in effect. */ +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/fenv_libc.h glibc-2.17-c758a686/sysdeps/powerpc/fpu/fenv_libc.h +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/fenv_libc.h 2014-05-27 22:31:42.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/fenv_libc.h 2014-05-27 22:31:43.000000000 -0500 +@@ -69,7 +69,7 @@ + typedef union + { + fenv_t fenv; +- unsigned int l[2]; ++ unsigned long long l; + } fenv_union_t; + + +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/fesetenv.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/fesetenv.c +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/fesetenv.c 2014-05-27 22:31:42.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/fesetenv.c 2014-05-27 22:35:18.000000000 -0500 +@@ -36,14 +36,14 @@ + exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the + hardware into "precise mode" and may cause the FPU to run slower on some + hardware. */ +- if ((old.l[1] & _FPU_MASK_ALL) == 0 && (new.l[1] & _FPU_MASK_ALL) != 0) ++ if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0) + (void)__fe_nomask_env (); + + /* If the old env had any enabled exceptions and the new env has no enabled + exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the + FPU to run faster because it always takes the default action and can not + generate SIGFPE. 
*/ +- if ((old.l[1] & _FPU_MASK_ALL) != 0 && (new.l[1] & _FPU_MASK_ALL) == 0) ++ if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0) + (void)__fe_mask_env (); + + fesetenv_register (*envp); +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/feupdateenv.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/feupdateenv.c +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/feupdateenv.c 2014-05-27 22:31:42.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/feupdateenv.c 2014-05-27 22:34:23.000000000 -0500 +@@ -36,20 +36,20 @@ + /* Restore rounding mode and exception enable from *envp and merge + exceptions. Leave fraction rounded/inexact and FP result/CC bits + unchanged. */ +- new.l[1] = (old.l[1] & 0x1FFFFF00) | (new.l[1] & 0x1FF80FFF); ++ new.l = (old.l & 0xffffffff1fffff00LL) | (new.l & 0x1ff80fff); + + /* If the old env has no eabled exceptions and the new env has any enabled + exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put + the hardware into "precise mode" and may cause the FPU to run slower on + some hardware. */ +- if ((old.l[1] & _FPU_MASK_ALL) == 0 && (new.l[1] & _FPU_MASK_ALL) != 0) ++ if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0) + (void)__fe_nomask_env (); + + /* If the old env had any eabled exceptions and the new env has no enabled + exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the + FPU to run faster because it always takes the default action and can not + generate SIGFPE. */ +- if ((old.l[1] & _FPU_MASK_ALL) != 0 && (new.l[1] & _FPU_MASK_ALL) == 0) ++ if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0) + (void)__fe_mask_env (); + + /* Atomically enable and raise (if appropriate) exceptions set in `new'. */ +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/fgetexcptflg.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/fgetexcptflg.c +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/fgetexcptflg.c 2014-05-27 22:31:42.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/fgetexcptflg.c 2014-05-27 22:31:43.000000000 -0500 +@@ -28,7 +28,7 @@ + u.fenv = fegetenv_register (); + + /* Return (all of) it. */ +- *flagp = u.l[1] & excepts & FE_ALL_EXCEPT; ++ *flagp = u.l & excepts & FE_ALL_EXCEPT; + + /* Success. */ + return 0; +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/fraiseexcpt.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/fraiseexcpt.c +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/fraiseexcpt.c 2014-05-27 22:31:42.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/fraiseexcpt.c 2014-05-27 22:31:43.000000000 -0500 +@@ -34,11 +34,11 @@ + u.fenv = fegetenv_register (); + + /* Add the exceptions */ +- u.l[1] = (u.l[1] +- | (excepts & FPSCR_STICKY_BITS) +- /* Turn FE_INVALID into FE_INVALID_SOFTWARE. */ +- | (excepts >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT)) +- & FE_INVALID_SOFTWARE)); ++ u.l = (u.l ++ | (excepts & FPSCR_STICKY_BITS) ++ /* Turn FE_INVALID into FE_INVALID_SOFTWARE. */ ++ | (excepts >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT)) ++ & FE_INVALID_SOFTWARE)); + + /* Store the new status word (along with the rest of the environment), + triggering any appropriate exceptions. */ +@@ -50,7 +50,7 @@ + don't have FE_INVALID_SOFTWARE implemented. Detect this + case and raise FE_INVALID_SNAN instead. 
*/ + u.fenv = fegetenv_register (); +- if ((u.l[1] & FE_INVALID) == 0) ++ if ((u.l & FE_INVALID) == 0) + set_fpscr_bit (FPSCR_VXSNAN); + } + +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/fsetexcptflg.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/fsetexcptflg.c +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/fsetexcptflg.c 2014-05-27 22:31:42.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/fsetexcptflg.c 2014-05-27 22:31:43.000000000 -0500 +@@ -32,10 +32,10 @@ + flag = *flagp & excepts; + + /* Replace the exception status */ +- u.l[1] = ((u.l[1] & ~(FPSCR_STICKY_BITS & excepts)) +- | (flag & FPSCR_STICKY_BITS) +- | (flag >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT)) +- & FE_INVALID_SOFTWARE)); ++ u.l = ((u.l & ~(FPSCR_STICKY_BITS & excepts)) ++ | (flag & FPSCR_STICKY_BITS) ++ | (flag >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT)) ++ & FE_INVALID_SOFTWARE)); + + /* Store the new status word (along with the rest of the environment). + This may cause floating-point exceptions if the restored state +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/ftestexcept.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/ftestexcept.c +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/ftestexcept.c 2014-05-27 22:31:42.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/ftestexcept.c 2014-05-27 22:31:43.000000000 -0500 +@@ -28,6 +28,6 @@ + + /* The FE_INVALID bit is dealt with correctly by the hardware, so we can + just: */ +- return u.l[1] & excepts; ++ return u.l & excepts; + } + libm_hidden_def (fetestexcept) diff --git a/packages/glibc/2.17/0036-glibc-ppc64le-14.patch b/packages/glibc/2.17/0036-glibc-ppc64le-14.patch new file mode 100644 index 0000000..9025fd7 --- /dev/null +++ b/packages/glibc/2.17/0036-glibc-ppc64le-14.patch @@ -0,0 +1,120 @@ +# commit 603e84104cdc709c8e7dcbac54b9a585bf8dff78 +# Author: Alan Modra +# Date: Sat Aug 17 18:29:43 2013 +0930 +# +# PowerPC floating point little-endian [9 of 15] +# http://sourceware.org/ml/libc-alpha/2013-07/msg00200.html +# +# This works around the fact that vsx is disabled in current +# little-endian gcc. Also, float constants take 4 bytes in memory +# vs. 16 bytes for vector constants, and we don't need to write one lot +# of masks for double (register format) and another for float (mem +# format). +# +# * sysdeps/powerpc/fpu/s_float_bitwise.h (__float_and_test28): Don't +# use vector int constants. +# (__float_and_test24, __float_and8, __float_get_exp): Likewise. 
+# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_float_bitwise.h glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_float_bitwise.h +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_float_bitwise.h 2014-05-27 22:37:18.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_float_bitwise.h 2014-05-27 22:37:20.000000000 -0500 +@@ -23,18 +23,19 @@ + #include + + /* Returns (int)(num & 0x7FFFFFF0 == value) */ +-static inline +-int __float_and_test28 (float num, float value) ++static inline int ++__float_and_test28 (float num, float value) + { + float ret; + #ifdef _ARCH_PWR7 +- vector int mask = (vector int) { +- 0x7ffffffe, 0x00000000, 0x00000000, 0x0000000 +- }; ++ union { ++ int i; ++ float f; ++ } mask = { .i = 0x7ffffff0 }; + __asm__ ( +- /* the 'f' constrain is use on mask because we just need ++ /* the 'f' constraint is used on mask because we just need + * to compare floats, not full vector */ +- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask) ++ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f) + ); + #else + int32_t inum; +@@ -46,16 +47,17 @@ + } + + /* Returns (int)(num & 0x7FFFFF00 == value) */ +-static inline +-int __float_and_test24 (float num, float value) ++static inline int ++__float_and_test24 (float num, float value) + { + float ret; + #ifdef _ARCH_PWR7 +- vector int mask = (vector int) { +- 0x7fffffe0, 0x00000000, 0x00000000, 0x0000000 +- }; ++ union { ++ int i; ++ float f; ++ } mask = { .i = 0x7fffff00 }; + __asm__ ( +- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask) ++ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f) + ); + #else + int32_t inum; +@@ -67,16 +69,17 @@ + } + + /* Returns (float)(num & 0x7F800000) */ +-static inline +-float __float_and8 (float num) ++static inline float ++__float_and8 (float num) + { + float ret; + #ifdef _ARCH_PWR7 +- vector int mask = (vector int) { +- 0x7ff00000, 0x00000000, 0x00000000, 0x00000000 +- }; ++ union { ++ int i; ++ float f; ++ } mask = { .i = 0x7f800000 }; + __asm__ ( +- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask) ++ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f) + ); + #else + int32_t inum; +@@ -88,17 +91,18 @@ + } + + /* Returns ((int32_t)(num & 0x7F800000) >> 23) */ +-static inline +-int32_t __float_get_exp (float num) ++static inline int32_t ++__float_get_exp (float num) + { + int32_t inum; + #ifdef _ARCH_PWR7 + float ret; +- vector int mask = (vector int) { +- 0x7ff00000, 0x00000000, 0x00000000, 0x00000000 +- }; ++ union { ++ int i; ++ float f; ++ } mask = { .i = 0x7f800000 }; + __asm__ ( +- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask) ++ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f) + ); + GET_FLOAT_WORD(inum, ret); + #else diff --git a/packages/glibc/2.17/0037-glibc-ppc64le-15.patch b/packages/glibc/2.17/0037-glibc-ppc64le-15.patch new file mode 100644 index 0000000..29b3b54 --- /dev/null +++ b/packages/glibc/2.17/0037-glibc-ppc64le-15.patch @@ -0,0 +1,119 @@ +# commit da13146da10360436941e843834c90a9aef5fd7a +# Author: Alan Modra +# Date: Sat Aug 17 18:30:23 2013 +0930 +# +# PowerPC floating point little-endian [10 of 15] +# http://sourceware.org/ml/libc-alpha/2013-07/msg00201.html +# +# These two functions oddly test x+1>0 when a double x is >= 0.0, and +# similarly when x is negative. I don't see the point of that since the +# test should always be true. I also don't see any need to convert x+1 +# to integer rather than simply using xr+1. 
Note that the standard +# allows these functions to return any value when the input is outside +# the range of long long, but it's not too hard to prevent xr+1 +# overflowing so that's what I've done. +# +# (With rounding mode FE_UPWARD, x+1 can be a lot more than what you +# might naively expect, but perhaps that situation was covered by the +# x - xrf < 1.0 test.) +# +# * sysdeps/powerpc/fpu/s_llround.c (__llround): Rewrite. +# * sysdeps/powerpc/fpu/s_llroundf.c (__llroundf): Rewrite. +# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llround.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llround.c +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llround.c 2014-05-27 22:38:55.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llround.c 2014-05-27 22:38:58.000000000 -0500 +@@ -19,29 +19,28 @@ + #include + #include + +-/* I think that what this routine is supposed to do is round a value +- to the nearest integer, with values exactly on the boundary rounded +- away from zero. */ +-/* This routine relies on (long long)x, when x is out of range of a long long, +- clipping to MAX_LLONG or MIN_LLONG. */ ++/* Round to the nearest integer, with values exactly on a 0.5 boundary ++ rounded away from zero, regardless of the current rounding mode. ++ If (long long)x, when x is out of range of a long long, clips at ++ LLONG_MAX or LLONG_MIN, then this implementation also clips. */ + + long long int + __llround (double x) + { +- double xrf; +- long long int xr; +- xr = (long long int) x; +- xrf = (double) xr; ++ long long xr = (long long) x; ++ double xrf = (double) xr; ++ + if (x >= 0.0) +- if (x - xrf >= 0.5 && x - xrf < 1.0 && x+1 > 0) +- return x+1; +- else +- return x; ++ { ++ if (x - xrf >= 0.5) ++ xr += (long long) ((unsigned long long) xr + 1) > 0; ++ } + else +- if (xrf - x >= 0.5 && xrf - x < 1.0 && x-1 < 0) +- return x-1; +- else +- return x; ++ { ++ if (xrf - x >= 0.5) ++ xr -= (long long) ((unsigned long long) xr - 1) < 0; ++ } ++ return xr; + } + weak_alias (__llround, llround) + #ifdef NO_LONG_DOUBLE +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llroundf.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llroundf.c +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llroundf.c 2014-05-27 22:38:55.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llroundf.c 2014-05-27 22:38:58.000000000 -0500 +@@ -18,28 +18,27 @@ + + #include + +-/* I think that what this routine is supposed to do is round a value +- to the nearest integer, with values exactly on the boundary rounded +- away from zero. */ +-/* This routine relies on (long long)x, when x is out of range of a long long, +- clipping to MAX_LLONG or MIN_LLONG. */ ++/* Round to the nearest integer, with values exactly on a 0.5 boundary ++ rounded away from zero, regardless of the current rounding mode. ++ If (long long)x, when x is out of range of a long long, clips at ++ LLONG_MAX or LLONG_MIN, then this implementation also clips. 
*/ + + long long int + __llroundf (float x) + { +- float xrf; +- long long int xr; +- xr = (long long int) x; +- xrf = (float) xr; ++ long long xr = (long long) x; ++ float xrf = (float) xr; ++ + if (x >= 0.0) +- if (x - xrf >= 0.5 && x - xrf < 1.0 && x+1 > 0) +- return x+1; +- else +- return x; ++ { ++ if (x - xrf >= 0.5) ++ xr += (long long) ((unsigned long long) xr + 1) > 0; ++ } + else +- if (xrf - x >= 0.5 && xrf - x < 1.0 && x-1 < 0) +- return x-1; +- else +- return x; ++ { ++ if (xrf - x >= 0.5) ++ xr -= (long long) ((unsigned long long) xr - 1) < 0; ++ } ++ return xr; + } + weak_alias (__llroundf, llroundf) diff --git a/packages/glibc/2.17/0038-glibc-ppc64le-16.patch b/packages/glibc/2.17/0038-glibc-ppc64le-16.patch new file mode 100644 index 0000000..0da0865 --- /dev/null +++ b/packages/glibc/2.17/0038-glibc-ppc64le-16.patch @@ -0,0 +1,163 @@ +# commit 9c008155b7d5d1bd81d909497850a2ece28aec50 +# Author: Alan Modra +# Date: Sat Aug 17 18:31:05 2013 +0930 +# +# PowerPC floating point little-endian [11 of 15] +# http://sourceware.org/ml/libc-alpha/2013-07/msg00202.html +# +# Another little-endian fix. +# +# * sysdeps/powerpc/fpu_control.h (_FPU_GETCW): Rewrite using +# 64-bit int/double union. +# (_FPU_SETCW): Likewise. +# * sysdeps/powerpc/fpu/tst-setcontext-fpscr.c (_GET_DI_FPSCR): Likewise. +# (_SET_DI_FPSCR, _GET_SI_FPSCR, _SET_SI_FPSCR): Likewise. +# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/fpu_control.h glibc-2.17-c758a686/sysdeps/powerpc/fpu/fpu_control.h +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/fpu_control.h 2014-05-27 22:40:18.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/fpu_control.h 2014-05-27 22:43:40.000000000 -0500 +@@ -45,22 +45,26 @@ + #define _FPU_IEEE 0x000000f0 + + /* Type of the control word. */ +-typedef unsigned int fpu_control_t __attribute__ ((__mode__ (__SI__))); ++typedef unsigned int fpu_control_t; + + /* Macros for accessing the hardware control word. */ +-#define _FPU_GETCW(__cw) ( { \ +- union { double d; fpu_control_t cw[2]; } \ +- tmp __attribute__ ((__aligned__(8))); \ +- __asm__ ("mffs 0; stfd%U0 0,%0" : "=m" (tmp.d) : : "fr0"); \ +- (__cw)=tmp.cw[1]; \ +- tmp.cw[1]; } ) +-#define _FPU_SETCW(__cw) { \ +- union { double d; fpu_control_t cw[2]; } \ +- tmp __attribute__ ((__aligned__(8))); \ +- tmp.cw[0] = 0xFFF80000; /* More-or-less arbitrary; this is a QNaN. */ \ +- tmp.cw[1] = __cw; \ +- __asm__ ("lfd%U0 0,%0; mtfsf 255,0" : : "m" (tmp.d) : "fr0"); \ +-} ++#define _FPU_GETCW(cw) \ ++ ({union { double __d; unsigned long long __ll; } __u; \ ++ register double __fr; \ ++ __asm__ ("mffs %0" : "=f" (__fr)); \ ++ __u.__d = __fr; \ ++ (cw) = (fpu_control_t) __u.__ll; \ ++ (fpu_control_t) __u.__ll; \ ++ }) ++ ++#define _FPU_SETCW(cw) \ ++ { union { double __d; unsigned long long __ll; } __u; \ ++ register double __fr; \ ++ __u.__ll = 0xfff80000LL << 32; /* This is a QNaN. */ \ ++ __u.__ll |= (cw) & 0xffffffffLL; \ ++ __fr = __u.__d; \ ++ __asm__ ("mtfsf 255,%0" : : "f" (__fr)); \ ++ } + + /* Default control word set at startup. 
*/ + extern fpu_control_t __fpu_control; +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c 2014-05-27 22:40:18.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c 2014-05-27 22:40:21.000000000 -0500 +@@ -83,7 +83,7 @@ + return 0; + } + +-typedef unsigned long long di_fpscr_t __attribute__ ((__mode__ (__DI__))); ++typedef unsigned int di_fpscr_t __attribute__ ((__mode__ (__DI__))); + typedef unsigned int si_fpscr_t __attribute__ ((__mode__ (__SI__))); + + #define _FPSCR_RESERVED 0xfffffff8ffffff04ULL +@@ -95,50 +95,51 @@ + #define _FPSCR_TEST1_RN 0x0000000000000002ULL + + /* Macros for accessing the hardware control word on Power6[x]. */ +-# define _GET_DI_FPSCR(__fpscr) ({ \ +- union { double d; \ +- di_fpscr_t fpscr; } \ +- tmp __attribute__ ((__aligned__(8))); \ +- __asm__ ("mffs 0; stfd%U0 0,%0" : "=m" (tmp.d) : : "fr0"); \ +- (__fpscr)=tmp.fpscr; \ +- tmp.fpscr; }) ++#define _GET_DI_FPSCR(__fpscr) \ ++ ({union { double d; di_fpscr_t fpscr; } u; \ ++ register double fr; \ ++ __asm__ ("mffs %0" : "=f" (fr)); \ ++ u.d = fr; \ ++ (__fpscr) = u.fpscr; \ ++ u.fpscr; \ ++ }) + +-/* We make sure to zero fp0 after we use it in order to prevent stale data ++/* We make sure to zero fp after we use it in order to prevent stale data + in an fp register from making a test-case pass erroneously. */ +-# define _SET_DI_FPSCR(__fpscr) { \ +- union { double d; di_fpscr_t fpscr; } \ +- tmp __attribute__ ((__aligned__(8))); \ +- tmp.fpscr = __fpscr; \ +- /* Set the entire 64-bit FPSCR. */ \ +- __asm__ ("lfd%U0 0,%0; " \ +- ".machine push; " \ +- ".machine \"power6\"; " \ +- "mtfsf 255,0,1,0; " \ +- ".machine pop" : : "m" (tmp.d) : "fr0"); \ +- tmp.d = 0; \ +- __asm__("lfd%U0 0,%0" : : "m" (tmp.d) : "fr0"); \ +-} +- +-# define _GET_SI_FPSCR(__fpscr) ({ \ +- union { double d; \ +- si_fpscr_t cw[2]; } \ +- tmp __attribute__ ((__aligned__(8))); \ +- __asm__ ("mffs 0; stfd%U0 0,%0" : "=m" (tmp.d) : : "fr0"); \ +- (__fpscr)=tmp.cw[1]; \ +- tmp.cw[0]; }) ++# define _SET_DI_FPSCR(__fpscr) \ ++ { union { double d; di_fpscr_t fpscr; } u; \ ++ register double fr; \ ++ u.fpscr = __fpscr; \ ++ fr = u.d; \ ++ /* Set the entire 64-bit FPSCR. */ \ ++ __asm__ (".machine push; " \ ++ ".machine \"power6\"; " \ ++ "mtfsf 255,%0,1,0; " \ ++ ".machine pop" : : "f" (fr)); \ ++ fr = 0.0; \ ++ } ++ ++# define _GET_SI_FPSCR(__fpscr) \ ++ ({union { double d; di_fpscr_t fpscr; } u; \ ++ register double fr; \ ++ __asm__ ("mffs %0" : "=f" (fr)); \ ++ u.d = fr; \ ++ (__fpscr) = (si_fpscr_t) u.fpscr; \ ++ (si_fpscr_t) u.fpscr; \ ++ }) + +-/* We make sure to zero fp0 after we use it in order to prevent stale data ++/* We make sure to zero fp after we use it in order to prevent stale data + in an fp register from making a test-case pass erroneously. */ +-# define _SET_SI_FPSCR(__fpscr) { \ +- union { double d; si_fpscr_t fpscr[2]; } \ +- tmp __attribute__ ((__aligned__(8))); \ +- /* More-or-less arbitrary; this is a QNaN. */ \ +- tmp.fpscr[0] = 0xFFF80000; \ +- tmp.fpscr[1] = __fpscr; \ +- __asm__ ("lfd%U0 0,%0; mtfsf 255,0" : : "m" (tmp.d) : "fr0"); \ +- tmp.d = 0; \ +- __asm__("lfd%U0 0,%0" : : "m" (tmp.d) : "fr0"); \ +-} ++# define _SET_SI_FPSCR(__fpscr) \ ++ { union { double d; di_fpscr_t fpscr; } u; \ ++ register double fr; \ ++ /* More-or-less arbitrary; this is a QNaN. 
*/ \ ++ u.fpscr = 0xfff80000ULL << 32; \ ++ u.fpscr |= __fpscr & 0xffffffffULL; \ ++ fr = u.d; \ ++ __asm__ ("mtfsf 255,%0" : : "f" (fr)); \ ++ fr = 0.0; \ ++ } + + void prime_special_regs(int which) + { diff --git a/packages/glibc/2.17/0039-glibc-ppc64le-17.patch b/packages/glibc/2.17/0039-glibc-ppc64le-17.patch new file mode 100644 index 0000000..5ed69a9 --- /dev/null +++ b/packages/glibc/2.17/0039-glibc-ppc64le-17.patch @@ -0,0 +1,312 @@ +# commit 7b88401f3b25325b1381798a0eccb3efe7751fec +# Author: Alan Modra +# Date: Sat Aug 17 18:31:45 2013 +0930 +# +# PowerPC floating point little-endian [12 of 15] +# http://sourceware.org/ml/libc-alpha/2013-08/msg00087.html +# +# Fixes for little-endian in 32-bit assembly. +# +# * sysdeps/powerpc/sysdep.h (LOWORD, HIWORD, HISHORT): Define. +# * sysdeps/powerpc/powerpc32/fpu/s_copysign.S: Load little-endian +# words of double from correct stack offsets. +# * sysdeps/powerpc/powerpc32/fpu/s_copysignl.S: Likewise. +# * sysdeps/powerpc/powerpc32/fpu/s_lrint.S: Likewise. +# * sysdeps/powerpc/powerpc32/fpu/s_lround.S: Likewise. +# * sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S: Likewise. +# * sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S: Likewise. +# * sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S: Likewise. +# * sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S: Likewise. +# * sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S: Likewise. +# * sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S: Likewise. +# * sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S: Likewise. +# * sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S: Likewise. +# * sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S: Likewise. +# * sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S: Likewise. +# * sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S: Likewise. +# * sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S: Likewise. +# * sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S: Use HISHORT. +# * sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S: Likewise. 
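+#
+# As an illustration of what LOWORD/HIWORD/HISHORT buy us (an editorial
+# sketch added for readers, not from the upstream commit): the 32-bit
+# code stores a double to the stack with stfd and then reloads one
+# 32-bit half with lwz, so the byte offset of the "high" half depends
+# on endianness.  A quick host-side check in C, assuming only a machine
+# with 8-byte doubles:
+#
+#   #include <stdio.h>
+#   #include <string.h>
+#   #include <stdint.h>
+#
+#   int main (void)
+#   {
+#     uint64_t v = 0x1122334455667788ULL;   /* high word is 0x11223344 */
+#     unsigned char b[8];
+#     uint32_t w0, w4;
+#
+#     memcpy (b, &v, 8);
+#     memcpy (&w0, b + 0, 4);
+#     memcpy (&w4, b + 4, 4);
+#     printf ("offset 0: %08x\n", w0);   /* high word on BE, low on LE */
+#     printf ("offset 4: %08x\n", w4);   /* low word on BE, high on LE */
+#     return 0;
+#   }
+#
+# Hence HIWORD is 0 for big-endian and 4 for little-endian, and loads
+# become e.g. "lwz r3,8+HIWORD(r1)" so the same source works for both
+# byte orders.  HISHORT (6 on LE, 0 on BE) is the analogous offset of
+# the 16 bits holding a double's sign and biased exponent.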
+# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_copysign.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_copysign.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_copysign.S 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_copysign.S 2014-05-27 22:45:46.000000000 -0500 +@@ -29,7 +29,7 @@ + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + stfd fp2,8(r1) +- lwz r3,8(r1) ++ lwz r3,8+HIWORD(r1) + cmpwi r3,0 + addi r1,r1,16 + cfi_adjust_cfa_offset (-16) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S 2014-05-27 22:45:46.000000000 -0500 +@@ -30,7 +30,7 @@ + fmr fp0,fp1 + fabs fp1,fp1 + fcmpu cr7,fp0,fp1 +- lwz r3,8(r1) ++ lwz r3,8+HIWORD(r1) + cmpwi cr6,r3,0 + addi r1,r1,16 + cfi_adjust_cfa_offset (-16) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_lrint.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_lrint.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_lrint.S 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_lrint.S 2014-05-27 22:45:46.000000000 -0500 +@@ -24,10 +24,10 @@ + stwu r1,-16(r1) + fctiw fp13,fp1 + stfd fp13,8(r1) +- nop /* Insure the following load is in a different dispatch group */ ++ nop /* Ensure the following load is in a different dispatch group */ + nop /* to avoid pipe stall on POWER4&5. */ + nop +- lwz r3,12(r1) ++ lwz r3,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__lrint) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_lround.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_lround.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_lround.S 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_lround.S 2014-05-27 22:45:46.000000000 -0500 +@@ -67,7 +67,7 @@ + nop /* Ensure the following load is in a different dispatch */ + nop /* group to avoid pipe stall on POWER4&5. */ + nop +- lwz r3,12(r1) /* Load return as integer. */ ++ lwz r3,8+LOWORD(r1) /* Load return as integer. */ + .Lout: + addi r1,r1,16 + blr +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S 2014-05-27 22:48:09.000000000 -0500 +@@ -29,8 +29,8 @@ + nop /* Insure the following load is in a different dispatch group */ + nop /* to avoid pipe stall on POWER4&5. */ + nop +- lwz r3,8(r1) +- lwz r4,12(r1) ++ lwz r3,8+HIWORD(r1) ++ lwz r4,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__llrint) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S 2014-05-27 22:48:44.000000000 -0500 +@@ -28,8 +28,8 @@ + nop /* Insure the following load is in a different dispatch group */ + nop /* to avoid pipe stall on POWER4&5. 
*/ + nop +- lwz r3,8(r1) +- lwz r4,12(r1) ++ lwz r3,8+HIWORD(r1) ++ lwz r4,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__llrintf) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S 2014-05-27 22:45:46.000000000 -0500 +@@ -27,8 +27,8 @@ + ori r1,r1,0 + stfd fp1,24(r1) /* copy FPR to GPR */ + ori r1,r1,0 +- lwz r4,24(r1) +- lwz r5,28(r1) ++ lwz r4,24+HIWORD(r1) ++ lwz r5,24+LOWORD(r1) + lis r0,0x7ff0 /* const long r0 0x7ff00000 00000000 */ + clrlwi r4,r4,1 /* x = fabs(x) */ + cmpw cr7,r4,r0 /* if (fabs(x) =< inf) */ +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S 2014-05-27 22:45:46.000000000 -0500 +@@ -39,8 +39,8 @@ + nop /* Ensure the following load is in a different dispatch */ + nop /* group to avoid pipe stall on POWER4&5. */ + nop +- lwz r4,12(r1) +- lwz r3,8(r1) ++ lwz r3,8+HIWORD(r1) ++ lwz r4,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__llround) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S 2014-05-27 22:45:46.000000000 -0500 +@@ -38,7 +38,7 @@ + nop /* Ensure the following load is in a different dispatch */ + nop /* group to avoid pipe stall on POWER4&5. */ + nop +- lwz r3,12(r1) ++ lwz r3,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__lround) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S 2014-05-27 22:45:46.000000000 -0500 +@@ -27,8 +27,8 @@ + ori r1,r1,0 + stfd fp1,24(r1) /* copy FPR to GPR */ + ori r1,r1,0 +- lwz r4,24(r1) +- lwz r5,28(r1) ++ lwz r4,24+HIWORD(r1) ++ lwz r5,24+LOWORD(r1) + lis r0,0x7ff0 /* const long r0 0x7ff00000 00000000 */ + clrlwi r4,r4,1 /* x = fabs(x) */ + cmpw cr7,r4,r0 /* if (fabs(x) =< inf) */ +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S 2014-05-27 22:46:52.000000000 -0500 +@@ -29,8 +29,8 @@ + /* Insure the following load is in a different dispatch group by + inserting "group ending nop". 
*/ + ori r1,r1,0 +- lwz r3,8(r1) +- lwz r4,12(r1) ++ lwz r3,8+HIWORD(r1) ++ lwz r4,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__llrint) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S 2014-05-27 22:47:29.000000000 -0500 +@@ -28,8 +28,8 @@ + /* Insure the following load is in a different dispatch group by + inserting "group ending nop". */ + ori r1,r1,0 +- lwz r3,8(r1) +- lwz r4,12(r1) ++ lwz r3,8+HIWORD(r1) ++ lwz r4,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__llrintf) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S 2014-05-27 22:45:46.000000000 -0500 +@@ -39,8 +39,8 @@ + /* Insure the following load is in a different dispatch group by + inserting "group ending nop". */ + ori r1,r1,0 +- lwz r4,12(r1) +- lwz r3,8(r1) ++ lwz r3,8+HIWORD(r1) ++ lwz r4,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__llround) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S 2014-05-27 22:45:46.000000000 -0500 +@@ -54,9 +54,8 @@ + stfd fp1,8(r1) /* Transfer FP to GPR's. */ + + ori 2,2,0 /* Force a new dispatch group. */ +- lhz r0,8(r1) /* Fetch the upper portion of the high word of +- the FP value (where the exponent and sign bits +- are). */ ++ lhz r0,8+HISHORT(r1) /* Fetch the upper 16 bits of the FP value ++ (biased exponent and sign bit). */ + clrlwi r0,r0,17 /* r0 = abs(r0). */ + addi r1,r1,16 /* Reset the stack pointer. */ + cmpwi cr7,r0,0x7ff0 /* r4 == 0x7ff0?. */ +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S 2014-05-27 22:45:46.000000000 -0500 +@@ -48,14 +48,13 @@ + li r3,0 + bflr 29 /* If not INF, return. */ + +- /* Either we have -INF/+INF or a denormal. */ ++ /* Either we have +INF or -INF. */ + + stwu r1,-16(r1) /* Allocate stack space. */ + stfd fp1,8(r1) /* Transfer FP to GPR's. */ + ori 2,2,0 /* Force a new dispatch group. */ +- lhz r4,8(r1) /* Fetch the upper portion of the high word of +- the FP value (where the exponent and sign bits +- are). */ ++ lhz r4,8+HISHORT(r1) /* Fetch the upper 16 bits of the FP value ++ (biased exponent and sign bit). */ + addi r1,r1,16 /* Reset the stack pointer. */ + cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? 
*/ + li r3,1 +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S 2014-05-27 22:45:46.000000000 -0500 +@@ -53,8 +53,8 @@ + stwu r1,-16(r1) /* Allocate stack space. */ + stfd fp1,8(r1) /* Transfer FP to GPR's. */ + ori 2,2,0 /* Force a new dispatch group. */ +- lwz r4,8(r1) /* Load the upper half of the FP value. */ +- lwz r5,12(r1) /* Load the lower half of the FP value. */ ++ lwz r4,8+HIWORD(r1) /* Load the upper half of the FP value. */ ++ lwz r5,8+LOWORD(r1) /* Load the lower half of the FP value. */ + addi r1,r1,16 /* Reset the stack pointer. */ + lis r0,0x7ff0 /* Load the upper portion for an INF/NaN. */ + clrlwi r4,r4,1 /* r4 = abs(r4). */ +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S 2014-05-27 22:45:46.000000000 -0500 +@@ -39,10 +39,8 @@ + + stfd fp1,-16(r1) /* Transfer FP to GPR's. */ + ori 2,2,0 /* Force a new dispatch group. */ +- +- lhz r4,-16(r1) /* Fetch the upper portion of the high word of +- the FP value (where the exponent and sign bits +- are). */ ++ lhz r4,-16+HISHORT(r1) /* Fetch the upper 16 bits of the FP value ++ (biased exponent and sign bit). */ + clrlwi r4,r4,17 /* r4 = abs(r4). */ + cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */ + bltlr cr7 /* LT means finite, other non-finite. */ +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S 2014-05-27 22:45:46.000000000 -0500 +@@ -38,9 +38,8 @@ + + stfd fp1,-16(r1) /* Transfer FP to GPR's. */ + ori 2,2,0 /* Force a new dispatch group. */ +- lhz r4,-16(r1) /* Fetch the upper portion of the high word of +- the FP value (where the exponent and sign bits +- are). */ ++ lhz r4,-16+HISHORT(r1) /* Fetch the upper 16 bits of the FP value ++ (biased exponent and sign bit). */ + cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */ + li r3,1 + beqlr cr7 /* EQ means INF, otherwise -INF. */ +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/sysdep.h glibc-2.17-c758a686/sysdeps/powerpc/sysdep.h +--- glibc-2.17-c758a686/sysdeps/powerpc/sysdep.h 2014-05-27 22:45:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/sysdep.h 2014-05-27 22:45:46.000000000 -0500 +@@ -144,6 +144,21 @@ + + #define VRSAVE 256 + ++/* The 32-bit words of a 64-bit dword are at these offsets in memory. */ ++#if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN ++# define LOWORD 0 ++# define HIWORD 4 ++#else ++# define LOWORD 4 ++# define HIWORD 0 ++#endif ++ ++/* The high 16-bit word of a 64-bit dword is at this offset in memory. */ ++#if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN ++# define HISHORT 6 ++#else ++# define HISHORT 0 ++#endif + + /* This seems to always be the case on PPC. 
*/ + #define ALIGNARG(log2) log2 diff --git a/packages/glibc/2.17/0040-glibc-ppc64le-18.patch b/packages/glibc/2.17/0040-glibc-ppc64le-18.patch new file mode 100644 index 0000000..f5e327e --- /dev/null +++ b/packages/glibc/2.17/0040-glibc-ppc64le-18.patch @@ -0,0 +1,81 @@ +# commit 6a31fe7f9cce72b69fce8fe499a2c6ad492c2311 +# Author: Alan Modra +# Date: Sat Aug 17 18:32:18 2013 +0930 +# +# PowerPC floating point little-endian [13 of 15] +# http://sourceware.org/ml/libc-alpha/2013-08/msg00088.html +# +# * sysdeps/powerpc/powerpc32/fpu/s_roundf.S: Increase alignment of +# constants to usual value for .cst8 section, and remove redundant +# high address load. +# * sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S: Use float +# constant for 0x1p52. Load little-endian words of double from +# correct stack offsets. +# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_roundf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_roundf.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_roundf.S 2014-05-27 22:50:13.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_roundf.S 2014-05-27 22:50:13.000000000 -0500 +@@ -19,7 +19,7 @@ + #include + + .section .rodata.cst8,"aM",@progbits,8 +- .align 2 ++ .align 3 + .LC0: /* 2**23 */ + .long 0x4b000000 + .LC1: /* 0.5 */ +@@ -60,7 +60,6 @@ + #ifdef SHARED + lfs fp10,.LC1-.LC0(r9) + #else +- lis r9,.LC1@ha + lfs fp10,.LC1@l(r9) + #endif + ble- cr6,.L4 +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S 2014-05-27 22:50:13.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S 2014-05-27 22:50:13.000000000 -0500 +@@ -19,12 +19,10 @@ + #include + #include + +- .section .rodata.cst12,"aM",@progbits,12 ++ .section .rodata.cst8,"aM",@progbits,8 + .align 3 +- .LC0: /* 0x1.0000000000000p+52 == 2^52 */ +- .long 0x43300000 +- .long 0x00000000 +- .long 0x3f000000 /* Use this for 0.5 */ ++ .LC0: .long (52+127)<<23 /* 0x1p+52 */ ++ .long (-1+127)<<23 /* 0.5 */ + + .section ".text" + +@@ -57,12 +55,12 @@ + addi r9,r9,.LC0-got_label@l + mtlr r11 + cfi_same_value (lr) +- lfd fp9,0(r9) +- lfs fp10,8(r9) ++ lfs fp9,0(r9) ++ lfs fp10,4(r9) + #else + lis r9,.LC0@ha +- lfd fp9,.LC0@l(r9) /* Load 2^52 into fpr9. */ +- lfs fp10,.LC0@l+8(r9) /* Load 0.5 into fpr10. */ ++ lfs fp9,.LC0@l(r9) /* Load 2^52 into fpr9. */ ++ lfs fp10,.LC0@l+4(r9) /* Load 0.5 into fpr10. */ + #endif + fabs fp2,fp1 /* Get the absolute value of x. */ + fsub fp12,fp10,fp10 /* Compute 0.0 into fpr12. */ +@@ -80,8 +78,8 @@ + nop + nop + nop +- lwz r4,12(r1) /* Load return as integer. */ +- lwz r3,8(r1) ++ lwz r3,8+HIWORD(r1) /* Load return as integer. */ ++ lwz r4,8+LOWORD(r1) + .Lout: + addi r1,r1,16 + blr diff --git a/packages/glibc/2.17/0041-glibc-ppc64le-19.patch b/packages/glibc/2.17/0041-glibc-ppc64le-19.patch new file mode 100644 index 0000000..bbf7228 --- /dev/null +++ b/packages/glibc/2.17/0041-glibc-ppc64le-19.patch @@ -0,0 +1,110 @@ +# commit 76a66d510a3737674563133a420f4fd22da42c1b +# Author: Anton Blanchard +# Date: Sat Aug 17 18:33:02 2013 +0930 +# +# PowerPC floating point little-endian [14 of 15] +# http://sourceware.org/ml/libc-alpha/2013-07/msg00205.html +# +# These all wrongly specified float constants in a 64-bit word. +# +# * sysdeps/powerpc/powerpc64/fpu/s_ceilf.S: Correct float constants +# for little-endian. 
+# * sysdeps/powerpc/powerpc64/fpu/s_floorf.S: Likewise. +# * sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S: Likewise. +# * sysdeps/powerpc/powerpc64/fpu/s_rintf.S: Likewise. +# * sysdeps/powerpc/powerpc64/fpu/s_roundf.S: Likewise. +# * sysdeps/powerpc/powerpc64/fpu/s_truncf.S: Likewise. +# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S 2014-05-27 22:52:12.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S 2014-05-27 22:52:18.000000000 -0500 +@@ -19,8 +19,10 @@ + #include + + .section ".toc","aw" ++ .p2align 3 + .LC0: /* 2**23 */ +- .tc FD_4b000000_0[TC],0x4b00000000000000 ++ .long 0x4b000000 ++ .long 0x0 + .section ".text" + + EALIGN (__ceilf, 4, 0) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_floorf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_floorf.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_floorf.S 2014-05-27 22:52:12.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_floorf.S 2014-05-27 22:52:18.000000000 -0500 +@@ -19,8 +19,10 @@ + #include + + .section ".toc","aw" ++ .p2align 3 + .LC0: /* 2**23 */ +- .tc FD_4b000000_0[TC],0x4b00000000000000 ++ .long 0x4b000000 ++ .long 0x0 + .section ".text" + + EALIGN (__floorf, 4, 0) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S 2014-05-27 22:52:12.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S 2014-05-27 22:52:18.000000000 -0500 +@@ -26,8 +26,10 @@ + /* float [fp1] nearbyintf(float [fp1]) */ + + .section ".toc","aw" ++ .p2align 3 + .LC0: /* 2**23 */ +- .tc FD_4b000000_0[TC],0x4b00000000000000 ++ .long 0x4b000000 ++ .long 0x0 + .section ".text" + + EALIGN (__nearbyintf, 4, 0) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S 2014-05-27 22:52:12.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S 2014-05-27 22:52:18.000000000 -0500 +@@ -19,8 +19,10 @@ + #include + + .section ".toc","aw" ++ .p2align 3 + .LC0: /* 2**23 */ +- .tc FD_4b000000_0[TC],0x4b00000000000000 ++ .long 0x4b000000 ++ .long 0x0 + .section ".text" + + EALIGN (__rintf, 4, 0) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_roundf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_roundf.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_roundf.S 2014-05-27 22:52:12.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_roundf.S 2014-05-27 22:52:18.000000000 -0500 +@@ -19,10 +19,12 @@ + #include + + .section ".toc","aw" ++ .p2align 3 + .LC0: /* 2**23 */ +- .tc FD_4b000000_0[TC],0x4b00000000000000 ++ .long 0x4b000000 + .LC1: /* 0.5 */ +- .tc FD_3f000000_0[TC],0x3f00000000000000 ++ .long 0x3f000000 ++ + .section ".text" + + /* float [fp1] roundf (float x [fp1]) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_truncf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_truncf.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_truncf.S 2014-05-27 22:52:12.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_truncf.S 2014-05-27 22:52:18.000000000 -0500 +@@ -19,8 +19,10 @@ + #include + + 
.section ".toc","aw" ++ .p2align 3 + .LC0: /* 2**23 */ +- .tc FD_4b000000_0[TC],0x4b00000000000000 ++ .long 0x4b000000 ++ .long 0x0 + .section ".text" + + /* float [fp1] truncf (float x [fp1]) diff --git a/packages/glibc/2.17/0042-glibc-ppc64le-20.patch b/packages/glibc/2.17/0042-glibc-ppc64le-20.patch new file mode 100644 index 0000000..1342f97 --- /dev/null +++ b/packages/glibc/2.17/0042-glibc-ppc64le-20.patch @@ -0,0 +1,43 @@ +# commit fef13a78ea30d4c26d6bab48d731ebe864ee31b0 +# Author: Alan Modra +# Date: Sat Aug 17 18:33:45 2013 +0930 +# +# PowerPC floating point little-endian [15 of 15] +# http://sourceware.org/ml/libc-alpha/2013-07/msg00206.html +# +# The union loses when little-endian. +# +# * sysdeps/powerpc/powerpc32/power4/hp-timing.h (HP_TIMING_NOW): +# Don't use a union to pack hi/low value. +# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/hp-timing.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/hp-timing.h +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/hp-timing.h 2014-05-27 22:53:37.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/hp-timing.h 2014-05-27 22:53:39.000000000 -0500 +@@ -87,18 +87,15 @@ + + #define HP_TIMING_NOW(Var) \ + do { \ +- union { long long ll; long ii[2]; } _var; \ +- long tmp; \ +- __asm__ __volatile__ ( \ +- "1: mfspr %0,269;" \ +- " mfspr %1,268;" \ +- " mfspr %2,269;" \ +- " cmpw %0,%2;" \ +- " bne 1b;" \ +- : "=r" (_var.ii[0]), "=r" (_var.ii[1]) , "=r" (tmp) \ +- : : "cr0" \ +- ); \ +- Var = _var.ll; \ ++ unsigned int hi, lo, tmp; \ ++ __asm__ __volatile__ ("1: mfspr %0,269;" \ ++ " mfspr %1,268;" \ ++ " mfspr %2,269;" \ ++ " cmpw %0,%2;" \ ++ " bne 1b;" \ ++ : "=&r" (hi), "=&r" (lo), "=&r" (tmp) \ ++ : : "cr0"); \ ++ Var = ((hp_timing_t) hi << 32) | lo; \ + } while (0) + + diff --git a/packages/glibc/2.17/0043-glibc-ppc64le-21.patch b/packages/glibc/2.17/0043-glibc-ppc64le-21.patch new file mode 100644 index 0000000..a945404 --- /dev/null +++ b/packages/glibc/2.17/0043-glibc-ppc64le-21.patch @@ -0,0 +1,294 @@ +# commit be1e5d311342e08ae1f8013342df27b7ded2c156 +# Author: Anton Blanchard +# Date: Sat Aug 17 18:34:40 2013 +0930 +# +# PowerPC LE setjmp/longjmp +# http://sourceware.org/ml/libc-alpha/2013-08/msg00089.html +# +# Little-endian fixes for setjmp/longjmp. When writing these I noticed +# the setjmp code corrupts the non volatile VMX registers when using an +# unaligned buffer. Anton fixed this, and also simplified it quite a +# bit. +# +# The current code uses boilerplate for the case where we want to store +# 16 bytes to an unaligned address. For that we have to do a +# read/modify/write of two aligned 16 byte quantities. In our case we +# are storing a bunch of back to back data (consective VMX registers), +# and only the start and end of the region need the read/modify/write. +# +# [BZ #15723] +# * sysdeps/powerpc/jmpbuf-offsets.h: Comment fix. +# * sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S: Correct +# _dl_hwcap access for little-endian. +# * sysdeps/powerpc/powerpc32/fpu/setjmp-common.S: Likewise. Don't +# destroy vmx regs when saving unaligned. +# * sysdeps/powerpc/powerpc64/__longjmp-common.S: Correct CR load. +# * sysdeps/powerpc/powerpc64/setjmp-common.S: Likewise CR save. Don't +# destroy vmx regs when saving unaligned. 
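+#
+# The simplification is easier to see in scalar terms.  The sketch below
+# (an editorial illustration in plain C, not glibc code; the names are
+# invented) writes N consecutive 16-byte records to a misaligned
+# destination using only 16-byte-aligned stores: every aligned lane in
+# the middle is fully overwritten, so only the first and last lanes need
+# a read/modify/write to preserve the bytes outside the region -- which
+# is what the new setjmp/longjmp code does with lvx/vperm/stvx.
+#
+#   #include <string.h>
+#   #include <stdint.h>
+#
+#   static void
+#   store_records_unaligned (unsigned char *dst,
+#                            const unsigned char (*src)[16], size_t n)
+#   {
+#     size_t off = (uintptr_t) dst % 16;
+#     unsigned char *lane = dst - off;     /* first aligned 16-byte lane */
+#     unsigned char buf[16];
+#     size_t i;
+#
+#     if (off == 0)
+#       { memcpy (dst, src, n * 16); return; }  /* aligned: no RMW at all */
+#
+#     memcpy (buf, lane, 16);                   /* first lane: read...    */
+#     memcpy (buf + off, src[0], 16 - off);     /* ...modify...           */
+#     memcpy (lane, buf, 16);                   /* ...write               */
+#
+#     for (i = 1; i < n; i++)                   /* middle lanes: no read  */
+#       {
+#         memcpy (buf, src[i - 1] + (16 - off), off);
+#         memcpy (buf + off, src[i], 16 - off);
+#         memcpy (lane + 16 * i, buf, 16);
+#       }
+#
+#     memcpy (buf, lane + 16 * n, 16);            /* last lane: read...   */
+#     memcpy (buf, src[n - 1] + (16 - off), off); /* ...modify...         */
+#     memcpy (lane + 16 * n, buf, 16);            /* ...write             */
+#   }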
+# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/jmpbuf-offsets.h glibc-2.17-c758a686/sysdeps/powerpc/jmpbuf-offsets.h +--- glibc-2.17-c758a686/sysdeps/powerpc/jmpbuf-offsets.h 2014-05-27 22:55:23.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/jmpbuf-offsets.h 2014-05-27 22:55:27.000000000 -0500 +@@ -21,12 +21,10 @@ + #define JB_LR 2 /* The address we will return to */ + #if __WORDSIZE == 64 + # define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18*2 words total. */ +-# define JB_CR 21 /* Condition code registers with the VRSAVE at */ +- /* offset 172 (low half of the double word. */ ++# define JB_CR 21 /* Shared dword with VRSAVE. CR word at offset 172. */ + # define JB_FPRS 22 /* FPRs 14 through 31 are saved, 18*2 words total. */ + # define JB_SIZE (64 * 8) /* As per PPC64-VMX ABI. */ +-# define JB_VRSAVE 21 /* VRSAVE shares a double word with the CR at offset */ +- /* 168 (high half of the double word). */ ++# define JB_VRSAVE 21 /* Shared dword with CR. VRSAVE word at offset 168. */ + # define JB_VRS 40 /* VRs 20 through 31 are saved, 12*4 words total. */ + #else + # define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18 in total. */ +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S 2014-05-27 22:55:23.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S 2014-05-27 22:55:27.000000000 -0500 +@@ -46,16 +46,16 @@ + # endif + mtlr r6 + cfi_same_value (lr) +- lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5) ++ lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5) + # else + lwz r5,_dl_hwcap@got(r5) + mtlr r6 + cfi_same_value (lr) +- lwz r5,4(r5) ++ lwz r5,LOWORD(r5) + # endif + # else +- lis r5,(_dl_hwcap+4)@ha +- lwz r5,(_dl_hwcap+4)@l(r5) ++ lis r5,(_dl_hwcap+LOWORD)@ha ++ lwz r5,(_dl_hwcap+LOWORD)@l(r5) + # endif + andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) + beq L(no_vmx) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S 2014-05-27 22:55:23.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S 2014-05-27 22:55:27.000000000 -0500 +@@ -97,14 +97,14 @@ + # else + lwz r5,_rtld_global_ro@got(r5) + # endif +- lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5) ++ lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5) + # else + lwz r5,_dl_hwcap@got(r5) +- lwz r5,4(r5) ++ lwz r5,LOWORD(r5) + # endif + # else +- lis r6,(_dl_hwcap+4)@ha +- lwz r5,(_dl_hwcap+4)@l(r6) ++ lis r6,(_dl_hwcap+LOWORD)@ha ++ lwz r5,(_dl_hwcap+LOWORD)@l(r6) + # endif + andis. 
r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) + beq L(no_vmx) +@@ -114,44 +114,43 @@ + stw r0,((JB_VRSAVE)*4)(3) + addi r6,r5,16 + beq+ L(aligned_save_vmx) ++ + lvsr v0,0,r5 +- vspltisb v1,-1 /* set v1 to all 1's */ +- vspltisb v2,0 /* set v2 to all 0's */ +- vperm v3,v2,v1,v0 /* v3 contains shift mask with num all 1 bytes on left = misalignment */ +- +- +- /* Special case for v20 we need to preserve what is in save area below v20 before obliterating it */ +- lvx v5,0,r5 +- vperm v20,v20,v20,v0 +- vsel v5,v5,v20,v3 +- vsel v20,v20,v2,v3 +- stvx v5,0,r5 +- +-#define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \ +- addi addgpr,addgpr,32; \ +- vperm savevr,savevr,savevr,shiftvr; \ +- vsel hivr,prev_savevr,savevr,maskvr; \ +- stvx hivr,0,savegpr; +- +- save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5) +- save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6) +- save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5) +- save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6) +- save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5) +- save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6) +- save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5) +- save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6) +- save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5) +- save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6) +- +- /* Special case for r31 we need to preserve what is in save area above v31 before obliterating it */ +- addi r5,r5,32 +- vperm v31,v31,v31,v0 +- lvx v4,0,r5 +- vsel v5,v30,v31,v3 +- stvx v5,0,r6 +- vsel v4,v31,v4,v3 +- stvx v4,0,r5 ++ lvsl v1,0,r5 ++ addi r6,r5,-16 ++ ++# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \ ++ addi addgpr,addgpr,32; \ ++ vperm tmpvr,prevvr,savevr,shiftvr; \ ++ stvx tmpvr,0,savegpr ++ ++ /* ++ * We have to be careful not to corrupt the data below v20 and ++ * above v31. To keep things simple we just rotate both ends in ++ * the opposite direction to our main permute so we can use ++ * the common macro. ++ */ ++ ++ /* load and rotate data below v20 */ ++ lvx v2,0,r5 ++ vperm v2,v2,v2,v1 ++ save_misaligned_vmx(v20,v2,v0,v3,r5,r6) ++ save_misaligned_vmx(v21,v20,v0,v3,r6,r5) ++ save_misaligned_vmx(v22,v21,v0,v3,r5,r6) ++ save_misaligned_vmx(v23,v22,v0,v3,r6,r5) ++ save_misaligned_vmx(v24,v23,v0,v3,r5,r6) ++ save_misaligned_vmx(v25,v24,v0,v3,r6,r5) ++ save_misaligned_vmx(v26,v25,v0,v3,r5,r6) ++ save_misaligned_vmx(v27,v26,v0,v3,r6,r5) ++ save_misaligned_vmx(v28,v27,v0,v3,r5,r6) ++ save_misaligned_vmx(v29,v28,v0,v3,r6,r5) ++ save_misaligned_vmx(v30,v29,v0,v3,r5,r6) ++ save_misaligned_vmx(v31,v30,v0,v3,r6,r5) ++ /* load and rotate data above v31 */ ++ lvx v2,0,r6 ++ vperm v2,v2,v2,v1 ++ save_misaligned_vmx(v2,v31,v0,v3,r5,r6) ++ + b L(no_vmx) + + L(aligned_save_vmx): +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/__longjmp-common.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/__longjmp-common.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/__longjmp-common.S 2014-05-27 22:55:23.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/__longjmp-common.S 2014-05-27 22:55:27.000000000 -0500 +@@ -60,7 +60,7 @@ + beq L(no_vmx) + la r5,((JB_VRS)*8)(3) + andi. r6,r5,0xf +- lwz r0,((JB_VRSAVE)*8)(3) ++ lwz r0,((JB_VRSAVE)*8)(3) /* 32-bit VRSAVE. */ + mtspr VRSAVE,r0 + beq+ L(aligned_restore_vmx) + addi r6,r5,16 +@@ -156,7 +156,7 @@ + lfd fp21,((JB_FPRS+7)*8)(r3) + ld r22,((JB_GPRS+8)*8)(r3) + lfd fp22,((JB_FPRS+8)*8)(r3) +- ld r0,(JB_CR*8)(r3) ++ lwz r0,((JB_CR*8)+4)(r3) /* 32-bit CR. 
*/ + ld r23,((JB_GPRS+9)*8)(r3) + lfd fp23,((JB_FPRS+9)*8)(r3) + ld r24,((JB_GPRS+10)*8)(r3) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S 2014-05-27 22:55:23.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S 2014-05-27 22:55:27.000000000 -0500 +@@ -98,7 +98,7 @@ + mfcr r0 + std r16,((JB_GPRS+2)*8)(3) + stfd fp16,((JB_FPRS+2)*8)(3) +- std r0,(JB_CR*8)(3) ++ stw r0,((JB_CR*8)+4)(3) /* 32-bit CR. */ + std r17,((JB_GPRS+3)*8)(3) + stfd fp17,((JB_FPRS+3)*8)(3) + std r18,((JB_GPRS+4)*8)(3) +@@ -142,50 +142,46 @@ + la r5,((JB_VRS)*8)(3) + andi. r6,r5,0xf + mfspr r0,VRSAVE +- stw r0,((JB_VRSAVE)*8)(3) ++ stw r0,((JB_VRSAVE)*8)(3) /* 32-bit VRSAVE. */ + addi r6,r5,16 + beq+ L(aligned_save_vmx) ++ + lvsr v0,0,r5 +- vspltisb v1,-1 /* set v1 to all 1's */ +- vspltisb v2,0 /* set v2 to all 0's */ +- vperm v3,v2,v1,v0 /* v3 contains shift mask with num all 1 bytes +- on left = misalignment */ +- +- +- /* Special case for v20 we need to preserve what is in save area +- below v20 before obliterating it */ +- lvx v5,0,r5 +- vperm v20,v20,v20,v0 +- vsel v5,v5,v20,v3 +- vsel v20,v20,v2,v3 +- stvx v5,0,r5 +- +-# define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \ +- addi addgpr,addgpr,32; \ +- vperm savevr,savevr,savevr,shiftvr; \ +- vsel hivr,prev_savevr,savevr,maskvr; \ +- stvx hivr,0,savegpr; +- +- save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5) +- save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6) +- save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5) +- save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6) +- save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5) +- save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6) +- save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5) +- save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6) +- save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5) +- save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6) +- +- /* Special case for r31 we need to preserve what is in save area +- above v31 before obliterating it */ +- addi r5,r5,32 +- vperm v31,v31,v31,v0 +- lvx v4,0,r5 +- vsel v5,v30,v31,v3 +- stvx v5,0,r6 +- vsel v4,v31,v4,v3 +- stvx v4,0,r5 ++ lvsl v1,0,r5 ++ addi r6,r5,-16 ++ ++# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \ ++ addi addgpr,addgpr,32; \ ++ vperm tmpvr,prevvr,savevr,shiftvr; \ ++ stvx tmpvr,0,savegpr ++ ++ /* ++ * We have to be careful not to corrupt the data below v20 and ++ * above v31. To keep things simple we just rotate both ends in ++ * the opposite direction to our main permute so we can use ++ * the common macro. 
++ */ ++ ++ /* load and rotate data below v20 */ ++ lvx v2,0,r5 ++ vperm v2,v2,v2,v1 ++ save_misaligned_vmx(v20,v2,v0,v3,r5,r6) ++ save_misaligned_vmx(v21,v20,v0,v3,r6,r5) ++ save_misaligned_vmx(v22,v21,v0,v3,r5,r6) ++ save_misaligned_vmx(v23,v22,v0,v3,r6,r5) ++ save_misaligned_vmx(v24,v23,v0,v3,r5,r6) ++ save_misaligned_vmx(v25,v24,v0,v3,r6,r5) ++ save_misaligned_vmx(v26,v25,v0,v3,r5,r6) ++ save_misaligned_vmx(v27,v26,v0,v3,r6,r5) ++ save_misaligned_vmx(v28,v27,v0,v3,r5,r6) ++ save_misaligned_vmx(v29,v28,v0,v3,r6,r5) ++ save_misaligned_vmx(v30,v29,v0,v3,r5,r6) ++ save_misaligned_vmx(v31,v30,v0,v3,r6,r5) ++ /* load and rotate data above v31 */ ++ lvx v2,0,r6 ++ vperm v2,v2,v2,v1 ++ save_misaligned_vmx(v2,v31,v0,v3,r5,r6) ++ + b L(no_vmx) + + L(aligned_save_vmx): diff --git a/packages/glibc/2.17/0044-glibc-ppc64le-22.patch b/packages/glibc/2.17/0044-glibc-ppc64le-22.patch new file mode 100644 index 0000000..12a0ff0 --- /dev/null +++ b/packages/glibc/2.17/0044-glibc-ppc64le-22.patch @@ -0,0 +1,228 @@ +# commit 9b874b2f1eb2550e39d3e9c38772e64a767e9de2 +# Author: Alan Modra +# Date: Sat Aug 17 18:35:40 2013 +0930 +# +# PowerPC ugly symbol versioning +# http://sourceware.org/ml/libc-alpha/2013-08/msg00090.html +# +# This patch fixes symbol versioning in setjmp/longjmp. The existing +# code uses raw versions, which results in wrong symbol versioning when +# you want to build glibc with a base version of 2.19 for LE. +# +# Note that the merging the 64-bit and 32-bit versions in novmx-lonjmp.c +# and pt-longjmp.c doesn't result in GLIBC_2.0 versions for 64-bit, due +# to the base in shlib_versions. +# +# * sysdeps/powerpc/longjmp.c: Use proper symbol versioning macros. +# * sysdeps/powerpc/novmx-longjmp.c: Likewise. +# * sysdeps/powerpc/powerpc32/bsd-_setjmp.S: Likewise. +# * sysdeps/powerpc/powerpc32/bsd-setjmp.S: Likewise. +# * sysdeps/powerpc/powerpc32/fpu/__longjmp.S: Likewise. +# * sysdeps/powerpc/powerpc32/fpu/setjmp.S: Likewise. +# * sysdeps/powerpc/powerpc32/mcount.c: Likewise. +# * sysdeps/powerpc/powerpc32/setjmp.S: Likewise. +# * sysdeps/powerpc/powerpc64/setjmp.S: Likewise. +# * nptl/sysdeps/unix/sysv/linux/powerpc/pt-longjmp.c: Likewise. 
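+#
+# For readers unfamiliar with the macros involved: versioned_symbol()
+# marks the default version that newly linked programs bind to, while
+# compat_symbol() only keeps an older version around for already-built
+# binaries, and both resolve the version names through shlib-compat.h,
+# so a different baseline (such as 2.19 for LE) can drop or renumber
+# them -- which raw symbol_version()/default_symbol_version() with
+# hard-coded "GLIBC_2.x" strings cannot do.  Underneath is plain ELF
+# symbol versioning; a rough stand-alone sketch (illustrative only, the
+# function names are invented, and a version script is still needed at
+# link time) looks like:
+#
+#   /* cc -shared -fPIC -Wl,--version-script=ver.map sketch.c */
+#   void __vmx_impl (void *env, int val)   { (void) env; (void) val; /* new code */ }
+#   void __novmx_impl (void *env, int val) { (void) env; (void) val; /* old code */ }
+#
+#   __asm__ (".symver __vmx_impl, longjmp@@GLIBC_2.3.4");  /* default version */
+#   __asm__ (".symver __novmx_impl, longjmp@GLIBC_2.0");   /* compat only */
+#
+# with ver.map declaring the GLIBC_2.0 and GLIBC_2.3.4 version nodes.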
+# +diff -urN glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/pt-longjmp.c glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/pt-longjmp.c +--- glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/pt-longjmp.c 2014-05-27 23:22:12.000000000 -0500 ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/pt-longjmp.c 2014-05-27 23:23:44.000000000 -0500 +@@ -41,13 +41,8 @@ + __novmx__libc_longjmp (env, val); + } + +-# if __WORDSIZE == 64 +-symbol_version (__novmx_longjmp,longjmp,GLIBC_2.3); +-symbol_version (__novmx_siglongjmp,siglongjmp,GLIBC_2.3); +-# else +-symbol_version (__novmx_longjmp,longjmp,GLIBC_2.0); +-symbol_version (__novmx_siglongjmp,siglongjmp,GLIBC_2.0); +-# endif ++compat_symbol (libpthread, __novmx_longjmp, longjmp, GLIBC_2_0); ++compat_symbol (libpthread, __novmx_siglongjmp, siglongjmp, GLIBC_2_0); + #endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)) */ + + void +@@ -62,5 +57,5 @@ + __libc_siglongjmp (env, val); + } + +-versioned_symbol (libc, __vmx_longjmp, longjmp, GLIBC_2_3_4); +-versioned_symbol (libc, __vmx_siglongjmp, siglongjmp, GLIBC_2_3_4); ++versioned_symbol (libpthread, __vmx_longjmp, longjmp, GLIBC_2_3_4); ++versioned_symbol (libpthread, __vmx_siglongjmp, siglongjmp, GLIBC_2_3_4); +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/longjmp.c glibc-2.17-c758a686/sysdeps/powerpc/longjmp.c +--- glibc-2.17-c758a686/sysdeps/powerpc/longjmp.c 2014-05-27 23:22:10.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/longjmp.c 2014-05-27 23:22:12.000000000 -0500 +@@ -56,6 +56,6 @@ + + default_symbol_version (__vmx__libc_longjmp, __libc_longjmp, GLIBC_PRIVATE); + default_symbol_version (__vmx__libc_siglongjmp, __libc_siglongjmp, GLIBC_PRIVATE); +-default_symbol_version (__vmx_longjmp, _longjmp, GLIBC_2.3.4); +-default_symbol_version (__vmxlongjmp, longjmp, GLIBC_2.3.4); +-default_symbol_version (__vmxsiglongjmp, siglongjmp, GLIBC_2.3.4); ++versioned_symbol (libc, __vmx_longjmp, _longjmp, GLIBC_2_3_4); ++versioned_symbol (libc, __vmxlongjmp, longjmp, GLIBC_2_3_4); ++versioned_symbol (libc, __vmxsiglongjmp, siglongjmp, GLIBC_2_3_4); +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/novmx-longjmp.c glibc-2.17-c758a686/sysdeps/powerpc/novmx-longjmp.c +--- glibc-2.17-c758a686/sysdeps/powerpc/novmx-longjmp.c 2014-05-27 23:22:10.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/novmx-longjmp.c 2014-05-27 23:22:12.000000000 -0500 +@@ -51,13 +51,7 @@ + weak_alias (__novmx__libc_siglongjmp, __novmxlongjmp) + weak_alias (__novmx__libc_siglongjmp, __novmxsiglongjmp) + +-# if __WORDSIZE == 64 +-symbol_version (__novmx_longjmp,_longjmp,GLIBC_2.3); +-symbol_version (__novmxlongjmp,longjmp,GLIBC_2.3); +-symbol_version (__novmxsiglongjmp,siglongjmp,GLIBC_2.3); +-# else +-symbol_version (__novmx_longjmp,_longjmp,GLIBC_2.0); +-symbol_version (__novmxlongjmp,longjmp,GLIBC_2.0); +-symbol_version (__novmxsiglongjmp,siglongjmp,GLIBC_2.0); +-# endif ++compat_symbol (libc, __novmx_longjmp, _longjmp, GLIBC_2_0); ++compat_symbol (libc, __novmxlongjmp, longjmp, GLIBC_2_0); ++compat_symbol (libc, __novmxsiglongjmp, siglongjmp, GLIBC_2_0); + #endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)) */ +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/bsd-_setjmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/bsd-_setjmp.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/bsd-_setjmp.S 2014-05-27 23:22:10.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/bsd-_setjmp.S 2014-05-27 23:22:12.000000000 -0500 +@@ -32,7 
+32,7 @@ + /* Build a versioned object for libc. */ + + # if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +-symbol_version (__novmx_setjmp,_setjmp,GLIBC_2.0); ++compat_symbol (libc, __novmx_setjmp, _setjmp, GLIBC_2_0); + + ENTRY (BP_SYM (__novmx_setjmp)) + li r4,0 /* Set second argument to 0. */ +@@ -41,7 +41,7 @@ + libc_hidden_def (__novmx_setjmp) + # endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) */ + +-default_symbol_version (__vmx_setjmp,_setjmp,GLIBC_2.3.4) ++versioned_symbol (libc, __vmx_setjmp, _setjmp, GLIBC_2_3_4) + /* __GI__setjmp prototype is needed for ntpl i.e. _setjmp is defined + as a libc_hidden_proto & is used in sysdeps/generic/libc-start.c + if HAVE_CLEANUP_JMP_BUF is defined */ +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/bsd-setjmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/bsd-setjmp.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/bsd-setjmp.S 2014-05-27 23:22:10.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/bsd-setjmp.S 2014-05-27 23:22:12.000000000 -0500 +@@ -27,7 +27,7 @@ + b __novmx__sigsetjmp@local + END (__novmxsetjmp) + strong_alias (__novmxsetjmp, __novmx__setjmp) +-symbol_version (__novmxsetjmp, setjmp, GLIBC_2.0) ++compat_symbol (libc, __novmxsetjmp, setjmp, GLIBC_2_0) + + #endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) ) */ + +@@ -37,4 +37,4 @@ + END (__vmxsetjmp) + strong_alias (__vmxsetjmp, __vmx__setjmp) + strong_alias (__vmx__setjmp, __setjmp) +-default_symbol_version (__vmxsetjmp,setjmp,GLIBC_2.3.4) ++versioned_symbol (libc, __vmxsetjmp, setjmp, GLIBC_2_3_4) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/__longjmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/__longjmp.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/__longjmp.S 2014-05-27 23:22:10.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/__longjmp.S 2014-05-27 23:22:12.000000000 -0500 +@@ -26,14 +26,14 @@ + + #else /* !NOT_IN_libc */ + /* Build a versioned object for libc. */ +-default_symbol_version (__vmx__longjmp,__longjmp,GLIBC_2.3.4); ++versioned_symbol (libc, __vmx__longjmp, __longjmp, GLIBC_2_3_4); + # define __longjmp __vmx__longjmp + # include "__longjmp-common.S" + + # if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) + # define __NO_VMX__ + # undef JB_SIZE +-symbol_version (__novmx__longjmp,__longjmp,GLIBC_2.0); ++compat_symbol (libc, __novmx__longjmp, __longjmp, GLIBC_2_0); + # undef __longjmp + # define __longjmp __novmx__longjmp + # include "__longjmp-common.S" +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/setjmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/setjmp.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/setjmp.S 2014-05-27 23:22:10.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/setjmp.S 2014-05-27 23:22:12.000000000 -0500 +@@ -26,7 +26,7 @@ + + #else /* !NOT_IN_libc */ + /* Build a versioned object for libc. 
*/ +-default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4) ++versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4) + # define __sigsetjmp __vmx__sigsetjmp + # define __sigjmp_save __vmx__sigjmp_save + # include "setjmp-common.S" +@@ -36,7 +36,7 @@ + # undef __sigsetjmp + # undef __sigjmp_save + # undef JB_SIZE +-symbol_version (__novmx__sigsetjmp,__sigsetjmp,GLIBC_2.0) ++compat_symbol (libc, __novmx__sigsetjmp, __sigsetjmp, GLIBC_2_0) + # define __sigsetjmp __novmx__sigsetjmp + # define __sigjmp_save __novmx__sigjmp_save + # include "setjmp-common.S" +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/mcount.c glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/mcount.c +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/mcount.c 2014-05-27 23:22:10.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/mcount.c 2014-05-27 23:22:12.000000000 -0500 +@@ -9,7 +9,7 @@ + /* __mcount_internal was added in glibc 2.15 with version GLIBC_PRIVATE, + but it should have been put in version GLIBC_2.15. Mark the + GLIBC_PRIVATE version obsolete and add it to GLIBC_2.16 instead. */ +-default_symbol_version (___mcount_internal, __mcount_internal, GLIBC_2.16); ++versioned_symbol (libc, ___mcount_internal, __mcount_internal, GLIBC_2_16); + + #if SHLIB_COMPAT (libc, GLIBC_2_15, GLIBC_2_16) + strong_alias (___mcount_internal, ___mcount_internal_private); +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/setjmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/setjmp.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/setjmp.S 2014-05-27 23:22:10.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/setjmp.S 2014-05-27 23:22:12.000000000 -0500 +@@ -25,7 +25,7 @@ + + #else /* !NOT_IN_libc */ + /* Build a versioned object for libc. */ +-default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4) ++versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4) + # define __sigsetjmp __vmx__sigsetjmp + # define __sigjmp_save __vmx__sigjmp_save + # include "setjmp-common.S" +@@ -35,7 +35,7 @@ + # undef __sigsetjmp + # undef __sigjmp_save + # undef JB_SIZE +-symbol_version (__novmx__sigsetjmp,__sigsetjmp,GLIBC_2.0) ++compat_symbol (libc, __novmx__sigsetjmp, __sigsetjmp, GLIBC_2_0) + # define __sigsetjmp __novmx__sigsetjmp + # define __sigjmp_save __novmx__sigjmp_save + # include "setjmp-common.S" +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp.S 2014-05-27 23:22:10.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp.S 2014-05-27 23:22:12.000000000 -0500 +@@ -26,9 +26,9 @@ + + #else /* !NOT_IN_libc */ + /* Build a versioned object for libc. 
*/ +-default_symbol_version (__vmxsetjmp, setjmp, GLIBC_2.3.4) +-default_symbol_version (__vmx_setjmp,_setjmp,GLIBC_2.3.4) +-default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4) ++versioned_symbol (libc, __vmxsetjmp, setjmp, GLIBC_2_3_4) ++versioned_symbol (libc, __vmx_setjmp, _setjmp, GLIBC_2_3_4) ++versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4) + # define setjmp __vmxsetjmp + # define _setjmp __vmx_setjmp + # define __sigsetjmp __vmx__sigsetjmp +@@ -44,9 +44,9 @@ + # undef __sigjmp_save + # undef JB_SIZE + # define __NO_VMX__ +-symbol_version (__novmxsetjmp, setjmp, GLIBC_2.3) +-symbol_version (__novmx_setjmp,_setjmp,GLIBC_2.3); +-symbol_version (__novmx__sigsetjmp,__sigsetjmp,GLIBC_2.3) ++compat_symbol (libc, __novmxsetjmp, setjmp, GLIBC_2_3) ++compat_symbol (libc, __novmx_setjmp,_setjmp, GLIBC_2_3); ++compat_symbol (libc, __novmx__sigsetjmp,__sigsetjmp, GLIBC_2_3) + # define setjmp __novmxsetjmp + # define _setjmp __novmx_setjmp + # define __sigsetjmp __novmx__sigsetjmp diff --git a/packages/glibc/2.17/0045-glibc-ppc64le-23.patch b/packages/glibc/2.17/0045-glibc-ppc64le-23.patch new file mode 100644 index 0000000..00288b9 --- /dev/null +++ b/packages/glibc/2.17/0045-glibc-ppc64le-23.patch @@ -0,0 +1,102 @@ +# commit 02f04a6c7fea2b474b026bbce721d8c658d71fda +# Author: Alan Modra +# Date: Sat Aug 17 18:36:11 2013 +0930 +# +# PowerPC LE _dl_hwcap access +# http://sourceware.org/ml/libc-alpha/2013-08/msg00091.html +# +# More LE support, correcting word accesses to _dl_hwcap. +# +# * sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S: Use +# HIWORD/LOWORD. +# * sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S: Ditto. +# * sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S: Ditto. +# +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S 2014-05-27 23:25:35.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S 2014-05-27 23:25:38.000000000 -0500 +@@ -151,15 +151,15 @@ + # ifdef SHARED + lwz r7,_rtld_global_ro@got(r7) + mtlr r8 +- lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7) ++ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r7) + # else + lwz r7,_dl_hwcap@got(r7) + mtlr r8 +- lwz r7,4(r7) ++ lwz r7,LOWORD(r7) + # endif + # else +- lis r7,(_dl_hwcap+4)@ha +- lwz r7,(_dl_hwcap+4)@l(r7) ++ lis r7,(_dl_hwcap+LOWORD)@ha ++ lwz r7,(_dl_hwcap+LOWORD)@l(r7) + # endif + andis. 
r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16) + +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S 2014-05-27 23:25:35.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S 2014-05-27 23:25:38.000000000 -0500 +@@ -79,15 +79,15 @@ + # ifdef SHARED + lwz r7,_rtld_global_ro@got(r7) + mtlr r8 +- lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7) ++ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r7) + # else + lwz r7,_dl_hwcap@got(r7) + mtlr r8 +- lwz r7,4(r7) ++ lwz r7,LOWORD(r7) + # endif + #else +- lis r7,(_dl_hwcap+4)@ha +- lwz r7,(_dl_hwcap+4)@l(r7) ++ lis r7,(_dl_hwcap+LOWORD)@ha ++ lwz r7,(_dl_hwcap+LOWORD)@l(r7) + #endif + + #ifdef __CONTEXT_ENABLE_FPRS +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S 2014-05-27 23:25:35.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S 2014-05-27 23:25:38.000000000 -0500 +@@ -152,15 +152,15 @@ + # ifdef SHARED + lwz r7,_rtld_global_ro@got(r7) + mtlr r8 +- lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7) ++ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r7) + # else + lwz r7,_dl_hwcap@got(r7) + mtlr r8 +- lwz r7,4(r7) ++ lwz r7,LOWORD(r7) + # endif + # else +- lis r7,(_dl_hwcap+4)@ha +- lwz r7,(_dl_hwcap+4)@l(r7) ++ lis r7,(_dl_hwcap+LOWORD)@ha ++ lwz r7,(_dl_hwcap+LOWORD)@l(r7) + # endif + + # ifdef __CONTEXT_ENABLE_VRS +@@ -308,14 +308,14 @@ + mtlr r8 + # ifdef SHARED + lwz r7,_rtld_global_ro@got(r7) +- lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7) ++ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r7) + # else + lwz r7,_dl_hwcap@got(r7) +- lwz r7,4(r7) ++ lwz r7,LOWORD(r7) + # endif + # else +- lis r7,(_dl_hwcap+4)@ha +- lwz r7,(_dl_hwcap+4)@l(r7) ++ lis r7,(_dl_hwcap+LOWORD)@ha ++ lwz r7,(_dl_hwcap+LOWORD)@l(r7) + # endif + andis. r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16) + la r10,(_UC_VREGS)(r31) diff --git a/packages/glibc/2.17/0046-glibc-ppc64le-24.patch b/packages/glibc/2.17/0046-glibc-ppc64le-24.patch new file mode 100644 index 0000000..5870d4f --- /dev/null +++ b/packages/glibc/2.17/0046-glibc-ppc64le-24.patch @@ -0,0 +1,55 @@ +# commit 0b2c2ace3601d5d59cf89130b16840e7f132f7a6 +# Author: Alan Modra +# Date: Sat Aug 17 18:36:45 2013 +0930 +# +# PowerPC makecontext +# http://sourceware.org/ml/libc-alpha/2013-08/msg00092.html +# +# Use conditional form of branch and link to avoid destroying the cpu +# link stack used to predict blr return addresses. +# +# * sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S: Use +# conditional form of branch and link when obtaining pc. +# * sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S: Likewise. 
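+#
+# The "bcl 20,31,1f" form is the Power ISA's recommended way to read the
+# current address: BO=20 makes it an always-taken branch-and-link to the
+# next instruction, and this particular encoding is defined not to be
+# treated as a subroutine call, so it does not push an entry onto the
+# return-address (link) stack the way a plain "bl 1f" does, and later
+# blr predictions are not polluted.  A minimal stand-alone illustration
+# (editorial, PowerPC-only, GCC inline asm assumed -- not glibc code):
+#
+#   static inline void *
+#   current_address (void)
+#   {
+#     void *pc;
+#     __asm__ volatile ("bcl 20,31,1f\n"
+#                       "1:  mflr %0"
+#                       : "=r" (pc) : : "lr");
+#     return pc;     /* address of the instruction at label 1 */
+#   }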
+# +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S 2014-05-28 12:25:49.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S 2014-05-28 12:25:51.000000000 -0500 +@@ -47,7 +47,9 @@ + #ifdef PIC + mflr r0 + cfi_register(lr,r0) +- bl 1f ++ /* Use this conditional form of branch and link to avoid destroying ++ the cpu link stack used to predict blr return addresses. */ ++ bcl 20,31,1f + 1: mflr r6 + addi r6,r6,L(exitcode)-1b + mtlr r0 +@@ -136,7 +138,9 @@ + #ifdef PIC + mflr r0 + cfi_register(lr,r0) +- bl 1f ++ /* Use this conditional form of branch and link to avoid destroying ++ the cpu link stack used to predict blr return addresses. */ ++ bcl 20,31,1f + 1: mflr r6 + addi r6,r6,L(novec_exitcode)-1b + mtlr r0 +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S 2014-05-28 12:25:49.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S 2014-05-28 12:25:51.000000000 -0500 +@@ -124,8 +124,10 @@ + + /* If the target function returns we need to do some cleanup. We use a + code trick to get the address of our cleanup function into the link +- register. Do not add any code between here and L(exitcode). */ +- bl L(gotexitcodeaddr); ++ register. Do not add any code between here and L(exitcode). ++ Use this conditional form of branch and link to avoid destroying ++ the cpu link stack used to predict blr return addresses. */ ++ bcl 20,31,L(gotexitcodeaddr); + + /* This is the helper code which gets called if a function which + is registered with 'makecontext' returns. In this case we diff --git a/packages/glibc/2.17/0047-glibc-ppc64le-25.patch b/packages/glibc/2.17/0047-glibc-ppc64le-25.patch new file mode 100644 index 0000000..144d3f3 --- /dev/null +++ b/packages/glibc/2.17/0047-glibc-ppc64le-25.patch @@ -0,0 +1,411 @@ +# commit db9b4570c5dc550074140ac1d1677077fba29a26 +# Author: Alan Modra +# Date: Sat Aug 17 18:40:11 2013 +0930 +# +# PowerPC LE strlen +# http://sourceware.org/ml/libc-alpha/2013-08/msg00097.html +# +# This is the first of nine patches adding little-endian support to the +# existing optimised string and memory functions. I did spend some +# time with a power7 simulator looking at cycle by cycle behaviour for +# memchr, but most of these patches have not been run on cpu simulators +# to check that we are going as fast as possible. I'm sure PowerPC can +# do better. However, the little-endian support mostly leaves main +# loops unchanged, so I'm banking on previous authors having done a +# good job on big-endian.. As with most code you stare at long enough, +# I found some improvements for big-endian too. +# +# Little-endian support for strlen. Like most of the string functions, +# I leave the main word or multiple-word loops substantially unchanged, +# just needing to modify the tail. +# +# Removing the branch in the power7 functions is just a tidy. .align +# produces a branch anyway. Modifying regs in the non-power7 functions +# is to suit the new little-endian tail. +# +# * sysdeps/powerpc/powerpc64/power7/strlen.S (strlen): Add little-endian +# support. Don't branch over align. 
+# * sysdeps/powerpc/powerpc32/power7/strlen.S: Likewise. +# * sysdeps/powerpc/powerpc64/strlen.S (strlen): Add little-endian support. +# Rearrange tmp reg use to suit. Comment. +# * sysdeps/powerpc/powerpc32/strlen.S: Likewise. +# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strlen.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strlen.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strlen.S 2014-05-28 12:28:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strlen.S 2014-05-28 12:28:45.000000000 -0500 +@@ -31,7 +31,11 @@ + li r0,0 /* Word with null chars to use with cmpb. */ + li r5,-1 /* MASK = 0xffffffffffffffff. */ + lwz r12,0(r4) /* Load word from memory. */ ++#ifdef __LITTLE_ENDIAN__ ++ slw r5,r5,r6 ++#else + srw r5,r5,r6 /* MASK = MASK >> padding. */ ++#endif + orc r9,r12,r5 /* Mask bits that are not part of the string. */ + cmpb r10,r9,r0 /* Check for null bytes in WORD1. */ + cmpwi cr7,r10,0 /* If r10 == 0, no null's have been found. */ +@@ -49,9 +53,6 @@ + cmpb r10,r12,r0 + cmpwi cr7,r10,0 + bne cr7,L(done) +- b L(loop) /* We branch here (rather than falling through) +- to skip the nops due to heavy alignment +- of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ +@@ -88,9 +89,15 @@ + 0xff in the same position as the null byte in the original + word from the string. Use that to calculate the length. */ + L(done): +- cntlzw r0,r10 /* Count leading zeroes before the match. */ ++#ifdef __LITTLE_ENDIAN__ ++ addi r9, r10, -1 /* Form a mask from trailing zeros. */ ++ andc r9, r9, r10 ++ popcntw r0, r9 /* Count the bits in the mask. */ ++#else ++ cntlzw r0,r10 /* Count leading zeros before the match. */ ++#endif + subf r5,r3,r4 +- srwi r0,r0,3 /* Convert leading zeroes to bytes. */ ++ srwi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r5,r0 /* Compute final length. */ + blr + END (BP_SYM (strlen)) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strlen.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strlen.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strlen.S 2014-05-28 12:28:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strlen.S 2014-05-28 12:32:24.000000000 -0500 +@@ -31,7 +31,12 @@ + 1 is subtracted you get a value in the range 0x00-0x7f, none of which + have their high bit set. The expression here is + (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when +- there were no 0x00 bytes in the word. ++ there were no 0x00 bytes in the word. You get 0x80 in bytes that ++ match, but possibly false 0x80 matches in the next more significant ++ byte to a true match due to carries. For little-endian this is ++ of no consequence since the least significant match is the one ++ we're interested in, but big-endian needs method 2 to find which ++ byte matches. + + 2) Given a word 'x', we can test to see _which_ byte was zero by + calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f). 
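Editorial aside, not part of the patch: the two word-at-a-time zero-byte tests described in the comment above, plus the trailing-zero trick the new little-endian tails use (addi/andc/popcnt), translate to C roughly as follows. The constants are the 32-bit variants and the function names are illustrative only.

#include <stdint.h>
#include <stdio.h>

/* Method (1): non-zero iff the word may contain a 0x00 byte.  Carries can
   set a false 0x80 flag in the byte above a real match, so on big-endian
   this is only a "maybe" test, but on little-endian the least significant
   match is the one that matters, so the false flags are harmless. */
static uint32_t has_zero_byte(uint32_t x)
{
    return (x + 0xfefefeffu) & ~(x | 0x7f7f7f7fu);
}

/* Method (2): 0x80 exactly in each byte of x that is 0x00. */
static uint32_t zero_byte_flags(uint32_t x)
{
    return ~(((x & 0x7f7f7f7fu) + 0x7f7f7f7fu) | x | 0x7f7f7f7fu);
}

/* Little-endian tail (assumes flags != 0): (m - 1) & ~m keeps only the
   bits below the least significant flag; a population count gives the bit
   offset and a shift by 3 the byte index, which is what the patch does
   with addi/andc/popcntw.  Big-endian instead counts leading zeros. */
static unsigned first_zero_byte_le(uint32_t flags)
{
    uint32_t below = (flags - 1) & ~flags;
    return (unsigned)__builtin_popcount(below) / 8;
}

int main(void)
{
    uint32_t w = 0x00612062u;   /* "b", " ", "a", NUL loaded little-endian */
    if (has_zero_byte(w))
        printf("first zero byte at index %u\n",
               first_zero_byte_le(zero_byte_flags(w)));
    return 0;
}

The 64-bit strlen below is the same idea with the analogous 8-byte constants and popcntd/cntlzd in the tail.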
+@@ -74,7 +79,7 @@ + + ENTRY (BP_SYM (strlen)) + +-#define rTMP1 r0 ++#define rTMP4 r0 + #define rRTN r3 /* incoming STR arg, outgoing result */ + #define rSTR r4 /* current string position */ + #define rPADN r5 /* number of padding bits we prepend to the +@@ -84,9 +89,9 @@ + #define rWORD1 r8 /* current string word */ + #define rWORD2 r9 /* next string word */ + #define rMASK r9 /* mask for first string word */ +-#define rTMP2 r10 +-#define rTMP3 r11 +-#define rTMP4 r12 ++#define rTMP1 r10 ++#define rTMP2 r11 ++#define rTMP3 r12 + + CHECK_BOUNDS_LOW (rRTN, rTMP1, rTMP2) + +@@ -96,15 +101,20 @@ + lwz rWORD1, 0(rSTR) + li rMASK, -1 + addi r7F7F, r7F7F, 0x7f7f +-/* That's the setup done, now do the first pair of words. +- We make an exception and use method (2) on the first two words, to reduce +- overhead. */ ++/* We use method (2) on the first two words, because rFEFE isn't ++ required which reduces setup overhead. Also gives a faster return ++ for small strings on big-endian due to needing to recalculate with ++ method (2) anyway. */ ++#ifdef __LITTLE_ENDIAN__ ++ slw rMASK, rMASK, rPADN ++#else + srw rMASK, rMASK, rPADN ++#endif + and rTMP1, r7F7F, rWORD1 + or rTMP2, r7F7F, rWORD1 + add rTMP1, rTMP1, r7F7F +- nor rTMP1, rTMP2, rTMP1 +- and. rWORD1, rTMP1, rMASK ++ nor rTMP3, rTMP2, rTMP1 ++ and. rTMP3, rTMP3, rMASK + mtcrf 0x01, rRTN + bne L(done0) + lis rFEFE, -0x101 +@@ -113,11 +123,12 @@ + bt 29, L(loop) + + /* Handle second word of pair. */ ++/* Perhaps use method (1) here for little-endian, saving one instruction? */ + lwzu rWORD1, 4(rSTR) + and rTMP1, r7F7F, rWORD1 + or rTMP2, r7F7F, rWORD1 + add rTMP1, rTMP1, r7F7F +- nor. rWORD1, rTMP2, rTMP1 ++ nor. rTMP3, rTMP2, rTMP1 + bne L(done0) + + /* The loop. */ +@@ -131,29 +142,53 @@ + add rTMP3, rFEFE, rWORD2 + nor rTMP4, r7F7F, rWORD2 + bne L(done1) +- and. rTMP1, rTMP3, rTMP4 ++ and. rTMP3, rTMP3, rTMP4 + beq L(loop) + ++#ifndef __LITTLE_ENDIAN__ + and rTMP1, r7F7F, rWORD2 + add rTMP1, rTMP1, r7F7F +- andc rWORD1, rTMP4, rTMP1 ++ andc rTMP3, rTMP4, rTMP1 + b L(done0) + + L(done1): + and rTMP1, r7F7F, rWORD1 + subi rSTR, rSTR, 4 + add rTMP1, rTMP1, r7F7F +- andc rWORD1, rTMP2, rTMP1 ++ andc rTMP3, rTMP2, rTMP1 + + /* When we get to here, rSTR points to the first word in the string that +- contains a zero byte, and the most significant set bit in rWORD1 is in that +- byte. */ ++ contains a zero byte, and rTMP3 has 0x80 for bytes that are zero, ++ and 0x00 otherwise. */ + L(done0): +- cntlzw rTMP3, rWORD1 ++ cntlzw rTMP3, rTMP3 + subf rTMP1, rRTN, rSTR + srwi rTMP3, rTMP3, 3 + add rRTN, rTMP1, rTMP3 + /* GKM FIXME: check high bound. */ + blr ++#else ++ ++L(done0): ++ addi rTMP1, rTMP3, -1 /* Form a mask from trailing zeros. */ ++ andc rTMP1, rTMP1, rTMP3 ++ cntlzw rTMP1, rTMP1 /* Count bits not in the mask. 
*/ ++ subf rTMP3, rRTN, rSTR ++ subfic rTMP1, rTMP1, 32-7 ++ srwi rTMP1, rTMP1, 3 ++ add rRTN, rTMP1, rTMP3 ++ blr ++ ++L(done1): ++ addi rTMP3, rTMP1, -1 ++ andc rTMP3, rTMP3, rTMP1 ++ cntlzw rTMP3, rTMP3 ++ subf rTMP1, rRTN, rSTR ++ subfic rTMP3, rTMP3, 32-7-32 ++ srawi rTMP3, rTMP3, 3 ++ add rRTN, rTMP1, rTMP3 ++ blr ++#endif ++ + END (BP_SYM (strlen)) + libc_hidden_builtin_def (strlen) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strlen.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strlen.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strlen.S 2014-05-28 12:28:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strlen.S 2014-05-28 12:28:45.000000000 -0500 +@@ -32,7 +32,11 @@ + with cmpb. */ + li r5,-1 /* MASK = 0xffffffffffffffff. */ + ld r12,0(r4) /* Load doubleword from memory. */ ++#ifdef __LITTLE_ENDIAN__ ++ sld r5,r5,r6 ++#else + srd r5,r5,r6 /* MASK = MASK >> padding. */ ++#endif + orc r9,r12,r5 /* Mask bits that are not part of the string. */ + cmpb r10,r9,r0 /* Check for null bytes in DWORD1. */ + cmpdi cr7,r10,0 /* If r10 == 0, no null's have been found. */ +@@ -50,9 +54,6 @@ + cmpb r10,r12,r0 + cmpdi cr7,r10,0 + bne cr7,L(done) +- b L(loop) /* We branch here (rather than falling through) +- to skip the nops due to heavy alignment +- of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ +@@ -89,9 +90,15 @@ + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the length. */ + L(done): +- cntlzd r0,r10 /* Count leading zeroes before the match. */ ++#ifdef __LITTLE_ENDIAN__ ++ addi r9, r10, -1 /* Form a mask from trailing zeros. */ ++ andc r9, r9, r10 ++ popcntd r0, r9 /* Count the bits in the mask. */ ++#else ++ cntlzd r0,r10 /* Count leading zeros before the match. */ ++#endif + subf r5,r3,r4 +- srdi r0,r0,3 /* Convert leading zeroes to bytes. */ ++ srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r5,r0 /* Compute final length. */ + blr + END (BP_SYM (strlen)) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strlen.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strlen.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strlen.S 2014-05-28 12:28:44.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strlen.S 2014-05-28 12:38:17.000000000 -0500 +@@ -31,7 +31,12 @@ + 1 is subtracted you get a value in the range 0x00-0x7f, none of which + have their high bit set. The expression here is + (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when +- there were no 0x00 bytes in the word. ++ there were no 0x00 bytes in the word. You get 0x80 in bytes that ++ match, but possibly false 0x80 matches in the next more significant ++ byte to a true match due to carries. For little-endian this is ++ of no consequence since the least significant match is the one ++ we're interested in, but big-endian needs method 2 to find which ++ byte matches. + + 2) Given a word 'x', we can test to see _which_ byte was zero by + calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f). +@@ -64,7 +69,7 @@ + Answer: + 1) Added a Data Cache Block Touch early to prefetch the first 128 + byte cache line. Adding dcbt instructions to the loop would not be +- effective since most strings will be shorter than the cache line.*/ ++ effective since most strings will be shorter than the cache line. 
*/ + + /* Some notes on register usage: Under the SVR4 ABI, we can use registers + 0 and 3 through 12 (so long as we don't call any procedures) without +@@ -80,7 +85,7 @@ + ENTRY (BP_SYM (strlen)) + CALL_MCOUNT 1 + +-#define rTMP1 r0 ++#define rTMP4 r0 + #define rRTN r3 /* incoming STR arg, outgoing result */ + #define rSTR r4 /* current string position */ + #define rPADN r5 /* number of padding bits we prepend to the +@@ -90,9 +95,9 @@ + #define rWORD1 r8 /* current string doubleword */ + #define rWORD2 r9 /* next string doubleword */ + #define rMASK r9 /* mask for first string doubleword */ +-#define rTMP2 r10 +-#define rTMP3 r11 +-#define rTMP4 r12 ++#define rTMP1 r10 ++#define rTMP2 r11 ++#define rTMP3 r12 + + /* Note: The Bounded pointer support in this code is broken. This code + was inherited from PPC32 and that support was never completed. +@@ -109,30 +114,36 @@ + addi r7F7F, r7F7F, 0x7f7f + li rMASK, -1 + insrdi r7F7F, r7F7F, 32, 0 +-/* That's the setup done, now do the first pair of doublewords. +- We make an exception and use method (2) on the first two doublewords, +- to reduce overhead. */ +- srd rMASK, rMASK, rPADN ++/* We use method (2) on the first two doublewords, because rFEFE isn't ++ required which reduces setup overhead. Also gives a faster return ++ for small strings on big-endian due to needing to recalculate with ++ method (2) anyway. */ ++#ifdef __LITTLE_ENDIAN__ ++ sld rMASK, rMASK, rPADN ++#else ++ srd rMASK, rMASK, rPADN ++#endif + and rTMP1, r7F7F, rWORD1 + or rTMP2, r7F7F, rWORD1 + lis rFEFE, -0x101 + add rTMP1, rTMP1, r7F7F + addi rFEFE, rFEFE, -0x101 +- nor rTMP1, rTMP2, rTMP1 +- and. rWORD1, rTMP1, rMASK ++ nor rTMP3, rTMP2, rTMP1 ++ and. rTMP3, rTMP3, rMASK + mtcrf 0x01, rRTN + bne L(done0) +- sldi rTMP1, rFEFE, 32 +- add rFEFE, rFEFE, rTMP1 ++ sldi rTMP1, rFEFE, 32 ++ add rFEFE, rFEFE, rTMP1 + /* Are we now aligned to a doubleword boundary? */ + bt 28, L(loop) + + /* Handle second doubleword of pair. */ ++/* Perhaps use method (1) here for little-endian, saving one instruction? */ + ldu rWORD1, 8(rSTR) + and rTMP1, r7F7F, rWORD1 + or rTMP2, r7F7F, rWORD1 + add rTMP1, rTMP1, r7F7F +- nor. rWORD1, rTMP2, rTMP1 ++ nor. rTMP3, rTMP2, rTMP1 + bne L(done0) + + /* The loop. */ +@@ -146,29 +157,53 @@ + add rTMP3, rFEFE, rWORD2 + nor rTMP4, r7F7F, rWORD2 + bne L(done1) +- and. rTMP1, rTMP3, rTMP4 ++ and. rTMP3, rTMP3, rTMP4 + beq L(loop) + ++#ifndef __LITTLE_ENDIAN__ + and rTMP1, r7F7F, rWORD2 + add rTMP1, rTMP1, r7F7F +- andc rWORD1, rTMP4, rTMP1 ++ andc rTMP3, rTMP4, rTMP1 + b L(done0) + + L(done1): + and rTMP1, r7F7F, rWORD1 + subi rSTR, rSTR, 8 + add rTMP1, rTMP1, r7F7F +- andc rWORD1, rTMP2, rTMP1 ++ andc rTMP3, rTMP2, rTMP1 + + /* When we get to here, rSTR points to the first doubleword in the string that +- contains a zero byte, and the most significant set bit in rWORD1 is in that +- byte. */ ++ contains a zero byte, and rTMP3 has 0x80 for bytes that are zero, and 0x00 ++ otherwise. */ + L(done0): +- cntlzd rTMP3, rWORD1 ++ cntlzd rTMP3, rTMP3 + subf rTMP1, rRTN, rSTR + srdi rTMP3, rTMP3, 3 + add rRTN, rTMP1, rTMP3 + /* GKM FIXME: check high bound. */ + blr ++#else ++ ++L(done0): ++ addi rTMP1, rTMP3, -1 /* Form a mask from trailing zeros. */ ++ andc rTMP1, rTMP1, rTMP3 ++ cntlzd rTMP1, rTMP1 /* Count bits not in the mask. 
*/ ++ subf rTMP3, rRTN, rSTR ++ subfic rTMP1, rTMP1, 64-7 ++ srdi rTMP1, rTMP1, 3 ++ add rRTN, rTMP1, rTMP3 ++ blr ++ ++L(done1): ++ addi rTMP3, rTMP1, -1 ++ andc rTMP3, rTMP3, rTMP1 ++ cntlzd rTMP3, rTMP3 ++ subf rTMP1, rRTN, rSTR ++ subfic rTMP3, rTMP3, 64-7-64 ++ sradi rTMP3, rTMP3, 3 ++ add rRTN, rTMP1, rTMP3 ++ blr ++#endif ++ + END (BP_SYM (strlen)) + libc_hidden_builtin_def (strlen) diff --git a/packages/glibc/2.17/0048-glibc-ppc64le-26.patch b/packages/glibc/2.17/0048-glibc-ppc64le-26.patch new file mode 100644 index 0000000..93fd255 --- /dev/null +++ b/packages/glibc/2.17/0048-glibc-ppc64le-26.patch @@ -0,0 +1,379 @@ +# commit 33ee81de05e83ce12f32a491270bb4c1611399c7 +# Author: Alan Modra +# Date: Sat Aug 17 18:40:48 2013 +0930 +# +# PowerPC LE strnlen +# http://sourceware.org/ml/libc-alpha/2013-08/msg00098.html +# +# The existing strnlen code has a number of defects, so this patch is more +# than just adding little-endian support. The changes here are similar to +# those for memchr. +# +# * sysdeps/powerpc/powerpc64/power7/strnlen.S (strnlen): Add +# little-endian support. Remove unnecessary "are we done" tests. +# Handle "s" wrapping around zero and extremely large "size". +# Correct main loop count. Handle single left-over word from main +# loop inline rather than by using small_loop. Correct comments. +# Delete "zero" tail, use "end_max" instead. +# * sysdeps/powerpc/powerpc32/power7/strnlen.S: Likewise. +# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strnlen.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strnlen.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strnlen.S 2014-05-28 12:40:17.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strnlen.S 2014-05-28 12:44:52.000000000 -0500 +@@ -30,51 +30,47 @@ + add r7,r3,r4 /* Calculate the last acceptable address. */ + cmplwi r4,16 + li r0,0 /* Word with null chars. */ ++ addi r7,r7,-1 + ble L(small_range) + +- cmplw cr7,r3,r7 /* Is the address equal or less than r3? If +- it's equal or less, it means size is either 0 +- or a negative number. */ +- ble cr7,L(proceed) +- +- li r7,-1 /* Make r11 the biggest if r4 <= 0. */ +-L(proceed): + rlwinm r6,r3,3,27,28 /* Calculate padding. */ + lwz r12,0(r8) /* Load word from memory. */ + cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */ ++#ifdef __LITTLE_ENDIAN__ ++ srw r10,r10,r6 ++ slw r10,r10,r6 ++#else + slw r10,r10,r6 + srw r10,r10,r6 ++#endif + cmplwi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + +- /* Are we done already? */ +- addi r9,r8,4 +- cmplw cr6,r9,r7 +- bge cr6,L(end_max) +- ++ clrrwi r7,r7,2 /* Address of last word. */ + mtcrf 0x01,r8 + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 29,L(loop_setup) + +- /* Handle DWORD2 of pair. */ ++ /* Handle WORD2 of pair. */ + lwzu r12,4(r8) + cmpb r10,r12,r0 + cmplwi cr7,r10,0 + bne cr7,L(done) + +- /* Are we done already? */ +- addi r9,r8,4 +- cmplw cr6,r9,r7 +- bge cr6,L(end_max) +- + L(loop_setup): +- sub r5,r7,r9 ++ /* The last word we want to read in the loop below is the one ++ containing the last byte of the string, ie. the word at ++ (s + size - 1) & ~3, or r7. The first word read is at ++ r8 + 4, we read 2 * cnt words, so the last word read will ++ be at r8 + 4 + 8 * cnt - 4. Solving for cnt gives ++ cnt = (r7 - r8) / 8 */ ++ sub r5,r7,r8 + srwi r6,r5,3 /* Number of loop iterations. */ + mtctr r6 /* Setup the counter. 
*/ +- b L(loop) +- /* Main loop to look for the null byte backwards in the string. Since ++ ++ /* Main loop to look for the null byte in the string. Since + it's a small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 + L(loop): +@@ -90,15 +86,18 @@ + cmplwi cr7,r5,0 + bne cr7,L(found) + bdnz L(loop) +- /* We're here because the counter reached 0, and that means we +- didn't have any matches for null in the whole range. Just return +- the original size. */ +- addi r9,r8,4 +- cmplw cr6,r9,r7 +- blt cr6,L(loop_small) ++ ++ /* We may have one more word to read. */ ++ cmplw cr6,r8,r7 ++ beq cr6,L(end_max) ++ ++ lwzu r12,4(r8) ++ cmpb r10,r12,r0 ++ cmplwi cr6,r10,0 ++ bne cr6,L(done) + + L(end_max): +- sub r3,r7,r3 ++ mr r3,r4 + blr + + /* OK, one (or both) of the words contains a null byte. Check +@@ -123,49 +122,56 @@ + We need to make sure the null char is *before* the end of the + range. */ + L(done): +- cntlzw r0,r10 /* Count leading zeroes before the match. */ +- srwi r0,r0,3 /* Convert leading zeroes to bytes. */ +- add r9,r8,r0 +- sub r6,r9,r3 /* Length until the match. */ +- cmplw r9,r7 +- bgt L(end_max) +- mr r3,r6 +- blr +- +- .align 4 +-L(zero): +- li r3,0 ++#ifdef __LITTLE_ENDIAN__ ++ addi r0,r10,-1 ++ andc r0,r0,r10 ++ popcntw r0,r0 ++#else ++ cntlzw r0,r10 /* Count leading zeros before the match. */ ++#endif ++ sub r3,r8,r3 ++ srwi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ ++ add r3,r3,r0 /* Length until the match. */ ++ cmplw r3,r4 ++ blelr ++ mr r3,r4 + blr + +-/* Deals with size <= 32. */ ++/* Deals with size <= 16. */ + .align 4 + L(small_range): + cmplwi r4,0 +- beq L(zero) ++ beq L(end_max) ++ ++ clrrwi r7,r7,2 /* Address of last word. */ + + rlwinm r6,r3,3,27,28 /* Calculate padding. */ + lwz r12,0(r8) /* Load word from memory. */ + cmpb r10,r12,r0 /* Check for null bytes in WORD1. */ ++#ifdef __LITTLE_ENDIAN__ ++ srw r10,r10,r6 ++ slw r10,r10,r6 ++#else + slw r10,r10,r6 + srw r10,r10,r6 ++#endif + cmplwi cr7,r10,0 + bne cr7,L(done) + +- addi r9,r8,4 +- cmplw r9,r7 +- bge L(end_max) +- b L(loop_small) ++ cmplw r8,r7 ++ beq L(end_max) + + .p2align 5 + L(loop_small): + lwzu r12,4(r8) + cmpb r10,r12,r0 +- addi r9,r8,4 + cmplwi cr6,r10,0 + bne cr6,L(done) +- cmplw r9,r7 +- bge L(end_max) +- b L(loop_small) ++ cmplw r8,r7 ++ bne L(loop_small) ++ mr r3,r4 ++ blr ++ + END (BP_SYM (__strnlen)) + weak_alias (BP_SYM (__strnlen), BP_SYM(strnlen)) + libc_hidden_builtin_def (strnlen) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strnlen.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strnlen.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strnlen.S 2014-05-28 12:40:17.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strnlen.S 2014-05-28 13:24:41.000000000 -0500 +@@ -26,33 +26,29 @@ + ENTRY (BP_SYM (__strnlen)) + CALL_MCOUNT 2 + dcbt 0,r3 +- clrrdi r8,r3,3 ++ clrrdi r8,r3,3 + add r7,r3,r4 /* Calculate the last acceptable address. */ + cmpldi r4,32 + li r0,0 /* Doubleword with null chars. */ ++ addi r7,r7,-1 ++ + /* If we have less than 33 bytes to search, skip to a faster code. */ + ble L(small_range) + +- cmpld cr7,r3,r7 /* Is the address equal or less than r3? If +- it's equal or less, it means size is either 0 +- or a negative number. */ +- ble cr7,L(proceed) +- +- li r7,-1 /* Make r11 the biggest if r4 <= 0. */ +-L(proceed): + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + ld r12,0(r8) /* Load doubleword from memory. */ + cmpb r10,r12,r0 /* Check for null bytes in DWORD1. 
*/ ++#ifdef __LITTLE_ENDIAN__ ++ srd r10,r10,r6 ++ sld r10,r10,r6 ++#else + sld r10,r10,r6 + srd r10,r10,r6 ++#endif + cmpldi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + +- /* Are we done already? */ +- addi r9,r8,8 +- cmpld cr6,r9,r7 +- bge cr6,L(end_max) +- ++ clrrdi r7,r7,3 /* Address of last doubleword. */ + mtcrf 0x01,r8 + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ +@@ -65,17 +61,18 @@ + cmpldi cr7,r10,0 + bne cr7,L(done) + +- /* Are we done already? */ +- addi r9,r8,8 +- cmpld cr6,r9,r7 +- bge cr6,L(end_max) +- + L(loop_setup): +- sub r5,r7,r9 ++ /* The last dword we want to read in the loop below is the one ++ containing the last byte of the string, ie. the dword at ++ (s + size - 1) & ~7, or r7. The first dword read is at ++ r8 + 8, we read 2 * cnt dwords, so the last dword read will ++ be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives ++ cnt = (r7 - r8) / 16 */ ++ sub r5,r7,r8 + srdi r6,r5,4 /* Number of loop iterations. */ + mtctr r6 /* Setup the counter. */ +- b L(loop) +- /* Main loop to look for the null byte backwards in the string. Since ++ ++ /* Main loop to look for the null byte in the string. Since + it's a small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 + L(loop): +@@ -91,15 +88,18 @@ + cmpldi cr7,r5,0 + bne cr7,L(found) + bdnz L(loop) +- /* We're here because the counter reached 0, and that means we +- didn't have any matches for null in the whole range. Just return +- the original size. */ +- addi r9,r8,8 +- cmpld cr6,r9,r7 +- blt cr6,L(loop_small) ++ ++ /* We may have one more dword to read. */ ++ cmpld cr6,r8,r7 ++ beq cr6,L(end_max) ++ ++ ldu r12,8(r8) ++ cmpb r10,r12,r0 ++ cmpldi cr6,r10,0 ++ bne cr6,L(done) + + L(end_max): +- sub r3,r7,r3 ++ mr r3,r4 + blr + + /* OK, one (or both) of the doublewords contains a null byte. Check +@@ -121,52 +121,59 @@ + /* r10 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the length. +- We need to make sure the null char is *before* the start of the +- range (since we're going backwards). */ ++ We need to make sure the null char is *before* the end of the ++ range. */ + L(done): +- cntlzd r0,r10 /* Count leading zeroes before the match. */ +- srdi r0,r0,3 /* Convert leading zeroes to bytes. */ +- add r9,r8,r0 +- sub r6,r9,r3 /* Length until the match. */ +- cmpld r9,r7 +- bgt L(end_max) +- mr r3,r6 +- blr +- +- .align 4 +-L(zero): +- li r3,0 ++#ifdef __LITTLE_ENDIAN__ ++ addi r0,r10,-1 ++ andc r0,r0,r10 ++ popcntd r0,r0 ++#else ++ cntlzd r0,r10 /* Count leading zeros before the match. */ ++#endif ++ sub r3,r8,r3 ++ srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ ++ add r3,r3,r0 /* Length until the match. */ ++ cmpld r3,r4 ++ blelr ++ mr r3,r4 + blr + + /* Deals with size <= 32. */ + .align 4 + L(small_range): + cmpldi r4,0 +- beq L(zero) ++ beq L(end_max) ++ ++ clrrdi r7,r7,3 /* Address of last doubleword. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ +- ld r12,0(r8) /* Load word from memory. */ ++ ld r12,0(r8) /* Load doubleword from memory. */ + cmpb r10,r12,r0 /* Check for null bytes in DWORD1. 
*/ ++#ifdef __LITTLE_ENDIAN__ ++ srd r10,r10,r6 ++ sld r10,r10,r6 ++#else + sld r10,r10,r6 + srd r10,r10,r6 ++#endif + cmpldi cr7,r10,0 + bne cr7,L(done) + +- addi r9,r8,8 +- cmpld r9,r7 +- bge L(end_max) +- b L(loop_small) ++ cmpld r8,r7 ++ beq L(end_max) + + .p2align 5 + L(loop_small): + ldu r12,8(r8) + cmpb r10,r12,r0 +- addi r9,r8,8 + cmpldi cr6,r10,0 + bne cr6,L(done) +- cmpld r9,r7 +- bge L(end_max) +- b L(loop_small) ++ cmpld r8,r7 ++ bne L(loop_small) ++ mr r3,r4 ++ blr ++ + END (BP_SYM (__strnlen)) + weak_alias (BP_SYM (__strnlen), BP_SYM(strnlen)) + libc_hidden_builtin_def (strnlen) diff --git a/packages/glibc/2.17/0049-glibc-ppc64le-27.patch b/packages/glibc/2.17/0049-glibc-ppc64le-27.patch new file mode 100644 index 0000000..83ca794 --- /dev/null +++ b/packages/glibc/2.17/0049-glibc-ppc64le-27.patch @@ -0,0 +1,861 @@ +# commit 8a7413f9b036da83ffde491a37d9d2340bc321a7 +# Author: Alan Modra +# Date: Sat Aug 17 18:41:17 2013 +0930 +# +# PowerPC LE strcmp and strncmp +# http://sourceware.org/ml/libc-alpha/2013-08/msg00099.html +# +# More little-endian support. I leave the main strcmp loops unchanged, +# (well, except for renumbering rTMP to something other than r0 since +# it's needed in an addi insn) and modify the tail for little-endian. +# +# I noticed some of the big-endian tail code was a little untidy so have +# cleaned that up too. +# +# * sysdeps/powerpc/powerpc64/strcmp.S (rTMP2): Define as r0. +# (rTMP): Define as r11. +# (strcmp): Add little-endian support. Optimise tail. +# * sysdeps/powerpc/powerpc32/strcmp.S: Similarly. +# * sysdeps/powerpc/powerpc64/strncmp.S: Likewise. +# * sysdeps/powerpc/powerpc32/strncmp.S: Likewise. +# * sysdeps/powerpc/powerpc64/power4/strncmp.S: Likewise. +# * sysdeps/powerpc/powerpc32/power4/strncmp.S: Likewise. +# * sysdeps/powerpc/powerpc64/power7/strncmp.S: Likewise. +# * sysdeps/powerpc/powerpc32/power7/strncmp.S: Likewise. +# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/strncmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/strncmp.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/strncmp.S 2014-05-28 13:26:59.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/strncmp.S 2014-05-28 13:27:02.000000000 -0500 +@@ -26,7 +26,7 @@ + + EALIGN (BP_SYM(strncmp), 4, 0) + +-#define rTMP r0 ++#define rTMP2 r0 + #define rRTN r3 + #define rSTR1 r3 /* first string arg */ + #define rSTR2 r4 /* second string arg */ +@@ -42,6 +42,7 @@ + #define r7F7F r9 /* constant 0x7f7f7f7f */ + #define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */ + #define rBITDIF r11 /* bits that differ in s1 & s2 words */ ++#define rTMP r12 + + dcbt 0,rSTR1 + or rTMP, rSTR2, rSTR1 +@@ -80,12 +81,45 @@ + we don't compare two strings as different because of gunk beyond + the end of the strings... */ + ++#ifdef __LITTLE_ENDIAN__ ++L(endstring): ++ slwi rTMP, rTMP, 1 ++ addi rTMP2, rTMP, -1 ++ andc rTMP2, rTMP2, rTMP ++ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ ++ and rWORD1, rWORD1, rTMP2 ++ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ ++ rlwinm rTMP, rWORD1, 8, 0xffffffff ++ rldimi rTMP2, rWORD2, 24, 32 ++ rldimi rTMP, rWORD1, 24, 32 ++ rlwimi rTMP2, rWORD2, 24, 16, 23 ++ rlwimi rTMP, rWORD1, 24, 16, 23 ++ xor. rBITDIF, rTMP, rTMP2 ++ sub rRTN, rTMP, rTMP2 ++ bgelr+ ++ ori rRTN, rTMP2, 1 ++ blr ++ ++L(different): ++ lwz rWORD1, -4(rSTR1) ++ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. 
*/ ++ rlwinm rTMP, rWORD1, 8, 0xffffffff ++ rldimi rTMP2, rWORD2, 24, 32 ++ rldimi rTMP, rWORD1, 24, 32 ++ rlwimi rTMP2, rWORD2, 24, 16, 23 ++ rlwimi rTMP, rWORD1, 24, 16, 23 ++ xor. rBITDIF, rTMP, rTMP2 ++ sub rRTN, rTMP, rTMP2 ++ bgelr+ ++ ori rRTN, rTMP2, 1 ++ blr ++ ++#else + L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 +- + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzw rBITDIF, rBITDIF +@@ -93,28 +127,20 @@ + addi rNEG, rNEG, 7 + cmpw cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 +- blt- cr1, L(equal) +- srawi rRTN, rRTN, 31 +- ori rRTN, rRTN, 1 +- blr ++ bgelr+ cr1 + L(equal): + li rRTN, 0 + blr + + L(different): +- lwzu rWORD1, -4(rSTR1) ++ lwz rWORD1, -4(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 +- blt- L(highbit) +- srawi rRTN, rRTN, 31 +- ori rRTN, rRTN, 1 +- blr ++ bgelr+ + L(highbit): +- srwi rWORD2, rWORD2, 24 +- srwi rWORD1, rWORD1, 24 +- sub rRTN, rWORD1, rWORD2 ++ ori rRTN, rWORD2, 1 + blr +- ++#endif + + /* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strncmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strncmp.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strncmp.S 2014-05-28 13:26:59.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strncmp.S 2014-05-28 13:27:02.000000000 -0500 +@@ -28,7 +28,7 @@ + + EALIGN (BP_SYM(strncmp),5,0) + +-#define rTMP r0 ++#define rTMP2 r0 + #define rRTN r3 + #define rSTR1 r3 /* first string arg */ + #define rSTR2 r4 /* second string arg */ +@@ -44,6 +44,7 @@ + #define r7F7F r9 /* constant 0x7f7f7f7f */ + #define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */ + #define rBITDIF r11 /* bits that differ in s1 & s2 words */ ++#define rTMP r12 + + dcbt 0,rSTR1 + nop +@@ -83,13 +84,45 @@ + /* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ ++#ifdef __LITTLE_ENDIAN__ ++L(endstring): ++ slwi rTMP, rTMP, 1 ++ addi rTMP2, rTMP, -1 ++ andc rTMP2, rTMP2, rTMP ++ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ ++ and rWORD1, rWORD1, rTMP2 ++ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ ++ rlwinm rTMP, rWORD1, 8, 0xffffffff ++ rldimi rTMP2, rWORD2, 24, 32 ++ rldimi rTMP, rWORD1, 24, 32 ++ rlwimi rTMP2, rWORD2, 24, 16, 23 ++ rlwimi rTMP, rWORD1, 24, 16, 23 ++ xor. rBITDIF, rTMP, rTMP2 ++ sub rRTN, rTMP, rTMP2 ++ bgelr ++ ori rRTN, rTMP2, 1 ++ blr ++ ++L(different): ++ lwz rWORD1, -4(rSTR1) ++ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ ++ rlwinm rTMP, rWORD1, 8, 0xffffffff ++ rldimi rTMP2, rWORD2, 24, 32 ++ rldimi rTMP, rWORD1, 24, 32 ++ rlwimi rTMP2, rWORD2, 24, 16, 23 ++ rlwimi rTMP, rWORD1, 24, 16, 23 ++ xor. rBITDIF, rTMP, rTMP2 ++ sub rRTN, rTMP, rTMP2 ++ bgelr ++ ori rRTN, rTMP2, 1 ++ blr + ++#else + L(endstring): + and rTMP,r7F7F,rWORD1 + beq cr1,L(equal) + add rTMP,rTMP,r7F7F + xor. rBITDIF,rWORD1,rWORD2 +- + andc rNEG,rNEG,rTMP + blt L(highbit) + cntlzw rBITDIF,rBITDIF +@@ -97,28 +130,20 @@ + addi rNEG,rNEG,7 + cmpw cr1,rNEG,rBITDIF + sub rRTN,rWORD1,rWORD2 +- blt cr1,L(equal) +- srawi rRTN,rRTN,31 +- ori rRTN,rRTN,1 +- blr ++ bgelr cr1 + L(equal): + li rRTN,0 + blr + + L(different): +- lwzu rWORD1,-4(rSTR1) ++ lwz rWORD1,-4(rSTR1) + xor. 
rBITDIF,rWORD1,rWORD2 + sub rRTN,rWORD1,rWORD2 +- blt L(highbit) +- srawi rRTN,rRTN,31 +- ori rRTN,rRTN,1 +- blr ++ bgelr + L(highbit): +- srwi rWORD2,rWORD2,24 +- srwi rWORD1,rWORD1,24 +- sub rRTN,rWORD1,rWORD2 ++ ori rRTN, rWORD2, 1 + blr +- ++#endif + + /* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strcmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strcmp.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strcmp.S 2014-05-28 13:26:59.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strcmp.S 2014-05-28 13:27:02.000000000 -0500 +@@ -26,7 +26,7 @@ + + EALIGN (BP_SYM (strcmp), 4, 0) + +-#define rTMP r0 ++#define rTMP2 r0 + #define rRTN r3 + #define rSTR1 r3 /* first string arg */ + #define rSTR2 r4 /* second string arg */ +@@ -40,6 +40,7 @@ + #define r7F7F r8 /* constant 0x7f7f7f7f */ + #define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f) */ + #define rBITDIF r10 /* bits that differ in s1 & s2 words */ ++#define rTMP r11 + + CHECK_BOUNDS_LOW (rSTR1, rTMP, rHIGH1) + CHECK_BOUNDS_LOW (rSTR2, rTMP, rHIGH2) +@@ -64,10 +65,45 @@ + and. rTMP, rTMP, rNEG + cmpw cr1, rWORD1, rWORD2 + beq+ L(g0) +-L(endstring): ++ + /* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ ++#ifdef __LITTLE_ENDIAN__ ++L(endstring): ++ addi rTMP2, rTMP, -1 ++ andc rTMP2, rTMP2, rTMP ++ rlwimi rTMP2, rTMP2, 1, 0, 30 ++ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ ++ and rWORD1, rWORD1, rTMP2 ++ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ ++ rlwinm rTMP, rWORD1, 8, 0xffffffff ++ rlwimi rTMP2, rWORD2, 24, 0, 7 ++ rlwimi rTMP, rWORD1, 24, 0, 7 ++ rlwimi rTMP2, rWORD2, 24, 16, 23 ++ rlwimi rTMP, rWORD1, 24, 16, 23 ++ xor. rBITDIF, rTMP, rTMP2 ++ sub rRTN, rTMP, rTMP2 ++ bgelr+ ++ ori rRTN, rTMP2, 1 ++ blr ++ ++L(different): ++ lwz rWORD1, -4(rSTR1) ++ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ ++ rlwinm rTMP, rWORD1, 8, 0xffffffff ++ rlwimi rTMP2, rWORD2, 24, 0, 7 ++ rlwimi rTMP, rWORD1, 24, 0, 7 ++ rlwimi rTMP2, rWORD2, 24, 16, 23 ++ rlwimi rTMP, rWORD1, 24, 16, 23 ++ xor. rBITDIF, rTMP, rTMP2 ++ sub rRTN, rTMP, rTMP2 ++ bgelr+ ++ ori rRTN, rTMP2, 1 ++ blr ++ ++#else ++L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F +@@ -94,7 +130,7 @@ + ori rRTN, rWORD2, 1 + /* GKM FIXME: check high bounds. */ + blr +- ++#endif + + /* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strncmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strncmp.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strncmp.S 2014-05-28 13:26:59.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strncmp.S 2014-05-28 13:27:02.000000000 -0500 +@@ -26,7 +26,7 @@ + + EALIGN (BP_SYM(strncmp), 4, 0) + +-#define rTMP r0 ++#define rTMP2 r0 + #define rRTN r3 + #define rSTR1 r3 /* first string arg */ + #define rSTR2 r4 /* second string arg */ +@@ -40,6 +40,7 @@ + #define r7F7F r9 /* constant 0x7f7f7f7f */ + #define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */ + #define rBITDIF r11 /* bits that differ in s1 & s2 words */ ++#define rTMP r12 + + dcbt 0,rSTR1 + or rTMP, rSTR2, rSTR1 +@@ -78,12 +79,45 @@ + we don't compare two strings as different because of gunk beyond + the end of the strings... 
*/ + ++#ifdef __LITTLE_ENDIAN__ ++L(endstring): ++ slwi rTMP, rTMP, 1 ++ addi rTMP2, rTMP, -1 ++ andc rTMP2, rTMP2, rTMP ++ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ ++ and rWORD1, rWORD1, rTMP2 ++ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ ++ rlwinm rTMP, rWORD1, 8, 0xffffffff ++ rlwimi rTMP2, rWORD2, 24, 0, 7 ++ rlwimi rTMP, rWORD1, 24, 0, 7 ++ rlwimi rTMP2, rWORD2, 24, 16, 23 ++ rlwimi rTMP, rWORD1, 24, 16, 23 ++ xor. rBITDIF, rTMP, rTMP2 ++ sub rRTN, rTMP, rTMP2 ++ bgelr+ ++ ori rRTN, rTMP2, 1 ++ blr ++ ++L(different): ++ lwz rWORD1, -4(rSTR1) ++ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ ++ rlwinm rTMP, rWORD1, 8, 0xffffffff ++ rlwimi rTMP2, rWORD2, 24, 0, 7 ++ rlwimi rTMP, rWORD1, 24, 0, 7 ++ rlwimi rTMP2, rWORD2, 24, 16, 23 ++ rlwimi rTMP, rWORD1, 24, 16, 23 ++ xor. rBITDIF, rTMP, rTMP2 ++ sub rRTN, rTMP, rTMP2 ++ bgelr+ ++ ori rRTN, rTMP2, 1 ++ blr ++ ++#else + L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 +- + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzw rBITDIF, rBITDIF +@@ -91,28 +125,20 @@ + addi rNEG, rNEG, 7 + cmpw cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 +- blt- cr1, L(equal) +- srawi rRTN, rRTN, 31 +- ori rRTN, rRTN, 1 +- blr ++ bgelr+ cr1 + L(equal): + li rRTN, 0 + blr + + L(different): +- lwzu rWORD1, -4(rSTR1) ++ lwz rWORD1, -4(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 +- blt- L(highbit) +- srawi rRTN, rRTN, 31 +- ori rRTN, rRTN, 1 +- blr ++ bgelr+ + L(highbit): +- srwi rWORD2, rWORD2, 24 +- srwi rWORD1, rWORD1, 24 +- sub rRTN, rWORD1, rWORD2 ++ ori rRTN, rWORD2, 1 + blr +- ++#endif + + /* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/strncmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/strncmp.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/strncmp.S 2014-05-28 13:26:59.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/strncmp.S 2014-05-28 13:27:02.000000000 -0500 +@@ -27,7 +27,7 @@ + EALIGN (BP_SYM(strncmp), 4, 0) + CALL_MCOUNT 3 + +-#define rTMP r0 ++#define rTMP2 r0 + #define rRTN r3 + #define rSTR1 r3 /* first string arg */ + #define rSTR2 r4 /* second string arg */ +@@ -43,6 +43,7 @@ + #define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */ + #define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ + #define rBITDIF r11 /* bits that differ in s1 & s2 words */ ++#define rTMP r12 + + dcbt 0,rSTR1 + or rTMP, rSTR2, rSTR1 +@@ -84,12 +85,59 @@ + we don't compare two strings as different because of gunk beyond + the end of the strings... */ + ++#ifdef __LITTLE_ENDIAN__ ++L(endstring): ++ addi rTMP2, rTMP, -1 ++ beq cr1, L(equal) ++ andc rTMP2, rTMP2, rTMP ++ rldimi rTMP2, rTMP2, 1, 0 ++ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ ++ and rWORD1, rWORD1, rTMP2 ++ cmpd cr1, rWORD1, rWORD2 ++ beq cr1, L(equal) ++ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ ++ neg rNEG, rBITDIF ++ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ ++ cntlzd rNEG, rNEG /* bitcount of the bit. */ ++ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ ++ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ ++ sld rWORD2, rWORD2, rNEG ++ xor. rBITDIF, rWORD1, rWORD2 ++ sub rRTN, rWORD1, rWORD2 ++ blt- L(highbit) ++ sradi rRTN, rRTN, 63 /* must return an int. 
*/ ++ ori rRTN, rRTN, 1 ++ blr ++L(equal): ++ li rRTN, 0 ++ blr ++ ++L(different): ++ ld rWORD1, -8(rSTR1) ++ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ ++ neg rNEG, rBITDIF ++ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ ++ cntlzd rNEG, rNEG /* bitcount of the bit. */ ++ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ ++ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ ++ sld rWORD2, rWORD2, rNEG ++ xor. rBITDIF, rWORD1, rWORD2 ++ sub rRTN, rWORD1, rWORD2 ++ blt- L(highbit) ++ sradi rRTN, rRTN, 63 ++ ori rRTN, rRTN, 1 ++ blr ++L(highbit): ++ sradi rRTN, rWORD2, 63 ++ ori rRTN, rRTN, 1 ++ blr ++ ++#else + L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 +- + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzd rBITDIF, rBITDIF +@@ -98,7 +146,7 @@ + cmpd cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 + blt- cr1, L(equal) +- sradi rRTN, rRTN, 63 ++ sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr + L(equal): +@@ -106,7 +154,7 @@ + blr + + L(different): +- ldu rWORD1, -8(rSTR1) ++ ld rWORD1, -8(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) +@@ -114,11 +162,10 @@ + ori rRTN, rRTN, 1 + blr + L(highbit): +- srdi rWORD2, rWORD2, 56 +- srdi rWORD1, rWORD1, 56 +- sub rRTN, rWORD1, rWORD2 ++ sradi rRTN, rWORD2, 63 ++ ori rRTN, rRTN, 1 + blr +- ++#endif + + /* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strncmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strncmp.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strncmp.S 2014-05-28 13:26:59.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strncmp.S 2014-05-28 13:27:02.000000000 -0500 +@@ -29,7 +29,7 @@ + EALIGN (BP_SYM(strncmp),5,0) + CALL_MCOUNT 3 + +-#define rTMP r0 ++#define rTMP2 r0 + #define rRTN r3 + #define rSTR1 r3 /* first string arg */ + #define rSTR2 r4 /* second string arg */ +@@ -45,6 +45,7 @@ + #define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */ + #define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ + #define rBITDIF r11 /* bits that differ in s1 & s2 words */ ++#define rTMP r12 + + dcbt 0,rSTR1 + nop +@@ -88,12 +89,57 @@ + we don't compare two strings as different because of gunk beyond + the end of the strings... */ + ++#ifdef __LITTLE_ENDIAN__ ++L(endstring): ++ addi rTMP2, rTMP, -1 ++ beq cr1, L(equal) ++ andc rTMP2, rTMP2, rTMP ++ rldimi rTMP2, rTMP2, 1, 0 ++ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ ++ and rWORD1, rWORD1, rTMP2 ++ cmpd cr1, rWORD1, rWORD2 ++ beq cr1, L(equal) ++ cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */ ++ addi rNEG, rBITDIF, 1 ++ orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */ ++ sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */ ++ andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */ ++ andc rWORD2, rWORD2, rNEG ++ xor. rBITDIF, rWORD1, rWORD2 ++ sub rRTN, rWORD1, rWORD2 ++ blt L(highbit) ++ sradi rRTN, rRTN, 63 /* must return an int. */ ++ ori rRTN, rRTN, 1 ++ blr ++L(equal): ++ li rRTN, 0 ++ blr ++ ++L(different): ++ ld rWORD1, -8(rSTR1) ++ cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */ ++ addi rNEG, rBITDIF, 1 ++ orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */ ++ sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */ ++ andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */ ++ andc rWORD2, rWORD2, rNEG ++ xor. 
rBITDIF, rWORD1, rWORD2 ++ sub rRTN, rWORD1, rWORD2 ++ blt L(highbit) ++ sradi rRTN, rRTN, 63 ++ ori rRTN, rRTN, 1 ++ blr ++L(highbit): ++ sradi rRTN, rWORD2, 63 ++ ori rRTN, rRTN, 1 ++ blr ++ ++#else + L(endstring): + and rTMP,r7F7F,rWORD1 + beq cr1,L(equal) + add rTMP,rTMP,r7F7F + xor. rBITDIF,rWORD1,rWORD2 +- + andc rNEG,rNEG,rTMP + blt L(highbit) + cntlzd rBITDIF,rBITDIF +@@ -102,7 +148,7 @@ + cmpd cr1,rNEG,rBITDIF + sub rRTN,rWORD1,rWORD2 + blt cr1,L(equal) +- sradi rRTN,rRTN,63 ++ sradi rRTN,rRTN,63 /* must return an int. */ + ori rRTN,rRTN,1 + blr + L(equal): +@@ -110,7 +156,7 @@ + blr + + L(different): +- ldu rWORD1,-8(rSTR1) ++ ld rWORD1,-8(rSTR1) + xor. rBITDIF,rWORD1,rWORD2 + sub rRTN,rWORD1,rWORD2 + blt L(highbit) +@@ -118,11 +164,10 @@ + ori rRTN,rRTN,1 + blr + L(highbit): +- srdi rWORD2,rWORD2,56 +- srdi rWORD1,rWORD1,56 +- sub rRTN,rWORD1,rWORD2 ++ sradi rRTN,rWORD2,63 ++ ori rRTN,rRTN,1 + blr +- ++#endif + + /* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strcmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strcmp.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strcmp.S 2014-05-28 13:26:59.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strcmp.S 2014-05-28 13:37:15.000000000 -0500 +@@ -27,7 +27,7 @@ + EALIGN (BP_SYM(strcmp), 4, 0) + CALL_MCOUNT 2 + +-#define rTMP r0 ++#define rTMP2 r0 + #define rRTN r3 + #define rSTR1 r3 /* first string arg */ + #define rSTR2 r4 /* second string arg */ +@@ -46,6 +46,7 @@ + #define r7F7F r8 /* constant 0x7f7f7f7f7f7f7f7f */ + #define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ + #define rBITDIF r10 /* bits that differ in s1 & s2 words */ ++#define rTMP r11 + + CHECK_BOUNDS_LOW (rSTR1, rTMP, rHIGH1) + CHECK_BOUNDS_LOW (rSTR2, rTMP, rHIGH2) +@@ -72,19 +73,66 @@ + ldu rWORD2, 8(rSTR2) + L(g1): add rTMP, rFEFE, rWORD1 + nor rNEG, r7F7F, rWORD1 +- + and. rTMP, rTMP, rNEG + cmpd cr1, rWORD1, rWORD2 + beq+ L(g0) +-L(endstring): ++ + /* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ ++#ifdef __LITTLE_ENDIAN__ ++L(endstring): ++ addi rTMP2, rTMP, -1 ++ beq cr1, L(equal) ++ andc rTMP2, rTMP2, rTMP ++ rldimi rTMP2, rTMP2, 1, 0 ++ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ ++ and rWORD1, rWORD1, rTMP2 ++ cmpd cr1, rWORD1, rWORD2 ++ beq cr1, L(equal) ++ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ ++ neg rNEG, rBITDIF ++ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ ++ cntlzd rNEG, rNEG /* bitcount of the bit. */ ++ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ ++ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ ++ sld rWORD2, rWORD2, rNEG ++ xor. rBITDIF, rWORD1, rWORD2 ++ sub rRTN, rWORD1, rWORD2 ++ blt- L(highbit) ++ sradi rRTN, rRTN, 63 /* must return an int. */ ++ ori rRTN, rRTN, 1 ++ blr ++L(equal): ++ li rRTN, 0 ++ blr ++ ++L(different): ++ ld rWORD1, -8(rSTR1) ++ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ ++ neg rNEG, rBITDIF ++ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ ++ cntlzd rNEG, rNEG /* bitcount of the bit. */ ++ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ ++ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ ++ sld rWORD2, rWORD2, rNEG ++ xor. 
rBITDIF, rWORD1, rWORD2 ++ sub rRTN, rWORD1, rWORD2 ++ blt- L(highbit) ++ sradi rRTN, rRTN, 63 ++ ori rRTN, rRTN, 1 ++ blr ++L(highbit): ++ sradi rRTN, rWORD2, 63 ++ ori rRTN, rRTN, 1 ++ blr ++ ++#else ++L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 +- + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzd rBITDIF, rBITDIF +@@ -93,7 +141,7 @@ + cmpd cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 + blt- cr1, L(equal) +- sradi rRTN, rRTN, 63 ++ sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr + L(equal): +@@ -110,12 +158,11 @@ + ori rRTN, rRTN, 1 + blr + L(highbit): +- srdi rWORD2, rWORD2, 56 +- srdi rWORD1, rWORD1, 56 +- sub rRTN, rWORD1, rWORD2 ++ sradi rRTN, rWORD2, 63 ++ ori rRTN, rRTN, 1 + /* GKM FIXME: check high bounds. */ + blr +- ++#endif + + /* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strncmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strncmp.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strncmp.S 2014-05-28 13:26:59.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strncmp.S 2014-05-28 13:38:31.000000000 -0500 +@@ -27,7 +27,7 @@ + EALIGN (BP_SYM(strncmp), 4, 0) + CALL_MCOUNT 3 + +-#define rTMP r0 ++#define rTMP2 r0 + #define rRTN r3 + #define rSTR1 r3 /* first string arg */ + #define rSTR2 r4 /* second string arg */ +@@ -41,6 +41,7 @@ + #define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */ + #define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ + #define rBITDIF r11 /* bits that differ in s1 & s2 words */ ++#define rTMP r12 + + dcbt 0,rSTR1 + or rTMP, rSTR2, rSTR1 +@@ -81,13 +82,60 @@ + /* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ +- ++ ++#ifdef __LITTLE_ENDIAN__ ++L(endstring): ++ addi rTMP2, rTMP, -1 ++ beq cr1, L(equal) ++ andc rTMP2, rTMP2, rTMP ++ rldimi rTMP2, rTMP2, 1, 0 ++ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ ++ and rWORD1, rWORD1, rTMP2 ++ cmpd cr1, rWORD1, rWORD2 ++ beq cr1, L(equal) ++ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ ++ neg rNEG, rBITDIF ++ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ ++ cntlzd rNEG, rNEG /* bitcount of the bit. */ ++ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ ++ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ ++ sld rWORD2, rWORD2, rNEG ++ xor. rBITDIF, rWORD1, rWORD2 ++ sub rRTN, rWORD1, rWORD2 ++ blt- L(highbit) ++ sradi rRTN, rRTN, 63 /* must return an int. */ ++ ori rRTN, rRTN, 1 ++ blr ++L(equal): ++ li rRTN, 0 ++ blr ++ ++L(different): ++ ld rWORD1, -8(rSTR1) ++ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ ++ neg rNEG, rBITDIF ++ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ ++ cntlzd rNEG, rNEG /* bitcount of the bit. */ ++ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ ++ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ ++ sld rWORD2, rWORD2, rNEG ++ xor. rBITDIF, rWORD1, rWORD2 ++ sub rRTN, rWORD1, rWORD2 ++ blt- L(highbit) ++ sradi rRTN, rRTN, 63 ++ ori rRTN, rRTN, 1 ++ blr ++L(highbit): ++ sradi rRTN, rWORD2, 63 ++ ori rRTN, rRTN, 1 ++ blr ++ ++#else + L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. 
rBITDIF, rWORD1, rWORD2 +- + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzd rBITDIF, rBITDIF +@@ -96,7 +144,7 @@ + cmpd cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 + blt- cr1, L(equal) +- sradi rRTN, rRTN, 63 ++ sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr + L(equal): +@@ -104,7 +152,7 @@ + blr + + L(different): +- ldu rWORD1, -8(rSTR1) ++ ld rWORD1, -8(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) +@@ -112,11 +160,10 @@ + ori rRTN, rRTN, 1 + blr + L(highbit): +- srdi rWORD2, rWORD2, 56 +- srdi rWORD1, rWORD1, 56 +- sub rRTN, rWORD1, rWORD2 ++ sradi rRTN, rWORD2, 63 ++ ori rRTN, rRTN, 1 + blr +- ++#endif + + /* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 diff --git a/packages/glibc/2.17/0050-glibc-ppc64le-28.patch b/packages/glibc/2.17/0050-glibc-ppc64le-28.patch new file mode 100644 index 0000000..05f63e9 --- /dev/null +++ b/packages/glibc/2.17/0050-glibc-ppc64le-28.patch @@ -0,0 +1,167 @@ +# commit 43b84013714c46e6dcae4a5564c5527777ad5e08 +# Author: Alan Modra +# Date: Sat Aug 17 18:45:31 2013 +0930 +# +# PowerPC LE strcpy +# http://sourceware.org/ml/libc-alpha/2013-08/msg00100.html +# +# The strcpy changes for little-endian are quite straight-forward, just +# a matter of rotating the last word differently. +# +# I'll note that the powerpc64 version of stpcpy is just begging to be +# converted to use 64-bit loads and stores.. +# +# * sysdeps/powerpc/powerpc64/strcpy.S: Add little-endian support: +# * sysdeps/powerpc/powerpc32/strcpy.S: Likewise. +# * sysdeps/powerpc/powerpc64/stpcpy.S: Likewise. +# * sysdeps/powerpc/powerpc32/stpcpy.S: Likewise. +# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/stpcpy.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/stpcpy.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/stpcpy.S 2014-05-28 13:40:01.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/stpcpy.S 2014-05-28 13:40:01.000000000 -0500 +@@ -74,7 +74,22 @@ + + mr rALT, rWORD + /* We've hit the end of the string. Do the rest byte-by-byte. */ +-L(g1): rlwinm. rTMP, rALT, 8, 24, 31 ++L(g1): ++#ifdef __LITTLE_ENDIAN__ ++ rlwinm. rTMP, rALT, 0, 24, 31 ++ stbu rALT, 4(rDEST) ++ beqlr- ++ rlwinm. rTMP, rALT, 24, 24, 31 ++ stbu rTMP, 1(rDEST) ++ beqlr- ++ rlwinm. rTMP, rALT, 16, 24, 31 ++ stbu rTMP, 1(rDEST) ++ beqlr- ++ rlwinm rTMP, rALT, 8, 24, 31 ++ stbu rTMP, 1(rDEST) ++ blr ++#else ++ rlwinm. rTMP, rALT, 8, 24, 31 + stbu rTMP, 4(rDEST) + beqlr- + rlwinm. rTMP, rALT, 16, 24, 31 +@@ -87,6 +102,7 @@ + CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt) + STORE_RETURN_VALUE (rDEST) + blr ++#endif + + /* Oh well. In this case, we just do a byte-by-byte copy. */ + .align 4 +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strcpy.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strcpy.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strcpy.S 2014-05-28 13:40:01.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strcpy.S 2014-05-28 13:40:01.000000000 -0500 +@@ -78,7 +78,22 @@ + + mr rALT, rWORD + /* We've hit the end of the string. Do the rest byte-by-byte. */ +-L(g1): rlwinm. rTMP, rALT, 8, 24, 31 ++L(g1): ++#ifdef __LITTLE_ENDIAN__ ++ rlwinm. rTMP, rALT, 0, 24, 31 ++ stb rALT, 4(rDEST) ++ beqlr- ++ rlwinm. rTMP, rALT, 24, 24, 31 ++ stb rTMP, 5(rDEST) ++ beqlr- ++ rlwinm. rTMP, rALT, 16, 24, 31 ++ stb rTMP, 6(rDEST) ++ beqlr- ++ rlwinm rTMP, rALT, 8, 24, 31 ++ stb rTMP, 7(rDEST) ++ blr ++#else ++ rlwinm. rTMP, rALT, 8, 24, 31 + stb rTMP, 4(rDEST) + beqlr- + rlwinm. 
rTMP, rALT, 16, 24, 31 +@@ -90,6 +105,7 @@ + stb rALT, 7(rDEST) + /* GKM FIXME: check high bound. */ + blr ++#endif + + /* Oh well. In this case, we just do a byte-by-byte copy. */ + .align 4 +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/stpcpy.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/stpcpy.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/stpcpy.S 2014-05-28 13:40:01.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/stpcpy.S 2014-05-28 13:40:01.000000000 -0500 +@@ -75,7 +75,22 @@ + + mr rALT, rWORD + /* We've hit the end of the string. Do the rest byte-by-byte. */ +-L(g1): rlwinm. rTMP, rALT, 8, 24, 31 ++L(g1): ++#ifdef __LITTLE_ENDIAN__ ++ rlwinm. rTMP, rALT, 0, 24, 31 ++ stbu rALT, 4(rDEST) ++ beqlr- ++ rlwinm. rTMP, rALT, 24, 24, 31 ++ stbu rTMP, 1(rDEST) ++ beqlr- ++ rlwinm. rTMP, rALT, 16, 24, 31 ++ stbu rTMP, 1(rDEST) ++ beqlr- ++ rlwinm rTMP, rALT, 8, 24, 31 ++ stbu rTMP, 1(rDEST) ++ blr ++#else ++ rlwinm. rTMP, rALT, 8, 24, 31 + stbu rTMP, 4(rDEST) + beqlr- + rlwinm. rTMP, rALT, 16, 24, 31 +@@ -88,6 +103,7 @@ + CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt) + STORE_RETURN_VALUE (rDEST) + blr ++#endif + + /* Oh well. In this case, we just do a byte-by-byte copy. */ + .align 4 +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strcpy.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strcpy.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strcpy.S 2014-05-28 13:40:01.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strcpy.S 2014-05-28 13:40:01.000000000 -0500 +@@ -90,6 +90,32 @@ + mr rALT, rWORD + /* We've hit the end of the string. Do the rest byte-by-byte. */ + L(g1): ++#ifdef __LITTLE_ENDIAN__ ++ extrdi. rTMP, rALT, 8, 56 ++ stb rALT, 8(rDEST) ++ beqlr- ++ extrdi. rTMP, rALT, 8, 48 ++ stb rTMP, 9(rDEST) ++ beqlr- ++ extrdi. rTMP, rALT, 8, 40 ++ stb rTMP, 10(rDEST) ++ beqlr- ++ extrdi. rTMP, rALT, 8, 32 ++ stb rTMP, 11(rDEST) ++ beqlr- ++ extrdi. rTMP, rALT, 8, 24 ++ stb rTMP, 12(rDEST) ++ beqlr- ++ extrdi. rTMP, rALT, 8, 16 ++ stb rTMP, 13(rDEST) ++ beqlr- ++ extrdi. rTMP, rALT, 8, 8 ++ stb rTMP, 14(rDEST) ++ beqlr- ++ extrdi rTMP, rALT, 8, 0 ++ stb rTMP, 15(rDEST) ++ blr ++#else + extrdi. rTMP, rALT, 8, 0 + stb rTMP, 8(rDEST) + beqlr- +@@ -114,6 +140,7 @@ + stb rALT, 15(rDEST) + /* GKM FIXME: check high bound. */ + blr ++#endif + + /* Oh well. In this case, we just do a byte-by-byte copy. */ + .align 4 diff --git a/packages/glibc/2.17/0051-glibc-ppc64le-29.patch b/packages/glibc/2.17/0051-glibc-ppc64le-29.patch new file mode 100644 index 0000000..291155e --- /dev/null +++ b/packages/glibc/2.17/0051-glibc-ppc64le-29.patch @@ -0,0 +1,642 @@ +# commit 664318c3eb07032e2bfcf47cb2aa3c89280c19e7 +# Author: Alan Modra +# Date: Sat Aug 17 18:46:05 2013 +0930 +# +# PowerPC LE strchr +# http://sourceware.org/ml/libc-alpha/2013-08/msg00101.html +# +# Adds little-endian support to optimised strchr assembly. I've also +# tweaked the big-endian code a little. In power7/strchr.S there's a +# check in the tail of the function that we didn't match 0 before +# finding a c match, done by comparing leading zero counts. It's just +# as valid, and quicker, to compare the raw output from cmpb. +# +# Another little tweak is to use rldimi/insrdi in place of rlwimi for +# the power7 strchr functions. Since rlwimi is cracked, it is a few +# cycles slower. rldimi can be used on the 32-bit power7 functions +# too. +# +# * sysdeps/powerpc/powerpc64/power7/strchr.S (strchr): Add little-endian +# support. Correct typos, formatting. Optimize tail. 
Use insrdi +# rather than rlwimi. +# * sysdeps/powerpc/powerpc32/power7/strchr.S: Likewise. +# * sysdeps/powerpc/powerpc64/power7/strchrnul.S (__strchrnul): Add +# little-endian support. Correct typos. +# * sysdeps/powerpc/powerpc32/power7/strchrnul.S: Likewise. Use insrdi +# rather than rlwimi. +# * sysdeps/powerpc/powerpc64/strchr.S (rTMP4, rTMP5): Define. Use +# in loop and entry code to keep "and." results. +# (strchr): Add little-endian support. Comment. Move cntlzd +# earlier in tail. +# * sysdeps/powerpc/powerpc32/strchr.S: Likewise. +# +Index: glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strchr.S +=================================================================== +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strchr.S.orig ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strchr.S +@@ -37,8 +37,8 @@ ENTRY (BP_SYM(strchr)) + beq cr7,L(null_match) + + /* Replicate byte to word. */ +- rlwimi r4,r4,8,16,23 +- rlwimi r4,r4,16,0,15 ++ insrdi r4,r4,8,48 ++ insrdi r4,r4,16,32 + + /* Now r4 has a word of c bytes and r0 has + a word of null bytes. */ +@@ -48,11 +48,17 @@ ENTRY (BP_SYM(strchr)) + + /* Move the words left and right to discard the bits that are + not part of the string and to bring them back as zeros. */ +- ++#ifdef __LITTLE_ENDIAN__ ++ srw r10,r10,r6 ++ srw r11,r11,r6 ++ slw r10,r10,r6 ++ slw r11,r11,r6 ++#else + slw r10,r10,r6 + slw r11,r11,r6 + srw r10,r10,r6 + srw r11,r11,r6 ++#endif + or r5,r10,r11 /* OR the results to speed things up. */ + cmpwi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ +@@ -67,7 +73,7 @@ ENTRY (BP_SYM(strchr)) + + /* Handle WORD2 of pair. */ + lwzu r12,4(r8) +- cmpb r10,r12,r4 ++ cmpb r10,r12,r4 + cmpb r11,r12,r0 + or r5,r10,r11 + cmpwi cr7,r5,0 +@@ -102,22 +108,31 @@ L(loop): + bne cr6,L(done) + + /* The c/null byte must be in the second word. Adjust the address +- again and move the result of cmpb to r10 so we can calculate the +- pointer. */ ++ again and move the result of cmpb to r10/r11 so we can calculate ++ the pointer. */ + + mr r10,r6 + mr r11,r7 + addi r8,r8,4 + +- /* r5 has the output of the cmpb instruction, that is, it contains ++ /* r10/r11 have the output of the cmpb instructions, that is, + 0xff in the same position as the c/null byte in the original + word from the string. Use that to calculate the pointer. */ + L(done): +- cntlzw r4,r10 /* Count leading zeroes before c matches. */ +- cntlzw r0,r11 /* Count leading zeroes before null matches. */ +- cmplw cr7,r4,r0 ++#ifdef __LITTLE_ENDIAN__ ++ addi r3,r10,-1 ++ andc r3,r3,r10 ++ popcntw r0,r3 ++ addi r4,r11,-1 ++ andc r4,r4,r11 ++ cmplw cr7,r3,r4 ++ bgt cr7,L(no_match) ++#else ++ cntlzw r0,r10 /* Count leading zeros before c matches. */ ++ cmplw cr7,r11,r10 + bgt cr7,L(no_match) +- srwi r0,r4,3 /* Convert leading zeroes to bytes. */ ++#endif ++ srwi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching c byte + or null in case c was not found. */ + blr +@@ -135,10 +150,14 @@ L(null_match): + cmpb r5,r12,r0 /* Compare each byte against null bytes. */ + + /* Move the words left and right to discard the bits that are +- not part of the string and to bring them back as zeros. */ +- ++ not part of the string and bring them back as zeros. */ ++#ifdef __LITTLE_ENDIAN__ ++ srw r5,r5,r6 ++ slw r5,r5,r6 ++#else + slw r5,r5,r6 + srw r5,r5,r6 ++#endif + cmpwi cr7,r5,0 /* If r10 == 0, no c or null bytes + have been found. 
*/ + bne cr7,L(done_null) +@@ -193,7 +212,13 @@ L(loop_null): + 0xff in the same position as the null byte in the original + word from the string. Use that to calculate the pointer. */ + L(done_null): ++#ifdef __LITTLE_ENDIAN__ ++ addi r0,r5,-1 ++ andc r0,r0,r5 ++ popcntw r0,r0 ++#else + cntlzw r0,r5 /* Count leading zeros before the match. */ ++#endif + srwi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching null byte. */ + blr +Index: glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strchrnul.S +=================================================================== +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strchrnul.S.orig ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strchrnul.S +@@ -29,8 +29,8 @@ ENTRY (BP_SYM(__strchrnul)) + clrrwi r8,r3,2 /* Align the address to word boundary. */ + + /* Replicate byte to word. */ +- rlwimi r4,r4,8,16,23 +- rlwimi r4,r4,16,0,15 ++ insrdi r4,r4,8,48 ++ insrdi r4,r4,16,32 + + rlwinm r6,r3,3,27,28 /* Calculate padding. */ + lwz r12,0(r8) /* Load word from memory. */ +@@ -45,10 +45,17 @@ ENTRY (BP_SYM(__strchrnul)) + + /* Move the words left and right to discard the bits that are + not part of the string and bring them back as zeros. */ ++#ifdef __LITTLE_ENDIAN__ ++ srw r10,r10,r6 ++ srw r9,r9,r6 ++ slw r10,r10,r6 ++ slw r9,r9,r6 ++#else + slw r10,r10,r6 + slw r9,r9,r6 + srw r10,r10,r6 + srw r9,r9,r6 ++#endif + or r5,r9,r10 /* OR the results to speed things up. */ + cmpwi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ +@@ -56,7 +63,7 @@ ENTRY (BP_SYM(__strchrnul)) + + mtcrf 0x01,r8 + +- /* Are we now aligned to a quadword boundary? If so, skip to ++ /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 29,L(loop) +@@ -78,7 +85,7 @@ L(loop): + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + lwz r12,4(r8) +- lwzu r11,8(r8) ++ lwzu r11,8(r8) + cmpb r10,r12,r0 + cmpb r9,r12,r4 + cmpb r6,r11,r0 +@@ -97,9 +104,9 @@ L(loop): + addi r8,r8,-4 + bne cr6,L(done) + +- /* The c/null byte must be in the second word. Adjust the +- address again and move the result of cmpb to r10 so we can calculate +- the pointer. */ ++ /* The c/null byte must be in the second word. Adjust the address ++ again and move the result of cmpb to r5 so we can calculate the ++ pointer. */ + mr r5,r10 + addi r8,r8,4 + +@@ -107,7 +114,13 @@ L(loop): + 0xff in the same position as the c/null byte in the original + word from the string. Use that to calculate the pointer. */ + L(done): ++#ifdef __LITTLE_ENDIAN__ ++ addi r0,r5,-1 ++ andc r0,r0,r5 ++ popcntw r0,r0 ++#else + cntlzw r0,r5 /* Count leading zeros before the match. */ ++#endif + srwi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of matching c/null byte. 
*/ + blr +Index: glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strchr.S +=================================================================== +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strchr.S.orig ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strchr.S +@@ -44,6 +44,8 @@ ENTRY (BP_SYM (strchr)) + #define rIGN r10 /* number of bits we should ignore in the first word */ + #define rMASK r11 /* mask with the bits to ignore set to 0 */ + #define rTMP3 r12 ++#define rTMP4 rIGN ++#define rTMP5 rMASK + + CHECK_BOUNDS_LOW (rSTR, rTMP1, rTMP2) + STORE_RETURN_BOUNDS (rTMP1, rTMP2) +@@ -59,53 +61,74 @@ ENTRY (BP_SYM (strchr)) + addi r7F7F, r7F7F, 0x7f7f + /* Test the first (partial?) word. */ + lwz rWORD, 0(rSTR) ++#ifdef __LITTLE_ENDIAN__ ++ slw rMASK, rMASK, rIGN ++#else + srw rMASK, rMASK, rIGN ++#endif + orc rWORD, rWORD, rMASK + add rTMP1, rFEFE, rWORD + nor rTMP2, r7F7F, rWORD +- and. rTMP1, rTMP1, rTMP2 ++ and. rTMP4, rTMP1, rTMP2 + xor rTMP3, rCHR, rWORD + orc rTMP3, rTMP3, rMASK + b L(loopentry) + + /* The loop. */ + +-L(loop):lwzu rWORD, 4(rSTR) +- and. rTMP1, rTMP1, rTMP2 ++L(loop): ++ lwzu rWORD, 4(rSTR) ++ and. rTMP5, rTMP1, rTMP2 + /* Test for 0. */ +- add rTMP1, rFEFE, rWORD +- nor rTMP2, r7F7F, rWORD ++ add rTMP1, rFEFE, rWORD /* x - 0x01010101. */ ++ nor rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080. */ + bne L(foundit) +- and. rTMP1, rTMP1, rTMP2 ++ and. rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080. */ + /* Start test for the bytes we're looking for. */ + xor rTMP3, rCHR, rWORD + L(loopentry): + add rTMP1, rFEFE, rTMP3 + nor rTMP2, r7F7F, rTMP3 + beq L(loop) ++ + /* There is a zero byte in the word, but may also be a matching byte (either + before or after the zero byte). In fact, we may be looking for a +- zero byte, in which case we return a match. We guess that this hasn't +- happened, though. */ +-L(missed): +- and. rTMP1, rTMP1, rTMP2 ++ zero byte, in which case we return a match. */ ++ and. rTMP5, rTMP1, rTMP2 + li rRTN, 0 + STORE_RETURN_VALUE (rSTR) + beqlr +-/* It did happen. Decide which one was first... +- I'm not sure if this is actually faster than a sequence of +- rotates, compares, and branches (we use it anyway because it's shorter). */ ++/* At this point: ++ rTMP5 bytes are 0x80 for each match of c, 0 otherwise. ++ rTMP4 bytes are 0x80 for each match of 0, 0 otherwise. ++ But there may be false matches in the next most significant byte from ++ a true match due to carries. This means we need to recalculate the ++ matches using a longer method for big-endian. */ ++#ifdef __LITTLE_ENDIAN__ ++ addi rTMP1, rTMP5, -1 ++ andc rTMP1, rTMP1, rTMP5 ++ cntlzw rCLZB, rTMP1 ++ addi rTMP2, rTMP4, -1 ++ andc rTMP2, rTMP2, rTMP4 ++ cmplw rTMP1, rTMP2 ++ bgtlr ++ subfic rCLZB, rCLZB, 32-7 ++#else ++/* I think we could reduce this by two instructions by keeping the "nor" ++ results from the loop for reuse here. See strlen.S tail. Similarly ++ one instruction could be pruned from L(foundit). 
*/ + and rFEFE, r7F7F, rWORD +- or rMASK, r7F7F, rWORD ++ or rTMP5, r7F7F, rWORD + and rTMP1, r7F7F, rTMP3 +- or rIGN, r7F7F, rTMP3 ++ or rTMP4, r7F7F, rTMP3 + add rFEFE, rFEFE, r7F7F + add rTMP1, rTMP1, r7F7F +- nor rWORD, rMASK, rFEFE +- nor rTMP2, rIGN, rTMP1 ++ nor rWORD, rTMP5, rFEFE ++ nor rTMP2, rTMP4, rTMP1 ++ cntlzw rCLZB, rTMP2 + cmplw rWORD, rTMP2 + bgtlr +- cntlzw rCLZB, rTMP2 ++#endif + srwi rCLZB, rCLZB, 3 + add rRTN, rSTR, rCLZB + CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, twlge) +@@ -113,13 +136,21 @@ L(missed): + blr + + L(foundit): ++#ifdef __LITTLE_ENDIAN__ ++ addi rTMP1, rTMP5, -1 ++ andc rTMP1, rTMP1, rTMP5 ++ cntlzw rCLZB, rTMP1 ++ subfic rCLZB, rCLZB, 32-7-32 ++ srawi rCLZB, rCLZB, 3 ++#else + and rTMP1, r7F7F, rTMP3 +- or rIGN, r7F7F, rTMP3 ++ or rTMP4, r7F7F, rTMP3 + add rTMP1, rTMP1, r7F7F +- nor rTMP2, rIGN, rTMP1 ++ nor rTMP2, rTMP4, rTMP1 + cntlzw rCLZB, rTMP2 + subi rSTR, rSTR, 4 + srwi rCLZB, rCLZB, 3 ++#endif + add rRTN, rSTR, rCLZB + CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, twlge) + STORE_RETURN_VALUE (rSTR) +Index: glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strchr.S +=================================================================== +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strchr.S.orig ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strchr.S +@@ -37,8 +37,8 @@ ENTRY (BP_SYM(strchr)) + beq cr7,L(null_match) + + /* Replicate byte to doubleword. */ +- rlwimi r4,r4,8,16,23 +- rlwimi r4,r4,16,0,15 ++ insrdi r4,r4,8,48 ++ insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + /* Now r4 has a doubleword of c bytes and r0 has +@@ -49,11 +49,17 @@ ENTRY (BP_SYM(strchr)) + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +- ++#ifdef __LITTLE_ENDIAN__ ++ srd r10,r10,r6 ++ srd r11,r11,r6 ++ sld r10,r10,r6 ++ sld r11,r11,r6 ++#else + sld r10,r10,r6 + sld r11,r11,r6 + srd r10,r10,r6 + srd r11,r11,r6 ++#endif + or r5,r10,r11 /* OR the results to speed things up. */ + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ +@@ -110,15 +116,24 @@ L(loop): + mr r11,r7 + addi r8,r8,8 + +- /* r5 has the output of the cmpb instruction, that is, it contains ++ /* r10/r11 have the output of the cmpb instructions, that is, + 0xff in the same position as the c/null byte in the original + doubleword from the string. Use that to calculate the pointer. */ + L(done): +- cntlzd r4,r10 /* Count leading zeroes before c matches. */ +- cntlzd r0,r11 /* Count leading zeroes before null matches. */ +- cmpld cr7,r4,r0 ++#ifdef __LITTLE_ENDIAN__ ++ addi r3,r10,-1 ++ andc r3,r3,r10 ++ popcntd r0,r3 ++ addi r4,r11,-1 ++ andc r4,r4,r11 ++ cmpld cr7,r3,r4 + bgt cr7,L(no_match) +- srdi r0,r4,3 /* Convert leading zeroes to bytes. */ ++#else ++ cntlzd r0,r10 /* Count leading zeros before c matches. */ ++ cmpld cr7,r11,r10 ++ bgt cr7,L(no_match) ++#endif ++ srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching c byte + or null in case c was not found. */ + blr +@@ -137,9 +152,13 @@ L(null_match): + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +- ++#ifdef __LITTLE_ENDIAN__ ++ srd r5,r5,r6 ++ sld r5,r5,r6 ++#else + sld r5,r5,r6 + srd r5,r5,r6 ++#endif + cmpdi cr7,r5,0 /* If r10 == 0, no c or null bytes + have been found. 
*/ + bne cr7,L(done_null) +@@ -194,7 +213,13 @@ L(loop_null): + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the pointer. */ + L(done_null): ++#ifdef __LITTLE_ENDIAN__ ++ addi r0,r5,-1 ++ andc r0,r0,r5 ++ popcntd r0,r0 ++#else + cntlzd r0,r5 /* Count leading zeros before the match. */ ++#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching null byte. */ + blr +Index: glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strchrnul.S +=================================================================== +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strchrnul.S.orig ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strchrnul.S +@@ -29,8 +29,8 @@ ENTRY (BP_SYM(__strchrnul)) + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + + /* Replicate byte to doubleword. */ +- rlwimi r4,r4,8,16,23 +- rlwimi r4,r4,16,0,15 ++ insrdi r4,r4,8,48 ++ insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ +@@ -46,10 +46,17 @@ ENTRY (BP_SYM(__strchrnul)) + + /* Move the doublewords left and right to discard the bits that are + not part of the string and to bring them back as zeros. */ ++#ifdef __LITTLE_ENDIAN__ ++ srd r10,r10,r6 ++ srd r9,r9,r6 ++ sld r10,r10,r6 ++ sld r9,r9,r6 ++#else + sld r10,r10,r6 + sld r9,r9,r6 + srd r10,r10,r6 + srd r9,r9,r6 ++#endif + or r5,r9,r10 /* OR the results to speed things up. */ + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ +@@ -99,7 +106,7 @@ L(loop): + bne cr6,L(done) + + /* The c/null byte must be in the second doubleword. Adjust the +- address again and move the result of cmpb to r10 so we can calculate ++ address again and move the result of cmpb to r5 so we can calculate + the pointer. */ + mr r5,r10 + addi r8,r8,8 +@@ -108,7 +115,13 @@ L(loop): + 0xff in the same position as the c/null byte in the original + doubleword from the string. Use that to calculate the pointer. */ + L(done): ++#ifdef __LITTLE_ENDIAN__ ++ addi r0,r5,-1 ++ andc r0,r0,r5 ++ popcntd r0,r0 ++#else + cntlzd r0,r5 /* Count leading zeros before the match. */ ++#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of matching c/null byte. */ + blr +Index: glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strchr.S +=================================================================== +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strchr.S.orig ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strchr.S +@@ -50,14 +50,16 @@ ENTRY (BP_SYM (strchr)) + #define rIGN r10 /* number of bits we should ignore in the first word */ + #define rMASK r11 /* mask with the bits to ignore set to 0 */ + #define rTMP3 r12 ++#define rTMP4 rIGN ++#define rTMP5 rMASK + + CHECK_BOUNDS_LOW (rSTR, rTMP1, rTMP2) + STORE_RETURN_BOUNDS (rTMP1, rTMP2) + + dcbt 0,rRTN +- rlwimi rCHR, rCHR, 8, 16, 23 ++ insrdi rCHR, rCHR, 8, 48 + li rMASK, -1 +- rlwimi rCHR, rCHR, 16, 0, 15 ++ insrdi rCHR, rCHR, 16, 32 + rlwinm rIGN, rRTN, 3, 26, 28 + insrdi rCHR, rCHR, 32, 0 + lis rFEFE, -0x101 +@@ -70,53 +72,74 @@ ENTRY (BP_SYM (strchr)) + add rFEFE, rFEFE, rTMP1 + /* Test the first (partial?) word. */ + ld rWORD, 0(rSTR) ++#ifdef __LITTLE_ENDIAN__ ++ sld rMASK, rMASK, rIGN ++#else + srd rMASK, rMASK, rIGN ++#endif + orc rWORD, rWORD, rMASK + add rTMP1, rFEFE, rWORD + nor rTMP2, r7F7F, rWORD +- and. rTMP1, rTMP1, rTMP2 ++ and. 
rTMP4, rTMP1, rTMP2 + xor rTMP3, rCHR, rWORD + orc rTMP3, rTMP3, rMASK + b L(loopentry) + + /* The loop. */ + +-L(loop):ldu rWORD, 8(rSTR) +- and. rTMP1, rTMP1, rTMP2 ++L(loop): ++ ldu rWORD, 8(rSTR) ++ and. rTMP5, rTMP1, rTMP2 + /* Test for 0. */ +- add rTMP1, rFEFE, rWORD +- nor rTMP2, r7F7F, rWORD ++ add rTMP1, rFEFE, rWORD /* x - 0x01010101. */ ++ nor rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080. */ + bne L(foundit) +- and. rTMP1, rTMP1, rTMP2 ++ and. rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080. */ + /* Start test for the bytes we're looking for. */ + xor rTMP3, rCHR, rWORD + L(loopentry): + add rTMP1, rFEFE, rTMP3 + nor rTMP2, r7F7F, rTMP3 + beq L(loop) ++ + /* There is a zero byte in the word, but may also be a matching byte (either + before or after the zero byte). In fact, we may be looking for a +- zero byte, in which case we return a match. We guess that this hasn't +- happened, though. */ +-L(missed): +- and. rTMP1, rTMP1, rTMP2 ++ zero byte, in which case we return a match. */ ++ and. rTMP5, rTMP1, rTMP2 + li rRTN, 0 + STORE_RETURN_VALUE (rSTR) + beqlr +-/* It did happen. Decide which one was first... +- I'm not sure if this is actually faster than a sequence of +- rotates, compares, and branches (we use it anyway because it's shorter). */ ++/* At this point: ++ rTMP5 bytes are 0x80 for each match of c, 0 otherwise. ++ rTMP4 bytes are 0x80 for each match of 0, 0 otherwise. ++ But there may be false matches in the next most significant byte from ++ a true match due to carries. This means we need to recalculate the ++ matches using a longer method for big-endian. */ ++#ifdef __LITTLE_ENDIAN__ ++ addi rTMP1, rTMP5, -1 ++ andc rTMP1, rTMP1, rTMP5 ++ cntlzd rCLZB, rTMP1 ++ addi rTMP2, rTMP4, -1 ++ andc rTMP2, rTMP2, rTMP4 ++ cmpld rTMP1, rTMP2 ++ bgtlr ++ subfic rCLZB, rCLZB, 64-7 ++#else ++/* I think we could reduce this by two instructions by keeping the "nor" ++ results from the loop for reuse here. See strlen.S tail. Similarly ++ one instruction could be pruned from L(foundit). 
*/ + and rFEFE, r7F7F, rWORD +- or rMASK, r7F7F, rWORD ++ or rTMP5, r7F7F, rWORD + and rTMP1, r7F7F, rTMP3 +- or rIGN, r7F7F, rTMP3 ++ or rTMP4, r7F7F, rTMP3 + add rFEFE, rFEFE, r7F7F + add rTMP1, rTMP1, r7F7F +- nor rWORD, rMASK, rFEFE +- nor rTMP2, rIGN, rTMP1 ++ nor rWORD, rTMP5, rFEFE ++ nor rTMP2, rTMP4, rTMP1 ++ cntlzd rCLZB, rTMP2 + cmpld rWORD, rTMP2 + bgtlr +- cntlzd rCLZB, rTMP2 ++#endif + srdi rCLZB, rCLZB, 3 + add rRTN, rSTR, rCLZB + CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, tdlge) +@@ -124,13 +147,21 @@ L(missed): + blr + + L(foundit): ++#ifdef __LITTLE_ENDIAN__ ++ addi rTMP1, rTMP5, -1 ++ andc rTMP1, rTMP1, rTMP5 ++ cntlzd rCLZB, rTMP1 ++ subfic rCLZB, rCLZB, 64-7-64 ++ sradi rCLZB, rCLZB, 3 ++#else + and rTMP1, r7F7F, rTMP3 +- or rIGN, r7F7F, rTMP3 ++ or rTMP4, r7F7F, rTMP3 + add rTMP1, rTMP1, r7F7F +- nor rTMP2, rIGN, rTMP1 ++ nor rTMP2, rTMP4, rTMP1 + cntlzd rCLZB, rTMP2 + subi rSTR, rSTR, 8 + srdi rCLZB, rCLZB, 3 ++#endif + add rRTN, rSTR, rCLZB + CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, tdlge) + STORE_RETURN_VALUE (rSTR) diff --git a/packages/glibc/2.17/0052-glibc-ppc64le-30.patch b/packages/glibc/2.17/0052-glibc-ppc64le-30.patch new file mode 100644 index 0000000..3834dcc --- /dev/null +++ b/packages/glibc/2.17/0052-glibc-ppc64le-30.patch @@ -0,0 +1,7383 @@ +# commit fe6e95d7171eba5f3e07848f081676fae4e86322 +# Author: Alan Modra +# Date: Sat Aug 17 18:46:47 2013 +0930 +# +# PowerPC LE memcmp +# http://sourceware.org/ml/libc-alpha/2013-08/msg00102.html +# +# This is a rather large patch due to formatting and renaming. The +# formatting changes were to make it possible to compare power7 and +# power4 versions of memcmp. Using different register defines came +# about while I was wrestling with the code, trying to find spare +# registers at one stage. I found it much simpler if we refer to a reg +# by the same name throughout a function, so it's better if short-term +# multiple use regs like rTMP are referred to using their register +# number. I made the cr field usage changes when attempting to reload +# rWORDn regs in the exit path to byte swap before comparing when +# little-endian. That proved a bad idea due to the pipelining involved +# in the main loop; Offsets to reload the regs were different first +# time around the loop.. Anyway, I left the cr field usage changes in +# place for consistency. +# +# Aside from these more-or-less cosmetic changes, I fixed a number of +# places where an early exit path restores regs unnecessarily, removed +# some dead code, and optimised one or two exits. +# +# * sysdeps/powerpc/powerpc64/power7/memcmp.S: Add little-endian support. +# Formatting. Consistently use rXXX register defines or rN defines. +# Use early exit labels that avoid restoring unused non-volatile regs. +# Make cr field use more consistent with rWORDn compares. Rename +# regs used as shift registers for unaligned loop, using rN defines +# for short lifetime/multiple use regs. +# * sysdeps/powerpc/powerpc64/power4/memcmp.S: Likewise. +# * sysdeps/powerpc/powerpc32/power7/memcmp.S: Likewise. Exit with +# addi 1,1,64 to pop stack frame. Simplify return value code. +# * sysdeps/powerpc/powerpc32/power4/memcmp.S: Likewise. 
+# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/memcmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/memcmp.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/memcmp.S 2014-05-28 19:22:37.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/memcmp.S 2014-05-28 23:55:52.000000000 -0500 +@@ -1,4 +1,4 @@ +-/* Optimized strcmp implementation for PowerPC64. ++/* Optimized strcmp implementation for PowerPC32. + Copyright (C) 2003, 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + +@@ -20,13 +20,14 @@ + #include + #include + +-/* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */ +- ++/* int [r3] memcmp (const char *s1 [r3], ++ const char *s2 [r4], ++ size_t size [r5]) */ ++ + .machine power4 + EALIGN (BP_SYM(memcmp), 4, 0) + CALL_MCOUNT + +-#define rTMP r0 + #define rRTN r3 + #define rSTR1 r3 /* first string arg */ + #define rSTR2 r4 /* second string arg */ +@@ -37,33 +38,32 @@ + #define rWORD4 r9 /* next word in s2 */ + #define rWORD5 r10 /* next word in s1 */ + #define rWORD6 r11 /* next word in s2 */ +-#define rBITDIF r12 /* bits that differ in s1 & s2 words */ + #define rWORD7 r30 /* next word in s1 */ + #define rWORD8 r31 /* next word in s2 */ + +- xor rTMP, rSTR2, rSTR1 ++ xor r0, rSTR2, rSTR1 + cmplwi cr6, rN, 0 + cmplwi cr1, rN, 12 +- clrlwi. rTMP, rTMP, 30 +- clrlwi rBITDIF, rSTR1, 30 +- cmplwi cr5, rBITDIF, 0 ++ clrlwi. r0, r0, 30 ++ clrlwi r12, rSTR1, 30 ++ cmplwi cr5, r12, 0 + beq- cr6, L(zeroLength) +- dcbt 0,rSTR1 +- dcbt 0,rSTR2 ++ dcbt 0, rSTR1 ++ dcbt 0, rSTR2 + /* If less than 8 bytes or not aligned, use the unaligned + byte loop. */ + blt cr1, L(bytealigned) +- stwu 1,-64(1) ++ stwu 1, -64(r1) + cfi_adjust_cfa_offset(64) +- stw r31,48(1) +- cfi_offset(31,(48-64)) +- stw r30,44(1) +- cfi_offset(30,(44-64)) ++ stw rWORD8, 48(r1) ++ cfi_offset(rWORD8, (48-64)) ++ stw rWORD7, 44(r1) ++ cfi_offset(rWORD7, (44-64)) + bne L(unaligned) + /* At this point we know both strings have the same alignment and the +- compare length is at least 8 bytes. rBITDIF contains the low order ++ compare length is at least 8 bytes. r12 contains the low order + 2 bits of rSTR1 and cr5 contains the result of the logical compare +- of rBITDIF to 0. If rBITDIF == 0 then we are already word ++ of r12 to 0. If r12 == 0 then we are already word + aligned and can perform the word aligned loop. + + Otherwise we know the two strings have the same alignment (but not +@@ -72,74 +72,95 @@ + eliminate bits preceeding the first byte. Since we want to join the + normal (word aligned) compare loop, starting at the second word, + we need to adjust the length (rN) and special case the loop +- versioning for the first word. This insures that the loop count is ++ versioning for the first word. This ensures that the loop count is + correct and the first word (shifted) is in the expected register pair. */ +- .align 4 ++ .align 4 + L(samealignment): + clrrwi rSTR1, rSTR1, 2 + clrrwi rSTR2, rSTR2, 2 + beq cr5, L(Waligned) +- add rN, rN, rBITDIF +- slwi r11, rBITDIF, 3 +- srwi rTMP, rN, 4 /* Divide by 16 */ +- andi. rBITDIF, rN, 12 /* Get the word remainder */ ++ add rN, rN, r12 ++ slwi rWORD6, r12, 3 ++ srwi r0, rN, 4 /* Divide by 16 */ ++ andi. 
r12, rN, 12 /* Get the word remainder */ ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) +- cmplwi cr1, rBITDIF, 8 ++#endif ++ cmplwi cr1, r12, 8 + cmplwi cr7, rN, 16 + clrlwi rN, rN, 30 + beq L(dPs4) +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + bgt cr1, L(dPs3) + beq cr1, L(dPs2) + + /* Remainder is 4 */ +- .align 3 ++ .align 3 + L(dsP1): +- slw rWORD5, rWORD1, r11 +- slw rWORD6, rWORD2, r11 ++ slw rWORD5, rWORD1, rWORD6 ++ slw rWORD6, rWORD2, rWORD6 + cmplw cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) + /* Do something useful in this cycle since we have to branch anyway. */ ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +- cmplw cr0, rWORD1, rWORD2 ++#endif ++ cmplw cr7, rWORD1, rWORD2 + b L(dP1e) + /* Remainder is 8 */ +- .align 4 ++ .align 4 + L(dPs2): +- slw rWORD5, rWORD1, r11 +- slw rWORD6, rWORD2, r11 ++ slw rWORD5, rWORD1, rWORD6 ++ slw rWORD6, rWORD2, rWORD6 + cmplw cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) + /* Do something useful in this cycle since we have to branch anyway. */ ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD7, 4(rSTR1) + lwz rWORD8, 4(rSTR2) ++#endif + cmplw cr5, rWORD7, rWORD8 + b L(dP2e) + /* Remainder is 12 */ +- .align 4 ++ .align 4 + L(dPs3): +- slw rWORD3, rWORD1, r11 +- slw rWORD4, rWORD2, r11 ++ slw rWORD3, rWORD1, rWORD6 ++ slw rWORD4, rWORD2, rWORD6 + cmplw cr1, rWORD3, rWORD4 + b L(dP3e) + /* Count is a multiple of 16, remainder is 0 */ +- .align 4 ++ .align 4 + L(dPs4): +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ +- slw rWORD1, rWORD1, r11 +- slw rWORD2, rWORD2, r11 +- cmplw cr0, rWORD1, rWORD2 ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ ++ slw rWORD1, rWORD1, rWORD6 ++ slw rWORD2, rWORD2, rWORD6 ++ cmplw cr7, rWORD1, rWORD2 + b L(dP4e) + + /* At this point we know both strings are word aligned and the + compare length is at least 8 bytes. */ +- .align 4 ++ .align 4 + L(Waligned): +- andi. rBITDIF, rN, 12 /* Get the word remainder */ +- srwi rTMP, rN, 4 /* Divide by 16 */ +- cmplwi cr1, rBITDIF, 8 ++ andi. r12, rN, 12 /* Get the word remainder */ ++ srwi r0, rN, 4 /* Divide by 16 */ ++ cmplwi cr1, r12, 8 + cmplwi cr7, rN, 16 + clrlwi rN, rN, 30 + beq L(dP4) +@@ -147,177 +168,352 @@ + beq cr1, L(dP2) + + /* Remainder is 4 */ +- .align 4 ++ .align 4 + L(dP1): +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + /* Normally we'd use rWORD7/rWORD8 here, but since we might exit early + (8-15 byte compare), we want to use only volatile registers. This + means we can avoid restoring non-volatile registers since we did not + change any on the early exit path. The key here is the non-early + exit path only cares about the condition code (cr5), not about which + register pair was used. 
*/ ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD5, 0(rSTR1) + lwz rWORD6, 0(rSTR2) ++#endif + cmplw cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +- cmplw cr0, rWORD1, rWORD2 ++#endif ++ cmplw cr7, rWORD1, rWORD2 + L(dP1e): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD3, 8(rSTR1) + lwz rWORD4, 8(rSTR2) ++#endif + cmplw cr1, rWORD3, rWORD4 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD5, 12(rSTR1) + lwz rWORD6, 12(rSTR2) ++#endif + cmplw cr6, rWORD5, rWORD6 +- bne cr5, L(dLcr5) +- bne cr0, L(dLcr0) +- ++ bne cr5, L(dLcr5x) ++ bne cr7, L(dLcr7x) ++ ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwzu rWORD7, 16(rSTR1) + lwzu rWORD8, 16(rSTR2) ++#endif + bne cr1, L(dLcr1) + cmplw cr5, rWORD7, rWORD8 + bdnz L(dLoop) + bne cr6, L(dLcr6) +- lwz r30,44(1) +- lwz r31,48(1) +- .align 3 ++ lwz rWORD7, 44(r1) ++ lwz rWORD8, 48(r1) ++ .align 3 + L(dP1x): + slwi. r12, rN, 3 +- bne cr5, L(dLcr5) ++ bne cr5, L(dLcr5x) + subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */ +- lwz 1,0(1) ++ addi 1, 1, 64 ++ cfi_adjust_cfa_offset(-64) + bne L(d00) + li rRTN, 0 + blr + + /* Remainder is 8 */ +- .align 4 ++ .align 4 ++ cfi_adjust_cfa_offset(64) + L(dP2): +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD5, 0(rSTR1) + lwz rWORD6, 0(rSTR2) ++#endif + cmplw cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD7, 4(rSTR1) + lwz rWORD8, 4(rSTR2) ++#endif + cmplw cr5, rWORD7, rWORD8 + L(dP2e): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD1, 8(rSTR1) + lwz rWORD2, 8(rSTR2) +- cmplw cr0, rWORD1, rWORD2 ++#endif ++ cmplw cr7, rWORD1, rWORD2 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD3, 12(rSTR1) + lwz rWORD4, 12(rSTR2) ++#endif + cmplw cr1, rWORD3, rWORD4 ++#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 ++#endif + bne cr6, L(dLcr6) + bne cr5, L(dLcr5) + b L(dLoop2) + /* Again we are on a early exit path (16-23 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ +- .align 4 ++ .align 4 + L(dP2x): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD3, 4(rSTR1) + lwz rWORD4, 4(rSTR2) +- cmplw cr5, rWORD3, rWORD4 ++#endif ++ cmplw cr1, rWORD3, rWORD4 + slwi. 
r12, rN, 3 +- bne cr6, L(dLcr6) ++ bne cr6, L(dLcr6x) ++#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +- bne cr5, L(dLcr5) ++#endif ++ bne cr1, L(dLcr1x) + subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */ +- lwz 1,0(1) ++ addi 1, 1, 64 ++ cfi_adjust_cfa_offset(-64) + bne L(d00) + li rRTN, 0 + blr + + /* Remainder is 12 */ +- .align 4 ++ .align 4 ++ cfi_adjust_cfa_offset(64) + L(dP3): +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD3, 0(rSTR1) + lwz rWORD4, 0(rSTR2) ++#endif + cmplw cr1, rWORD3, rWORD4 + L(dP3e): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD5, 4(rSTR1) + lwz rWORD6, 4(rSTR2) ++#endif + cmplw cr6, rWORD5, rWORD6 + blt cr7, L(dP3x) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD7, 8(rSTR1) + lwz rWORD8, 8(rSTR2) ++#endif + cmplw cr5, rWORD7, rWORD8 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD1, 12(rSTR1) + lwz rWORD2, 12(rSTR2) +- cmplw cr0, rWORD1, rWORD2 ++#endif ++ cmplw cr7, rWORD1, rWORD2 ++#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 ++#endif + bne cr1, L(dLcr1) + bne cr6, L(dLcr6) + b L(dLoop1) + /* Again we are on a early exit path (24-31 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ +- .align 4 ++ .align 4 + L(dP3x): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD1, 8(rSTR1) + lwz rWORD2, 8(rSTR2) +- cmplw cr5, rWORD1, rWORD2 ++#endif ++ cmplw cr7, rWORD1, rWORD2 + slwi. r12, rN, 3 +- bne cr1, L(dLcr1) ++ bne cr1, L(dLcr1x) ++#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +- bne cr6, L(dLcr6) ++#endif ++ bne cr6, L(dLcr6x) + subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). 
*/ +- bne cr5, L(dLcr5) +- lwz 1,0(1) ++ bne cr7, L(dLcr7x) ++ addi 1, 1, 64 ++ cfi_adjust_cfa_offset(-64) + bne L(d00) + li rRTN, 0 + blr + + /* Count is a multiple of 16, remainder is 0 */ +- .align 4 ++ .align 4 ++ cfi_adjust_cfa_offset(64) + L(dP4): +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) +- cmplw cr0, rWORD1, rWORD2 ++#endif ++ cmplw cr7, rWORD1, rWORD2 + L(dP4e): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD3, 4(rSTR1) + lwz rWORD4, 4(rSTR2) ++#endif + cmplw cr1, rWORD3, rWORD4 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD5, 8(rSTR1) + lwz rWORD6, 8(rSTR2) ++#endif + cmplw cr6, rWORD5, rWORD6 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwzu rWORD7, 12(rSTR1) + lwzu rWORD8, 12(rSTR2) ++#endif + cmplw cr5, rWORD7, rWORD8 +- bne cr0, L(dLcr0) ++ bne cr7, L(dLcr7) + bne cr1, L(dLcr1) + bdz- L(d24) /* Adjust CTR as we start with +4 */ + /* This is the primary loop */ +- .align 4 ++ .align 4 + L(dLoop): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) ++#endif + cmplw cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) + L(dLoop1): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD3, 8(rSTR1) + lwz rWORD4, 8(rSTR2) ++#endif + cmplw cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) + L(dLoop2): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD5, 12(rSTR1) + lwz rWORD6, 12(rSTR2) ++#endif + cmplw cr5, rWORD7, rWORD8 +- bne cr0, L(dLcr0) ++ bne cr7, L(dLcr7) + L(dLoop3): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwzu rWORD7, 16(rSTR1) + lwzu rWORD8, 16(rSTR2) ++#endif + bne- cr1, L(dLcr1) +- cmplw cr0, rWORD1, rWORD2 ++ cmplw cr7, rWORD1, rWORD2 + bdnz+ L(dLoop) + + L(dL4): +@@ -327,7 +523,7 @@ + bne cr5, L(dLcr5) + cmplw cr5, rWORD7, rWORD8 + L(d44): +- bne cr0, L(dLcr0) ++ bne cr7, L(dLcr7) + L(d34): + bne cr1, L(dLcr1) + L(d24): +@@ -336,69 +532,82 @@ + slwi. r12, rN, 3 + bne cr5, L(dLcr5) + L(d04): +- lwz r30,44(1) +- lwz r31,48(1) +- lwz 1,0(1) ++ lwz rWORD7, 44(r1) ++ lwz rWORD8, 48(r1) ++ addi 1, 1, 64 ++ cfi_adjust_cfa_offset(-64) + subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */ + beq L(zeroLength) + /* At this point we have a remainder of 1 to 3 bytes to compare. Since + we are aligned it is safe to load the whole word, and use +- shift right to eliminate bits beyond the compare length. */ ++ shift right to eliminate bits beyond the compare length. 
*/ + L(d00): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) ++#endif + srw rWORD1, rWORD1, rN + srw rWORD2, rWORD2, rN +- cmplw rWORD1,rWORD2 +- li rRTN,0 +- beqlr +- li rRTN,1 +- bgtlr +- li rRTN,-1 +- blr +- +- .align 4 +-L(dLcr0): +- lwz r30,44(1) +- lwz r31,48(1) ++ sub rRTN, rWORD1, rWORD2 ++ blr ++ ++ .align 4 ++ cfi_adjust_cfa_offset(64) ++L(dLcr7): ++ lwz rWORD7, 44(r1) ++ lwz rWORD8, 48(r1) ++L(dLcr7x): + li rRTN, 1 +- lwz 1,0(1) +- bgtlr cr0 ++ addi 1, 1, 64 ++ cfi_adjust_cfa_offset(-64) ++ bgtlr cr7 + li rRTN, -1 + blr +- .align 4 ++ .align 4 ++ cfi_adjust_cfa_offset(64) + L(dLcr1): +- lwz r30,44(1) +- lwz r31,48(1) ++ lwz rWORD7, 44(r1) ++ lwz rWORD8, 48(r1) ++L(dLcr1x): + li rRTN, 1 +- lwz 1,0(1) ++ addi 1, 1, 64 ++ cfi_adjust_cfa_offset(-64) + bgtlr cr1 + li rRTN, -1 + blr +- .align 4 ++ .align 4 ++ cfi_adjust_cfa_offset(64) + L(dLcr6): +- lwz r30,44(1) +- lwz r31,48(1) ++ lwz rWORD7, 44(r1) ++ lwz rWORD8, 48(r1) ++L(dLcr6x): + li rRTN, 1 +- lwz 1,0(1) ++ addi 1, 1, 64 ++ cfi_adjust_cfa_offset(-64) + bgtlr cr6 + li rRTN, -1 + blr +- .align 4 ++ .align 4 ++ cfi_adjust_cfa_offset(64) + L(dLcr5): +- lwz r30,44(1) +- lwz r31,48(1) ++ lwz rWORD7, 44(r1) ++ lwz rWORD8, 48(r1) + L(dLcr5x): + li rRTN, 1 +- lwz 1,0(1) ++ addi 1, 1, 64 ++ cfi_adjust_cfa_offset(-64) + bgtlr cr5 + li rRTN, -1 + blr + +- .align 4 ++ .align 4 + L(bytealigned): +- cfi_adjust_cfa_offset(-64) +- mtctr rN /* Power4 wants mtctr 1st in dispatch group */ ++ mtctr rN /* Power4 wants mtctr 1st in dispatch group */ + + /* We need to prime this loop. This loop is swing modulo scheduled + to avoid pipe delays. The dependent instruction latencies (load to +@@ -413,7 +622,7 @@ + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + bdz- L(b11) +- cmplw cr0, rWORD1, rWORD2 ++ cmplw cr7, rWORD1, rWORD2 + lbz rWORD3, 1(rSTR1) + lbz rWORD4, 1(rSTR2) + bdz- L(b12) +@@ -421,11 +630,11 @@ + lbzu rWORD5, 2(rSTR1) + lbzu rWORD6, 2(rSTR2) + bdz- L(b13) +- .align 4 ++ .align 4 + L(bLoop): + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) +- bne- cr0, L(bLcr0) ++ bne- cr7, L(bLcr7) + + cmplw cr6, rWORD5, rWORD6 + bdz- L(b3i) +@@ -434,7 +643,7 @@ + lbzu rWORD4, 1(rSTR2) + bne- cr1, L(bLcr1) + +- cmplw cr0, rWORD1, rWORD2 ++ cmplw cr7, rWORD1, rWORD2 + bdz- L(b2i) + + lbzu rWORD5, 1(rSTR1) +@@ -451,23 +660,23 @@ + tested. In this case we must complete the pending operations + before returning. */ + L(b1i): +- bne- cr0, L(bLcr0) ++ bne- cr7, L(bLcr7) + bne- cr1, L(bLcr1) + b L(bx56) +- .align 4 ++ .align 4 + L(b2i): + bne- cr6, L(bLcr6) +- bne- cr0, L(bLcr0) ++ bne- cr7, L(bLcr7) + b L(bx34) +- .align 4 ++ .align 4 + L(b3i): + bne- cr1, L(bLcr1) + bne- cr6, L(bLcr6) + b L(bx12) +- .align 4 +-L(bLcr0): ++ .align 4 ++L(bLcr7): + li rRTN, 1 +- bgtlr cr0 ++ bgtlr cr7 + li rRTN, -1 + blr + L(bLcr1): +@@ -482,36 +691,31 @@ + blr + + L(b13): +- bne- cr0, L(bx12) ++ bne- cr7, L(bx12) + bne- cr1, L(bx34) + L(bx56): + sub rRTN, rWORD5, rWORD6 + blr + nop + L(b12): +- bne- cr0, L(bx12) ++ bne- cr7, L(bx12) + L(bx34): + sub rRTN, rWORD3, rWORD4 + blr +- + L(b11): + L(bx12): + sub rRTN, rWORD1, rWORD2 + blr +- +- .align 4 +-L(zeroLengthReturn): +- ++ .align 4 + L(zeroLength): + li rRTN, 0 + blr + +- cfi_adjust_cfa_offset(64) +- .align 4 ++ .align 4 + /* At this point we know the strings have different alignment and the +- compare length is at least 8 bytes. rBITDIF contains the low order ++ compare length is at least 8 bytes. 
r12 contains the low order + 2 bits of rSTR1 and cr5 contains the result of the logical compare +- of rBITDIF to 0. If rBITDIF == 0 then rStr1 is word aligned and can ++ of r12 to 0. If r12 == 0 then rStr1 is word aligned and can + perform the Wunaligned loop. + + Otherwise we know that rSTR1 is not aready word aligned yet. +@@ -520,79 +724,88 @@ + eliminate bits preceeding the first byte. Since we want to join the + normal (Wualigned) compare loop, starting at the second word, + we need to adjust the length (rN) and special case the loop +- versioning for the first W. This insures that the loop count is ++ versioning for the first W. This ensures that the loop count is + correct and the first W (shifted) is in the expected resister pair. */ + #define rSHL r29 /* Unaligned shift left count. */ + #define rSHR r28 /* Unaligned shift right count. */ +-#define rB r27 /* Left rotation temp for rWORD2. */ +-#define rD r26 /* Left rotation temp for rWORD4. */ +-#define rF r25 /* Left rotation temp for rWORD6. */ +-#define rH r24 /* Left rotation temp for rWORD8. */ +-#define rA r0 /* Right rotation temp for rWORD2. */ +-#define rC r12 /* Right rotation temp for rWORD4. */ +-#define rE r0 /* Right rotation temp for rWORD6. */ +-#define rG r12 /* Right rotation temp for rWORD8. */ ++#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */ ++#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */ ++#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */ ++#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */ ++ cfi_adjust_cfa_offset(64) + L(unaligned): +- stw r29,40(r1) +- cfi_offset(r29,(40-64)) ++ stw rSHL, 40(r1) ++ cfi_offset(rSHL, (40-64)) + clrlwi rSHL, rSTR2, 30 +- stw r28,36(r1) +- cfi_offset(r28,(36-64)) ++ stw rSHR, 36(r1) ++ cfi_offset(rSHR, (36-64)) + beq cr5, L(Wunaligned) +- stw r27,32(r1) +- cfi_offset(r27,(32-64)) ++ stw rWORD8_SHIFT, 32(r1) ++ cfi_offset(rWORD8_SHIFT, (32-64)) + /* Adjust the logical start of rSTR2 to compensate for the extra bits + in the 1st rSTR1 W. */ +- sub r27, rSTR2, rBITDIF ++ sub rWORD8_SHIFT, rSTR2, r12 + /* But do not attempt to address the W before that W that contains + the actual start of rSTR2. */ + clrrwi rSTR2, rSTR2, 2 +- stw r26,28(r1) +- cfi_offset(r26,(28-64)) +-/* Compute the left/right shift counts for the unalign rSTR2, ++ stw rWORD2_SHIFT, 28(r1) ++ cfi_offset(rWORD2_SHIFT, (28-64)) ++/* Compute the left/right shift counts for the unaligned rSTR2, + compensating for the logical (W aligned) start of rSTR1. */ +- clrlwi rSHL, r27, 30 ++ clrlwi rSHL, rWORD8_SHIFT, 30 + clrrwi rSTR1, rSTR1, 2 +- stw r25,24(r1) +- cfi_offset(r25,(24-64)) ++ stw rWORD4_SHIFT, 24(r1) ++ cfi_offset(rWORD4_SHIFT, (24-64)) + slwi rSHL, rSHL, 3 +- cmplw cr5, r27, rSTR2 +- add rN, rN, rBITDIF +- slwi r11, rBITDIF, 3 +- stw r24,20(r1) +- cfi_offset(r24,(20-64)) ++ cmplw cr5, rWORD8_SHIFT, rSTR2 ++ add rN, rN, r12 ++ slwi rWORD6, r12, 3 ++ stw rWORD6_SHIFT, 20(r1) ++ cfi_offset(rWORD6_SHIFT, (20-64)) + subfic rSHR, rSHL, 32 +- srwi rTMP, rN, 4 /* Divide by 16 */ +- andi. rBITDIF, rN, 12 /* Get the W remainder */ ++ srwi r0, rN, 4 /* Divide by 16 */ ++ andi. r12, rN, 12 /* Get the W remainder */ + /* We normally need to load 2 Ws to start the unaligned rSTR2, but in + this special case those bits may be discarded anyway. Also we + must avoid loading a W where none of the bits are part of rSTR2 as + this may cross a page boundary and cause a page fault. 
*/ + li rWORD8, 0 + blt cr5, L(dus0) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD8, 0(rSTR2) +- la rSTR2, 4(rSTR2) ++ addi rSTR2, rSTR2, 4 ++#endif + slw rWORD8, rWORD8, rSHL + + L(dus0): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) +- cmplwi cr1, rBITDIF, 8 ++#endif ++ cmplwi cr1, r12, 8 + cmplwi cr7, rN, 16 +- srw rG, rWORD2, rSHR ++ srw r12, rWORD2, rSHR + clrlwi rN, rN, 30 + beq L(duPs4) +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ +- or rWORD8, rG, rWORD8 ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ ++ or rWORD8, r12, rWORD8 + bgt cr1, L(duPs3) + beq cr1, L(duPs2) + + /* Remainder is 4 */ +- .align 4 ++ .align 4 + L(dusP1): +- slw rB, rWORD2, rSHL +- slw rWORD7, rWORD1, r11 +- slw rWORD8, rWORD8, r11 ++ slw rWORD8_SHIFT, rWORD2, rSHL ++ slw rWORD7, rWORD1, rWORD6 ++ slw rWORD8, rWORD8, rWORD6 + bge cr7, L(duP1e) + /* At this point we exit early with the first word compare + complete and remainder of 0 to 3 bytes. See L(du14) for details on +@@ -602,95 +815,133 @@ + bne cr5, L(duLcr5) + cmplw cr7, rN, rSHR + beq L(duZeroReturn) +- li rA, 0 ++ li r0, 0 + ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD2, 4(rSTR2) +- srw rA, rWORD2, rSHR ++#endif ++ srw r0, rWORD2, rSHR + b L(dutrim) + /* Remainder is 8 */ +- .align 4 ++ .align 4 + L(duPs2): +- slw rH, rWORD2, rSHL +- slw rWORD5, rWORD1, r11 +- slw rWORD6, rWORD8, r11 ++ slw rWORD6_SHIFT, rWORD2, rSHL ++ slw rWORD5, rWORD1, rWORD6 ++ slw rWORD6, rWORD8, rWORD6 + b L(duP2e) + /* Remainder is 12 */ +- .align 4 ++ .align 4 + L(duPs3): +- slw rF, rWORD2, rSHL +- slw rWORD3, rWORD1, r11 +- slw rWORD4, rWORD8, r11 ++ slw rWORD4_SHIFT, rWORD2, rSHL ++ slw rWORD3, rWORD1, rWORD6 ++ slw rWORD4, rWORD8, rWORD6 + b L(duP3e) + /* Count is a multiple of 16, remainder is 0 */ +- .align 4 ++ .align 4 + L(duPs4): +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ +- or rWORD8, rG, rWORD8 +- slw rD, rWORD2, rSHL +- slw rWORD1, rWORD1, r11 +- slw rWORD2, rWORD8, r11 ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ ++ or rWORD8, r12, rWORD8 ++ slw rWORD2_SHIFT, rWORD2, rSHL ++ slw rWORD1, rWORD1, rWORD6 ++ slw rWORD2, rWORD8, rWORD6 + b L(duP4e) + + /* At this point we know rSTR1 is word aligned and the + compare length is at least 8 bytes. */ +- .align 4 ++ .align 4 + L(Wunaligned): +- stw r27,32(r1) +- cfi_offset(r27,(32-64)) ++ stw rWORD8_SHIFT, 32(r1) ++ cfi_offset(rWORD8_SHIFT, (32-64)) + clrrwi rSTR2, rSTR2, 2 +- stw r26,28(r1) +- cfi_offset(r26,(28-64)) +- srwi rTMP, rN, 4 /* Divide by 16 */ +- stw r25,24(r1) +- cfi_offset(r25,(24-64)) +- andi. rBITDIF, rN, 12 /* Get the W remainder */ +- stw r24,20(r1) +- cfi_offset(r24,(20-64)) ++ stw rWORD2_SHIFT, 28(r1) ++ cfi_offset(rWORD2_SHIFT, (28-64)) ++ srwi r0, rN, 4 /* Divide by 16 */ ++ stw rWORD4_SHIFT, 24(r1) ++ cfi_offset(rWORD4_SHIFT, (24-64)) ++ andi. 
r12, rN, 12 /* Get the W remainder */ ++ stw rWORD6_SHIFT, 20(r1) ++ cfi_offset(rWORD6_SHIFT, (20-64)) + slwi rSHL, rSHL, 3 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR2, rSTR2, 4 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD6, 0(rSTR2) + lwzu rWORD8, 4(rSTR2) +- cmplwi cr1, rBITDIF, 8 ++#endif ++ cmplwi cr1, r12, 8 + cmplwi cr7, rN, 16 + clrlwi rN, rN, 30 + subfic rSHR, rSHL, 32 +- slw rH, rWORD6, rSHL ++ slw rWORD6_SHIFT, rWORD6, rSHL + beq L(duP4) +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + bgt cr1, L(duP3) + beq cr1, L(duP2) + + /* Remainder is 4 */ +- .align 4 ++ .align 4 + L(duP1): +- srw rG, rWORD8, rSHR ++ srw r12, rWORD8, rSHR ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ addi rSTR1, rSTR1, 4 ++#else + lwz rWORD7, 0(rSTR1) +- slw rB, rWORD8, rSHL +- or rWORD8, rG, rH ++#endif ++ slw rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP1x) + L(duP1e): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) ++#endif + cmplw cr5, rWORD7, rWORD8 +- srw rA, rWORD2, rSHR +- slw rD, rWORD2, rSHL +- or rWORD2, rA, rB ++ srw r0, rWORD2, rSHR ++ slw rWORD2_SHIFT, rWORD2, rSHL ++ or rWORD2, r0, rWORD8_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD3, 8(rSTR1) + lwz rWORD4, 8(rSTR2) +- cmplw cr0, rWORD1, rWORD2 +- srw rC, rWORD4, rSHR +- slw rF, rWORD4, rSHL ++#endif ++ cmplw cr7, rWORD1, rWORD2 ++ srw r12, rWORD4, rSHR ++ slw rWORD4_SHIFT, rWORD4, rSHL + bne cr5, L(duLcr5) +- or rWORD4, rC, rD ++ or rWORD4, r12, rWORD2_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD5, 12(rSTR1) + lwz rWORD6, 12(rSTR2) ++#endif + cmplw cr1, rWORD3, rWORD4 +- srw rE, rWORD6, rSHR +- slw rH, rWORD6, rSHL +- bne cr0, L(duLcr0) +- or rWORD6, rE, rF ++ srw r0, rWORD6, rSHR ++ slw rWORD6_SHIFT, rWORD6, rSHL ++ bne cr7, L(duLcr7) ++ or rWORD6, r0, rWORD4_SHIFT + cmplw cr6, rWORD5, rWORD6 + b L(duLoop3) +- .align 4 ++ .align 4 + /* At this point we exit early with the first word compare + complete and remainder of 0 to 3 bytes. See L(du14) for details on + how we handle the remaining bytes. 
*/ +@@ -700,186 +951,321 @@ + bne cr5, L(duLcr5) + cmplw cr7, rN, rSHR + beq L(duZeroReturn) +- li rA, 0 ++ li r0, 0 + ble cr7, L(dutrim) +- ld rWORD2, 8(rSTR2) +- srw rA, rWORD2, rSHR ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD2, 8(rSTR2) ++#endif ++ srw r0, rWORD2, rSHR + b L(dutrim) + /* Remainder is 8 */ +- .align 4 ++ .align 4 + L(duP2): +- srw rE, rWORD8, rSHR ++ srw r0, rWORD8, rSHR ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ addi rSTR1, rSTR1, 4 ++#else + lwz rWORD5, 0(rSTR1) +- or rWORD6, rE, rH +- slw rH, rWORD8, rSHL ++#endif ++ or rWORD6, r0, rWORD6_SHIFT ++ slw rWORD6_SHIFT, rWORD8, rSHL + L(duP2e): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD7, 4(rSTR1) + lwz rWORD8, 4(rSTR2) ++#endif + cmplw cr6, rWORD5, rWORD6 +- srw rG, rWORD8, rSHR +- slw rB, rWORD8, rSHL +- or rWORD8, rG, rH ++ srw r12, rWORD8, rSHR ++ slw rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP2x) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD1, 8(rSTR1) + lwz rWORD2, 8(rSTR2) ++#endif + cmplw cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) +- srw rA, rWORD2, rSHR +- slw rD, rWORD2, rSHL +- or rWORD2, rA, rB ++ srw r0, rWORD2, rSHR ++ slw rWORD2_SHIFT, rWORD2, rSHL ++ or rWORD2, r0, rWORD8_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD3, 12(rSTR1) + lwz rWORD4, 12(rSTR2) +- cmplw cr0, rWORD1, rWORD2 ++#endif ++ cmplw cr7, rWORD1, rWORD2 + bne cr5, L(duLcr5) +- srw rC, rWORD4, rSHR +- slw rF, rWORD4, rSHL +- or rWORD4, rC, rD ++ srw r12, rWORD4, rSHR ++ slw rWORD4_SHIFT, rWORD4, rSHL ++ or rWORD4, r12, rWORD2_SHIFT ++#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 ++#endif + cmplw cr1, rWORD3, rWORD4 + b L(duLoop2) +- .align 4 ++ .align 4 + L(duP2x): + cmplw cr5, rWORD7, rWORD8 ++#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 ++#endif + bne cr6, L(duLcr6) + slwi. 
rN, rN, 3 + bne cr5, L(duLcr5) + cmplw cr7, rN, rSHR + beq L(duZeroReturn) +- li rA, 0 ++ li r0, 0 + ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD2, 4(rSTR2) +- srw rA, rWORD2, rSHR ++#endif ++ srw r0, rWORD2, rSHR + b L(dutrim) + + /* Remainder is 12 */ +- .align 4 ++ .align 4 + L(duP3): +- srw rC, rWORD8, rSHR ++ srw r12, rWORD8, rSHR ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ addi rSTR1, rSTR1, 4 ++#else + lwz rWORD3, 0(rSTR1) +- slw rF, rWORD8, rSHL +- or rWORD4, rC, rH ++#endif ++ slw rWORD4_SHIFT, rWORD8, rSHL ++ or rWORD4, r12, rWORD6_SHIFT + L(duP3e): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD5, 4(rSTR1) + lwz rWORD6, 4(rSTR2) ++#endif + cmplw cr1, rWORD3, rWORD4 +- srw rE, rWORD6, rSHR +- slw rH, rWORD6, rSHL +- or rWORD6, rE, rF ++ srw r0, rWORD6, rSHR ++ slw rWORD6_SHIFT, rWORD6, rSHL ++ or rWORD6, r0, rWORD4_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD7, 8(rSTR1) + lwz rWORD8, 8(rSTR2) ++#endif + cmplw cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) +- srw rG, rWORD8, rSHR +- slw rB, rWORD8, rSHL +- or rWORD8, rG, rH ++ srw r12, rWORD8, rSHR ++ slw rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP3x) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD1, 12(rSTR1) + lwz rWORD2, 12(rSTR2) ++#endif + cmplw cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) +- srw rA, rWORD2, rSHR +- slw rD, rWORD2, rSHL +- or rWORD2, rA, rB ++ srw r0, rWORD2, rSHR ++ slw rWORD2_SHIFT, rWORD2, rSHL ++ or rWORD2, r0, rWORD8_SHIFT ++#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +- cmplw cr0, rWORD1, rWORD2 ++#endif ++ cmplw cr7, rWORD1, rWORD2 + b L(duLoop1) +- .align 4 ++ .align 4 + L(duP3x): ++#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 ++#endif ++#if 0 ++/* Huh? We've already branched on cr1! */ + bne cr1, L(duLcr1) ++#endif + cmplw cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + slwi. 
rN, rN, 3 + bne cr5, L(duLcr5) + cmplw cr7, rN, rSHR + beq L(duZeroReturn) +- li rA, 0 ++ li r0, 0 + ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD2, 4(rSTR2) +- srw rA, rWORD2, rSHR ++#endif ++ srw r0, rWORD2, rSHR + b L(dutrim) + + /* Count is a multiple of 16, remainder is 0 */ +- .align 4 ++ .align 4 + L(duP4): +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ +- srw rA, rWORD8, rSHR ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ ++ srw r0, rWORD8, rSHR ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ addi rSTR1, rSTR1, 4 ++#else + lwz rWORD1, 0(rSTR1) +- slw rD, rWORD8, rSHL +- or rWORD2, rA, rH ++#endif ++ slw rWORD2_SHIFT, rWORD8, rSHL ++ or rWORD2, r0, rWORD6_SHIFT + L(duP4e): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD3, 4(rSTR1) + lwz rWORD4, 4(rSTR2) +- cmplw cr0, rWORD1, rWORD2 +- srw rC, rWORD4, rSHR +- slw rF, rWORD4, rSHL +- or rWORD4, rC, rD ++#endif ++ cmplw cr7, rWORD1, rWORD2 ++ srw r12, rWORD4, rSHR ++ slw rWORD4_SHIFT, rWORD4, rSHL ++ or rWORD4, r12, rWORD2_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD5, 8(rSTR1) + lwz rWORD6, 8(rSTR2) ++#endif + cmplw cr1, rWORD3, rWORD4 +- bne cr0, L(duLcr0) +- srw rE, rWORD6, rSHR +- slw rH, rWORD6, rSHL +- or rWORD6, rE, rF ++ bne cr7, L(duLcr7) ++ srw r0, rWORD6, rSHR ++ slw rWORD6_SHIFT, rWORD6, rSHL ++ or rWORD6, r0, rWORD4_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwzu rWORD7, 12(rSTR1) + lwzu rWORD8, 12(rSTR2) ++#endif + cmplw cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) +- srw rG, rWORD8, rSHR +- slw rB, rWORD8, rSHL +- or rWORD8, rG, rH ++ srw r12, rWORD8, rSHR ++ slw rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT + cmplw cr5, rWORD7, rWORD8 + bdz- L(du24) /* Adjust CTR as we start with +4 */ + /* This is the primary loop */ +- .align 4 ++ .align 4 + L(duLoop): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) ++#endif + cmplw cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) +- srw rA, rWORD2, rSHR +- slw rD, rWORD2, rSHL +- or rWORD2, rA, rB ++ srw r0, rWORD2, rSHR ++ slw rWORD2_SHIFT, rWORD2, rSHL ++ or rWORD2, r0, rWORD8_SHIFT + L(duLoop1): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD3, 8(rSTR1) + lwz rWORD4, 8(rSTR2) ++#endif + cmplw cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) +- srw rC, rWORD4, rSHR +- slw rF, rWORD4, rSHL +- or rWORD4, rC, rD ++ srw r12, rWORD4, rSHR ++ slw rWORD4_SHIFT, rWORD4, rSHL ++ or rWORD4, r12, rWORD2_SHIFT + L(duLoop2): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD5, 12(rSTR1) + lwz rWORD6, 12(rSTR2) ++#endif + cmplw cr5, rWORD7, rWORD8 +- bne cr0, L(duLcr0) +- srw rE, rWORD6, rSHR +- slw rH, rWORD6, rSHL +- or rWORD6, rE, rF ++ bne cr7, L(duLcr7) ++ srw r0, rWORD6, rSHR ++ slw rWORD6_SHIFT, rWORD6, rSHL ++ or rWORD6, r0, rWORD4_SHIFT + L(duLoop3): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, 
rSTR2, 4 ++#else + lwzu rWORD7, 16(rSTR1) + lwzu rWORD8, 16(rSTR2) +- cmplw cr0, rWORD1, rWORD2 ++#endif ++ cmplw cr7, rWORD1, rWORD2 + bne- cr1, L(duLcr1) +- srw rG, rWORD8, rSHR +- slw rB, rWORD8, rSHL +- or rWORD8, rG, rH ++ srw r12, rWORD8, rSHR ++ slw rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT + bdnz+ L(duLoop) + + L(duL4): ++#if 0 ++/* Huh? We've already branched on cr1! */ + bne cr1, L(duLcr1) ++#endif + cmplw cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + cmplw cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + cmplw cr5, rWORD7, rWORD8 + L(du44): +- bne cr0, L(duLcr0) ++ bne cr7, L(duLcr7) + L(du34): + bne cr1, L(duLcr1) + L(du24): +@@ -889,95 +1275,101 @@ + bne cr5, L(duLcr5) + /* At this point we have a remainder of 1 to 3 bytes to compare. We use + shift right to eliminate bits beyond the compare length. ++ This allows the use of word subtract to compute the final result. + + However it may not be safe to load rWORD2 which may be beyond the + string length. So we compare the bit length of the remainder to + the right shift count (rSHR). If the bit count is less than or equal + we do not need to load rWORD2 (all significant bits are already in +- rB). */ ++ rWORD8_SHIFT). */ + cmplw cr7, rN, rSHR + beq L(duZeroReturn) +- li rA, 0 ++ li r0, 0 + ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 4 ++#else + lwz rWORD2, 4(rSTR2) +- srw rA, rWORD2, rSHR +- .align 4 ++#endif ++ srw r0, rWORD2, rSHR ++ .align 4 + L(dutrim): ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++#else + lwz rWORD1, 4(rSTR1) +- lwz r31,48(1) ++#endif ++ lwz rWORD8, 48(r1) + subfic rN, rN, 32 /* Shift count is 32 - (rN * 8). */ +- or rWORD2, rA, rB +- lwz r30,44(1) +- lwz r29,40(r1) ++ or rWORD2, r0, rWORD8_SHIFT ++ lwz rWORD7, 44(r1) ++ lwz rSHL, 40(r1) + srw rWORD1, rWORD1, rN + srw rWORD2, rWORD2, rN +- lwz r28,36(r1) +- lwz r27,32(r1) +- cmplw rWORD1,rWORD2 +- li rRTN,0 +- beq L(dureturn26) +- li rRTN,1 +- bgt L(dureturn26) +- li rRTN,-1 +- b L(dureturn26) +- .align 4 +-L(duLcr0): +- lwz r31,48(1) +- lwz r30,44(1) +- li rRTN, 1 +- bgt cr0, L(dureturn29) +- lwz r29,40(r1) +- lwz r28,36(r1) ++ lwz rSHR, 36(r1) ++ lwz rWORD8_SHIFT, 32(r1) ++ sub rRTN, rWORD1, rWORD2 ++ b L(dureturn26) ++ .align 4 ++L(duLcr7): ++ lwz rWORD8, 48(r1) ++ lwz rWORD7, 44(r1) ++ li rRTN, 1 ++ bgt cr7, L(dureturn29) ++ lwz rSHL, 40(r1) ++ lwz rSHR, 36(r1) + li rRTN, -1 + b L(dureturn27) +- .align 4 ++ .align 4 + L(duLcr1): +- lwz r31,48(1) +- lwz r30,44(1) ++ lwz rWORD8, 48(r1) ++ lwz rWORD7, 44(r1) + li rRTN, 1 + bgt cr1, L(dureturn29) +- lwz r29,40(r1) +- lwz r28,36(r1) ++ lwz rSHL, 40(r1) ++ lwz rSHR, 36(r1) + li rRTN, -1 + b L(dureturn27) +- .align 4 ++ .align 4 + L(duLcr6): +- lwz r31,48(1) +- lwz r30,44(1) ++ lwz rWORD8, 48(r1) ++ lwz rWORD7, 44(r1) + li rRTN, 1 + bgt cr6, L(dureturn29) +- lwz r29,40(r1) +- lwz r28,36(r1) ++ lwz rSHL, 40(r1) ++ lwz rSHR, 36(r1) + li rRTN, -1 + b L(dureturn27) +- .align 4 ++ .align 4 + L(duLcr5): +- lwz r31,48(1) +- lwz r30,44(1) ++ lwz rWORD8, 48(r1) ++ lwz rWORD7, 44(r1) + li rRTN, 1 + bgt cr5, L(dureturn29) +- lwz r29,40(r1) +- lwz r28,36(r1) ++ lwz rSHL, 40(r1) ++ lwz rSHR, 36(r1) + li rRTN, -1 + b L(dureturn27) + .align 3 + L(duZeroReturn): +- li rRTN,0 ++ li rRTN, 0 + .align 4 + L(dureturn): +- lwz r31,48(1) +- lwz r30,44(1) ++ lwz rWORD8, 48(r1) ++ lwz rWORD7, 44(r1) + L(dureturn29): +- lwz r29,40(r1) +- lwz r28,36(r1) ++ lwz rSHL, 40(r1) ++ lwz rSHR, 36(r1) + L(dureturn27): +- lwz r27,32(r1) ++ lwz rWORD8_SHIFT, 32(r1) + L(dureturn26): 
+- lwz r26,28(r1) ++ lwz rWORD2_SHIFT, 28(r1) + L(dureturn25): +- lwz r25,24(r1) +- lwz r24,20(r1) +- lwz 1,0(1) ++ lwz rWORD4_SHIFT, 24(r1) ++ lwz rWORD6_SHIFT, 20(r1) ++ addi 1, 1, 64 ++ cfi_adjust_cfa_offset(-64) + blr + END (BP_SYM (memcmp)) + +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memcmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memcmp.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memcmp.S 2014-05-28 19:22:37.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memcmp.S 2014-05-28 21:44:57.000000000 -0500 +@@ -25,10 +25,9 @@ + size_t size [r5]) */ + + .machine power7 +-EALIGN (BP_SYM(memcmp),4,0) ++EALIGN (BP_SYM(memcmp), 4, 0) + CALL_MCOUNT + +-#define rTMP r0 + #define rRTN r3 + #define rSTR1 r3 /* first string arg */ + #define rSTR2 r4 /* second string arg */ +@@ -39,35 +38,32 @@ + #define rWORD4 r9 /* next word in s2 */ + #define rWORD5 r10 /* next word in s1 */ + #define rWORD6 r11 /* next word in s2 */ +-#define rBITDIF r12 /* bits that differ in s1 & s2 words */ + #define rWORD7 r30 /* next word in s1 */ + #define rWORD8 r31 /* next word in s2 */ + +- xor rTMP,rSTR2,rSTR1 +- cmplwi cr6,rN,0 +- cmplwi cr1,rN,12 +- clrlwi. rTMP,rTMP,30 +- clrlwi rBITDIF,rSTR1,30 +- cmplwi cr5,rBITDIF,0 +- beq- cr6,L(zeroLength) +- dcbt 0,rSTR1 +- dcbt 0,rSTR2 +- +- /* If less than 8 bytes or not aligned, use the unaligned +- byte loop. */ +- +- blt cr1,L(bytealigned) +- stwu 1,-64(1) ++ xor r0, rSTR2, rSTR1 ++ cmplwi cr6, rN, 0 ++ cmplwi cr1, rN, 12 ++ clrlwi. r0, r0, 30 ++ clrlwi r12, rSTR1, 30 ++ cmplwi cr5, r12, 0 ++ beq- cr6, L(zeroLength) ++ dcbt 0, rSTR1 ++ dcbt 0, rSTR2 ++/* If less than 8 bytes or not aligned, use the unaligned ++ byte loop. */ ++ blt cr1, L(bytealigned) ++ stwu 1, -64(r1) + cfi_adjust_cfa_offset(64) +- stw r31,48(1) +- cfi_offset(31,(48-64)) +- stw r30,44(1) +- cfi_offset(30,(44-64)) ++ stw rWORD8, 48(r1) ++ cfi_offset(rWORD8, (48-64)) ++ stw rWORD7, 44(r1) ++ cfi_offset(rWORD7, (44-64)) + bne L(unaligned) + /* At this point we know both strings have the same alignment and the +- compare length is at least 8 bytes. rBITDIF contains the low order ++ compare length is at least 8 bytes. r12 contains the low order + 2 bits of rSTR1 and cr5 contains the result of the logical compare +- of rBITDIF to 0. If rBITDIF == 0 then we are already word ++ of r12 to 0. If r12 == 0 then we are already word + aligned and can perform the word aligned loop. + + Otherwise we know the two strings have the same alignment (but not +@@ -76,332 +72,541 @@ + eliminate bits preceeding the first byte. Since we want to join the + normal (word aligned) compare loop, starting at the second word, + we need to adjust the length (rN) and special case the loop +- versioning for the first word. This insures that the loop count is ++ versioning for the first word. This ensures that the loop count is + correct and the first word (shifted) is in the expected register pair. */ + .align 4 + L(samealignment): +- clrrwi rSTR1,rSTR1,2 +- clrrwi rSTR2,rSTR2,2 +- beq cr5,L(Waligned) +- add rN,rN,rBITDIF +- slwi r11,rBITDIF,3 +- srwi rTMP,rN,4 /* Divide by 16 */ +- andi. rBITDIF,rN,12 /* Get the word remainder */ +- lwz rWORD1,0(rSTR1) +- lwz rWORD2,0(rSTR2) +- cmplwi cr1,rBITDIF,8 +- cmplwi cr7,rN,16 +- clrlwi rN,rN,30 ++ clrrwi rSTR1, rSTR1, 2 ++ clrrwi rSTR2, rSTR2, 2 ++ beq cr5, L(Waligned) ++ add rN, rN, r12 ++ slwi rWORD6, r12, 3 ++ srwi r0, rN, 4 /* Divide by 16 */ ++ andi. 
r12, rN, 12 /* Get the word remainder */ ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD1, 0(rSTR1) ++ lwz rWORD2, 0(rSTR2) ++#endif ++ cmplwi cr1, r12, 8 ++ cmplwi cr7, rN, 16 ++ clrlwi rN, rN, 30 + beq L(dPs4) +- mtctr rTMP +- bgt cr1,L(dPs3) +- beq cr1,L(dPs2) ++ mtctr r0 ++ bgt cr1, L(dPs3) ++ beq cr1, L(dPs2) + + /* Remainder is 4 */ + .align 3 + L(dsP1): +- slw rWORD5,rWORD1,r11 +- slw rWORD6,rWORD2,r11 +- cmplw cr5,rWORD5,rWORD6 +- blt cr7,L(dP1x) ++ slw rWORD5, rWORD1, rWORD6 ++ slw rWORD6, rWORD2, rWORD6 ++ cmplw cr5, rWORD5, rWORD6 ++ blt cr7, L(dP1x) + /* Do something useful in this cycle since we have to branch anyway. */ +- lwz rWORD1,4(rSTR1) +- lwz rWORD2,4(rSTR2) +- cmplw cr0,rWORD1,rWORD2 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD1, 4(rSTR1) ++ lwz rWORD2, 4(rSTR2) ++#endif ++ cmplw cr7, rWORD1, rWORD2 + b L(dP1e) + /* Remainder is 8 */ + .align 4 + L(dPs2): +- slw rWORD5,rWORD1,r11 +- slw rWORD6,rWORD2,r11 +- cmplw cr6,rWORD5,rWORD6 +- blt cr7,L(dP2x) ++ slw rWORD5, rWORD1, rWORD6 ++ slw rWORD6, rWORD2, rWORD6 ++ cmplw cr6, rWORD5, rWORD6 ++ blt cr7, L(dP2x) + /* Do something useful in this cycle since we have to branch anyway. */ +- lwz rWORD7,4(rSTR1) +- lwz rWORD8,4(rSTR2) +- cmplw cr5,rWORD7,rWORD8 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD7, 4(rSTR1) ++ lwz rWORD8, 4(rSTR2) ++#endif ++ cmplw cr5, rWORD7, rWORD8 + b L(dP2e) + /* Remainder is 12 */ + .align 4 + L(dPs3): +- slw rWORD3,rWORD1,r11 +- slw rWORD4,rWORD2,r11 +- cmplw cr1,rWORD3,rWORD4 ++ slw rWORD3, rWORD1, rWORD6 ++ slw rWORD4, rWORD2, rWORD6 ++ cmplw cr1, rWORD3, rWORD4 + b L(dP3e) + /* Count is a multiple of 16, remainder is 0 */ + .align 4 + L(dPs4): +- mtctr rTMP +- slw rWORD1,rWORD1,r11 +- slw rWORD2,rWORD2,r11 +- cmplw cr0,rWORD1,rWORD2 ++ mtctr r0 ++ slw rWORD1, rWORD1, rWORD6 ++ slw rWORD2, rWORD2, rWORD6 ++ cmplw cr7, rWORD1, rWORD2 + b L(dP4e) + + /* At this point we know both strings are word aligned and the + compare length is at least 8 bytes. */ + .align 4 + L(Waligned): +- andi. rBITDIF,rN,12 /* Get the word remainder */ +- srwi rTMP,rN,4 /* Divide by 16 */ +- cmplwi cr1,rBITDIF,8 +- cmplwi cr7,rN,16 +- clrlwi rN,rN,30 ++ andi. r12, rN, 12 /* Get the word remainder */ ++ srwi r0, rN, 4 /* Divide by 16 */ ++ cmplwi cr1, r12, 8 ++ cmplwi cr7, rN, 16 ++ clrlwi rN, rN, 30 + beq L(dP4) +- bgt cr1,L(dP3) +- beq cr1,L(dP2) ++ bgt cr1, L(dP3) ++ beq cr1, L(dP2) + + /* Remainder is 4 */ + .align 4 + L(dP1): +- mtctr rTMP ++ mtctr r0 + /* Normally we'd use rWORD7/rWORD8 here, but since we might exit early + (8-15 byte compare), we want to use only volatile registers. This + means we can avoid restoring non-volatile registers since we did not + change any on the early exit path. The key here is the non-early + exit path only cares about the condition code (cr5), not about which + register pair was used. 
*/ +- lwz rWORD5,0(rSTR1) +- lwz rWORD6,0(rSTR2) +- cmplw cr5,rWORD5,rWORD6 +- blt cr7,L(dP1x) +- lwz rWORD1,4(rSTR1) +- lwz rWORD2,4(rSTR2) +- cmplw cr0,rWORD1,rWORD2 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD5, 0(rSTR1) ++ lwz rWORD6, 0(rSTR2) ++#endif ++ cmplw cr5, rWORD5, rWORD6 ++ blt cr7, L(dP1x) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD1, 4(rSTR1) ++ lwz rWORD2, 4(rSTR2) ++#endif ++ cmplw cr7, rWORD1, rWORD2 + L(dP1e): +- lwz rWORD3,8(rSTR1) +- lwz rWORD4,8(rSTR2) +- cmplw cr1,rWORD3,rWORD4 +- lwz rWORD5,12(rSTR1) +- lwz rWORD6,12(rSTR2) +- cmplw cr6,rWORD5,rWORD6 +- bne cr5,L(dLcr5) +- bne cr0,L(dLcr0) +- +- lwzu rWORD7,16(rSTR1) +- lwzu rWORD8,16(rSTR2) +- bne cr1,L(dLcr1) +- cmplw cr5,rWORD7,rWORD8 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD3, 8(rSTR1) ++ lwz rWORD4, 8(rSTR2) ++#endif ++ cmplw cr1, rWORD3, rWORD4 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD5, 12(rSTR1) ++ lwz rWORD6, 12(rSTR2) ++#endif ++ cmplw cr6, rWORD5, rWORD6 ++ bne cr5, L(dLcr5x) ++ bne cr7, L(dLcr7x) ++ ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwzu rWORD7, 16(rSTR1) ++ lwzu rWORD8, 16(rSTR2) ++#endif ++ bne cr1, L(dLcr1) ++ cmplw cr5, rWORD7, rWORD8 + bdnz L(dLoop) +- bne cr6,L(dLcr6) +- lwz r30,44(1) +- lwz r31,48(1) ++ bne cr6, L(dLcr6) ++ lwz rWORD7, 44(r1) ++ lwz rWORD8, 48(r1) + .align 3 + L(dP1x): +- slwi. r12,rN,3 +- bne cr5,L(dLcr5) +- subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */ +- lwz 1,0(1) ++ slwi. r12, rN, 3 ++ bne cr5, L(dLcr5x) ++ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). 
*/ ++ addi r1, r1, 64 ++ cfi_adjust_cfa_offset(-64) + bne L(d00) +- li rRTN,0 ++ li rRTN, 0 + blr + + /* Remainder is 8 */ + .align 4 ++ cfi_adjust_cfa_offset(64) + L(dP2): +- mtctr rTMP +- lwz rWORD5,0(rSTR1) +- lwz rWORD6,0(rSTR2) +- cmplw cr6,rWORD5,rWORD6 +- blt cr7,L(dP2x) +- lwz rWORD7,4(rSTR1) +- lwz rWORD8,4(rSTR2) +- cmplw cr5,rWORD7,rWORD8 ++ mtctr r0 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD5, 0(rSTR1) ++ lwz rWORD6, 0(rSTR2) ++#endif ++ cmplw cr6, rWORD5, rWORD6 ++ blt cr7, L(dP2x) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD7, 4(rSTR1) ++ lwz rWORD8, 4(rSTR2) ++#endif ++ cmplw cr5, rWORD7, rWORD8 + L(dP2e): +- lwz rWORD1,8(rSTR1) +- lwz rWORD2,8(rSTR2) +- cmplw cr0,rWORD1,rWORD2 +- lwz rWORD3,12(rSTR1) +- lwz rWORD4,12(rSTR2) +- cmplw cr1,rWORD3,rWORD4 +- addi rSTR1,rSTR1,4 +- addi rSTR2,rSTR2,4 +- bne cr6,L(dLcr6) +- bne cr5,L(dLcr5) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD1, 8(rSTR1) ++ lwz rWORD2, 8(rSTR2) ++#endif ++ cmplw cr7, rWORD1, rWORD2 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD3, 12(rSTR1) ++ lwz rWORD4, 12(rSTR2) ++#endif ++ cmplw cr1, rWORD3, rWORD4 ++#ifndef __LITTLE_ENDIAN__ ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#endif ++ bne cr6, L(dLcr6) ++ bne cr5, L(dLcr5) + b L(dLoop2) + /* Again we are on a early exit path (16-23 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 + L(dP2x): +- lwz rWORD3,4(rSTR1) +- lwz rWORD4,4(rSTR2) +- cmplw cr5,rWORD3,rWORD4 +- slwi. r12,rN,3 +- bne cr6,L(dLcr6) +- addi rSTR1,rSTR1,4 +- addi rSTR2,rSTR2,4 +- bne cr5,L(dLcr5) +- subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */ +- lwz 1,0(1) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD3, 4(rSTR1) ++ lwz rWORD4, 4(rSTR2) ++#endif ++ cmplw cr1, rWORD3, rWORD4 ++ slwi. r12, rN, 3 ++ bne cr6, L(dLcr6x) ++#ifndef __LITTLE_ENDIAN__ ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#endif ++ bne cr1, L(dLcr1x) ++ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). 
*/ ++ addi r1, r1, 64 ++ cfi_adjust_cfa_offset(-64) + bne L(d00) +- li rRTN,0 ++ li rRTN, 0 + blr + + /* Remainder is 12 */ + .align 4 ++ cfi_adjust_cfa_offset(64) + L(dP3): +- mtctr rTMP +- lwz rWORD3,0(rSTR1) +- lwz rWORD4,0(rSTR2) +- cmplw cr1,rWORD3,rWORD4 ++ mtctr r0 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD3, 0(rSTR1) ++ lwz rWORD4, 0(rSTR2) ++#endif ++ cmplw cr1, rWORD3, rWORD4 + L(dP3e): +- lwz rWORD5,4(rSTR1) +- lwz rWORD6,4(rSTR2) +- cmplw cr6,rWORD5,rWORD6 +- blt cr7,L(dP3x) +- lwz rWORD7,8(rSTR1) +- lwz rWORD8,8(rSTR2) +- cmplw cr5,rWORD7,rWORD8 +- lwz rWORD1,12(rSTR1) +- lwz rWORD2,12(rSTR2) +- cmplw cr0,rWORD1,rWORD2 +- addi rSTR1,rSTR1,8 +- addi rSTR2,rSTR2,8 +- bne cr1,L(dLcr1) +- bne cr6,L(dLcr6) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD5, 4(rSTR1) ++ lwz rWORD6, 4(rSTR2) ++#endif ++ cmplw cr6, rWORD5, rWORD6 ++ blt cr7, L(dP3x) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD7, 8(rSTR1) ++ lwz rWORD8, 8(rSTR2) ++#endif ++ cmplw cr5, rWORD7, rWORD8 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD1, 12(rSTR1) ++ lwz rWORD2, 12(rSTR2) ++#endif ++ cmplw cr7, rWORD1, rWORD2 ++#ifndef __LITTLE_ENDIAN__ ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#endif ++ bne cr1, L(dLcr1) ++ bne cr6, L(dLcr6) + b L(dLoop1) + /* Again we are on a early exit path (24-31 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 + L(dP3x): +- lwz rWORD1,8(rSTR1) +- lwz rWORD2,8(rSTR2) +- cmplw cr5,rWORD1,rWORD2 +- slwi. r12,rN,3 +- bne cr1,L(dLcr1) +- addi rSTR1,rSTR1,8 +- addi rSTR2,rSTR2,8 +- bne cr6,L(dLcr6) +- subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */ +- bne cr5,L(dLcr5) +- lwz 1,0(1) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD1, 8(rSTR1) ++ lwz rWORD2, 8(rSTR2) ++#endif ++ cmplw cr7, rWORD1, rWORD2 ++ slwi. r12, rN, 3 ++ bne cr1, L(dLcr1x) ++#ifndef __LITTLE_ENDIAN__ ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#endif ++ bne cr6, L(dLcr6x) ++ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). 
*/ ++ bne cr7, L(dLcr7x) ++ addi r1, r1, 64 ++ cfi_adjust_cfa_offset(-64) + bne L(d00) +- li rRTN,0 ++ li rRTN, 0 + blr + + /* Count is a multiple of 16, remainder is 0 */ + .align 4 ++ cfi_adjust_cfa_offset(64) + L(dP4): +- mtctr rTMP +- lwz rWORD1,0(rSTR1) +- lwz rWORD2,0(rSTR2) +- cmplw cr0,rWORD1,rWORD2 ++ mtctr r0 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD1, 0(rSTR1) ++ lwz rWORD2, 0(rSTR2) ++#endif ++ cmplw cr7, rWORD1, rWORD2 + L(dP4e): +- lwz rWORD3,4(rSTR1) +- lwz rWORD4,4(rSTR2) +- cmplw cr1,rWORD3,rWORD4 +- lwz rWORD5,8(rSTR1) +- lwz rWORD6,8(rSTR2) +- cmplw cr6,rWORD5,rWORD6 +- lwzu rWORD7,12(rSTR1) +- lwzu rWORD8,12(rSTR2) +- cmplw cr5,rWORD7,rWORD8 +- bne cr0,L(dLcr0) +- bne cr1,L(dLcr1) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD3, 4(rSTR1) ++ lwz rWORD4, 4(rSTR2) ++#endif ++ cmplw cr1, rWORD3, rWORD4 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD5, 8(rSTR1) ++ lwz rWORD6, 8(rSTR2) ++#endif ++ cmplw cr6, rWORD5, rWORD6 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwzu rWORD7, 12(rSTR1) ++ lwzu rWORD8, 12(rSTR2) ++#endif ++ cmplw cr5, rWORD7, rWORD8 ++ bne cr7, L(dLcr7) ++ bne cr1, L(dLcr1) + bdz- L(d24) /* Adjust CTR as we start with +4 */ + /* This is the primary loop */ + .align 4 + L(dLoop): +- lwz rWORD1,4(rSTR1) +- lwz rWORD2,4(rSTR2) +- cmplw cr1,rWORD3,rWORD4 +- bne cr6,L(dLcr6) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD1, 4(rSTR1) ++ lwz rWORD2, 4(rSTR2) ++#endif ++ cmplw cr1, rWORD3, rWORD4 ++ bne cr6, L(dLcr6) + L(dLoop1): +- lwz rWORD3,8(rSTR1) +- lwz rWORD4,8(rSTR2) +- cmplw cr6,rWORD5,rWORD6 +- bne cr5,L(dLcr5) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD3, 8(rSTR1) ++ lwz rWORD4, 8(rSTR2) ++#endif ++ cmplw cr6, rWORD5, rWORD6 ++ bne cr5, L(dLcr5) + L(dLoop2): +- lwz rWORD5,12(rSTR1) +- lwz rWORD6,12(rSTR2) +- cmplw cr5,rWORD7,rWORD8 +- bne cr0,L(dLcr0) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD5, 12(rSTR1) ++ lwz rWORD6, 12(rSTR2) ++#endif ++ cmplw cr5, rWORD7, rWORD8 ++ bne cr7, L(dLcr7) + L(dLoop3): +- lwzu rWORD7,16(rSTR1) +- lwzu rWORD8,16(rSTR2) +- bne cr1,L(dLcr1) +- cmplw cr0,rWORD1,rWORD2 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwzu rWORD7, 16(rSTR1) ++ lwzu rWORD8, 16(rSTR2) ++#endif ++ bne cr1, L(dLcr1) ++ cmplw cr7, rWORD1, rWORD2 + bdnz L(dLoop) + + L(dL4): +- cmplw cr1,rWORD3,rWORD4 +- bne cr6,L(dLcr6) +- cmplw cr6,rWORD5,rWORD6 +- bne cr5,L(dLcr5) +- cmplw cr5,rWORD7,rWORD8 ++ cmplw cr1, rWORD3, rWORD4 ++ bne cr6, L(dLcr6) ++ cmplw cr6, rWORD5, rWORD6 ++ bne cr5, L(dLcr5) ++ cmplw cr5, rWORD7, rWORD8 + L(d44): +- bne cr0,L(dLcr0) ++ bne cr7, L(dLcr7) + L(d34): +- bne cr1,L(dLcr1) ++ bne cr1, L(dLcr1) + L(d24): +- bne cr6,L(dLcr6) ++ bne cr6, L(dLcr6) + L(d14): +- slwi. r12,rN,3 +- bne cr5,L(dLcr5) ++ slwi. 
r12, rN, 3 ++ bne cr5, L(dLcr5) + L(d04): +- lwz r30,44(1) +- lwz r31,48(1) +- lwz 1,0(1) +- subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */ ++ lwz rWORD7, 44(r1) ++ lwz rWORD8, 48(r1) ++ addi r1, r1, 64 ++ cfi_adjust_cfa_offset(-64) ++ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */ + beq L(zeroLength) + /* At this point we have a remainder of 1 to 3 bytes to compare. Since + we are aligned it is safe to load the whole word, and use +- shift right to eliminate bits beyond the compare length. */ ++ shift right to eliminate bits beyond the compare length. */ + L(d00): +- lwz rWORD1,4(rSTR1) +- lwz rWORD2,4(rSTR2) +- srw rWORD1,rWORD1,rN +- srw rWORD2,rWORD2,rN +- cmplw rWORD1,rWORD2 +- li rRTN,0 +- beqlr +- li rRTN,1 +- bgtlr +- li rRTN,-1 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD1, 4(rSTR1) ++ lwz rWORD2, 4(rSTR2) ++#endif ++ srw rWORD1, rWORD1, rN ++ srw rWORD2, rWORD2, rN ++ sub rRTN, rWORD1, rWORD2 + blr + + .align 4 +-L(dLcr0): +- lwz r30,44(1) +- lwz r31,48(1) +- li rRTN,1 +- lwz 1,0(1) +- bgtlr cr0 +- li rRTN,-1 ++ cfi_adjust_cfa_offset(64) ++L(dLcr7): ++ lwz rWORD7, 44(r1) ++ lwz rWORD8, 48(r1) ++L(dLcr7x): ++ li rRTN, 1 ++ addi r1, r1, 64 ++ cfi_adjust_cfa_offset(-64) ++ bgtlr cr7 ++ li rRTN, -1 + blr + .align 4 ++ cfi_adjust_cfa_offset(64) + L(dLcr1): +- lwz r30,44(1) +- lwz r31,48(1) +- li rRTN,1 +- lwz 1,0(1) ++ lwz rWORD7, 44(r1) ++ lwz rWORD8, 48(r1) ++L(dLcr1x): ++ li rRTN, 1 ++ addi r1, r1, 64 ++ cfi_adjust_cfa_offset(-64) + bgtlr cr1 +- li rRTN,-1 ++ li rRTN, -1 + blr + .align 4 ++ cfi_adjust_cfa_offset(64) + L(dLcr6): +- lwz r30,44(1) +- lwz r31,48(1) +- li rRTN,1 +- lwz 1,0(1) ++ lwz rWORD7, 44(r1) ++ lwz rWORD8, 48(r1) ++L(dLcr6x): ++ li rRTN, 1 ++ addi r1, r1, 64 ++ cfi_adjust_cfa_offset(-64) + bgtlr cr6 +- li rRTN,-1 ++ li rRTN, -1 + blr + .align 4 ++ cfi_adjust_cfa_offset(64) + L(dLcr5): +- lwz r30,44(1) +- lwz r31,48(1) ++ lwz rWORD7, 44(r1) ++ lwz rWORD8, 48(r1) + L(dLcr5x): +- li rRTN,1 +- lwz 1,0(1) ++ li rRTN, 1 ++ addi r1, r1, 64 ++ cfi_adjust_cfa_offset(-64) + bgtlr cr5 +- li rRTN,-1 ++ li rRTN, -1 + blr + + .align 4 + L(bytealigned): +- cfi_adjust_cfa_offset(-64) + mtctr rN + + /* We need to prime this loop. This loop is swing modulo scheduled +@@ -413,38 +618,39 @@ + + So we must precondition some registers and condition codes so that + we don't exit the loop early on the first iteration. 
*/ +- lbz rWORD1,0(rSTR1) +- lbz rWORD2,0(rSTR2) ++ ++ lbz rWORD1, 0(rSTR1) ++ lbz rWORD2, 0(rSTR2) + bdz L(b11) +- cmplw cr0,rWORD1,rWORD2 +- lbz rWORD3,1(rSTR1) +- lbz rWORD4,1(rSTR2) ++ cmplw cr7, rWORD1, rWORD2 ++ lbz rWORD3, 1(rSTR1) ++ lbz rWORD4, 1(rSTR2) + bdz L(b12) +- cmplw cr1,rWORD3,rWORD4 +- lbzu rWORD5,2(rSTR1) +- lbzu rWORD6,2(rSTR2) ++ cmplw cr1, rWORD3, rWORD4 ++ lbzu rWORD5, 2(rSTR1) ++ lbzu rWORD6, 2(rSTR2) + bdz L(b13) + .align 4 + L(bLoop): +- lbzu rWORD1,1(rSTR1) +- lbzu rWORD2,1(rSTR2) +- bne cr0,L(bLcr0) ++ lbzu rWORD1, 1(rSTR1) ++ lbzu rWORD2, 1(rSTR2) ++ bne cr7, L(bLcr7) + +- cmplw cr6,rWORD5,rWORD6 ++ cmplw cr6, rWORD5, rWORD6 + bdz L(b3i) + +- lbzu rWORD3,1(rSTR1) +- lbzu rWORD4,1(rSTR2) +- bne cr1,L(bLcr1) ++ lbzu rWORD3, 1(rSTR1) ++ lbzu rWORD4, 1(rSTR2) ++ bne cr1, L(bLcr1) + +- cmplw cr0,rWORD1,rWORD2 ++ cmplw cr7, rWORD1, rWORD2 + bdz L(b2i) + +- lbzu rWORD5,1(rSTR1) +- lbzu rWORD6,1(rSTR2) +- bne cr6,L(bLcr6) ++ lbzu rWORD5, 1(rSTR1) ++ lbzu rWORD6, 1(rSTR2) ++ bne cr6, L(bLcr6) + +- cmplw cr1,rWORD3,rWORD4 ++ cmplw cr1, rWORD3, rWORD4 + bdnz L(bLoop) + + /* We speculatively loading bytes before we have tested the previous +@@ -454,67 +660,62 @@ + tested. In this case we must complete the pending operations + before returning. */ + L(b1i): +- bne cr0,L(bLcr0) +- bne cr1,L(bLcr1) ++ bne cr7, L(bLcr7) ++ bne cr1, L(bLcr1) + b L(bx56) + .align 4 + L(b2i): +- bne cr6,L(bLcr6) +- bne cr0,L(bLcr0) ++ bne cr6, L(bLcr6) ++ bne cr7, L(bLcr7) + b L(bx34) + .align 4 + L(b3i): +- bne cr1,L(bLcr1) +- bne cr6,L(bLcr6) ++ bne cr1, L(bLcr1) ++ bne cr6, L(bLcr6) + b L(bx12) + .align 4 +-L(bLcr0): +- li rRTN,1 +- bgtlr cr0 +- li rRTN,-1 ++L(bLcr7): ++ li rRTN, 1 ++ bgtlr cr7 ++ li rRTN, -1 + blr + L(bLcr1): +- li rRTN,1 ++ li rRTN, 1 + bgtlr cr1 +- li rRTN,-1 ++ li rRTN, -1 + blr + L(bLcr6): +- li rRTN,1 ++ li rRTN, 1 + bgtlr cr6 +- li rRTN,-1 ++ li rRTN, -1 + blr + + L(b13): +- bne cr0,L(bx12) +- bne cr1,L(bx34) ++ bne cr7, L(bx12) ++ bne cr1, L(bx34) + L(bx56): +- sub rRTN,rWORD5,rWORD6 ++ sub rRTN, rWORD5, rWORD6 + blr + nop + L(b12): +- bne cr0,L(bx12) ++ bne cr7, L(bx12) + L(bx34): +- sub rRTN,rWORD3,rWORD4 ++ sub rRTN, rWORD3, rWORD4 + blr +- + L(b11): + L(bx12): +- sub rRTN,rWORD1,rWORD2 ++ sub rRTN, rWORD1, rWORD2 + blr +- + .align 4 +-L(zeroLengthReturn): +- + L(zeroLength): +- li rRTN,0 ++ li rRTN, 0 + blr + +- cfi_adjust_cfa_offset(64) + .align 4 + /* At this point we know the strings have different alignment and the +- compare length is at least 8 bytes. rBITDIF contains the low order ++ compare length is at least 8 bytes. r12 contains the low order + 2 bits of rSTR1 and cr5 contains the result of the logical compare +- of rBITDIF to 0. If rBITDIF == 0 then rStr1 is word aligned and can ++ of r12 to 0. If r12 == 0 then rStr1 is word aligned and can + perform the Wunaligned loop. + + Otherwise we know that rSTR1 is not aready word aligned yet. +@@ -523,465 +724,654 @@ + eliminate bits preceeding the first byte. Since we want to join the + normal (Wualigned) compare loop, starting at the second word, + we need to adjust the length (rN) and special case the loop +- versioning for the first W. This insures that the loop count is ++ versioning for the first W. This ensures that the loop count is + correct and the first W (shifted) is in the expected resister pair. */ + #define rSHL r29 /* Unaligned shift left count. */ + #define rSHR r28 /* Unaligned shift right count. */ +-#define rB r27 /* Left rotation temp for rWORD2. 
*/ +-#define rD r26 /* Left rotation temp for rWORD4. */ +-#define rF r25 /* Left rotation temp for rWORD6. */ +-#define rH r24 /* Left rotation temp for rWORD8. */ +-#define rA r0 /* Right rotation temp for rWORD2. */ +-#define rC r12 /* Right rotation temp for rWORD4. */ +-#define rE r0 /* Right rotation temp for rWORD6. */ +-#define rG r12 /* Right rotation temp for rWORD8. */ ++#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */ ++#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */ ++#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */ ++#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */ ++ cfi_adjust_cfa_offset(64) + L(unaligned): +- stw r29,40(r1) +- cfi_offset(r29,(40-64)) +- clrlwi rSHL,rSTR2,30 +- stw r28,36(r1) +- cfi_offset(r28,(36-64)) +- beq cr5,L(Wunaligned) +- stw r27,32(r1) +- cfi_offset(r27,(32-64)) ++ stw rSHL, 40(r1) ++ cfi_offset(rSHL, (40-64)) ++ clrlwi rSHL, rSTR2, 30 ++ stw rSHR, 36(r1) ++ cfi_offset(rSHR, (36-64)) ++ beq cr5, L(Wunaligned) ++ stw rWORD8_SHIFT, 32(r1) ++ cfi_offset(rWORD8_SHIFT, (32-64)) + /* Adjust the logical start of rSTR2 to compensate for the extra bits + in the 1st rSTR1 W. */ +- sub r27,rSTR2,rBITDIF ++ sub rWORD8_SHIFT, rSTR2, r12 + /* But do not attempt to address the W before that W that contains + the actual start of rSTR2. */ +- clrrwi rSTR2,rSTR2,2 +- stw r26,28(r1) +- cfi_offset(r26,(28-64)) +-/* Compute the left/right shift counts for the unalign rSTR2, ++ clrrwi rSTR2, rSTR2, 2 ++ stw rWORD2_SHIFT, 28(r1) ++ cfi_offset(rWORD2_SHIFT, (28-64)) ++/* Compute the left/right shift counts for the unaligned rSTR2, + compensating for the logical (W aligned) start of rSTR1. */ +- clrlwi rSHL,r27,30 +- clrrwi rSTR1,rSTR1,2 +- stw r25,24(r1) +- cfi_offset(r25,(24-64)) +- slwi rSHL,rSHL,3 +- cmplw cr5,r27,rSTR2 +- add rN,rN,rBITDIF +- slwi r11,rBITDIF,3 +- stw r24,20(r1) +- cfi_offset(r24,(20-64)) +- subfic rSHR,rSHL,32 +- srwi rTMP,rN,4 /* Divide by 16 */ +- andi. rBITDIF,rN,12 /* Get the W remainder */ ++ clrlwi rSHL, rWORD8_SHIFT, 30 ++ clrrwi rSTR1, rSTR1, 2 ++ stw rWORD4_SHIFT, 24(r1) ++ cfi_offset(rWORD4_SHIFT, (24-64)) ++ slwi rSHL, rSHL, 3 ++ cmplw cr5, rWORD8_SHIFT, rSTR2 ++ add rN, rN, r12 ++ slwi rWORD6, r12, 3 ++ stw rWORD6_SHIFT, 20(r1) ++ cfi_offset(rWORD6_SHIFT, (20-64)) ++ subfic rSHR, rSHL, 32 ++ srwi r0, rN, 4 /* Divide by 16 */ ++ andi. r12, rN, 12 /* Get the W remainder */ + /* We normally need to load 2 Ws to start the unaligned rSTR2, but in + this special case those bits may be discarded anyway. Also we + must avoid loading a W where none of the bits are part of rSTR2 as + this may cross a page boundary and cause a page fault. 
*/ +- li rWORD8,0 +- blt cr5,L(dus0) +- lwz rWORD8,0(rSTR2) +- la rSTR2,4(rSTR2) +- slw rWORD8,rWORD8,rSHL ++ li rWORD8, 0 ++ blt cr5, L(dus0) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD8, 0(rSTR2) ++ addi rSTR2, rSTR2, 4 ++#endif ++ slw rWORD8, rWORD8, rSHL + + L(dus0): +- lwz rWORD1,0(rSTR1) +- lwz rWORD2,0(rSTR2) +- cmplwi cr1,rBITDIF,8 +- cmplwi cr7,rN,16 +- srw rG,rWORD2,rSHR +- clrlwi rN,rN,30 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD1, 0(rSTR1) ++ lwz rWORD2, 0(rSTR2) ++#endif ++ cmplwi cr1, r12, 8 ++ cmplwi cr7, rN, 16 ++ srw r12, rWORD2, rSHR ++ clrlwi rN, rN, 30 + beq L(duPs4) +- mtctr rTMP +- or rWORD8,rG,rWORD8 +- bgt cr1,L(duPs3) +- beq cr1,L(duPs2) ++ mtctr r0 ++ or rWORD8, r12, rWORD8 ++ bgt cr1, L(duPs3) ++ beq cr1, L(duPs2) + + /* Remainder is 4 */ + .align 4 + L(dusP1): +- slw rB,rWORD2,rSHL +- slw rWORD7,rWORD1,r11 +- slw rWORD8,rWORD8,r11 +- bge cr7,L(duP1e) ++ slw rWORD8_SHIFT, rWORD2, rSHL ++ slw rWORD7, rWORD1, rWORD6 ++ slw rWORD8, rWORD8, rWORD6 ++ bge cr7, L(duP1e) + /* At this point we exit early with the first word compare + complete and remainder of 0 to 3 bytes. See L(du14) for details on + how we handle the remaining bytes. */ +- cmplw cr5,rWORD7,rWORD8 +- slwi. rN,rN,3 +- bne cr5,L(duLcr5) +- cmplw cr7,rN,rSHR ++ cmplw cr5, rWORD7, rWORD8 ++ slwi. rN, rN, 3 ++ bne cr5, L(duLcr5) ++ cmplw cr7, rN, rSHR + beq L(duZeroReturn) +- li rA,0 +- ble cr7,L(dutrim) +- lwz rWORD2,4(rSTR2) +- srw rA,rWORD2,rSHR ++ li r0, 0 ++ ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD2, 4(rSTR2) ++#endif ++ srw r0, rWORD2, rSHR + b L(dutrim) + /* Remainder is 8 */ + .align 4 + L(duPs2): +- slw rH,rWORD2,rSHL +- slw rWORD5,rWORD1,r11 +- slw rWORD6,rWORD8,r11 ++ slw rWORD6_SHIFT, rWORD2, rSHL ++ slw rWORD5, rWORD1, rWORD6 ++ slw rWORD6, rWORD8, rWORD6 + b L(duP2e) + /* Remainder is 12 */ + .align 4 + L(duPs3): +- slw rF,rWORD2,rSHL +- slw rWORD3,rWORD1,r11 +- slw rWORD4,rWORD8,r11 ++ slw rWORD4_SHIFT, rWORD2, rSHL ++ slw rWORD3, rWORD1, rWORD6 ++ slw rWORD4, rWORD8, rWORD6 + b L(duP3e) + /* Count is a multiple of 16, remainder is 0 */ + .align 4 + L(duPs4): +- mtctr rTMP +- or rWORD8,rG,rWORD8 +- slw rD,rWORD2,rSHL +- slw rWORD1,rWORD1,r11 +- slw rWORD2,rWORD8,r11 ++ mtctr r0 ++ or rWORD8, r12, rWORD8 ++ slw rWORD2_SHIFT, rWORD2, rSHL ++ slw rWORD1, rWORD1, rWORD6 ++ slw rWORD2, rWORD8, rWORD6 + b L(duP4e) + + /* At this point we know rSTR1 is word aligned and the + compare length is at least 8 bytes. */ + .align 4 + L(Wunaligned): +- stw r27,32(r1) +- cfi_offset(r27,(32-64)) +- clrrwi rSTR2,rSTR2,2 +- stw r26,28(r1) +- cfi_offset(r26,(28-64)) +- srwi rTMP,rN,4 /* Divide by 16 */ +- stw r25,24(r1) +- cfi_offset(r25,(24-64)) +- andi. rBITDIF,rN,12 /* Get the W remainder */ +- stw r24,20(r1) +- cfi_offset(r24,(24-64)) +- slwi rSHL,rSHL,3 +- lwz rWORD6,0(rSTR2) +- lwzu rWORD8,4(rSTR2) +- cmplwi cr1,rBITDIF,8 +- cmplwi cr7,rN,16 +- clrlwi rN,rN,30 +- subfic rSHR,rSHL,32 +- slw rH,rWORD6,rSHL ++ stw rWORD8_SHIFT, 32(r1) ++ cfi_offset(rWORD8_SHIFT, (32-64)) ++ clrrwi rSTR2, rSTR2, 2 ++ stw rWORD2_SHIFT, 28(r1) ++ cfi_offset(rWORD2_SHIFT, (28-64)) ++ srwi r0, rN, 4 /* Divide by 16 */ ++ stw rWORD4_SHIFT, 24(r1) ++ cfi_offset(rWORD4_SHIFT, (24-64)) ++ andi. 
r12, rN, 12 /* Get the W remainder */ ++ stw rWORD6_SHIFT, 20(r1) ++ cfi_offset(rWORD6_SHIFT, (20-64)) ++ slwi rSHL, rSHL, 3 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR2, rSTR2, 4 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD6, 0(rSTR2) ++ lwzu rWORD8, 4(rSTR2) ++#endif ++ cmplwi cr1, r12, 8 ++ cmplwi cr7, rN, 16 ++ clrlwi rN, rN, 30 ++ subfic rSHR, rSHL, 32 ++ slw rWORD6_SHIFT, rWORD6, rSHL + beq L(duP4) +- mtctr rTMP +- bgt cr1,L(duP3) +- beq cr1,L(duP2) ++ mtctr r0 ++ bgt cr1, L(duP3) ++ beq cr1, L(duP2) + + /* Remainder is 4 */ + .align 4 + L(duP1): +- srw rG,rWORD8,rSHR +- lwz rWORD7,0(rSTR1) +- slw rB,rWORD8,rSHL +- or rWORD8,rG,rH +- blt cr7,L(duP1x) ++ srw r12, rWORD8, rSHR ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ addi rSTR1, rSTR1, 4 ++#else ++ lwz rWORD7, 0(rSTR1) ++#endif ++ slw rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT ++ blt cr7, L(duP1x) + L(duP1e): +- lwz rWORD1,4(rSTR1) +- lwz rWORD2,4(rSTR2) +- cmplw cr5,rWORD7,rWORD8 +- srw rA,rWORD2,rSHR +- slw rD,rWORD2,rSHL +- or rWORD2,rA,rB +- lwz rWORD3,8(rSTR1) +- lwz rWORD4,8(rSTR2) +- cmplw cr0,rWORD1,rWORD2 +- srw rC,rWORD4,rSHR +- slw rF,rWORD4,rSHL +- bne cr5,L(duLcr5) +- or rWORD4,rC,rD +- lwz rWORD5,12(rSTR1) +- lwz rWORD6,12(rSTR2) +- cmplw cr1,rWORD3,rWORD4 +- srw rE,rWORD6,rSHR +- slw rH,rWORD6,rSHL +- bne cr0,L(duLcr0) +- or rWORD6,rE,rF +- cmplw cr6,rWORD5,rWORD6 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD1, 4(rSTR1) ++ lwz rWORD2, 4(rSTR2) ++#endif ++ cmplw cr5, rWORD7, rWORD8 ++ srw r0, rWORD2, rSHR ++ slw rWORD2_SHIFT, rWORD2, rSHL ++ or rWORD2, r0, rWORD8_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD3, 8(rSTR1) ++ lwz rWORD4, 8(rSTR2) ++#endif ++ cmplw cr7, rWORD1, rWORD2 ++ srw r12, rWORD4, rSHR ++ slw rWORD4_SHIFT, rWORD4, rSHL ++ bne cr5, L(duLcr5) ++ or rWORD4, r12, rWORD2_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD5, 12(rSTR1) ++ lwz rWORD6, 12(rSTR2) ++#endif ++ cmplw cr1, rWORD3, rWORD4 ++ srw r0, rWORD6, rSHR ++ slw rWORD6_SHIFT, rWORD6, rSHL ++ bne cr7, L(duLcr7) ++ or rWORD6, r0, rWORD4_SHIFT ++ cmplw cr6, rWORD5, rWORD6 + b L(duLoop3) + .align 4 + /* At this point we exit early with the first word compare + complete and remainder of 0 to 3 bytes. See L(du14) for details on + how we handle the remaining bytes. */ + L(duP1x): +- cmplw cr5,rWORD7,rWORD8 +- slwi. rN,rN,3 +- bne cr5,L(duLcr5) +- cmplw cr7,rN,rSHR ++ cmplw cr5, rWORD7, rWORD8 ++ slwi. 
rN, rN, 3 ++ bne cr5, L(duLcr5) ++ cmplw cr7, rN, rSHR + beq L(duZeroReturn) +- li rA,0 +- ble cr7,L(dutrim) +- ld rWORD2,8(rSTR2) +- srw rA,rWORD2,rSHR ++ li r0, 0 ++ ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD2, 8(rSTR2) ++#endif ++ srw r0, rWORD2, rSHR + b L(dutrim) + /* Remainder is 8 */ + .align 4 + L(duP2): +- srw rE,rWORD8,rSHR +- lwz rWORD5,0(rSTR1) +- or rWORD6,rE,rH +- slw rH,rWORD8,rSHL ++ srw r0, rWORD8, rSHR ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ addi rSTR1, rSTR1, 4 ++#else ++ lwz rWORD5, 0(rSTR1) ++#endif ++ or rWORD6, r0, rWORD6_SHIFT ++ slw rWORD6_SHIFT, rWORD8, rSHL + L(duP2e): +- lwz rWORD7,4(rSTR1) +- lwz rWORD8,4(rSTR2) +- cmplw cr6,rWORD5,rWORD6 +- srw rG,rWORD8,rSHR +- slw rB,rWORD8,rSHL +- or rWORD8,rG,rH +- blt cr7,L(duP2x) +- lwz rWORD1,8(rSTR1) +- lwz rWORD2,8(rSTR2) +- cmplw cr5,rWORD7,rWORD8 +- bne cr6,L(duLcr6) +- srw rA,rWORD2,rSHR +- slw rD,rWORD2,rSHL +- or rWORD2,rA,rB +- lwz rWORD3,12(rSTR1) +- lwz rWORD4,12(rSTR2) +- cmplw cr0,rWORD1,rWORD2 +- bne cr5,L(duLcr5) +- srw rC,rWORD4,rSHR +- slw rF,rWORD4,rSHL +- or rWORD4,rC,rD +- addi rSTR1,rSTR1,4 +- addi rSTR2,rSTR2,4 +- cmplw cr1,rWORD3,rWORD4 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD7, 4(rSTR1) ++ lwz rWORD8, 4(rSTR2) ++#endif ++ cmplw cr6, rWORD5, rWORD6 ++ srw r12, rWORD8, rSHR ++ slw rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT ++ blt cr7, L(duP2x) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD1, 8(rSTR1) ++ lwz rWORD2, 8(rSTR2) ++#endif ++ cmplw cr5, rWORD7, rWORD8 ++ bne cr6, L(duLcr6) ++ srw r0, rWORD2, rSHR ++ slw rWORD2_SHIFT, rWORD2, rSHL ++ or rWORD2, r0, rWORD8_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD3, 12(rSTR1) ++ lwz rWORD4, 12(rSTR2) ++#endif ++ cmplw cr7, rWORD1, rWORD2 ++ bne cr5, L(duLcr5) ++ srw r12, rWORD4, rSHR ++ slw rWORD4_SHIFT, rWORD4, rSHL ++ or rWORD4, r12, rWORD2_SHIFT ++#ifndef __LITTLE_ENDIAN__ ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#endif ++ cmplw cr1, rWORD3, rWORD4 + b L(duLoop2) + .align 4 + L(duP2x): +- cmplw cr5,rWORD7,rWORD8 +- addi rSTR1,rSTR1,4 +- addi rSTR2,rSTR2,4 +- bne cr6,L(duLcr6) +- slwi. rN,rN,3 +- bne cr5,L(duLcr5) +- cmplw cr7,rN,rSHR ++ cmplw cr5, rWORD7, rWORD8 ++#ifndef __LITTLE_ENDIAN__ ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#endif ++ bne cr6, L(duLcr6) ++ slwi. 
rN, rN, 3 ++ bne cr5, L(duLcr5) ++ cmplw cr7, rN, rSHR + beq L(duZeroReturn) +- li rA,0 +- ble cr7,L(dutrim) +- lwz rWORD2,4(rSTR2) +- srw rA,rWORD2,rSHR ++ li r0, 0 ++ ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD2, 4(rSTR2) ++#endif ++ srw r0, rWORD2, rSHR + b L(dutrim) + + /* Remainder is 12 */ + .align 4 + L(duP3): +- srw rC,rWORD8,rSHR +- lwz rWORD3,0(rSTR1) +- slw rF,rWORD8,rSHL +- or rWORD4,rC,rH ++ srw r12, rWORD8, rSHR ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ addi rSTR1, rSTR1, 4 ++#else ++ lwz rWORD3, 0(rSTR1) ++#endif ++ slw rWORD4_SHIFT, rWORD8, rSHL ++ or rWORD4, r12, rWORD6_SHIFT + L(duP3e): +- lwz rWORD5,4(rSTR1) +- lwz rWORD6,4(rSTR2) +- cmplw cr1,rWORD3,rWORD4 +- srw rE,rWORD6,rSHR +- slw rH,rWORD6,rSHL +- or rWORD6,rE,rF +- lwz rWORD7,8(rSTR1) +- lwz rWORD8,8(rSTR2) +- cmplw cr6,rWORD5,rWORD6 +- bne cr1,L(duLcr1) +- srw rG,rWORD8,rSHR +- slw rB,rWORD8,rSHL +- or rWORD8,rG,rH +- blt cr7,L(duP3x) +- lwz rWORD1,12(rSTR1) +- lwz rWORD2,12(rSTR2) +- cmplw cr5,rWORD7,rWORD8 +- bne cr6,L(duLcr6) +- srw rA,rWORD2,rSHR +- slw rD,rWORD2,rSHL +- or rWORD2,rA,rB +- addi rSTR1,rSTR1,8 +- addi rSTR2,rSTR2,8 +- cmplw cr0,rWORD1,rWORD2 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD5, 4(rSTR1) ++ lwz rWORD6, 4(rSTR2) ++#endif ++ cmplw cr1, rWORD3, rWORD4 ++ srw r0, rWORD6, rSHR ++ slw rWORD6_SHIFT, rWORD6, rSHL ++ or rWORD6, r0, rWORD4_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD7, 8(rSTR1) ++ lwz rWORD8, 8(rSTR2) ++#endif ++ cmplw cr6, rWORD5, rWORD6 ++ bne cr1, L(duLcr1) ++ srw r12, rWORD8, rSHR ++ slw rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT ++ blt cr7, L(duP3x) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD1, 12(rSTR1) ++ lwz rWORD2, 12(rSTR2) ++#endif ++ cmplw cr5, rWORD7, rWORD8 ++ bne cr6, L(duLcr6) ++ srw r0, rWORD2, rSHR ++ slw rWORD2_SHIFT, rWORD2, rSHL ++ or rWORD2, r0, rWORD8_SHIFT ++#ifndef __LITTLE_ENDIAN__ ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#endif ++ cmplw cr7, rWORD1, rWORD2 + b L(duLoop1) + .align 4 + L(duP3x): +- addi rSTR1,rSTR1,8 +- addi rSTR2,rSTR2,8 +- bne cr1,L(duLcr1) +- cmplw cr5,rWORD7,rWORD8 +- bne cr6,L(duLcr6) +- slwi. rN,rN,3 +- bne cr5,L(duLcr5) +- cmplw cr7,rN,rSHR ++#ifndef __LITTLE_ENDIAN__ ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#endif ++#if 0 ++/* Huh? We've already branched on cr1! */ ++ bne cr1, L(duLcr1) ++#endif ++ cmplw cr5, rWORD7, rWORD8 ++ bne cr6, L(duLcr6) ++ slwi. 
rN, rN, 3 ++ bne cr5, L(duLcr5) ++ cmplw cr7, rN, rSHR + beq L(duZeroReturn) +- li rA,0 +- ble cr7,L(dutrim) +- lwz rWORD2,4(rSTR2) +- srw rA,rWORD2,rSHR ++ li r0, 0 ++ ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD2, 4(rSTR2) ++#endif ++ srw r0, rWORD2, rSHR + b L(dutrim) + + /* Count is a multiple of 16, remainder is 0 */ + .align 4 + L(duP4): +- mtctr rTMP +- srw rA,rWORD8,rSHR +- lwz rWORD1,0(rSTR1) +- slw rD,rWORD8,rSHL +- or rWORD2,rA,rH ++ mtctr r0 ++ srw r0, rWORD8, rSHR ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ addi rSTR1, rSTR1, 4 ++#else ++ lwz rWORD1, 0(rSTR1) ++#endif ++ slw rWORD2_SHIFT, rWORD8, rSHL ++ or rWORD2, r0, rWORD6_SHIFT + L(duP4e): +- lwz rWORD3,4(rSTR1) +- lwz rWORD4,4(rSTR2) +- cmplw cr0,rWORD1,rWORD2 +- srw rC,rWORD4,rSHR +- slw rF,rWORD4,rSHL +- or rWORD4,rC,rD +- lwz rWORD5,8(rSTR1) +- lwz rWORD6,8(rSTR2) +- cmplw cr1,rWORD3,rWORD4 +- bne cr0,L(duLcr0) +- srw rE,rWORD6,rSHR +- slw rH,rWORD6,rSHL +- or rWORD6,rE,rF +- lwzu rWORD7,12(rSTR1) +- lwzu rWORD8,12(rSTR2) +- cmplw cr6,rWORD5,rWORD6 +- bne cr1,L(duLcr1) +- srw rG,rWORD8,rSHR +- slw rB,rWORD8,rSHL +- or rWORD8,rG,rH +- cmplw cr5,rWORD7,rWORD8 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD3, 4(rSTR1) ++ lwz rWORD4, 4(rSTR2) ++#endif ++ cmplw cr7, rWORD1, rWORD2 ++ srw r12, rWORD4, rSHR ++ slw rWORD4_SHIFT, rWORD4, rSHL ++ or rWORD4, r12, rWORD2_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD5, 8(rSTR1) ++ lwz rWORD6, 8(rSTR2) ++#endif ++ cmplw cr1, rWORD3, rWORD4 ++ bne cr7, L(duLcr7) ++ srw r0, rWORD6, rSHR ++ slw rWORD6_SHIFT, rWORD6, rSHL ++ or rWORD6, r0, rWORD4_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwzu rWORD7, 12(rSTR1) ++ lwzu rWORD8, 12(rSTR2) ++#endif ++ cmplw cr6, rWORD5, rWORD6 ++ bne cr1, L(duLcr1) ++ srw r12, rWORD8, rSHR ++ slw rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT ++ cmplw cr5, rWORD7, rWORD8 + bdz L(du24) /* Adjust CTR as we start with +4 */ + /* This is the primary loop */ + .align 4 + L(duLoop): +- lwz rWORD1,4(rSTR1) +- lwz rWORD2,4(rSTR2) +- cmplw cr1,rWORD3,rWORD4 +- bne cr6,L(duLcr6) +- srw rA,rWORD2,rSHR +- slw rD,rWORD2,rSHL +- or rWORD2,rA,rB ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD1, 4(rSTR1) ++ lwz rWORD2, 4(rSTR2) ++#endif ++ cmplw cr1, rWORD3, rWORD4 ++ bne cr6, L(duLcr6) ++ srw r0, rWORD2, rSHR ++ slw rWORD2_SHIFT, rWORD2, rSHL ++ or rWORD2, r0, rWORD8_SHIFT + L(duLoop1): +- lwz rWORD3,8(rSTR1) +- lwz rWORD4,8(rSTR2) +- cmplw cr6,rWORD5,rWORD6 +- bne cr5,L(duLcr5) +- srw rC,rWORD4,rSHR +- slw rF,rWORD4,rSHL +- or rWORD4,rC,rD ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD3, 0, rSTR1 ++ lwbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD3, 8(rSTR1) ++ lwz rWORD4, 8(rSTR2) ++#endif ++ cmplw cr6, rWORD5, rWORD6 ++ bne cr5, L(duLcr5) ++ srw r12, rWORD4, rSHR ++ slw rWORD4_SHIFT, rWORD4, rSHL ++ or rWORD4, r12, rWORD2_SHIFT + L(duLoop2): +- lwz rWORD5,12(rSTR1) +- lwz rWORD6,12(rSTR2) +- cmplw cr5,rWORD7,rWORD8 +- bne cr0,L(duLcr0) +- srw rE,rWORD6,rSHR +- slw rH,rWORD6,rSHL +- or rWORD6,rE,rF ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx 
rWORD5, 0, rSTR1 ++ lwbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD5, 12(rSTR1) ++ lwz rWORD6, 12(rSTR2) ++#endif ++ cmplw cr5, rWORD7, rWORD8 ++ bne cr7, L(duLcr7) ++ srw r0, rWORD6, rSHR ++ slw rWORD6_SHIFT, rWORD6, rSHL ++ or rWORD6, r0, rWORD4_SHIFT + L(duLoop3): +- lwzu rWORD7,16(rSTR1) +- lwzu rWORD8,16(rSTR2) +- cmplw cr0,rWORD1,rWORD2 +- bne cr1,L(duLcr1) +- srw rG,rWORD8,rSHR +- slw rB,rWORD8,rSHL +- or rWORD8,rG,rH ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD7, 0, rSTR1 ++ lwbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 4 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwzu rWORD7, 16(rSTR1) ++ lwzu rWORD8, 16(rSTR2) ++#endif ++ cmplw cr7, rWORD1, rWORD2 ++ bne cr1, L(duLcr1) ++ srw r12, rWORD8, rSHR ++ slw rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT + bdnz L(duLoop) + + L(duL4): +- bne cr1,L(duLcr1) +- cmplw cr1,rWORD3,rWORD4 +- bne cr6,L(duLcr6) +- cmplw cr6,rWORD5,rWORD6 +- bne cr5,L(duLcr5) +- cmplw cr5,rWORD7,rWORD8 ++#if 0 ++/* Huh? We've already branched on cr1! */ ++ bne cr1, L(duLcr1) ++#endif ++ cmplw cr1, rWORD3, rWORD4 ++ bne cr6, L(duLcr6) ++ cmplw cr6, rWORD5, rWORD6 ++ bne cr5, L(duLcr5) ++ cmplw cr5, rWORD7, rWORD8 + L(du44): +- bne cr0,L(duLcr0) ++ bne cr7, L(duLcr7) + L(du34): +- bne cr1,L(duLcr1) ++ bne cr1, L(duLcr1) + L(du24): +- bne cr6,L(duLcr6) ++ bne cr6, L(duLcr6) + L(du14): +- slwi. rN,rN,3 +- bne cr5,L(duLcr5) ++ slwi. rN, rN, 3 ++ bne cr5, L(duLcr5) + /* At this point we have a remainder of 1 to 3 bytes to compare. We use + shift right to eliminate bits beyond the compare length. ++ This allows the use of word subtract to compute the final result. + + However it may not be safe to load rWORD2 which may be beyond the + string length. So we compare the bit length of the remainder to + the right shift count (rSHR). If the bit count is less than or equal + we do not need to load rWORD2 (all significant bits are already in +- rB). */ +- cmplw cr7,rN,rSHR ++ rWORD8_SHIFT). */ ++ cmplw cr7, rN, rSHR + beq L(duZeroReturn) +- li rA,0 +- ble cr7,L(dutrim) +- lwz rWORD2,4(rSTR2) +- srw rA,rWORD2,rSHR ++ li r0, 0 ++ ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 4 ++#else ++ lwz rWORD2, 4(rSTR2) ++#endif ++ srw r0, rWORD2, rSHR + .align 4 + L(dutrim): +- lwz rWORD1,4(rSTR1) +- lwz r31,48(1) +- subfic rN,rN,32 /* Shift count is 32 - (rN * 8). */ +- or rWORD2,rA,rB +- lwz r30,44(1) +- lwz r29,40(r1) +- srw rWORD1,rWORD1,rN +- srw rWORD2,rWORD2,rN +- lwz r28,36(r1) +- lwz r27,32(r1) +- cmplw rWORD1,rWORD2 +- li rRTN,0 +- beq L(dureturn26) +- li rRTN,1 +- bgt L(dureturn26) +- li rRTN,-1 ++#ifdef __LITTLE_ENDIAN__ ++ lwbrx rWORD1, 0, rSTR1 ++#else ++ lwz rWORD1, 4(rSTR1) ++#endif ++ lwz rWORD8, 48(r1) ++ subfic rN, rN, 32 /* Shift count is 32 - (rN * 8). 
*/ ++ or rWORD2, r0, rWORD8_SHIFT ++ lwz rWORD7, 44(r1) ++ lwz rSHL, 40(r1) ++ srw rWORD1, rWORD1, rN ++ srw rWORD2, rWORD2, rN ++ lwz rSHR, 36(r1) ++ lwz rWORD8_SHIFT, 32(r1) ++ sub rRTN, rWORD1, rWORD2 + b L(dureturn26) + .align 4 +-L(duLcr0): +- lwz r31,48(1) +- lwz r30,44(1) +- li rRTN,1 +- bgt cr0,L(dureturn29) +- lwz r29,40(r1) +- lwz r28,36(r1) +- li rRTN,-1 ++L(duLcr7): ++ lwz rWORD8, 48(r1) ++ lwz rWORD7, 44(r1) ++ li rRTN, 1 ++ bgt cr7, L(dureturn29) ++ lwz rSHL, 40(r1) ++ lwz rSHR, 36(r1) ++ li rRTN, -1 + b L(dureturn27) + .align 4 + L(duLcr1): +- lwz r31,48(1) +- lwz r30,44(1) +- li rRTN,1 +- bgt cr1,L(dureturn29) +- lwz r29,40(r1) +- lwz r28,36(r1) +- li rRTN,-1 ++ lwz rWORD8, 48(r1) ++ lwz rWORD7, 44(r1) ++ li rRTN, 1 ++ bgt cr1, L(dureturn29) ++ lwz rSHL, 40(r1) ++ lwz rSHR, 36(r1) ++ li rRTN, -1 + b L(dureturn27) + .align 4 + L(duLcr6): +- lwz r31,48(1) +- lwz r30,44(1) +- li rRTN,1 +- bgt cr6,L(dureturn29) +- lwz r29,40(r1) +- lwz r28,36(r1) +- li rRTN,-1 ++ lwz rWORD8, 48(r1) ++ lwz rWORD7, 44(r1) ++ li rRTN, 1 ++ bgt cr6, L(dureturn29) ++ lwz rSHL, 40(r1) ++ lwz rSHR, 36(r1) ++ li rRTN, -1 + b L(dureturn27) + .align 4 + L(duLcr5): +- lwz r31,48(1) +- lwz r30,44(1) +- li rRTN,1 +- bgt cr5,L(dureturn29) +- lwz r29,40(r1) +- lwz r28,36(r1) +- li rRTN,-1 ++ lwz rWORD8, 48(r1) ++ lwz rWORD7, 44(r1) ++ li rRTN, 1 ++ bgt cr5, L(dureturn29) ++ lwz rSHL, 40(r1) ++ lwz rSHR, 36(r1) ++ li rRTN, -1 + b L(dureturn27) + .align 3 + L(duZeroReturn): +- li rRTN,0 ++ li rRTN, 0 + .align 4 + L(dureturn): +- lwz r31,48(1) +- lwz r30,44(1) ++ lwz rWORD8, 48(r1) ++ lwz rWORD7, 44(r1) + L(dureturn29): +- lwz r29,40(r1) +- lwz r28,36(r1) ++ lwz rSHL, 40(r1) ++ lwz rSHR, 36(r1) + L(dureturn27): +- lwz r27,32(r1) ++ lwz rWORD8_SHIFT, 32(r1) + L(dureturn26): +- lwz r26,28(r1) ++ lwz rWORD2_SHIFT, 28(r1) + L(dureturn25): +- lwz r25,24(r1) +- lwz r24,20(r1) +- lwz 1,0(1) ++ lwz rWORD4_SHIFT, 24(r1) ++ lwz rWORD6_SHIFT, 20(r1) ++ addi r1, r1, 64 ++ cfi_adjust_cfa_offset(-64) + blr + END (BP_SYM (memcmp)) ++ + libc_hidden_builtin_def (memcmp) + weak_alias (memcmp,bcmp) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/memcmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/memcmp.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/memcmp.S 2014-05-28 19:22:37.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/memcmp.S 2014-05-29 09:35:25.000000000 -0500 +@@ -1,5 +1,5 @@ +-/* Optimized strcmp implementation for PowerPC64. +- Copyright (C) 2003, 2006, 2011 Free Software Foundation, Inc. ++/* Optimized memcmp implementation for PowerPC64. ++ Copyright (C) 2003-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -17,307 +17,492 @@ + . */ + + #include +-#include +-#include + +-/* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */ ++/* int [r3] memcmp (const char *s1 [r3], ++ const char *s2 [r4], ++ size_t size [r5]) */ + + .machine power4 +-EALIGN (BP_SYM(memcmp), 4, 0) ++EALIGN (memcmp, 4, 0) + CALL_MCOUNT 3 + +-#define rTMP r0 + #define rRTN r3 + #define rSTR1 r3 /* first string arg */ + #define rSTR2 r4 /* second string arg */ + #define rN r5 /* max string length */ +-/* Note: The Bounded pointer support in this code is broken. This code +- was inherited from PPC32 and that support was never completed. +- Current PPC gcc does not support -fbounds-check or -fbounded-pointers. 
*/ + #define rWORD1 r6 /* current word in s1 */ + #define rWORD2 r7 /* current word in s2 */ + #define rWORD3 r8 /* next word in s1 */ + #define rWORD4 r9 /* next word in s2 */ + #define rWORD5 r10 /* next word in s1 */ + #define rWORD6 r11 /* next word in s2 */ +-#define rBITDIF r12 /* bits that differ in s1 & s2 words */ + #define rWORD7 r30 /* next word in s1 */ + #define rWORD8 r31 /* next word in s2 */ + +- xor rTMP, rSTR2, rSTR1 ++ xor r0, rSTR2, rSTR1 + cmpldi cr6, rN, 0 + cmpldi cr1, rN, 12 +- clrldi. rTMP, rTMP, 61 +- clrldi rBITDIF, rSTR1, 61 +- cmpldi cr5, rBITDIF, 0 ++ clrldi. r0, r0, 61 ++ clrldi r12, rSTR1, 61 ++ cmpldi cr5, r12, 0 + beq- cr6, L(zeroLength) +- dcbt 0,rSTR1 +- dcbt 0,rSTR2 +-/* If less than 8 bytes or not aligned, use the unalligned ++ dcbt 0, rSTR1 ++ dcbt 0, rSTR2 ++/* If less than 8 bytes or not aligned, use the unaligned + byte loop. */ + blt cr1, L(bytealigned) +- std rWORD8,-8(r1) +- cfi_offset(rWORD8,-8) +- std rWORD7,-16(r1) +- cfi_offset(rWORD7,-16) ++ std rWORD8, -8(r1) ++ cfi_offset(rWORD8, -8) ++ std rWORD7, -16(r1) ++ cfi_offset(rWORD7, -16) + bne L(unaligned) + /* At this point we know both strings have the same alignment and the +- compare length is at least 8 bytes. rBITDIF containes the low order ++ compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare +- of rBITDIF to 0. If rBITDIF == 0 then we are already double word +- aligned and can perform the DWaligned loop. +- ++ of r12 to 0. If r12 == 0 then we are already double word ++ aligned and can perform the DW aligned loop. ++ + Otherwise we know the two strings have the same alignment (but not +- yet DW). So we can force the string addresses to the next lower DW +- boundary and special case this first DW word using shift left to +- ellimiate bits preceeding the first byte. Since we want to join the +- normal (DWaligned) compare loop, starting at the second double word, ++ yet DW). So we force the string addresses to the next lower DW ++ boundary and special case this first DW using shift left to ++ eliminate bits preceding the first byte. Since we want to join the ++ normal (DW aligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop +- versioning for the first DW. This insures that the loop count is +- correct and the first DW (shifted) is in the expected resister pair. */ +- .align 4 ++ versioning for the first DW. This ensures that the loop count is ++ correct and the first DW (shifted) is in the expected register pair. */ ++ .align 4 + L(samealignment): + clrrdi rSTR1, rSTR1, 3 + clrrdi rSTR2, rSTR2, 3 + beq cr5, L(DWaligned) +- add rN, rN, rBITDIF +- sldi r11, rBITDIF, 3 +- srdi rTMP, rN, 5 /* Divide by 32 */ +- andi. rBITDIF, rN, 24 /* Get the DW remainder */ ++ add rN, rN, r12 ++ sldi rWORD6, r12, 3 ++ srdi r0, rN, 5 /* Divide by 32 */ ++ andi. 
r12, rN, 24 /* Get the DW remainder */ ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD1, 0(rSTR1) + ld rWORD2, 0(rSTR2) +- cmpldi cr1, rBITDIF, 16 ++#endif ++ cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dPs4) +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + bgt cr1, L(dPs3) + beq cr1, L(dPs2) + + /* Remainder is 8 */ +- .align 3 ++ .align 3 + L(dsP1): +- sld rWORD5, rWORD1, r11 +- sld rWORD6, rWORD2, r11 ++ sld rWORD5, rWORD1, rWORD6 ++ sld rWORD6, rWORD2, rWORD6 + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) + /* Do something useful in this cycle since we have to branch anyway. */ ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +- cmpld cr0, rWORD1, rWORD2 ++#endif ++ cmpld cr7, rWORD1, rWORD2 + b L(dP1e) + /* Remainder is 16 */ +- .align 4 ++ .align 4 + L(dPs2): +- sld rWORD5, rWORD1, r11 +- sld rWORD6, rWORD2, r11 ++ sld rWORD5, rWORD1, rWORD6 ++ sld rWORD6, rWORD2, rWORD6 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) + /* Do something useful in this cycle since we have to branch anyway. */ ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD7, 8(rSTR1) + ld rWORD8, 8(rSTR2) ++#endif + cmpld cr5, rWORD7, rWORD8 + b L(dP2e) + /* Remainder is 24 */ +- .align 4 ++ .align 4 + L(dPs3): +- sld rWORD3, rWORD1, r11 +- sld rWORD4, rWORD2, r11 ++ sld rWORD3, rWORD1, rWORD6 ++ sld rWORD4, rWORD2, rWORD6 + cmpld cr1, rWORD3, rWORD4 + b L(dP3e) + /* Count is a multiple of 32, remainder is 0 */ +- .align 4 ++ .align 4 + L(dPs4): +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ +- sld rWORD1, rWORD1, r11 +- sld rWORD2, rWORD2, r11 +- cmpld cr0, rWORD1, rWORD2 ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ ++ sld rWORD1, rWORD1, rWORD6 ++ sld rWORD2, rWORD2, rWORD6 ++ cmpld cr7, rWORD1, rWORD2 + b L(dP4e) + + /* At this point we know both strings are double word aligned and the + compare length is at least 8 bytes. */ +- .align 4 ++ .align 4 + L(DWaligned): +- andi. rBITDIF, rN, 24 /* Get the DW remainder */ +- srdi rTMP, rN, 5 /* Divide by 32 */ +- cmpldi cr1, rBITDIF, 16 ++ andi. r12, rN, 24 /* Get the DW remainder */ ++ srdi r0, rN, 5 /* Divide by 32 */ ++ cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dP4) + bgt cr1, L(dP3) + beq cr1, L(dP2) +- ++ + /* Remainder is 8 */ +- .align 4 ++ .align 4 + L(dP1): +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + /* Normally we'd use rWORD7/rWORD8 here, but since we might exit early +- (8-15 byte compare), we want to use only volitile registers. This +- means we can avoid restoring non-volitile registers since we did not ++ (8-15 byte compare), we want to use only volatile registers. This ++ means we can avoid restoring non-volatile registers since we did not + change any on the early exit path. The key here is the non-early +- exit path only cares about the condition code (cr5), not about which ++ exit path only cares about the condition code (cr5), not about which + register pair was used. 
*/ ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD5, 0(rSTR1) + ld rWORD6, 0(rSTR2) ++#endif + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +- cmpld cr0, rWORD1, rWORD2 ++#endif ++ cmpld cr7, rWORD1, rWORD2 + L(dP1e): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD3, 16(rSTR1) + ld rWORD4, 16(rSTR2) ++#endif + cmpld cr1, rWORD3, rWORD4 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD5, 24(rSTR1) + ld rWORD6, 24(rSTR2) ++#endif + cmpld cr6, rWORD5, rWORD6 +- bne cr5, L(dLcr5) +- bne cr0, L(dLcr0) +- ++ bne cr5, L(dLcr5x) ++ bne cr7, L(dLcr7x) ++ ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ldu rWORD7, 32(rSTR1) + ldu rWORD8, 32(rSTR2) ++#endif + bne cr1, L(dLcr1) + cmpld cr5, rWORD7, rWORD8 + bdnz L(dLoop) + bne cr6, L(dLcr6) +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) +- .align 3 ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) ++ .align 3 + L(dP1x): + sldi. r12, rN, 3 +- bne cr5, L(dLcr5) ++ bne cr5, L(dLcr5x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne L(d00) + li rRTN, 0 + blr +- ++ + /* Remainder is 16 */ +- .align 4 ++ .align 4 + L(dP2): +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD5, 0(rSTR1) + ld rWORD6, 0(rSTR2) ++#endif + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD7, 8(rSTR1) + ld rWORD8, 8(rSTR2) ++#endif + cmpld cr5, rWORD7, rWORD8 + L(dP2e): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD1, 16(rSTR1) + ld rWORD2, 16(rSTR2) +- cmpld cr0, rWORD1, rWORD2 ++#endif ++ cmpld cr7, rWORD1, rWORD2 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD3, 24(rSTR1) + ld rWORD4, 24(rSTR2) ++#endif + cmpld cr1, rWORD3, rWORD4 ++#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 ++#endif + bne cr6, L(dLcr6) + bne cr5, L(dLcr5) + b L(dLoop2) + /* Again we are on a early exit path (16-23 byte compare), we want to +- only use volitile registers and avoid restoring non-volitile ++ only use volatile registers and avoid restoring non-volatile + registers. */ +- .align 4 ++ .align 4 + L(dP2x): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD3, 8(rSTR1) + ld rWORD4, 8(rSTR2) +- cmpld cr5, rWORD3, rWORD4 ++#endif ++ cmpld cr1, rWORD3, rWORD4 + sldi. 
r12, rN, 3 +- bne cr6, L(dLcr6) ++ bne cr6, L(dLcr6x) ++#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +- bne cr5, L(dLcr5) ++#endif ++ bne cr1, L(dLcr1x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne L(d00) + li rRTN, 0 + blr +- ++ + /* Remainder is 24 */ +- .align 4 ++ .align 4 + L(dP3): +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD3, 0(rSTR1) + ld rWORD4, 0(rSTR2) ++#endif + cmpld cr1, rWORD3, rWORD4 + L(dP3e): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD5, 8(rSTR1) + ld rWORD6, 8(rSTR2) ++#endif + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP3x) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD7, 16(rSTR1) + ld rWORD8, 16(rSTR2) ++#endif + cmpld cr5, rWORD7, rWORD8 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD1, 24(rSTR1) + ld rWORD2, 24(rSTR2) +- cmpld cr0, rWORD1, rWORD2 ++#endif ++ cmpld cr7, rWORD1, rWORD2 ++#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 ++#endif + bne cr1, L(dLcr1) + bne cr6, L(dLcr6) + b L(dLoop1) + /* Again we are on a early exit path (24-31 byte compare), we want to +- only use volitile registers and avoid restoring non-volitile ++ only use volatile registers and avoid restoring non-volatile + registers. */ +- .align 4 ++ .align 4 + L(dP3x): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD1, 16(rSTR1) + ld rWORD2, 16(rSTR2) +- cmpld cr5, rWORD1, rWORD2 ++#endif ++ cmpld cr7, rWORD1, rWORD2 + sldi. r12, rN, 3 +- bne cr1, L(dLcr1) ++ bne cr1, L(dLcr1x) ++#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +- bne cr6, L(dLcr6) ++#endif ++ bne cr6, L(dLcr6x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). 
*/ +- bne cr5, L(dLcr5) ++ bne cr7, L(dLcr7x) + bne L(d00) + li rRTN, 0 + blr +- ++ + /* Count is a multiple of 32, remainder is 0 */ +- .align 4 ++ .align 4 + L(dP4): +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD1, 0(rSTR1) + ld rWORD2, 0(rSTR2) +- cmpld cr0, rWORD1, rWORD2 ++#endif ++ cmpld cr7, rWORD1, rWORD2 + L(dP4e): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD3, 8(rSTR1) + ld rWORD4, 8(rSTR2) ++#endif + cmpld cr1, rWORD3, rWORD4 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD5, 16(rSTR1) + ld rWORD6, 16(rSTR2) ++#endif + cmpld cr6, rWORD5, rWORD6 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ldu rWORD7, 24(rSTR1) + ldu rWORD8, 24(rSTR2) ++#endif + cmpld cr5, rWORD7, rWORD8 +- bne cr0, L(dLcr0) ++ bne cr7, L(dLcr7) + bne cr1, L(dLcr1) + bdz- L(d24) /* Adjust CTR as we start with +4 */ + /* This is the primary loop */ +- .align 4 ++ .align 4 + L(dLoop): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) ++#endif + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) + L(dLoop1): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD3, 16(rSTR1) + ld rWORD4, 16(rSTR2) ++#endif + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) + L(dLoop2): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD5, 24(rSTR1) + ld rWORD6, 24(rSTR2) ++#endif + cmpld cr5, rWORD7, rWORD8 +- bne cr0, L(dLcr0) ++ bne cr7, L(dLcr7) + L(dLoop3): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ldu rWORD7, 32(rSTR1) + ldu rWORD8, 32(rSTR2) ++#endif + bne- cr1, L(dLcr1) +- cmpld cr0, rWORD1, rWORD2 +- bdnz+ L(dLoop) +- ++ cmpld cr7, rWORD1, rWORD2 ++ bdnz+ L(dLoop) ++ + L(dL4): + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) +@@ -325,84 +510,98 @@ + bne cr5, L(dLcr5) + cmpld cr5, rWORD7, rWORD8 + L(d44): +- bne cr0, L(dLcr0) ++ bne cr7, L(dLcr7) + L(d34): + bne cr1, L(dLcr1) + L(d24): + bne cr6, L(dLcr6) + L(d14): + sldi. r12, rN, 3 +- bne cr5, L(dLcr5) ++ bne cr5, L(dLcr5) + L(d04): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + beq L(zeroLength) + /* At this point we have a remainder of 1 to 7 bytes to compare. Since + we are aligned it is safe to load the whole double word, and use +- shift right double to elliminate bits beyond the compare length. */ ++ shift right double to eliminate bits beyond the compare length. 
*/ + L(d00): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD1, 8(rSTR1) +- ld rWORD2, 8(rSTR2) ++ ld rWORD2, 8(rSTR2) ++#endif + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN +- cmpld cr5, rWORD1, rWORD2 +- bne cr5, L(dLcr5x) ++ cmpld cr7, rWORD1, rWORD2 ++ bne cr7, L(dLcr7x) + li rRTN, 0 + blr +- .align 4 +-L(dLcr0): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) ++ ++ .align 4 ++L(dLcr7): ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) ++L(dLcr7x): + li rRTN, 1 +- bgtlr cr0 ++ bgtlr cr7 + li rRTN, -1 + blr +- .align 4 ++ .align 4 + L(dLcr1): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) ++L(dLcr1x): + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr +- .align 4 ++ .align 4 + L(dLcr6): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) ++L(dLcr6x): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr +- .align 4 ++ .align 4 + L(dLcr5): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) + L(dLcr5x): + li rRTN, 1 + bgtlr cr5 + li rRTN, -1 + blr +- +- .align 4 ++ ++ .align 4 + L(bytealigned): +- mtctr rN /* Power4 wants mtctr 1st in dispatch group */ ++ mtctr rN /* Power4 wants mtctr 1st in dispatch group */ ++#if 0 ++/* Huh? We've already branched on cr6! */ + beq- cr6, L(zeroLength) ++#endif + + /* We need to prime this loop. This loop is swing modulo scheduled +- to avoid pipe delays. The dependent instruction latencies (load to ++ to avoid pipe delays. The dependent instruction latencies (load to + compare to conditional branch) is 2 to 3 cycles. In this loop each + dispatch group ends in a branch and takes 1 cycle. Effectively +- the first iteration of the loop only serves to load operands and +- branches based on compares are delayed until the next loop. ++ the first iteration of the loop only serves to load operands and ++ branches based on compares are delayed until the next loop. + + So we must precondition some registers and condition codes so that + we don't exit the loop early on the first iteration. */ +- ++ + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + bdz- L(b11) +- cmpld cr0, rWORD1, rWORD2 ++ cmpld cr7, rWORD1, rWORD2 + lbz rWORD3, 1(rSTR1) + lbz rWORD4, 1(rSTR2) + bdz- L(b12) +@@ -410,20 +609,20 @@ + lbzu rWORD5, 2(rSTR1) + lbzu rWORD6, 2(rSTR2) + bdz- L(b13) +- .align 4 ++ .align 4 + L(bLoop): + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) +- bne- cr0, L(bLcr0) ++ bne- cr7, L(bLcr7) + + cmpld cr6, rWORD5, rWORD6 + bdz- L(b3i) +- ++ + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + bne- cr1, L(bLcr1) + +- cmpld cr0, rWORD1, rWORD2 ++ cmpld cr7, rWORD1, rWORD2 + bdz- L(b2i) + + lbzu rWORD5, 1(rSTR1) +@@ -432,31 +631,31 @@ + + cmpld cr1, rWORD3, rWORD4 + bdnz+ L(bLoop) +- ++ + /* We speculatively loading bytes before we have tested the previous + bytes. But we must avoid overrunning the length (in the ctr) to +- prevent these speculative loads from causing a segfault. In this ++ prevent these speculative loads from causing a segfault. In this + case the loop will exit early (before the all pending bytes are + tested. In this case we must complete the pending operations + before returning. 
*/ + L(b1i): +- bne- cr0, L(bLcr0) ++ bne- cr7, L(bLcr7) + bne- cr1, L(bLcr1) + b L(bx56) +- .align 4 ++ .align 4 + L(b2i): + bne- cr6, L(bLcr6) +- bne- cr0, L(bLcr0) ++ bne- cr7, L(bLcr7) + b L(bx34) +- .align 4 ++ .align 4 + L(b3i): + bne- cr1, L(bLcr1) + bne- cr6, L(bLcr6) + b L(bx12) +- .align 4 +-L(bLcr0): ++ .align 4 ++L(bLcr7): + li rRTN, 1 +- bgtlr cr0 ++ bgtlr cr7 + li rRTN, -1 + blr + L(bLcr1): +@@ -471,116 +670,121 @@ + blr + + L(b13): +- bne- cr0, L(bx12) ++ bne- cr7, L(bx12) + bne- cr1, L(bx34) + L(bx56): + sub rRTN, rWORD5, rWORD6 + blr + nop + L(b12): +- bne- cr0, L(bx12) +-L(bx34): ++ bne- cr7, L(bx12) ++L(bx34): + sub rRTN, rWORD3, rWORD4 + blr + L(b11): + L(bx12): + sub rRTN, rWORD1, rWORD2 + blr +- .align 4 +-L(zeroLengthReturn): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) ++ .align 4 + L(zeroLength): + li rRTN, 0 + blr + +- .align 4 ++ .align 4 + /* At this point we know the strings have different alignment and the +- compare length is at least 8 bytes. rBITDIF containes the low order ++ compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare +- of rBITDIF to 0. If rBITDIF == 0 then rStr1 is double word ++ of r12 to 0. If r12 == 0 then rStr1 is double word + aligned and can perform the DWunaligned loop. +- +- Otherwise we know that rSTR1 is not aready DW aligned yet. ++ ++ Otherwise we know that rSTR1 is not already DW aligned yet. + So we can force the string addresses to the next lower DW +- boundary and special case this first DW word using shift left to +- ellimiate bits preceeding the first byte. Since we want to join the ++ boundary and special case this first DW using shift left to ++ eliminate bits preceding the first byte. Since we want to join the + normal (DWaligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop +- versioning for the first DW. This insures that the loop count is ++ versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected resister pair. */ +-#define rSHL r29 /* Unaligned shift left count. */ +-#define rSHR r28 /* Unaligned shift right count. */ +-#define rB r27 /* Left rotation temp for rWORD2. */ +-#define rD r26 /* Left rotation temp for rWORD4. */ +-#define rF r25 /* Left rotation temp for rWORD6. */ +-#define rH r24 /* Left rotation temp for rWORD8. */ +-#define rA r0 /* Right rotation temp for rWORD2. */ +-#define rC r12 /* Right rotation temp for rWORD4. */ +-#define rE r0 /* Right rotation temp for rWORD6. */ +-#define rG r12 /* Right rotation temp for rWORD8. */ ++#define rSHL r29 /* Unaligned shift left count. */ ++#define rSHR r28 /* Unaligned shift right count. */ ++#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */ ++#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */ ++#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */ ++#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. 
*/ + L(unaligned): +- std r29,-24(r1) +- cfi_offset(r29,-24) ++ std rSHL, -24(r1) ++ cfi_offset(rSHL, -24) + clrldi rSHL, rSTR2, 61 + beq- cr6, L(duzeroLength) +- std r28,-32(r1) +- cfi_offset(r28,-32) ++ std rSHR, -32(r1) ++ cfi_offset(rSHR, -32) + beq cr5, L(DWunaligned) +- std r27,-40(r1) +- cfi_offset(r27,-40) +-/* Adjust the logical start of rSTR2 ro compensate for the extra bits ++ std rWORD8_SHIFT, -40(r1) ++ cfi_offset(rWORD8_SHIFT, -40) ++/* Adjust the logical start of rSTR2 to compensate for the extra bits + in the 1st rSTR1 DW. */ +- sub r27, rSTR2, rBITDIF ++ sub rWORD8_SHIFT, rSTR2, r12 + /* But do not attempt to address the DW before that DW that contains + the actual start of rSTR2. */ + clrrdi rSTR2, rSTR2, 3 +- std r26,-48(r1) +- cfi_offset(r26,-48) +-/* Compute the leaft/right shift counts for the unalign rSTR2, +- compensating for the logical (DW aligned) start of rSTR1. */ +- clrldi rSHL, r27, 61 +- clrrdi rSTR1, rSTR1, 3 +- std r25,-56(r1) +- cfi_offset(r25,-56) ++ std rWORD2_SHIFT, -48(r1) ++ cfi_offset(rWORD2_SHIFT, -48) ++/* Compute the left/right shift counts for the unaligned rSTR2, ++ compensating for the logical (DW aligned) start of rSTR1. */ ++ clrldi rSHL, rWORD8_SHIFT, 61 ++ clrrdi rSTR1, rSTR1, 3 ++ std rWORD4_SHIFT, -56(r1) ++ cfi_offset(rWORD4_SHIFT, -56) + sldi rSHL, rSHL, 3 +- cmpld cr5, r27, rSTR2 +- add rN, rN, rBITDIF +- sldi r11, rBITDIF, 3 +- std r24,-64(r1) +- cfi_offset(r24,-64) ++ cmpld cr5, rWORD8_SHIFT, rSTR2 ++ add rN, rN, r12 ++ sldi rWORD6, r12, 3 ++ std rWORD6_SHIFT, -64(r1) ++ cfi_offset(rWORD6_SHIFT, -64) + subfic rSHR, rSHL, 64 +- srdi rTMP, rN, 5 /* Divide by 32 */ +- andi. rBITDIF, rN, 24 /* Get the DW remainder */ ++ srdi r0, rN, 5 /* Divide by 32 */ ++ andi. r12, rN, 24 /* Get the DW remainder */ + /* We normally need to load 2 DWs to start the unaligned rSTR2, but in + this special case those bits may be discarded anyway. Also we + must avoid loading a DW where none of the bits are part of rSTR2 as + this may cross a page boundary and cause a page fault. */ + li rWORD8, 0 + blt cr5, L(dus0) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD8, 0(rSTR2) +- la rSTR2, 8(rSTR2) ++ addi rSTR2, rSTR2, 8 ++#endif + sld rWORD8, rWORD8, rSHL + + L(dus0): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD1, 0(rSTR1) + ld rWORD2, 0(rSTR2) +- cmpldi cr1, rBITDIF, 16 ++#endif ++ cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 +- srd rG, rWORD2, rSHR ++ srd r12, rWORD2, rSHR + clrldi rN, rN, 61 + beq L(duPs4) +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ +- or rWORD8, rG, rWORD8 ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ ++ or rWORD8, r12, rWORD8 + bgt cr1, L(duPs3) + beq cr1, L(duPs2) + + /* Remainder is 8 */ +- .align 4 ++ .align 4 + L(dusP1): +- sld rB, rWORD2, rSHL +- sld rWORD7, rWORD1, r11 +- sld rWORD8, rWORD8, r11 ++ sld rWORD8_SHIFT, rWORD2, rSHL ++ sld rWORD7, rWORD1, rWORD6 ++ sld rWORD8, rWORD8, rWORD6 + bge cr7, L(duP1e) + /* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. 
See L(du14) for details on +@@ -590,95 +794,133 @@ + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) +- li rA, 0 ++ li r0, 0 + ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD2, 8(rSTR2) +- srd rA, rWORD2, rSHR ++#endif ++ srd r0, rWORD2, rSHR + b L(dutrim) + /* Remainder is 16 */ +- .align 4 ++ .align 4 + L(duPs2): +- sld rH, rWORD2, rSHL +- sld rWORD5, rWORD1, r11 +- sld rWORD6, rWORD8, r11 ++ sld rWORD6_SHIFT, rWORD2, rSHL ++ sld rWORD5, rWORD1, rWORD6 ++ sld rWORD6, rWORD8, rWORD6 + b L(duP2e) + /* Remainder is 24 */ +- .align 4 ++ .align 4 + L(duPs3): +- sld rF, rWORD2, rSHL +- sld rWORD3, rWORD1, r11 +- sld rWORD4, rWORD8, r11 ++ sld rWORD4_SHIFT, rWORD2, rSHL ++ sld rWORD3, rWORD1, rWORD6 ++ sld rWORD4, rWORD8, rWORD6 + b L(duP3e) + /* Count is a multiple of 32, remainder is 0 */ +- .align 4 ++ .align 4 + L(duPs4): +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ +- or rWORD8, rG, rWORD8 +- sld rD, rWORD2, rSHL +- sld rWORD1, rWORD1, r11 +- sld rWORD2, rWORD8, r11 ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ ++ or rWORD8, r12, rWORD8 ++ sld rWORD2_SHIFT, rWORD2, rSHL ++ sld rWORD1, rWORD1, rWORD6 ++ sld rWORD2, rWORD8, rWORD6 + b L(duP4e) + + /* At this point we know rSTR1 is double word aligned and the + compare length is at least 8 bytes. */ +- .align 4 ++ .align 4 + L(DWunaligned): +- std r27,-40(r1) +- cfi_offset(r27,-40) ++ std rWORD8_SHIFT, -40(r1) ++ cfi_offset(rWORD8_SHIFT, -40) + clrrdi rSTR2, rSTR2, 3 +- std r26,-48(r1) +- cfi_offset(r26,-48) +- srdi rTMP, rN, 5 /* Divide by 32 */ +- std r25,-56(r1) +- cfi_offset(r25,-56) +- andi. rBITDIF, rN, 24 /* Get the DW remainder */ +- std r24,-64(r1) +- cfi_offset(r24,-64) ++ std rWORD2_SHIFT, -48(r1) ++ cfi_offset(rWORD2_SHIFT, -48) ++ srdi r0, rN, 5 /* Divide by 32 */ ++ std rWORD4_SHIFT, -56(r1) ++ cfi_offset(rWORD4_SHIFT, -56) ++ andi. 
r12, rN, 24 /* Get the DW remainder */ ++ std rWORD6_SHIFT, -64(r1) ++ cfi_offset(rWORD6_SHIFT, -64) + sldi rSHL, rSHL, 3 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR2, rSTR2, 8 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD6, 0(rSTR2) + ldu rWORD8, 8(rSTR2) +- cmpldi cr1, rBITDIF, 16 ++#endif ++ cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + subfic rSHR, rSHL, 64 +- sld rH, rWORD6, rSHL ++ sld rWORD6_SHIFT, rWORD6, rSHL + beq L(duP4) +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + bgt cr1, L(duP3) + beq cr1, L(duP2) +- ++ + /* Remainder is 8 */ +- .align 4 ++ .align 4 + L(duP1): +- srd rG, rWORD8, rSHR ++ srd r12, rWORD8, rSHR ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ addi rSTR1, rSTR1, 8 ++#else + ld rWORD7, 0(rSTR1) +- sld rB, rWORD8, rSHL +- or rWORD8, rG, rH ++#endif ++ sld rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP1x) + L(duP1e): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) ++#endif + cmpld cr5, rWORD7, rWORD8 +- srd rA, rWORD2, rSHR +- sld rD, rWORD2, rSHL +- or rWORD2, rA, rB ++ srd r0, rWORD2, rSHR ++ sld rWORD2_SHIFT, rWORD2, rSHL ++ or rWORD2, r0, rWORD8_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD3, 16(rSTR1) + ld rWORD4, 16(rSTR2) +- cmpld cr0, rWORD1, rWORD2 +- srd rC, rWORD4, rSHR +- sld rF, rWORD4, rSHL ++#endif ++ cmpld cr7, rWORD1, rWORD2 ++ srd r12, rWORD4, rSHR ++ sld rWORD4_SHIFT, rWORD4, rSHL + bne cr5, L(duLcr5) +- or rWORD4, rC, rD ++ or rWORD4, r12, rWORD2_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD5, 24(rSTR1) + ld rWORD6, 24(rSTR2) ++#endif + cmpld cr1, rWORD3, rWORD4 +- srd rE, rWORD6, rSHR +- sld rH, rWORD6, rSHL +- bne cr0, L(duLcr0) +- or rWORD6, rE, rF ++ srd r0, rWORD6, rSHR ++ sld rWORD6_SHIFT, rWORD6, rSHL ++ bne cr7, L(duLcr7) ++ or rWORD6, r0, rWORD4_SHIFT + cmpld cr6, rWORD5, rWORD6 +- b L(duLoop3) +- .align 4 ++ b L(duLoop3) ++ .align 4 + /* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. 
*/ +@@ -688,186 +930,321 @@ + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) +- li rA, 0 ++ li r0, 0 + ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD2, 8(rSTR2) +- srd rA, rWORD2, rSHR ++#endif ++ srd r0, rWORD2, rSHR + b L(dutrim) + /* Remainder is 16 */ +- .align 4 ++ .align 4 + L(duP2): +- srd rE, rWORD8, rSHR ++ srd r0, rWORD8, rSHR ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ addi rSTR1, rSTR1, 8 ++#else + ld rWORD5, 0(rSTR1) +- or rWORD6, rE, rH +- sld rH, rWORD8, rSHL ++#endif ++ or rWORD6, r0, rWORD6_SHIFT ++ sld rWORD6_SHIFT, rWORD8, rSHL + L(duP2e): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD7, 8(rSTR1) + ld rWORD8, 8(rSTR2) ++#endif + cmpld cr6, rWORD5, rWORD6 +- srd rG, rWORD8, rSHR +- sld rB, rWORD8, rSHL +- or rWORD8, rG, rH ++ srd r12, rWORD8, rSHR ++ sld rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP2x) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD1, 16(rSTR1) + ld rWORD2, 16(rSTR2) ++#endif + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) +- srd rA, rWORD2, rSHR +- sld rD, rWORD2, rSHL +- or rWORD2, rA, rB ++ srd r0, rWORD2, rSHR ++ sld rWORD2_SHIFT, rWORD2, rSHL ++ or rWORD2, r0, rWORD8_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD3, 24(rSTR1) + ld rWORD4, 24(rSTR2) +- cmpld cr0, rWORD1, rWORD2 ++#endif ++ cmpld cr7, rWORD1, rWORD2 + bne cr5, L(duLcr5) +- srd rC, rWORD4, rSHR +- sld rF, rWORD4, rSHL +- or rWORD4, rC, rD ++ srd r12, rWORD4, rSHR ++ sld rWORD4_SHIFT, rWORD4, rSHL ++ or rWORD4, r12, rWORD2_SHIFT ++#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 ++#endif + cmpld cr1, rWORD3, rWORD4 + b L(duLoop2) +- .align 4 ++ .align 4 + L(duP2x): + cmpld cr5, rWORD7, rWORD8 ++#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 ++#endif + bne cr6, L(duLcr6) + sldi. 
rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) +- li rA, 0 ++ li r0, 0 + ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD2, 8(rSTR2) +- srd rA, rWORD2, rSHR ++#endif ++ srd r0, rWORD2, rSHR + b L(dutrim) +- ++ + /* Remainder is 24 */ +- .align 4 ++ .align 4 + L(duP3): +- srd rC, rWORD8, rSHR ++ srd r12, rWORD8, rSHR ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ addi rSTR1, rSTR1, 8 ++#else + ld rWORD3, 0(rSTR1) +- sld rF, rWORD8, rSHL +- or rWORD4, rC, rH ++#endif ++ sld rWORD4_SHIFT, rWORD8, rSHL ++ or rWORD4, r12, rWORD6_SHIFT + L(duP3e): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD5, 8(rSTR1) + ld rWORD6, 8(rSTR2) ++#endif + cmpld cr1, rWORD3, rWORD4 +- srd rE, rWORD6, rSHR +- sld rH, rWORD6, rSHL +- or rWORD6, rE, rF ++ srd r0, rWORD6, rSHR ++ sld rWORD6_SHIFT, rWORD6, rSHL ++ or rWORD6, r0, rWORD4_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD7, 16(rSTR1) + ld rWORD8, 16(rSTR2) ++#endif + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) +- srd rG, rWORD8, rSHR +- sld rB, rWORD8, rSHL +- or rWORD8, rG, rH ++ srd r12, rWORD8, rSHR ++ sld rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP3x) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD1, 24(rSTR1) + ld rWORD2, 24(rSTR2) ++#endif + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) +- srd rA, rWORD2, rSHR +- sld rD, rWORD2, rSHL +- or rWORD2, rA, rB ++ srd r0, rWORD2, rSHR ++ sld rWORD2_SHIFT, rWORD2, rSHL ++ or rWORD2, r0, rWORD8_SHIFT ++#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +- cmpld cr0, rWORD1, rWORD2 ++#endif ++ cmpld cr7, rWORD1, rWORD2 + b L(duLoop1) +- .align 4 ++ .align 4 + L(duP3x): ++#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 ++#endif ++#if 0 ++/* Huh? We've already branched on cr1! */ + bne cr1, L(duLcr1) ++#endif + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + sldi. 
rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) +- li rA, 0 ++ li r0, 0 + ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD2, 8(rSTR2) +- srd rA, rWORD2, rSHR ++#endif ++ srd r0, rWORD2, rSHR + b L(dutrim) +- ++ + /* Count is a multiple of 32, remainder is 0 */ +- .align 4 ++ .align 4 + L(duP4): +- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ +- srd rA, rWORD8, rSHR ++ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ ++ srd r0, rWORD8, rSHR ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ addi rSTR1, rSTR1, 8 ++#else + ld rWORD1, 0(rSTR1) +- sld rD, rWORD8, rSHL +- or rWORD2, rA, rH ++#endif ++ sld rWORD2_SHIFT, rWORD8, rSHL ++ or rWORD2, r0, rWORD6_SHIFT + L(duP4e): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD3, 8(rSTR1) + ld rWORD4, 8(rSTR2) +- cmpld cr0, rWORD1, rWORD2 +- srd rC, rWORD4, rSHR +- sld rF, rWORD4, rSHL +- or rWORD4, rC, rD ++#endif ++ cmpld cr7, rWORD1, rWORD2 ++ srd r12, rWORD4, rSHR ++ sld rWORD4_SHIFT, rWORD4, rSHL ++ or rWORD4, r12, rWORD2_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD5, 16(rSTR1) + ld rWORD6, 16(rSTR2) ++#endif + cmpld cr1, rWORD3, rWORD4 +- bne cr0, L(duLcr0) +- srd rE, rWORD6, rSHR +- sld rH, rWORD6, rSHL +- or rWORD6, rE, rF ++ bne cr7, L(duLcr7) ++ srd r0, rWORD6, rSHR ++ sld rWORD6_SHIFT, rWORD6, rSHL ++ or rWORD6, r0, rWORD4_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ldu rWORD7, 24(rSTR1) + ldu rWORD8, 24(rSTR2) ++#endif + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) +- srd rG, rWORD8, rSHR +- sld rB, rWORD8, rSHL +- or rWORD8, rG, rH ++ srd r12, rWORD8, rSHR ++ sld rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT + cmpld cr5, rWORD7, rWORD8 + bdz- L(du24) /* Adjust CTR as we start with +4 */ + /* This is the primary loop */ +- .align 4 ++ .align 4 + L(duLoop): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) ++#endif + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) +- srd rA, rWORD2, rSHR +- sld rD, rWORD2, rSHL +- or rWORD2, rA, rB ++ srd r0, rWORD2, rSHR ++ sld rWORD2_SHIFT, rWORD2, rSHL ++ or rWORD2, r0, rWORD8_SHIFT + L(duLoop1): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD3, 16(rSTR1) + ld rWORD4, 16(rSTR2) ++#endif + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) +- srd rC, rWORD4, rSHR +- sld rF, rWORD4, rSHL +- or rWORD4, rC, rD ++ srd r12, rWORD4, rSHR ++ sld rWORD4_SHIFT, rWORD4, rSHL ++ or rWORD4, r12, rWORD2_SHIFT + L(duLoop2): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD5, 24(rSTR1) + ld rWORD6, 24(rSTR2) ++#endif + cmpld cr5, rWORD7, rWORD8 +- bne cr0, L(duLcr0) +- srd rE, rWORD6, rSHR +- sld rH, rWORD6, rSHL +- or rWORD6, rE, rF ++ bne cr7, L(duLcr7) ++ srd r0, rWORD6, rSHR ++ sld rWORD6_SHIFT, rWORD6, rSHL ++ or rWORD6, r0, rWORD4_SHIFT + L(duLoop3): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 
++#else + ldu rWORD7, 32(rSTR1) + ldu rWORD8, 32(rSTR2) +- cmpld cr0, rWORD1, rWORD2 ++#endif ++ cmpld cr7, rWORD1, rWORD2 + bne- cr1, L(duLcr1) +- srd rG, rWORD8, rSHR +- sld rB, rWORD8, rSHL +- or rWORD8, rG, rH +- bdnz+ L(duLoop) +- ++ srd r12, rWORD8, rSHR ++ sld rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT ++ bdnz+ L(duLoop) ++ + L(duL4): ++#if 0 ++/* Huh? We've already branched on cr1! */ + bne cr1, L(duLcr1) ++#endif + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + cmpld cr5, rWORD7, rWORD8 + L(du44): +- bne cr0, L(duLcr0) ++ bne cr7, L(duLcr7) + L(du34): + bne cr1, L(duLcr1) + L(du24): +@@ -876,106 +1253,113 @@ + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + /* At this point we have a remainder of 1 to 7 bytes to compare. We use +- shift right double to elliminate bits beyond the compare length. +- This allows the use of double word subtract to compute the final +- result. ++ shift right double to eliminate bits beyond the compare length. + +- However it may not be safe to load rWORD2 which may be beyond the ++ However it may not be safe to load rWORD2 which may be beyond the + string length. So we compare the bit length of the remainder to + the right shift count (rSHR). If the bit count is less than or equal + we do not need to load rWORD2 (all significant bits are already in +- rB). */ ++ rWORD8_SHIFT). */ + cmpld cr7, rN, rSHR + beq L(duZeroReturn) +- li rA, 0 ++ li r0, 0 + ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 8 ++#else + ld rWORD2, 8(rSTR2) +- srd rA, rWORD2, rSHR +- .align 4 ++#endif ++ srd r0, rWORD2, rSHR ++ .align 4 + L(dutrim): ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++#else + ld rWORD1, 8(rSTR1) +- ld rWORD8,-8(r1) +- subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). */ +- or rWORD2, rA, rB +- ld rWORD7,-16(r1) +- ld r29,-24(r1) ++#endif ++ ld rWORD8, -8(r1) ++ subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). 
*/ ++ or rWORD2, r0, rWORD8_SHIFT ++ ld rWORD7, -16(r1) ++ ld rSHL, -24(r1) + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN +- ld r28,-32(r1) +- ld r27,-40(r1) ++ ld rSHR, -32(r1) ++ ld rWORD8_SHIFT, -40(r1) + li rRTN, 0 +- cmpld cr0, rWORD1, rWORD2 +- ld r26,-48(r1) +- ld r25,-56(r1) +- beq cr0, L(dureturn24) +- li rRTN, 1 +- ld r24,-64(r1) +- bgtlr cr0 +- li rRTN, -1 +- blr +- .align 4 +-L(duLcr0): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) +- li rRTN, 1 +- bgt cr0, L(dureturn29) +- ld r29,-24(r1) +- ld r28,-32(r1) ++ cmpld cr7, rWORD1, rWORD2 ++ ld rWORD2_SHIFT, -48(r1) ++ ld rWORD4_SHIFT, -56(r1) ++ beq cr7, L(dureturn24) ++ li rRTN, 1 ++ ld rWORD6_SHIFT, -64(r1) ++ bgtlr cr7 ++ li rRTN, -1 ++ blr ++ .align 4 ++L(duLcr7): ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) ++ li rRTN, 1 ++ bgt cr7, L(dureturn29) ++ ld rSHL, -24(r1) ++ ld rSHR, -32(r1) + li rRTN, -1 + b L(dureturn27) +- .align 4 ++ .align 4 + L(duLcr1): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) + li rRTN, 1 +- bgt cr1, L(dureturn29) +- ld r29,-24(r1) +- ld r28,-32(r1) ++ bgt cr1, L(dureturn29) ++ ld rSHL, -24(r1) ++ ld rSHR, -32(r1) + li rRTN, -1 + b L(dureturn27) +- .align 4 ++ .align 4 + L(duLcr6): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) + li rRTN, 1 +- bgt cr6, L(dureturn29) +- ld r29,-24(r1) +- ld r28,-32(r1) ++ bgt cr6, L(dureturn29) ++ ld rSHL, -24(r1) ++ ld rSHR, -32(r1) + li rRTN, -1 + b L(dureturn27) +- .align 4 ++ .align 4 + L(duLcr5): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) + li rRTN, 1 +- bgt cr5, L(dureturn29) +- ld r29,-24(r1) +- ld r28,-32(r1) ++ bgt cr5, L(dureturn29) ++ ld rSHL, -24(r1) ++ ld rSHR, -32(r1) + li rRTN, -1 + b L(dureturn27) + .align 3 + L(duZeroReturn): +- li rRTN,0 ++ li rRTN, 0 + .align 4 + L(dureturn): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) +-L(dureturn29): +- ld r29,-24(r1) +- ld r28,-32(r1) +-L(dureturn27): +- ld r27,-40(r1) +-L(dureturn26): +- ld r26,-48(r1) +-L(dureturn25): +- ld r25,-56(r1) ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) ++L(dureturn29): ++ ld rSHL, -24(r1) ++ ld rSHR, -32(r1) ++L(dureturn27): ++ ld rWORD8_SHIFT, -40(r1) ++L(dureturn26): ++ ld rWORD2_SHIFT, -48(r1) ++L(dureturn25): ++ ld rWORD4_SHIFT, -56(r1) + L(dureturn24): +- ld r24,-64(r1) ++ ld rWORD6_SHIFT, -64(r1) + blr + L(duzeroLength): +- li rRTN,0 ++ li rRTN, 0 + blr + +-END (BP_SYM (memcmp)) ++END (memcmp) + libc_hidden_builtin_def (memcmp) + weak_alias (memcmp, bcmp) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memcmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memcmp.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memcmp.S 2014-05-28 19:22:37.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memcmp.S 2014-05-29 09:35:08.000000000 -0500 +@@ -1,5 +1,5 @@ + /* Optimized memcmp implementation for POWER7/PowerPC64. +- Copyright (C) 2010, 2011 Free Software Foundation, Inc. ++ Copyright (C) 2010-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -17,379 +17,576 @@ + . 
*/ + + #include +-#include +-#include + + /* int [r3] memcmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + + .machine power7 +-EALIGN (BP_SYM(memcmp),4,0) ++EALIGN (memcmp, 4, 0) + CALL_MCOUNT 3 + +-#define rTMP r0 + #define rRTN r3 + #define rSTR1 r3 /* first string arg */ + #define rSTR2 r4 /* second string arg */ + #define rN r5 /* max string length */ +-/* Note: The Bounded pointer support in this code is broken. This code +- was inherited from PPC32 and that support was never completed. +- Current PPC gcc does not support -fbounds-check or -fbounded-pointers. */ + #define rWORD1 r6 /* current word in s1 */ + #define rWORD2 r7 /* current word in s2 */ + #define rWORD3 r8 /* next word in s1 */ + #define rWORD4 r9 /* next word in s2 */ + #define rWORD5 r10 /* next word in s1 */ + #define rWORD6 r11 /* next word in s2 */ +-#define rBITDIF r12 /* bits that differ in s1 & s2 words */ + #define rWORD7 r30 /* next word in s1 */ + #define rWORD8 r31 /* next word in s2 */ + +- xor rTMP,rSTR2,rSTR1 +- cmpldi cr6,rN,0 +- cmpldi cr1,rN,12 +- clrldi. rTMP,rTMP,61 +- clrldi rBITDIF,rSTR1,61 +- cmpldi cr5,rBITDIF,0 +- beq- cr6,L(zeroLength) +- dcbt 0,rSTR1 +- dcbt 0,rSTR2 +-/* If less than 8 bytes or not aligned, use the unalligned ++ xor r0, rSTR2, rSTR1 ++ cmpldi cr6, rN, 0 ++ cmpldi cr1, rN, 12 ++ clrldi. r0, r0, 61 ++ clrldi r12, rSTR1, 61 ++ cmpldi cr5, r12, 0 ++ beq- cr6, L(zeroLength) ++ dcbt 0, rSTR1 ++ dcbt 0, rSTR2 ++/* If less than 8 bytes or not aligned, use the unaligned + byte loop. */ +- blt cr1,L(bytealigned) +- std rWORD8,-8(r1) +- cfi_offset(rWORD8,-8) +- std rWORD7,-16(r1) +- cfi_offset(rWORD7,-16) ++ blt cr1, L(bytealigned) ++ std rWORD8, -8(r1) ++ cfi_offset(rWORD8, -8) ++ std rWORD7, -16(r1) ++ cfi_offset(rWORD7, -16) + bne L(unaligned) + /* At this point we know both strings have the same alignment and the +- compare length is at least 8 bytes. rBITDIF containes the low order ++ compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare +- of rBITDIF to 0. If rBITDIF == 0 then we are already double word +- aligned and can perform the DWaligned loop. ++ of r12 to 0. If r12 == 0 then we are already double word ++ aligned and can perform the DW aligned loop. + + Otherwise we know the two strings have the same alignment (but not +- yet DW). So we can force the string addresses to the next lower DW +- boundary and special case this first DW word using shift left to +- ellimiate bits preceeding the first byte. Since we want to join the +- normal (DWaligned) compare loop, starting at the second double word, ++ yet DW). So we force the string addresses to the next lower DW ++ boundary and special case this first DW using shift left to ++ eliminate bits preceding the first byte. Since we want to join the ++ normal (DW aligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop +- versioning for the first DW. This insures that the loop count is +- correct and the first DW (shifted) is in the expected resister pair. */ ++ versioning for the first DW. This ensures that the loop count is ++ correct and the first DW (shifted) is in the expected register pair. */ + .align 4 + L(samealignment): +- clrrdi rSTR1,rSTR1,3 +- clrrdi rSTR2,rSTR2,3 +- beq cr5,L(DWaligned) +- add rN,rN,rBITDIF +- sldi r11,rBITDIF,3 +- srdi rTMP,rN,5 /* Divide by 32 */ +- andi. 
rBITDIF,rN,24 /* Get the DW remainder */ +- ld rWORD1,0(rSTR1) +- ld rWORD2,0(rSTR2) +- cmpldi cr1,rBITDIF,16 +- cmpldi cr7,rN,32 +- clrldi rN,rN,61 ++ clrrdi rSTR1, rSTR1, 3 ++ clrrdi rSTR2, rSTR2, 3 ++ beq cr5, L(DWaligned) ++ add rN, rN, r12 ++ sldi rWORD6, r12, 3 ++ srdi r0, rN, 5 /* Divide by 32 */ ++ andi. r12, rN, 24 /* Get the DW remainder */ ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD1, 0(rSTR1) ++ ld rWORD2, 0(rSTR2) ++#endif ++ cmpldi cr1, r12, 16 ++ cmpldi cr7, rN, 32 ++ clrldi rN, rN, 61 + beq L(dPs4) +- mtctr rTMP +- bgt cr1,L(dPs3) +- beq cr1,L(dPs2) ++ mtctr r0 ++ bgt cr1, L(dPs3) ++ beq cr1, L(dPs2) + + /* Remainder is 8 */ + .align 3 + L(dsP1): +- sld rWORD5,rWORD1,r11 +- sld rWORD6,rWORD2,r11 +- cmpld cr5,rWORD5,rWORD6 +- blt cr7,L(dP1x) ++ sld rWORD5, rWORD1, rWORD6 ++ sld rWORD6, rWORD2, rWORD6 ++ cmpld cr5, rWORD5, rWORD6 ++ blt cr7, L(dP1x) + /* Do something useful in this cycle since we have to branch anyway. */ +- ld rWORD1,8(rSTR1) +- ld rWORD2,8(rSTR2) +- cmpld cr0,rWORD1,rWORD2 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD1, 8(rSTR1) ++ ld rWORD2, 8(rSTR2) ++#endif ++ cmpld cr7, rWORD1, rWORD2 + b L(dP1e) + /* Remainder is 16 */ + .align 4 + L(dPs2): +- sld rWORD5,rWORD1,r11 +- sld rWORD6,rWORD2,r11 +- cmpld cr6,rWORD5,rWORD6 +- blt cr7,L(dP2x) ++ sld rWORD5, rWORD1, rWORD6 ++ sld rWORD6, rWORD2, rWORD6 ++ cmpld cr6, rWORD5, rWORD6 ++ blt cr7, L(dP2x) + /* Do something useful in this cycle since we have to branch anyway. */ +- ld rWORD7,8(rSTR1) +- ld rWORD8,8(rSTR2) +- cmpld cr5,rWORD7,rWORD8 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD7, 8(rSTR1) ++ ld rWORD8, 8(rSTR2) ++#endif ++ cmpld cr5, rWORD7, rWORD8 + b L(dP2e) + /* Remainder is 24 */ + .align 4 + L(dPs3): +- sld rWORD3,rWORD1,r11 +- sld rWORD4,rWORD2,r11 +- cmpld cr1,rWORD3,rWORD4 ++ sld rWORD3, rWORD1, rWORD6 ++ sld rWORD4, rWORD2, rWORD6 ++ cmpld cr1, rWORD3, rWORD4 + b L(dP3e) + /* Count is a multiple of 32, remainder is 0 */ + .align 4 + L(dPs4): +- mtctr rTMP +- sld rWORD1,rWORD1,r11 +- sld rWORD2,rWORD2,r11 +- cmpld cr0,rWORD1,rWORD2 ++ mtctr r0 ++ sld rWORD1, rWORD1, rWORD6 ++ sld rWORD2, rWORD2, rWORD6 ++ cmpld cr7, rWORD1, rWORD2 + b L(dP4e) + + /* At this point we know both strings are double word aligned and the + compare length is at least 8 bytes. */ + .align 4 + L(DWaligned): +- andi. rBITDIF,rN,24 /* Get the DW remainder */ +- srdi rTMP,rN,5 /* Divide by 32 */ +- cmpldi cr1,rBITDIF,16 +- cmpldi cr7,rN,32 +- clrldi rN,rN,61 ++ andi. r12, rN, 24 /* Get the DW remainder */ ++ srdi r0, rN, 5 /* Divide by 32 */ ++ cmpldi cr1, r12, 16 ++ cmpldi cr7, rN, 32 ++ clrldi rN, rN, 61 + beq L(dP4) +- bgt cr1,L(dP3) +- beq cr1,L(dP2) ++ bgt cr1, L(dP3) ++ beq cr1, L(dP2) + + /* Remainder is 8 */ + .align 4 + L(dP1): +- mtctr rTMP ++ mtctr r0 + /* Normally we'd use rWORD7/rWORD8 here, but since we might exit early +- (8-15 byte compare), we want to use only volitile registers. This +- means we can avoid restoring non-volitile registers since we did not ++ (8-15 byte compare), we want to use only volatile registers. This ++ means we can avoid restoring non-volatile registers since we did not + change any on the early exit path. 
The key here is the non-early + exit path only cares about the condition code (cr5), not about which + register pair was used. */ +- ld rWORD5,0(rSTR1) +- ld rWORD6,0(rSTR2) +- cmpld cr5,rWORD5,rWORD6 +- blt cr7,L(dP1x) +- ld rWORD1,8(rSTR1) +- ld rWORD2,8(rSTR2) +- cmpld cr0,rWORD1,rWORD2 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD5, 0(rSTR1) ++ ld rWORD6, 0(rSTR2) ++#endif ++ cmpld cr5, rWORD5, rWORD6 ++ blt cr7, L(dP1x) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD1, 8(rSTR1) ++ ld rWORD2, 8(rSTR2) ++#endif ++ cmpld cr7, rWORD1, rWORD2 + L(dP1e): +- ld rWORD3,16(rSTR1) +- ld rWORD4,16(rSTR2) +- cmpld cr1,rWORD3,rWORD4 +- ld rWORD5,24(rSTR1) +- ld rWORD6,24(rSTR2) +- cmpld cr6,rWORD5,rWORD6 +- bne cr5,L(dLcr5) +- bne cr0,L(dLcr0) +- +- ldu rWORD7,32(rSTR1) +- ldu rWORD8,32(rSTR2) +- bne cr1,L(dLcr1) +- cmpld cr5,rWORD7,rWORD8 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD3, 16(rSTR1) ++ ld rWORD4, 16(rSTR2) ++#endif ++ cmpld cr1, rWORD3, rWORD4 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD5, 24(rSTR1) ++ ld rWORD6, 24(rSTR2) ++#endif ++ cmpld cr6, rWORD5, rWORD6 ++ bne cr5, L(dLcr5x) ++ bne cr7, L(dLcr7x) ++ ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ldu rWORD7, 32(rSTR1) ++ ldu rWORD8, 32(rSTR2) ++#endif ++ bne cr1, L(dLcr1) ++ cmpld cr5, rWORD7, rWORD8 + bdnz L(dLoop) +- bne cr6,L(dLcr6) +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) ++ bne cr6, L(dLcr6) ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) + .align 3 + L(dP1x): +- sldi. r12,rN,3 +- bne cr5,L(dLcr5) +- subfic rN,r12,64 /* Shift count is 64 - (rN * 8). */ ++ sldi. r12, rN, 3 ++ bne cr5, L(dLcr5x) ++ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). 
*/ + bne L(d00) +- li rRTN,0 ++ li rRTN, 0 + blr + + /* Remainder is 16 */ + .align 4 + L(dP2): +- mtctr rTMP +- ld rWORD5,0(rSTR1) +- ld rWORD6,0(rSTR2) +- cmpld cr6,rWORD5,rWORD6 +- blt cr7,L(dP2x) +- ld rWORD7,8(rSTR1) +- ld rWORD8,8(rSTR2) +- cmpld cr5,rWORD7,rWORD8 ++ mtctr r0 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD5, 0(rSTR1) ++ ld rWORD6, 0(rSTR2) ++#endif ++ cmpld cr6, rWORD5, rWORD6 ++ blt cr7, L(dP2x) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD7, 8(rSTR1) ++ ld rWORD8, 8(rSTR2) ++#endif ++ cmpld cr5, rWORD7, rWORD8 + L(dP2e): +- ld rWORD1,16(rSTR1) +- ld rWORD2,16(rSTR2) +- cmpld cr0,rWORD1,rWORD2 +- ld rWORD3,24(rSTR1) +- ld rWORD4,24(rSTR2) +- cmpld cr1,rWORD3,rWORD4 +- addi rSTR1,rSTR1,8 +- addi rSTR2,rSTR2,8 +- bne cr6,L(dLcr6) +- bne cr5,L(dLcr5) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD1, 16(rSTR1) ++ ld rWORD2, 16(rSTR2) ++#endif ++ cmpld cr7, rWORD1, rWORD2 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD3, 24(rSTR1) ++ ld rWORD4, 24(rSTR2) ++#endif ++ cmpld cr1, rWORD3, rWORD4 ++#ifndef __LITTLE_ENDIAN__ ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#endif ++ bne cr6, L(dLcr6) ++ bne cr5, L(dLcr5) + b L(dLoop2) + /* Again we are on a early exit path (16-23 byte compare), we want to +- only use volitile registers and avoid restoring non-volitile ++ only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 + L(dP2x): +- ld rWORD3,8(rSTR1) +- ld rWORD4,8(rSTR2) +- cmpld cr5,rWORD3,rWORD4 +- sldi. r12,rN,3 +- bne cr6,L(dLcr6) +- addi rSTR1,rSTR1,8 +- addi rSTR2,rSTR2,8 +- bne cr5,L(dLcr5) +- subfic rN,r12,64 /* Shift count is 64 - (rN * 8). */ ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD3, 8(rSTR1) ++ ld rWORD4, 8(rSTR2) ++#endif ++ cmpld cr1, rWORD3, rWORD4 ++ sldi. r12, rN, 3 ++ bne cr6, L(dLcr6x) ++#ifndef __LITTLE_ENDIAN__ ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#endif ++ bne cr1, L(dLcr1x) ++ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). 
*/ + bne L(d00) +- li rRTN,0 ++ li rRTN, 0 + blr + + /* Remainder is 24 */ + .align 4 + L(dP3): +- mtctr rTMP +- ld rWORD3,0(rSTR1) +- ld rWORD4,0(rSTR2) +- cmpld cr1,rWORD3,rWORD4 ++ mtctr r0 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD3, 0(rSTR1) ++ ld rWORD4, 0(rSTR2) ++#endif ++ cmpld cr1, rWORD3, rWORD4 + L(dP3e): +- ld rWORD5,8(rSTR1) +- ld rWORD6,8(rSTR2) +- cmpld cr6,rWORD5,rWORD6 +- blt cr7,L(dP3x) +- ld rWORD7,16(rSTR1) +- ld rWORD8,16(rSTR2) +- cmpld cr5,rWORD7,rWORD8 +- ld rWORD1,24(rSTR1) +- ld rWORD2,24(rSTR2) +- cmpld cr0,rWORD1,rWORD2 +- addi rSTR1,rSTR1,16 +- addi rSTR2,rSTR2,16 +- bne cr1,L(dLcr1) +- bne cr6,L(dLcr6) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD5, 8(rSTR1) ++ ld rWORD6, 8(rSTR2) ++#endif ++ cmpld cr6, rWORD5, rWORD6 ++ blt cr7, L(dP3x) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD7, 16(rSTR1) ++ ld rWORD8, 16(rSTR2) ++#endif ++ cmpld cr5, rWORD7, rWORD8 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD1, 24(rSTR1) ++ ld rWORD2, 24(rSTR2) ++#endif ++ cmpld cr7, rWORD1, rWORD2 ++#ifndef __LITTLE_ENDIAN__ ++ addi rSTR1, rSTR1, 16 ++ addi rSTR2, rSTR2, 16 ++#endif ++ bne cr1, L(dLcr1) ++ bne cr6, L(dLcr6) + b L(dLoop1) + /* Again we are on a early exit path (24-31 byte compare), we want to +- only use volitile registers and avoid restoring non-volitile ++ only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 + L(dP3x): +- ld rWORD1,16(rSTR1) +- ld rWORD2,16(rSTR2) +- cmpld cr5,rWORD1,rWORD2 +- sldi. r12,rN,3 +- bne cr1,L(dLcr1) +- addi rSTR1,rSTR1,16 +- addi rSTR2,rSTR2,16 +- bne cr6,L(dLcr6) +- subfic rN,r12,64 /* Shift count is 64 - (rN * 8). */ +- bne cr5,L(dLcr5) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD1, 16(rSTR1) ++ ld rWORD2, 16(rSTR2) ++#endif ++ cmpld cr7, rWORD1, rWORD2 ++ sldi. r12, rN, 3 ++ bne cr1, L(dLcr1x) ++#ifndef __LITTLE_ENDIAN__ ++ addi rSTR1, rSTR1, 16 ++ addi rSTR2, rSTR2, 16 ++#endif ++ bne cr6, L(dLcr6x) ++ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). 
*/ ++ bne cr7, L(dLcr7x) + bne L(d00) +- li rRTN,0 ++ li rRTN, 0 + blr + + /* Count is a multiple of 32, remainder is 0 */ + .align 4 + L(dP4): +- mtctr rTMP +- ld rWORD1,0(rSTR1) +- ld rWORD2,0(rSTR2) +- cmpld cr0,rWORD1,rWORD2 ++ mtctr r0 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD1, 0(rSTR1) ++ ld rWORD2, 0(rSTR2) ++#endif ++ cmpld cr7, rWORD1, rWORD2 + L(dP4e): +- ld rWORD3,8(rSTR1) +- ld rWORD4,8(rSTR2) +- cmpld cr1,rWORD3,rWORD4 +- ld rWORD5,16(rSTR1) +- ld rWORD6,16(rSTR2) +- cmpld cr6,rWORD5,rWORD6 +- ldu rWORD7,24(rSTR1) +- ldu rWORD8,24(rSTR2) +- cmpld cr5,rWORD7,rWORD8 +- bne cr0,L(dLcr0) +- bne cr1,L(dLcr1) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD3, 8(rSTR1) ++ ld rWORD4, 8(rSTR2) ++#endif ++ cmpld cr1, rWORD3, rWORD4 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD5, 16(rSTR1) ++ ld rWORD6, 16(rSTR2) ++#endif ++ cmpld cr6, rWORD5, rWORD6 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ldu rWORD7, 24(rSTR1) ++ ldu rWORD8, 24(rSTR2) ++#endif ++ cmpld cr5, rWORD7, rWORD8 ++ bne cr7, L(dLcr7) ++ bne cr1, L(dLcr1) + bdz- L(d24) /* Adjust CTR as we start with +4 */ + /* This is the primary loop */ + .align 4 + L(dLoop): +- ld rWORD1,8(rSTR1) +- ld rWORD2,8(rSTR2) +- cmpld cr1,rWORD3,rWORD4 +- bne cr6,L(dLcr6) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD1, 8(rSTR1) ++ ld rWORD2, 8(rSTR2) ++#endif ++ cmpld cr1, rWORD3, rWORD4 ++ bne cr6, L(dLcr6) + L(dLoop1): +- ld rWORD3,16(rSTR1) +- ld rWORD4,16(rSTR2) +- cmpld cr6,rWORD5,rWORD6 +- bne cr5,L(dLcr5) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD3, 16(rSTR1) ++ ld rWORD4, 16(rSTR2) ++#endif ++ cmpld cr6, rWORD5, rWORD6 ++ bne cr5, L(dLcr5) + L(dLoop2): +- ld rWORD5,24(rSTR1) +- ld rWORD6,24(rSTR2) +- cmpld cr5,rWORD7,rWORD8 +- bne cr0,L(dLcr0) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD5, 24(rSTR1) ++ ld rWORD6, 24(rSTR2) ++#endif ++ cmpld cr5, rWORD7, rWORD8 ++ bne cr7, L(dLcr7) + L(dLoop3): +- ldu rWORD7,32(rSTR1) +- ldu rWORD8,32(rSTR2) +- bne cr1,L(dLcr1) +- cmpld cr0,rWORD1,rWORD2 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ldu rWORD7, 32(rSTR1) ++ ldu rWORD8, 32(rSTR2) ++#endif ++ bne cr1, L(dLcr1) ++ cmpld cr7, rWORD1, rWORD2 + bdnz L(dLoop) + + L(dL4): +- cmpld cr1,rWORD3,rWORD4 +- bne cr6,L(dLcr6) +- cmpld cr6,rWORD5,rWORD6 +- bne cr5,L(dLcr5) +- cmpld cr5,rWORD7,rWORD8 ++ cmpld cr1, rWORD3, rWORD4 ++ bne cr6, L(dLcr6) ++ cmpld cr6, rWORD5, rWORD6 ++ bne cr5, L(dLcr5) ++ cmpld cr5, rWORD7, rWORD8 + L(d44): +- bne cr0,L(dLcr0) ++ bne cr7, L(dLcr7) + L(d34): +- bne cr1,L(dLcr1) ++ bne cr1, L(dLcr1) + L(d24): +- bne cr6,L(dLcr6) ++ bne cr6, L(dLcr6) + L(d14): +- sldi. r12,rN,3 +- bne cr5,L(dLcr5) ++ sldi. r12, rN, 3 ++ bne cr5, L(dLcr5) + L(d04): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) +- subfic rN,r12,64 /* Shift count is 64 - (rN * 8). 
*/ ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) ++ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + beq L(zeroLength) + /* At this point we have a remainder of 1 to 7 bytes to compare. Since + we are aligned it is safe to load the whole double word, and use +- shift right double to elliminate bits beyond the compare length. */ ++ shift right double to eliminate bits beyond the compare length. */ + L(d00): +- ld rWORD1,8(rSTR1) +- ld rWORD2,8(rSTR2) +- srd rWORD1,rWORD1,rN +- srd rWORD2,rWORD2,rN +- cmpld cr5,rWORD1,rWORD2 +- bne cr5,L(dLcr5x) +- li rRTN,0 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD1, 8(rSTR1) ++ ld rWORD2, 8(rSTR2) ++#endif ++ srd rWORD1, rWORD1, rN ++ srd rWORD2, rWORD2, rN ++ cmpld cr7, rWORD1, rWORD2 ++ bne cr7, L(dLcr7x) ++ li rRTN, 0 + blr ++ + .align 4 +-L(dLcr0): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) +- li rRTN,1 +- bgtlr cr0 +- li rRTN,-1 ++L(dLcr7): ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) ++L(dLcr7x): ++ li rRTN, 1 ++ bgtlr cr7 ++ li rRTN, -1 + blr + .align 4 + L(dLcr1): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) +- li rRTN,1 ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) ++L(dLcr1x): ++ li rRTN, 1 + bgtlr cr1 +- li rRTN,-1 ++ li rRTN, -1 + blr + .align 4 + L(dLcr6): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) +- li rRTN,1 ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) ++L(dLcr6x): ++ li rRTN, 1 + bgtlr cr6 +- li rRTN,-1 ++ li rRTN, -1 + blr + .align 4 + L(dLcr5): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) + L(dLcr5x): +- li rRTN,1 ++ li rRTN, 1 + bgtlr cr5 +- li rRTN,-1 ++ li rRTN, -1 + blr + + .align 4 + L(bytealigned): + mtctr rN +- beq cr6,L(zeroLength) ++#if 0 ++/* Huh? We've already branched on cr6! */ ++ beq cr6, L(zeroLength) ++#endif + + /* We need to prime this loop. This loop is swing modulo scheduled + to avoid pipe delays. The dependent instruction latencies (load to +@@ -401,38 +598,38 @@ + So we must precondition some registers and condition codes so that + we don't exit the loop early on the first iteration. */ + +- lbz rWORD1,0(rSTR1) +- lbz rWORD2,0(rSTR2) ++ lbz rWORD1, 0(rSTR1) ++ lbz rWORD2, 0(rSTR2) + bdz L(b11) +- cmpld cr0,rWORD1,rWORD2 +- lbz rWORD3,1(rSTR1) +- lbz rWORD4,1(rSTR2) ++ cmpld cr7, rWORD1, rWORD2 ++ lbz rWORD3, 1(rSTR1) ++ lbz rWORD4, 1(rSTR2) + bdz L(b12) +- cmpld cr1,rWORD3,rWORD4 +- lbzu rWORD5,2(rSTR1) +- lbzu rWORD6,2(rSTR2) ++ cmpld cr1, rWORD3, rWORD4 ++ lbzu rWORD5, 2(rSTR1) ++ lbzu rWORD6, 2(rSTR2) + bdz L(b13) + .align 4 + L(bLoop): +- lbzu rWORD1,1(rSTR1) +- lbzu rWORD2,1(rSTR2) +- bne cr0,L(bLcr0) ++ lbzu rWORD1, 1(rSTR1) ++ lbzu rWORD2, 1(rSTR2) ++ bne cr7, L(bLcr7) + +- cmpld cr6,rWORD5,rWORD6 ++ cmpld cr6, rWORD5, rWORD6 + bdz L(b3i) + +- lbzu rWORD3,1(rSTR1) +- lbzu rWORD4,1(rSTR2) +- bne cr1,L(bLcr1) ++ lbzu rWORD3, 1(rSTR1) ++ lbzu rWORD4, 1(rSTR2) ++ bne cr1, L(bLcr1) + +- cmpld cr0,rWORD1,rWORD2 ++ cmpld cr7, rWORD1, rWORD2 + bdz L(b2i) + +- lbzu rWORD5,1(rSTR1) +- lbzu rWORD6,1(rSTR2) +- bne cr6,L(bLcr6) ++ lbzu rWORD5, 1(rSTR1) ++ lbzu rWORD6, 1(rSTR2) ++ bne cr6, L(bLcr6) + +- cmpld cr1,rWORD3,rWORD4 ++ cmpld cr1, rWORD3, rWORD4 + bdnz L(bLoop) + + /* We speculatively loading bytes before we have tested the previous +@@ -442,542 +639,727 @@ + tested. In this case we must complete the pending operations + before returning. 
*/ + L(b1i): +- bne cr0,L(bLcr0) +- bne cr1,L(bLcr1) ++ bne cr7, L(bLcr7) ++ bne cr1, L(bLcr1) + b L(bx56) + .align 4 + L(b2i): +- bne cr6,L(bLcr6) +- bne cr0,L(bLcr0) ++ bne cr6, L(bLcr6) ++ bne cr7, L(bLcr7) + b L(bx34) + .align 4 + L(b3i): +- bne cr1,L(bLcr1) +- bne cr6,L(bLcr6) ++ bne cr1, L(bLcr1) ++ bne cr6, L(bLcr6) + b L(bx12) + .align 4 +-L(bLcr0): +- li rRTN,1 +- bgtlr cr0 +- li rRTN,-1 ++L(bLcr7): ++ li rRTN, 1 ++ bgtlr cr7 ++ li rRTN, -1 + blr + L(bLcr1): +- li rRTN,1 ++ li rRTN, 1 + bgtlr cr1 +- li rRTN,-1 ++ li rRTN, -1 + blr + L(bLcr6): +- li rRTN,1 ++ li rRTN, 1 + bgtlr cr6 +- li rRTN,-1 ++ li rRTN, -1 + blr + + L(b13): +- bne cr0,L(bx12) +- bne cr1,L(bx34) ++ bne cr7, L(bx12) ++ bne cr1, L(bx34) + L(bx56): +- sub rRTN,rWORD5,rWORD6 ++ sub rRTN, rWORD5, rWORD6 + blr + nop + L(b12): +- bne cr0,L(bx12) ++ bne cr7, L(bx12) + L(bx34): +- sub rRTN,rWORD3,rWORD4 ++ sub rRTN, rWORD3, rWORD4 + blr + L(b11): + L(bx12): +- sub rRTN,rWORD1,rWORD2 ++ sub rRTN, rWORD1, rWORD2 + blr + .align 4 +-L(zeroLengthReturn): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) + L(zeroLength): +- li rRTN,0 ++ li rRTN, 0 + blr + + .align 4 + /* At this point we know the strings have different alignment and the +- compare length is at least 8 bytes. rBITDIF containes the low order ++ compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare +- of rBITDIF to 0. If rBITDIF == 0 then rStr1 is double word ++ of r12 to 0. If r12 == 0 then rStr1 is double word + aligned and can perform the DWunaligned loop. + +- Otherwise we know that rSTR1 is not aready DW aligned yet. ++ Otherwise we know that rSTR1 is not already DW aligned yet. + So we can force the string addresses to the next lower DW +- boundary and special case this first DW word using shift left to +- ellimiate bits preceeding the first byte. Since we want to join the ++ boundary and special case this first DW using shift left to ++ eliminate bits preceding the first byte. Since we want to join the + normal (DWaligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop +- versioning for the first DW. This insures that the loop count is ++ versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected resister pair. */ +-#define rSHL r29 /* Unaligned shift left count. */ +-#define rSHR r28 /* Unaligned shift right count. */ +-#define rB r27 /* Left rotation temp for rWORD2. */ +-#define rD r26 /* Left rotation temp for rWORD4. */ +-#define rF r25 /* Left rotation temp for rWORD6. */ +-#define rH r24 /* Left rotation temp for rWORD8. */ +-#define rA r0 /* Right rotation temp for rWORD2. */ +-#define rC r12 /* Right rotation temp for rWORD4. */ +-#define rE r0 /* Right rotation temp for rWORD6. */ +-#define rG r12 /* Right rotation temp for rWORD8. */ ++#define rSHL r29 /* Unaligned shift left count. */ ++#define rSHR r28 /* Unaligned shift right count. */ ++#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */ ++#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */ ++#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */ ++#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. 
*/ + L(unaligned): +- std r29,-24(r1) +- cfi_offset(r29,-24) +- clrldi rSHL,rSTR2,61 +- beq cr6,L(duzeroLength) +- std r28,-32(r1) +- cfi_offset(r28,-32) +- beq cr5,L(DWunaligned) +- std r27,-40(r1) +- cfi_offset(r27,-40) +-/* Adjust the logical start of rSTR2 ro compensate for the extra bits ++ std rSHL, -24(r1) ++ cfi_offset(rSHL, -24) ++ clrldi rSHL, rSTR2, 61 ++ beq cr6, L(duzeroLength) ++ std rSHR, -32(r1) ++ cfi_offset(rSHR, -32) ++ beq cr5, L(DWunaligned) ++ std rWORD8_SHIFT, -40(r1) ++ cfi_offset(rWORD8_SHIFT, -40) ++/* Adjust the logical start of rSTR2 to compensate for the extra bits + in the 1st rSTR1 DW. */ +- sub r27,rSTR2,rBITDIF ++ sub rWORD8_SHIFT, rSTR2, r12 + /* But do not attempt to address the DW before that DW that contains + the actual start of rSTR2. */ +- clrrdi rSTR2,rSTR2,3 +- std r26,-48(r1) +- cfi_offset(r26,-48) +-/* Compute the leaft/right shift counts for the unalign rSTR2, ++ clrrdi rSTR2, rSTR2, 3 ++ std rWORD2_SHIFT, -48(r1) ++ cfi_offset(rWORD2_SHIFT, -48) ++/* Compute the left/right shift counts for the unaligned rSTR2, + compensating for the logical (DW aligned) start of rSTR1. */ +- clrldi rSHL,r27,61 +- clrrdi rSTR1,rSTR1,3 +- std r25,-56(r1) +- cfi_offset(r25,-56) +- sldi rSHL,rSHL,3 +- cmpld cr5,r27,rSTR2 +- add rN,rN,rBITDIF +- sldi r11,rBITDIF,3 +- std r24,-64(r1) +- cfi_offset(r24,-64) +- subfic rSHR,rSHL,64 +- srdi rTMP,rN,5 /* Divide by 32 */ +- andi. rBITDIF,rN,24 /* Get the DW remainder */ ++ clrldi rSHL, rWORD8_SHIFT, 61 ++ clrrdi rSTR1, rSTR1, 3 ++ std rWORD4_SHIFT, -56(r1) ++ cfi_offset(rWORD4_SHIFT, -56) ++ sldi rSHL, rSHL, 3 ++ cmpld cr5, rWORD8_SHIFT, rSTR2 ++ add rN, rN, r12 ++ sldi rWORD6, r12, 3 ++ std rWORD6_SHIFT, -64(r1) ++ cfi_offset(rWORD6_SHIFT, -64) ++ subfic rSHR, rSHL, 64 ++ srdi r0, rN, 5 /* Divide by 32 */ ++ andi. r12, rN, 24 /* Get the DW remainder */ + /* We normally need to load 2 DWs to start the unaligned rSTR2, but in + this special case those bits may be discarded anyway. Also we + must avoid loading a DW where none of the bits are part of rSTR2 as + this may cross a page boundary and cause a page fault. */ +- li rWORD8,0 +- blt cr5,L(dus0) +- ld rWORD8,0(rSTR2) +- la rSTR2,8(rSTR2) +- sld rWORD8,rWORD8,rSHL ++ li rWORD8, 0 ++ blt cr5, L(dus0) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD8, 0(rSTR2) ++ addi rSTR2, rSTR2, 8 ++#endif ++ sld rWORD8, rWORD8, rSHL + + L(dus0): +- ld rWORD1,0(rSTR1) +- ld rWORD2,0(rSTR2) +- cmpldi cr1,rBITDIF,16 +- cmpldi cr7,rN,32 +- srd rG,rWORD2,rSHR +- clrldi rN,rN,61 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD1, 0(rSTR1) ++ ld rWORD2, 0(rSTR2) ++#endif ++ cmpldi cr1, r12, 16 ++ cmpldi cr7, rN, 32 ++ srd r12, rWORD2, rSHR ++ clrldi rN, rN, 61 + beq L(duPs4) +- mtctr rTMP +- or rWORD8,rG,rWORD8 +- bgt cr1,L(duPs3) +- beq cr1,L(duPs2) ++ mtctr r0 ++ or rWORD8, r12, rWORD8 ++ bgt cr1, L(duPs3) ++ beq cr1, L(duPs2) + + /* Remainder is 8 */ + .align 4 + L(dusP1): +- sld rB,rWORD2,rSHL +- sld rWORD7,rWORD1,r11 +- sld rWORD8,rWORD8,r11 +- bge cr7,L(duP1e) ++ sld rWORD8_SHIFT, rWORD2, rSHL ++ sld rWORD7, rWORD1, rWORD6 ++ sld rWORD8, rWORD8, rWORD6 ++ bge cr7, L(duP1e) + /* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ +- cmpld cr5,rWORD7,rWORD8 +- sldi. 
rN,rN,3 +- bne cr5,L(duLcr5) +- cmpld cr7,rN,rSHR ++ cmpld cr5, rWORD7, rWORD8 ++ sldi. rN, rN, 3 ++ bne cr5, L(duLcr5) ++ cmpld cr7, rN, rSHR + beq L(duZeroReturn) +- li rA,0 +- ble cr7,L(dutrim) +- ld rWORD2,8(rSTR2) +- srd rA,rWORD2,rSHR ++ li r0, 0 ++ ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD2, 8(rSTR2) ++#endif ++ srd r0, rWORD2, rSHR + b L(dutrim) + /* Remainder is 16 */ + .align 4 + L(duPs2): +- sld rH,rWORD2,rSHL +- sld rWORD5,rWORD1,r11 +- sld rWORD6,rWORD8,r11 ++ sld rWORD6_SHIFT, rWORD2, rSHL ++ sld rWORD5, rWORD1, rWORD6 ++ sld rWORD6, rWORD8, rWORD6 + b L(duP2e) + /* Remainder is 24 */ + .align 4 + L(duPs3): +- sld rF,rWORD2,rSHL +- sld rWORD3,rWORD1,r11 +- sld rWORD4,rWORD8,r11 ++ sld rWORD4_SHIFT, rWORD2, rSHL ++ sld rWORD3, rWORD1, rWORD6 ++ sld rWORD4, rWORD8, rWORD6 + b L(duP3e) + /* Count is a multiple of 32, remainder is 0 */ + .align 4 + L(duPs4): +- mtctr rTMP +- or rWORD8,rG,rWORD8 +- sld rD,rWORD2,rSHL +- sld rWORD1,rWORD1,r11 +- sld rWORD2,rWORD8,r11 ++ mtctr r0 ++ or rWORD8, r12, rWORD8 ++ sld rWORD2_SHIFT, rWORD2, rSHL ++ sld rWORD1, rWORD1, rWORD6 ++ sld rWORD2, rWORD8, rWORD6 + b L(duP4e) + + /* At this point we know rSTR1 is double word aligned and the + compare length is at least 8 bytes. */ + .align 4 + L(DWunaligned): +- std r27,-40(r1) +- cfi_offset(r27,-40) +- clrrdi rSTR2,rSTR2,3 +- std r26,-48(r1) +- cfi_offset(r26,-48) +- srdi rTMP,rN,5 /* Divide by 32 */ +- std r25,-56(r1) +- cfi_offset(r25,-56) +- andi. rBITDIF,rN,24 /* Get the DW remainder */ +- std r24,-64(r1) +- cfi_offset(r24,-64) +- sldi rSHL,rSHL,3 +- ld rWORD6,0(rSTR2) +- ldu rWORD8,8(rSTR2) +- cmpldi cr1,rBITDIF,16 +- cmpldi cr7,rN,32 +- clrldi rN,rN,61 +- subfic rSHR,rSHL,64 +- sld rH,rWORD6,rSHL ++ std rWORD8_SHIFT, -40(r1) ++ cfi_offset(rWORD8_SHIFT, -40) ++ clrrdi rSTR2, rSTR2, 3 ++ std rWORD2_SHIFT, -48(r1) ++ cfi_offset(rWORD2_SHIFT, -48) ++ srdi r0, rN, 5 /* Divide by 32 */ ++ std rWORD4_SHIFT, -56(r1) ++ cfi_offset(rWORD4_SHIFT, -56) ++ andi. 
r12, rN, 24 /* Get the DW remainder */ ++ std rWORD6_SHIFT, -64(r1) ++ cfi_offset(rWORD6_SHIFT, -64) ++ sldi rSHL, rSHL, 3 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR2, rSTR2, 8 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD6, 0(rSTR2) ++ ldu rWORD8, 8(rSTR2) ++#endif ++ cmpldi cr1, r12, 16 ++ cmpldi cr7, rN, 32 ++ clrldi rN, rN, 61 ++ subfic rSHR, rSHL, 64 ++ sld rWORD6_SHIFT, rWORD6, rSHL + beq L(duP4) +- mtctr rTMP +- bgt cr1,L(duP3) +- beq cr1,L(duP2) ++ mtctr r0 ++ bgt cr1, L(duP3) ++ beq cr1, L(duP2) + + /* Remainder is 8 */ + .align 4 + L(duP1): +- srd rG,rWORD8,rSHR +- ld rWORD7,0(rSTR1) +- sld rB,rWORD8,rSHL +- or rWORD8,rG,rH +- blt cr7,L(duP1x) ++ srd r12, rWORD8, rSHR ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ addi rSTR1, rSTR1, 8 ++#else ++ ld rWORD7, 0(rSTR1) ++#endif ++ sld rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT ++ blt cr7, L(duP1x) + L(duP1e): +- ld rWORD1,8(rSTR1) +- ld rWORD2,8(rSTR2) +- cmpld cr5,rWORD7,rWORD8 +- srd rA,rWORD2,rSHR +- sld rD,rWORD2,rSHL +- or rWORD2,rA,rB +- ld rWORD3,16(rSTR1) +- ld rWORD4,16(rSTR2) +- cmpld cr0,rWORD1,rWORD2 +- srd rC,rWORD4,rSHR +- sld rF,rWORD4,rSHL +- bne cr5,L(duLcr5) +- or rWORD4,rC,rD +- ld rWORD5,24(rSTR1) +- ld rWORD6,24(rSTR2) +- cmpld cr1,rWORD3,rWORD4 +- srd rE,rWORD6,rSHR +- sld rH,rWORD6,rSHL +- bne cr0,L(duLcr0) +- or rWORD6,rE,rF +- cmpld cr6,rWORD5,rWORD6 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD1, 8(rSTR1) ++ ld rWORD2, 8(rSTR2) ++#endif ++ cmpld cr5, rWORD7, rWORD8 ++ srd r0, rWORD2, rSHR ++ sld rWORD2_SHIFT, rWORD2, rSHL ++ or rWORD2, r0, rWORD8_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD3, 16(rSTR1) ++ ld rWORD4, 16(rSTR2) ++#endif ++ cmpld cr7, rWORD1, rWORD2 ++ srd r12, rWORD4, rSHR ++ sld rWORD4_SHIFT, rWORD4, rSHL ++ bne cr5, L(duLcr5) ++ or rWORD4, r12, rWORD2_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD5, 24(rSTR1) ++ ld rWORD6, 24(rSTR2) ++#endif ++ cmpld cr1, rWORD3, rWORD4 ++ srd r0, rWORD6, rSHR ++ sld rWORD6_SHIFT, rWORD6, rSHL ++ bne cr7, L(duLcr7) ++ or rWORD6, r0, rWORD4_SHIFT ++ cmpld cr6, rWORD5, rWORD6 + b L(duLoop3) + .align 4 + /* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ + L(duP1x): +- cmpld cr5,rWORD7,rWORD8 +- sldi. rN,rN,3 +- bne cr5,L(duLcr5) +- cmpld cr7,rN,rSHR ++ cmpld cr5, rWORD7, rWORD8 ++ sldi. 
rN, rN, 3 ++ bne cr5, L(duLcr5) ++ cmpld cr7, rN, rSHR + beq L(duZeroReturn) +- li rA,0 +- ble cr7,L(dutrim) +- ld rWORD2,8(rSTR2) +- srd rA,rWORD2,rSHR ++ li r0, 0 ++ ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD2, 8(rSTR2) ++#endif ++ srd r0, rWORD2, rSHR + b L(dutrim) + /* Remainder is 16 */ + .align 4 + L(duP2): +- srd rE,rWORD8,rSHR +- ld rWORD5,0(rSTR1) +- or rWORD6,rE,rH +- sld rH,rWORD8,rSHL ++ srd r0, rWORD8, rSHR ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ addi rSTR1, rSTR1, 8 ++#else ++ ld rWORD5, 0(rSTR1) ++#endif ++ or rWORD6, r0, rWORD6_SHIFT ++ sld rWORD6_SHIFT, rWORD8, rSHL + L(duP2e): +- ld rWORD7,8(rSTR1) +- ld rWORD8,8(rSTR2) +- cmpld cr6,rWORD5,rWORD6 +- srd rG,rWORD8,rSHR +- sld rB,rWORD8,rSHL +- or rWORD8,rG,rH +- blt cr7,L(duP2x) +- ld rWORD1,16(rSTR1) +- ld rWORD2,16(rSTR2) +- cmpld cr5,rWORD7,rWORD8 +- bne cr6,L(duLcr6) +- srd rA,rWORD2,rSHR +- sld rD,rWORD2,rSHL +- or rWORD2,rA,rB +- ld rWORD3,24(rSTR1) +- ld rWORD4,24(rSTR2) +- cmpld cr0,rWORD1,rWORD2 +- bne cr5,L(duLcr5) +- srd rC,rWORD4,rSHR +- sld rF,rWORD4,rSHL +- or rWORD4,rC,rD +- addi rSTR1,rSTR1,8 +- addi rSTR2,rSTR2,8 +- cmpld cr1,rWORD3,rWORD4 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD7, 8(rSTR1) ++ ld rWORD8, 8(rSTR2) ++#endif ++ cmpld cr6, rWORD5, rWORD6 ++ srd r12, rWORD8, rSHR ++ sld rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT ++ blt cr7, L(duP2x) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD1, 16(rSTR1) ++ ld rWORD2, 16(rSTR2) ++#endif ++ cmpld cr5, rWORD7, rWORD8 ++ bne cr6, L(duLcr6) ++ srd r0, rWORD2, rSHR ++ sld rWORD2_SHIFT, rWORD2, rSHL ++ or rWORD2, r0, rWORD8_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD3, 24(rSTR1) ++ ld rWORD4, 24(rSTR2) ++#endif ++ cmpld cr7, rWORD1, rWORD2 ++ bne cr5, L(duLcr5) ++ srd r12, rWORD4, rSHR ++ sld rWORD4_SHIFT, rWORD4, rSHL ++ or rWORD4, r12, rWORD2_SHIFT ++#ifndef __LITTLE_ENDIAN__ ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#endif ++ cmpld cr1, rWORD3, rWORD4 + b L(duLoop2) + .align 4 + L(duP2x): +- cmpld cr5,rWORD7,rWORD8 +- addi rSTR1,rSTR1,8 +- addi rSTR2,rSTR2,8 +- bne cr6,L(duLcr6) +- sldi. rN,rN,3 +- bne cr5,L(duLcr5) +- cmpld cr7,rN,rSHR ++ cmpld cr5, rWORD7, rWORD8 ++#ifndef __LITTLE_ENDIAN__ ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#endif ++ bne cr6, L(duLcr6) ++ sldi. 
rN, rN, 3 ++ bne cr5, L(duLcr5) ++ cmpld cr7, rN, rSHR + beq L(duZeroReturn) +- li rA,0 +- ble cr7,L(dutrim) +- ld rWORD2,8(rSTR2) +- srd rA,rWORD2,rSHR ++ li r0, 0 ++ ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD2, 8(rSTR2) ++#endif ++ srd r0, rWORD2, rSHR + b L(dutrim) + + /* Remainder is 24 */ + .align 4 + L(duP3): +- srd rC,rWORD8,rSHR +- ld rWORD3,0(rSTR1) +- sld rF,rWORD8,rSHL +- or rWORD4,rC,rH ++ srd r12, rWORD8, rSHR ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ addi rSTR1, rSTR1, 8 ++#else ++ ld rWORD3, 0(rSTR1) ++#endif ++ sld rWORD4_SHIFT, rWORD8, rSHL ++ or rWORD4, r12, rWORD6_SHIFT + L(duP3e): +- ld rWORD5,8(rSTR1) +- ld rWORD6,8(rSTR2) +- cmpld cr1,rWORD3,rWORD4 +- srd rE,rWORD6,rSHR +- sld rH,rWORD6,rSHL +- or rWORD6,rE,rF +- ld rWORD7,16(rSTR1) +- ld rWORD8,16(rSTR2) +- cmpld cr6,rWORD5,rWORD6 +- bne cr1,L(duLcr1) +- srd rG,rWORD8,rSHR +- sld rB,rWORD8,rSHL +- or rWORD8,rG,rH +- blt cr7,L(duP3x) +- ld rWORD1,24(rSTR1) +- ld rWORD2,24(rSTR2) +- cmpld cr5,rWORD7,rWORD8 +- bne cr6,L(duLcr6) +- srd rA,rWORD2,rSHR +- sld rD,rWORD2,rSHL +- or rWORD2,rA,rB +- addi rSTR1,rSTR1,16 +- addi rSTR2,rSTR2,16 +- cmpld cr0,rWORD1,rWORD2 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD5, 8(rSTR1) ++ ld rWORD6, 8(rSTR2) ++#endif ++ cmpld cr1, rWORD3, rWORD4 ++ srd r0, rWORD6, rSHR ++ sld rWORD6_SHIFT, rWORD6, rSHL ++ or rWORD6, r0, rWORD4_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD7, 16(rSTR1) ++ ld rWORD8, 16(rSTR2) ++#endif ++ cmpld cr6, rWORD5, rWORD6 ++ bne cr1, L(duLcr1) ++ srd r12, rWORD8, rSHR ++ sld rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT ++ blt cr7, L(duP3x) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD1, 24(rSTR1) ++ ld rWORD2, 24(rSTR2) ++#endif ++ cmpld cr5, rWORD7, rWORD8 ++ bne cr6, L(duLcr6) ++ srd r0, rWORD2, rSHR ++ sld rWORD2_SHIFT, rWORD2, rSHL ++ or rWORD2, r0, rWORD8_SHIFT ++#ifndef __LITTLE_ENDIAN__ ++ addi rSTR1, rSTR1, 16 ++ addi rSTR2, rSTR2, 16 ++#endif ++ cmpld cr7, rWORD1, rWORD2 + b L(duLoop1) + .align 4 + L(duP3x): +- addi rSTR1,rSTR1,16 +- addi rSTR2,rSTR2,16 +- bne cr1,L(duLcr1) +- cmpld cr5,rWORD7,rWORD8 +- bne cr6,L(duLcr6) +- sldi. rN,rN,3 +- bne cr5,L(duLcr5) +- cmpld cr7,rN,rSHR ++#ifndef __LITTLE_ENDIAN__ ++ addi rSTR1, rSTR1, 16 ++ addi rSTR2, rSTR2, 16 ++#endif ++#if 0 ++/* Huh? We've already branched on cr1! */ ++ bne cr1, L(duLcr1) ++#endif ++ cmpld cr5, rWORD7, rWORD8 ++ bne cr6, L(duLcr6) ++ sldi. 
rN, rN, 3 ++ bne cr5, L(duLcr5) ++ cmpld cr7, rN, rSHR + beq L(duZeroReturn) +- li rA,0 +- ble cr7,L(dutrim) +- ld rWORD2,8(rSTR2) +- srd rA,rWORD2,rSHR ++ li r0, 0 ++ ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD2, 8(rSTR2) ++#endif ++ srd r0, rWORD2, rSHR + b L(dutrim) + + /* Count is a multiple of 32, remainder is 0 */ + .align 4 + L(duP4): +- mtctr rTMP +- srd rA,rWORD8,rSHR +- ld rWORD1,0(rSTR1) +- sld rD,rWORD8,rSHL +- or rWORD2,rA,rH ++ mtctr r0 ++ srd r0, rWORD8, rSHR ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ addi rSTR1, rSTR1, 8 ++#else ++ ld rWORD1, 0(rSTR1) ++#endif ++ sld rWORD2_SHIFT, rWORD8, rSHL ++ or rWORD2, r0, rWORD6_SHIFT + L(duP4e): +- ld rWORD3,8(rSTR1) +- ld rWORD4,8(rSTR2) +- cmpld cr0,rWORD1,rWORD2 +- srd rC,rWORD4,rSHR +- sld rF,rWORD4,rSHL +- or rWORD4,rC,rD +- ld rWORD5,16(rSTR1) +- ld rWORD6,16(rSTR2) +- cmpld cr1,rWORD3,rWORD4 +- bne cr0,L(duLcr0) +- srd rE,rWORD6,rSHR +- sld rH,rWORD6,rSHL +- or rWORD6,rE,rF +- ldu rWORD7,24(rSTR1) +- ldu rWORD8,24(rSTR2) +- cmpld cr6,rWORD5,rWORD6 +- bne cr1,L(duLcr1) +- srd rG,rWORD8,rSHR +- sld rB,rWORD8,rSHL +- or rWORD8,rG,rH +- cmpld cr5,rWORD7,rWORD8 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD3, 8(rSTR1) ++ ld rWORD4, 8(rSTR2) ++#endif ++ cmpld cr7, rWORD1, rWORD2 ++ srd r12, rWORD4, rSHR ++ sld rWORD4_SHIFT, rWORD4, rSHL ++ or rWORD4, r12, rWORD2_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD5, 16(rSTR1) ++ ld rWORD6, 16(rSTR2) ++#endif ++ cmpld cr1, rWORD3, rWORD4 ++ bne cr7, L(duLcr7) ++ srd r0, rWORD6, rSHR ++ sld rWORD6_SHIFT, rWORD6, rSHL ++ or rWORD6, r0, rWORD4_SHIFT ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ldu rWORD7, 24(rSTR1) ++ ldu rWORD8, 24(rSTR2) ++#endif ++ cmpld cr6, rWORD5, rWORD6 ++ bne cr1, L(duLcr1) ++ srd r12, rWORD8, rSHR ++ sld rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT ++ cmpld cr5, rWORD7, rWORD8 + bdz L(du24) /* Adjust CTR as we start with +4 */ + /* This is the primary loop */ + .align 4 + L(duLoop): +- ld rWORD1,8(rSTR1) +- ld rWORD2,8(rSTR2) +- cmpld cr1,rWORD3,rWORD4 +- bne cr6,L(duLcr6) +- srd rA,rWORD2,rSHR +- sld rD,rWORD2,rSHL +- or rWORD2,rA,rB ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD1, 8(rSTR1) ++ ld rWORD2, 8(rSTR2) ++#endif ++ cmpld cr1, rWORD3, rWORD4 ++ bne cr6, L(duLcr6) ++ srd r0, rWORD2, rSHR ++ sld rWORD2_SHIFT, rWORD2, rSHL ++ or rWORD2, r0, rWORD8_SHIFT + L(duLoop1): +- ld rWORD3,16(rSTR1) +- ld rWORD4,16(rSTR2) +- cmpld cr6,rWORD5,rWORD6 +- bne cr5,L(duLcr5) +- srd rC,rWORD4,rSHR +- sld rF,rWORD4,rSHL +- or rWORD4,rC,rD ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD3, 0, rSTR1 ++ ldbrx rWORD4, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD3, 16(rSTR1) ++ ld rWORD4, 16(rSTR2) ++#endif ++ cmpld cr6, rWORD5, rWORD6 ++ bne cr5, L(duLcr5) ++ srd r12, rWORD4, rSHR ++ sld rWORD4_SHIFT, rWORD4, rSHL ++ or rWORD4, r12, rWORD2_SHIFT + L(duLoop2): +- ld rWORD5,24(rSTR1) +- ld rWORD6,24(rSTR2) +- cmpld cr5,rWORD7,rWORD8 +- bne cr0,L(duLcr0) +- srd rE,rWORD6,rSHR +- sld rH,rWORD6,rSHL +- or rWORD6,rE,rF ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD5, 0, rSTR1 ++ 
ldbrx rWORD6, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD5, 24(rSTR1) ++ ld rWORD6, 24(rSTR2) ++#endif ++ cmpld cr5, rWORD7, rWORD8 ++ bne cr7, L(duLcr7) ++ srd r0, rWORD6, rSHR ++ sld rWORD6_SHIFT, rWORD6, rSHL ++ or rWORD6, r0, rWORD4_SHIFT + L(duLoop3): +- ldu rWORD7,32(rSTR1) +- ldu rWORD8,32(rSTR2) +- cmpld cr0,rWORD1,rWORD2 +- bne- cr1,L(duLcr1) +- srd rG,rWORD8,rSHR +- sld rB,rWORD8,rSHL +- or rWORD8,rG,rH ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD7, 0, rSTR1 ++ ldbrx rWORD8, 0, rSTR2 ++ addi rSTR1, rSTR1, 8 ++ addi rSTR2, rSTR2, 8 ++#else ++ ldu rWORD7, 32(rSTR1) ++ ldu rWORD8, 32(rSTR2) ++#endif ++ cmpld cr7, rWORD1, rWORD2 ++ bne cr1, L(duLcr1) ++ srd r12, rWORD8, rSHR ++ sld rWORD8_SHIFT, rWORD8, rSHL ++ or rWORD8, r12, rWORD6_SHIFT + bdnz L(duLoop) + + L(duL4): +- bne cr1,L(duLcr1) +- cmpld cr1,rWORD3,rWORD4 +- bne cr6,L(duLcr6) +- cmpld cr6,rWORD5,rWORD6 +- bne cr5,L(duLcr5) +- cmpld cr5,rWORD7,rWORD8 ++#if 0 ++/* Huh? We've already branched on cr1! */ ++ bne cr1, L(duLcr1) ++#endif ++ cmpld cr1, rWORD3, rWORD4 ++ bne cr6, L(duLcr6) ++ cmpld cr6, rWORD5, rWORD6 ++ bne cr5, L(duLcr5) ++ cmpld cr5, rWORD7, rWORD8 + L(du44): +- bne cr0,L(duLcr0) ++ bne cr7, L(duLcr7) + L(du34): +- bne cr1,L(duLcr1) ++ bne cr1, L(duLcr1) + L(du24): +- bne cr6,L(duLcr6) ++ bne cr6, L(duLcr6) + L(du14): +- sldi. rN,rN,3 +- bne cr5,L(duLcr5) ++ sldi. rN, rN, 3 ++ bne cr5, L(duLcr5) + /* At this point we have a remainder of 1 to 7 bytes to compare. We use +- shift right double to elliminate bits beyond the compare length. +- This allows the use of double word subtract to compute the final +- result. ++ shift right double to eliminate bits beyond the compare length. + + However it may not be safe to load rWORD2 which may be beyond the + string length. So we compare the bit length of the remainder to + the right shift count (rSHR). If the bit count is less than or equal + we do not need to load rWORD2 (all significant bits are already in +- rB). */ +- cmpld cr7,rN,rSHR ++ rWORD8_SHIFT). */ ++ cmpld cr7, rN, rSHR + beq L(duZeroReturn) +- li rA,0 +- ble cr7,L(dutrim) +- ld rWORD2,8(rSTR2) +- srd rA,rWORD2,rSHR ++ li r0, 0 ++ ble cr7, L(dutrim) ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD2, 0, rSTR2 ++ addi rSTR2, rSTR2, 8 ++#else ++ ld rWORD2, 8(rSTR2) ++#endif ++ srd r0, rWORD2, rSHR + .align 4 + L(dutrim): +- ld rWORD1,8(rSTR1) +- ld rWORD8,-8(r1) +- subfic rN,rN,64 /* Shift count is 64 - (rN * 8). */ +- or rWORD2,rA,rB +- ld rWORD7,-16(r1) +- ld r29,-24(r1) +- srd rWORD1,rWORD1,rN +- srd rWORD2,rWORD2,rN +- ld r28,-32(r1) +- ld r27,-40(r1) +- li rRTN,0 +- cmpld cr0,rWORD1,rWORD2 +- ld r26,-48(r1) +- ld r25,-56(r1) +- beq cr0,L(dureturn24) +- li rRTN,1 +- ld r24,-64(r1) +- bgtlr cr0 +- li rRTN,-1 +- blr +- .align 4 +-L(duLcr0): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) +- li rRTN,1 +- bgt cr0,L(dureturn29) +- ld r29,-24(r1) +- ld r28,-32(r1) +- li rRTN,-1 ++#ifdef __LITTLE_ENDIAN__ ++ ldbrx rWORD1, 0, rSTR1 ++#else ++ ld rWORD1, 8(rSTR1) ++#endif ++ ld rWORD8, -8(r1) ++ subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). 
*/ ++ or rWORD2, r0, rWORD8_SHIFT ++ ld rWORD7, -16(r1) ++ ld rSHL, -24(r1) ++ srd rWORD1, rWORD1, rN ++ srd rWORD2, rWORD2, rN ++ ld rSHR, -32(r1) ++ ld rWORD8_SHIFT, -40(r1) ++ li rRTN, 0 ++ cmpld cr7, rWORD1, rWORD2 ++ ld rWORD2_SHIFT, -48(r1) ++ ld rWORD4_SHIFT, -56(r1) ++ beq cr7, L(dureturn24) ++ li rRTN, 1 ++ ld rWORD6_SHIFT, -64(r1) ++ bgtlr cr7 ++ li rRTN, -1 ++ blr ++ .align 4 ++L(duLcr7): ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) ++ li rRTN, 1 ++ bgt cr7, L(dureturn29) ++ ld rSHL, -24(r1) ++ ld rSHR, -32(r1) ++ li rRTN, -1 + b L(dureturn27) + .align 4 + L(duLcr1): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) +- li rRTN,1 +- bgt cr1,L(dureturn29) +- ld r29,-24(r1) +- ld r28,-32(r1) +- li rRTN,-1 ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) ++ li rRTN, 1 ++ bgt cr1, L(dureturn29) ++ ld rSHL, -24(r1) ++ ld rSHR, -32(r1) ++ li rRTN, -1 + b L(dureturn27) + .align 4 + L(duLcr6): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) +- li rRTN,1 +- bgt cr6,L(dureturn29) +- ld r29,-24(r1) +- ld r28,-32(r1) +- li rRTN,-1 ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) ++ li rRTN, 1 ++ bgt cr6, L(dureturn29) ++ ld rSHL, -24(r1) ++ ld rSHR, -32(r1) ++ li rRTN, -1 + b L(dureturn27) + .align 4 + L(duLcr5): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) +- li rRTN,1 +- bgt cr5,L(dureturn29) +- ld r29,-24(r1) +- ld r28,-32(r1) +- li rRTN,-1 ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) ++ li rRTN, 1 ++ bgt cr5, L(dureturn29) ++ ld rSHL, -24(r1) ++ ld rSHR, -32(r1) ++ li rRTN, -1 + b L(dureturn27) + .align 3 + L(duZeroReturn): +- li rRTN,0 ++ li rRTN, 0 + .align 4 + L(dureturn): +- ld rWORD8,-8(r1) +- ld rWORD7,-16(r1) ++ ld rWORD8, -8(r1) ++ ld rWORD7, -16(r1) + L(dureturn29): +- ld r29,-24(r1) +- ld r28,-32(r1) ++ ld rSHL, -24(r1) ++ ld rSHR, -32(r1) + L(dureturn27): +- ld r27,-40(r1) ++ ld rWORD8_SHIFT, -40(r1) + L(dureturn26): +- ld r26,-48(r1) ++ ld rWORD2_SHIFT, -48(r1) + L(dureturn25): +- ld r25,-56(r1) ++ ld rWORD4_SHIFT, -56(r1) + L(dureturn24): +- ld r24,-64(r1) ++ ld rWORD6_SHIFT, -64(r1) + blr + L(duzeroLength): +- li rRTN,0 ++ li rRTN, 0 + blr + +-END (BP_SYM (memcmp)) ++END (memcmp) + libc_hidden_builtin_def (memcmp) +-weak_alias (memcmp,bcmp) ++weak_alias (memcmp, bcmp) diff --git a/packages/glibc/2.17/0053-glibc-ppc64le-31.patch b/packages/glibc/2.17/0053-glibc-ppc64le-31.patch new file mode 100644 index 0000000..de90661 --- /dev/null +++ b/packages/glibc/2.17/0053-glibc-ppc64le-31.patch @@ -0,0 +1,2943 @@ +# commit 759cfef3ac4c07dba1ece0bbc1207e099348816d +# Author: Alan Modra +# Date: Sat Aug 17 18:47:22 2013 +0930 +# +# PowerPC LE memcpy +# http://sourceware.org/ml/libc-alpha/2013-08/msg00103.html +# +# LIttle-endian support for memcpy. I spent some time cleaning up the +# 64-bit power7 memcpy, in order to avoid the extra alignment traps +# power7 takes for little-endian. It probably would have been better +# to copy the linux kernel version of memcpy. +# +# * sysdeps/powerpc/powerpc32/power4/memcpy.S: Add little endian support. +# * sysdeps/powerpc/powerpc32/power6/memcpy.S: Likewise. +# * sysdeps/powerpc/powerpc32/power7/memcpy.S: Likewise. +# * sysdeps/powerpc/powerpc32/power7/mempcpy.S: Likewise. +# * sysdeps/powerpc/powerpc64/memcpy.S: Likewise. +# * sysdeps/powerpc/powerpc64/power4/memcpy.S: Likewise. +# * sysdeps/powerpc/powerpc64/power6/memcpy.S: Likewise. +# * sysdeps/powerpc/powerpc64/power7/memcpy.S: Likewise. +# * sysdeps/powerpc/powerpc64/power7/mempcpy.S: Likewise. Make better +# use of regs. Use power7 mtocrf. Tidy function tails. 
+# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/memcpy.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/memcpy.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/memcpy.S 2014-05-29 13:04:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/memcpy.S 2014-05-29 13:04:56.000000000 -0500 +@@ -205,15 +205,28 @@ + blt cr6,5f + srwi 7,6,16 + bgt cr6,3f ++#ifdef __LITTLE_ENDIAN__ ++ sth 7,0(3) ++#else + sth 6,0(3) ++#endif + b 7f + .align 4 + 3: ++#ifdef __LITTLE_ENDIAN__ ++ rotlwi 6,6,24 ++ stb 6,0(3) ++ sth 7,1(3) ++#else + stb 7,0(3) + sth 6,1(3) ++#endif + b 7f + .align 4 + 5: ++#ifdef __LITTLE_ENDIAN__ ++ rotlwi 6,6,8 ++#endif + stb 6,0(3) + 7: + cmplwi cr1,10,16 +@@ -341,13 +354,23 @@ + bf 30,1f + + /* there are at least two words to copy, so copy them */ ++#ifdef __LITTLE_ENDIAN__ ++ srw 0,6,10 ++ slw 8,7,9 ++#else + slw 0,6,10 /* shift 1st src word to left align it in R0 */ + srw 8,7,9 /* shift 2nd src word to right align it in R8 */ ++#endif + or 0,0,8 /* or them to get word to store */ + lwz 6,8(5) /* load the 3rd src word */ + stw 0,0(4) /* store the 1st dst word */ ++#ifdef __LITTLE_ENDIAN__ ++ srw 0,7,10 ++ slw 8,6,9 ++#else + slw 0,7,10 /* now left align 2nd src word into R0 */ + srw 8,6,9 /* shift 3rd src word to right align it in R8 */ ++#endif + or 0,0,8 /* or them to get word to store */ + lwz 7,12(5) + stw 0,4(4) /* store the 2nd dst word */ +@@ -355,8 +378,13 @@ + addi 5,5,16 + bf 31,4f + /* there is a third word to copy, so copy it */ ++#ifdef __LITTLE_ENDIAN__ ++ srw 0,6,10 ++ slw 8,7,9 ++#else + slw 0,6,10 /* shift 3rd src word to left align it in R0 */ + srw 8,7,9 /* shift 4th src word to right align it in R8 */ ++#endif + or 0,0,8 /* or them to get word to store */ + stw 0,0(4) /* store 3rd dst word */ + mr 6,7 +@@ -366,8 +394,13 @@ + b 4f + .align 4 + 1: ++#ifdef __LITTLE_ENDIAN__ ++ srw 0,6,10 ++ slw 8,7,9 ++#else + slw 0,6,10 /* shift 1st src word to left align it in R0 */ + srw 8,7,9 /* shift 2nd src word to right align it in R8 */ ++#endif + addi 5,5,8 + or 0,0,8 /* or them to get word to store */ + bf 31,4f +@@ -380,23 +413,43 @@ + .align 4 + 4: + /* copy 16 bytes at a time */ ++#ifdef __LITTLE_ENDIAN__ ++ srw 0,6,10 ++ slw 8,7,9 ++#else + slw 0,6,10 + srw 8,7,9 ++#endif + or 0,0,8 + lwz 6,0(5) + stw 0,0(4) ++#ifdef __LITTLE_ENDIAN__ ++ srw 0,7,10 ++ slw 8,6,9 ++#else + slw 0,7,10 + srw 8,6,9 ++#endif + or 0,0,8 + lwz 7,4(5) + stw 0,4(4) ++#ifdef __LITTLE_ENDIAN__ ++ srw 0,6,10 ++ slw 8,7,9 ++#else + slw 0,6,10 + srw 8,7,9 ++#endif + or 0,0,8 + lwz 6,8(5) + stw 0,8(4) ++#ifdef __LITTLE_ENDIAN__ ++ srw 0,7,10 ++ slw 8,6,9 ++#else + slw 0,7,10 + srw 8,6,9 ++#endif + or 0,0,8 + lwz 7,12(5) + stw 0,12(4) +@@ -405,8 +458,13 @@ + bdnz+ 4b + 8: + /* calculate and store the final word */ ++#ifdef __LITTLE_ENDIAN__ ++ srw 0,6,10 ++ slw 8,7,9 ++#else + slw 0,6,10 + srw 8,7,9 ++#endif + or 0,0,8 + stw 0,0(4) + 3: +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/memcpy.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/memcpy.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/memcpy.S 2014-05-29 13:04:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/memcpy.S 2014-05-29 13:04:56.000000000 -0500 +@@ -221,15 +221,28 @@ + blt cr6,5f + srwi 7,6,16 + bgt cr6,3f ++#ifdef __LITTLE_ENDIAN__ ++ sth 7,0(3) ++#else + sth 6,0(3) ++#endif + b 7f + .align 4 + 3: ++#ifdef __LITTLE_ENDIAN__ ++ rotlwi 6,6,24 ++ stb 6,0(3) ++ sth 7,1(3) ++#else + stb 7,0(3) + sth 6,1(3) ++#endif + b 
7f + .align 4 + 5: ++#ifdef __LITTLE_ENDIAN__ ++ rotlwi 6,6,8 ++#endif + stb 6,0(3) + 7: + cmplwi cr1,10,16 +@@ -579,7 +592,11 @@ + lwz 6,-1(4) + cmplwi cr6,31,4 + srwi 8,31,5 /* calculate the 32 byte loop count */ ++#ifdef __LITTLE_ENDIAN__ ++ srwi 6,6,8 ++#else + slwi 6,6,8 ++#endif + clrlwi 31,31,27 /* The remaining bytes, < 32. */ + blt cr5,L(wdu1_32tail) + mtctr 8 +@@ -587,8 +604,12 @@ + + lwz 8,3(4) + lwz 7,4(4) ++#ifdef __LITTLE_ENDIAN__ ++ rldimi 6,8,24,32 ++#else + /* Equivalent to: srwi 8,8,32-8; or 6,6,8 */ + rlwimi 6,8,8,(32-8),31 ++#endif + b L(wdu1_loop32x) + .align 4 + L(wdu1_loop32): +@@ -597,8 +618,12 @@ + lwz 7,4(4) + stw 10,-8(3) + stw 11,-4(3) ++#ifdef __LITTLE_ENDIAN__ ++ rldimi 6,8,24,32 ++#else + /* Equivalent to srwi 8,8,32-8; or 6,6,8 */ + rlwimi 6,8,8,(32-8),31 ++#endif + L(wdu1_loop32x): + lwz 10,8(4) + lwz 11,12(4) +@@ -615,7 +640,11 @@ + stw 6,16(3) + stw 7,20(3) + addi 3,3,32 ++#ifdef __LITTLE_ENDIAN__ ++ srwi 6,8,8 ++#else + slwi 6,8,8 ++#endif + bdnz+ L(wdu1_loop32) + stw 10,-8(3) + stw 11,-4(3) +@@ -626,8 +655,12 @@ + blt cr6,L(wdu_4tail) + /* calculate and store the final word */ + lwz 8,3(4) +-/* Equivalent to: srwi 8,8,32-9; or 6,6,8 */ ++#ifdef __LITTLE_ENDIAN__ ++ rldimi 6,8,24,32 ++#else ++/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */ + rlwimi 6,8,8,(32-8),31 ++#endif + b L(wdu_32tailx) + + L(wdu2_32): +@@ -635,7 +668,11 @@ + lwz 6,-2(4) + cmplwi cr6,31,4 + srwi 8,31,5 /* calculate the 32 byte loop count */ ++#ifdef __LITTLE_ENDIAN__ ++ srwi 6,6,16 ++#else + slwi 6,6,16 ++#endif + clrlwi 31,31,27 /* The remaining bytes, < 32. */ + blt cr5,L(wdu2_32tail) + mtctr 8 +@@ -643,8 +680,11 @@ + + lwz 8,2(4) + lwz 7,4(4) +-/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */ ++#ifdef __LITTLE_ENDIAN__ ++ rldimi 6,8,16,32 ++#else + rlwimi 6,8,16,(32-16),31 ++#endif + b L(wdu2_loop32x) + .align 4 + L(wdu2_loop32): +@@ -653,8 +693,11 @@ + lwz 7,4(4) + stw 10,-8(3) + stw 11,-4(3) +-/* Equivalent to srwi 8,8,32-8; or 6,6,8 */ ++#ifdef __LITTLE_ENDIAN__ ++ rldimi 6,8,16,32 ++#else + rlwimi 6,8,16,(32-16),31 ++#endif + L(wdu2_loop32x): + lwz 10,8(4) + lwz 11,12(4) +@@ -672,7 +715,11 @@ + stw 6,16(3) + stw 7,20(3) + addi 3,3,32 ++#ifdef __LITTLE_ENDIAN__ ++ srwi 6,8,16 ++#else + slwi 6,8,16 ++#endif + bdnz+ L(wdu2_loop32) + stw 10,-8(3) + stw 11,-4(3) +@@ -683,8 +730,11 @@ + blt cr6,L(wdu_4tail) + /* calculate and store the final word */ + lwz 8,2(4) +-/* Equivalent to: srwi 8,8,32-9; or 6,6,8 */ ++#ifdef __LITTLE_ENDIAN__ ++ rldimi 6,8,16,32 ++#else + rlwimi 6,8,16,(32-16),31 ++#endif + b L(wdu_32tailx) + + L(wdu3_32): +@@ -692,7 +742,11 @@ + lwz 6,-3(4) + cmplwi cr6,31,4 + srwi 8,31,5 /* calculate the 32 byte loop count */ ++#ifdef __LITTLE_ENDIAN__ ++ srwi 6,6,24 ++#else + slwi 6,6,24 ++#endif + clrlwi 31,31,27 /* The remaining bytes, < 32. 
*/ + blt cr5,L(wdu3_32tail) + mtctr 8 +@@ -700,8 +754,11 @@ + + lwz 8,1(4) + lwz 7,4(4) +-/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */ ++#ifdef __LITTLE_ENDIAN__ ++ rldimi 6,8,8,32 ++#else + rlwimi 6,8,24,(32-24),31 ++#endif + b L(wdu3_loop32x) + .align 4 + L(wdu3_loop32): +@@ -710,8 +767,11 @@ + lwz 7,4(4) + stw 10,-8(3) + stw 11,-4(3) +-/* Equivalent to srwi 8,8,32-8; or 6,6,8 */ ++#ifdef __LITTLE_ENDIAN__ ++ rldimi 6,8,8,32 ++#else + rlwimi 6,8,24,(32-24),31 ++#endif + L(wdu3_loop32x): + lwz 10,8(4) + lwz 11,12(4) +@@ -728,7 +788,11 @@ + stw 6,16(3) + stw 7,20(3) + addi 3,3,32 ++#ifdef __LITTLE_ENDIAN__ ++ srwi 6,8,24 ++#else + slwi 6,8,24 ++#endif + bdnz+ L(wdu3_loop32) + stw 10,-8(3) + stw 11,-4(3) +@@ -739,8 +803,11 @@ + blt cr6,L(wdu_4tail) + /* calculate and store the final word */ + lwz 8,1(4) +-/* Equivalent to: srwi 8,8,32-9; or 6,6,8 */ ++#ifdef __LITTLE_ENDIAN__ ++ rldimi 6,8,8,32 ++#else + rlwimi 6,8,24,(32-24),31 ++#endif + b L(wdu_32tailx) + .align 4 + L(wdu_32tailx): +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memcpy.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memcpy.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memcpy.S 2014-05-29 13:04:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memcpy.S 2014-05-29 13:04:56.000000000 -0500 +@@ -385,7 +385,7 @@ + + beq L(copy_GE_32_unaligned_cont) + +- /* SRC is not quadword aligned, get it aligned. */ ++ /* DST is not quadword aligned, get it aligned. */ + + mtcrf 0x01,0 + subf 31,0,5 +@@ -437,13 +437,21 @@ + mr 11,12 + mtcrf 0x01,9 + cmplwi cr6,9,1 ++#ifdef __LITTLE_ENDIAN__ ++ lvsr 5,0,12 ++#else + lvsl 5,0,12 ++#endif + lvx 3,0,12 + bf 31,L(setup_unaligned_loop) + + /* Copy another 16 bytes to align to 32-bytes due to the loop . */ + lvx 4,12,6 ++#ifdef __LITTLE_ENDIAN__ ++ vperm 6,4,3,5 ++#else + vperm 6,3,4,5 ++#endif + addi 11,12,16 + addi 10,3,16 + stvx 6,0,3 +@@ -463,11 +471,17 @@ + vector instructions though. */ + + lvx 4,11,6 /* vr4 = r11+16. */ +- vperm 6,3,4,5 /* Merge the correctly-aligned portions +- of vr3/vr4 into vr6. */ ++#ifdef __LITTLE_ENDIAN__ ++ vperm 6,4,3,5 ++#else ++ vperm 6,3,4,5 ++#endif + lvx 3,11,7 /* vr3 = r11+32. */ +- vperm 10,4,3,5 /* Merge the correctly-aligned portions +- of vr3/vr4 into vr10. */ ++#ifdef __LITTLE_ENDIAN__ ++ vperm 10,3,4,5 ++#else ++ vperm 10,4,3,5 ++#endif + addi 11,11,32 + stvx 6,0,10 + stvx 10,10,6 +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/mempcpy.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/mempcpy.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/mempcpy.S 2014-05-29 13:04:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/mempcpy.S 2014-05-29 13:04:56.000000000 -0500 +@@ -327,7 +327,7 @@ + + beq L(copy_GE_32_unaligned_cont) + +- /* SRC is not quadword aligned, get it aligned. */ ++ /* DST is not quadword aligned, get it aligned. */ + + mtcrf 0x01,0 + subf 31,0,5 +@@ -379,13 +379,21 @@ + mr 11,12 + mtcrf 0x01,9 + cmplwi cr6,9,1 +- lvsl 5,0,12 ++#ifdef __LITTLE_ENDIAN__ ++ lvsr 5,0,12 ++#else ++ lvsl 5,0,12 ++#endif + lvx 3,0,12 + bf 31,L(setup_unaligned_loop) + + /* Copy another 16 bytes to align to 32-bytes due to the loop . */ + lvx 4,12,6 +- vperm 6,3,4,5 ++#ifdef __LITTLE_ENDIAN__ ++ vperm 6,4,3,5 ++#else ++ vperm 6,3,4,5 ++#endif + addi 11,12,16 + addi 10,3,16 + stvx 6,0,3 +@@ -405,11 +413,17 @@ + vector instructions though. */ + + lvx 4,11,6 /* vr4 = r11+16. 
*/ +- vperm 6,3,4,5 /* Merge the correctly-aligned portions +- of vr3/vr4 into vr6. */ ++#ifdef __LITTLE_ENDIAN__ ++ vperm 6,4,3,5 ++#else ++ vperm 6,3,4,5 ++#endif + lvx 3,11,7 /* vr3 = r11+32. */ +- vperm 10,4,3,5 /* Merge the correctly-aligned portions +- of vr3/vr4 into vr10. */ ++#ifdef __LITTLE_ENDIAN__ ++ vperm 10,3,4,5 ++#else ++ vperm 10,4,3,5 ++#endif + addi 11,11,32 + stvx 6,0,10 + stvx 10,10,6 +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/memcpy.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/memcpy.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/memcpy.S 2014-05-29 13:04:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/memcpy.S 2014-05-29 13:04:56.000000000 -0500 +@@ -214,15 +214,28 @@ + blt cr6,5f + srdi 7,6,16 + bgt cr6,3f ++#ifdef __LITTLE_ENDIAN__ ++ sth 7,0(3) ++#else + sth 6,0(3) ++#endif + b 7f + .align 4 + 3: ++#ifdef __LITTLE_ENDIAN__ ++ rotlwi 6,6,24 ++ stb 6,0(3) ++ sth 7,1(3) ++#else + stb 7,0(3) + sth 6,1(3) ++#endif + b 7f + .align 4 + 5: ++#ifdef __LITTLE_ENDIAN__ ++ rotlwi 6,6,8 ++#endif + stb 6,0(3) + 7: + cmpldi cr1,10,16 +@@ -330,7 +343,11 @@ + ld 7,8(5) + subfic 9,10,64 + beq 2f ++#ifdef __LITTLE_ENDIAN__ ++ srd 0,6,10 ++#else + sld 0,6,10 ++#endif + cmpldi 11,1 + mr 6,7 + addi 4,4,-8 +@@ -338,15 +355,25 @@ + b 1f + 2: addi 5,5,8 + .align 4 ++#ifdef __LITTLE_ENDIAN__ ++0: srd 0,6,10 ++ sld 8,7,9 ++#else + 0: sld 0,6,10 + srd 8,7,9 ++#endif + cmpldi 11,2 + ld 6,8(5) + or 0,0,8 + addi 11,11,-2 + std 0,0(4) ++#ifdef __LITTLE_ENDIAN__ ++ srd 0,7,10 ++1: sld 8,6,9 ++#else + sld 0,7,10 + 1: srd 8,6,9 ++#endif + or 0,0,8 + beq 8f + ld 7,16(5) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/memcpy.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/memcpy.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/memcpy.S 2014-05-29 13:04:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/memcpy.S 2014-05-29 13:05:51.000000000 -0500 +@@ -1,5 +1,5 @@ + /* Optimized memcpy implementation for PowerPC64. +- Copyright (C) 2003, 2006, 2011 Free Software Foundation, Inc. ++ Copyright (C) 2003-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -17,26 +17,24 @@ + . */ + + #include +-#include +-#include + + /* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. + +- Memcpy handles short copies (< 32-bytes) using a binary move blocks +- (no loops) of lwz/stw. The tail (remaining 1-3) bytes is handled +- with the appropriate combination of byte and halfword load/stores. +- There is minimal effort to optimize the alignment of short moves. ++ Memcpy handles short copies (< 32-bytes) using a binary move blocks ++ (no loops) of lwz/stw. The tail (remaining 1-3) bytes is handled ++ with the appropriate combination of byte and halfword load/stores. ++ There is minimal effort to optimize the alignment of short moves. + The 64-bit implementations of POWER3 and POWER4 do a reasonable job +- of handling unligned load/stores that do not cross 32-byte boundries. ++ of handling unaligned load/stores that do not cross 32-byte boundaries. + + Longer moves (>= 32-bytes) justify the effort to get at least the + destination doubleword (8-byte) aligned. Further optimization is +- posible when both source and destination are doubleword aligned. ++ possible when both source and destination are doubleword aligned. + Each case has a optimized unrolled loop. 
*/ + + .machine power4 +-EALIGN (BP_SYM (memcpy), 5, 0) ++EALIGN (memcpy, 5, 0) + CALL_MCOUNT 3 + + cmpldi cr1,5,31 +@@ -44,20 +42,20 @@ + std 3,-16(1) + std 31,-8(1) + cfi_offset(31,-8) +- andi. 11,3,7 /* check alignement of dst. */ ++ andi. 11,3,7 /* check alignment of dst. */ + clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */ +- clrldi 10,4,61 /* check alignement of src. */ ++ clrldi 10,4,61 /* check alignment of src. */ + cmpldi cr6,5,8 + ble- cr1,.L2 /* If move < 32 bytes use short move code. */ +- cmpld cr6,10,11 ++ cmpld cr6,10,11 + mr 12,4 + srdi 9,5,3 /* Number of full double words remaining. */ + mtcrf 0x01,0 + mr 31,5 + beq .L0 +- ++ + subf 31,0,5 +- /* Move 0-7 bytes as needed to get the destination doubleword alligned. */ ++ /* Move 0-7 bytes as needed to get the destination doubleword aligned. */ + 1: bf 31,2f + lbz 6,0(12) + addi 12,12,1 +@@ -74,17 +72,17 @@ + stw 6,0(3) + addi 3,3,4 + 0: +- clrldi 10,12,61 /* check alignement of src again. */ ++ clrldi 10,12,61 /* check alignment of src again. */ + srdi 9,31,3 /* Number of full double words remaining. */ +- +- /* Copy doublewords from source to destination, assumpting the ++ ++ /* Copy doublewords from source to destination, assuming the + destination is aligned on a doubleword boundary. + + At this point we know there are at least 25 bytes left (32-7) to copy. +- The next step is to determine if the source is also doubleword aligned. ++ The next step is to determine if the source is also doubleword aligned. + If not branch to the unaligned move code at .L6. which uses + a load, shift, store strategy. +- ++ + Otherwise source and destination are doubleword aligned, and we can + the optimized doubleword copy loop. */ + .L0: +@@ -97,14 +95,14 @@ + Use a unrolled loop to copy 4 doubleword (32-bytes) per iteration. + If the copy is not an exact multiple of 32 bytes, 1-3 + doublewords are copied as needed to set up the main loop. After +- the main loop exits there may be a tail of 1-7 bytes. These byte are ++ the main loop exits there may be a tail of 1-7 bytes. These byte are + copied a word/halfword/byte at a time as needed to preserve alignment. */ + + srdi 8,31,5 + cmpldi cr1,9,4 + cmpldi cr6,11,0 + mr 11,12 +- ++ + bf 30,1f + ld 6,0(12) + ld 7,8(12) +@@ -115,7 +113,7 @@ + addi 10,3,16 + bf 31,4f + ld 0,16(12) +- std 0,16(3) ++ std 0,16(3) + blt cr1,3f + addi 11,12,24 + addi 10,3,24 +@@ -129,7 +127,7 @@ + addi 11,12,8 + std 6,0(3) + addi 10,3,8 +- ++ + .align 4 + 4: + ld 6,0(11) +@@ -144,7 +142,7 @@ + std 0,24(10) + addi 10,10,32 + bdnz 4b +-3: ++3: + + rldicr 0,31,0,60 + mtcrf 0x01,31 +@@ -152,9 +150,9 @@ + .L9: + add 3,3,0 + add 12,12,0 +- ++ + /* At this point we have a tail of 0-7 bytes and we know that the +- destiniation is double word aligned. */ ++ destination is double word aligned. */ + 4: bf 29,2f + lwz 6,0(12) + addi 12,12,4 +@@ -173,29 +171,29 @@ + ld 31,-8(1) + ld 3,-16(1) + blr +- +-/* Copy up to 31 bytes. This divided into two cases 0-8 bytes and 9-31 +- bytes. Each case is handled without loops, using binary (1,2,4,8) +- tests. +- ++ ++/* Copy up to 31 bytes. This divided into two cases 0-8 bytes and 9-31 ++ bytes. Each case is handled without loops, using binary (1,2,4,8) ++ tests. ++ + In the short (0-8 byte) case no attempt is made to force alignment +- of either source or destination. The hardware will handle the +- unaligned load/stores with small delays for crossing 32- 64-byte, and ++ of either source or destination. 
The hardware will handle the ++ unaligned load/stores with small delays for crossing 32- 64-byte, and + 4096-byte boundaries. Since these short moves are unlikely to be +- unaligned or cross these boundaries, the overhead to force ++ unaligned or cross these boundaries, the overhead to force + alignment is not justified. +- ++ + The longer (9-31 byte) move is more likely to cross 32- or 64-byte + boundaries. Since only loads are sensitive to the 32-/64-byte +- boundaries it is more important to align the source then the ++ boundaries it is more important to align the source then the + destination. If the source is not already word aligned, we first +- move 1-3 bytes as needed. Since we are only word aligned we don't +- use double word load/stores to insure that all loads are aligned. ++ move 1-3 bytes as needed. Since we are only word aligned we don't ++ use double word load/stores to insure that all loads are aligned. + While the destination and stores may still be unaligned, this + is only an issue for page (4096 byte boundary) crossing, which + should be rare for these short moves. The hardware handles this +- case automatically with a small delay. */ +- ++ case automatically with a small delay. */ ++ + .align 4 + .L2: + mtcrf 0x01,5 +@@ -216,15 +214,28 @@ + blt cr6,5f + srdi 7,6,16 + bgt cr6,3f ++#ifdef __LITTLE_ENDIAN__ ++ sth 7,0(3) ++#else + sth 6,0(3) ++#endif + b 7f + .align 4 + 3: ++#ifdef __LITTLE_ENDIAN__ ++ rotlwi 6,6,24 ++ stb 6,0(3) ++ sth 7,1(3) ++#else + stb 7,0(3) + sth 6,1(3) ++#endif + b 7f + .align 4 + 5: ++#ifdef __LITTLE_ENDIAN__ ++ rotlwi 6,6,8 ++#endif + stb 6,0(3) + 7: + cmpldi cr1,10,16 +@@ -258,11 +269,11 @@ + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) +- addi 3,3,4 ++ addi 3,3,4 + 2: /* Move 2-3 bytes. */ + bf 30,1f + lhz 6,0(12) +- sth 6,0(3) ++ sth 6,0(3) + bf 31,0f + lbz 7,2(12) + stb 7,2(3) +@@ -283,8 +294,8 @@ + mr 12,4 + bne cr6,4f + /* Would have liked to use use ld/std here but the 630 processors are +- slow for load/store doubles that are not at least word aligned. +- Unaligned Load/Store word execute with only a 1 cycle penaltity. */ ++ slow for load/store doubles that are not at least word aligned. ++ Unaligned Load/Store word execute with only a 1 cycle penalty. 
*/ + lwz 6,0(4) + lwz 7,4(4) + stw 6,0(3) +@@ -299,14 +310,14 @@ + 6: + bf 30,5f + lhz 7,4(4) +- sth 7,4(3) ++ sth 7,4(3) + bf 31,0f + lbz 8,6(4) + stb 8,6(3) + ld 3,-16(1) + blr + .align 4 +-5: ++5: + bf 31,0f + lbz 6,4(4) + stb 6,4(3) +@@ -336,13 +347,23 @@ + bf 30,1f + + /* there are at least two DWs to copy */ ++#ifdef __LITTLE_ENDIAN__ ++ srd 0,6,10 ++ sld 8,7,9 ++#else + sld 0,6,10 + srd 8,7,9 ++#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) ++#ifdef __LITTLE_ENDIAN__ ++ srd 0,7,10 ++ sld 8,6,9 ++#else + sld 0,7,10 + srd 8,6,9 ++#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) +@@ -351,8 +372,13 @@ + blt cr6,8f /* if total DWs = 3, then bypass loop */ + bf 31,4f + /* there is a third DW to copy */ ++#ifdef __LITTLE_ENDIAN__ ++ srd 0,6,10 ++ sld 8,7,9 ++#else + sld 0,6,10 + srd 8,7,9 ++#endif + or 0,0,8 + std 0,0(4) + mr 6,7 +@@ -363,8 +389,13 @@ + b 4f + .align 4 + 1: ++#ifdef __LITTLE_ENDIAN__ ++ srd 0,6,10 ++ sld 8,7,9 ++#else + sld 0,6,10 + srd 8,7,9 ++#endif + addi 5,5,16 + or 0,0,8 + bf 31,4f +@@ -375,23 +406,44 @@ + addi 4,4,8 + .align 4 + /* copy 32 bytes at a time */ +-4: sld 0,6,10 ++4: ++#ifdef __LITTLE_ENDIAN__ ++ srd 0,6,10 ++ sld 8,7,9 ++#else ++ sld 0,6,10 + srd 8,7,9 ++#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) ++#ifdef __LITTLE_ENDIAN__ ++ srd 0,7,10 ++ sld 8,6,9 ++#else + sld 0,7,10 + srd 8,6,9 ++#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) ++#ifdef __LITTLE_ENDIAN__ ++ srd 0,6,10 ++ sld 8,7,9 ++#else + sld 0,6,10 + srd 8,7,9 ++#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) ++#ifdef __LITTLE_ENDIAN__ ++ srd 0,7,10 ++ sld 8,6,9 ++#else + sld 0,7,10 + srd 8,6,9 ++#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) +@@ -401,9 +453,14 @@ + .align 4 + 8: + /* calculate and store the final DW */ ++#ifdef __LITTLE_ENDIAN__ ++ srd 0,6,10 ++ sld 8,7,9 ++#else + sld 0,6,10 + srd 8,7,9 +- or 0,0,8 ++#endif ++ or 0,0,8 + std 0,0(4) + 3: + rldicr 0,31,0,60 +@@ -413,5 +470,5 @@ + ld 31,-8(1) + ld 3,-16(1) + blr +-END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS) ++END_GEN_TB (memcpy,TB_TOCLESS) + libc_hidden_builtin_def (memcpy) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power6/memcpy.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power6/memcpy.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power6/memcpy.S 2014-05-29 13:04:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power6/memcpy.S 2014-05-29 13:05:27.000000000 -0500 +@@ -1,5 +1,5 @@ + /* Optimized memcpy implementation for PowerPC64. +- Copyright (C) 2003, 2006, 2007, 2011 Free Software Foundation, Inc. ++ Copyright (C) 2003-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -17,52 +17,50 @@ + . */ + + #include +-#include +-#include + + /* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. + +- Memcpy handles short copies (< 32-bytes) using a binary move blocks +- (no loops) of lwz/stw. The tail (remaining 1-3) bytes is handled +- with the appropriate combination of byte and halfword load/stores. +- There is minimal effort to optimize the alignment of short moves. ++ Memcpy handles short copies (< 32-bytes) using a binary move blocks ++ (no loops) of lwz/stw. The tail (remaining 1-3) bytes is handled ++ with the appropriate combination of byte and halfword load/stores. ++ There is minimal effort to optimize the alignment of short moves. 
+ The 64-bit implementations of POWER3 and POWER4 do a reasonable job +- of handling unligned load/stores that do not cross 32-byte boundries. ++ of handling unaligned load/stores that do not cross 32-byte boundaries. + + Longer moves (>= 32-bytes) justify the effort to get at least the + destination doubleword (8-byte) aligned. Further optimization is +- posible when both source and destination are doubleword aligned. +- Each case has a optimized unrolled loop. +- +- For POWER6 unaligned loads will take a 20+ cycle hicup for any ++ possible when both source and destination are doubleword aligned. ++ Each case has a optimized unrolled loop. ++ ++ For POWER6 unaligned loads will take a 20+ cycle hiccup for any + L1 cache miss that crosses a 32- or 128-byte boundary. Store +- is more forgiving and does not take a hicup until page or +- segment boundaries. So we require doubleword alignment for ++ is more forgiving and does not take a hiccup until page or ++ segment boundaries. So we require doubleword alignment for + the source but may take a risk and only require word alignment + for the destination. */ + + .machine "power6" +-EALIGN (BP_SYM (memcpy), 7, 0) ++EALIGN (memcpy, 7, 0) + CALL_MCOUNT 3 + + cmpldi cr1,5,31 + neg 0,3 + std 3,-16(1) + std 31,-8(1) +- andi. 11,3,7 /* check alignement of dst. */ ++ andi. 11,3,7 /* check alignment of dst. */ + clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */ +- clrldi 10,4,61 /* check alignement of src. */ ++ clrldi 10,4,61 /* check alignment of src. */ + cmpldi cr6,5,8 + ble- cr1,.L2 /* If move < 32 bytes use short move code. */ + mtcrf 0x01,0 +- cmpld cr6,10,11 ++ cmpld cr6,10,11 + srdi 9,5,3 /* Number of full double words remaining. */ + beq .L0 +- ++ + subf 5,0,5 +- /* Move 0-7 bytes as needed to get the destination doubleword alligned. +- Duplicate some code to maximize fall-throught and minimize agen delays. */ ++ /* Move 0-7 bytes as needed to get the destination doubleword aligned. ++ Duplicate some code to maximize fall-through and minimize agen delays. */ + 1: bf 31,2f + lbz 6,0(4) + stb 6,0(3) +@@ -78,7 +76,7 @@ + lwz 6,1(4) + stw 6,1(3) + b 0f +- ++ + 2: bf 30,4f + lhz 6,0(4) + sth 6,0(3) +@@ -86,26 +84,26 @@ + lwz 6,2(4) + stw 6,2(3) + b 0f +- ++ + 4: bf 29,0f + lwz 6,0(4) + stw 6,0(3) +-0: ++0: + /* Add the number of bytes until the 1st doubleword of dst to src and dst. */ + add 4,4,0 + add 3,3,0 +- +- clrldi 10,4,61 /* check alignement of src again. */ ++ ++ clrldi 10,4,61 /* check alignment of src again. */ + srdi 9,5,3 /* Number of full double words remaining. */ +- +- /* Copy doublewords from source to destination, assumpting the ++ ++ /* Copy doublewords from source to destination, assuming the + destination is aligned on a doubleword boundary. + + At this point we know there are at least 25 bytes left (32-7) to copy. +- The next step is to determine if the source is also doubleword aligned. ++ The next step is to determine if the source is also doubleword aligned. + If not branch to the unaligned move code at .L6. which uses + a load, shift, store strategy. +- ++ + Otherwise source and destination are doubleword aligned, and we can + the optimized doubleword copy loop. */ + .align 4 +@@ -123,14 +121,14 @@ + the main loop exits there may be a tail of 1-7 bytes. These byte + are copied a word/halfword/byte at a time as needed to preserve + alignment. +- ++ + For POWER6 the L1 is store-through and the L2 is store-in. The + L2 is clocked at half CPU clock so we can store 16 bytes every + other cycle. 
POWER6 also has a load/store bypass so we can do +- load, load, store, store every 2 cycles. +- ++ load, load, store, store every 2 cycles. ++ + The following code is sensitive to cache line alignment. Do not +- make any change with out first making sure thay don't result in ++ make any change with out first making sure they don't result in + splitting ld/std pairs across a cache line. */ + + mtcrf 0x02,5 +@@ -273,7 +271,7 @@ + std 8,16+96(10) + std 0,24+96(10) + ble cr5,L(das_loop_e) +- ++ + mtctr 12 + .align 4 + L(das_loop2): +@@ -326,10 +324,10 @@ + .align 4 + L(das_tail): + beq cr1,0f +- ++ + L(das_tail2): + /* At this point we have a tail of 0-7 bytes and we know that the +- destiniation is double word aligned. */ ++ destination is double word aligned. */ + 4: bf 29,2f + lwz 6,0(4) + stw 6,0(3) +@@ -344,7 +342,7 @@ + lbz 6,4(4) + stb 6,4(3) + b 0f +- ++ + 2: bf 30,1f + lhz 6,0(4) + sth 6,0(3) +@@ -352,7 +350,7 @@ + lbz 6,2(4) + stb 6,2(3) + b 0f +- ++ + 1: bf 31,0f + lbz 6,0(4) + stb 6,0(3) +@@ -361,7 +359,7 @@ + ld 3,-16(1) + blr + +-/* Copy up to 31 bytes. This divided into two cases 0-8 bytes and 9-31 ++/* Copy up to 31 bytes. This divided into two cases 0-8 bytes and 9-31 + bytes. Each case is handled without loops, using binary (1,2,4,8) + tests. + +@@ -402,15 +400,28 @@ + blt cr6,5f + srdi 7,6,16 + bgt cr6,3f ++#ifdef __LITTLE_ENDIAN__ ++ sth 7,0(3) ++#else + sth 6,0(3) ++#endif + b 7f + .align 4 + 3: ++#ifdef __LITTLE_ENDIAN__ ++ rotlwi 6,6,24 ++ stb 6,0(3) ++ sth 7,1(3) ++#else + stb 7,0(3) + sth 6,1(3) ++#endif + b 7f + .align 4 + 5: ++#ifdef __LITTLE_ENDIAN__ ++ rotlwi 6,6,8 ++#endif + stb 6,0(3) + 7: + cmpldi cr1,10,16 +@@ -421,7 +432,7 @@ + /* At least 6 bytes left and the source is word aligned. This allows + some speculative loads up front. */ + /* We need to special case the fall-through because the biggest delays +- are due to address computation not being ready in time for the ++ are due to address computation not being ready in time for the + AGEN. */ + lwz 6,0(12) + lwz 7,4(12) +@@ -452,7 +463,7 @@ + ld 3,-16(1) + blr + .align 4 +-L(dus_tail16p8): /* less then 8 bytes left. */ ++L(dus_tail16p8): /* less than 8 bytes left. */ + beq cr1,L(dus_tailX) /* exactly 16 bytes, early exit. */ + cmpldi cr1,10,20 + bf 29,L(dus_tail16p2) +@@ -466,7 +477,7 @@ + ld 3,-16(1) + blr + .align 4 +-L(dus_tail16p4): /* less then 4 bytes left. */ ++L(dus_tail16p4): /* less than 4 bytes left. */ + addi 12,12,24 + addi 3,3,24 + bgt cr0,L(dus_tail2) +@@ -474,7 +485,7 @@ + ld 3,-16(1) + blr + .align 4 +-L(dus_tail16p2): /* 16 bytes moved, less then 4 bytes left. */ ++L(dus_tail16p2): /* 16 bytes moved, less than 4 bytes left. */ + addi 12,12,16 + addi 3,3,16 + b L(dus_tail2) +@@ -499,7 +510,7 @@ + ld 3,-16(1) + blr + .align 4 +-L(dus_tail8p4): /* less then 4 bytes left. */ ++L(dus_tail8p4): /* less than 4 bytes left. */ + addi 12,12,8 + addi 3,3,8 + bgt cr1,L(dus_tail2) +@@ -510,14 +521,14 @@ + .align 4 + L(dus_tail4): /* Move 4 bytes. */ + /* r6 already loaded speculatively. If we are here we know there is +- more then 4 bytes left. So there is no need to test. */ ++ more than 4 bytes left. So there is no need to test. */ + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 + L(dus_tail2): /* Move 2-3 bytes. */ + bf 30,L(dus_tail1) + lhz 6,0(12) +- sth 6,0(3) ++ sth 6,0(3) + bf 31,L(dus_tailX) + lbz 7,2(12) + stb 7,2(3) +@@ -537,7 +548,7 @@ + .LE8: + mr 12,4 + bne cr6,L(dus_4) +-/* Exactly 8 bytes. We may cross a 32-/128-byte boundry and take a ~20 ++/* Exactly 8 bytes. 
We may cross a 32-/128-byte boundary and take a ~20 + cycle delay. This case should be rare and any attempt to avoid this + would take most of 20 cycles any way. */ + ld 6,0(4) +@@ -552,7 +563,7 @@ + stw 6,0(3) + bf 30,L(dus_5) + lhz 7,4(4) +- sth 7,4(3) ++ sth 7,4(3) + bf 31,L(dus_0) + lbz 8,6(4) + stb 8,6(3) +@@ -590,20 +601,31 @@ + bge cr0, L(du4_do) + blt cr5, L(du1_do) + beq cr5, L(du2_do) +- b L(du3_do) +- ++ b L(du3_do) ++ + .align 4 + L(du1_do): + bf 30,L(du1_1dw) + + /* there are at least two DWs to copy */ ++ /* FIXME: can combine last shift and "or" into "rldimi" */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 8 ++ sldi 8,7, 64-8 ++#else + sldi 0,6, 8 + srdi 8,7, 64-8 ++#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 8 ++ sldi 8,6, 64-8 ++#else + sldi 0,7, 8 + srdi 8,6, 64-8 ++#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) +@@ -612,8 +634,13 @@ + blt cr6,L(du1_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du1_loop) + /* there is a third DW to copy */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 8 ++ sldi 8,7, 64-8 ++#else + sldi 0,6, 8 + srdi 8,7, 64-8 ++#endif + or 0,0,8 + std 0,0(4) + mr 6,7 +@@ -624,8 +651,13 @@ + b L(du1_loop) + .align 4 + L(du1_1dw): ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 8 ++ sldi 8,7, 64-8 ++#else + sldi 0,6, 8 + srdi 8,7, 64-8 ++#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du1_loop) +@@ -637,23 +669,43 @@ + .align 4 + /* copy 32 bytes at a time */ + L(du1_loop): ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 8 ++ sldi 8,7, 64-8 ++#else + sldi 0,6, 8 + srdi 8,7, 64-8 ++#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 8 ++ sldi 8,6, 64-8 ++#else + sldi 0,7, 8 + srdi 8,6, 64-8 ++#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 8 ++ sldi 8,7, 64-8 ++#else + sldi 0,6, 8 + srdi 8,7, 64-8 ++#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 8 ++ sldi 8,6, 64-8 ++#else + sldi 0,7, 8 + srdi 8,6, 64-8 ++#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) +@@ -663,9 +715,14 @@ + .align 4 + L(du1_fini): + /* calculate and store the final DW */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 8 ++ sldi 8,7, 64-8 ++#else + sldi 0,6, 8 + srdi 8,7, 64-8 +- or 0,0,8 ++#endif ++ or 0,0,8 + std 0,0(4) + b L(du_done) + +@@ -674,13 +731,23 @@ + bf 30,L(du2_1dw) + + /* there are at least two DWs to copy */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 16 ++ sldi 8,7, 64-16 ++#else + sldi 0,6, 16 + srdi 8,7, 64-16 ++#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 16 ++ sldi 8,6, 64-16 ++#else + sldi 0,7, 16 + srdi 8,6, 64-16 ++#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) +@@ -689,8 +756,13 @@ + blt cr6,L(du2_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du2_loop) + /* there is a third DW to copy */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 16 ++ sldi 8,7, 64-16 ++#else + sldi 0,6, 16 + srdi 8,7, 64-16 ++#endif + or 0,0,8 + std 0,0(4) + mr 6,7 +@@ -701,8 +773,13 @@ + b L(du2_loop) + .align 4 + L(du2_1dw): ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 16 ++ sldi 8,7, 64-16 ++#else + sldi 0,6, 16 + srdi 8,7, 64-16 ++#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du2_loop) +@@ -714,23 +791,43 @@ + .align 4 + /* copy 32 bytes at a time */ + L(du2_loop): ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 16 ++ sldi 8,7, 64-16 ++#else + sldi 0,6, 16 + srdi 8,7, 64-16 ++#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 16 ++ sldi 8,6, 64-16 ++#else + sldi 0,7, 16 + srdi 8,6, 64-16 ++#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) 
++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 16 ++ sldi 8,7, 64-16 ++#else + sldi 0,6, 16 + srdi 8,7, 64-16 ++#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 16 ++ sldi 8,6, 64-16 ++#else + sldi 0,7, 16 + srdi 8,6, 64-16 ++#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) +@@ -740,9 +837,14 @@ + .align 4 + L(du2_fini): + /* calculate and store the final DW */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 16 ++ sldi 8,7, 64-16 ++#else + sldi 0,6, 16 + srdi 8,7, 64-16 +- or 0,0,8 ++#endif ++ or 0,0,8 + std 0,0(4) + b L(du_done) + +@@ -751,13 +853,23 @@ + bf 30,L(du3_1dw) + + /* there are at least two DWs to copy */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 24 ++ sldi 8,7, 64-24 ++#else + sldi 0,6, 24 + srdi 8,7, 64-24 ++#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 24 ++ sldi 8,6, 64-24 ++#else + sldi 0,7, 24 + srdi 8,6, 64-24 ++#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) +@@ -766,8 +878,13 @@ + blt cr6,L(du3_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du3_loop) + /* there is a third DW to copy */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 24 ++ sldi 8,7, 64-24 ++#else + sldi 0,6, 24 + srdi 8,7, 64-24 ++#endif + or 0,0,8 + std 0,0(4) + mr 6,7 +@@ -778,8 +895,13 @@ + b L(du3_loop) + .align 4 + L(du3_1dw): ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 24 ++ sldi 8,7, 64-24 ++#else + sldi 0,6, 24 + srdi 8,7, 64-24 ++#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du3_loop) +@@ -791,23 +913,43 @@ + .align 4 + /* copy 32 bytes at a time */ + L(du3_loop): ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 24 ++ sldi 8,7, 64-24 ++#else + sldi 0,6, 24 + srdi 8,7, 64-24 ++#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 24 ++ sldi 8,6, 64-24 ++#else + sldi 0,7, 24 + srdi 8,6, 64-24 ++#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 24 ++ sldi 8,7, 64-24 ++#else + sldi 0,6, 24 + srdi 8,7, 64-24 ++#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 24 ++ sldi 8,6, 64-24 ++#else + sldi 0,7, 24 + srdi 8,6, 64-24 ++#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) +@@ -817,9 +959,14 @@ + .align 4 + L(du3_fini): + /* calculate and store the final DW */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 24 ++ sldi 8,7, 64-24 ++#else + sldi 0,6, 24 + srdi 8,7, 64-24 +- or 0,0,8 ++#endif ++ or 0,0,8 + std 0,0(4) + b L(du_done) + +@@ -834,13 +981,23 @@ + bf 30,L(du4_1dw) + + /* there are at least two DWs to copy */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 32 ++ sldi 8,7, 64-32 ++#else + sldi 0,6, 32 + srdi 8,7, 64-32 ++#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 32 ++ sldi 8,6, 64-32 ++#else + sldi 0,7, 32 + srdi 8,6, 64-32 ++#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) +@@ -849,8 +1006,13 @@ + blt cr6,L(du4_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du4_loop) + /* there is a third DW to copy */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 32 ++ sldi 8,7, 64-32 ++#else + sldi 0,6, 32 + srdi 8,7, 64-32 ++#endif + or 0,0,8 + std 0,0(4) + mr 6,7 +@@ -861,8 +1023,13 @@ + b L(du4_loop) + .align 4 + L(du4_1dw): ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 32 ++ sldi 8,7, 64-32 ++#else + sldi 0,6, 32 + srdi 8,7, 64-32 ++#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du4_loop) +@@ -874,23 +1041,43 @@ + .align 4 + /* copy 32 bytes at a time */ + L(du4_loop): ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 32 ++ sldi 8,7, 64-32 ++#else + sldi 0,6, 32 + srdi 8,7, 64-32 ++#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 32 ++ sldi 
8,6, 64-32 ++#else + sldi 0,7, 32 + srdi 8,6, 64-32 ++#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 32 ++ sldi 8,7, 64-32 ++#else + sldi 0,6, 32 + srdi 8,7, 64-32 ++#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 32 ++ sldi 8,6, 64-32 ++#else + sldi 0,7, 32 + srdi 8,6, 64-32 ++#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) +@@ -900,9 +1087,14 @@ + .align 4 + L(du4_fini): + /* calculate and store the final DW */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 32 ++ sldi 8,7, 64-32 ++#else + sldi 0,6, 32 + srdi 8,7, 64-32 +- or 0,0,8 ++#endif ++ or 0,0,8 + std 0,0(4) + b L(du_done) + +@@ -911,13 +1103,23 @@ + bf 30,L(du5_1dw) + + /* there are at least two DWs to copy */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 40 ++ sldi 8,7, 64-40 ++#else + sldi 0,6, 40 + srdi 8,7, 64-40 ++#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 40 ++ sldi 8,6, 64-40 ++#else + sldi 0,7, 40 + srdi 8,6, 64-40 ++#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) +@@ -926,8 +1128,13 @@ + blt cr6,L(du5_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du5_loop) + /* there is a third DW to copy */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 40 ++ sldi 8,7, 64-40 ++#else + sldi 0,6, 40 + srdi 8,7, 64-40 ++#endif + or 0,0,8 + std 0,0(4) + mr 6,7 +@@ -938,8 +1145,13 @@ + b L(du5_loop) + .align 4 + L(du5_1dw): ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 40 ++ sldi 8,7, 64-40 ++#else + sldi 0,6, 40 + srdi 8,7, 64-40 ++#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du5_loop) +@@ -951,23 +1163,43 @@ + .align 4 + /* copy 32 bytes at a time */ + L(du5_loop): ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 40 ++ sldi 8,7, 64-40 ++#else + sldi 0,6, 40 + srdi 8,7, 64-40 ++#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 40 ++ sldi 8,6, 64-40 ++#else + sldi 0,7, 40 + srdi 8,6, 64-40 ++#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 40 ++ sldi 8,7, 64-40 ++#else + sldi 0,6, 40 + srdi 8,7, 64-40 ++#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 40 ++ sldi 8,6, 64-40 ++#else + sldi 0,7, 40 + srdi 8,6, 64-40 ++#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) +@@ -977,9 +1209,14 @@ + .align 4 + L(du5_fini): + /* calculate and store the final DW */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 40 ++ sldi 8,7, 64-40 ++#else + sldi 0,6, 40 + srdi 8,7, 64-40 +- or 0,0,8 ++#endif ++ or 0,0,8 + std 0,0(4) + b L(du_done) + +@@ -988,13 +1225,23 @@ + bf 30,L(du6_1dw) + + /* there are at least two DWs to copy */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 48 ++ sldi 8,7, 64-48 ++#else + sldi 0,6, 48 + srdi 8,7, 64-48 ++#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 48 ++ sldi 8,6, 64-48 ++#else + sldi 0,7, 48 + srdi 8,6, 64-48 ++#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) +@@ -1003,8 +1250,13 @@ + blt cr6,L(du6_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du6_loop) + /* there is a third DW to copy */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 48 ++ sldi 8,7, 64-48 ++#else + sldi 0,6, 48 + srdi 8,7, 64-48 ++#endif + or 0,0,8 + std 0,0(4) + mr 6,7 +@@ -1015,8 +1267,13 @@ + b L(du6_loop) + .align 4 + L(du6_1dw): ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 48 ++ sldi 8,7, 64-48 ++#else + sldi 0,6, 48 + srdi 8,7, 64-48 ++#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du6_loop) +@@ -1028,23 +1285,43 @@ + .align 4 + /* copy 32 bytes at a time */ + L(du6_loop): ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 48 ++ sldi 8,7, 64-48 ++#else + sldi 0,6, 48 + srdi 
8,7, 64-48 ++#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 48 ++ sldi 8,6, 64-48 ++#else + sldi 0,7, 48 + srdi 8,6, 64-48 ++#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 48 ++ sldi 8,7, 64-48 ++#else + sldi 0,6, 48 + srdi 8,7, 64-48 ++#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 48 ++ sldi 8,6, 64-48 ++#else + sldi 0,7, 48 + srdi 8,6, 64-48 ++#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) +@@ -1054,9 +1331,14 @@ + .align 4 + L(du6_fini): + /* calculate and store the final DW */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 48 ++ sldi 8,7, 64-48 ++#else + sldi 0,6, 48 + srdi 8,7, 64-48 +- or 0,0,8 ++#endif ++ or 0,0,8 + std 0,0(4) + b L(du_done) + +@@ -1065,13 +1347,23 @@ + bf 30,L(du7_1dw) + + /* there are at least two DWs to copy */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 56 ++ sldi 8,7, 64-56 ++#else + sldi 0,6, 56 + srdi 8,7, 64-56 ++#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 56 ++ sldi 8,6, 64-56 ++#else + sldi 0,7, 56 + srdi 8,6, 64-56 ++#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) +@@ -1080,8 +1372,13 @@ + blt cr6,L(du7_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du7_loop) + /* there is a third DW to copy */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 56 ++ sldi 8,7, 64-56 ++#else + sldi 0,6, 56 + srdi 8,7, 64-56 ++#endif + or 0,0,8 + std 0,0(4) + mr 6,7 +@@ -1092,8 +1389,13 @@ + b L(du7_loop) + .align 4 + L(du7_1dw): ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 56 ++ sldi 8,7, 64-56 ++#else + sldi 0,6, 56 + srdi 8,7, 64-56 ++#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du7_loop) +@@ -1105,23 +1407,43 @@ + .align 4 + /* copy 32 bytes at a time */ + L(du7_loop): ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 56 ++ sldi 8,7, 64-56 ++#else + sldi 0,6, 56 + srdi 8,7, 64-56 ++#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 56 ++ sldi 8,6, 64-56 ++#else + sldi 0,7, 56 + srdi 8,6, 64-56 ++#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 56 ++ sldi 8,7, 64-56 ++#else + sldi 0,6, 56 + srdi 8,7, 64-56 ++#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,7, 56 ++ sldi 8,6, 64-56 ++#else + sldi 0,7, 56 + srdi 8,6, 64-56 ++#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) +@@ -1131,12 +1453,17 @@ + .align 4 + L(du7_fini): + /* calculate and store the final DW */ ++#ifdef __LITTLE_ENDIAN__ ++ srdi 0,6, 56 ++ sldi 8,7, 64-56 ++#else + sldi 0,6, 56 + srdi 8,7, 64-56 +- or 0,0,8 ++#endif ++ or 0,0,8 + std 0,0(4) + b L(du_done) +- ++ + .align 4 + L(du_done): + rldicr 0,31,0,60 +@@ -1144,9 +1471,9 @@ + beq cr1,0f /* If the tail is 0 bytes we are done! */ + + add 3,3,0 +- add 12,12,0 ++ add 12,12,0 + /* At this point we have a tail of 0-7 bytes and we know that the +- destiniation is double word aligned. */ ++ destination is double word aligned. */ + 4: bf 29,2f + lwz 6,0(12) + addi 12,12,4 +@@ -1165,5 +1492,5 @@ + ld 31,-8(1) + ld 3,-16(1) + blr +-END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS) ++END_GEN_TB (memcpy,TB_TOCLESS) + libc_hidden_builtin_def (memcpy) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memcpy.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memcpy.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memcpy.S 2014-05-29 13:04:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memcpy.S 2014-05-29 13:05:40.000000000 -0500 +@@ -1,5 +1,5 @@ + /* Optimized memcpy implementation for PowerPC64/POWER7. 
+- Copyright (C) 2010, 2011 Free Software Foundation, Inc. ++ Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by Luis Machado . + This file is part of the GNU C Library. + +@@ -18,425 +18,366 @@ + . */ + + #include +-#include +-#include + + + /* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. */ + ++#define dst 11 /* Use r11 so r3 kept unchanged. */ ++#define src 4 ++#define cnt 5 ++ + .machine power7 +-EALIGN (BP_SYM (memcpy), 5, 0) ++EALIGN (memcpy, 5, 0) + CALL_MCOUNT 3 + +- cmpldi cr1,5,31 ++ cmpldi cr1,cnt,31 + neg 0,3 +- std 3,-16(1) +- std 31,-8(1) +- cfi_offset(31,-8) + ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move + code. */ + +- andi. 11,3,7 /* Check alignment of DST. */ +- ++#ifdef __LITTLE_ENDIAN__ ++/* In little-endian mode, power7 takes an alignment trap on any lxvd2x ++ or stxvd2x crossing a 32-byte boundary, so ensure the aligned_copy ++ loop is only used for quadword aligned copies. */ ++ andi. 10,3,15 ++ clrldi 11,4,60 ++#else ++ andi. 10,3,7 /* Check alignment of DST. */ ++ clrldi 11,4,61 /* Check alignment of SRC. */ ++#endif ++ cmpld cr6,10,11 /* SRC and DST alignments match? */ + +- clrldi 10,4,61 /* Check alignment of SRC. */ +- cmpld cr6,10,11 /* SRC and DST alignments match? */ +- mr 12,4 +- mr 31,5 ++ mr dst,3 + bne cr6,L(copy_GE_32_unaligned) ++ beq L(aligned_copy) + +- srdi 9,5,3 /* Number of full quadwords remaining. */ +- +- beq L(copy_GE_32_aligned_cont) +- +- clrldi 0,0,61 +- mtcrf 0x01,0 +- subf 31,0,5 +- +- /* Get the SRC aligned to 8 bytes. */ +- +-1: bf 31,2f +- lbz 6,0(12) +- addi 12,12,1 +- stb 6,0(3) +- addi 3,3,1 +-2: bf 30,4f +- lhz 6,0(12) +- addi 12,12,2 +- sth 6,0(3) +- addi 3,3,2 +-4: bf 29,0f +- lwz 6,0(12) +- addi 12,12,4 +- stw 6,0(3) +- addi 3,3,4 +-0: +- clrldi 10,12,61 /* Check alignment of SRC again. */ +- srdi 9,31,3 /* Number of full doublewords remaining. */ +- +-L(copy_GE_32_aligned_cont): +- +- clrldi 11,31,61 +- mtcrf 0x01,9 +- +- srdi 8,31,5 +- cmpldi cr1,9,4 +- cmpldi cr6,11,0 +- mr 11,12 ++ mtocrf 0x01,0 ++#ifdef __LITTLE_ENDIAN__ ++ clrldi 0,0,60 ++#else ++ clrldi 0,0,61 ++#endif + +- /* Copy 1~3 doublewords so the main loop starts +- at a multiple of 32 bytes. */ +- +- bf 30,1f +- ld 6,0(12) +- ld 7,8(12) +- addi 11,12,16 +- mtctr 8 +- std 6,0(3) +- std 7,8(3) +- addi 10,3,16 +- bf 31,4f +- ld 0,16(12) +- std 0,16(3) +- blt cr1,3f +- addi 11,12,24 +- addi 10,3,24 +- b 4f +- +- .align 4 +-1: /* Copy 1 doubleword and set the counter. */ +- mr 10,3 +- mtctr 8 +- bf 31,4f +- ld 6,0(12) +- addi 11,12,8 +- std 6,0(3) +- addi 10,3,8 ++/* Get the DST and SRC aligned to 8 bytes (16 for little-endian). */ ++1: ++ bf 31,2f ++ lbz 6,0(src) ++ addi src,src,1 ++ stb 6,0(dst) ++ addi dst,dst,1 ++2: ++ bf 30,4f ++ lhz 6,0(src) ++ addi src,src,2 ++ sth 6,0(dst) ++ addi dst,dst,2 ++4: ++ bf 29,8f ++ lwz 6,0(src) ++ addi src,src,4 ++ stw 6,0(dst) ++ addi dst,dst,4 ++8: ++#ifdef __LITTLE_ENDIAN__ ++ bf 28,16f ++ ld 6,0(src) ++ addi src,src,8 ++ std 6,0(dst) ++ addi dst,dst,8 ++16: ++#endif ++ subf cnt,0,cnt + ++/* Main aligned copy loop. Copies 128 bytes at a time. */ + L(aligned_copy): +- /* Main aligned copy loop. Copies up to 128-bytes at a time. */ +- .align 4 +-4: +- /* check for any 32-byte or 64-byte lumps that are outside of a +- nice 128-byte range. R8 contains the number of 32-byte +- lumps, so drop this into the CR, and use the SO/EQ bits to help +- handle the 32- or 64- byte lumps. Then handle the rest with an +- unrolled 128-bytes-at-a-time copy loop. 
*/ +- mtocrf 1,8 +- li 6,16 # 16() index +- li 7,32 # 32() index +- li 8,48 # 48() index +- +-L(aligned_32byte): +- /* if the SO bit (indicating a 32-byte lump) is not set, move along. */ +- bns cr7,L(aligned_64byte) +- lxvd2x 6,0,11 +- lxvd2x 7,11,6 +- addi 11,11,32 +- stxvd2x 6,0,10 +- stxvd2x 7,10,6 +- addi 10,10,32 +- +-L(aligned_64byte): +- /* if the EQ bit (indicating a 64-byte lump) is not set, move along. */ +- bne cr7,L(aligned_128setup) +- lxvd2x 6,0,11 +- lxvd2x 7,11,6 +- lxvd2x 8,11,7 +- lxvd2x 9,11,8 +- addi 11,11,64 +- stxvd2x 6,0,10 +- stxvd2x 7,10,6 +- stxvd2x 8,10,7 +- stxvd2x 9,10,8 +- addi 10,10,64 +- +-L(aligned_128setup): +- /* Set up for the 128-byte at a time copy loop. */ +- srdi 8,31,7 +- cmpdi 8,0 # Any 4x lumps left? +- beq 3f # if not, move along. +- lxvd2x 6,0,11 +- lxvd2x 7,11,6 +- mtctr 8 # otherwise, load the ctr and begin. +- li 8,48 # 48() index ++ li 6,16 ++ li 7,32 ++ li 8,48 ++ mtocrf 0x02,cnt ++ srdi 12,cnt,7 ++ cmpdi 12,0 ++ beq L(aligned_tail) ++ lxvd2x 6,0,src ++ lxvd2x 7,src,6 ++ mtctr 12 + b L(aligned_128loop) + ++ .align 4 + L(aligned_128head): + /* for the 2nd + iteration of this loop. */ +- lxvd2x 6,0,11 +- lxvd2x 7,11,6 ++ lxvd2x 6,0,src ++ lxvd2x 7,src,6 + L(aligned_128loop): +- lxvd2x 8,11,7 +- lxvd2x 9,11,8 +- stxvd2x 6,0,10 +- addi 11,11,64 +- stxvd2x 7,10,6 +- stxvd2x 8,10,7 +- stxvd2x 9,10,8 +- lxvd2x 6,0,11 +- lxvd2x 7,11,6 +- addi 10,10,64 +- lxvd2x 8,11,7 +- lxvd2x 9,11,8 +- addi 11,11,64 +- stxvd2x 6,0,10 +- stxvd2x 7,10,6 +- stxvd2x 8,10,7 +- stxvd2x 9,10,8 +- addi 10,10,64 ++ lxvd2x 8,src,7 ++ lxvd2x 9,src,8 ++ stxvd2x 6,0,dst ++ addi src,src,64 ++ stxvd2x 7,dst,6 ++ stxvd2x 8,dst,7 ++ stxvd2x 9,dst,8 ++ lxvd2x 6,0,src ++ lxvd2x 7,src,6 ++ addi dst,dst,64 ++ lxvd2x 8,src,7 ++ lxvd2x 9,src,8 ++ addi src,src,64 ++ stxvd2x 6,0,dst ++ stxvd2x 7,dst,6 ++ stxvd2x 8,dst,7 ++ stxvd2x 9,dst,8 ++ addi dst,dst,64 + bdnz L(aligned_128head) + +-3: +- /* Check for tail bytes. */ +- rldicr 0,31,0,60 +- mtcrf 0x01,31 +- beq cr6,0f +- +-.L9: +- add 3,3,0 +- add 12,12,0 +- +- /* At this point we have a tail of 0-7 bytes and we know that the +- destination is doubleword-aligned. */ +-4: /* Copy 4 bytes. */ +- bf 29,2f +- +- lwz 6,0(12) +- addi 12,12,4 +- stw 6,0(3) +- addi 3,3,4 +-2: /* Copy 2 bytes. */ +- bf 30,1f +- +- lhz 6,0(12) +- addi 12,12,2 +- sth 6,0(3) +- addi 3,3,2 +-1: /* Copy 1 byte. */ +- bf 31,0f +- +- lbz 6,0(12) +- stb 6,0(3) +-0: /* Return original DST pointer. */ +- ld 31,-8(1) +- ld 3,-16(1) ++L(aligned_tail): ++ mtocrf 0x01,cnt ++ bf 25,32f ++ lxvd2x 6,0,src ++ lxvd2x 7,src,6 ++ lxvd2x 8,src,7 ++ lxvd2x 9,src,8 ++ addi src,src,64 ++ stxvd2x 6,0,dst ++ stxvd2x 7,dst,6 ++ stxvd2x 8,dst,7 ++ stxvd2x 9,dst,8 ++ addi dst,dst,64 ++32: ++ bf 26,16f ++ lxvd2x 6,0,src ++ lxvd2x 7,src,6 ++ addi src,src,32 ++ stxvd2x 6,0,dst ++ stxvd2x 7,dst,6 ++ addi dst,dst,32 ++16: ++ bf 27,8f ++ lxvd2x 6,0,src ++ addi src,src,16 ++ stxvd2x 6,0,dst ++ addi dst,dst,16 ++8: ++ bf 28,4f ++ ld 6,0(src) ++ addi src,src,8 ++ std 6,0(dst) ++ addi dst,dst,8 ++4: /* Copies 4~7 bytes. */ ++ bf 29,L(tail2) ++ lwz 6,0(src) ++ stw 6,0(dst) ++ bf 30,L(tail5) ++ lhz 7,4(src) ++ sth 7,4(dst) ++ bflr 31 ++ lbz 8,6(src) ++ stb 8,6(dst) ++ /* Return original DST pointer. */ + blr + +- /* Handle copies of 0~31 bytes. */ +- .align 4 ++ ++/* Handle copies of 0~31 bytes. */ ++ .align 4 + L(copy_LT_32): +- cmpldi cr6,5,8 +- mr 12,4 +- mtcrf 0x01,5 ++ mr dst,3 ++ cmpldi cr6,cnt,8 ++ mtocrf 0x01,cnt + ble cr6,L(copy_LE_8) + + /* At least 9 bytes to go. 
*/ + neg 8,4 +- clrrdi 11,4,2 +- andi. 0,8,3 +- cmpldi cr1,5,16 +- mr 10,5 ++ andi. 0,8,3 ++ cmpldi cr1,cnt,16 + beq L(copy_LT_32_aligned) + +- /* Force 4-bytes alignment for SRC. */ +- mtocrf 0x01,0 +- subf 10,0,5 +-2: bf 30,1f +- +- lhz 6,0(12) +- addi 12,12,2 +- sth 6,0(3) +- addi 3,3,2 +-1: bf 31,L(end_4bytes_alignment) +- +- lbz 6,0(12) +- addi 12,12,1 +- stb 6,0(3) +- addi 3,3,1 ++ /* Force 4-byte alignment for SRC. */ ++ mtocrf 0x01,0 ++ subf cnt,0,cnt ++2: ++ bf 30,1f ++ lhz 6,0(src) ++ addi src,src,2 ++ sth 6,0(dst) ++ addi dst,dst,2 ++1: ++ bf 31,L(end_4bytes_alignment) ++ lbz 6,0(src) ++ addi src,src,1 ++ stb 6,0(dst) ++ addi dst,dst,1 + +- .align 4 ++ .align 4 + L(end_4bytes_alignment): +- cmpldi cr1,10,16 +- mtcrf 0x01,10 ++ cmpldi cr1,cnt,16 ++ mtocrf 0x01,cnt + + L(copy_LT_32_aligned): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. */ +- lwz 6,0(12) +- lwz 7,4(12) +- stw 6,0(3) +- lwz 8,8(12) +- stw 7,4(3) +- lwz 6,12(12) +- addi 12,12,16 +- stw 8,8(3) +- stw 6,12(3) +- addi 3,3,16 ++ lwz 6,0(src) ++ lwz 7,4(src) ++ stw 6,0(dst) ++ lwz 8,8(src) ++ stw 7,4(dst) ++ lwz 6,12(src) ++ addi src,src,16 ++ stw 8,8(dst) ++ stw 6,12(dst) ++ addi dst,dst,16 + 8: /* Copy 8 bytes. */ +- bf 28,4f ++ bf 28,L(tail4) ++ lwz 6,0(src) ++ lwz 7,4(src) ++ addi src,src,8 ++ stw 6,0(dst) ++ stw 7,4(dst) ++ addi dst,dst,8 ++ ++ .align 4 ++/* Copies 4~7 bytes. */ ++L(tail4): ++ bf 29,L(tail2) ++ lwz 6,0(src) ++ stw 6,0(dst) ++ bf 30,L(tail5) ++ lhz 7,4(src) ++ sth 7,4(dst) ++ bflr 31 ++ lbz 8,6(src) ++ stb 8,6(dst) ++ /* Return original DST pointer. */ ++ blr + +- lwz 6,0(12) +- lwz 7,4(12) +- addi 12,12,8 +- stw 6,0(3) +- stw 7,4(3) +- addi 3,3,8 +-4: /* Copy 4 bytes. */ +- bf 29,2f +- +- lwz 6,0(12) +- addi 12,12,4 +- stw 6,0(3) +- addi 3,3,4 +-2: /* Copy 2-3 bytes. */ ++ .align 4 ++/* Copies 2~3 bytes. */ ++L(tail2): + bf 30,1f +- +- lhz 6,0(12) +- sth 6,0(3) +- bf 31,0f +- lbz 7,2(12) +- stb 7,2(3) +- ld 3,-16(1) ++ lhz 6,0(src) ++ sth 6,0(dst) ++ bflr 31 ++ lbz 7,2(src) ++ stb 7,2(dst) + blr + +- .align 4 +-1: /* Copy 1 byte. */ +- bf 31,0f ++ .align 4 ++L(tail5): ++ bflr 31 ++ lbz 6,4(src) ++ stb 6,4(dst) ++ blr + +- lbz 6,0(12) +- stb 6,0(3) +-0: /* Return original DST pointer. */ +- ld 3,-16(1) ++ .align 4 ++1: ++ bflr 31 ++ lbz 6,0(src) ++ stb 6,0(dst) ++ /* Return original DST pointer. */ + blr + +- /* Handles copies of 0~8 bytes. */ +- .align 4 ++ ++/* Handles copies of 0~8 bytes. */ ++ .align 4 + L(copy_LE_8): +- bne cr6,4f ++ bne cr6,L(tail4) + + /* Though we could've used ld/std here, they are still + slow for unaligned cases. */ + +- lwz 6,0(4) +- lwz 7,4(4) +- stw 6,0(3) +- stw 7,4(3) +- ld 3,-16(1) /* Return original DST pointers. */ ++ lwz 6,0(src) ++ lwz 7,4(src) ++ stw 6,0(dst) ++ stw 7,4(dst) + blr + +- .align 4 +-4: /* Copies 4~7 bytes. */ +- bf 29,2b + +- lwz 6,0(4) +- stw 6,0(3) +- bf 30,5f +- lhz 7,4(4) +- sth 7,4(3) +- bf 31,0f +- lbz 8,6(4) +- stb 8,6(3) +- ld 3,-16(1) +- blr +- +- .align 4 +-5: /* Copy 1 byte. */ +- bf 31,0f +- +- lbz 6,4(4) +- stb 6,4(3) +- +-0: /* Return original DST pointer. */ +- ld 3,-16(1) +- blr +- +- /* Handle copies of 32+ bytes where DST is aligned (to quadword) but +- SRC is not. Use aligned quadword loads from SRC, shifted to realign +- the data, allowing for aligned DST stores. */ +- .align 4 ++/* Handle copies of 32+ bytes where DST is aligned (to quadword) but ++ SRC is not. Use aligned quadword loads from SRC, shifted to realign ++ the data, allowing for aligned DST stores. 
*/ ++ .align 4 + L(copy_GE_32_unaligned): +- clrldi 0,0,60 /* Number of bytes until the 1st +- quadword. */ +- andi. 11,3,15 /* Check alignment of DST (against +- quadwords). */ +- srdi 9,5,4 /* Number of full quadwords remaining. */ ++ clrldi 0,0,60 /* Number of bytes until the 1st dst quadword. */ ++#ifndef __LITTLE_ENDIAN__ ++ andi. 10,3,15 /* Check alignment of DST (against quadwords). */ ++#endif ++ srdi 9,cnt,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont) + +- /* SRC is not quadword aligned, get it aligned. */ ++ /* DST is not quadword aligned, get it aligned. */ + +- mtcrf 0x01,0 +- subf 31,0,5 ++ mtocrf 0x01,0 ++ subf cnt,0,cnt + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. */ +-1: /* Copy 1 byte. */ ++1: + bf 31,2f +- +- lbz 6,0(12) +- addi 12,12,1 +- stb 6,0(3) +- addi 3,3,1 +-2: /* Copy 2 bytes. */ ++ lbz 6,0(src) ++ addi src,src,1 ++ stb 6,0(dst) ++ addi dst,dst,1 ++2: + bf 30,4f +- +- lhz 6,0(12) +- addi 12,12,2 +- sth 6,0(3) +- addi 3,3,2 +-4: /* Copy 4 bytes. */ ++ lhz 6,0(src) ++ addi src,src,2 ++ sth 6,0(dst) ++ addi dst,dst,2 ++4: + bf 29,8f +- +- lwz 6,0(12) +- addi 12,12,4 +- stw 6,0(3) +- addi 3,3,4 +-8: /* Copy 8 bytes. */ ++ lwz 6,0(src) ++ addi src,src,4 ++ stw 6,0(dst) ++ addi dst,dst,4 ++8: + bf 28,0f +- +- ld 6,0(12) +- addi 12,12,8 +- std 6,0(3) +- addi 3,3,8 ++ ld 6,0(src) ++ addi src,src,8 ++ std 6,0(dst) ++ addi dst,dst,8 + 0: +- clrldi 10,12,60 /* Check alignment of SRC. */ +- srdi 9,31,4 /* Number of full quadwords remaining. */ ++ srdi 9,cnt,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. */ + L(copy_GE_32_unaligned_cont): + + /* Setup two indexes to speed up the indexed vector operations. */ +- clrldi 11,31,60 +- li 6,16 /* Index for 16-bytes offsets. */ ++ clrldi 10,cnt,60 ++ li 6,16 /* Index for 16-bytes offsets. */ + li 7,32 /* Index for 32-bytes offsets. */ +- cmpldi cr1,11,0 +- srdi 8,31,5 /* Setup the loop counter. */ +- mr 10,3 +- mr 11,12 +- mtcrf 0x01,9 +- cmpldi cr6,9,1 +- lvsl 5,0,12 +- lvx 3,0,12 +- bf 31,L(setup_unaligned_loop) +- +- /* Copy another 16 bytes to align to 32-bytes due to the loop . */ +- lvx 4,12,6 +- vperm 6,3,4,5 +- addi 11,12,16 +- addi 10,3,16 +- stvx 6,0,3 ++ cmpldi cr1,10,0 ++ srdi 8,cnt,5 /* Setup the loop counter. */ ++ mtocrf 0x01,9 ++ cmpldi cr6,9,1 ++#ifdef __LITTLE_ENDIAN__ ++ lvsr 5,0,src ++#else ++ lvsl 5,0,src ++#endif ++ lvx 3,0,src ++ li 0,0 ++ bf 31,L(setup_unaligned_loop) ++ ++ /* Copy another 16 bytes to align to 32-bytes due to the loop. */ ++ lvx 4,src,6 ++#ifdef __LITTLE_ENDIAN__ ++ vperm 6,4,3,5 ++#else ++ vperm 6,3,4,5 ++#endif ++ addi src,src,16 ++ stvx 6,0,dst ++ addi dst,dst,16 + vor 3,4,4 ++ clrrdi 0,src,60 + + L(setup_unaligned_loop): +- mtctr 8 +- ble cr6,L(end_unaligned_loop) ++ mtctr 8 ++ ble cr6,L(end_unaligned_loop) + + /* Copy 32 bytes at a time using vector instructions. */ +- .align 4 ++ .align 4 + L(unaligned_loop): + + /* Note: vr6/vr10 may contain data that was already copied, +@@ -444,63 +385,56 @@ + some portions again. This is faster than having unaligned + vector instructions though. */ + +- lvx 4,11,6 /* vr4 = r11+16. */ +- vperm 6,3,4,5 /* Merge the correctly-aligned portions +- of vr3/vr4 into vr6. */ +- lvx 3,11,7 /* vr3 = r11+32. */ +- vperm 10,4,3,5 /* Merge the correctly-aligned portions +- of vr3/vr4 into vr10. 
*/ +- addi 11,11,32 +- stvx 6,0,10 +- stvx 10,10,6 +- addi 10,10,32 +- ++ lvx 4,src,6 ++#ifdef __LITTLE_ENDIAN__ ++ vperm 6,4,3,5 ++#else ++ vperm 6,3,4,5 ++#endif ++ lvx 3,src,7 ++#ifdef __LITTLE_ENDIAN__ ++ vperm 10,3,4,5 ++#else ++ vperm 10,4,3,5 ++#endif ++ addi src,src,32 ++ stvx 6,0,dst ++ stvx 10,dst,6 ++ addi dst,dst,32 + bdnz L(unaligned_loop) + +- .align 4 ++ clrrdi 0,src,60 ++ ++ .align 4 + L(end_unaligned_loop): + + /* Check for tail bytes. */ +- rldicr 0,31,0,59 +- mtcrf 0x01,31 +- beq cr1,0f ++ mtocrf 0x01,cnt ++ beqlr cr1 + +- add 3,3,0 +- add 12,12,0 ++ add src,src,0 + + /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */ +-8: /* Copy 8 bytes. */ ++ /* Copy 8 bytes. */ + bf 28,4f +- +- lwz 6,0(12) +- lwz 7,4(12) +- addi 12,12,8 +- stw 6,0(3) +- stw 7,4(3) +- addi 3,3,8 +-4: /* Copy 4 bytes. */ +- bf 29,2f +- +- lwz 6,0(12) +- addi 12,12,4 +- stw 6,0(3) +- addi 3,3,4 +-2: /* Copy 2~3 bytes. */ +- bf 30,1f +- +- lhz 6,0(12) +- addi 12,12,2 +- sth 6,0(3) +- addi 3,3,2 +-1: /* Copy 1 byte. */ +- bf 31,0f +- +- lbz 6,0(12) +- stb 6,0(3) +-0: /* Return original DST pointer. */ +- ld 31,-8(1) +- ld 3,-16(1) ++ lwz 6,0(src) ++ lwz 7,4(src) ++ addi src,src,8 ++ stw 6,0(dst) ++ stw 7,4(dst) ++ addi dst,dst,8 ++4: /* Copy 4~7 bytes. */ ++ bf 29,L(tail2) ++ lwz 6,0(src) ++ stw 6,0(dst) ++ bf 30,L(tail5) ++ lhz 7,4(src) ++ sth 7,4(dst) ++ bflr 31 ++ lbz 8,6(src) ++ stb 8,6(dst) ++ /* Return original DST pointer. */ + blr + +-END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS) ++END_GEN_TB (memcpy,TB_TOCLESS) + libc_hidden_builtin_def (memcpy) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/mempcpy.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/mempcpy.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/mempcpy.S 2014-05-29 13:04:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/mempcpy.S 2014-05-29 13:04:56.000000000 -0500 +@@ -367,13 +367,21 @@ + mr 11,12 + mtcrf 0x01,9 + cmpldi cr6,9,1 +- lvsl 5,0,12 ++#ifdef __LITTLE_ENDIAN__ ++ lvsr 5,0,12 ++#else ++ lvsl 5,0,12 ++#endif + lvx 3,0,12 + bf 31,L(setup_unaligned_loop) + + /* Copy another 16 bytes to align to 32-bytes due to the loop . */ + lvx 4,12,6 +- vperm 6,3,4,5 ++#ifdef __LITTLE_ENDIAN__ ++ vperm 6,4,3,5 ++#else ++ vperm 6,3,4,5 ++#endif + addi 11,12,16 + addi 10,3,16 + stvx 6,0,3 +@@ -393,11 +401,17 @@ + vector instructions though. */ + + lvx 4,11,6 /* vr4 = r11+16. */ +- vperm 6,3,4,5 /* Merge the correctly-aligned portions +- of vr3/vr4 into vr6. */ ++#ifdef __LITTLE_ENDIAN__ ++ vperm 6,4,3,5 ++#else ++ vperm 6,3,4,5 ++#endif + lvx 3,11,7 /* vr3 = r11+32. */ +- vperm 10,4,3,5 /* Merge the correctly-aligned portions +- of vr3/vr4 into vr10. */ ++#ifdef __LITTLE_ENDIAN__ ++ vperm 10,3,4,5 ++#else ++ vperm 10,4,3,5 ++#endif + addi 11,11,32 + stvx 6,0,10 + stvx 10,10,6 diff --git a/packages/glibc/2.17/0054-glibc-ppc64le-32.patch b/packages/glibc/2.17/0054-glibc-ppc64le-32.patch new file mode 100644 index 0000000..058d53a --- /dev/null +++ b/packages/glibc/2.17/0054-glibc-ppc64le-32.patch @@ -0,0 +1,272 @@ +# commit 3be87c77d24c4456ccca4034363b6d1814cd0c84 +# Author: Alan Modra +# Date: Sat Aug 17 18:47:59 2013 +0930 +# +# PowerPC LE memset +# http://sourceware.org/ml/libc-alpha/2013-08/msg00104.html +# +# One of the things I noticed when looking at power7 timing is that rlwimi +# is cracked and the two resulting insns have a register dependency. +# That makes it a little slower than the equivalent rldimi. 
+# +# * sysdeps/powerpc/powerpc64/memset.S: Replace rlwimi with +# insrdi. Formatting. +# * sysdeps/powerpc/powerpc64/power4/memset.S: Likewise. +# * sysdeps/powerpc/powerpc64/power6/memset.S: Likewise. +# * sysdeps/powerpc/powerpc64/power7/memset.S: Likewise. +# * sysdeps/powerpc/powerpc32/power4/memset.S: Likewise. +# * sysdeps/powerpc/powerpc32/power6/memset.S: Likewise. +# * sysdeps/powerpc/powerpc32/power7/memset.S: Likewise. +# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/memset.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/memset.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/memset.S 2014-05-29 13:07:41.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/memset.S 2014-05-29 13:07:46.000000000 -0500 +@@ -52,7 +52,7 @@ + + /* Align to word boundary. */ + cmplwi cr5, rLEN, 31 +- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */ ++ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */ + beq+ L(aligned) + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 4 +@@ -67,7 +67,7 @@ + /* Handle the case of size < 31. */ + L(aligned): + mtcrf 0x01, rLEN +- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */ ++ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ + ble cr5, L(medium) + /* Align to 32-byte boundary. */ + andi. rALIGN, rMEMP, 0x1C +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/memset.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/memset.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/memset.S 2014-05-29 13:07:41.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/memset.S 2014-05-29 13:07:46.000000000 -0500 +@@ -50,7 +50,7 @@ + ble- cr1, L(small) + /* Align to word boundary. */ + cmplwi cr5, rLEN, 31 +- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */ ++ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */ + beq+ L(aligned) + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 4 +@@ -66,7 +66,7 @@ + /* Handle the case of size < 31. */ + L(aligned): + mtcrf 0x01, rLEN +- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */ ++ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ + ble cr5, L(medium) + /* Align to 32-byte boundary. */ + andi. rALIGN, rMEMP, 0x1C +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memset.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memset.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memset.S 2014-05-29 13:07:41.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memset.S 2014-05-29 13:07:46.000000000 -0500 +@@ -37,8 +37,8 @@ + cfi_offset(31,-8) + + /* Replicate byte to word. */ +- rlwimi 4,4,8,16,23 +- rlwimi 4,4,16,0,15 ++ insrdi 4,4,8,48 ++ insrdi 4,4,16,32 + + ble cr6,L(small) /* If length <= 8, use short copy code. */ + +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/memset.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/memset.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/memset.S 2014-05-29 13:07:41.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/memset.S 2014-05-29 13:07:46.000000000 -0500 +@@ -73,14 +73,14 @@ + + /* Align to doubleword boundary. */ + cmpldi cr5, rLEN, 31 +- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */ ++ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */ + beq+ L(aligned2) + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 8 + cror 28,30,31 /* Detect odd word aligned. 
*/ + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN +- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */ ++ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ + bt 29, L(g4) + /* Process the even word of doubleword. */ + bf+ 31, L(g2) +@@ -102,14 +102,14 @@ + + /* Handle the case of size < 31. */ + L(aligned2): +- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */ ++ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ + L(aligned): + mtcrf 0x01, rLEN + ble cr5, L(medium) + /* Align to 32-byte boundary. */ + andi. rALIGN, rMEMP, 0x18 + subfic rALIGN, rALIGN, 0x20 +- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */ ++ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + beq L(caligned) + mtcrf 0x01, rALIGN + add rMEMP, rMEMP, rALIGN +@@ -230,7 +230,7 @@ + /* Memset of 0-31 bytes. */ + .align 5 + L(medium): +- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */ ++ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + cmpldi cr1, rLEN, 16 + L(medium_tail2): + add rMEMP, rMEMP, rLEN +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/memset.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/memset.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/memset.S 2014-05-29 13:07:41.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/memset.S 2014-05-29 13:07:46.000000000 -0500 +@@ -68,14 +68,14 @@ + + /* Align to doubleword boundary. */ + cmpldi cr5, rLEN, 31 +- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */ ++ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */ + beq+ L(aligned2) + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 8 + cror 28,30,31 /* Detect odd word aligned. */ + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN +- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */ ++ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ + bt 29, L(g4) + /* Process the even word of doubleword. */ + bf+ 31, L(g2) +@@ -97,14 +97,14 @@ + + /* Handle the case of size < 31. */ + L(aligned2): +- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */ ++ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ + L(aligned): + mtcrf 0x01, rLEN + ble cr5, L(medium) + /* Align to 32-byte boundary. */ + andi. rALIGN, rMEMP, 0x18 + subfic rALIGN, rALIGN, 0x20 +- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */ ++ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + beq L(caligned) + mtcrf 0x01, rALIGN + add rMEMP, rMEMP, rALIGN +@@ -164,24 +164,24 @@ + L(getCacheAligned): + cmpldi cr1,rLEN,32 + andi. rTMP,rMEMP,127 +- blt cr1,L(handletail32) +- beq L(cacheAligned) ++ blt cr1,L(handletail32) ++ beq L(cacheAligned) + addi rMEMP,rMEMP,32 + addi rLEN,rLEN,-32 +- std rCHR,-32(rMEMP) +- std rCHR,-24(rMEMP) +- std rCHR,-16(rMEMP) +- std rCHR,-8(rMEMP) +- b L(getCacheAligned) ++ std rCHR,-32(rMEMP) ++ std rCHR,-24(rMEMP) ++ std rCHR,-16(rMEMP) ++ std rCHR,-8(rMEMP) ++ b L(getCacheAligned) + + /* Now we are aligned to the cache line and can use dcbz. */ + L(cacheAligned): + cmpld cr1,rLEN,rCLS +- blt cr1,L(handletail32) ++ blt cr1,L(handletail32) + dcbz 0,rMEMP + subf rLEN,rCLS,rLEN +- add rMEMP,rMEMP,rCLS +- b L(cacheAligned) ++ add rMEMP,rMEMP,rCLS ++ b L(cacheAligned) + + /* We are here because the cache line size was set and was not 32-bytes + and the remainder (rLEN) is less than the actual cache line size. +@@ -218,7 +218,7 @@ + /* Memset of 0-31 bytes. 
*/ + .align 5 + L(medium): +- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */ ++ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + cmpldi cr1, rLEN, 16 + L(medium_tail2): + add rMEMP, rMEMP, rLEN +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power6/memset.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power6/memset.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power6/memset.S 2014-05-29 13:07:41.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power6/memset.S 2014-05-29 13:07:46.000000000 -0500 +@@ -65,14 +65,14 @@ + + /* Align to doubleword boundary. */ + cmpldi cr5, rLEN, 31 +- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */ ++ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */ + beq+ L(aligned2) + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 8 + cror 28,30,31 /* Detect odd word aligned. */ + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN +- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */ ++ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ + bt 29, L(g4) + /* Process the even word of doubleword. */ + bf+ 31, L(g2) +@@ -94,14 +94,14 @@ + + /* Handle the case of size < 31. */ + L(aligned2): +- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */ ++ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ + L(aligned): + mtcrf 0x01, rLEN + ble cr5, L(medium) + /* Align to 32-byte boundary. */ + andi. rALIGN, rMEMP, 0x18 + subfic rALIGN, rALIGN, 0x20 +- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */ ++ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + beq L(caligned) + mtcrf 0x01, rALIGN + add rMEMP, rMEMP, rALIGN +@@ -362,7 +362,7 @@ + /* Memset of 0-31 bytes. */ + .align 5 + L(medium): +- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */ ++ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + cmpldi cr1, rLEN, 16 + L(medium_tail2): + add rMEMP, rMEMP, rLEN +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memset.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memset.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memset.S 2014-05-29 13:07:41.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memset.S 2014-05-29 13:07:46.000000000 -0500 +@@ -34,8 +34,8 @@ + mr 10,3 + + /* Replicate byte to word. */ +- rlwimi 4,4,8,16,23 +- rlwimi 4,4,16,0,15 ++ insrdi 4,4,8,48 ++ insrdi 4,4,16,32 + ble cr6,L(small) /* If length <= 8, use short copy code. */ + + neg 0,3 +@@ -323,7 +323,7 @@ + clrldi 0,0,62 + beq L(medium_aligned) + +- /* Force 4-bytes alignment for SRC. */ ++ /* Force 4-bytes alignment for DST. */ + mtocrf 0x01,0 + subf 5,0,5 + 1: /* Copy 1 byte. */ diff --git a/packages/glibc/2.17/0055-glibc-ppc64le-33.patch b/packages/glibc/2.17/0055-glibc-ppc64le-33.patch new file mode 100644 index 0000000..9da11df --- /dev/null +++ b/packages/glibc/2.17/0055-glibc-ppc64le-33.patch @@ -0,0 +1,1255 @@ +# commit 466b03933234017473c12dd1d92bda5e7fe49df7 +# Author: Alan Modra +# Date: Sat Aug 17 18:48:36 2013 +0930 +# +# PowerPC LE memchr and memrchr +# http://sourceware.org/ml/libc-alpha/2013-08/msg00105.html +# +# Like strnlen, memchr and memrchr had a number of defects fixed by this +# patch as well as adding little-endian support. The first one I +# noticed was that the entry to the main loop needlessly checked for +# "are we done yet?" when we know the size is large enough that we can't +# be done. 
The second defect I noticed was that the main loop count was +# wrong, which in turn meant that the small loop needed to handle an +# extra word. Thirdly, there is nothing to say that the string can't +# wrap around zero, except of course that we'd normally hit a segfault +# on trying to read from address zero. Fixing that simplified a number +# of places: +# +# - /* Are we done already? */ +# - addi r9,r8,8 +# - cmpld r9,r7 +# - bge L(null) +# +# becomes +# +# + cmpld r8,r7 +# + beqlr +# +# However, the exit gets an extra test because I test for being on the +# last word then if so whether the byte offset is less than the end. +# Overall, the change is a win. +# +# Lastly, memrchr used the wrong cache hint. +# +# * sysdeps/powerpc/powerpc64/power7/memchr.S: Replace rlwimi with +# insrdi. Make better use of reg selection to speed exit slightly. +# Schedule entry path a little better. Remove useless "are we done" +# checks on entry to main loop. Handle wrapping around zero address. +# Correct main loop count. Handle single left-over word from main +# loop inline rather than by using loop_small. Remove extra word +# case in loop_small caused by wrong loop count. Add little-endian +# support. +# * sysdeps/powerpc/powerpc32/power7/memchr.S: Likewise. +# * sysdeps/powerpc/powerpc64/power7/memrchr.S: Likewise. Use proper +# cache hint. +# * sysdeps/powerpc/powerpc32/power7/memrchr.S: Likewise. +# * sysdeps/powerpc/powerpc64/power7/rawmemchr.S: Add little-endian +# support. Avoid rlwimi. +# * sysdeps/powerpc/powerpc32/power7/rawmemchr.S: Likewise. +# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memchr.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memchr.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memchr.S 2014-05-29 13:09:17.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memchr.S 2014-05-29 13:13:37.000000000 -0500 +@@ -1,5 +1,5 @@ + /* Optimized memchr implementation for PowerPC32/POWER7 using cmpb insn. +- Copyright (C) 2010-2012 Free Software Foundation, Inc. ++ Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by Luis Machado . + This file is part of the GNU C Library. + +@@ -18,116 +18,118 @@ + . */ + + #include +-#include +-#include + + /* int [r3] memchr (char *s [r3], int byte [r4], int size [r5]) */ + .machine power7 +-ENTRY (BP_SYM (__memchr)) ++ENTRY (__memchr) + CALL_MCOUNT + dcbt 0,r3 + clrrwi r8,r3,2 +- rlwimi r4,r4,8,16,23 +- rlwimi r4,r4,16,0,15 ++ insrwi r4,r4,8,16 /* Replicate byte to word. */ + add r7,r3,r5 /* Calculate the last acceptable address. */ ++ insrwi r4,r4,16,0 + cmplwi r5,16 ++ li r9, -1 ++ rlwinm r6,r3,3,27,28 /* Calculate padding. */ ++ addi r7,r7,-1 ++#ifdef __LITTLE_ENDIAN__ ++ slw r9,r9,r6 ++#else ++ srw r9,r9,r6 ++#endif + ble L(small_range) + +- cmplw cr7,r3,r7 /* Compare the starting address (r3) with the +- ending address (r7). If (r3 >= r7), the size +- passed in is zero or negative. */ +- ble cr7,L(proceed) +- +- li r7,-1 /* Artificially set our ending address (r7) +- such that we will exit early. */ +-L(proceed): +- rlwinm r6,r3,3,27,28 /* Calculate padding. */ +- cmpli cr6,r6,0 /* cr6 == Do we have padding? */ + lwz r12,0(r8) /* Load word from memory. */ +- cmpb r10,r12,r4 /* Check for BYTE's in WORD1. */ +- beq cr6,L(proceed_no_padding) +- slw r10,r10,r6 +- srw r10,r10,r6 +-L(proceed_no_padding): +- cmplwi cr7,r10,0 /* If r10 == 0, no BYTEs have been found. */ ++ cmpb r3,r12,r4 /* Check for BYTEs in WORD1. 
*/ ++ and r3,r3,r9 ++ clrlwi r5,r7,30 /* Byte count - 1 in last word. */ ++ clrrwi r7,r7,2 /* Address of last word. */ ++ cmplwi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */ + bne cr7,L(done) + +- /* Are we done already? */ +- addi r9,r8,4 +- cmplw cr6,r9,r7 +- bge cr6,L(null) +- + mtcrf 0x01,r8 + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ +- + bt 29,L(loop_setup) + + /* Handle WORD2 of pair. */ + lwzu r12,4(r8) +- cmpb r10,r12,r4 +- cmplwi cr7,r10,0 ++ cmpb r3,r12,r4 ++ cmplwi cr7,r3,0 + bne cr7,L(done) + +- /* Are we done already? */ +- addi r9,r8,4 +- cmplw cr6,r9,r7 +- bge cr6,L(null) +- + L(loop_setup): +- sub r5,r7,r9 +- srwi r6,r5,3 /* Number of loop iterations. */ ++ /* The last word we want to read in the loop below is the one ++ containing the last byte of the string, ie. the word at ++ (s + size - 1) & ~3, or r7. The first word read is at ++ r8 + 4, we read 2 * cnt words, so the last word read will ++ be at r8 + 4 + 8 * cnt - 4. Solving for cnt gives ++ cnt = (r7 - r8) / 8 */ ++ sub r6,r7,r8 ++ srwi r6,r6,3 /* Number of loop iterations. */ + mtctr r6 /* Setup the counter. */ +- b L(loop) +- /* Main loop to look for BYTE backwards in the string. Since +- it's a small loop (< 8 instructions), align it to 32-bytes. */ +- .p2align 5 ++ ++ /* Main loop to look for BYTE in the string. Since ++ it's a small loop (8 instructions), align it to 32-bytes. */ ++ .align 5 + L(loop): + /* Load two words, compare and merge in a + single register for speed. This is an attempt + to speed up the byte-checking process for bigger strings. */ + lwz r12,4(r8) + lwzu r11,8(r8) +- cmpb r10,r12,r4 ++ cmpb r3,r12,r4 + cmpb r9,r11,r4 +- or r5,r9,r10 /* Merge everything in one word. */ +- cmplwi cr7,r5,0 ++ or r6,r9,r3 /* Merge everything in one word. */ ++ cmplwi cr7,r6,0 + bne cr7,L(found) + bdnz L(loop) + +- /* We're here because the counter reached 0, and that means we +- didn't have any matches for BYTE in the whole range. */ +- subi r11,r7,4 +- cmplw cr6,r8,r11 +- blt cr6,L(loop_small) +- b L(null) ++ /* We may have one more dword to read. */ ++ cmplw r8,r7 ++ beqlr + ++ lwzu r12,4(r8) ++ cmpb r3,r12,r4 ++ cmplwi cr6,r3,0 ++ bne cr6,L(done) ++ blr ++ ++ .align 4 ++L(found): + /* OK, one (or both) of the words contains BYTE. Check + the first word and decrement the address in case the first + word really contains BYTE. */ +- .align 4 +-L(found): +- cmplwi cr6,r10,0 ++ cmplwi cr6,r3,0 + addi r8,r8,-4 + bne cr6,L(done) + + /* BYTE must be in the second word. Adjust the address +- again and move the result of cmpb to r10 so we can calculate the ++ again and move the result of cmpb to r3 so we can calculate the + pointer. */ + +- mr r10,r9 ++ mr r3,r9 + addi r8,r8,4 + +- /* r10 has the output of the cmpb instruction, that is, it contains ++ /* r3 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as BYTE in the original + word from the string. Use that to calculate the pointer. + We need to make sure BYTE is *before* the end of the range. */ + L(done): +- cntlzw r0,r10 /* Count leading zeroes before the match. */ +- srwi r0,r0,3 /* Convert leading zeroes to bytes. */ ++#ifdef __LITTLE_ENDIAN__ ++ addi r0,r3,-1 ++ andc r0,r0,r3 ++ popcntw r0,r0 /* Count trailing zeros. */ ++#else ++ cntlzw r0,r3 /* Count leading zeros before the match. */ ++#endif ++ cmplw r8,r7 /* Are we on the last word? */ ++ srwi r0,r0,3 /* Convert leading/trailing zeros to bytes. 
*/ + add r3,r8,r0 +- cmplw r3,r7 +- bge L(null) ++ cmplw cr7,r0,r5 /* If on the last dword, check byte offset. */ ++ bnelr ++ blelr cr7 ++ li r3,0 + blr + + .align 4 +@@ -139,69 +141,44 @@ + .align 4 + L(small_range): + cmplwi r5,0 +- rlwinm r6,r3,3,27,28 /* Calculate padding. */ +- beq L(null) /* This branch is for the cmplwi r5,0 above */ ++ beq L(null) + lwz r12,0(r8) /* Load word from memory. */ +- cmplwi cr6,r6,0 /* cr6 == Do we have padding? */ +- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */ +- beq cr6,L(small_no_padding) +- slw r10,r10,r6 +- srw r10,r10,r6 +-L(small_no_padding): +- cmplwi cr7,r10,0 ++ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */ ++ and r3,r3,r9 ++ cmplwi cr7,r3,0 ++ clrlwi r5,r7,30 /* Byte count - 1 in last word. */ ++ clrrwi r7,r7,2 /* Address of last word. */ ++ cmplw r8,r7 /* Are we done already? */ + bne cr7,L(done) ++ beqlr + +- /* Are we done already? */ +- addi r9,r8,4 +- cmplw r9,r7 +- bge L(null) +- +-L(loop_small): /* loop_small has been unrolled. */ + lwzu r12,4(r8) +- cmpb r10,r12,r4 +- addi r9,r8,4 +- cmplwi cr6,r10,0 +- cmplw r9,r7 ++ cmpb r3,r12,r4 ++ cmplwi cr6,r3,0 ++ cmplw r8,r7 + bne cr6,L(done) +- bge L(null) ++ beqlr + + lwzu r12,4(r8) +- cmpb r10,r12,r4 +- addi r9,r8,4 +- cmplwi cr6,r10,0 +- cmplw r9,r7 ++ cmpb r3,r12,r4 ++ cmplwi cr6,r3,0 ++ cmplw r8,r7 + bne cr6,L(done) +- bge L(null) ++ beqlr + + lwzu r12,4(r8) +- cmpb r10,r12,r4 +- addi r9,r8,4 +- cmplwi cr6,r10,0 +- cmplw r9,r7 ++ cmpb r3,r12,r4 ++ cmplwi cr6,r3,0 ++ cmplw r8,r7 + bne cr6,L(done) +- bge L(null) ++ beqlr + + lwzu r12,4(r8) +- cmpb r10,r12,r4 +- addi r9,r8,4 +- cmplwi cr6,r10,0 +- cmplw r9,r7 ++ cmpb r3,r12,r4 ++ cmplwi cr6,r3,0 + bne cr6,L(done) +- bge L(null) +- +- /* For most cases we will never get here. Under some combinations of +- padding + length there is a leftover word that still needs to be +- checked. */ +- lwzu r12,4(r8) +- cmpb r10,r12,r4 +- addi r9,r8,4 +- cmplwi cr6,r10,0 +- bne cr6,L(done) +- +- /* save a branch and exit directly */ +- li r3,0 + blr + +-END (BP_SYM (__memchr)) +-weak_alias (BP_SYM (__memchr), BP_SYM(memchr)) ++END (__memchr) ++weak_alias (__memchr, memchr) + libc_hidden_builtin_def (memchr) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memrchr.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memrchr.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memrchr.S 2014-05-29 13:09:17.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memrchr.S 2014-05-29 13:13:47.000000000 -0500 +@@ -1,5 +1,5 @@ + /* Optimized memrchr implementation for PowerPC32/POWER7 using cmpb insn. +- Copyright (C) 2010 Free Software Foundation, Inc. ++ Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by Luis Machado . + This file is part of the GNU C Library. + +@@ -18,124 +18,136 @@ + . */ + + #include +-#include +-#include + + /* int [r3] memrchr (char *s [r3], int byte [r4], int size [r5]) */ + .machine power7 +-ENTRY (BP_SYM (__memrchr)) ++ENTRY (__memrchr) + CALL_MCOUNT +- dcbt 0,r3 +- mr r7,r3 +- add r3,r7,r5 /* Calculate the last acceptable address. */ +- cmplw cr7,r3,r7 /* Is the address equal or less than r3? */ ++ add r7,r3,r5 /* Calculate the last acceptable address. */ ++ neg r0,r7 ++ addi r7,r7,-1 ++ mr r10,r3 ++ clrrwi r6,r7,7 ++ li r9,3<<5 ++ dcbt r9,r6,16 /* Stream hint, decreasing addresses. */ + + /* Replicate BYTE to word. */ +- rlwimi r4,r4,8,16,23 +- rlwimi r4,r4,16,0,15 +- bge cr7,L(proceed) +- +- li r3,-1 /* Make r11 the biggest if r4 <= 0. 
*/ +-L(proceed): ++ insrwi r4,r4,8,16 ++ insrwi r4,r4,16,0 + li r6,-4 +- addi r9,r3,-1 +- clrrwi r8,r9,2 +- addi r8,r8,4 +- neg r0,r3 ++ li r9,-1 + rlwinm r0,r0,3,27,28 /* Calculate padding. */ +- ++ clrrwi r8,r7,2 ++ srw r9,r9,r0 + cmplwi r5,16 ++ clrrwi r0,r10,2 + ble L(small_range) + +- lwbrx r12,r8,r6 /* Load reversed word from memory. */ +- cmpb r10,r12,r4 /* Check for BYTE in WORD1. */ +- slw r10,r10,r0 +- srw r10,r10,r0 +- cmplwi cr7,r10,0 /* If r10 == 0, no BYTE's have been found. */ ++#ifdef __LITTLE_ENDIAN__ ++ lwzx r12,0,r8 ++#else ++ lwbrx r12,0,r8 /* Load reversed word from memory. */ ++#endif ++ cmpb r3,r12,r4 /* Check for BYTE in WORD1. */ ++ and r3,r3,r9 ++ cmplwi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */ + bne cr7,L(done) + +- /* Are we done already? */ +- addi r9,r8,-4 +- cmplw cr6,r9,r7 +- ble cr6,L(null) +- + mtcrf 0x01,r8 + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ +- mr r8,r9 +- bt 29,L(loop_setup) ++ bf 29,L(loop_setup) + + /* Handle WORD2 of pair. */ ++#ifdef __LITTLE_ENDIAN__ ++ lwzx r12,r8,r6 ++#else + lwbrx r12,r8,r6 +- cmpb r10,r12,r4 +- cmplwi cr7,r10,0 +- bne cr7,L(done) +- +- /* Are we done already? */ ++#endif + addi r8,r8,-4 +- cmplw cr6,r8,r7 +- ble cr6,L(null) ++ cmpb r3,r12,r4 ++ cmplwi cr7,r3,0 ++ bne cr7,L(done) + + L(loop_setup): +- li r0,-8 +- sub r5,r8,r7 +- srwi r9,r5,3 /* Number of loop iterations. */ ++ /* The last word we want to read in the loop below is the one ++ containing the first byte of the string, ie. the word at ++ s & ~3, or r0. The first word read is at r8 - 4, we ++ read 2 * cnt words, so the last word read will be at ++ r8 - 4 - 8 * cnt + 4. Solving for cnt gives ++ cnt = (r8 - r0) / 8 */ ++ sub r5,r8,r0 ++ addi r8,r8,-4 ++ srwi r9,r5,3 /* Number of loop iterations. */ + mtctr r9 /* Setup the counter. */ +- b L(loop) +- /* Main loop to look for BYTE backwards in the string. Since it's a +- small loop (< 8 instructions), align it to 32-bytes. */ +- .p2align 5 ++ ++ /* Main loop to look for BYTE backwards in the string. ++ FIXME: Investigate whether 32 byte align helps with this ++ 9 instruction loop. */ ++ .align 5 + L(loop): + /* Load two words, compare and merge in a + single register for speed. This is an attempt + to speed up the byte-checking process for bigger strings. */ + +- lwbrx r12,r8,r6 +- lwbrx r11,r8,r0 +- addi r8,r8,-4 +- cmpb r10,r12,r4 ++#ifdef __LITTLE_ENDIAN__ ++ lwzx r12,0,r8 ++ lwzx r11,r8,r6 ++#else ++ lwbrx r12,0,r8 ++ lwbrx r11,r8,r6 ++#endif ++ cmpb r3,r12,r4 + cmpb r9,r11,r4 +- or r5,r9,r10 /* Merge everything in one word. */ ++ or r5,r9,r3 /* Merge everything in one word. */ + cmplwi cr7,r5,0 + bne cr7,L(found) +- addi r8,r8,-4 ++ addi r8,r8,-8 + bdnz L(loop) +- /* We're here because the counter reached 0, and that means we +- didn't have any matches for BYTE in the whole range. Just return +- the original range. */ +- addi r9,r8,4 +- cmplw cr6,r9,r7 +- bgt cr6,L(loop_small) +- b L(null) + +- /* OK, one (or both) of the words contains BYTE. Check +- the first word and decrement the address in case the first +- word really contains BYTE. */ ++ /* We may have one more word to read. */ ++ cmplw r8,r0 ++ bnelr ++ ++#ifdef __LITTLE_ENDIAN__ ++ lwzx r12,0,r8 ++#else ++ lwbrx r12,0,r8 ++#endif ++ cmpb r3,r12,r4 ++ cmplwi cr7,r3,0 ++ bne cr7,L(done) ++ blr ++ + .align 4 + L(found): +- cmplwi cr6,r10,0 +- addi r8,r8,4 ++ /* OK, one (or both) of the words contains BYTE. Check ++ the first word. 
*/ ++ cmplwi cr6,r3,0 + bne cr6,L(done) + + /* BYTE must be in the second word. Adjust the address +- again and move the result of cmpb to r10 so we can calculate the ++ again and move the result of cmpb to r3 so we can calculate the + pointer. */ + +- mr r10,r9 ++ mr r3,r9 + addi r8,r8,-4 + +- /* r10 has the output of the cmpb instruction, that is, it contains ++ /* r3 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as BYTE in the original + word from the string. Use that to calculate the pointer. + We need to make sure BYTE is *before* the end of the + range. */ + L(done): +- cntlzw r0,r10 /* Count leading zeroes before the match. */ +- srwi r6,r0,3 /* Convert leading zeroes to bytes. */ +- addi r0,r6,1 ++ cntlzw r9,r3 /* Count leading zeros before the match. */ ++ cmplw r8,r0 /* Are we on the last word? */ ++ srwi r6,r9,3 /* Convert leading zeros to bytes. */ ++ addi r0,r6,-3 + sub r3,r8,r0 +- cmplw r3,r7 +- blt L(null) ++ cmplw cr7,r3,r10 ++ bnelr ++ bgelr cr7 ++ li r3,0 + blr + + .align 4 +@@ -149,29 +161,36 @@ + cmplwi r5,0 + beq L(null) + +- lwbrx r12,r8,r6 /* Load reversed word from memory. */ +- cmpb r10,r12,r4 /* Check for null bytes in WORD1. */ +- slw r10,r10,r0 +- srw r10,r10,r0 +- cmplwi cr7,r10,0 ++#ifdef __LITTLE_ENDIAN__ ++ lwzx r12,0,r8 ++#else ++ lwbrx r12,0,r8 /* Load reversed word from memory. */ ++#endif ++ cmpb r3,r12,r4 /* Check for BYTE in WORD1. */ ++ and r3,r3,r9 ++ cmplwi cr7,r3,0 + bne cr7,L(done) + ++ /* Are we done already? */ ++ cmplw r8,r0 + addi r8,r8,-4 +- cmplw r8,r7 +- ble L(null) +- b L(loop_small) ++ beqlr + +- .p2align 5 ++ .align 5 + L(loop_small): +- lwbrx r12,r8,r6 +- cmpb r10,r12,r4 +- cmplwi cr6,r10,0 +- bne cr6,L(done) ++#ifdef __LITTLE_ENDIAN__ ++ lwzx r12,0,r8 ++#else ++ lwbrx r12,0,r8 ++#endif ++ cmpb r3,r12,r4 ++ cmplw r8,r0 ++ cmplwi cr7,r3,0 ++ bne cr7,L(done) + addi r8,r8,-4 +- cmplw r8,r7 +- ble L(null) +- b L(loop_small) ++ bne L(loop_small) ++ blr + +-END (BP_SYM (__memrchr)) +-weak_alias (BP_SYM (__memrchr), BP_SYM(memrchr)) ++END (__memrchr) ++weak_alias (__memrchr, memrchr) + libc_hidden_builtin_def (memrchr) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/rawmemchr.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/rawmemchr.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/rawmemchr.S 2014-05-29 13:09:17.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/rawmemchr.S 2014-05-29 13:09:19.000000000 -0500 +@@ -29,16 +29,21 @@ + clrrwi r8,r3,2 /* Align the address to word boundary. */ + + /* Replicate byte to word. */ +- rlwimi r4,r4,8,16,23 +- rlwimi r4,r4,16,0,15 ++ rldimi r4,r4,8,48 ++ rldimi r4,r4,16,32 + + /* Now r4 has a word of c bytes. */ + + rlwinm r6,r3,3,27,28 /* Calculate padding. */ + lwz r12,0(r8) /* Load word from memory. */ + cmpb r5,r12,r4 /* Compare each byte against c byte. */ ++#ifdef __LITTLE_ENDIAN__ ++ srw r5,r5,r6 ++ slw r5,r5,r6 ++#else + slw r5,r5,r6 /* Move left to discard ignored bits. */ + srw r5,r5,r6 /* Bring the bits back as zeros. */ ++#endif + cmpwi cr7,r5,0 /* If r5 == 0, no c bytes have been found. */ + bne cr7,L(done) + +@@ -92,8 +97,14 @@ + word from the string. Use that fact to find out what is + the position of the byte inside the string. */ + L(done): ++#ifdef __LITTLE_ENDIAN__ ++ addi r0,r5,-1 ++ andc r0,r0,r5 ++ popcntw r0,r0 ++#else + cntlzw r0,r5 /* Count leading zeros before the match. */ +- srwi r0,r0,3 /* Convert leading zeroes to bytes. 
*/ ++#endif ++ srwi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching char. */ + blr + END (BP_SYM (__rawmemchr)) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memchr.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memchr.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memchr.S 2014-05-29 13:09:17.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memchr.S 2014-05-29 13:13:57.000000000 -0500 +@@ -1,5 +1,5 @@ + /* Optimized memchr implementation for PowerPC64/POWER7 using cmpb insn. +- Copyright (C) 2010-2012 Free Software Foundation, Inc. ++ Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by Luis Machado . + This file is part of the GNU C Library. + +@@ -18,118 +18,119 @@ + . */ + + #include +-#include +-#include + + /* int [r3] memchr (char *s [r3], int byte [r4], int size [r5]) */ + .machine power7 +-ENTRY (BP_SYM (__memchr)) +- CALL_MCOUNT 2 ++ENTRY (__memchr) ++ CALL_MCOUNT 3 + dcbt 0,r3 + clrrdi r8,r3,3 +- rlwimi r4,r4,8,16,23 +- rlwimi r4,r4,16,0,15 ++ insrdi r4,r4,8,48 + add r7,r3,r5 /* Calculate the last acceptable address. */ ++ insrdi r4,r4,16,32 + cmpldi r5,32 ++ li r9, -1 ++ rlwinm r6,r3,3,26,28 /* Calculate padding. */ + insrdi r4,r4,32,0 ++ addi r7,r7,-1 ++#ifdef __LITTLE_ENDIAN__ ++ sld r9,r9,r6 ++#else ++ srd r9,r9,r6 ++#endif + ble L(small_range) + +- cmpld cr7,r3,r7 /* Compare the starting address (r3) with the +- ending address (r7). If (r3 >= r7), +- the size passed in was zero or negative. */ +- ble cr7,L(proceed) +- +- li r7,-1 /* Artificially set our ending address (r7) +- such that we will exit early. */ +- +-L(proceed): +- rlwinm r6,r3,3,26,28 /* Calculate padding. */ +- cmpldi cr6,r6,0 /* cr6 == Do we have padding? */ + ld r12,0(r8) /* Load doubleword from memory. */ +- cmpb r10,r12,r4 /* Check for BYTEs in DWORD1. */ +- beq cr6,L(proceed_no_padding) +- sld r10,r10,r6 +- srd r10,r10,r6 +-L(proceed_no_padding): +- cmpldi cr7,r10,0 /* Does r10 indicate we got a hit? */ ++ cmpb r3,r12,r4 /* Check for BYTEs in DWORD1. */ ++ and r3,r3,r9 ++ clrldi r5,r7,61 /* Byte count - 1 in last dword. */ ++ clrrdi r7,r7,3 /* Address of last doubleword. */ ++ cmpldi cr7,r3,0 /* Does r3 indicate we got a hit? */ + bne cr7,L(done) + +- /* See if we are at the last acceptable address yet. */ +- addi r9,r8,8 +- cmpld cr6,r9,r7 +- bge cr6,L(null) +- + mtcrf 0x01,r8 + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ +- + bt 28,L(loop_setup) + + /* Handle DWORD2 of pair. */ + ldu r12,8(r8) +- cmpb r10,r12,r4 +- cmpldi cr7,r10,0 ++ cmpb r3,r12,r4 ++ cmpldi cr7,r3,0 + bne cr7,L(done) + +- /* Are we done already? */ +- addi r9,r8,8 +- cmpld cr6,r9,r7 +- bge cr6,L(null) +- + L(loop_setup): +- sub r5,r7,r9 +- srdi r6,r5,4 /* Number of loop iterations. */ ++ /* The last dword we want to read in the loop below is the one ++ containing the last byte of the string, ie. the dword at ++ (s + size - 1) & ~7, or r7. The first dword read is at ++ r8 + 8, we read 2 * cnt dwords, so the last dword read will ++ be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives ++ cnt = (r7 - r8) / 16 */ ++ sub r6,r7,r8 ++ srdi r6,r6,4 /* Number of loop iterations. */ + mtctr r6 /* Setup the counter. */ +- b L(loop) +- /* Main loop to look for BYTE backwards in the string. Since +- it's a small loop (< 8 instructions), align it to 32-bytes. */ +- .p2align 5 ++ ++ /* Main loop to look for BYTE in the string. 
Since ++ it's a small loop (8 instructions), align it to 32-bytes. */ ++ .align 5 + L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the byte-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) +- cmpb r10,r12,r4 ++ cmpb r3,r12,r4 + cmpb r9,r11,r4 +- or r5,r9,r10 /* Merge everything in one doubleword. */ +- cmpldi cr7,r5,0 ++ or r6,r9,r3 /* Merge everything in one doubleword. */ ++ cmpldi cr7,r6,0 + bne cr7,L(found) + bdnz L(loop) + +- /* We're here because the counter reached 0, and that means we +- didn't have any matches for BYTE in the whole range. */ +- subi r11,r7,8 +- cmpld cr6,r8,r11 +- blt cr6,L(loop_small) +- b L(null) ++ /* We may have one more dword to read. */ ++ cmpld r8,r7 ++ beqlr + ++ ldu r12,8(r8) ++ cmpb r3,r12,r4 ++ cmpldi cr6,r3,0 ++ bne cr6,L(done) ++ blr ++ ++ .align 4 ++L(found): + /* OK, one (or both) of the doublewords contains BYTE. Check + the first doubleword and decrement the address in case the first + doubleword really contains BYTE. */ +- .align 4 +-L(found): +- cmpldi cr6,r10,0 ++ cmpldi cr6,r3,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* BYTE must be in the second doubleword. Adjust the address +- again and move the result of cmpb to r10 so we can calculate the ++ again and move the result of cmpb to r3 so we can calculate the + pointer. */ + +- mr r10,r9 ++ mr r3,r9 + addi r8,r8,8 + +- /* r10 has the output of the cmpb instruction, that is, it contains ++ /* r3 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as BYTE in the original + doubleword from the string. Use that to calculate the pointer. + We need to make sure BYTE is *before* the end of the range. */ + L(done): +- cntlzd r0,r10 /* Count leading zeroes before the match. */ +- srdi r0,r0,3 /* Convert leading zeroes to bytes. */ ++#ifdef __LITTLE_ENDIAN__ ++ addi r0,r3,-1 ++ andc r0,r0,r3 ++ popcntd r0,r0 /* Count trailing zeros. */ ++#else ++ cntlzd r0,r3 /* Count leading zeros before the match. */ ++#endif ++ cmpld r8,r7 /* Are we on the last dword? */ ++ srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r8,r0 +- cmpld r3,r7 +- bge L(null) ++ cmpld cr7,r0,r5 /* If on the last dword, check byte offset. */ ++ bnelr ++ blelr cr7 ++ li r3,0 + blr + + .align 4 +@@ -141,67 +142,44 @@ + .align 4 + L(small_range): + cmpldi r5,0 +- rlwinm r6,r3,3,26,28 /* Calculate padding. */ +- beq L(null) /* This branch is for the cmpldi r5,0 above. */ ++ beq L(null) + ld r12,0(r8) /* Load word from memory. */ +- cmpldi cr6,r6,0 /* cr6 == Do we have padding? */ +- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */ +- /* If no padding, skip the shifts. */ +- beq cr6,L(small_no_padding) +- sld r10,r10,r6 +- srd r10,r10,r6 +-L(small_no_padding): +- cmpldi cr7,r10,0 ++ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */ ++ and r3,r3,r9 ++ cmpldi cr7,r3,0 ++ clrldi r5,r7,61 /* Byte count - 1 in last dword. */ ++ clrrdi r7,r7,3 /* Address of last doubleword. */ ++ cmpld r8,r7 /* Are we done already? */ + bne cr7,L(done) +- +- /* Are we done already? */ +- addi r9,r8,8 +- cmpld r9,r7 +- bge L(null) +- /* If we're not done, drop through into loop_small. */ +- +-L(loop_small): /* loop_small has been unrolled. */ +- ldu r12,8(r8) +- cmpb r10,r12,r4 +- addi r9,r8,8 +- cmpldi cr6,r10,0 +- cmpld r9,r7 +- bne cr6,L(done) /* Found something. */ +- bge L(null) /* Hit end of string (length). 
*/ ++ beqlr + + ldu r12,8(r8) +- cmpb r10,r12,r4 +- addi r9,r8,8 +- cmpldi cr6,r10,0 +- cmpld r9,r7 ++ cmpb r3,r12,r4 ++ cmpldi cr6,r3,0 ++ cmpld r8,r7 + bne cr6,L(done) /* Found something. */ +- bge L(null) ++ beqlr /* Hit end of string (length). */ + + ldu r12,8(r8) +- subi r11,r7,8 +- cmpb r10,r12,r4 +- cmpldi cr6,r10,0 +- ori r2,r2,0 /* Force a dispatch group. */ ++ cmpb r3,r12,r4 ++ cmpldi cr6,r3,0 ++ cmpld r8,r7 + bne cr6,L(done) ++ beqlr + +- cmpld r8,r11 /* At end of range? */ +- bge L(null) +- +- /* For most cases we will never get here. Under some combinations of +- padding + length there is a leftover double that still needs to be +- checked. */ +- ldu r12,8(r8) +- cmpb r10,r12,r4 +- addi r9,r8,8 +- cmpldi cr6,r10,0 +- cmpld r9,r7 +- bne cr6,L(done) /* Found something. */ ++ ldu r12,8(r8) ++ cmpb r3,r12,r4 ++ cmpldi cr6,r3,0 ++ cmpld r8,r7 ++ bne cr6,L(done) ++ beqlr + +- /* Save a branch and exit directly. */ +- li r3,0 ++ ldu r12,8(r8) ++ cmpb r3,r12,r4 ++ cmpldi cr6,r3,0 ++ bne cr6,L(done) + blr + +- +-END (BP_SYM (__memchr)) +-weak_alias (BP_SYM (__memchr), BP_SYM(memchr)) ++END (__memchr) ++weak_alias (__memchr, memchr) + libc_hidden_builtin_def (memchr) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memrchr.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memrchr.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memrchr.S 2014-05-29 13:09:17.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memrchr.S 2014-05-29 13:14:06.000000000 -0500 +@@ -1,5 +1,5 @@ + /* Optimized memrchr implementation for PowerPC64/POWER7 using cmpb insn. +- Copyright (C) 2010 Free Software Foundation, Inc. ++ Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by Luis Machado . + This file is part of the GNU C Library. + +@@ -18,125 +18,137 @@ + . */ + + #include +-#include +-#include + + /* int [r3] memrchr (char *s [r3], int byte [r4], int size [r5]) */ + .machine power7 +-ENTRY (BP_SYM (__memrchr)) +- CALL_MCOUNT +- dcbt 0,r3 +- mr r7,r3 +- add r3,r7,r5 /* Calculate the last acceptable address. */ +- cmpld cr7,r3,r7 /* Is the address equal or less than r3? */ ++ENTRY (__memrchr) ++ CALL_MCOUNT 3 ++ add r7,r3,r5 /* Calculate the last acceptable address. */ ++ neg r0,r7 ++ addi r7,r7,-1 ++ mr r10,r3 ++ clrrdi r6,r7,7 ++ li r9,3<<5 ++ dcbt r9,r6,8 /* Stream hint, decreasing addresses. */ + + /* Replicate BYTE to doubleword. */ +- rlwimi r4,r4,8,16,23 +- rlwimi r4,r4,16,0,15 ++ insrdi r4,r4,8,48 ++ insrdi r4,r4,16,32 + insrdi r4,r4,32,0 +- bge cr7,L(proceed) +- +- li r3,-1 /* Make r11 the biggest if r4 <= 0. */ +-L(proceed): + li r6,-8 +- addi r9,r3,-1 +- clrrdi r8,r9,3 +- addi r8,r8,8 +- neg r0,r3 ++ li r9,-1 + rlwinm r0,r0,3,26,28 /* Calculate padding. */ +- ++ clrrdi r8,r7,3 ++ srd r9,r9,r0 + cmpldi r5,32 ++ clrrdi r0,r10,3 + ble L(small_range) + +- ldbrx r12,r8,r6 /* Load reversed doubleword from memory. */ +- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */ +- sld r10,r10,r0 +- srd r10,r10,r0 +- cmpldi cr7,r10,0 /* If r10 == 0, no BYTE's have been found. */ ++#ifdef __LITTLE_ENDIAN__ ++ ldx r12,0,r8 ++#else ++ ldbrx r12,0,r8 /* Load reversed doubleword from memory. */ ++#endif ++ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */ ++ and r3,r3,r9 ++ cmpldi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */ + bne cr7,L(done) + +- /* Are we done already? */ +- addi r9,r8,-8 +- cmpld cr6,r9,r7 +- ble cr6,L(null) +- + mtcrf 0x01,r8 +- /* Are we now aligned to a doubleword boundary? 
If so, skip to ++ /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ +- mr r8,r9 +- bt 28,L(loop_setup) ++ bf 28,L(loop_setup) + + /* Handle DWORD2 of pair. */ ++#ifdef __LITTLE_ENDIAN__ ++ ldx r12,r8,r6 ++#else + ldbrx r12,r8,r6 +- cmpb r10,r12,r4 +- cmpldi cr7,r10,0 +- bne cr7,L(done) +- +- /* Are we done already. */ ++#endif + addi r8,r8,-8 +- cmpld cr6,r8,r7 +- ble cr6,L(null) ++ cmpb r3,r12,r4 ++ cmpldi cr7,r3,0 ++ bne cr7,L(done) + + L(loop_setup): +- li r0,-16 +- sub r5,r8,r7 +- srdi r9,r5,4 /* Number of loop iterations. */ ++ /* The last dword we want to read in the loop below is the one ++ containing the first byte of the string, ie. the dword at ++ s & ~7, or r0. The first dword read is at r8 - 8, we ++ read 2 * cnt dwords, so the last dword read will be at ++ r8 - 8 - 16 * cnt + 8. Solving for cnt gives ++ cnt = (r8 - r0) / 16 */ ++ sub r5,r8,r0 ++ addi r8,r8,-8 ++ srdi r9,r5,4 /* Number of loop iterations. */ + mtctr r9 /* Setup the counter. */ +- b L(loop) +- /* Main loop to look for BYTE backwards in the string. Since it's a +- small loop (< 8 instructions), align it to 32-bytes. */ +- .p2align 5 ++ ++ /* Main loop to look for BYTE backwards in the string. ++ FIXME: Investigate whether 32 byte align helps with this ++ 9 instruction loop. */ ++ .align 5 + L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the byte-checking process for bigger strings. */ + +- ldbrx r12,r8,r6 +- ldbrx r11,r8,r0 +- addi r8,r8,-8 +- cmpb r10,r12,r4 ++#ifdef __LITTLE_ENDIAN__ ++ ldx r12,0,r8 ++ ldx r11,r8,r6 ++#else ++ ldbrx r12,0,r8 ++ ldbrx r11,r8,r6 ++#endif ++ cmpb r3,r12,r4 + cmpb r9,r11,r4 +- or r5,r9,r10 /* Merge everything in one doubleword. */ ++ or r5,r9,r3 /* Merge everything in one doubleword. */ + cmpldi cr7,r5,0 + bne cr7,L(found) +- addi r8,r8,-8 ++ addi r8,r8,-16 + bdnz L(loop) +- /* We're here because the counter reached 0, and that means we +- didn't have any matches for BYTE in the whole range. Just return +- the original range. */ +- addi r9,r8,8 +- cmpld cr6,r9,r7 +- bgt cr6,L(loop_small) +- b L(null) +- +- /* OK, one (or both) of the words contains BYTE. Check +- the first word and decrement the address in case the first +- word really contains BYTE. */ ++ ++ /* We may have one more word to read. */ ++ cmpld r8,r0 ++ bnelr ++ ++#ifdef __LITTLE_ENDIAN__ ++ ldx r12,0,r8 ++#else ++ ldbrx r12,0,r8 ++#endif ++ cmpb r3,r12,r4 ++ cmpldi cr7,r3,0 ++ bne cr7,L(done) ++ blr ++ + .align 4 + L(found): +- cmpldi cr6,r10,0 +- addi r8,r8,8 ++ /* OK, one (or both) of the dwords contains BYTE. Check ++ the first dword. */ ++ cmpldi cr6,r3,0 + bne cr6,L(done) + + /* BYTE must be in the second word. Adjust the address +- again and move the result of cmpb to r10 so we can calculate the ++ again and move the result of cmpb to r3 so we can calculate the + pointer. */ + +- mr r10,r9 ++ mr r3,r9 + addi r8,r8,-8 + +- /* r10 has the output of the cmpb instruction, that is, it contains +- 0xff in the same position as the BYTE in the original ++ /* r3 has the output of the cmpb instruction, that is, it contains ++ 0xff in the same position as BYTE in the original + word from the string. Use that to calculate the pointer. + We need to make sure BYTE is *before* the end of the + range. */ + L(done): +- cntlzd r0,r10 /* Count leading zeroes before the match. */ +- srdi r6,r0,3 /* Convert leading zeroes to bytes. 
*/ +- addi r0,r6,1 ++ cntlzd r9,r3 /* Count leading zeros before the match. */ ++ cmpld r8,r0 /* Are we on the last word? */ ++ srdi r6,r9,3 /* Convert leading zeros to bytes. */ ++ addi r0,r6,-7 + sub r3,r8,r0 +- cmpld r3,r7 +- blt L(null) ++ cmpld cr7,r3,r10 ++ bnelr ++ bgelr cr7 ++ li r3,0 + blr + + .align 4 +@@ -150,30 +162,36 @@ + cmpldi r5,0 + beq L(null) + +- ldbrx r12,r8,r6 /* Load reversed doubleword from memory. */ +- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */ +- sld r10,r10,r0 +- srd r10,r10,r0 +- cmpldi cr7,r10,0 ++#ifdef __LITTLE_ENDIAN__ ++ ldx r12,0,r8 ++#else ++ ldbrx r12,0,r8 /* Load reversed doubleword from memory. */ ++#endif ++ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */ ++ and r3,r3,r9 ++ cmpldi cr7,r3,0 + bne cr7,L(done) + + /* Are we done already? */ ++ cmpld r8,r0 + addi r8,r8,-8 +- cmpld r8,r7 +- ble L(null) +- b L(loop_small) ++ beqlr + +- .p2align 5 ++ .align 5 + L(loop_small): +- ldbrx r12,r8,r6 +- cmpb r10,r12,r4 +- cmpldi cr6,r10,0 +- bne cr6,L(done) ++#ifdef __LITTLE_ENDIAN__ ++ ldx r12,0,r8 ++#else ++ ldbrx r12,0,r8 ++#endif ++ cmpb r3,r12,r4 ++ cmpld r8,r0 ++ cmpldi cr7,r3,0 ++ bne cr7,L(done) + addi r8,r8,-8 +- cmpld r8,r7 +- ble L(null) +- b L(loop_small) ++ bne L(loop_small) ++ blr + +-END (BP_SYM (__memrchr)) +-weak_alias (BP_SYM (__memrchr), BP_SYM(memrchr)) ++END (__memrchr) ++weak_alias (__memrchr, memrchr) + libc_hidden_builtin_def (memrchr) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/rawmemchr.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/rawmemchr.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/rawmemchr.S 2014-05-29 13:09:17.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/rawmemchr.S 2014-05-29 13:09:19.000000000 -0500 +@@ -29,8 +29,8 @@ + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + + /* Replicate byte to doubleword. */ +- rlwimi r4,r4,8,16,23 +- rlwimi r4,r4,16,0,15 ++ insrdi r4,r4,8,48 ++ insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + /* Now r4 has a doubleword of c bytes. */ +@@ -38,8 +38,13 @@ + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + ld r12,0(r8) /* Load doubleword from memory. */ + cmpb r5,r12,r4 /* Compare each byte against c byte. */ ++#ifdef __LITTLE_ENDIAN__ ++ srd r5,r5,r6 ++ sld r5,r5,r6 ++#else + sld r5,r5,r6 /* Move left to discard ignored bits. */ + srd r5,r5,r6 /* Bring the bits back as zeros. */ ++#endif + cmpdi cr7,r5,0 /* If r5 == 0, no c bytes have been found. */ + bne cr7,L(done) + +@@ -93,8 +98,14 @@ + doubleword from the string. Use that fact to find out what is + the position of the byte inside the string. */ + L(done): ++#ifdef __LITTLE_ENDIAN__ ++ addi r0,r5,-1 ++ andc r0,r0,r5 ++ popcntd r0,r0 /* Count trailing zeros. */ ++#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +- srdi r0,r0,3 /* Convert leading zeroes to bytes. */ ++#endif ++ srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching char. */ + blr + END (BP_SYM (__rawmemchr)) diff --git a/packages/glibc/2.17/0056-glibc-ppc64le-34.patch b/packages/glibc/2.17/0056-glibc-ppc64le-34.patch new file mode 100644 index 0000000..ef6362d --- /dev/null +++ b/packages/glibc/2.17/0056-glibc-ppc64le-34.patch @@ -0,0 +1,68 @@ +# commit 8f9ebb08af1368962d9f24c4cfacb55cf8eee560 +# Author: Alan Modra +# Date: Thu Oct 3 14:03:03 2013 +0930 +# +# PowerPC LE configury +# http://sourceware.org/ml/libc-alpha/2013-08/msg00096.html +# +# This adds the basic configury bits for powerpc64le and powerpcle. 
+# +# * configure.in: Map powerpc64le and powerpcle to base_machine/machine. +# * configure: Regenerate. +# * nptl/shlib-versions: Powerpc*le starts at 2.18. +# * shlib-versions: Likewise. +# +# commit 0ff8246327401ae8779e2697d5c7348611cdbf8a +# Author: Adhemerval Zanella +# Date: Tue Feb 4 09:49:08 2014 -0200 +# +# PowerPC: Change powerpc64le start ABI to 2.17. +# +diff -urN glibc-2.17-c758a686/configure glibc-2.17-c758a686/configure +--- glibc-2.17-c758a686/configure 2014-05-26 19:52:31.000000000 -0500 ++++ glibc-2.17-c758a686/configure 2014-05-26 19:54:13.000000000 -0500 +@@ -4195,8 +4195,8 @@ + # base_machine, we don't change it. + test -n "$base_machine" || case "$machine" in + i[34567]86) base_machine=i386 machine=i386/$machine ;; +-powerpc) base_machine=powerpc machine=powerpc/powerpc32 ;; +-powerpc64) base_machine=powerpc machine=powerpc/powerpc64 ;; ++powerpc64*) base_machine=powerpc machine=powerpc/powerpc64 ;; ++powerpc*) base_machine=powerpc machine=powerpc/powerpc32 ;; + s390) base_machine=s390 machine=s390/s390-32 ;; + s390x) base_machine=s390 machine=s390/s390-64 ;; + sh3*) base_machine=sh machine=sh/sh3 ;; +diff -urN glibc-2.17-c758a686/configure.in glibc-2.17-c758a686/configure.in +--- glibc-2.17-c758a686/configure.in 2014-05-26 19:52:30.000000000 -0500 ++++ glibc-2.17-c758a686/configure.in 2014-05-26 19:54:45.000000000 -0500 +@@ -549,8 +549,8 @@ + # base_machine, we don't change it. + test -n "$base_machine" || case "$machine" in + i[34567]86) base_machine=i386 machine=i386/$machine ;; +-powerpc) base_machine=powerpc machine=powerpc/powerpc32 ;; +-powerpc64) base_machine=powerpc machine=powerpc/powerpc64 ;; ++powerpc64*) base_machine=powerpc machine=powerpc/powerpc64 ;; ++powerpc*) base_machine=powerpc machine=powerpc/powerpc32 ;; + s390) base_machine=s390 machine=s390/s390-32 ;; + s390x) base_machine=s390 machine=s390/s390-64 ;; + sh3*) base_machine=sh machine=sh/sh3 ;; +diff -urN glibc-2.17-c758a686/nptl/shlib-versions glibc-2.17-c758a686/nptl/shlib-versions +--- glibc-2.17-c758a686/nptl/shlib-versions 2014-05-26 19:52:31.000000000 -0500 ++++ glibc-2.17-c758a686/nptl/shlib-versions 2014-05-26 19:53:31.000000000 -0500 +@@ -2,4 +2,5 @@ + sh.*-.*-linux.* libpthread=0 GLIBC_2.2 + s390x-.*-linux.* libpthread=0 GLIBC_2.2 + powerpc64-.*-linux.* libpthread=0 GLIBC_2.3 ++powerpc.*le-.*-linux.* libpthread=0 GLIBC_2.17 + .*-.*-linux.* libpthread=0 +diff -urN glibc-2.17-c758a686/shlib-versions glibc-2.17-c758a686/shlib-versions +--- glibc-2.17-c758a686/shlib-versions 2014-05-26 19:52:31.000000000 -0500 ++++ glibc-2.17-c758a686/shlib-versions 2014-05-26 19:53:31.000000000 -0500 +@@ -23,6 +23,7 @@ + + s390x-.*-linux.* DEFAULT GLIBC_2.2 + powerpc64-.*-linux.* DEFAULT GLIBC_2.3 ++powerpc.*le-.*-linux.* DEFAULT GLIBC_2.17 + .*-.*-gnu-gnu.* DEFAULT GLIBC_2.2.6 + + # Configuration ABI Identifier for ABI data files diff --git a/packages/glibc/2.17/0057-glibc-ppc64le-35.patch b/packages/glibc/2.17/0057-glibc-ppc64le-35.patch new file mode 100644 index 0000000..17434b9 --- /dev/null +++ b/packages/glibc/2.17/0057-glibc-ppc64le-35.patch @@ -0,0 +1,106 @@ +# commit 5162e7dd96efcd9b45c1dc1471a964d45278b1e1 +# Author: Ulrich Weigand +# Date: Wed Dec 4 06:41:52 2013 -0600 +# +# PowerPC64: Fix incorrect CFI in *context routines +# +# The context established by "makecontext" has a link register pointing +# back to an error path within the makecontext routine. This is currently +# covered by the CFI FDE for makecontext itself, which is simply wrong +# for the stack frame *inside* the context. 
When trying to unwind (e.g. +# doing a backtrace) in a routine inside a context created by makecontext, +# this can lead to uninitialized stack slots being accessed, causing the +# unwinder to crash in the worst case. +# +# Similarly, during parts of the "setcontext" routine, when the stack +# pointer has already been switched to point to the new context, the +# address range is still covered by the CFI FDE for setcontext. When +# trying to unwind in that situation (e.g. backtrace from an async +# signal handler for profiling), it is again possible that the unwinder +# crashes. +# +# Theses are all problems in existing code, but the changes in stack +# frame layout appear to make the "worst case" much more likely in +# the ELFv2 ABI context. This causes regressions e.g. in the libgo +# testsuite on ELFv2. +# +# This patch fixes this by ending the makecontext/setcontext FDEs +# before those problematic parts of the assembler, similar to what +# is already done on other platforms. This fixes the libgo +# regression on ELFv2. +# +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S 2014-05-29 13:16:16.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S 2014-05-29 13:16:17.000000000 -0500 +@@ -129,6 +129,10 @@ + the cpu link stack used to predict blr return addresses. */ + bcl 20,31,L(gotexitcodeaddr); + ++ /* End FDE now, because while executing on the context's stack ++ the unwind info would be wrong otherwise. */ ++ cfi_endproc ++ + /* This is the helper code which gets called if a function which + is registered with 'makecontext' returns. In this case we + have to install the context listed in the uc_link element of +@@ -157,6 +161,11 @@ + #endif + b L(do_exit) + ++ /* Re-establish FDE for the rest of the actual makecontext routine. */ ++ cfi_startproc ++ cfi_offset (lr, FRAME_LR_SAVE) ++ cfi_adjust_cfa_offset (128) ++ + /* The address of the exit code is in the link register. Store the lr + in the ucontext as LNK so the target function will return to our + exit code. */ +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S 2014-05-29 13:16:16.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S 2014-05-29 13:16:17.000000000 -0500 +@@ -129,6 +129,10 @@ + lfd fp1,(SIGCONTEXT_FP_REGS+(PT_R1*8))(r31) + lfd fp0,(SIGCONTEXT_FP_REGS+(PT_R0*8))(r31) + ++ /* End FDE now, because the unwind info would be wrong while ++ we're reloading registers to switch to the new context. */ ++ cfi_endproc ++ + ld r0,(SIGCONTEXT_GP_REGS+(PT_LNK*8))(r31) + ld r1,(SIGCONTEXT_GP_REGS+(PT_R1*8))(r31) + mtlr r0 +@@ -177,6 +181,11 @@ + ld r31,(SIGCONTEXT_GP_REGS+(PT_R31*8))(r31) + bctr + ++ /* Re-establish FDE for the rest of the actual setcontext routine. */ ++ cfi_startproc ++ cfi_offset (lr, FRAME_LR_SAVE) ++ cfi_adjust_cfa_offset (128) ++ + L(nv_error_exit): + ld r0,128+FRAME_LR_SAVE(r1) + addi r1,r1,128 +@@ -403,6 +412,10 @@ + lfd fp1,(SIGCONTEXT_FP_REGS+(PT_R1*8))(r31) + lfd fp0,(SIGCONTEXT_FP_REGS+(PT_R0*8))(r31) + ++ /* End FDE now, because the unwind info would be wrong while ++ we're reloading registers to switch to the new context. 
*/ ++ cfi_endproc ++ + ld r0,(SIGCONTEXT_GP_REGS+(PT_LNK*8))(r31) + ld r1,(SIGCONTEXT_GP_REGS+(PT_R1*8))(r31) + mtlr r0 +@@ -451,6 +464,11 @@ + ld r31,(SIGCONTEXT_GP_REGS+(PT_R31*8))(r31) + bctr + ++ /* Re-establish FDE for the rest of the actual setcontext routine. */ ++ cfi_startproc ++ cfi_offset (lr, FRAME_LR_SAVE) ++ cfi_adjust_cfa_offset (128) ++ + L(error_exit): + ld r0,128+FRAME_LR_SAVE(r1) + addi r1,r1,128 diff --git a/packages/glibc/2.17/0058-glibc-ppc64le-36.patch b/packages/glibc/2.17/0058-glibc-ppc64le-36.patch new file mode 100644 index 0000000..fbe351c --- /dev/null +++ b/packages/glibc/2.17/0058-glibc-ppc64le-36.patch @@ -0,0 +1,105 @@ +# commit 7ec07d9a7b501f1b7d740fda02ba5f39d6d684e5 +# Author: Alan Modra +# Date: Wed Dec 4 06:44:06 2013 -0600 +# +# PowerPC64: Report overflow on @h and @ha relocations +# +# This patch updates glibc in accordance with the binutils patch checked in here: +# https://sourceware.org/ml/binutils/2013-10/msg00372.html +# +# This changes the various R_PPC64_..._HI and _HA relocations to report +# 32-bit overflows. The motivation is that existing uses of @h / @ha +# are to build up 32-bit offsets (for the "medium model" TOC access +# that GCC now defaults to), and we'd really like to see failures at +# link / load time rather than silent truncations. +# +# For those rare cases where a modifier is needed to build up a 64-bit +# constant, new relocations _HIGH / _HIGHA are supported. +# +# The patch also fixes a bug in overflow checking for the R_PPC64_ADDR30 +# and R_PPC64_ADDR32 relocations. +# +diff -urN glibc-2.17-c758a686/elf/elf.h glibc-2.17-c758a686/elf/elf.h +--- glibc-2.17-c758a686/elf/elf.h 2014-05-29 13:17:35.000000000 -0500 ++++ glibc-2.17-c758a686/elf/elf.h 2014-05-29 13:17:35.000000000 -0500 +@@ -2243,6 +2243,17 @@ + #define R_PPC64_DTPREL16_HIGHERA 104 /* half16 (sym+add)@dtprel@highera */ + #define R_PPC64_DTPREL16_HIGHEST 105 /* half16 (sym+add)@dtprel@highest */ + #define R_PPC64_DTPREL16_HIGHESTA 106 /* half16 (sym+add)@dtprel@highesta */ ++#define R_PPC64_TLSGD 107 /* none (sym+add)@tlsgd */ ++#define R_PPC64_TLSLD 108 /* none (sym+add)@tlsld */ ++#define R_PPC64_TOCSAVE 109 /* none */ ++ ++/* Added when HA and HI relocs were changed to report overflows. */ ++#define R_PPC64_ADDR16_HIGH 110 ++#define R_PPC64_ADDR16_HIGHA 111 ++#define R_PPC64_TPREL16_HIGH 112 ++#define R_PPC64_TPREL16_HIGHA 113 ++#define R_PPC64_DTPREL16_HIGH 114 ++#define R_PPC64_DTPREL16_HIGHA 115 + + /* GNU extension to support local ifunc. 
*/ + #define R_PPC64_JMP_IREL 247 +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 13:17:34.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 13:17:35.000000000 -0500 +@@ -663,11 +663,25 @@ + + case R_PPC64_TPREL16_HI: + value = elf_machine_tprel (map, sym_map, sym, reloc); ++ if (dont_expect (value + 0x80000000 >= 0x100000000LL)) ++ _dl_reloc_overflow (map, "R_PPC64_TPREL16_HI", reloc_addr, refsym); ++ *(Elf64_Half *) reloc_addr = PPC_HI (value); ++ break; ++ ++ case R_PPC64_TPREL16_HIGH: ++ value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HI (value); + break; + + case R_PPC64_TPREL16_HA: + value = elf_machine_tprel (map, sym_map, sym, reloc); ++ if (dont_expect (value + 0x80008000 >= 0x100000000LL)) ++ _dl_reloc_overflow (map, "R_PPC64_TPREL16_HA", reloc_addr, refsym); ++ *(Elf64_Half *) reloc_addr = PPC_HA (value); ++ break; ++ ++ case R_PPC64_TPREL16_HIGHA: ++ value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HA (value); + break; + +@@ -703,17 +717,23 @@ + break; + + case R_PPC64_ADDR16_HI: ++ if (dont_expect (value + 0x80000000 >= 0x100000000LL)) ++ _dl_reloc_overflow (map, "R_PPC64_ADDR16_HI", reloc_addr, refsym); ++ case R_PPC64_ADDR16_HIGH: + *(Elf64_Half *) reloc_addr = PPC_HI (value); + break; + + case R_PPC64_ADDR16_HA: ++ if (dont_expect (value + 0x80008000 >= 0x100000000LL)) ++ _dl_reloc_overflow (map, "R_PPC64_ADDR16_HA", reloc_addr, refsym); ++ case R_PPC64_ADDR16_HIGHA: + *(Elf64_Half *) reloc_addr = PPC_HA (value); + break; + + case R_PPC64_ADDR30: + { + Elf64_Addr delta = value - (Elf64_Xword) reloc_addr; +- if (dont_expect ((delta + 0x80000000) >= 0x10000000 ++ if (dont_expect ((delta + 0x80000000) >= 0x100000000LL + || (delta & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR30", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Word *) reloc_addr, delta, 0xfffffffc); +@@ -762,7 +782,7 @@ + return; + + case R_PPC64_ADDR32: +- if (dont_expect ((value + 0x80000000) >= 0x10000000)) ++ if (dont_expect ((value + 0x80000000) >= 0x100000000LL)) + _dl_reloc_overflow (map, "R_PPC64_ADDR32", reloc_addr, refsym); + *(Elf64_Word *) reloc_addr = value; + return; diff --git a/packages/glibc/2.17/0059-glibc-ppc64le-37.patch b/packages/glibc/2.17/0059-glibc-ppc64le-37.patch new file mode 100644 index 0000000..dad59a6 --- /dev/null +++ b/packages/glibc/2.17/0059-glibc-ppc64le-37.patch @@ -0,0 +1,31 @@ +# commit b525166bb93b060e1146f0263b76a9c1e7455b06 +# Author: Ulrich Weigand +# Date: Wed Dec 4 06:45:56 2013 -0600 +# +# PowerPC64: Add __private_ss field to TCB header +# +# The TCB header on Intel contains a field __private_ss that is used +# to efficiently implement the -fsplit-stack GCC feature. +# +# In order to prepare for a possible future implementation of that +# feature on powerpc64, we'd like to reserve a similar field in +# the TCB header as well. (It would be good if this went in with +# or before the ELFv2 patches to ensure that this field will be +# available always in the ELFv2 environment.) +# +# The field needs to be added at the front of tcbhead_t structure +# to avoid changing the ABI; see the recent discussion when adding +# the EBB fields. 
+# +diff -urN glibc-2.17-c758a686/nptl/sysdeps/powerpc/tls.h glibc-2.17-c758a686/nptl/sysdeps/powerpc/tls.h +--- glibc-2.17-c758a686/nptl/sysdeps/powerpc/tls.h 2014-05-29 13:19:25.000000000 -0500 ++++ glibc-2.17-c758a686/nptl/sysdeps/powerpc/tls.h 2014-05-29 13:19:25.000000000 -0500 +@@ -61,6 +61,8 @@ + are private. */ + typedef struct + { ++ /* GCC split stack support. */ ++ void *__private_ss; + /* Reservation for the Event-Based Branching ABI. */ + uintptr_t ebb_handler; + uintptr_t ebb_ctx_pointer; diff --git a/packages/glibc/2.17/0060-glibc-ppc64le-38.patch b/packages/glibc/2.17/0060-glibc-ppc64le-38.patch new file mode 100644 index 0000000..196f0ad --- /dev/null +++ b/packages/glibc/2.17/0060-glibc-ppc64le-38.patch @@ -0,0 +1,262 @@ +# commit d31beafa8e4ca69faa4cf362784796ef17299341 +# Author: Ulrich Weigand +# Date: Wed Dec 4 06:49:15 2013 -0600 +# +# PowerPC64 ELFv2 ABI 1/6: Code refactoring +# +# This is the first patch to support the new ELFv2 ABI in glibc. +# +# As preparation, this patch simply refactors some of the powerpc64 assembler +# code to move all code related to creating function descriptors (.opd section) +# or using function descriptors (function pointer call) into a central place +# in sysdep.h. +# +# Note that most locations creating .opd entries were already using macros +# in sysdep.h, this patch simply extends this to the remaining places. +# +# No relevant change in generated code expected. +# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S 2014-05-29 13:56:35.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S 2014-05-29 13:56:37.000000000 -0500 +@@ -60,18 +60,8 @@ + .LC0: + .tc PREINIT_FUNCTION[TC], PREINIT_FUNCTION + #endif +- .type BODY_LABEL (_init), @function +- .globl _init +- .section ".opd", "aw" +- .align 3 +-_init: OPD_ENT (_init) +-#ifdef HAVE_ASM_GLOBAL_DOT_NAME +- .globl BODY_LABEL (_init) +- .size _init, 24 +-#else +- .type _init, @function +-#endif + .section ".init", "ax", @progbits ++ ENTRY_2(_init) + .align ALIGNARG (2) + BODY_LABEL (_init): + mflr 0 +@@ -87,18 +77,8 @@ + nop + 1: + +- .type BODY_LABEL (_fini), @function +- .globl _fini +- .section ".opd", "aw" +- .align 3 +-_fini: OPD_ENT (_fini) +-#ifdef HAVE_ASM_GLOBAL_DOT_NAME +- .globl BODY_LABEL (_fini) +- .size _fini, 24 +-#else +- .type _fini, @function +-#endif + .section ".fini", "ax", @progbits ++ ENTRY_2(_fini) + .align ALIGNARG (2) + BODY_LABEL (_fini): + mflr 0 +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 13:56:35.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 13:56:37.000000000 -0500 +@@ -122,14 +122,7 @@ + #define RTLD_START \ + asm (".pushsection \".text\"\n" \ + " .align 2\n" \ +-" .type " BODY_PREFIX "_start,@function\n" \ +-" .pushsection \".opd\",\"aw\"\n" \ +-" .align 3\n" \ +-" .globl _start\n" \ + " " ENTRY_2(_start) "\n" \ +-"_start:\n" \ +-" " OPD_ENT(_start) "\n" \ +-" .popsection\n" \ + BODY_PREFIX "_start:\n" \ + /* We start with the following on the stack, from top: \ + argc (4 bytes); \ +@@ -154,11 +147,6 @@ + ".LT__start_name_end:\n" \ + " .align 2\n" \ + " " END_2(_start) "\n" \ +-" .globl _dl_start_user\n" \ +-" .pushsection \".opd\",\"aw\"\n" \ +-"_dl_start_user:\n" \ +-" " OPD_ENT(_dl_start_user) "\n" \ +-" 
.popsection\n" \ + " .pushsection \".toc\",\"aw\"\n" \ + DL_STARTING_UP_DEF \ + ".LC__rtld_local:\n" \ +@@ -170,7 +158,6 @@ + ".LC__dl_fini:\n" \ + " .tc _dl_fini[TC],_dl_fini\n" \ + " .popsection\n" \ +-" .type " BODY_PREFIX "_dl_start_user,@function\n" \ + " " ENTRY_2(_dl_start_user) "\n" \ + /* Now, we do our main work of calling initialisation procedures. \ + The ELF ABI doesn't say anything about parameters for these, \ +@@ -228,10 +215,7 @@ + /* Now, call the start function descriptor at r30... */ \ + " .globl ._dl_main_dispatch\n" \ + "._dl_main_dispatch:\n" \ +-" ld 0,0(30)\n" \ +-" ld 2,8(30)\n" \ +-" mtctr 0\n" \ +-" ld 11,16(30)\n" \ ++" " PPC64_LOAD_FUNCPTR(30) "\n" \ + " bctr\n" \ + ".LT__dl_start_user:\n" \ + " .long 0\n" \ +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S 2014-05-29 13:56:35.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S 2014-05-29 13:56:37.000000000 -0500 +@@ -71,12 +71,8 @@ + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) + mtcrf 0xFF,r0 +-/* Load the target address, toc and static chain reg from the function +- descriptor returned by fixup. */ +- ld r0,0(r3) +- ld r2,8(r3) +- mtctr r0 +- ld r11,16(r3) ++/* Prepare for calling the function returned by fixup. */ ++ PPC64_LOAD_FUNCPTR r3 + ld r3,INT_PARMS+0(r1) + /* Unwind the stack frame, and jump. */ + addi r1,r1,FRAME_SIZE +@@ -322,13 +318,9 @@ + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) + mtcrf 0xFF,r0 +-/* Load the target address, toc and static chain reg from the function +- descriptor returned by fixup. */ +- ld r0,0(r3) +- ld r2,8(r3) +- ld r11,16(r3) ++/* Prepare for calling the function returned by fixup. */ ++ PPC64_LOAD_FUNCPTR r3 + ld r3,INT_PARMS+0(r1) +- mtctr r0 + /* Load the floating point registers. */ + lfd fp1,FPR_PARMS+0(r1) + lfd fp2,FPR_PARMS+8(r1) +@@ -386,14 +378,10 @@ + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) + mtcrf 0xFF,r0 +-/* Load the target address, toc and static chain reg from the function +- descriptor returned by fixup. */ +- ld r0,0(r3) ++/* Prepare for calling the function returned by fixup. */ + std r2,40(r1) +- ld r2,8(r3) +- ld r11,16(r3) ++ PPC64_LOAD_FUNCPTR r3 + ld r3,INT_PARMS+0(r1) +- mtctr r0 + /* Load the floating point registers. */ + lfd fp1,FPR_PARMS+0(r1) + lfd fp2,FPR_PARMS+8(r1) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h 2014-05-29 13:56:35.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h 2014-05-29 13:56:37.000000000 -0500 +@@ -74,6 +74,14 @@ + #endif + .endm + ++/* Macro to prepare for calling via a function pointer. */ ++ .macro PPC64_LOAD_FUNCPTR PTR ++ ld r12,0(\PTR) ++ ld r2,8(\PTR) ++ mtctr r12 ++ ld r11,16(\PTR) ++ .endm ++ + #ifdef USE_PPC64_OVERLAPPING_OPD + # define OPD_ENT(name) .quad BODY_LABEL (name), .TOC.@tocbase + #else +@@ -81,7 +89,6 @@ + #endif + + #define ENTRY_1(name) \ +- .section ".text"; \ + .type BODY_LABEL(name),@function; \ + .globl name; \ + .section ".opd","aw"; \ +@@ -110,6 +117,7 @@ + #endif + + #define ENTRY(name) \ ++ .section ".text"; \ + ENTRY_2(name) \ + .align ALIGNARG(2); \ + BODY_LABEL(name): \ +@@ -127,6 +135,7 @@ + /* EALIGN is like ENTRY, but does alignment to 'words'*4 bytes + past a 2^alignt boundary. 
*/ + #define EALIGN(name, alignt, words) \ ++ .section ".text"; \ + ENTRY_2(name) \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ +@@ -286,24 +295,42 @@ + + #else /* !__ASSEMBLER__ */ + ++#define PPC64_LOAD_FUNCPTR(ptr) \ ++ "ld 12,0(" #ptr ");\n" \ ++ "ld 2,8(" #ptr ");\n" \ ++ "mtctr 12;\n" \ ++ "ld 11,16(" #ptr ");" ++ + #ifdef USE_PPC64_OVERLAPPING_OPD + # define OPD_ENT(name) ".quad " BODY_PREFIX #name ", .TOC.@tocbase;" + #else + # define OPD_ENT(name) ".quad " BODY_PREFIX #name ", .TOC.@tocbase, 0;" + #endif + ++#define ENTRY_1(name) \ ++ ".type " BODY_PREFIX #name ",@function;\n" \ ++ ".globl " #name ";\n" \ ++ ".pushsection \".opd\",\"aw\";\n" \ ++ ".align 3;\n" \ ++#name ":\n" \ ++ OPD_ENT (name) "\n" \ ++ ".popsection;" ++ + #ifdef HAVE_ASM_GLOBAL_DOT_NAME + # define DOT_PREFIX "." + # define BODY_PREFIX "." + # define ENTRY_2(name) \ + ".globl " BODY_PREFIX #name ";\n" \ ++ ENTRY_1(name) "\n" \ + ".size " #name ", 24;" + # define END_2(name) \ + ".size " BODY_PREFIX #name ",.-" BODY_PREFIX #name ";" + #else + # define DOT_PREFIX "" + # define BODY_PREFIX ".LY" +-# define ENTRY_2(name) ".type " #name ",@function;" ++# define ENTRY_2(name) \ ++ ".type " #name ",@function;\n" \ ++ ENTRY_1(name) + # define END_2(name) \ + ".size " #name ",.-" BODY_PREFIX #name ";\n" \ + ".size " BODY_PREFIX #name ",.-" BODY_PREFIX #name ";" +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S 2014-05-29 13:56:35.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S 2014-05-29 13:56:37.000000000 -0500 +@@ -104,9 +104,7 @@ + + std r2,40(r1) + /* Call procedure. */ +- ld r0,0(r30) +- ld r2,8(r30) +- mtctr r0 ++ PPC64_LOAD_FUNCPTR r30 + mr r3,r31 + bctrl + ld r2,40(r1) diff --git a/packages/glibc/2.17/0061-glibc-ppc64le-39.patch b/packages/glibc/2.17/0061-glibc-ppc64le-39.patch new file mode 100644 index 0000000..f837d99 --- /dev/null +++ b/packages/glibc/2.17/0061-glibc-ppc64le-39.patch @@ -0,0 +1,508 @@ +# commit 696caf1d002ff059ddd20fd5eaccd76229c14850 +# Author: Ulrich Weigand +# Date: Wed Dec 4 06:51:11 2013 -0600 +# +# PowerPC64 ELFv2 ABI 2/6: Remove function descriptors +# +# This patch adds support for the ELFv2 ABI feature to remove function +# descriptors. See this GCC patch for in-depth discussion: +# http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01141.html +# +# This mostly involves two types of changes: updating assembler source +# files to the new logic, and updating the dynamic loader. +# +# After the refactoring in the previous patch, most of the assembler source +# changes can be handled simply by providing ELFv2 versions of the +# macros in sysdep.h. One somewhat non-obvious change is in __GI__setjmp: +# this used to "fall through" to the immediately following __setjmp ENTRY +# point. This is no longer safe in the ELFv2 since ENTRY defines both +# a global and a local entry point, and you cannot simply fall through +# to a global entry point as it requires r12 to be set up. +# +# Also, makecontext needs to be updated to set up registers according to +# the new ABI for calling into the context's start routine. +# +# The dynamic linker changes mostly consist of removing special code +# to handle function descriptors. 
We also need to support the new PLT +# and glink format used by the the ELFv2 linker, see: +# https://sourceware.org/ml/binutils/2013-10/msg00376.html +# +# In addition, the dynamic linker now verifies that the dynamic libraries +# it loads match its own ABI. +# +# The hack in VDSO_IFUNC_RET to "synthesize" a function descriptor +# for vDSO routines is also no longer necessary for ELFv2. +# +diff -urN glibc-2.17-c758a686/elf/elf.h glibc-2.17-c758a686/elf/elf.h +--- glibc-2.17-c758a686/elf/elf.h 2014-05-29 13:58:25.000000000 -0500 ++++ glibc-2.17-c758a686/elf/elf.h 2014-05-29 13:58:25.000000000 -0500 +@@ -2263,6 +2263,12 @@ + #define R_PPC64_REL16_HI 251 /* half16 (sym+add-.)@h */ + #define R_PPC64_REL16_HA 252 /* half16 (sym+add-.)@ha */ + ++/* e_flags bits specifying ABI. ++ 1 for original function descriptor using ABI, ++ 2 for revised ABI without function descriptors, ++ 0 for unspecified or not using any features affected by the differences. */ ++#define EF_PPC64_ABI 3 ++ + /* PowerPC64 specific values for the Dyn d_tag field. */ + #define DT_PPC64_GLINK (DT_LOPROC + 0) + #define DT_PPC64_OPD (DT_LOPROC + 1) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S 2014-05-29 13:58:25.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S 2014-05-29 13:58:25.000000000 -0500 +@@ -64,6 +64,7 @@ + ENTRY_2(_init) + .align ALIGNARG (2) + BODY_LABEL (_init): ++ LOCALENTRY(_init) + mflr 0 + std 0, 16(r1) + stdu r1, -112(r1) +@@ -81,6 +82,7 @@ + ENTRY_2(_fini) + .align ALIGNARG (2) + BODY_LABEL (_fini): ++ LOCALENTRY(_fini) + mflr 0 + std 0, 16(r1) + stdu r1, -112(r1) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-irel.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-irel.h +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-irel.h 2014-05-29 13:58:25.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-irel.h 2014-05-29 13:58:25.000000000 -0500 +@@ -50,7 +50,11 @@ + { + Elf64_Addr *const reloc_addr = (void *) reloc->r_offset; + Elf64_Addr value = elf_ifunc_invoke(reloc->r_addend); ++#if _CALL_ELF != 2 + *(Elf64_FuncDesc *) reloc_addr = *(Elf64_FuncDesc *) value; ++#else ++ *reloc_addr = value; ++#endif + } + else + __libc_fatal ("unexpected reloc type in static binary"); +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 13:58:25.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 14:05:46.000000000 -0500 +@@ -31,6 +31,7 @@ + in l_info array. */ + #define DT_PPC64(x) (DT_PPC64_##x - DT_LOPROC + DT_NUM) + ++#if _CALL_ELF != 2 + /* A PowerPC64 function descriptor. The .plt (procedure linkage + table) and .opd (official procedure descriptor) sections are + arrays of these. */ +@@ -40,6 +41,7 @@ + Elf64_Addr fd_toc; + Elf64_Addr fd_aux; + } Elf64_FuncDesc; ++#endif + + #define ELF_MULT_MACHINES_SUPPORTED + +@@ -47,6 +49,18 @@ + static inline int + elf_machine_matches_host (const Elf64_Ehdr *ehdr) + { ++ /* Verify that the binary matches our ABI version. 
*/ ++ if ((ehdr->e_flags & EF_PPC64_ABI) != 0) ++ { ++#if _CALL_ELF != 2 ++ if ((ehdr->e_flags & EF_PPC64_ABI) != 1) ++ return 0; ++#else ++ if ((ehdr->e_flags & EF_PPC64_ABI) != 2) ++ return 0; ++#endif ++ } ++ + return ehdr->e_machine == EM_PPC64; + } + +@@ -124,6 +138,7 @@ + " .align 2\n" \ + " " ENTRY_2(_start) "\n" \ + BODY_PREFIX "_start:\n" \ ++" " LOCALENTRY(_start) "\n" \ + /* We start with the following on the stack, from top: \ + argc (4 bytes); \ + arguments for program (terminated by NULL); \ +@@ -165,6 +180,7 @@ + Changing these is strongly discouraged (not least because argc is \ + passed by value!). */ \ + BODY_PREFIX "_dl_start_user:\n" \ ++" " LOCALENTRY(_dl_start_user) "\n" \ + /* the address of _start in r30. */ \ + " mr 30,3\n" \ + /* &_dl_argc in 29, &_dl_argv in 27, and _dl_loaded in 28. */ \ +@@ -256,8 +272,22 @@ + relocations behave "normally", ie. always use the real address + like PLT relocations. So always set ELF_RTYPE_CLASS_PLT. */ + ++#if _CALL_ELF != 2 + #define elf_machine_type_class(type) \ + (ELF_RTYPE_CLASS_PLT | (((type) == R_PPC64_COPY) * ELF_RTYPE_CLASS_COPY)) ++#else ++/* And now that you have read that large comment, you can disregard it ++ all for ELFv2. ELFv2 does need the special SHN_UNDEF treatment. */ ++#define IS_PPC64_TLS_RELOC(R) \ ++ (((R) >= R_PPC64_TLS && (R) <= R_PPC64_DTPREL16_HIGHESTA) \ ++ || ((R) >= R_PPC64_TPREL16_HIGH && (R) <= R_PPC64_DTPREL16_HIGHA)) ++ ++#define elf_machine_type_class(type) \ ++ ((((type) == R_PPC64_JMP_SLOT \ ++ || (type) == R_PPC64_ADDR24 \ ++ || IS_PPC64_TLS_RELOC (type)) * ELF_RTYPE_CLASS_PLT) \ ++ | (((type) == R_PPC64_COPY) * ELF_RTYPE_CLASS_COPY)) ++#endif + + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_PPC64_JMP_SLOT +@@ -266,8 +296,19 @@ + #define ELF_MACHINE_NO_REL 1 + + /* Stuff for the PLT. */ ++#if _CALL_ELF != 2 + #define PLT_INITIAL_ENTRY_WORDS 3 ++#define PLT_ENTRY_WORDS 3 + #define GLINK_INITIAL_ENTRY_WORDS 8 ++/* The first 32k entries of glink can set an index and branch using two ++ instructions; past that point, glink uses three instructions. */ ++#define GLINK_ENTRY_WORDS(I) (((I) < 0x8000)? 2 : 3) ++#else ++#define PLT_INITIAL_ENTRY_WORDS 2 ++#define PLT_ENTRY_WORDS 1 ++#define GLINK_INITIAL_ENTRY_WORDS 8 ++#define GLINK_ENTRY_WORDS(I) 1 ++#endif + + #define PPC_DCBST(where) asm volatile ("dcbst 0,%0" : : "r"(where) : "memory") + #define PPC_DCBT(where) asm volatile ("dcbt 0,%0" : : "r"(where) : "memory") +@@ -312,17 +353,12 @@ + + if (lazy) + { +- /* The function descriptor of the appropriate trampline +- routine is used to set the 1st and 2nd doubleword of the +- plt_reserve. */ +- Elf64_FuncDesc *resolve_fd; + Elf64_Word glink_offset; +- /* the plt_reserve area is the 1st 3 doublewords of the PLT */ +- Elf64_FuncDesc *plt_reserve = (Elf64_FuncDesc *) plt; + Elf64_Word offset; ++ Elf64_Addr dlrr; + +- resolve_fd = (Elf64_FuncDesc *) (profile ? _dl_profile_resolve +- : _dl_runtime_resolve); ++ dlrr = (Elf64_Addr) (profile ? _dl_profile_resolve ++ : _dl_runtime_resolve); + if (profile && GLRO(dl_profile) != NULL + && _dl_name_match_p (GLRO(dl_profile), map)) + /* This is the object we are looking for. Say that we really +@@ -330,20 +366,33 @@ + GL(dl_profile_map) = map; + + ++#if _CALL_ELF != 2 + /* We need to stuff the address/TOC of _dl_runtime_resolve + into doublewords 0 and 1 of plt_reserve. Then we need to + stuff the map address into doubleword 2 of plt_reserve. 
+ This allows the GLINK0 code to transfer control to the + correct trampoline which will transfer control to fixup + in dl-machine.c. */ +- plt_reserve->fd_func = resolve_fd->fd_func; +- plt_reserve->fd_toc = resolve_fd->fd_toc; +- plt_reserve->fd_aux = (Elf64_Addr) map; ++ { ++ /* The plt_reserve area is the 1st 3 doublewords of the PLT. */ ++ Elf64_FuncDesc *plt_reserve = (Elf64_FuncDesc *) plt; ++ Elf64_FuncDesc *resolve_fd = (Elf64_FuncDesc *) dlrr; ++ plt_reserve->fd_func = resolve_fd->fd_func; ++ plt_reserve->fd_toc = resolve_fd->fd_toc; ++ plt_reserve->fd_aux = (Elf64_Addr) map; + #ifdef RTLD_BOOTSTRAP +- /* When we're bootstrapping, the opd entry will not have +- been relocated yet. */ +- plt_reserve->fd_func += l_addr; +- plt_reserve->fd_toc += l_addr; ++ /* When we're bootstrapping, the opd entry will not have ++ been relocated yet. */ ++ plt_reserve->fd_func += l_addr; ++ plt_reserve->fd_toc += l_addr; ++#endif ++ } ++#else ++ /* When we don't have function descriptors, the first doubleword ++ of the PLT holds the address of _dl_runtime_resolve, and the ++ second doubleword holds the map address. */ ++ plt[0] = dlrr; ++ plt[1] = (Elf64_Addr) map; + #endif + + /* Set up the lazy PLT entries. */ +@@ -354,14 +403,8 @@ + { + + plt[offset] = (Elf64_Xword) &glink[glink_offset]; +- offset += 3; +- /* The first 32k entries of glink can set an index and +- branch using two instructions; Past that point, +- glink uses three instructions. */ +- if (i < 0x8000) +- glink_offset += 2; +- else +- glink_offset += 3; ++ offset += PLT_ENTRY_WORDS; ++ glink_offset += GLINK_ENTRY_WORDS (i); + } + + /* Now, we've modified data. We need to write the changes from +@@ -389,6 +432,7 @@ + const Elf64_Rela *reloc, + Elf64_Addr *reloc_addr, Elf64_Addr finaladdr) + { ++#if _CALL_ELF != 2 + Elf64_FuncDesc *plt = (Elf64_FuncDesc *) reloc_addr; + Elf64_FuncDesc *rel = (Elf64_FuncDesc *) finaladdr; + Elf64_Addr offset = 0; +@@ -426,6 +470,9 @@ + plt->fd_func = rel->fd_func + offset; + PPC_DCBST (&plt->fd_func); + PPC_ISYNC; ++#else ++ *reloc_addr = finaladdr; ++#endif + + return finaladdr; + } +@@ -433,6 +480,7 @@ + static inline void __attribute__ ((always_inline)) + elf_machine_plt_conflict (Elf64_Addr *reloc_addr, Elf64_Addr finaladdr) + { ++#if _CALL_ELF != 2 + Elf64_FuncDesc *plt = (Elf64_FuncDesc *) reloc_addr; + Elf64_FuncDesc *rel = (Elf64_FuncDesc *) finaladdr; + +@@ -443,6 +491,9 @@ + PPC_DCBST (&plt->fd_aux); + PPC_DCBST (&plt->fd_toc); + PPC_SYNC; ++#else ++ *reloc_addr = finaladdr; ++#endif + } + + /* Return the final value of a plt relocation. */ +@@ -512,6 +563,7 @@ + resolve_ifunc (Elf64_Addr value, + const struct link_map *map, const struct link_map *sym_map) + { ++#if _CALL_ELF != 2 + #ifndef RESOLVE_CONFLICT_FIND_MAP + /* The function we are calling may not yet have its opd entry relocated. */ + Elf64_FuncDesc opd; +@@ -529,6 +581,7 @@ + value = (Elf64_Addr) &opd; + } + #endif ++#endif + return ((Elf64_Addr (*) (unsigned long int)) value) (GLRO(dl_hwcap)); + } + +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S 2014-05-29 13:58:25.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S 2014-05-29 14:00:27.000000000 -0500 +@@ -55,21 +55,22 @@ + that saves r2 since the call won't go via a plt call stub. See + bugz #269. __GI__setjmp is used in csu/libc-start.c when + HAVE_CLEANUP_JMP_BUF is defined. 
*/ +-ENTRY (BP_SYM (__GI__setjmp)) ++ENTRY (__GI__setjmp) + std r2,40(r1) /* Save the callers TOC in the save area. */ +- cfi_endproc +-END_2 (BP_SYM (__GI__setjmp)) +-/* Fall thru. */ ++ CALL_MCOUNT 1 ++ li r4,0 /* Set second argument to 0. */ ++ b JUMPTARGET (GLUE(__sigsetjmp,_ent)) ++END (__GI__setjmp) + #endif + +-ENTRY (BP_SYM (_setjmp)) ++ENTRY (_setjmp) + CALL_MCOUNT 1 + li r4,0 /* Set second argument to 0. */ + b JUMPTARGET (GLUE(__sigsetjmp,_ent)) +-END (BP_SYM (_setjmp)) ++END (_setjmp) + libc_hidden_def (_setjmp) + +-ENTRY (BP_SYM (__sigsetjmp)) ++ENTRY (__sigsetjmp) + CALL_MCOUNT 2 + JUMPTARGET(GLUE(__sigsetjmp,_ent)): + CHECK_BOUNDS_BOTH_WIDE_LIT (r3, r8, r9, JB_SIZE) +@@ -215,18 +216,18 @@ + li r3,0 + blr + #elif defined SHARED +- b JUMPTARGET (BP_SYM (__sigjmp_save)) ++ b JUMPTARGET (__sigjmp_save) + #else + mflr r0 + std r0,16(r1) + stdu r1,-112(r1) + cfi_adjust_cfa_offset(112) + cfi_offset(lr,16) +- bl JUMPTARGET (BP_SYM (__sigjmp_save)) ++ bl JUMPTARGET (__sigjmp_save) + nop + ld r0,112+16(r1) + addi r1,r1,112 + mtlr r0 + blr + #endif +-END (BP_SYM (__sigsetjmp)) ++END (__sigsetjmp) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h 2014-05-29 13:58:25.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h 2014-05-29 13:58:25.000000000 -0500 +@@ -74,6 +74,8 @@ + #endif + .endm + ++#if _CALL_ELF != 2 ++ + /* Macro to prepare for calling via a function pointer. */ + .macro PPC64_LOAD_FUNCPTR PTR + ld r12,0(\PTR) +@@ -115,13 +117,37 @@ + .size name,.-BODY_LABEL(name); \ + .size BODY_LABEL(name),.-BODY_LABEL(name); + #endif ++#define LOCALENTRY(name) ++ ++#else /* _CALL_ELF */ ++ ++/* Macro to prepare for calling via a function pointer. */ ++ .macro PPC64_LOAD_FUNCPTR PTR ++ mr r12,\PTR ++ mtctr r12 ++ .endm ++ ++#define DOT_LABEL(X) X ++#define BODY_LABEL(X) X ++#define ENTRY_2(name) \ ++ .globl name; \ ++ .type name,@function; ++#define END_2(name) \ ++ .size name,.-name; ++#define LOCALENTRY(name) \ ++1: addis r2,r12,.TOC.-1b@ha; \ ++ addi r2,r2,.TOC.-1b@l; \ ++ .localentry name,.-name; ++ ++#endif /* _CALL_ELF */ + + #define ENTRY(name) \ + .section ".text"; \ + ENTRY_2(name) \ + .align ALIGNARG(2); \ + BODY_LABEL(name): \ +- cfi_startproc; ++ cfi_startproc; \ ++ LOCALENTRY(name) + + #define EALIGN_W_0 /* No words to insert. */ + #define EALIGN_W_1 nop +@@ -140,7 +166,8 @@ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + BODY_LABEL(name): \ +- cfi_startproc; ++ cfi_startproc; \ ++ LOCALENTRY(name) + + /* Local labels stripped out by the linker. 
*/ + #undef L +@@ -295,6 +322,8 @@ + + #else /* !__ASSEMBLER__ */ + ++#if _CALL_ELF != 2 ++ + #define PPC64_LOAD_FUNCPTR(ptr) \ + "ld 12,0(" #ptr ");\n" \ + "ld 2,8(" #ptr ");\n" \ +@@ -335,5 +364,26 @@ + ".size " #name ",.-" BODY_PREFIX #name ";\n" \ + ".size " BODY_PREFIX #name ",.-" BODY_PREFIX #name ";" + #endif ++#define LOCALENTRY(name) ++ ++#else /* _CALL_ELF */ ++ ++#define PPC64_LOAD_FUNCPTR(ptr) \ ++ "mr 12," #ptr ";\n" \ ++ "mtctr 12;" ++ ++#define DOT_PREFIX "" ++#define BODY_PREFIX "" ++#define ENTRY_2(name) \ ++ ".type " #name ",@function;\n" \ ++ ".globl " #name ";" ++#define END_2(name) \ ++ ".size " #name ",.-" #name ";" ++#define LOCALENTRY(name) \ ++ "1: addis 2,12,.TOC.-1b@ha;\n" \ ++ "addi 2,2,.TOC.-1b@l;\n" \ ++ ".localentry " #name ",.-" #name ";" ++ ++#endif /* _CALL_ELF */ + + #endif /* __ASSEMBLER__ */ +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/ldsodefs.h glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/ldsodefs.h +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/ldsodefs.h 2014-05-29 13:58:24.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/ldsodefs.h 2014-05-29 13:58:25.000000000 -0500 +@@ -23,6 +23,8 @@ + + /* Now define our stuff. */ + ++#if _CALL_ELF != 2 ++ + static __always_inline bool + _dl_ppc64_is_opd_sym (const struct link_map *l, const ElfW(Sym) *sym) + { +@@ -73,4 +75,6 @@ + #define DL_ADDR_SYM_MATCH(L, SYM, MATCHSYM, ADDR) \ + _dl_ppc64_addr_sym_match (L, SYM, MATCHSYM, ADDR) + ++#endif ++ + #endif /* ldsodefs.h */ +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S 2014-05-29 13:58:24.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S 2014-05-29 13:58:25.000000000 -0500 +@@ -111,6 +111,7 @@ + + L(noparms): + ++#if _CALL_ELF != 2 + /* Load the function address and TOC from the function descriptor + and store them in the ucontext as NIP and r2. Store the 3rd + field of the function descriptor into the ucontext as r11 in case +@@ -121,6 +122,12 @@ + std r0,(SIGCONTEXT_GP_REGS+(PT_NIP*8))(r3) + std r10,(SIGCONTEXT_GP_REGS+(PT_R2*8))(r3) + std r9,(SIGCONTEXT_GP_REGS+(PT_R11*8))(r3) ++#else ++ /* In the ELFv2 ABI, the function pointer is already the address. ++ Store it as NIP and r12 as required by the ABI. */ ++ std r4,(SIGCONTEXT_GP_REGS+(PT_NIP*8))(r3) ++ std r4,(SIGCONTEXT_GP_REGS+(PT_R12*8))(r3) ++#endif + + /* If the target function returns we need to do some cleanup. We use a + code trick to get the address of our cleanup function into the link diff --git a/packages/glibc/2.17/0062-glibc-ppc64le-40.patch b/packages/glibc/2.17/0062-glibc-ppc64le-40.patch new file mode 100644 index 0000000..663d2f3 --- /dev/null +++ b/packages/glibc/2.17/0062-glibc-ppc64le-40.patch @@ -0,0 +1,159 @@ +# commit 122b66defdb9e4ded3ccc5c2b290f0520c6fa3cd +# Author: Ulrich Weigand +# Date: Wed Dec 4 06:52:40 2013 -0600 +# +# PowerPC64 ELFv2 ABI 3/6: PLT local entry point optimization +# +# This is a follow-on to the previous patch to support the ELFv2 ABI in the +# dynamic loader, split off into its own patch since it is just an optional +# optimization. 
+# +# In the ELFv2 ABI, most functions define both a global and a local entry +# point; the local entry requires r2 to be already set up by the caller +# to point to the callee's TOC; while the global entry does not require +# the caller to know about the callee's TOC, but it needs to set up r12 +# to the callee's entry point address. +# +# Now, when setting up a PLT slot, the dynamic linker will usually need +# to enter the target function's global entry point. However, if the +# linker can prove that the target function is in the same DSO as the +# PLT slot itself, and the whole DSO only uses a single TOC (which the +# linker will let ld.so know via a DT_PPC64_OPT entry), then it is +# possible to actually enter the local entry point address into the +# PLT slot, for a slight improvement in performance. +# +# Note that this uncovered a problem on the first call via _dl_runtime_resolve, +# because that routine neglected to restore the caller's TOC before calling +# the target function for the first time, since it assumed that function +# would always reload its own TOC anyway ... +# +diff -urN glibc-2.17-c758a686/elf/elf.h glibc-2.17-c758a686/elf/elf.h +--- glibc-2.17-c758a686/elf/elf.h 2014-05-29 14:08:44.000000000 -0500 ++++ glibc-2.17-c758a686/elf/elf.h 2014-05-29 14:08:44.000000000 -0500 +@@ -2273,8 +2273,19 @@ + #define DT_PPC64_GLINK (DT_LOPROC + 0) + #define DT_PPC64_OPD (DT_LOPROC + 1) + #define DT_PPC64_OPDSZ (DT_LOPROC + 2) ++#define DT_PPC64_OPT (DT_LOPROC + 3) + #define DT_PPC64_NUM 3 + ++/* PowerPC64 specific values for the DT_PPC64_OPT Dyn entry. */ ++#define PPC64_OPT_TLS 1 ++#define PPC64_OPT_MULTI_TOC 2 ++ ++/* PowerPC64 specific values for the Elf64_Sym st_other field. */ ++#define STO_PPC64_LOCAL_BIT 5 ++#define STO_PPC64_LOCAL_MASK (7 << STO_PPC64_LOCAL_BIT) ++#define PPC64_LOCAL_ENTRY_OFFSET(other) \ ++ (((1 << (((other) & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT)) >> 2) << 2) ++ + + /* ARM specific declarations */ + +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 14:08:40.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 14:08:44.000000000 -0500 +@@ -425,6 +425,42 @@ + return lazy; + } + ++#if _CALL_ELF == 2 ++/* If the PLT entry whose reloc is 'reloc' resolves to a function in ++ the same object, return the target function's local entry point ++ offset if usable. */ ++static inline Elf64_Addr __attribute__ ((always_inline)) ++ppc64_local_entry_offset (struct link_map *map, lookup_t sym_map, ++ const Elf64_Rela *reloc) ++{ ++ const Elf64_Sym *symtab; ++ const Elf64_Sym *sym; ++ ++ /* If the target function is in a different object, we cannot ++ use the local entry point. */ ++ if (sym_map != map) ++ return 0; ++ ++ /* If the linker inserted multiple TOCs, we cannot use the ++ local entry point. */ ++ if (map->l_info[DT_PPC64(OPT)] ++ && (map->l_info[DT_PPC64(OPT)]->d_un.d_val & PPC64_OPT_MULTI_TOC)) ++ return 0; ++ ++ /* Otherwise, we can use the local entry point. Retrieve its offset ++ from the symbol's ELF st_other field. */ ++ symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]); ++ sym = &symtab[ELFW(R_SYM) (reloc->r_info)]; ++ ++ /* If the target function is an ifunc then the local entry offset is ++ for the resolver, not the final destination. 
*/ ++ if (__builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0)) ++ return 0; ++ ++ return PPC64_LOCAL_ENTRY_OFFSET (sym->st_other); ++} ++#endif ++ + /* Change the PLT entry whose reloc is 'reloc' to call the actual + routine. */ + static inline Elf64_Addr __attribute__ ((always_inline)) +@@ -471,6 +507,7 @@ + PPC_DCBST (&plt->fd_func); + PPC_ISYNC; + #else ++ finaladdr += ppc64_local_entry_offset (map, sym_map, reloc); + *reloc_addr = finaladdr; + #endif + +@@ -478,7 +515,9 @@ + } + + static inline void __attribute__ ((always_inline)) +-elf_machine_plt_conflict (Elf64_Addr *reloc_addr, Elf64_Addr finaladdr) ++elf_machine_plt_conflict (struct link_map *map, lookup_t sym_map, ++ const Elf64_Rela *reloc, ++ Elf64_Addr *reloc_addr, Elf64_Addr finaladdr) + { + #if _CALL_ELF != 2 + Elf64_FuncDesc *plt = (Elf64_FuncDesc *) reloc_addr; +@@ -492,6 +531,7 @@ + PPC_DCBST (&plt->fd_toc); + PPC_SYNC; + #else ++ finaladdr += ppc64_local_entry_offset (map, sym_map, reloc); + *reloc_addr = finaladdr; + #endif + } +@@ -641,7 +681,7 @@ + /* Fall thru */ + case R_PPC64_JMP_SLOT: + #ifdef RESOLVE_CONFLICT_FIND_MAP +- elf_machine_plt_conflict (reloc_addr, value); ++ elf_machine_plt_conflict (map, sym_map, reloc, reloc_addr, value); + #else + elf_machine_fixup_plt (map, sym_map, reloc, reloc_addr, value); + #endif +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S 2014-05-29 14:08:40.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S 2014-05-29 14:08:44.000000000 -0500 +@@ -74,6 +74,10 @@ + /* Prepare for calling the function returned by fixup. */ + PPC64_LOAD_FUNCPTR r3 + ld r3,INT_PARMS+0(r1) ++#if _CALL_ELF == 2 ++/* Restore the caller's TOC in case we jump to a local entry point. */ ++ ld r2,FRAME_SIZE+40(r1) ++#endif + /* Unwind the stack frame, and jump. */ + addi r1,r1,FRAME_SIZE + bctr +@@ -321,6 +325,10 @@ + /* Prepare for calling the function returned by fixup. */ + PPC64_LOAD_FUNCPTR r3 + ld r3,INT_PARMS+0(r1) ++#if _CALL_ELF == 2 ++/* Restore the caller's TOC in case we jump to a local entry point. */ ++ ld r2,FRAME_SIZE+40(r1) ++#endif + /* Load the floating point registers. */ + lfd fp1,FPR_PARMS+0(r1) + lfd fp2,FPR_PARMS+8(r1) diff --git a/packages/glibc/2.17/0063-glibc-ppc64le-41.patch b/packages/glibc/2.17/0063-glibc-ppc64le-41.patch new file mode 100644 index 0000000..506d761 --- /dev/null +++ b/packages/glibc/2.17/0063-glibc-ppc64le-41.patch @@ -0,0 +1,764 @@ +# commit 8b8a692cfd7d80f1ee7c8b9ab356a259367dd187 +# Author: Ulrich Weigand +# Date: Wed Dec 4 06:55:03 2013 -0600 +# +# PowerPC64 ELFv2 ABI 4/6: Stack frame layout changes +# +# This updates glibc for the changes in the ELFv2 relating to the +# stack frame layout. These are described in more detail here: +# http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01149.html +# http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01146.html +# +# Specifically, the "compiler and linker doublewords" were removed, +# which has the effect that the save slot for the TOC register is +# now at offset 24 rather than 40 to the stack pointer. +# +# In addition, a function may now no longer necessarily assume that +# its caller has set up a 64-byte register save area its use. +# +# To address the first change, the patch goes through all assembler +# files and replaces immediate offsets in instructions accessing the +# ABI-defined stack slots by symbolic offsets. 
Those already were +# defined in ucontext_i.sym and used in some of the context routines, +# but that doesn't really seem like the right place for those defines. +# +# The patch instead defines those symbolic offsets in sysdeps.h, +# in two variants for the old and new ABI, and uses them systematically +# in all assembler files, not just the context routines. +# +# The second change only affected a few assembler files that used +# the save area to temporarily store some registers. In those +# cases where this happens within a leaf function, this patch +# changes the code to store those registers to the "red zone" +# below the stack pointer. Otherwise, the functions already allocate +# a stack frame, and the patch changes them to add extra space in +# these frames as temporary space for the ELFv2 ABI. +# +diff -urN glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep-cancel.h glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep-cancel.h +--- glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep-cancel.h 2014-05-29 14:10:00.000000000 -0500 ++++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep-cancel.h 2014-05-29 14:10:00.000000000 -0500 +@@ -31,6 +31,14 @@ + # define DASHDASHPFX(str) __##str + # endif + ++#if _CALL_ELF == 2 ++#define CANCEL_FRAMESIZE (FRAME_MIN_SIZE+16+48) ++#define CANCEL_PARM_SAVE (FRAME_MIN_SIZE+16) ++#else ++#define CANCEL_FRAMESIZE (FRAME_MIN_SIZE+16) ++#define CANCEL_PARM_SAVE (CANCEL_FRAMESIZE+FRAME_PARM_SAVE) ++#endif ++ + # undef PSEUDO + # define PSEUDO(name, syscall_name, args) \ + .section ".text"; \ +@@ -44,52 +52,52 @@ + PSEUDO_RET; \ + .size DASHDASHPFX(syscall_name##_nocancel),.-DASHDASHPFX(syscall_name##_nocancel); \ + .Lpseudo_cancel: \ +- stdu 1,-128(1); \ +- cfi_adjust_cfa_offset (128); \ ++ stdu 1,-CANCEL_FRAMESIZE(1); \ ++ cfi_adjust_cfa_offset (CANCEL_FRAMESIZE); \ + mflr 9; \ +- std 9,128+16(1); \ +- cfi_offset (lr, 16); \ ++ std 9,CANCEL_FRAMESIZE+FRAME_LR_SAVE(1); \ ++ cfi_offset (lr, FRAME_LR_SAVE); \ + DOCARGS_##args; /* save syscall args around CENABLE. */ \ + CENABLE; \ +- std 3,112(1); /* store CENABLE return value (MASK). */ \ ++ std 3,FRAME_MIN_SIZE(1); /* store CENABLE return value (MASK). */ \ + UNDOCARGS_##args; /* restore syscall args. */ \ + DO_CALL (SYS_ify (syscall_name)); \ + mfcr 0; /* save CR/R3 around CDISABLE. */ \ +- std 3,120(1); \ +- std 0,128+8(1); \ +- cfi_offset (cr, 8); \ +- ld 3,112(1); /* pass MASK to CDISABLE. */ \ ++ std 3,FRAME_MIN_SIZE+8(1); \ ++ std 0,CANCEL_FRAMESIZE+FRAME_CR_SAVE(1); \ ++ cfi_offset (cr, FRAME_CR_SAVE); \ ++ ld 3,FRAME_MIN_SIZE(1); /* pass MASK to CDISABLE. */ \ + CDISABLE; \ +- ld 9,128+16(1); \ +- ld 0,128+8(1); /* restore CR/R3. */ \ +- ld 3,120(1); \ ++ ld 9,CANCEL_FRAMESIZE+FRAME_LR_SAVE(1); \ ++ ld 0,CANCEL_FRAMESIZE+FRAME_CR_SAVE(1); /* restore CR/R3. 
*/ \ ++ ld 3,FRAME_MIN_SIZE+8(1); \ + mtlr 9; \ + mtcr 0; \ +- addi 1,1,128; \ +- cfi_adjust_cfa_offset (-128); \ ++ addi 1,1,CANCEL_FRAMESIZE; \ ++ cfi_adjust_cfa_offset (-CANCEL_FRAMESIZE); \ + cfi_restore (lr); \ + cfi_restore (cr) + + # define DOCARGS_0 + # define UNDOCARGS_0 + +-# define DOCARGS_1 std 3,128+48(1); DOCARGS_0 +-# define UNDOCARGS_1 ld 3,128+48(1); UNDOCARGS_0 ++# define DOCARGS_1 std 3,CANCEL_PARM_SAVE(1); DOCARGS_0 ++# define UNDOCARGS_1 ld 3,CANCEL_PARM_SAVE(1); UNDOCARGS_0 + +-# define DOCARGS_2 std 4,128+56(1); DOCARGS_1 +-# define UNDOCARGS_2 ld 4,128+56(1); UNDOCARGS_1 ++# define DOCARGS_2 std 4,CANCEL_PARM_SAVE+8(1); DOCARGS_1 ++# define UNDOCARGS_2 ld 4,CANCEL_PARM_SAVE+8(1); UNDOCARGS_1 + +-# define DOCARGS_3 std 5,128+64(1); DOCARGS_2 +-# define UNDOCARGS_3 ld 5,128+64(1); UNDOCARGS_2 ++# define DOCARGS_3 std 5,CANCEL_PARM_SAVE+16(1); DOCARGS_2 ++# define UNDOCARGS_3 ld 5,CANCEL_PARM_SAVE+16(1); UNDOCARGS_2 + +-# define DOCARGS_4 std 6,128+72(1); DOCARGS_3 +-# define UNDOCARGS_4 ld 6,128+72(1); UNDOCARGS_3 ++# define DOCARGS_4 std 6,CANCEL_PARM_SAVE+24(1); DOCARGS_3 ++# define UNDOCARGS_4 ld 6,CANCEL_PARM_SAVE+24(1); UNDOCARGS_3 + +-# define DOCARGS_5 std 7,128+80(1); DOCARGS_4 +-# define UNDOCARGS_5 ld 7,128+80(1); UNDOCARGS_4 ++# define DOCARGS_5 std 7,CANCEL_PARM_SAVE+32(1); DOCARGS_4 ++# define UNDOCARGS_5 ld 7,CANCEL_PARM_SAVE+32(1); UNDOCARGS_4 + +-# define DOCARGS_6 std 8,128+88(1); DOCARGS_5 +-# define UNDOCARGS_6 ld 8,128+88(1); UNDOCARGS_5 ++# define DOCARGS_6 std 8,CANCEL_PARM_SAVE+40(1); DOCARGS_5 ++# define UNDOCARGS_6 ld 8,CANCEL_PARM_SAVE+40(1); UNDOCARGS_5 + + # ifdef IS_IN_libpthread + # ifdef SHARED +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/__longjmp-common.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/__longjmp-common.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/__longjmp-common.S 2014-05-29 14:09:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/__longjmp-common.S 2014-05-29 14:10:00.000000000 -0500 +@@ -133,7 +133,7 @@ + ld r14,((JB_GPRS+0)*8)(r3) + lfd fp14,((JB_FPRS+0)*8)(r3) + #if defined SHARED && !defined IS_IN_rtld +- std r2,40(r1) /* Restore the callers TOC save area. */ ++ std r2,FRAME_TOC_SAVE(r1) /* Restore the callers TOC save area. */ + #endif + ld r15,((JB_GPRS+1)*8)(r3) + lfd fp15,((JB_FPRS+1)*8)(r3) +@@ -151,7 +151,7 @@ + PTR_DEMANGLE2 (r0, r25) + #endif + mtlr r0 +-/* std r2,40(r1) Restore the TOC save area. */ ++/* std r2,FRAME_TOC_SAVE(r1) Restore the TOC save area. 
*/ + ld r21,((JB_GPRS+7)*8)(r3) + lfd fp21,((JB_FPRS+7)*8)(r3) + ld r22,((JB_GPRS+8)*8)(r3) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S 2014-05-29 14:09:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S 2014-05-29 14:10:00.000000000 -0500 +@@ -66,8 +66,8 @@ + BODY_LABEL (_init): + LOCALENTRY(_init) + mflr 0 +- std 0, 16(r1) +- stdu r1, -112(r1) ++ std 0, FRAME_LR_SAVE(r1) ++ stdu r1, -FRAME_MIN_SIZE_PARM(r1) + #if PREINIT_FUNCTION_WEAK + addis r9, r2, .LC0@toc@ha + ld r0, .LC0@toc@l(r9) +@@ -84,5 +84,5 @@ + BODY_LABEL (_fini): + LOCALENTRY(_fini) + mflr 0 +- std 0, 16(r1) +- stdu r1, -112(r1) ++ std 0, FRAME_LR_SAVE(r1) ++ stdu r1, -FRAME_MIN_SIZE_PARM(r1) +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crtn.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crtn.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crtn.S 2014-05-29 14:09:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crtn.S 2014-05-29 14:10:00.000000000 -0500 +@@ -39,13 +39,13 @@ + #include + + .section .init,"ax",@progbits +- addi r1, r1, 112 +- ld r0, 16(r1) ++ addi r1, r1, FRAME_MIN_SIZE_PARM ++ ld r0, FRAME_LR_SAVE(r1) + mtlr r0 + blr + + .section .fini,"ax",@progbits +- addi r1, r1, 112 +- ld r0, 16(r1) ++ addi r1, r1, FRAME_MIN_SIZE_PARM ++ ld r0, FRAME_LR_SAVE(r1) + mtlr r0 + blr +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S 2014-05-29 14:09:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S 2014-05-29 14:10:00.000000000 -0500 +@@ -26,13 +26,13 @@ + parm1 (r3) and the index (r0) need to be converted to an offset + (index * 24) in parm2 (r4). */ + +-#define FRAME_SIZE 176 ++#define FRAME_SIZE (FRAME_MIN_SIZE+64) + /* We need to save the registers used to pass parameters, ie. r3 thru + r10; Use local var space rather than the parameter save area, + because gcc as of 2010/05 doesn't allocate a proper stack frame for + a function that makes no calls except for __tls_get_addr and we + might be here resolving the __tls_get_addr call. */ +-#define INT_PARMS 112 ++#define INT_PARMS FRAME_MIN_SIZE + EALIGN(_dl_runtime_resolve, 4, 0) + stdu r1,-FRAME_SIZE(r1) + cfi_adjust_cfa_offset (FRAME_SIZE) +@@ -48,25 +48,25 @@ + mflr r0 + std r8,INT_PARMS+40(r1) + /* Store the LR in the LR Save area. */ +- std r0,FRAME_SIZE+16(r1) +- cfi_offset (lr, 16) ++ std r0,FRAME_SIZE+FRAME_LR_SAVE(r1) ++ cfi_offset (lr, FRAME_LR_SAVE) + mfcr r0 + std r9,INT_PARMS+48(r1) + std r10,INT_PARMS+56(r1) + /* I'm almost certain we don't have to save cr... be safe. */ +- std r0,FRAME_SIZE+8(r1) ++ std r0,FRAME_SIZE+FRAME_CR_SAVE(r1) + bl JUMPTARGET(_dl_fixup) + #ifndef SHARED + nop + #endif + /* Put the registers back. */ +- ld r0,FRAME_SIZE+16(r1) ++ ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + ld r10,INT_PARMS+56(r1) + ld r9,INT_PARMS+48(r1) + ld r8,INT_PARMS+40(r1) + ld r7,INT_PARMS+32(r1) + mtlr r0 +- ld r0,FRAME_SIZE+8(r1) ++ ld r0,FRAME_SIZE+FRAME_CR_SAVE(r1) + ld r6,INT_PARMS+24(r1) + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) +@@ -76,7 +76,7 @@ + ld r3,INT_PARMS+0(r1) + #if _CALL_ELF == 2 + /* Restore the caller's TOC in case we jump to a local entry point. */ +- ld r2,FRAME_SIZE+40(r1) ++ ld r2,FRAME_SIZE+FRAME_TOC_SAVE(r1) + #endif + /* Unwind the stack frame, and jump. 
*/ + addi r1,r1,FRAME_SIZE +@@ -86,6 +86,7 @@ + #undef INT_PARMS + + /* Stack layout: ++ (Note: some of these are not required for the ELFv2 ABI.) + +592 previous backchain + +584 spill_r31 + +576 spill_r30 +@@ -147,10 +148,11 @@ + +64 parm3 + +56 parm2 + +48 parm1 +- * Parameter save area, Allocated by the call, at least 8 double words +- +40 TOC save area +- +32 Reserved for linker +- +24 Reserved for compiler ++ * Parameter save area ++ * (v1 ABI: Allocated by the call, at least 8 double words) ++ +40 v1 ABI: TOC save area ++ +32 v1 ABI: Reserved for linker ++ +24 v1 ABI: Reserved for compiler / v2 ABI: TOC save area + +16 LR save area + +8 CR save area + r1+0 stack back chain +@@ -206,15 +208,15 @@ + /* Store the LR in the LR Save area of the previous frame. */ + /* XXX Do we have to do this? */ + la r8,FRAME_SIZE(r1) +- std r5,FRAME_SIZE+16(r1) +- cfi_offset (lr, 16) ++ std r5,FRAME_SIZE+FRAME_LR_SAVE(r1) ++ cfi_offset (lr, FRAME_LR_SAVE) + std r5,CALLING_LR(r1) + mfcr r0 + std r9,INT_PARMS+48(r1) + std r10,INT_PARMS+56(r1) + std r8,CALLING_SP(r1) + /* I'm almost certain we don't have to save cr... be safe. */ +- std r0,FRAME_SIZE+8(r1) ++ std r0,FRAME_SIZE+FRAME_CR_SAVE(r1) + ld r12,.LC__dl_hwcap@toc(r2) + #ifdef SHARED + /* Load _rtld_local_ro._dl_hwcap. */ +@@ -311,13 +313,13 @@ + lvx v12,r11,r10 + lvx v13,r11,r9 + L(restoreFXR): +- ld r0,FRAME_SIZE+16(r1) ++ ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + ld r10,INT_PARMS+56(r1) + ld r9,INT_PARMS+48(r1) + ld r8,INT_PARMS+40(r1) + ld r7,INT_PARMS+32(r1) + mtlr r0 +- ld r0,FRAME_SIZE+8(r1) ++ ld r0,FRAME_SIZE+FRAME_CR_SAVE(r1) + ld r6,INT_PARMS+24(r1) + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) +@@ -327,7 +329,7 @@ + ld r3,INT_PARMS+0(r1) + #if _CALL_ELF == 2 + /* Restore the caller's TOC in case we jump to a local entry point. */ +- ld r2,FRAME_SIZE+40(r1) ++ ld r2,FRAME_SIZE+FRAME_TOC_SAVE(r1) + #endif + /* Load the floating point registers. */ + lfd fp1,FPR_PARMS+0(r1) +@@ -375,19 +377,19 @@ + lvx v12,r11,r10 + lvx v13,r11,r9 + L(restoreFXR2): +- ld r0,FRAME_SIZE+16(r1) ++ ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + ld r10,INT_PARMS+56(r1) + ld r9,INT_PARMS+48(r1) + ld r8,INT_PARMS+40(r1) + ld r7,INT_PARMS+32(r1) + mtlr r0 +- ld r0,FRAME_SIZE+8(r1) ++ ld r0,FRAME_SIZE+FRAME_CR_SAVE(r1) + ld r6,INT_PARMS+24(r1) + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) + mtcrf 0xFF,r0 + /* Prepare for calling the function returned by fixup. */ +- std r2,40(r1) ++ std r2,FRAME_TOC_SAVE(r1) + PPC64_LOAD_FUNCPTR r3 + ld r3,INT_PARMS+0(r1) + /* Load the floating point registers. */ +@@ -406,7 +408,7 @@ + lfd fp13,FPR_PARMS+96(r1) + /* Call the target function. */ + bctrl +- ld r2,40(r1) ++ ld r2,FRAME_TOC_SAVE(r1) + lwz r12,VR_VRSAVE(r1) + /* But return here and store the return values. 
*/ + std r3,INT_RTN(r1) +@@ -441,7 +443,7 @@ + beq L(pltexitreturn) + lvx v2,0,r10 + L(pltexitreturn): +- ld r0,FRAME_SIZE+16(r1) ++ ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + ld r31,584(r1) + ld r30,576(r1) + mtlr r0 +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/ppc-mcount.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/ppc-mcount.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/ppc-mcount.S 2014-05-29 14:09:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/ppc-mcount.S 2014-05-29 14:10:00.000000000 -0500 +@@ -24,16 +24,16 @@ + ENTRY(_mcount) + mflr r4 + ld r11, 0(r1) +- stdu r1,-112(r1) +- cfi_adjust_cfa_offset (112) +- std r4, 128(r1) +- cfi_offset (lr, 16) +- ld r3, 16(r11) ++ stdu r1,-FRAME_MIN_SIZE(r1) ++ cfi_adjust_cfa_offset (FRAME_MIN_SIZE) ++ std r4, FRAME_MIN_SIZE+FRAME_LR_SAVE(r1) ++ cfi_offset (lr, FRAME_LR_SAVE) ++ ld r3, FRAME_LR_SAVE(r11) + bl JUMPTARGET(__mcount_internal) + nop +- ld r0, 128(r1) ++ ld r0, FRAME_MIN_SIZE+FRAME_LR_SAVE(r1) + mtlr r0 +- addi r1,r1,112 ++ addi r1,r1,FRAME_MIN_SIZE + blr + END(_mcount) + +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S 2014-05-29 14:09:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S 2014-05-29 14:10:00.000000000 -0500 +@@ -56,7 +56,7 @@ + bugz #269. __GI__setjmp is used in csu/libc-start.c when + HAVE_CLEANUP_JMP_BUF is defined. */ + ENTRY (__GI__setjmp) +- std r2,40(r1) /* Save the callers TOC in the save area. */ ++ std r2,FRAME_TOC_SAVE(r1) /* Save the callers TOC in the save area. */ + CALL_MCOUNT 1 + li r4,0 /* Set second argument to 0. */ + b JUMPTARGET (GLUE(__sigsetjmp,_ent)) +@@ -83,7 +83,7 @@ + #endif + mflr r0 + #if defined SHARED && !defined IS_IN_rtld +- ld r5,40(r1) /* Retrieve the callers TOC. */ ++ ld r5,FRAME_TOC_SAVE(r1) /* Retrieve the callers TOC. */ + std r5,(JB_GPR2*8)(3) + #else + std r2,(JB_GPR2*8)(3) +@@ -219,14 +219,14 @@ + b JUMPTARGET (__sigjmp_save) + #else + mflr r0 +- std r0,16(r1) +- stdu r1,-112(r1) +- cfi_adjust_cfa_offset(112) +- cfi_offset(lr,16) ++ std r0,FRAME_LR_SAVE(r1) ++ stdu r1,-FRAME_MIN_SIZE(r1) ++ cfi_adjust_cfa_offset(FRAME_MIN_SIZE) ++ cfi_offset(lr,FRAME_LR_SAVE) + bl JUMPTARGET (__sigjmp_save) + nop +- ld r0,112+16(r1) +- addi r1,r1,112 ++ ld r0,FRAME_MIN_SIZE+FRAME_LR_SAVE(r1) ++ addi r1,r1,FRAME_MIN_SIZE + mtlr r0 + blr + #endif +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h 2014-05-29 14:09:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h 2014-05-29 14:10:00.000000000 -0500 +@@ -20,25 +20,67 @@ + + #ifdef __ASSEMBLER__ + ++/* Stack frame offsets. 
*/ ++#if _CALL_ELF != 2 ++#define FRAME_MIN_SIZE 112 ++#define FRAME_MIN_SIZE_PARM 112 ++#define FRAME_BACKCHAIN 0 ++#define FRAME_CR_SAVE 8 ++#define FRAME_LR_SAVE 16 ++#define FRAME_TOC_SAVE 40 ++#define FRAME_PARM_SAVE 48 ++#define FRAME_PARM1_SAVE 48 ++#define FRAME_PARM2_SAVE 56 ++#define FRAME_PARM3_SAVE 64 ++#define FRAME_PARM4_SAVE 72 ++#define FRAME_PARM5_SAVE 80 ++#define FRAME_PARM6_SAVE 88 ++#define FRAME_PARM7_SAVE 96 ++#define FRAME_PARM8_SAVE 104 ++#define FRAME_PARM9_SAVE 112 ++#else ++#define FRAME_MIN_SIZE 32 ++#define FRAME_MIN_SIZE_PARM 96 ++#define FRAME_BACKCHAIN 0 ++#define FRAME_CR_SAVE 8 ++#define FRAME_LR_SAVE 16 ++#define FRAME_TOC_SAVE 24 ++#define FRAME_PARM_SAVE 32 ++#define FRAME_PARM1_SAVE 32 ++#define FRAME_PARM2_SAVE 40 ++#define FRAME_PARM3_SAVE 48 ++#define FRAME_PARM4_SAVE 56 ++#define FRAME_PARM5_SAVE 64 ++#define FRAME_PARM6_SAVE 72 ++#define FRAME_PARM7_SAVE 80 ++#define FRAME_PARM8_SAVE 88 ++#define FRAME_PARM9_SAVE 96 ++#endif ++ + /* Support macros for CALL_MCOUNT. */ ++#if _CALL_ELF == 2 ++#define call_mcount_parm_offset (-64) ++#else ++#define call_mcount_parm_offset FRAME_PARM_SAVE ++#endif + .macro SAVE_ARG NARG + .if \NARG + SAVE_ARG \NARG-1 +- std 2+\NARG,40+8*(\NARG)(1) ++ std 2+\NARG,call_mcount_parm_offset-8+8*(\NARG)(1) + .endif + .endm + + .macro REST_ARG NARG + .if \NARG + REST_ARG \NARG-1 +- ld 2+\NARG,112+40+8*(\NARG)(1) ++ ld 2+\NARG,FRAME_MIN_SIZE_PARM+call_mcount_parm_offset-8+8*(\NARG)(1) + .endif + .endm + + .macro CFI_SAVE_ARG NARG + .if \NARG + CFI_SAVE_ARG \NARG-1 +- cfi_offset(2+\NARG,40+8*(\NARG)) ++ cfi_offset(2+\NARG,call_mcount_parm_offset-8+8*(\NARG)) + .endif + .endm + +@@ -55,20 +97,20 @@ + #ifdef PROF + mflr r0 + SAVE_ARG \NARG +- std r0,16(r1) +- stdu r1,-112(r1) +- cfi_adjust_cfa_offset(112) +- cfi_offset(lr,16) ++ std r0,FRAME_LR_SAVE(r1) ++ stdu r1,-FRAME_MIN_SIZE_PARM(r1) ++ cfi_adjust_cfa_offset(FRAME_MIN_SIZE_PARM) ++ cfi_offset(lr,FRAME_LR_SAVE) + CFI_SAVE_ARG \NARG + bl JUMPTARGET (_mcount) + #ifndef SHARED + nop + #endif +- ld r0,128(r1) ++ ld r0,FRAME_MIN_SIZE_PARM+FRAME_LR_SAVE(r1) + REST_ARG \NARG + mtlr r0 +- addi r1,r1,112 +- cfi_adjust_cfa_offset(-112) ++ addi r1,r1,FRAME_MIN_SIZE_PARM ++ cfi_adjust_cfa_offset(-FRAME_MIN_SIZE_PARM) + cfi_restore(lr) + CFI_REST_ARG \NARG + #endif +@@ -267,15 +309,15 @@ + .else; \ + .Local_syscall_error: \ + mflr 0; \ +- std 0,16(1); \ +- stdu 1,-112(1); \ +- cfi_adjust_cfa_offset(112); \ +- cfi_offset(lr,16); \ ++ std 0,FRAME_LR_SAVE(1); \ ++ stdu 1,-FRAME_MIN_SIZE(1); \ ++ cfi_adjust_cfa_offset(FRAME_MIN_SIZE); \ ++ cfi_offset(lr,FRAME_LR_SAVE); \ + bl JUMPTARGET(__syscall_error); \ + nop; \ +- ld 0,112+16(1); \ +- addi 1,1,112; \ +- cfi_adjust_cfa_offset(-112); \ ++ ld 0,FRAME_MIN_SIZE+FRAME_LR_SAVE(1); \ ++ addi 1,1,FRAME_MIN_SIZE; \ ++ cfi_adjust_cfa_offset(-FRAME_MIN_SIZE); \ + mtlr 0; \ + cfi_restore(lr); \ + blr; \ +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/____longjmp_chk.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/____longjmp_chk.S +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/____longjmp_chk.S 2014-05-29 14:09:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/____longjmp_chk.S 2014-05-29 14:10:00.000000000 -0500 +@@ -33,24 +33,24 @@ + cmpld reg, r1; \ + bge+ .Lok; \ + mflr r0; \ +- std r0,16(r1); \ ++ std r0,FRAME_LR_SAVE(r1); \ + mr r31,r3; \ + mr r30,r4; \ +- stdu r1,-144(r1); \ ++ stdu r1,-FRAME_MIN_SIZE-32(r1); \ + cfi_remember_state; \ +- 
cfi_adjust_cfa_offset (144); \ +- cfi_offset (lr, 16); \ ++ cfi_adjust_cfa_offset (FRAME_MIN_SIZE+32); \ ++ cfi_offset (lr, FRAME_LR_SAVE); \ + li r3,0; \ +- addi r4,r1,112; \ ++ addi r4,r1,FRAME_MIN_SIZE; \ + li r0,__NR_sigaltstack; \ + sc; \ + /* Without working sigaltstack we cannot perform the test. */ \ + bso .Lok2; \ +- lwz r0,112+8(r1); \ ++ lwz r0,FRAME_MIN_SIZE+8(r1); \ + andi. r4,r0,1; \ + beq .Lfail; \ +- ld r0,112+16(r1); \ +- ld r4,112(r1); \ ++ ld r0,FRAME_MIN_SIZE+16(r1); \ ++ ld r4,FRAME_MIN_SIZE(r1); \ + add r4,r4,r0; \ + sub r3,r3,reg; \ + cmpld r3,r0; \ +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S 2014-05-29 14:09:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S 2014-05-29 14:10:00.000000000 -0500 +@@ -31,9 +31,9 @@ + CALL_MCOUNT 1 + DISCARD_BOUNDS (r3) /* the bounds are meaningless, so toss 'em. */ + +- std r3,48(r1) ++ std r3,-8(r1) + DO_CALL(SYS_ify(brk)) +- ld r6,48(r1) ++ ld r6,-8(r1) + ld r5,.LC__curbrk@toc(r2) + std r3,0(r5) + cmpld r6,r3 +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S 2014-05-29 14:09:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S 2014-05-29 14:10:00.000000000 -0500 +@@ -45,22 +45,22 @@ + cror cr0*4+eq,cr1*4+eq,cr0*4+eq + beq- cr0,L(badargs) + +- /* Save some regs in parm save area. */ ++ /* Save some regs in the "red zone". */ + #ifdef RESET_PID +- std r29,48(r1) ++ std r29,-24(r1) + #endif +- std r30,56(r1) +- std r31,64(r1) ++ std r30,-16(r1) ++ std r31,-8(r1) + #ifdef RESET_PID +- cfi_offset(r29,48) ++ cfi_offset(r29,-24) + #endif +- cfi_offset(r30,56) +- cfi_offset(r31,64) ++ cfi_offset(r30,-16) ++ cfi_offset(r31,-8) + + /* Set up stack frame for child. */ + clrrdi r4,r4,4 + li r0,0 +- stdu r0,-112(r4) /* min stack frame is 112 bytes per ABI */ ++ stdu r0,-FRAME_MIN_SIZE_PARM(r4) + + /* Save fn, args, stack across syscall. */ + mr r30,r3 /* Function in r30. */ +@@ -102,12 +102,12 @@ + L(oldpid): + #endif + +- std r2,40(r1) ++ std r2,FRAME_TOC_SAVE(r1) + /* Call procedure. */ + PPC64_LOAD_FUNCPTR r30 + mr r3,r31 + bctrl +- ld r2,40(r1) ++ ld r2,FRAME_TOC_SAVE(r1) + /* Call _exit with result from procedure. */ + #ifdef SHARED + b JUMPTARGET(__GI__exit) +@@ -126,15 +126,15 @@ + L(parent): + /* Parent. Restore registers & return. */ + #ifdef RESET_PID +- cfi_offset(r29,48) ++ cfi_offset(r29,-24) + #endif +- cfi_offset(r30,56) +- cfi_offset(r31,64) ++ cfi_offset(r30,-16) ++ cfi_offset(r31,-8) + #ifdef RESET_PID +- ld r29,48(r1) ++ ld r29,-24(r1) + #endif +- ld r30,56(r1) +- ld r31,64(r1) ++ ld r30,-16(r1) ++ ld r31,-8(r1) + #ifdef RESET_PID + cfi_restore(r29) + #endif +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/socket.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/socket.S +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/socket.S 2014-05-29 14:09:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/socket.S 2014-05-29 14:10:00.000000000 -0500 +@@ -46,8 +46,13 @@ + # endif + #endif + +-#define FRAMESIZE 128 +-#define stackblock FRAMESIZE+48 /* offset to parm save area. 
*/ ++#if _CALL_ELF == 2 ++#define FRAMESIZE (FRAME_MIN_SIZE+16+64) ++#define stackblock (FRAME_MIN_SIZE+16) ++#else ++#define FRAMESIZE (FRAME_MIN_SIZE+16) ++#define stackblock (FRAMESIZE+FRAME_PARM_SAVE) /* offset to parm save area. */ ++#endif + + .text + ENTRY(__socket) +@@ -98,22 +103,22 @@ + .Lsocket_cancel: + cfi_adjust_cfa_offset(FRAMESIZE) + mflr r9 +- std r9,FRAMESIZE+16(r1) +- cfi_offset (lr, 16) ++ std r9,FRAMESIZE+FRAME_LR_SAVE(r1) ++ cfi_offset (lr, FRAME_LR_SAVE) + CENABLE +- std r3,120(r1) ++ std r3,FRAME_MIN_SIZE+8(r1) + li r3,P(SOCKOP_,socket) + addi r4,r1,stackblock + DO_CALL(SYS_ify(socketcall)) + mfcr r0 +- std r3,112(r1) +- std r0,FRAMESIZE+8(r1) +- cfi_offset (cr, 8) +- ld r3,120(r1) ++ std r3,FRAME_MIN_SIZE(r1) ++ std r0,FRAMESIZE+FRAME_CR_SAVE(r1) ++ cfi_offset (cr, FRAME_CR_SAVE) ++ ld r3,FRAME_MIN_SIZE+8(r1) + CDISABLE +- ld r4,FRAMESIZE+16(r1) +- ld r0,FRAMESIZE+8(r1) +- ld r3,112(r1) ++ ld r4,FRAMESIZE+FRAME_LR_SAVE(r1) ++ ld r0,FRAMESIZE+FRAME_CR_SAVE(r1) ++ ld r3,FRAME_MIN_SIZE(r1) + mtlr r4 + mtcr r0 + addi r1,r1,FRAMESIZE +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/ucontext_i.sym glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/ucontext_i.sym +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/ucontext_i.sym 2014-05-29 14:09:56.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/ucontext_i.sym 2014-05-29 14:10:00.000000000 -0500 +@@ -8,27 +8,6 @@ + SIG_SETMASK + + +--- Offsets of the fields in the powerpc64 ABI stack frame. +--- XXX Do these correspond to some struct? +- +-FRAME_BACKCHAIN 0 +-FRAME_CR_SAVE 8 +-FRAME_LR_SAVE 16 +-FRAME_COMPILER_DW 24 +-FRAME_LINKER_DW 32 +-FRAME_TOC_SAVE 40 +-FRAME_PARM_SAVE 48 +-FRAME_PARM1_SAVE 48 +-FRAME_PARM2_SAVE 56 +-FRAME_PARM3_SAVE 64 +-FRAME_PARM4_SAVE 72 +-FRAME_PARM5_SAVE 80 +-FRAME_PARM6_SAVE 88 +-FRAME_PARM7_SAVE 96 +-FRAME_PARM8_SAVE 104 +-FRAME_PARM9_SAVE 112 +- +- + -- Offsets of the fields in the ucontext_t structure. + #define ucontext(member) offsetof (ucontext_t, member) + #define mcontext(member) ucontext (uc_mcontext.member) diff --git a/packages/glibc/2.17/0064-glibc-ppc64le-42.patch b/packages/glibc/2.17/0064-glibc-ppc64le-42.patch new file mode 100644 index 0000000..f5fa53e --- /dev/null +++ b/packages/glibc/2.17/0064-glibc-ppc64le-42.patch @@ -0,0 +1,404 @@ +# commit 61cd8fe4017c251617dd300818917e61a12ab48e +# Author: Ulrich Weigand +# Date: Wed Dec 4 06:59:37 2013 -0600 +# +# PowerPC64 ELFv2 ABI 5/6: LD_AUDIT interface changes +# +# The ELFv2 ABI changes the calling convention by passing and returning +# structures in registers in more cases than the old ABI: +# http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01145.html +# http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01147.html +# +# For the most part, this does not affect glibc, since glibc assembler +# files do not use structure parameters / return values. However, one +# place is affected: the LD_AUDIT interface provides a structure to +# the audit routine that contains all registers holding function +# argument and return values for the intercepted PLT call. +# +# Since the new ABI now sometimes uses registers to return values +# that were never used for this purpose in the old ABI, this structure +# has to be extended. To force audit routines to be modified for the +# new ABI if necessary, the patch defines v2 variants of the la_ppc64 +# types and routines. 
+# +# In addition, the patch contains two unrelated changes to the +# PLT trampoline routines: it fixes a bug where FPR return values +# were stored in the wrong place, and it removes the unnecessary +# save/restore of CR. +# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/bits/link.h glibc-2.17-c758a686/sysdeps/powerpc/bits/link.h +--- glibc-2.17-c758a686/sysdeps/powerpc/bits/link.h 2014-05-29 14:11:12.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/bits/link.h 2014-05-29 14:11:20.000000000 -0500 +@@ -63,7 +63,7 @@ + + __END_DECLS + +-#else ++#elif _CALL_ELF != 2 + + /* Registers for entry into PLT on PPC64. */ + typedef struct La_ppc64_regs +@@ -107,4 +107,48 @@ + + __END_DECLS + ++#else ++ ++/* Registers for entry into PLT on PPC64 in the ELFv2 ABI. */ ++typedef struct La_ppc64v2_regs ++{ ++ uint64_t lr_reg[8]; ++ double lr_fp[13]; ++ uint32_t __padding; ++ uint32_t lr_vrsave; ++ uint32_t lr_vreg[12][4] __attribute__ ((aligned (16))); ++ uint64_t lr_r1; ++ uint64_t lr_lr; ++} La_ppc64v2_regs; ++ ++/* Return values for calls from PLT on PPC64 in the ELFv2 ABI. */ ++typedef struct La_ppc64v2_retval ++{ ++ uint64_t lrv_r3; ++ uint64_t lrv_r4; ++ double lrv_fp[10]; ++ uint32_t lrv_vreg[8][4] __attribute__ ((aligned (16))); ++} La_ppc64v2_retval; ++ ++ ++__BEGIN_DECLS ++ ++extern Elf64_Addr la_ppc64v2_gnu_pltenter (Elf64_Sym *__sym, ++ unsigned int __ndx, ++ uintptr_t *__refcook, ++ uintptr_t *__defcook, ++ La_ppc64v2_regs *__regs, ++ unsigned int *__flags, ++ const char *__symname, ++ long int *__framesizep); ++extern unsigned int la_ppc64v2_gnu_pltexit (Elf64_Sym *__sym, ++ unsigned int __ndx, ++ uintptr_t *__refcook, ++ uintptr_t *__defcook, ++ const La_ppc64v2_regs *__inregs, ++ La_ppc64v2_retval *__outregs, ++ const char *__symname); ++ ++__END_DECLS ++ + #endif +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/ldsodefs.h glibc-2.17-c758a686/sysdeps/powerpc/ldsodefs.h +--- glibc-2.17-c758a686/sysdeps/powerpc/ldsodefs.h 2014-05-29 14:11:12.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/ldsodefs.h 2014-05-29 14:11:20.000000000 -0500 +@@ -25,6 +25,8 @@ + struct La_ppc32_retval; + struct La_ppc64_regs; + struct La_ppc64_retval; ++struct La_ppc64v2_regs; ++struct La_ppc64v2_retval; + + #define ARCH_PLTENTER_MEMBERS \ + Elf32_Addr (*ppc32_gnu_pltenter) (Elf32_Sym *, unsigned int, uintptr_t *, \ +@@ -34,7 +36,12 @@ + Elf64_Addr (*ppc64_gnu_pltenter) (Elf64_Sym *, unsigned int, uintptr_t *, \ + uintptr_t *, struct La_ppc64_regs *, \ + unsigned int *, const char *name, \ +- long int *framesizep) ++ long int *framesizep); \ ++ Elf64_Addr (*ppc64v2_gnu_pltenter) (Elf64_Sym *, unsigned int, \ ++ uintptr_t *, uintptr_t *, \ ++ struct La_ppc64v2_regs *, \ ++ unsigned int *, const char *name, \ ++ long int *framesizep) + + #define ARCH_PLTEXIT_MEMBERS \ + unsigned int (*ppc32_gnu_pltexit) (Elf32_Sym *, unsigned int, \ +@@ -47,7 +54,14 @@ + uintptr_t *, \ + uintptr_t *, \ + const struct La_ppc64_regs *, \ +- struct La_ppc64_retval *, const char *) ++ struct La_ppc64_retval *, \ ++ const char *); \ ++ unsigned int (*ppc64v2_gnu_pltexit) (Elf64_Sym *, unsigned int, \ ++ uintptr_t *, \ ++ uintptr_t *, \ ++ const struct La_ppc64v2_regs *,\ ++ struct La_ppc64v2_retval *, \ ++ const char *) + + #include_next + +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 14:11:12.000000000 -0500 ++++ 
glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 14:11:20.000000000 -0500 +@@ -546,8 +546,13 @@ + + + /* Names of the architecture-specific auditing callback functions. */ ++#if _CALL_ELF != 2 + #define ARCH_LA_PLTENTER ppc64_gnu_pltenter + #define ARCH_LA_PLTEXIT ppc64_gnu_pltexit ++#else ++#define ARCH_LA_PLTENTER ppc64v2_gnu_pltenter ++#define ARCH_LA_PLTEXIT ppc64v2_gnu_pltexit ++#endif + + #endif /* dl_machine_h */ + +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S 2014-05-29 14:11:12.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S 2014-05-29 14:11:20.000000000 -0500 +@@ -50,11 +50,8 @@ + /* Store the LR in the LR Save area. */ + std r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + cfi_offset (lr, FRAME_LR_SAVE) +- mfcr r0 + std r9,INT_PARMS+48(r1) + std r10,INT_PARMS+56(r1) +-/* I'm almost certain we don't have to save cr... be safe. */ +- std r0,FRAME_SIZE+FRAME_CR_SAVE(r1) + bl JUMPTARGET(_dl_fixup) + #ifndef SHARED + nop +@@ -66,11 +63,9 @@ + ld r8,INT_PARMS+40(r1) + ld r7,INT_PARMS+32(r1) + mtlr r0 +- ld r0,FRAME_SIZE+FRAME_CR_SAVE(r1) + ld r6,INT_PARMS+24(r1) + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) +- mtcrf 0xFF,r0 + /* Prepare for calling the function returned by fixup. */ + PPC64_LOAD_FUNCPTR r3 + ld r3,INT_PARMS+0(r1) +@@ -85,18 +80,30 @@ + #undef FRAME_SIZE + #undef INT_PARMS + +- /* Stack layout: +- (Note: some of these are not required for the ELFv2 ABI.) +- +592 previous backchain +- +584 spill_r31 +- +576 spill_r30 +- +560 v1 +- +552 fp4 +- +544 fp3 +- +536 fp2 +- +528 fp1 +- +520 r4 +- +512 r3 ++ /* Stack layout: ELFv2 ABI. ++ +752 previous backchain ++ +744 spill_r31 ++ +736 spill_r30 ++ +720 v8 ++ +704 v7 ++ +688 v6 ++ +672 v5 ++ +656 v4 ++ +640 v3 ++ +624 v2 ++ +608 v1 ++ +600 fp10 ++ ELFv1 ABI +592 fp9 ++ +592 previous backchain +584 fp8 ++ +584 spill_r31 +576 fp7 ++ +576 spill_r30 +568 fp6 ++ +560 v1 +560 fp5 ++ +552 fp4 +552 fp4 ++ +544 fp3 +544 fp3 ++ +536 fp2 +536 fp2 ++ +528 fp1 +528 fp1 ++ +520 r4 +520 r4 ++ +512 r3 +512 r3 + return values + +504 free + +496 stackframe +@@ -157,10 +164,15 @@ + +8 CR save area + r1+0 stack back chain + */ +-#define FRAME_SIZE 592 ++#if _CALL_ELF == 2 ++# define FRAME_SIZE 752 ++# define VR_RTN 608 ++#else ++# define FRAME_SIZE 592 ++# define VR_RTN 560 ++#endif + #define INT_RTN 512 + #define FPR_RTN 528 +-#define VR_RTN 560 + #define STACK_FRAME 496 + #define CALLING_LR 488 + #define CALLING_SP 480 +@@ -205,18 +217,14 @@ + mflr r5 + std r7,INT_PARMS+32(r1) + std r8,INT_PARMS+40(r1) +-/* Store the LR in the LR Save area of the previous frame. */ +-/* XXX Do we have to do this? */ ++/* Store the LR in the LR Save area. */ + la r8,FRAME_SIZE(r1) + std r5,FRAME_SIZE+FRAME_LR_SAVE(r1) + cfi_offset (lr, FRAME_LR_SAVE) + std r5,CALLING_LR(r1) +- mfcr r0 + std r9,INT_PARMS+48(r1) + std r10,INT_PARMS+56(r1) + std r8,CALLING_SP(r1) +-/* I'm almost certain we don't have to save cr... be safe. */ +- std r0,FRAME_SIZE+FRAME_CR_SAVE(r1) + ld r12,.LC__dl_hwcap@toc(r2) + #ifdef SHARED + /* Load _rtld_local_ro._dl_hwcap. */ +@@ -319,11 +327,9 @@ + ld r8,INT_PARMS+40(r1) + ld r7,INT_PARMS+32(r1) + mtlr r0 +- ld r0,FRAME_SIZE+FRAME_CR_SAVE(r1) + ld r6,INT_PARMS+24(r1) + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) +- mtcrf 0xFF,r0 + /* Prepare for calling the function returned by fixup. 
*/ + PPC64_LOAD_FUNCPTR r3 + ld r3,INT_PARMS+0(r1) +@@ -346,10 +352,11 @@ + lfd fp12,FPR_PARMS+88(r1) + lfd fp13,FPR_PARMS+96(r1) + /* Unwind the stack frame, and jump. */ +- ld r31,584(r1) +- ld r30,576(r1) ++ ld r31,FRAME_SIZE-8(r1) ++ ld r30,FRAME_SIZE-16(r1) + addi r1,r1,FRAME_SIZE + bctr ++ + L(do_pltexit): + la r10,(VR_PARMS+0)(r1) + la r9,(VR_PARMS+16)(r1) +@@ -383,11 +390,9 @@ + ld r8,INT_PARMS+40(r1) + ld r7,INT_PARMS+32(r1) + mtlr r0 +- ld r0,FRAME_SIZE+FRAME_CR_SAVE(r1) + ld r6,INT_PARMS+24(r1) + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) +- mtcrf 0xFF,r0 + /* Prepare for calling the function returned by fixup. */ + std r2,FRAME_TOC_SAVE(r1) + PPC64_LOAD_FUNCPTR r3 +@@ -413,16 +418,37 @@ + /* But return here and store the return values. */ + std r3,INT_RTN(r1) + std r4,INT_RTN+8(r1) +- stfd fp1,FPR_PARMS+0(r1) +- stfd fp2,FPR_PARMS+8(r1) ++ stfd fp1,FPR_RTN+0(r1) ++ stfd fp2,FPR_RTN+8(r1) + cmpdi cr0,r12,0 + la r10,VR_RTN(r1) +- stfd fp3,FPR_PARMS+16(r1) +- stfd fp4,FPR_PARMS+24(r1) ++ stfd fp3,FPR_RTN+16(r1) ++ stfd fp4,FPR_RTN+24(r1) ++#if _CALL_ELF == 2 ++ la r12,VR_RTN+16(r1) ++ stfd fp5,FPR_RTN+32(r1) ++ stfd fp6,FPR_RTN+40(r1) ++ li r5,32 ++ li r6,64 ++ stfd fp7,FPR_RTN+48(r1) ++ stfd fp8,FPR_RTN+56(r1) ++ stfd fp9,FPR_RTN+64(r1) ++ stfd fp10,FPR_RTN+72(r1) ++#endif + mr r3,r31 + mr r4,r30 + beq L(callpltexit) + stvx v2,0,r10 ++#if _CALL_ELF == 2 ++ stvx v3,0,r12 ++ stvx v4,r5,r10 ++ stvx v5,r5,r12 ++ addi r5,r5,64 ++ stvx v6,r6,r10 ++ stvx v7,r6,r12 ++ stvx v8,r5,r10 ++ stvx v9,r5,r12 ++#endif + L(callpltexit): + addi r5,r1,INT_PARMS + addi r6,r1,INT_RTN +@@ -434,18 +460,39 @@ + lwz r12,VR_VRSAVE(r1) + ld r3,INT_RTN(r1) + ld r4,INT_RTN+8(r1) +- lfd fp1,FPR_PARMS+0(r1) +- lfd fp2,FPR_PARMS+8(r1) ++ lfd fp1,FPR_RTN+0(r1) ++ lfd fp2,FPR_RTN+8(r1) + cmpdi cr0,r12,0 +- la r10,VR_RTN(r1) +- lfd fp3,FPR_PARMS+16(r1) +- lfd fp4,FPR_PARMS+24(r1) ++ la r11,VR_RTN(r1) ++ lfd fp3,FPR_RTN+16(r1) ++ lfd fp4,FPR_RTN+24(r1) ++#if _CALL_ELF == 2 ++ la r12,VR_RTN+16(r1) ++ lfd fp5,FPR_RTN+32(r1) ++ lfd fp6,FPR_RTN+40(r1) ++ li r30,32 ++ li r31,64 ++ lfd fp7,FPR_RTN+48(r1) ++ lfd fp8,FPR_RTN+56(r1) ++ lfd fp9,FPR_RTN+64(r1) ++ lfd fp10,FPR_RTN+72(r1) ++#endif + beq L(pltexitreturn) +- lvx v2,0,r10 ++ lvx v2,0,r11 ++#if _CALL_ELF == 2 ++ lvx v3,0,r12 ++ lvx v4,r30,r11 ++ lvx v5,r30,r12 ++ addi r30,r30,64 ++ lvx v6,r31,r11 ++ lvx v7,r31,r12 ++ lvx v8,r30,r11 ++ lvx v9,r30,r12 ++#endif + L(pltexitreturn): + ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1) +- ld r31,584(r1) +- ld r30,576(r1) ++ ld r31,FRAME_SIZE-8(r1) ++ ld r30,FRAME_SIZE-16(r1) + mtlr r0 + ld r1,0(r1) + blr +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/tst-audit.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/tst-audit.h +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/tst-audit.h 2014-05-29 14:11:12.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/tst-audit.h 2014-05-29 14:11:20.000000000 -0500 +@@ -18,8 +18,16 @@ + License along with the GNU C Library. If not, see + . 
*/ + ++#if _CALL_ELF != 2 + #define pltenter la_ppc64_gnu_pltenter + #define pltexit la_ppc64_gnu_pltexit + #define La_regs La_ppc64_regs + #define La_retval La_ppc64_retval + #define int_retval lrv_r3 ++#else ++#define pltenter la_ppc64v2_gnu_pltenter ++#define pltexit la_ppc64v2_gnu_pltexit ++#define La_regs La_ppc64v2_regs ++#define La_retval La_ppc64v2_retval ++#define int_retval lrv_r3 ++#endif diff --git a/packages/glibc/2.17/0065-glibc-ppc64le-43.patch b/packages/glibc/2.17/0065-glibc-ppc64le-43.patch new file mode 100644 index 0000000..8c43abb --- /dev/null +++ b/packages/glibc/2.17/0065-glibc-ppc64le-43.patch @@ -0,0 +1,248 @@ +# commit 5b118558f9fb0620508d51c34c2cb5ba4f1f01c2 +# Author: Ulrich Weigand +# Date: Wed Dec 4 07:08:48 2013 -0600 +# +# PowerPC64 ELFv2 ABI 6/6: Bump ld.so soname version number +# +# To avoid having a ELFv2 binary accidentally picking up an old ABI ld.so, +# this patch bumps the soname to ld64.so.2. +# +# In theory (or for testing purposes) this will also allow co-installing +# ld.so versions for both ABIs on the same system. Note that the kernel +# will already be able to load executables of both ABIs. However, there +# is currently no plan to use that theoretical possibility in a any +# supported distribution environment ... +# +# Note that in order to check which ABI to use, we need to invoke the +# compiler to check the _CALL_ELF macro; this is done in a new configure +# check in sysdeps/unix/sysv/linux/powerpc/powerpc64/configure.ac, +# replacing the hard-coded value of default-abi in the Makefile. +# +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/Makefile glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/Makefile +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/Makefile 2014-05-29 14:12:25.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/Makefile 2014-05-29 14:12:30.000000000 -0500 +@@ -1,9 +1,12 @@ +-abi-variants := 32 64 ++abi-variants := 32 64-v1 64-v2 + abi-32-options := -U__powerpc64__ + abi-32-condition := __WORDSIZE == 32 +-abi-64-options := -D__powerpc64__ +-abi-64-condition := __WORDSIZE == 64 +-abi-64-ld-soname := ld64.so.1 ++abi-64-v1-options := -D__powerpc64__ -U_CALL_ELF -D_CALL_ELF=1 ++abi-64-v1-condition := __WORDSIZE == 64 && _CALL_ELF != 2 ++abi-64-v1-ld-soname := ld64.so.1 ++abi-64-v2-options := -D__powerpc64__ -U_CALL_ELF -D_CALL_ELF=2 ++abi-64-v2-condition := __WORDSIZE == 64 && _CALL_ELF == 2 ++abi-64-v2-ld-soname := ld64.so.2 + + ifeq ($(subdir),rt) + librt-routines += rt-sysdep +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/ldconfig.h glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/ldconfig.h +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/ldconfig.h 2014-05-29 14:12:25.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/ldconfig.h 2014-05-29 14:12:30.000000000 -0500 +@@ -20,7 +20,8 @@ + + #define SYSDEP_KNOWN_INTERPRETER_NAMES \ + { "/lib/ld.so.1", FLAG_ELF_LIBC6 }, \ +- { "/lib64/ld64.so.1", FLAG_ELF_LIBC6 }, ++ { "/lib64/ld64.so.1", FLAG_ELF_LIBC6 }, \ ++ { "/lib64/ld64.so.2", FLAG_ELF_LIBC6 }, + #define SYSDEP_KNOWN_LIBRARY_NAMES \ + { "libc.so.6", FLAG_ELF_LIBC6 }, \ + { "libm.so.6", FLAG_ELF_LIBC6 }, +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/Makefile glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/Makefile +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/Makefile 2014-05-29 14:12:25.000000000 -0500 ++++ 
glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/Makefile 1969-12-31 18:00:00.000000000 -0600 +@@ -1,2 +0,0 @@ +-# See Makeconfig regarding the use of default-abi. +-default-abi := 64 +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/configure glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/configure +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/configure 1969-12-31 18:00:00.000000000 -0600 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/configure 2014-05-29 14:12:30.000000000 -0500 +@@ -0,0 +1,166 @@ ++# This file is generated from configure.ac by Autoconf. DO NOT EDIT! ++ # Local configure fragment for sysdeps/unix/sysv/linux/powerpc/powerpc64/. ++ ++# Define default-abi according to compiler flags. ++ ++ ++{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 ++$as_echo_n "checking for grep that handles long lines and -e... " >&6; } ++if ${ac_cv_path_GREP+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ if test -z "$GREP"; then ++ ac_path_GREP_found=false ++ # Loop through the user's path and test for each of PROGNAME-LIST ++ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR ++for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin ++do ++ IFS=$as_save_IFS ++ test -z "$as_dir" && as_dir=. ++ for ac_prog in grep ggrep; do ++ for ac_exec_ext in '' $ac_executable_extensions; do ++ ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" ++ { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue ++# Check for GNU ac_path_GREP and select it if it is found. ++ # Check for GNU $ac_path_GREP ++case `"$ac_path_GREP" --version 2>&1` in ++*GNU*) ++ ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; ++*) ++ ac_count=0 ++ $as_echo_n 0123456789 >"conftest.in" ++ while : ++ do ++ cat "conftest.in" "conftest.in" >"conftest.tmp" ++ mv "conftest.tmp" "conftest.in" ++ cp "conftest.in" "conftest.nl" ++ $as_echo 'GREP' >> "conftest.nl" ++ "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break ++ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break ++ as_fn_arith $ac_count + 1 && ac_count=$as_val ++ if test $ac_count -gt ${ac_path_GREP_max-0}; then ++ # Best one so far, save it but keep looking for a better one ++ ac_cv_path_GREP="$ac_path_GREP" ++ ac_path_GREP_max=$ac_count ++ fi ++ # 10*(2^10) chars as input seems more than enough ++ test $ac_count -gt 10 && break ++ done ++ rm -f conftest.in conftest.tmp conftest.nl conftest.out;; ++esac ++ ++ $ac_path_GREP_found && break 3 ++ done ++ done ++ done ++IFS=$as_save_IFS ++ if test -z "$ac_cv_path_GREP"; then ++ as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 ++ fi ++else ++ ac_cv_path_GREP=$GREP ++fi ++ ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 ++$as_echo "$ac_cv_path_GREP" >&6; } ++ GREP="$ac_cv_path_GREP" ++ ++ ++{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 ++$as_echo_n "checking for egrep... " >&6; } ++if ${ac_cv_path_EGREP+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 ++ then ac_cv_path_EGREP="$GREP -E" ++ else ++ if test -z "$EGREP"; then ++ ac_path_EGREP_found=false ++ # Loop through the user's path and test for each of PROGNAME-LIST ++ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR ++for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin ++do ++ IFS=$as_save_IFS ++ test -z "$as_dir" && as_dir=. 
++ for ac_prog in egrep; do ++ for ac_exec_ext in '' $ac_executable_extensions; do ++ ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" ++ { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue ++# Check for GNU ac_path_EGREP and select it if it is found. ++ # Check for GNU $ac_path_EGREP ++case `"$ac_path_EGREP" --version 2>&1` in ++*GNU*) ++ ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; ++*) ++ ac_count=0 ++ $as_echo_n 0123456789 >"conftest.in" ++ while : ++ do ++ cat "conftest.in" "conftest.in" >"conftest.tmp" ++ mv "conftest.tmp" "conftest.in" ++ cp "conftest.in" "conftest.nl" ++ $as_echo 'EGREP' >> "conftest.nl" ++ "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break ++ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break ++ as_fn_arith $ac_count + 1 && ac_count=$as_val ++ if test $ac_count -gt ${ac_path_EGREP_max-0}; then ++ # Best one so far, save it but keep looking for a better one ++ ac_cv_path_EGREP="$ac_path_EGREP" ++ ac_path_EGREP_max=$ac_count ++ fi ++ # 10*(2^10) chars as input seems more than enough ++ test $ac_count -gt 10 && break ++ done ++ rm -f conftest.in conftest.tmp conftest.nl conftest.out;; ++esac ++ ++ $ac_path_EGREP_found && break 3 ++ done ++ done ++ done ++IFS=$as_save_IFS ++ if test -z "$ac_cv_path_EGREP"; then ++ as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 ++ fi ++else ++ ac_cv_path_EGREP=$EGREP ++fi ++ ++ fi ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 ++$as_echo "$ac_cv_path_EGREP" >&6; } ++ EGREP="$ac_cv_path_EGREP" ++ ++ ++{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the compiler is using the PowerPC64 ELFv2 ABI" >&5 ++$as_echo_n "checking whether the compiler is using the PowerPC64 ELFv2 ABI... " >&6; } ++if ${libc_cv_ppc64_elfv2_abi+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ cat confdefs.h - <<_ACEOF >conftest.$ac_ext ++/* end confdefs.h. */ ++#if _CALL_ELF == 2 ++ yes ++ #endif ++ ++_ACEOF ++if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | ++ $EGREP "yes" >/dev/null 2>&1; then : ++ libc_cv_ppc64_elfv2_abi=yes ++else ++ libc_cv_ppc64_elfv2_abi=no ++fi ++rm -f conftest* ++ ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ppc64_elfv2_abi" >&5 ++$as_echo "$libc_cv_ppc64_elfv2_abi" >&6; } ++if test $libc_cv_ppc64_elfv2_abi = yes; then ++ config_vars="$config_vars ++default-abi = 64-v2" ++else ++ config_vars="$config_vars ++default-abi = 64-v1" ++fi +diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/configure.ac glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/configure.ac +--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/configure.ac 1969-12-31 18:00:00.000000000 -0600 ++++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/configure.ac 2014-05-29 14:12:30.000000000 -0500 +@@ -0,0 +1,15 @@ ++GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. ++# Local configure fragment for sysdeps/unix/sysv/linux/powerpc/powerpc64/. ++ ++# Define default-abi according to compiler flags. 
++AC_CACHE_CHECK([whether the compiler is using the PowerPC64 ELFv2 ABI], ++ [libc_cv_ppc64_elfv2_abi], ++ [AC_EGREP_CPP(yes,[#if _CALL_ELF == 2 ++ yes ++ #endif ++ ], libc_cv_ppc64_elfv2_abi=yes, libc_cv_ppc64_elfv2_abi=no)]) ++if test $libc_cv_ppc64_elfv2_abi = yes; then ++ LIBC_CONFIG_VAR([default-abi], [64-v2]) ++else ++ LIBC_CONFIG_VAR([default-abi], [64-v1]) ++fi diff --git a/packages/glibc/2.17/0066-glibc-ppc64le-44.patch b/packages/glibc/2.17/0066-glibc-ppc64le-44.patch new file mode 100644 index 0000000..e074cb9 --- /dev/null +++ b/packages/glibc/2.17/0066-glibc-ppc64le-44.patch @@ -0,0 +1,26 @@ +# commit c859b32e9d76afe8a3f20bb9528961a573c06937 +# Author: Alan Modra +# Date: Tue Apr 1 14:07:42 2014 +1030 +# +# Fix s_copysign stack temp for PowerPC64 ELFv2 +# +# [BZ #16786] +# * sysdeps/powerpc/powerpc64/fpu/s_copysign.S: Don't trash stack. +# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_copysign.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_copysign.S +--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_copysign.S 2014-05-29 14:13:47.000000000 -0500 ++++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_copysign.S 2014-05-29 14:13:50.000000000 -0500 +@@ -27,11 +27,11 @@ + /* double [f1] copysign (double [f1] x, double [f2] y); + copysign(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ +- stfd fp2,56(r1) ++ stfd fp2,-8(r1) + nop + nop + nop +- ld r3,56(r1) ++ ld r3,-8(r1) + cmpdi r3,0 + blt L(0) + fabs fp1,fp1 diff --git a/packages/glibc/2.17/0067-glibc-ppc64le-45.patch b/packages/glibc/2.17/0067-glibc-ppc64le-45.patch new file mode 100644 index 0000000..9030fc2 --- /dev/null +++ b/packages/glibc/2.17/0067-glibc-ppc64le-45.patch @@ -0,0 +1,33 @@ +# +# For PPC64LE only! +# +# This is fixed upstream by the removal of Versions.def +# and auto-generation of the SHLIB_COMPAT required entries. +# See: https://sourceware.org/ml/libc-alpha/2014-02/msg00818.html +# Backporting that infrastructure to RHEL 7.x is too much work +# at this junction for little reward. Instead we simply fix up +# the Versions.def to include GLIBC_2.3 which is used by +# nptl/old_pthread_atfork.c, otherwise ppc64le will get +# pthread_atfork in libpthread.so.0 when it should not. +# +# The ABI testing for libpthread.so now passes for ppc64le. +# +diff -urN glibc-2.17-c758a686/Versions.def glibc-2.17-c758a686/Versions.def +--- glibc-2.17-c758a686/Versions.def 2014-06-02 21:13:12.000000000 +0000 ++++ glibc-2.17-c758a686/Versions.def 2014-06-02 21:14:38.000000000 +0000 +@@ -92,6 +92,7 @@ + GLIBC_2.2 + GLIBC_2.2.3 + GLIBC_2.2.6 ++ GLIBC_2.3 + GLIBC_2.3.2 + GLIBC_2.3.3 + GLIBC_2.3.4 +@@ -99,6 +100,7 @@ + GLIBC_2.6 + GLIBC_2.11 + GLIBC_2.12 ++ GLIBC_2.17 + GLIBC_PRIVATE + } + libresolv { diff --git a/packages/glibc/2.17/0068-glibc-ppc64le-46.patch b/packages/glibc/2.17/0068-glibc-ppc64le-46.patch new file mode 100644 index 0000000..6f3d1d9 --- /dev/null +++ b/packages/glibc/2.17/0068-glibc-ppc64le-46.patch @@ -0,0 +1,22 @@ +# +# On POWER this patch also fixes test-ildoubl and test-ldouble failures where tan +# rounded toward zero had acceptable 1 ULP error. Upstream is using 3 ULP, but +# we prefer to keep the bound tighter unless we have a reason not to. +# +# This is the ppc64le version which is required becuase it applies *after* another +# ppc64le patch that touches the same ULPs file. See glibc-power-libm-test-ulps.patch +# for the ppc64/ppc version. 
+# +diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/libm-test-ulps glibc-2.17-c758a686/sysdeps/powerpc/fpu/libm-test-ulps +--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/libm-test-ulps 2014-07-25 22:07:06.280020855 -0400 ++++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/libm-test-ulps 2014-07-25 22:26:54.650021033 -0400 +@@ -2644,6 +2644,9 @@ + Test "tan_towardzero (2)": + ildouble: 1 + ldouble: 1 ++Test "tan_towardzero (2) == -2.1850398632615189916433061023136825434320": ++ildouble: 1 ++ldouble: 1 + Test "tan_towardzero (3) == -0.1425465430742778052956354105339134932261": + float: 1 + ifloat: 1 -- cgit v0.10.2-6-g49f6 From 0088351811bf442aa2e7d35c564f36ca67a8a699 Mon Sep 17 00:00:00 2001 From: messense Date: Thu, 13 May 2021 18:54:24 +0800 Subject: fix BZ 18116 - build failure on ppc64le: setcontext.S uses power6 mtfsf when not supported diff --git a/packages/glibc/2.17/0069-uses-power6-mtfsf-when-not-supported.patch b/packages/glibc/2.17/0069-uses-power6-mtfsf-when-not-supported.patch new file mode 100644 index 0000000..8864739 --- /dev/null +++ b/packages/glibc/2.17/0069-uses-power6-mtfsf-when-not-supported.patch @@ -0,0 +1,124 @@ +2015-03-11 Martin Sebor + + * sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S + (__setcontext): Set machine to power6 regardless of whether + or not _ARCH_PWR6 is defined. + * sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S + (__novec_swapcontext): Likewise. + +diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S +index e47a57a..a1ed419 100644 +--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S ++++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S +@@ -79,12 +79,13 @@ ENTRY(__novec_setcontext) + lfd fp31,(SIGCONTEXT_FP_REGS+(PT_R31*8))(r31) + lfd fp30,(SIGCONTEXT_FP_REGS+(PT_R30*8))(r31) + ++ .machine push ++ .machine "power6" ++ + # ifdef _ARCH_PWR6 + /* Use the extended four-operand version of the mtfsf insn. */ + mtfsf 0xff,fp0,1,0 + # else +- .machine push +- .machine "power6" + /* Availability of DFP indicates a 64-bit FPSCR. */ + andi. r6,r5,PPC_FEATURE_HAS_DFP + beq 5f +@@ -95,8 +96,10 @@ ENTRY(__novec_setcontext) + 5: + mtfsf 0xff,fp0 + 6: +- .machine pop + # endif /* _ARCH_PWR6 */ ++ ++ .machine pop ++ + lfd fp29,(SIGCONTEXT_FP_REGS+(PT_R29*8))(r31) + lfd fp28,(SIGCONTEXT_FP_REGS+(PT_R28*8))(r31) + lfd fp27,(SIGCONTEXT_FP_REGS+(PT_R27*8))(r31) +@@ -362,12 +365,13 @@ L(has_no_vec): + lfd fp31,(SIGCONTEXT_FP_REGS+(PT_R31*8))(r31) + lfd fp30,(SIGCONTEXT_FP_REGS+(PT_R30*8))(r31) + ++ .machine push ++ .machine "power6" ++ + # ifdef _ARCH_PWR6 + /* Use the extended four-operand version of the mtfsf insn. */ + mtfsf 0xff,fp0,1,0 + # else +- .machine push +- .machine "power6" + /* Availability of DFP indicates a 64-bit FPSCR. */ + andi. 
r6,r5,PPC_FEATURE_HAS_DFP + beq 7f +@@ -378,8 +382,10 @@ L(has_no_vec): + 7: + mtfsf 0xff,fp0 + 8: +- .machine pop + # endif /* _ARCH_PWR6 */ ++ ++ .machine pop ++ + lfd fp29,(SIGCONTEXT_FP_REGS+(PT_R29*8))(r31) + lfd fp28,(SIGCONTEXT_FP_REGS+(PT_R28*8))(r31) + lfd fp27,(SIGCONTEXT_FP_REGS+(PT_R27*8))(r31) +diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S +index bc02a21..b25904d 100644 +--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S ++++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S +@@ -173,6 +173,10 @@ ENTRY(__novec_swapcontext) + lfd fp0,(SIGCONTEXT_FP_REGS+(32*8))(r31) + lfd fp31,(SIGCONTEXT_FP_REGS+(PT_R31*8))(r31) + lfd fp30,(SIGCONTEXT_FP_REGS+(PT_R30*8))(r31) ++ ++ .machine push ++ .machine "power6" ++ + # ifdef _ARCH_PWR6 + /* Use the extended four-operand version of the mtfsf insn. */ + mtfsf 0xff,fp0,1,0 +@@ -189,8 +193,10 @@ ENTRY(__novec_swapcontext) + 5: + mtfsf 0xff,fp0 + 6: +- .machine pop + #endif /* _ARCH_PWR6 */ ++ ++ .machine pop ++ + lfd fp29,(SIGCONTEXT_FP_REGS+(PT_R29*8))(r31) + lfd fp28,(SIGCONTEXT_FP_REGS+(PT_R28*8))(r31) + lfd fp27,(SIGCONTEXT_FP_REGS+(PT_R27*8))(r31) +@@ -652,12 +658,14 @@ L(has_no_vec2): + lfd fp0,(SIGCONTEXT_FP_REGS+(32*8))(r31) + lfd fp31,(SIGCONTEXT_FP_REGS+(PT_R31*8))(r31) + lfd fp30,(SIGCONTEXT_FP_REGS+(PT_R30*8))(r31) ++ ++ .machine push ++ .machine "power6" ++ + # ifdef _ARCH_PWR6 + /* Use the extended four-operand version of the mtfsf insn. */ + mtfsf 0xff,fp0,1,0 + # else +- .machine push +- .machine "power6" + /* Availability of DFP indicates a 64-bit FPSCR. */ + andi. r6,r8,PPC_FEATURE_HAS_DFP + beq 7f +@@ -668,8 +676,10 @@ L(has_no_vec2): + 7: + mtfsf 0xff,fp0 + 8: +- .machine pop + #endif /* _ARCH_PWR6 */ ++ ++ .machine pop ++ + lfd fp29,(SIGCONTEXT_FP_REGS+(PT_R29*8))(r31) + lfd fp28,(SIGCONTEXT_FP_REGS+(PT_R28*8))(r31) + lfd fp27,(SIGCONTEXT_FP_REGS+(PT_R27*8))(r31) -- cgit v0.10.2-6-g49f6