patches/glibc/ports-2.10.1/540-alpha-fix-memchr.patch
changeset 1625 fde082da9813
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/patches/glibc/ports-2.10.1/540-alpha-fix-memchr.patch	Fri Nov 13 21:37:18 2009 +0100
     1.3 @@ -0,0 +1,121 @@
     1.4 +2009-07-27  Aurelien Jarno  <aurelien@aurel32.net>
     1.5 +
     1.6 +	* sysdeps/alpha/memchr.S: Use prefetch load.
     1.7 +	* sysdeps/alpha/alphaev6/memchr.S: Likewise.
     1.8 +
     1.9 +diff -durN glibc-2.10.1.orig/glibc-ports-2.10.1/sysdeps/alpha/alphaev6/memchr.S glibc-2.10.1/glibc-ports-2.10.1/sysdeps/alpha/alphaev6/memchr.S
    1.10 +--- glibc-2.10.1.orig/glibc-ports-2.10.1/sysdeps/alpha/alphaev6/memchr.S	2009-05-16 10:36:20.000000000 +0200
    1.11 ++++ glibc-2.10.1/glibc-ports-2.10.1/sysdeps/alpha/alphaev6/memchr.S	2009-11-13 00:51:15.000000000 +0100
    1.12 +@@ -127,7 +127,7 @@
    1.13 +         cmpbge  $31, $1, $2	# E :
    1.14 +         bne     $2, $found_it	# U :
    1.15 + 	# At least one byte left to process.
    1.16 +-	ldq	$1, 8($0)	# L :
    1.17 ++	ldq	$31, 8($0)	# L :
    1.18 + 	subq	$5, 1, $18	# E : U L U L
    1.19 + 
    1.20 + 	addq	$0, 8, $0	# E :
    1.21 +@@ -143,38 +143,38 @@
    1.22 + 	and	$4, 8, $4	# E : odd number of quads?
    1.23 + 	bne	$4, $odd_quad_count # U :
    1.24 + 	# At least three quads remain to be accessed
    1.25 +-	mov	$1, $4		# E : L U L U : move prefetched value to correct reg
    1.26 +	nop			# E : L U L U : slot filler; the loop now loads its own quad, nothing to move
    1.27 + 
    1.28 + 	.align	4
    1.29 + $unrolled_loop:
    1.30 +-	ldq	$1, 8($0)	# L : prefetch $1
    1.31 +-	xor	$17, $4, $2	# E :
    1.32 +-	cmpbge	$31, $2, $2	# E :
    1.33 +-	bne	$2, $found_it	# U : U L U L
    1.34 ++	ldq	$1, 0($0)	# L : load quad
    1.35 ++	xor	$17, $1, $2	# E :
    1.36 ++	ldq	$31, 8($0)	# L : prefetch next quad
    1.37 ++	cmpbge	$31, $2, $2	# E : U L U L
    1.38 + 
    1.39 ++	bne	$2, $found_it	# U :
    1.40 + 	addq	$0, 8, $0	# E :
    1.41 + 	nop			# E :
    1.42 + 	nop			# E :
    1.43 +-	nop			# E :
    1.44 + 
    1.45 + $odd_quad_count:
    1.46 ++	ldq	$1, 0($0)	# L : load quad
    1.47 + 	xor	$17, $1, $2	# E :
    1.48 +-	ldq	$4, 8($0)	# L : prefetch $4
    1.49 +	ldq	$31, 8($0)	# L : prefetch next quad
    1.50 + 	cmpbge	$31, $2, $2	# E :
    1.51 +-	addq	$0, 8, $6	# E :
    1.52 + 
    1.53 ++	addq	$0, 8, $6	# E :
    1.54 + 	bne	$2, $found_it	# U :
    1.55 + 	cmpult	$6, $18, $6	# E :
    1.56 + 	addq	$0, 8, $0	# E :
    1.57 +-	nop			# E :
    1.58 + 
    1.59 + 	bne	$6, $unrolled_loop # U :
    1.60 +-	mov	$4, $1		# E : move prefetched value into $1
    1.61 + 	nop			# E :
    1.62 + 	nop			# E :
    1.63 +-
    1.64 +-$final:	subq	$5, $0, $18	# E : $18 <- number of bytes left to do
    1.65 + 	nop			# E :
    1.66 ++
    1.67 ++$final:	ldq	$1, 0($0)	# L : load last quad
    1.68 ++	subq	$5, $0, $18	# E : $18 <- number of bytes left to do
    1.69 + 	nop			# E :
    1.70 + 	bne	$18, $last_quad	# U :
    1.71 + 
    1.72 +diff -durN glibc-2.10.1.orig/glibc-ports-2.10.1/sysdeps/alpha/memchr.S glibc-2.10.1/glibc-ports-2.10.1/sysdeps/alpha/memchr.S
    1.73 +--- glibc-2.10.1.orig/glibc-ports-2.10.1/sysdeps/alpha/memchr.S	2009-05-16 10:36:20.000000000 +0200
    1.74 ++++ glibc-2.10.1/glibc-ports-2.10.1/sysdeps/alpha/memchr.S	2009-11-13 00:51:15.000000000 +0100
    1.75 +@@ -119,7 +119,7 @@
    1.76 + 
    1.77 + 	# At least one byte left to process.
    1.78 + 
    1.79 +-	ldq	t0, 8(v0)	# e0	:
    1.80 ++	ldq	zero, 8(v0)	# e0	: prefetch next quad
    1.81 + 	subq	t4, 1, a2	# .. e1 :
    1.82 + 	addq	v0, 8, v0	#-e0	:
    1.83 + 
    1.84 +@@ -138,19 +138,19 @@
    1.85 + 
    1.86 + 	# At least three quads remain to be accessed
    1.87 + 
    1.88 +-	mov	t0, t3		# e0	: move prefetched value to correct reg
    1.89 +-
    1.90 + 	.align	4
    1.91 + $unrolled_loop:
    1.92 +-	ldq	t0, 8(v0)	#-e0	: prefetch t0
    1.93 +-	xor	a1, t3, t1	# .. e1 :
    1.94 +-	cmpbge	zero, t1, t1	# e0	:
    1.95 +-	bne	t1, $found_it	# .. e1 :
    1.96 ++	ldq	t0, 0(v0)	# e0	: load quad
    1.97 ++	xor	a1, t0, t1	# .. e1 :
    1.98 ++	ldq	zero, 8(v0)	# e0	: prefetch next quad
    1.99 ++	cmpbge	zero, t1, t1	# .. e1:
   1.100 ++	bne	t1, $found_it	# e0    :
   1.101 + 
   1.102 +-	addq	v0, 8, v0	#-e0	:
   1.103 ++	addq	v0, 8, v0	#    e1	:
   1.104 + $odd_quad_count:
   1.105 ++	ldq	t0, 0(v0)	# e0	: load quad
   1.106 + 	xor	a1, t0, t1	# .. e1 :
   1.107 +-	ldq	t3, 8(v0)	# e0	: prefetch t3
   1.108 ++	ldq	zero, 8(v0)	# e0	: prefetch next quad
   1.109 + 	cmpbge	zero, t1, t1	# .. e1 :
   1.110 + 	addq	v0, 8, t5	#-e0	:
   1.111 + 	bne	t1, $found_it	# .. e1	:
   1.112 +@@ -159,8 +159,8 @@
   1.113 + 	addq	v0, 8, v0	# .. e1 :
   1.114 + 	bne	t5, $unrolled_loop #-e1 :
   1.115 + 
   1.116 +-	mov	t3, t0		# e0	: move prefetched value into t0
   1.117 +-$final:	subq	t4, v0, a2	# .. e1	: a2 <- number of bytes left to do
   1.118 ++$final:	ldq	t0, 0(v0)	# e0	: load last quad
   1.119 ++	subq	t4, v0, a2	# .. e1	: a2 <- number of bytes left to do
   1.120 + 	bne	a2, $last_quad	# e1	:
   1.121 + 
   1.122 + $not_found:
   1.123 +diff -durN glibc-2.10.1.orig/ports/sysdeps/alpha/alphaev6/memchr.S glibc-2.10.1/ports/sysdeps/alpha/alphaev6/memchr.S
   1.124 +diff -durN glibc-2.10.1.orig/ports/sysdeps/alpha/memchr.S glibc-2.10.1/ports/sysdeps/alpha/memchr.S