yann@1625: 2009-07-27 Aurelien Jarno yann@1625: yann@1625: * sysdeps/alpha/memchr.S: Use prefetch load. yann@1625: * sysdeps/alpha/alphaev6/memchr.S: Likewise. yann@1625: yann@1625: diff -durN glibc-2.10.1.orig/glibc-ports-2.10.1/sysdeps/alpha/alphaev6/memchr.S glibc-2.10.1/glibc-ports-2.10.1/sysdeps/alpha/alphaev6/memchr.S yann@1625: --- glibc-2.10.1.orig/glibc-ports-2.10.1/sysdeps/alpha/alphaev6/memchr.S 2009-05-16 10:36:20.000000000 +0200 yann@1625: +++ glibc-2.10.1/glibc-ports-2.10.1/sysdeps/alpha/alphaev6/memchr.S 2009-11-13 00:51:15.000000000 +0100 yann@1625: @@ -127,7 +127,7 @@ yann@1625: cmpbge $31, $1, $2 # E : yann@1625: bne $2, $found_it # U : yann@1625: # At least one byte left to process. yann@1625: - ldq $1, 8($0) # L : yann@1625: + ldq $31, 8($0) # L : yann@1625: subq $5, 1, $18 # E : U L U L yann@1625: yann@1625: addq $0, 8, $0 # E : yann@1625: @@ -143,38 +143,38 @@ yann@1625: and $4, 8, $4 # E : odd number of quads? yann@1625: bne $4, $odd_quad_count # U : yann@1625: # At least three quads remain to be accessed yann@1625: - mov $1, $4 # E : L U L U : move prefetched value to correct reg yann@1625: + nop # E : L U L U : filler; there is no prefetched value to move any more yann@1625: yann@1625: .align 4 yann@1625: $unrolled_loop: yann@1625: - ldq $1, 8($0) # L : prefetch $1 yann@1625: - xor $17, $4, $2 # E : yann@1625: - cmpbge $31, $2, $2 # E : yann@1625: - bne $2, $found_it # U : U L U L yann@1625: + ldq $1, 0($0) # L : load quad yann@1625: + xor $17, $1, $2 # E : yann@1625: + ldq $31, 8($0) # L : prefetch next quad yann@1625: + cmpbge $31, $2, $2 # E : U L U L yann@1625: yann@1625: + bne $2, $found_it # U : yann@1625: addq $0, 8, $0 # E : yann@1625: nop # E : yann@1625: nop # E : yann@1625: - nop # E : yann@1625: yann@1625: $odd_quad_count: yann@1625: + ldq $1, 0($0) # L : load quad yann@1625: xor $17, $1, $2 # E : yann@1625: - ldq $4, 8($0) # L : prefetch $4 yann@1625: + ldq $31, 8($0) # L : prefetch next quad yann@1625: cmpbge $31, $2, $2 # E : yann@1625: - addq $0, 
8, $6 # E : yann@1625: yann@1625: + addq $0, 8, $6 # E : yann@1625: bne $2, $found_it # U : yann@1625: cmpult $6, $18, $6 # E : yann@1625: addq $0, 8, $0 # E : yann@1625: - nop # E : yann@1625: yann@1625: bne $6, $unrolled_loop # U : yann@1625: - mov $4, $1 # E : move prefetched value into $1 yann@1625: nop # E : yann@1625: nop # E : yann@1625: - yann@1625: -$final: subq $5, $0, $18 # E : $18 <- number of bytes left to do yann@1625: nop # E : yann@1625: + yann@1625: +$final: ldq $1, 0($0) # L : load last quad yann@1625: + subq $5, $0, $18 # E : $18 <- number of bytes left to do yann@1625: nop # E : yann@1625: bne $18, $last_quad # U : yann@1625: yann@1625: diff -durN glibc-2.10.1.orig/glibc-ports-2.10.1/sysdeps/alpha/memchr.S glibc-2.10.1/glibc-ports-2.10.1/sysdeps/alpha/memchr.S yann@1625: --- glibc-2.10.1.orig/glibc-ports-2.10.1/sysdeps/alpha/memchr.S 2009-05-16 10:36:20.000000000 +0200 yann@1625: +++ glibc-2.10.1/glibc-ports-2.10.1/sysdeps/alpha/memchr.S 2009-11-13 00:51:15.000000000 +0100 yann@1625: @@ -119,7 +119,7 @@ yann@1625: yann@1625: # At least one byte left to process. yann@1625: yann@1625: - ldq t0, 8(v0) # e0 : yann@1625: + ldq zero, 8(v0) # e0 : prefetch next quad yann@1625: subq t4, 1, a2 # .. e1 : yann@1625: addq v0, 8, v0 #-e0 : yann@1625: yann@1625: @@ -138,19 +138,19 @@ yann@1625: yann@1625: # At least three quads remain to be accessed yann@1625: yann@1625: - mov t0, t3 # e0 : move prefetched value to correct reg yann@1625: - yann@1625: .align 4 yann@1625: $unrolled_loop: yann@1625: - ldq t0, 8(v0) #-e0 : prefetch t0 yann@1625: - xor a1, t3, t1 # .. e1 : yann@1625: - cmpbge zero, t1, t1 # e0 : yann@1625: - bne t1, $found_it # .. e1 : yann@1625: + ldq t0, 0(v0) # e0 : load quad yann@1625: + xor a1, t0, t1 # .. e1 : yann@1625: + ldq zero, 8(v0) # e0 : prefetch next quad yann@1625: + cmpbge zero, t1, t1 # .. 
e1: yann@1625: + bne t1, $found_it # e0 : yann@1625: yann@1625: - addq v0, 8, v0 #-e0 : yann@1625: + addq v0, 8, v0 # e1 : yann@1625: $odd_quad_count: yann@1625: + ldq t0, 0(v0) # e0 : load quad yann@1625: xor a1, t0, t1 # .. e1 : yann@1625: - ldq t3, 8(v0) # e0 : prefetch t3 yann@1625: + ldq zero, 8(v0) # e0 : prefetch next quad yann@1625: cmpbge zero, t1, t1 # .. e1 : yann@1625: addq v0, 8, t5 #-e0 : yann@1625: bne t1, $found_it # .. e1 : yann@1625: @@ -159,8 +159,8 @@ yann@1625: addq v0, 8, v0 # .. e1 : yann@1625: bne t5, $unrolled_loop #-e1 : yann@1625: yann@1625: - mov t3, t0 # e0 : move prefetched value into t0 yann@1625: -$final: subq t4, v0, a2 # .. e1 : a2 <- number of bytes left to do yann@1625: +$final: ldq t0, 0(v0) # e0 : load last quad yann@1625: + subq t4, v0, a2 # .. e1 : a2 <- number of bytes left to do yann@1625: bne a2, $last_quad # e1 : yann@1625: yann@1625: $not_found: yann@1625: diff -durN glibc-2.10.1.orig/ports/sysdeps/alpha/alphaev6/memchr.S glibc-2.10.1/ports/sysdeps/alpha/alphaev6/memchr.S yann@1625: diff -durN glibc-2.10.1.orig/ports/sysdeps/alpha/memchr.S glibc-2.10.1/ports/sysdeps/alpha/memchr.S