Original patch from gentoo: gentoo/src/patchsets/gcc/4.3.0/gentoo/20_all_gcc-x86-emit-cld.patch bandaid for the cld issue. to be dropped when gcc-4.3 goes stable. http://gcc.gnu.org/ml/gcc-patches/2008-03/msg00417.html 2008-03-06 Uros Bizjak * config/i386/i386.h (TARGET_CLD): New define. (struct machine_function): Add needs_cld field. (ix86_current_function_needs_cld): New define. * config/i386/i386.md (UNSPEC_CLD): New unspec volatile constant. ("cld"): New isns pattern. ("strmov_singleop"): Set ix86_current_function_needs_cld flag. ("rep_mov"): Ditto. ("strset_singleop"): Ditto. ("rep_stos"): Ditto. ("cmpstrnqi_nz_1"): Ditto. ("cmpstrnqi_1"): Ditto. ("strlenqi_1"): Ditto. * config/i386/i386.c (ix86_expand_prologue): Emit cld insn for TARGET_CLD when ix86_current_function_needs_cld is set. diff -durN gcc-4.3.0.orig/gcc/config/i386/i386.c gcc-4.3.0/gcc/config/i386/i386.c --- gcc-4.3.0.orig/gcc/config/i386/i386.c 2008-02-21 13:30:00.000000000 +0100 +++ gcc-4.3.0/gcc/config/i386/i386.c 2008-06-10 14:44:37.000000000 +0200 @@ -6448,6 +6448,10 @@ emit_insn (gen_prologue_use (pic_offset_table_rtx)); emit_insn (gen_blockage ()); } + + /* Emit cld instruction if stringops are used in the function. */ + if (TARGET_CLD && ix86_current_function_needs_cld) + emit_insn (gen_cld ()); } /* Emit code to restore saved registers using MOV insns. First register diff -durN gcc-4.3.0.orig/gcc/config/i386/i386.h gcc-4.3.0/gcc/config/i386/i386.h --- gcc-4.3.0.orig/gcc/config/i386/i386.h 2008-02-15 09:12:02.000000000 +0100 +++ gcc-4.3.0/gcc/config/i386/i386.h 2008-06-10 14:44:37.000000000 +0200 @@ -388,6 +388,7 @@ extern int x86_prefetch_sse; +#define TARGET_CLD 1 #define TARGET_ABM x86_abm #define TARGET_CMPXCHG16B x86_cmpxchg16b #define TARGET_POPCNT x86_popcnt @@ -2443,8 +2444,9 @@ int save_varrargs_registers; int accesses_prev_frame; int optimize_mode_switching[MAX_386_ENTITIES]; - /* Set by ix86_compute_frame_layout and used by prologue/epilogue expander to - determine the style used. */ + int needs_cld; + /* Set by ix86_compute_frame_layout and used by prologue/epilogue + expander to determine the style used. */ int use_fast_prologue_epilogue; /* Number of saved registers USE_FAST_PROLOGUE_EPILOGUE has been computed for. */ @@ -2464,6 +2466,7 @@ #define ix86_stack_locals (cfun->machine->stack_locals) #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers) #define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching) +#define ix86_current_function_needs_cld (cfun->machine->needs_cld) #define ix86_tls_descriptor_calls_expanded_in_cfun \ (cfun->machine->tls_descriptor_call_expanded_p) /* Since tls_descriptor_call_expanded is not cleared, even if all TLS diff -durN gcc-4.3.0.orig/gcc/config/i386/i386.md gcc-4.3.0/gcc/config/i386/i386.md --- gcc-4.3.0.orig/gcc/config/i386/i386.md 2008-02-06 12:34:00.000000000 +0100 +++ gcc-4.3.0/gcc/config/i386/i386.md 2008-06-10 14:44:37.000000000 +0200 @@ -205,6 +205,7 @@ (UNSPECV_XCHG 12) (UNSPECV_LOCK 13) (UNSPECV_PROLOGUE_USE 14) + (UNSPECV_CLD 15) ]) ;; Constants to represent pcomtrue/pcomfalse variants @@ -18529,6 +18530,14 @@ ;; Block operation instructions +(define_insn "cld" + [(unspec_volatile [(const_int 0)] UNSPECV_CLD)] + "" + "cld" + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + (define_expand "movmemsi" [(use (match_operand:BLK 0 "memory_operand" "")) (use (match_operand:BLK 1 "memory_operand" "")) @@ -18601,7 +18610,7 @@ (set (match_operand 2 "register_operand" "") (match_operand 5 "" ""))])] "TARGET_SINGLE_STRINGOP || optimize_size" - "") + "ix86_current_function_needs_cld = 1;") (define_insn "*strmovdi_rex_1" [(set (mem:DI (match_operand:DI 2 "register_operand" "0")) @@ -18718,7 +18727,7 @@ (match_operand 3 "memory_operand" "")) (use (match_dup 4))])] "" - "") + "ix86_current_function_needs_cld = 1;") (define_insn "*rep_movdi_rex64" [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) @@ -18878,7 +18887,7 @@ (set (match_operand 0 "register_operand" "") (match_operand 3 "" ""))])] "TARGET_SINGLE_STRINGOP || optimize_size" - "") + "ix86_current_function_needs_cld = 1;") (define_insn "*strsetdi_rex_1" [(set (mem:DI (match_operand:DI 1 "register_operand" "0")) @@ -18972,7 +18981,7 @@ (use (match_operand 3 "register_operand" "")) (use (match_dup 1))])] "" - "") + "ix86_current_function_needs_cld = 1;") (define_insn "*rep_stosdi_rex64" [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) @@ -19148,7 +19157,7 @@ (clobber (match_operand 1 "register_operand" "")) (clobber (match_dup 2))])] "" - "") + "ix86_current_function_needs_cld = 1;") (define_insn "*cmpstrnqi_nz_1" [(set (reg:CC FLAGS_REG) @@ -19195,7 +19204,7 @@ (clobber (match_operand 1 "register_operand" "")) (clobber (match_dup 2))])] "" - "") + "ix86_current_function_needs_cld = 1;") (define_insn "*cmpstrnqi_1" [(set (reg:CC FLAGS_REG) @@ -19264,7 +19273,7 @@ (clobber (match_operand 1 "register_operand" "")) (clobber (reg:CC FLAGS_REG))])] "" - "") + "ix86_current_function_needs_cld = 1;") (define_insn "*strlenqi_1" [(set (match_operand:SI 0 "register_operand" "=&c")