patches/gcc/3.4.6/140-arm-ldm-peephole.patch
author "Yann E. MORIN" <yann.morin.1998@free.fr>
Tue Jul 31 22:27:29 2012 +0200 (2012-07-31)
changeset 3018 7776e8369284
parent 746 b150d6f590fc
permissions -rw-r--r--
complibs/cloog: create missing m4 dir

Because we now patch configure.in and configure, the Makefile quicks
in a re-build rule as the source files are now more recent than the
bundled generated files, and that fails because the m4 directory
is missing, although on some systems where aclocal is not installed,
the re-build rule does nothing (except a warning).

Always create tht directory.

Reported-by: Per Arnold Blaasmo <per-arnold.blaasmo@atmel.com>
[Also thanks to Thomas De Schampheleire <patrickdepinguin@gmail.com>
for some digging works on this issue]
Signed-off-by: "Yann E. MORIN" <yann.morin.1998@free.fr>
     1 diff -durN gcc-3.4.6.orig/gcc/config/arm/arm.c gcc-3.4.6/gcc/config/arm/arm.c
     2 --- gcc-3.4.6.orig/gcc/config/arm/arm.c	2005-10-01 15:31:38.000000000 +0200
     3 +++ gcc-3.4.6/gcc/config/arm/arm.c	2007-08-15 22:54:59.000000000 +0200
     4 @@ -4857,6 +4857,11 @@
     5        *load_offset = unsorted_offsets[order[0]];
     6      }
     7  
     8 +  /* For XScale a two-word LDM is a performance loss, so only do this if
     9 +     size is more important.  See comments in arm_gen_load_multiple.  */
    10 +  if (nops == 2 && arm_tune_xscale && !optimize_size)
    11 +    return 0;
    12 +
    13    if (unsorted_offsets[order[0]] == 0)
    14      return 1; /* ldmia */
    15  
    16 @@ -5083,6 +5088,11 @@
    17        *load_offset = unsorted_offsets[order[0]];
    18      }
    19  
    20 +  /* For XScale a two-word LDM is a performance loss, so only do this if
    21 +     size is more important.  See comments in arm_gen_load_multiple.  */
    22 +  if (nops == 2 && arm_tune_xscale && !optimize_size)
    23 +    return 0;
    24 +
    25    if (unsorted_offsets[order[0]] == 0)
    26      return 1; /* stmia */
    27  
    28 diff -durN gcc-3.4.6.orig/gcc/config/arm/arm.md gcc-3.4.6/gcc/config/arm/arm.md
    29 --- gcc-3.4.6.orig/gcc/config/arm/arm.md	2005-10-01 15:31:38.000000000 +0200
    30 +++ gcc-3.4.6/gcc/config/arm/arm.md	2007-08-15 22:54:59.000000000 +0200
    31 @@ -8811,13 +8811,16 @@
    32     (set_attr "length" "4,8,8")]
    33  )
    34  
    35 +; Try to convert LDR+LDR+arith into [add+]LDM+arith
    36 +; On XScale, LDM is always slower than two LDRs, so only do this if
    37 +; optimising for size.
    38  (define_insn "*arith_adjacentmem"
    39    [(set (match_operand:SI 0 "s_register_operand" "=r")
    40  	(match_operator:SI 1 "shiftable_operator"
    41  	 [(match_operand:SI 2 "memory_operand" "m")
    42  	  (match_operand:SI 3 "memory_operand" "m")]))
    43     (clobber (match_scratch:SI 4 "=r"))]
    44 -  "TARGET_ARM && adjacent_mem_locations (operands[2], operands[3])"
    45 +  "TARGET_ARM && (!arm_tune_xscale || optimize_size) && adjacent_mem_locations (operands[2], operands[3])"
    46    "*
    47    {
    48      rtx ldm[3];
    49 @@ -8852,6 +8855,8 @@
    50        }
    51     if (val1 && val2)
    52        {
    53 +	/* This would be a loss on a Harvard core, but adjacent_mem_locations()
    54 +	   will prevent it from happening.  */
    55  	rtx ops[3];
    56  	ldm[0] = ops[0] = operands[4];
    57  	ops[1] = XEXP (XEXP (operands[2], 0), 0);
    58 diff -durN gcc-3.4.6.orig/gcc/genpeep.c gcc-3.4.6/gcc/genpeep.c
    59 --- gcc-3.4.6.orig/gcc/genpeep.c	2003-07-05 07:27:22.000000000 +0200
    60 +++ gcc-3.4.6/gcc/genpeep.c	2007-08-15 22:54:59.000000000 +0200
    61 @@ -381,6 +381,7 @@
    62    printf ("#include \"recog.h\"\n");
    63    printf ("#include \"except.h\"\n\n");
    64    printf ("#include \"function.h\"\n\n");
    65 +  printf ("#include \"flags.h\"\n\n");
    66  
    67    printf ("#ifdef HAVE_peephole\n");
    68    printf ("extern rtx peep_operand[];\n\n");