patches/gcc/3.4.6/140-arm-ldm-peephole.patch
author "Yann E. MORIN" <yann.morin.1998@anciens.enib.fr>
Mon Mar 28 01:05:18 2011 +0200 (2011-03-28)
changeset 2362 0888065f8c4d
parent 746 b150d6f590fc
permissions -rw-r--r--
cc/gcc: clean up the _or_later logic

So far, each version has always selected its appropriate _or_later option,
which in turn selected all the previous _or_later options.

Because the dependencies on companion libs were cumulative, that worked
fine. But the upcoming gcc-4.6 will no longer depend on libelf, so we
can no longer keep the cumulative scheme we have been using so far.

Instead, have each release family select its own dependencies directly,
rather than relying on selecting the previous _or_later options.

Signed-off-by: "Yann E. MORIN" <yann.morin.1998@anciens.enib.fr>
yann@339
     1
diff -durN gcc-3.4.6.orig/gcc/config/arm/arm.c gcc-3.4.6/gcc/config/arm/arm.c
yann@339
     2
--- gcc-3.4.6.orig/gcc/config/arm/arm.c	2005-10-01 15:31:38.000000000 +0200
yann@339
     3
+++ gcc-3.4.6/gcc/config/arm/arm.c	2007-08-15 22:54:59.000000000 +0200
yann@339
     4
@@ -4857,6 +4857,11 @@
yann@339
     5
       *load_offset = unsorted_offsets[order[0]];
yann@339
     6
     }
yann@339
     7
 
yann@339
     8
+  /* For XScale a two-word LDM is a performance loss, so only do this if
yann@339
     9
+     size is more important.  See comments in arm_gen_load_multiple.  */
yann@339
    10
+  if (nops == 2 && arm_tune_xscale && !optimize_size)
yann@339
    11
+    return 0;
yann@339
    12
+
yann@339
    13
   if (unsorted_offsets[order[0]] == 0)
yann@339
    14
     return 1; /* ldmia */
yann@339
    15
 
yann@339
    16
@@ -5083,6 +5088,11 @@
yann@339
    17
       *load_offset = unsorted_offsets[order[0]];
yann@339
    18
     }
yann@339
    19
 
yann@339
    20
+  /* For XScale a two-word LDM is a performance loss, so only do this if
yann@339
    21
+     size is more important.  See comments in arm_gen_load_multiple.  */
yann@339
    22
+  if (nops == 2 && arm_tune_xscale && !optimize_size)
yann@339
    23
+    return 0;
yann@339
    24
+
yann@339
    25
   if (unsorted_offsets[order[0]] == 0)
yann@339
    26
     return 1; /* stmia */
yann@339
    27
 
yann@339
    28
diff -durN gcc-3.4.6.orig/gcc/config/arm/arm.md gcc-3.4.6/gcc/config/arm/arm.md
yann@339
    29
--- gcc-3.4.6.orig/gcc/config/arm/arm.md	2005-10-01 15:31:38.000000000 +0200
yann@339
    30
+++ gcc-3.4.6/gcc/config/arm/arm.md	2007-08-15 22:54:59.000000000 +0200
yann@339
    31
@@ -8811,13 +8811,16 @@
yann@339
    32
    (set_attr "length" "4,8,8")]
yann@339
    33
 )
yann@339
    34
 
yann@339
    35
+; Try to convert LDR+LDR+arith into [add+]LDM+arith
yann@339
    36
+; On XScale, LDM is always slower than two LDRs, so only do this if
yann@339
    37
+; optimising for size.
yann@339
    38
 (define_insn "*arith_adjacentmem"
yann@339
    39
   [(set (match_operand:SI 0 "s_register_operand" "=r")
yann@339
    40
 	(match_operator:SI 1 "shiftable_operator"
yann@339
    41
 	 [(match_operand:SI 2 "memory_operand" "m")
yann@339
    42
 	  (match_operand:SI 3 "memory_operand" "m")]))
yann@339
    43
    (clobber (match_scratch:SI 4 "=r"))]
yann@339
    44
-  "TARGET_ARM && adjacent_mem_locations (operands[2], operands[3])"
yann@339
    45
+  "TARGET_ARM && (!arm_tune_xscale || optimize_size) && adjacent_mem_locations (operands[2], operands[3])"
yann@339
    46
   "*
yann@339
    47
   {
yann@339
    48
     rtx ldm[3];
yann@339
    49
@@ -8852,6 +8855,8 @@
yann@339
    50
       }
yann@339
    51
    if (val1 && val2)
yann@339
    52
       {
yann@339
    53
+	/* This would be a loss on a Harvard core, but adjacent_mem_locations()
yann@339
    54
+	   will prevent it from happening.  */
yann@339
    55
 	rtx ops[3];
yann@339
    56
 	ldm[0] = ops[0] = operands[4];
yann@339
    57
 	ops[1] = XEXP (XEXP (operands[2], 0), 0);
yann@339
    58
diff -durN gcc-3.4.6.orig/gcc/genpeep.c gcc-3.4.6/gcc/genpeep.c
yann@339
    59
--- gcc-3.4.6.orig/gcc/genpeep.c	2003-07-05 07:27:22.000000000 +0200
yann@339
    60
+++ gcc-3.4.6/gcc/genpeep.c	2007-08-15 22:54:59.000000000 +0200
yann@339
    61
@@ -381,6 +381,7 @@
yann@339
    62
   printf ("#include \"recog.h\"\n");
yann@339
    63
   printf ("#include \"except.h\"\n\n");
yann@339
    64
   printf ("#include \"function.h\"\n\n");
yann@339
    65
+  printf ("#include \"flags.h\"\n\n");
yann@339
    66
 
yann@339
    67
   printf ("#ifdef HAVE_peephole\n");
yann@339
    68
   printf ("extern rtx peep_operand[];\n\n");