patches/gcc/3.4.6/140-arm-ldm-peephole.patch
author "Yann E. MORIN" <yann.morin.1998@anciens.enib.fr>
Sun Jul 17 22:43:07 2011 +0200 (2011-07-17)
changeset 2893 a8a65758664f
parent 746 b150d6f590fc
permissions -rw-r--r--
cc/gcc: do not use the core pass-2 to build the baremetal compiler

In case we build a baremetal compiler, use the standard passes:
- core_cc is used to build the C library;
- as such, it is meant to run on build, not host;
- the final compiler is meant to run on host.

As the current final compiler step cannot build a baremetal compiler,
call the core backend from the final step.

NB: Currently, newlib is built during the start_files pass, so we have
to have a core compiler by then... Once we can build the baremetal
compiler from the final cc step, then we can move the newlib build to
the proper step, and then get rid of the core pass-1 static compiler...

Signed-off-by: "Yann E. MORIN" <yann.morin.1998@anciens.enib.fr>
     1 diff -durN gcc-3.4.6.orig/gcc/config/arm/arm.c gcc-3.4.6/gcc/config/arm/arm.c
     2 --- gcc-3.4.6.orig/gcc/config/arm/arm.c	2005-10-01 15:31:38.000000000 +0200
     3 +++ gcc-3.4.6/gcc/config/arm/arm.c	2007-08-15 22:54:59.000000000 +0200
     4 @@ -4857,6 +4857,11 @@
     5        *load_offset = unsorted_offsets[order[0]];
     6      }
     7  
     8 +  /* For XScale a two-word LDM is a performance loss, so only do this if
     9 +     size is more important.  See comments in arm_gen_load_multiple.  */
    10 +  if (nops == 2 && arm_tune_xscale && !optimize_size)
    11 +    return 0;
    12 +
    13    if (unsorted_offsets[order[0]] == 0)
    14      return 1; /* ldmia */
    15  
    16 @@ -5083,6 +5088,11 @@
    17        *load_offset = unsorted_offsets[order[0]];
    18      }
    19  
    20 +  /* For XScale a two-word LDM is a performance loss, so only do this if
    21 +     size is more important.  See comments in arm_gen_load_multiple.  */
    22 +  if (nops == 2 && arm_tune_xscale && !optimize_size)
    23 +    return 0;
    24 +
    25    if (unsorted_offsets[order[0]] == 0)
    26      return 1; /* stmia */
    27  
    28 diff -durN gcc-3.4.6.orig/gcc/config/arm/arm.md gcc-3.4.6/gcc/config/arm/arm.md
    29 --- gcc-3.4.6.orig/gcc/config/arm/arm.md	2005-10-01 15:31:38.000000000 +0200
    30 +++ gcc-3.4.6/gcc/config/arm/arm.md	2007-08-15 22:54:59.000000000 +0200
    31 @@ -8811,13 +8811,16 @@
    32     (set_attr "length" "4,8,8")]
    33  )
    34  
    35 +; Try to convert LDR+LDR+arith into [add+]LDM+arith
    36 +; On XScale, LDM is always slower than two LDRs, so only do this if
    37 +; optimising for size.
    38  (define_insn "*arith_adjacentmem"
    39    [(set (match_operand:SI 0 "s_register_operand" "=r")
    40  	(match_operator:SI 1 "shiftable_operator"
    41  	 [(match_operand:SI 2 "memory_operand" "m")
    42  	  (match_operand:SI 3 "memory_operand" "m")]))
    43     (clobber (match_scratch:SI 4 "=r"))]
    44 -  "TARGET_ARM && adjacent_mem_locations (operands[2], operands[3])"
    45 +  "TARGET_ARM && (!arm_tune_xscale || optimize_size) && adjacent_mem_locations (operands[2], operands[3])"
    46    "*
    47    {
    48      rtx ldm[3];
    49 @@ -8852,6 +8855,8 @@
    50        }
    51     if (val1 && val2)
    52        {
    53 +	/* This would be a loss on a Harvard core, but adjacent_mem_locations()
    54 +	   will prevent it from happening.  */
    55  	rtx ops[3];
    56  	ldm[0] = ops[0] = operands[4];
    57  	ops[1] = XEXP (XEXP (operands[2], 0), 0);
    58 diff -durN gcc-3.4.6.orig/gcc/genpeep.c gcc-3.4.6/gcc/genpeep.c
    59 --- gcc-3.4.6.orig/gcc/genpeep.c	2003-07-05 07:27:22.000000000 +0200
    60 +++ gcc-3.4.6/gcc/genpeep.c	2007-08-15 22:54:59.000000000 +0200
    61 @@ -381,6 +381,7 @@
    62    printf ("#include \"recog.h\"\n");
    63    printf ("#include \"except.h\"\n\n");
    64    printf ("#include \"function.h\"\n\n");
    65 +  printf ("#include \"flags.h\"\n\n");
    66  
    67    printf ("#ifdef HAVE_peephole\n");
    68    printf ("extern rtx peep_operand[];\n\n");