1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/patches/gcc/4.3.2/360-fix-expensive-optimize.patch	Sat Jan 31 17:49:27 2009 +0000
     1.3 @@ -0,0 +1,207 @@
     1.4 +PR tree-optimization/32044
     1.5 +
     1.6 +From: rakdver
     1.7 +Date: 2008-12-12 21:32:47 +0100
     1.8 +
     1.9 +* tree-scalar-evolution.h (expression_expensive_p): Declare.
    1.10 +* tree-scalar-evolution.c (expression_expensive_p): New function.
    1.11 +(scev_const_prop): Avoid introducing expensive expressions.
    1.12 +* tree-ssa-loop-ivopts.c (may_eliminate_iv): Ditto.
    1.13 +
    1.14 +* gcc.dg/pr34027-1.c: Change outcome.
    1.15 +* gcc.dg/tree-ssa/pr32044.c: New test.
    1.16 +
    1.17 +Cherry-picked from svn://gcc.gnu.org/svn/gcc/trunk, rev 142719, and adapted
    1.18 +to apply to gcc 4.3.2.
    1.19 +
    1.20 +------------------------------------------------------------------------
    1.21 +Index: gcc-4.3.2/gcc/tree-scalar-evolution.c
    1.22 +===================================================================
    1.23 +--- gcc-4.3.2.orig/gcc/tree-scalar-evolution.c	2009-01-28 10:14:37.000000000 +0100
    1.24 ++++ gcc-4.3.2/gcc/tree-scalar-evolution.c	2009-01-28 10:17:50.000000000 +0100
    1.25 +@@ -2716,6 +2716,50 @@
    1.26 +   scalar_evolution_info = NULL;
    1.27 + }
    1.28 + 
    1.29 ++/* Returns true if the expression EXPR is considered to be too expensive
    1.30 ++   for scev_const_prop.  */
    1.31 ++
    1.32 ++bool
    1.33 ++expression_expensive_p (tree expr)
    1.34 ++{
    1.35 ++  enum tree_code code;
    1.36 ++
    1.37 ++  if (is_gimple_val (expr))
    1.38 ++    return false;
    1.39 ++
    1.40 ++  code = TREE_CODE (expr);
    1.41 ++  if (code == TRUNC_DIV_EXPR
    1.42 ++      || code == CEIL_DIV_EXPR
    1.43 ++      || code == FLOOR_DIV_EXPR
    1.44 ++      || code == ROUND_DIV_EXPR
    1.45 ++      || code == TRUNC_MOD_EXPR
    1.46 ++      || code == CEIL_MOD_EXPR
    1.47 ++      || code == FLOOR_MOD_EXPR
    1.48 ++      || code == ROUND_MOD_EXPR
    1.49 ++      || code == EXACT_DIV_EXPR)
    1.50 ++    {
    1.51 ++      /* Division by power of two is usually cheap, so we allow it.
    1.52 ++	 Forbid anything else.  */
    1.53 ++      if (!integer_pow2p (TREE_OPERAND (expr, 1)))
    1.54 ++	return true;
    1.55 ++    }
    1.56 ++
    1.57 ++  switch (TREE_CODE_CLASS (code))
    1.58 ++    {
    1.59 ++    case tcc_binary:
    1.60 ++    case tcc_comparison:
    1.61 ++      if (expression_expensive_p (TREE_OPERAND (expr, 1)))
    1.62 ++	return true;
    1.63 ++
    1.64 ++      /* Fallthru.  */
    1.65 ++    case tcc_unary:
    1.66 ++      return expression_expensive_p (TREE_OPERAND (expr, 0));
    1.67 ++
    1.68 ++    default:
    1.69 ++      return true;
    1.70 ++    }
    1.71 ++}
    1.72 ++
    1.73 + /* Replace ssa names for that scev can prove they are constant by the
    1.74 +    appropriate constants.  Also perform final value replacement in loops,
    1.75 +    in case the replacement expressions are cheap.
    1.76 +@@ -2802,12 +2846,6 @@
    1.77 + 	continue;
    1.78 + 
    1.79 +       niter = number_of_latch_executions (loop);
    1.80 +-      /* We used to check here whether the computation of NITER is expensive,
    1.81 +-	 and avoided final value elimination if that is the case.  The problem
    1.82 +-	 is that it is hard to evaluate whether the expression is too
    1.83 +-	 expensive, as we do not know what optimization opportunities the
    1.84 +-	 the elimination of the final value may reveal.  Therefore, we now
    1.85 +-	 eliminate the final values of induction variables unconditionally.  */
    1.86 +       if (niter == chrec_dont_know)
    1.87 + 	continue;
    1.88 + 
    1.89 +@@ -2838,7 +2876,15 @@
    1.90 + 	      /* Moving the computation from the loop may prolong life range
    1.91 + 		 of some ssa names, which may cause problems if they appear
    1.92 + 		 on abnormal edges.  */
    1.93 +-	      || contains_abnormal_ssa_name_p (def))
    1.94 ++	      || contains_abnormal_ssa_name_p (def)
    1.95 ++	      /* Do not emit expensive expressions.  The rationale is that
    1.96 ++		 when someone writes code like
    1.97 ++
    1.98 ++		 while (n >= 45) n -= 45;
    1.99 ++
   1.100 ++		 he probably knows that n is not large, and does not want it
   1.101 ++		 to be turned into n %= 45.  */
   1.102 ++	      || expression_expensive_p (def))
   1.103 + 	    continue;
   1.104 + 
   1.105 + 	  /* Eliminate the PHI node and replace it by a computation outside
   1.106 +Index: gcc-4.3.2/gcc/tree-scalar-evolution.h
   1.107 +===================================================================
   1.108 +--- gcc-4.3.2.orig/gcc/tree-scalar-evolution.h	2009-01-28 10:22:47.000000000 +0100
   1.109 ++++ gcc-4.3.2/gcc/tree-scalar-evolution.h	2009-01-28 10:23:10.000000000 +0100
   1.110 +@@ -35,6 +35,7 @@
   1.111 + extern void scev_analysis (void);
   1.112 + unsigned int scev_const_prop (void);
   1.113 + 
   1.114 ++bool expression_expensive_p (tree);
   1.115 + extern bool simple_iv (struct loop *, tree, tree, affine_iv *, bool);
   1.116 + 
   1.117 + /* Returns the loop of the polynomial chrec CHREC.  */
   1.118 +Index: gcc-4.3.2/gcc/testsuite/gcc.dg/pr34027-1.c
   1.119 +===================================================================
   1.120 +--- gcc-4.3.2.orig/gcc/testsuite/gcc.dg/pr34027-1.c	2009-01-28 10:24:09.000000000 +0100
   1.121 ++++ gcc-4.3.2/gcc/testsuite/gcc.dg/pr34027-1.c	2009-01-28 10:24:43.000000000 +0100
   1.122 +@@ -8,5 +8,9 @@
   1.123 +   return ns;
   1.124 + }
   1.125 + 
   1.126 +-/* { dg-final { scan-tree-dump "ns % 10000" "optimized" } } */
   1.127 ++/* This test was originally introduced to test that we transform
   1.128 ++   to ns % 10000.  See the discussion of PR 32044 for why we no longer
   1.129 ++   do that.  */
   1.130 ++/* { dg-final { scan-tree-dump-times "%" 0 "optimized" } } */
   1.131 ++/* { dg-final { scan-tree-dump-times "/" 0 "optimized" } } */
   1.132 + /* { dg-final { cleanup-tree-dump "optimized" } } */
   1.133 +Index: gcc-4.3.2/gcc/testsuite/gcc.dg/tree-ssa/pr32044.c
   1.134 +===================================================================
   1.135 +--- /dev/null	1970-01-01 00:00:00.000000000 +0000
   1.136 ++++ gcc-4.3.2/gcc/testsuite/gcc.dg/tree-ssa/pr32044.c	2009-01-28 10:25:50.000000000 +0100
   1.137 +@@ -0,0 +1,55 @@
   1.138 ++/* { dg-do compile } */
   1.139 ++/* { dg-options "-O2 -fdump-tree-empty -fdump-tree-final_cleanup" } */
   1.140 ++
   1.141 ++int foo (int n)
   1.142 ++{
   1.143 ++  while (n >= 45)
   1.144 ++    n -= 45;
   1.145 ++
   1.146 ++  return n;
   1.147 ++}
   1.148 ++
   1.149 ++int bar (int n)
   1.150 ++{
   1.151 ++  while (n >= 64)
   1.152 ++    n -= 64;
   1.153 ++
   1.154 ++  return n;
   1.155 ++}
   1.156 ++
   1.157 ++int bla (int n)
   1.158 ++{
   1.159 ++  int i = 0;
   1.160 ++
   1.161 ++  while (n >= 45)
   1.162 ++    {
   1.163 ++      i++;
   1.164 ++      n -= 45;
   1.165 ++    }
   1.166 ++
   1.167 ++  return i;
   1.168 ++}
   1.169 ++
   1.170 ++int baz (int n)
   1.171 ++{
   1.172 ++  int i = 0;
   1.173 ++
   1.174 ++  while (n >= 64)
   1.175 ++    {
   1.176 ++      i++;
   1.177 ++      n -= 64;
   1.178 ++    }
   1.179 ++
   1.180 ++  return i;
   1.181 ++}
   1.182 ++
   1.183 ++/* The loops computing division/modulo by 64 should be eliminated.  */
   1.184 ++/* { dg-final { scan-tree-dump-times "Removing empty loop" 2 "empty" } } */
   1.185 ++
   1.186 ++/* There should be no division/modulo in the final dump (division and modulo
   1.187 ++   by 64 are done using bit operations).  */
   1.188 ++/* { dg-final { scan-tree-dump-times "/" 0 "final_cleanup" } } */
   1.189 ++/* { dg-final { scan-tree-dump-times "%" 0 "final_cleanup" } } */
   1.190 ++
   1.191 ++/* { dg-final { cleanup-tree-dump "empty" } } */
   1.192 ++/* { dg-final { cleanup-tree-dump "final_cleanup" } } */
   1.193 +Index: gcc-4.3.2/gcc/tree-ssa-loop-ivopts.c
   1.194 +===================================================================
   1.195 +--- gcc-4.3.2.orig/gcc/tree-ssa-loop-ivopts.c	2009-01-28 10:26:04.000000000 +0100
   1.196 ++++ gcc-4.3.2/gcc/tree-ssa-loop-ivopts.c	2009-01-28 10:27:09.000000000 +0100
   1.197 +@@ -3778,7 +3778,12 @@
   1.198 +     return false;
   1.199 + 
   1.200 +   cand_value_at (loop, cand, use->stmt, nit, &bnd);
   1.201 ++
   1.202 +   *bound = aff_combination_to_tree (&bnd);
   1.203 ++  /* It is unlikely that computing the number of iterations using division
   1.204 ++     would be more profitable than keeping the original induction variable.  */
   1.205 ++  if (expression_expensive_p (*bound))
   1.206 ++    return false;
   1.207 +   return true;
   1.208 + }
   1.209 + 
   1.210 +