mirror of git://gcc.gnu.org/git/gcc.git
tree-loop-linear.c (try_interchange_loops): Compare memory access strides against cache sizes.
* tree-loop-linear.c (try_interchange_loops): Compare memory access strides against cache sizes.
* testsuite/gcc.dg/tree-ssa/ltrans-8.c: Increase the size of strides to make the interchange profitable.

From-SVN: r132765
This commit is contained in:
parent
d973ac5a9f
commit
0c96231b2f
|
|
@@ -1,3 +1,8 @@
|
||||||
|
2008-02-29 Sebastian Pop <sebastian.pop@amd.com>
|
||||||
|
|
||||||
|
* tree-loop-linear.c (try_interchange_loops): Compare memory access
|
||||||
|
strides against cache sizes.
|
||||||
|
|
||||||
2008-02-29 Kaz Kojima <kkojima@gcc.gnu.org>
|
2008-02-29 Kaz Kojima <kkojima@gcc.gnu.org>
|
||||||
|
|
||||||
* config/sh/sh.c (sh_secondary_reload): Handle loading a float
|
* config/sh/sh.c (sh_secondary_reload): Handle loading a float
|
||||||
|
|
|
||||||
|
|
@@ -1,3 +1,8 @@
|
||||||
|
2008-02-29 Sebastian Pop <sebastian.pop@amd.com>
|
||||||
|
|
||||||
|
* testsuite/gcc.dg/tree-ssa/ltrans-8.c: Increase the size of strides
|
||||||
|
to make the interchange profitable.
|
||||||
|
|
||||||
2008-02-28 Daniel Franke <franke.daniel@gmail.com>
|
2008-02-28 Daniel Franke <franke.daniel@gmail.com>
|
||||||
|
|
||||||
PR fortran/31463
|
PR fortran/31463
|
||||||
|
|
|
||||||
|
|
@@ -4,9 +4,9 @@ double foo(double *a)
|
||||||
{
|
{
|
||||||
int i,j;
|
int i,j;
|
||||||
double r = 0.0;
|
double r = 0.0;
|
||||||
for (i=0; i<8; ++i)
|
for (i=0; i<100; ++i)
|
||||||
for (j=0; j<8; ++j)
|
for (j=0; j<1000; ++j)
|
||||||
r += a[j*8+i];
|
r += a[j*100+i];
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -179,10 +179,14 @@ try_interchange_loops (lambda_trans_matrix trans,
|
||||||
VEC (data_reference_p, heap) *datarefs,
|
VEC (data_reference_p, heap) *datarefs,
|
||||||
struct loop *first_loop)
|
struct loop *first_loop)
|
||||||
{
|
{
|
||||||
|
bool res;
|
||||||
struct loop *loop_i;
|
struct loop *loop_i;
|
||||||
struct loop *loop_j;
|
struct loop *loop_j;
|
||||||
unsigned int dependence_steps_i, dependence_steps_j;
|
unsigned int dependence_steps_i, dependence_steps_j;
|
||||||
double_int access_strides_i, access_strides_j;
|
double_int access_strides_i, access_strides_j;
|
||||||
|
double_int small, large, nb_iter;
|
||||||
|
double_int l1_cache_size, l2_cache_size;
|
||||||
|
int cmp;
|
||||||
unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
|
unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
|
||||||
struct data_dependence_relation *ddr;
|
struct data_dependence_relation *ddr;
|
||||||
|
|
||||||
|
|
@@ -194,7 +198,10 @@ try_interchange_loops (lambda_trans_matrix trans,
|
||||||
ddr = VEC_index (ddr_p, dependence_relations, 0);
|
ddr = VEC_index (ddr_p, dependence_relations, 0);
|
||||||
if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
|
if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
|
||||||
return trans;
|
return trans;
|
||||||
|
|
||||||
|
l1_cache_size = uhwi_to_double_int (L1_CACHE_SIZE * 1024);
|
||||||
|
l2_cache_size = uhwi_to_double_int (L2_CACHE_SIZE * 1024);
|
||||||
|
|
||||||
/* LOOP_I is always the outer loop. */
|
/* LOOP_I is always the outer loop. */
|
||||||
for (loop_j = first_loop->inner;
|
for (loop_j = first_loop->inner;
|
||||||
loop_j;
|
loop_j;
|
||||||
|
|
@@ -216,18 +223,38 @@ try_interchange_loops (lambda_trans_matrix trans,
|
||||||
|
|
||||||
/* Heuristics for loop interchange profitability:
|
/* Heuristics for loop interchange profitability:
|
||||||
|
|
||||||
|
0. Don't transform if the smallest stride is larger than
|
||||||
|
the L2 cache, or if the largest stride multiplied by the
|
||||||
|
number of iterations is smaller than the L1 cache.
|
||||||
|
|
||||||
1. (spatial locality) Inner loops should have smallest
|
1. (spatial locality) Inner loops should have smallest
|
||||||
dependence steps.
|
dependence steps.
|
||||||
|
|
||||||
2. (spatial locality) Inner loops should contain more
|
2. (spatial locality) Inner loops should contain more
|
||||||
dependence relations not carried by the loop.
|
dependence relations not carried by the loop.
|
||||||
|
|
||||||
3. (temporal locality) Inner loops should have smallest
|
3. (temporal locality) Inner loops should have smallest
|
||||||
array access strides.
|
array access strides.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
cmp = double_int_ucmp (access_strides_i, access_strides_j);
|
||||||
|
small = cmp < 0 ? access_strides_i : access_strides_j;
|
||||||
|
large = cmp < 0 ? access_strides_j : access_strides_i;
|
||||||
|
|
||||||
|
if (double_int_ucmp (small, l2_cache_size) > 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
res = cmp < 0 ?
|
||||||
|
estimated_loop_iterations (loop_j, false, &nb_iter):
|
||||||
|
estimated_loop_iterations (loop_i, false, &nb_iter);
|
||||||
|
large = double_int_mul (large, nb_iter);
|
||||||
|
|
||||||
|
if (res && double_int_ucmp (large, l1_cache_size) < 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
if (dependence_steps_i < dependence_steps_j
|
if (dependence_steps_i < dependence_steps_j
|
||||||
|| nb_deps_not_carried_by_i > nb_deps_not_carried_by_j
|
|| nb_deps_not_carried_by_i > nb_deps_not_carried_by_j
|
||||||
|| double_int_ucmp (access_strides_i, access_strides_j) < 0)
|
|| cmp < 0)
|
||||||
{
|
{
|
||||||
lambda_matrix_row_exchange (LTM_MATRIX (trans),
|
lambda_matrix_row_exchange (LTM_MATRIX (trans),
|
||||||
loop_depth (loop_i) - loop_depth (first_loop),
|
loop_depth (loop_i) - loop_depth (first_loop),
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue