mirror of git://gcc.gnu.org/git/gcc.git
2009-08-13  Ghassan Shobaki  <ghassan.shobaki@amd.com>

	* tree-ssa-loop-prefetch.c (prune_ref_by_group_reuse): Enhance
	probabilistic analysis for long-stride pruning.
	(compute_miss_rate): New function to compute the probability
	that two memory references access different cache lines.

From-SVN: r150726
This commit is contained in:
parent bc21d3152f
commit 2c6dd136d3
@@ -1,3 +1,11 @@
+2009-08-13  Ghassan Shobaki  <ghassan.shobaki@amd.com>
+
+	* tree-ssa-loop-prefetch.c
+	(prune_ref_by_group_reuse): Enhance probabilistic analysis
+	for long-stride pruning.
+	(compute_miss_rate): New function to compute the probability
+	that two memory references access different cache lines.
+
 2009-08-13  Dave Korn  <dave.korn.cygwin@gmail.com>
 
 	* gcc/config/i386/cygwin.h (LINK_SPEC): Add --enable-auto-image-base.
@@ -593,6 +593,45 @@ ddown (HOST_WIDE_INT x, unsigned HOST_WIDE_INT by)
   return (x + by - 1) / by;
 }
 
+/* Given a CACHE_LINE_SIZE and two inductive memory references
+   with a common STEP greater than CACHE_LINE_SIZE and an address
+   difference DELTA, compute the probability that they will fall
+   in different cache lines.  DISTINCT_ITERS is the number of
+   distinct iterations after which the pattern repeats itself.
+   ALIGN_UNIT is the unit of alignment in bytes.  */
+
+static int
+compute_miss_rate (unsigned HOST_WIDE_INT cache_line_size,
+		   HOST_WIDE_INT step, HOST_WIDE_INT delta,
+		   unsigned HOST_WIDE_INT distinct_iters,
+		   int align_unit)
+{
+  unsigned align, iter;
+  int total_positions, miss_positions, miss_rate;
+  int address1, address2, cache_line1, cache_line2;
+
+  total_positions = 0;
+  miss_positions = 0;
+
+  /* Iterate through all possible alignments of the first
+     memory reference within its cache line.  */
+  for (align = 0; align < cache_line_size; align += align_unit)
+
+    /* Iterate through all distinct iterations.  */
+    for (iter = 0; iter < distinct_iters; iter++)
+      {
+	address1 = align + step * iter;
+	address2 = address1 + delta;
+	cache_line1 = address1 / cache_line_size;
+	cache_line2 = address2 / cache_line_size;
+	total_positions += 1;
+	if (cache_line1 != cache_line2)
+	  miss_positions += 1;
+      }
+  miss_rate = 1000 * miss_positions / total_positions;
+  return miss_rate;
+}
+
 /* Prune the prefetch candidate REF using the reuse with BY.
    If BY_IS_BEFORE is true, BY is before REF in the loop.  */
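For readers who want to try the new heuristic outside the compiler, here is a
minimal standalone sketch of the same computation.  It substitutes plain long
for GCC's internal HOST_WIDE_INT, renames the function to avoid any confusion
with the real one, and the sample parameters in main (64-byte cache line,
192-byte step, 16-byte delta, 4-byte alignment unit) are made up for
illustration, not taken from the patch.

#include <stdio.h>

/* Same logic as compute_miss_rate above, with long standing in for
   HOST_WIDE_INT so it builds as an ordinary C program.  */
static int
miss_rate_sketch (unsigned long cache_line_size, long step, long delta,
                  unsigned long distinct_iters, int align_unit)
{
  unsigned long align, iter;
  int total_positions = 0, miss_positions = 0;

  for (align = 0; align < cache_line_size; align += align_unit)
    for (iter = 0; iter < distinct_iters; iter++)
      {
        long address1 = align + step * iter;
        long address2 = address1 + delta;
        total_positions += 1;
        if (address1 / cache_line_size != address2 / cache_line_size)
          miss_positions += 1;
      }
  return 1000 * miss_positions / total_positions;
}

int
main (void)
{
  /* 192/64 reduces to 3/1, so DISTINCT_ITERS is 1.  Two references
     16 bytes apart land on different 64-byte lines only when the
     first one starts within 16 bytes of a line boundary, i.e. 4 of
     the 16 possible 4-byte-aligned positions: prints 250 per mille.  */
  printf ("miss rate: %d/1000\n", miss_rate_sketch (64, 192, 16, 1, 4));
  return 0;
}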
@@ -606,6 +645,11 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by,
   HOST_WIDE_INT delta = delta_b - delta_r;
   HOST_WIDE_INT hit_from;
   unsigned HOST_WIDE_INT prefetch_before, prefetch_block;
+  int miss_rate;
+  HOST_WIDE_INT reduced_step;
+  unsigned HOST_WIDE_INT reduced_prefetch_block;
+  tree ref_type;
+  int align_unit;
 
   if (delta == 0)
     {
@@ -667,25 +711,29 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by,
       return;
     }
 
-  /* A more complicated case.  First let us ensure that size of cache line
-     and step are coprime (here we assume that PREFETCH_BLOCK is a power
-     of two.  */
+  /* A more complicated case with step > prefetch_block.  First reduce
+     the ratio between the step and the cache line size to its simplest
+     terms.  The resulting denominator will then represent the number of
+     distinct iterations after which each address will go back to its
+     initial location within the cache line.  This computation assumes
+     that PREFETCH_BLOCK is a power of two.  */
   prefetch_block = PREFETCH_BLOCK;
-  while ((step & 1) == 0
-	 && prefetch_block > 1)
+  reduced_prefetch_block = prefetch_block;
+  reduced_step = step;
+  while ((reduced_step & 1) == 0
+	 && reduced_prefetch_block > 1)
     {
-      step >>= 1;
-      prefetch_block >>= 1;
-      delta >>= 1;
+      reduced_step >>= 1;
+      reduced_prefetch_block >>= 1;
     }
 
-  /* Now step > prefetch_block, and step and prefetch_block are coprime.
-     Determine the probability that the accesses hit the same cache line.  */
-
   prefetch_before = delta / step;
   delta %= step;
-  if ((unsigned HOST_WIDE_INT) delta
-      <= (prefetch_block * ACCEPTABLE_MISS_RATE / 1000))
+  ref_type = TREE_TYPE (ref->mem);
+  align_unit = TYPE_ALIGN (ref_type) / 8;
+  miss_rate = compute_miss_rate(prefetch_block, step, delta,
+				reduced_prefetch_block, align_unit);
+  if (miss_rate <= ACCEPTABLE_MISS_RATE)
     {
       if (prefetch_before < ref->prefetch_before)
	 ref->prefetch_before = prefetch_before;
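The effect of the new reduction loop is easiest to see on concrete numbers.
A minimal sketch, assuming a hypothetical 64-byte PREFETCH_BLOCK and a
160-byte step (neither value is from the patch):

#include <stdio.h>

int
main (void)
{
  long step = 160;                    /* hypothetical stride in bytes  */
  unsigned long prefetch_block = 64;  /* hypothetical cache line size  */
  long reduced_step = step;
  unsigned long reduced_prefetch_block = prefetch_block;

  /* Strip common factors of two, as the new loop above does.  */
  while ((reduced_step & 1) == 0 && reduced_prefetch_block > 1)
    {
      reduced_step >>= 1;
      reduced_prefetch_block >>= 1;
    }

  /* Prints "5/2": 160/64 in simplest terms.  The denominator 2 is the
     number of distinct iterations before each address returns to its
     starting offset within the line (160*2 = 5*64), which is what gets
     passed to compute_miss_rate as DISTINCT_ITERS.  */
  printf ("%ld/%lu\n", reduced_step, reduced_prefetch_block);
  return 0;
}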
@@ -696,8 +744,9 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by,
   /* Try also the following iteration.  */
   prefetch_before++;
   delta = step - delta;
-  if ((unsigned HOST_WIDE_INT) delta
-      <= (prefetch_block * ACCEPTABLE_MISS_RATE / 1000))
+  miss_rate = compute_miss_rate(prefetch_block, step, delta,
+				reduced_prefetch_block, align_unit);
+  if (miss_rate <= ACCEPTABLE_MISS_RATE)
     {
       if (prefetch_before < ref->prefetch_before)
	 ref->prefetch_before = prefetch_before;
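Why probe the following iteration with delta = step - delta?  If the trailing
reference is almost a full stride behind, it is only a few bytes ahead of the
leading reference's next access.  Reusing miss_rate_sketch from the standalone
example after the first hunk (all numbers again made up): appended to that
sketch's main, with delta = 188 the two references always straddle a 64-byte
line, but one iteration later the gap is 192 - 188 = 4 bytes and they almost
always share a line.

  /* Prints 1000/1000 for this iteration, 62/1000 for the next.  */
  printf ("this iteration: %d/1000\n", miss_rate_sketch (64, 192, 188, 1, 4));
  printf ("next iteration: %d/1000\n", miss_rate_sketch (64, 192, 4, 1, 4));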