diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d4623313b164..535204d3b677 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,24 @@ +2018-05-07 Luis Machado + + gcc/ + * config/aarch64/aarch64-protos.h (cpu_prefetch_tune) + <minimum_stride>: New const int field. + * config/aarch64/aarch64.c (generic_prefetch_tune): Update to include + minimum_stride field. + (exynosm1_prefetch_tune): Likewise. + (thunderxt88_prefetch_tune): Likewise. + (thunderx_prefetch_tune): Likewise. + (thunderx2t99_prefetch_tune): Likewise. + (qdf24xx_prefetch_tune): Likewise. Set minimum_stride to 2048 and + default_opt_level to 3. + (aarch64_override_options_internal): Update to set + PARAM_PREFETCH_MINIMUM_STRIDE. + * doc/invoke.texi (prefetch-minimum-stride): Document new option. + * params.def (PARAM_PREFETCH_MINIMUM_STRIDE): New. + * params.h (PARAM_PREFETCH_MINIMUM_STRIDE): Define. + * tree-ssa-loop-prefetch.c (should_issue_prefetch_p): Return false if + stride is constant and is below the minimum stride threshold. + 2018-05-06 Jakub Jelinek PR c++/85659 diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index cda2895d28e7..5d3b9d7a06e3 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -230,6 +230,9 @@ struct cpu_prefetch_tune const int l1_cache_size; const int l1_cache_line_size; const int l2_cache_size; + /* The minimum constant stride at or above which prefetch hints + should be used. 
*/ + const int minimum_stride; const int default_opt_level; }; diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 8aad9f74d7ac..7d0ba35b7be1 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -547,6 +547,7 @@ static const cpu_prefetch_tune generic_prefetch_tune = -1, /* l1_cache_size */ -1, /* l1_cache_line_size */ -1, /* l2_cache_size */ + -1, /* minimum_stride */ -1 /* default_opt_level */ }; @@ -556,6 +557,7 @@ static const cpu_prefetch_tune exynosm1_prefetch_tune = -1, /* l1_cache_size */ 64, /* l1_cache_line_size */ -1, /* l2_cache_size */ + -1, /* minimum_stride */ -1 /* default_opt_level */ }; @@ -565,7 +567,8 @@ static const cpu_prefetch_tune qdf24xx_prefetch_tune = 32, /* l1_cache_size */ 64, /* l1_cache_line_size */ 1024, /* l2_cache_size */ - -1 /* default_opt_level */ + 2048, /* minimum_stride */ + 3 /* default_opt_level */ }; static const cpu_prefetch_tune thunderxt88_prefetch_tune = @@ -574,6 +577,7 @@ static const cpu_prefetch_tune thunderxt88_prefetch_tune = 32, /* l1_cache_size */ 128, /* l1_cache_line_size */ 16*1024, /* l2_cache_size */ + -1, /* minimum_stride */ 3 /* default_opt_level */ }; @@ -583,6 +587,7 @@ static const cpu_prefetch_tune thunderx_prefetch_tune = 32, /* l1_cache_size */ 128, /* l1_cache_line_size */ -1, /* l2_cache_size */ + -1, /* minimum_stride */ -1 /* default_opt_level */ }; @@ -592,6 +597,7 @@ static const cpu_prefetch_tune thunderx2t99_prefetch_tune = 32, /* l1_cache_size */ 64, /* l1_cache_line_size */ 256, /* l2_cache_size */ + -1, /* minimum_stride */ -1 /* default_opt_level */ }; @@ -10596,6 +10602,11 @@ aarch64_override_options_internal (struct gcc_options *opts) aarch64_tune_params.prefetch->l2_cache_size, opts->x_param_values, global_options_set.x_param_values); + if (aarch64_tune_params.prefetch->minimum_stride >= 0) + maybe_set_param_value (PARAM_PREFETCH_MINIMUM_STRIDE, + aarch64_tune_params.prefetch->minimum_stride, + opts->x_param_values, + 
global_options_set.x_param_values); /* Use the alternative scheduling-pressure algorithm by default. */ maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL, diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 255149fcfb87..7c90abcef05a 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -10733,6 +10733,21 @@ The size of L1 cache, in kilobytes. @item l2-cache-size The size of L2 cache, in kilobytes. +@item prefetch-minimum-stride +Minimum constant stride, in bytes, at which to start issuing prefetch hints. If +the stride is less than this threshold, prefetch hints will not be issued. + +This setting is useful for processors that have hardware prefetchers, in +which case there may be conflicts between the hardware prefetchers and +the software prefetchers. If the hardware prefetchers have a maximum +stride they can handle, it should be used here to improve the use of +software prefetchers. + +A value of -1, the default, means we don't have a threshold and therefore +prefetch hints can be issued for any constant stride. + +This setting is only useful for strides that are known and constant. + @item loop-interchange-max-num-stmts The maximum number of stmts in a loop to be interchanged. diff --git a/gcc/params.def b/gcc/params.def index dad47ec2b000..2166deb6a687 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -790,6 +790,15 @@ DEFPARAM (PARAM_L2_CACHE_SIZE, "The size of L2 cache.", 512, 0, 0) +/* The minimum constant stride at or above which prefetch hints should be + issued. */ + +DEFPARAM (PARAM_PREFETCH_MINIMUM_STRIDE, + "prefetch-minimum-stride", + "The minimum constant stride beyond which we should use prefetch " + "hints for.", + -1, 0, 0) + /* Maximum number of statements in loop nest for loop interchange. 
*/ DEFPARAM (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS, diff --git a/gcc/params.h b/gcc/params.h index 98249d2a1f6f..96012db31ff7 100644 --- a/gcc/params.h +++ b/gcc/params.h @@ -196,6 +196,8 @@ extern void init_param_values (int *params); PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE) #define L2_CACHE_SIZE \ PARAM_VALUE (PARAM_L2_CACHE_SIZE) +#define PREFETCH_MINIMUM_STRIDE \ + PARAM_VALUE (PARAM_PREFETCH_MINIMUM_STRIDE) #define USE_CANONICAL_TYPES \ PARAM_VALUE (PARAM_USE_CANONICAL_TYPES) #define IRA_MAX_LOOPS_NUM \ diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 2f10db185904..53104026e3c3 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -992,6 +992,27 @@ prune_by_reuse (struct mem_ref_group *groups) static bool should_issue_prefetch_p (struct mem_ref *ref) { + /* Some processors may have a hardware prefetcher that may conflict with + prefetch hints for a range of strides. Make sure we don't issue + prefetches for such cases if the stride is within this particular + range. A negative PREFETCH_MINIMUM_STRIDE (the default is -1) means + there is no threshold, so test for that first: absu_hwi returns an + unsigned value, and comparing it directly against -1 would reject + every constant step. */ + if (PREFETCH_MINIMUM_STRIDE >= 0 + && cst_and_fits_in_hwi (ref->group->step) + && (absu_hwi (int_cst_value (ref->group->step)) + < (unsigned HOST_WIDE_INT) PREFETCH_MINIMUM_STRIDE)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Step for reference %u:%u (" HOST_WIDE_INT_PRINT_DEC + ") is less than the minimum required stride of %d\n", + ref->group->uid, ref->uid, int_cst_value (ref->group->step), + PREFETCH_MINIMUM_STRIDE); + return false; + } + /* For now do not issue prefetches for only first few of the iterations. */ if (ref->prefetch_before != PREFETCH_ALL)