mirror of git://gcc.gnu.org/git/gcc.git
[og9] Make OpenACC function-parameter explosion optional
* configure.ac (amdgcn*-*-*): Add target-libffi to noconfigdirs for AMD GCN. * configure: Regenerated. gcc/ * builtin-types.def (BT_FN_VOID_INT_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR): Remove. * config/i386/i386.c (ix86_goacc_explode_args): New. (TARGET_GOACC_EXPLODE_ARGS): Define, using above function. * doc/tm.texi: Regenerated. * doc/tm.texi.in: Add TARGET_GOACC_EXPLODE_ARGS hook. * fortran/types.def (BT_FN_VOID_INT_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR): Remove. * omp-builtins.def (GOACC_parallel_keyed_v2): Remove. * omp-expand.c (expand_omp_target): Use explode_args target hook. Use GOMP_LAUNCH_ARGS_EXPLODED launch tag. * omp-low.c (build_receiver_ref, build_sender_ref, create_omp_child_function, scan_omp_target, lower_omp_target): Use explode_args target hook. * target.def (explode_args): New target hook. * tree-ssa-structalias.c (target.h): Include. (find_func_aliases_for_builtin_call): Conditionalise disabling of pass for OpenACC parallel regions based on explode_args target hook. Remove 'params' from BUILT_IN_GOACC_PARALLEL arguments. (find_func_clobbers): Likewise. (ipa_pta_execute): Update for removed 'params' argument. include/ * gomp-constants.h (GOMP_LAUNCH_ARGS_EXPLODED): Define. libgomp/ * libgomp.map (GOMP_2.0.GOMP_4_BRANCH): Remove GOACC_parallel_keyed_v2. * libgomp_g.h (GOACC_parallel_keyed_v2): Remove prototype. * oacc-parallel.c (GOACC_parallel_keyed_internal): Rename to... (GOACC_parallel_keyed): ...this. Handle GOMP_LAUNCH_ARGS_EXPLODED launch tag. Remove previous wrapper functions. (GOACC_parallel_keyed_v2): Remove.
This commit is contained in:
parent
dfe3cbfb88
commit
d1a4d60a3b
|
|
@ -1,3 +1,9 @@
|
||||||
|
2019-07-31 Julian Brown <julian@codesourcery.com>
|
||||||
|
|
||||||
|
* configure.ac (amdgcn*-*-*): Add target-libffi to noconfigdirs for AMD
|
||||||
|
GCN.
|
||||||
|
* configure: Regenerated.
|
||||||
|
|
||||||
2018-12-20 Maciej W. Rozycki <macro@codesourcery.com>
|
2018-12-20 Maciej W. Rozycki <macro@codesourcery.com>
|
||||||
|
|
||||||
* Makefile.def (lang_env_dependencies): Disable `cxx' dependency
|
* Makefile.def (lang_env_dependencies): Disable `cxx' dependency
|
||||||
|
|
|
||||||
|
|
@ -3466,6 +3466,9 @@ case "${target}" in
|
||||||
alpha*-*-*vms*)
|
alpha*-*-*vms*)
|
||||||
noconfigdirs="$noconfigdirs target-libffi"
|
noconfigdirs="$noconfigdirs target-libffi"
|
||||||
;;
|
;;
|
||||||
|
amdgcn*-*-*)
|
||||||
|
noconfigdirs="$noconfigdirs target-libffi"
|
||||||
|
;;
|
||||||
arm*-*-freebsd*)
|
arm*-*-freebsd*)
|
||||||
noconfigdirs="$noconfigdirs target-libffi"
|
noconfigdirs="$noconfigdirs target-libffi"
|
||||||
;;
|
;;
|
||||||
|
|
|
||||||
|
|
@ -748,6 +748,9 @@ case "${target}" in
|
||||||
alpha*-*-*vms*)
|
alpha*-*-*vms*)
|
||||||
noconfigdirs="$noconfigdirs target-libffi"
|
noconfigdirs="$noconfigdirs target-libffi"
|
||||||
;;
|
;;
|
||||||
|
amdgcn*-*-*)
|
||||||
|
noconfigdirs="$noconfigdirs target-libffi"
|
||||||
|
;;
|
||||||
arm*-*-freebsd*)
|
arm*-*-freebsd*)
|
||||||
noconfigdirs="$noconfigdirs target-libffi"
|
noconfigdirs="$noconfigdirs target-libffi"
|
||||||
;;
|
;;
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,27 @@
|
||||||
|
2019-07-31 Julian Brown <julian@codesourcery.com>
|
||||||
|
|
||||||
|
* builtin-types.def (BT_FN_VOID_INT_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR):
|
||||||
|
Remove.
|
||||||
|
* config/i386/i386.c (ix86_goacc_explode_args): New.
|
||||||
|
(TARGET_GOACC_EXPLODE_ARGS): Define, using above function.
|
||||||
|
* doc/tm.texi: Regenerated.
|
||||||
|
* doc/tm.texi.in: Add TARGET_GOACC_EXPLODE_ARGS hook.
|
||||||
|
* fortran/types.def (BT_FN_VOID_INT_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR):
|
||||||
|
Remove.
|
||||||
|
* omp-builtins.def (GOACC_parallel_keyed_v2): Remove.
|
||||||
|
* omp-expand.c (expand_omp_target): Use explode_args target hook.
|
||||||
|
Use GOMP_LAUNCH_ARGS_EXPLODED launch tag.
|
||||||
|
* omp-low.c (build_receiver_ref, build_sender_ref,
|
||||||
|
create_omp_child_function, scan_omp_target, lower_omp_target): Use
|
||||||
|
explode_args target hook.
|
||||||
|
* target.def (explode_args): New target hook.
|
||||||
|
* tree-ssa-structalias.c (target.h): Include.
|
||||||
|
(find_func_aliases_for_builtin_call): Conditionalise disabling of pass
|
||||||
|
for OpenACC parallel regions based on explode_args target hook. Remove
|
||||||
|
'params' from BUILT_IN_GOACC_PARALLEL arguments.
|
||||||
|
(find_func_clobbers): Likewise.
|
||||||
|
(ipa_pta_execute): Update for removed 'params' argument.
|
||||||
|
|
||||||
2019-07-31 Julian Brown <julian@codesourcery.com>
|
2019-07-31 Julian Brown <julian@codesourcery.com>
|
||||||
Andrew Stubbs <ams@codesourcery.com>
|
Andrew Stubbs <ams@codesourcery.com>
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -826,10 +826,6 @@ DEF_FUNCTION_TYPE_VAR_6 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
|
||||||
BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE,
|
BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE,
|
||||||
BT_PTR, BT_PTR, BT_PTR)
|
BT_PTR, BT_PTR, BT_PTR)
|
||||||
|
|
||||||
DEF_FUNCTION_TYPE_VAR_7 (BT_FN_VOID_INT_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
|
|
||||||
BT_VOID, BT_INT, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE,
|
|
||||||
BT_PTR, BT_PTR, BT_PTR)
|
|
||||||
|
|
||||||
DEF_FUNCTION_TYPE_VAR_7 (BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
|
DEF_FUNCTION_TYPE_VAR_7 (BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
|
||||||
BT_VOID, BT_INT, BT_SIZE, BT_PTR, BT_PTR,
|
BT_VOID, BT_INT, BT_SIZE, BT_PTR, BT_PTR,
|
||||||
BT_PTR, BT_INT, BT_INT)
|
BT_PTR, BT_INT, BT_INT)
|
||||||
|
|
|
||||||
|
|
@ -51206,6 +51206,35 @@ ix86_push_rounding (poly_int64 bytes)
|
||||||
return ROUND_UP (bytes, UNITS_PER_WORD);
|
return ROUND_UP (bytes, UNITS_PER_WORD);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return TRUE if offloaded OpenACC target-code regions should have their
|
||||||
|
parameters passed as separate function arguments, rather than in an array.
|
||||||
|
This can be a performance win on some (NVidia) GPUs. */
|
||||||
|
|
||||||
|
bool
|
||||||
|
ix86_goacc_explode_args (void)
|
||||||
|
{
|
||||||
|
#ifdef OFFLOAD_TARGETS
|
||||||
|
const char *offload_targets = OFFLOAD_TARGETS;
|
||||||
|
if (strstr (offload_targets, "nvptx"))
|
||||||
|
{
|
||||||
|
if (strchr (offload_targets, ','))
|
||||||
|
{
|
||||||
|
static bool warned_ptx_args = false;
|
||||||
|
if (!warned_ptx_args)
|
||||||
|
{
|
||||||
|
warning (0, "NVidia PTX parameter-passing optimization disabled "
|
||||||
|
"with multiple offload targets");
|
||||||
|
warned_ptx_args = true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/* Target-specific selftests. */
|
/* Target-specific selftests. */
|
||||||
|
|
||||||
#if CHECKING_P
|
#if CHECKING_P
|
||||||
|
|
@ -51981,6 +52010,9 @@ ix86_run_selftests (void)
|
||||||
#define TARGET_GET_MULTILIB_ABI_NAME \
|
#define TARGET_GET_MULTILIB_ABI_NAME \
|
||||||
ix86_get_multilib_abi_name
|
ix86_get_multilib_abi_name
|
||||||
|
|
||||||
|
#undef TARGET_GOACC_EXPLODE_ARGS
|
||||||
|
#define TARGET_GOACC_EXPLODE_ARGS ix86_goacc_explode_args
|
||||||
|
|
||||||
#if CHECKING_P
|
#if CHECKING_P
|
||||||
#undef TARGET_RUN_TARGET_SELFTESTS
|
#undef TARGET_RUN_TARGET_SELFTESTS
|
||||||
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
|
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
|
||||||
|
|
|
||||||
|
|
@ -6162,6 +6162,11 @@ memories. A return value of NULL indicates that the target does not
|
||||||
handle this VAR_DECL, and normal RTL expanding is resumed.
|
handle this VAR_DECL, and normal RTL expanding is resumed.
|
||||||
@end deftypefn
|
@end deftypefn
|
||||||
|
|
||||||
|
@deftypefn {Target Hook} bool TARGET_GOACC_EXPLODE_ARGS (void)
|
||||||
|
Define this hook to TRUE if arguments to offload regions should be
|
||||||
|
exploded, i.e. passed as true arguments rather than in an argument array.
|
||||||
|
@end deftypefn
|
||||||
|
|
||||||
@node Anchored Addresses
|
@node Anchored Addresses
|
||||||
@section Anchored Addresses
|
@section Anchored Addresses
|
||||||
@cindex anchored addresses
|
@cindex anchored addresses
|
||||||
|
|
|
||||||
|
|
@ -4210,6 +4210,8 @@ address; but often a machine-dependent strategy can generate better code.
|
||||||
|
|
||||||
@hook TARGET_GOACC_EXPAND_ACCEL_VAR
|
@hook TARGET_GOACC_EXPAND_ACCEL_VAR
|
||||||
|
|
||||||
|
@hook TARGET_GOACC_EXPLODE_ARGS
|
||||||
|
|
||||||
@node Anchored Addresses
|
@node Anchored Addresses
|
||||||
@section Anchored Addresses
|
@section Anchored Addresses
|
||||||
@cindex anchored addresses
|
@cindex anchored addresses
|
||||||
|
|
|
||||||
|
|
@ -272,7 +272,3 @@ DEF_FUNCTION_TYPE_VAR_7 (BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
|
||||||
DEF_FUNCTION_TYPE_VAR_6 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
|
DEF_FUNCTION_TYPE_VAR_6 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
|
||||||
BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE,
|
BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE,
|
||||||
BT_PTR, BT_PTR, BT_PTR)
|
BT_PTR, BT_PTR, BT_PTR)
|
||||||
|
|
||||||
DEF_FUNCTION_TYPE_VAR_7 (BT_FN_VOID_INT_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
|
|
||||||
BT_VOID, BT_INT, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE,
|
|
||||||
BT_PTR, BT_PTR, BT_PTR)
|
|
||||||
|
|
|
||||||
|
|
@ -38,8 +38,8 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DATA_END, "GOACC_data_end",
|
||||||
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_ENTER_EXIT_DATA, "GOACC_enter_exit_data",
|
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_ENTER_EXIT_DATA, "GOACC_enter_exit_data",
|
||||||
BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
|
BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
|
||||||
ATTR_NOTHROW_LIST)
|
ATTR_NOTHROW_LIST)
|
||||||
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_PARALLEL, "GOACC_parallel_keyed_v2",
|
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_PARALLEL, "GOACC_parallel_keyed",
|
||||||
BT_FN_VOID_INT_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
|
BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
|
||||||
ATTR_NOTHROW_LIST)
|
ATTR_NOTHROW_LIST)
|
||||||
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update",
|
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update",
|
||||||
BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
|
BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
|
||||||
|
|
|
||||||
|
|
@ -7306,11 +7306,11 @@ expand_omp_target (struct omp_region *region)
|
||||||
gomp_target *entry_stmt;
|
gomp_target *entry_stmt;
|
||||||
gimple *stmt;
|
gimple *stmt;
|
||||||
edge e;
|
edge e;
|
||||||
bool offloaded, data_region, oacc_parallel;
|
bool offloaded, data_region, oacc_explode_args;
|
||||||
|
|
||||||
entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
|
entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
|
||||||
new_bb = region->entry;
|
new_bb = region->entry;
|
||||||
oacc_parallel = false;
|
oacc_explode_args = false;
|
||||||
|
|
||||||
offloaded = is_gimple_omp_offloaded (entry_stmt);
|
offloaded = is_gimple_omp_offloaded (entry_stmt);
|
||||||
switch (gimple_omp_target_kind (entry_stmt))
|
switch (gimple_omp_target_kind (entry_stmt))
|
||||||
|
|
@ -7319,7 +7319,8 @@ expand_omp_target (struct omp_region *region)
|
||||||
case GF_OMP_TARGET_KIND_OACC_SERIAL:
|
case GF_OMP_TARGET_KIND_OACC_SERIAL:
|
||||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
|
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
|
||||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
|
case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
|
||||||
oacc_parallel = true;
|
if (targetm.goacc.explode_args ())
|
||||||
|
oacc_explode_args = true;
|
||||||
gcc_fallthrough ();
|
gcc_fallthrough ();
|
||||||
case GF_OMP_TARGET_KIND_REGION:
|
case GF_OMP_TARGET_KIND_REGION:
|
||||||
case GF_OMP_TARGET_KIND_UPDATE:
|
case GF_OMP_TARGET_KIND_UPDATE:
|
||||||
|
|
@ -7406,7 +7407,7 @@ expand_omp_target (struct omp_region *region)
|
||||||
.OMP_DATA_I may have been converted into a different local
|
.OMP_DATA_I may have been converted into a different local
|
||||||
variable. In which case, we need to keep the assignment. */
|
variable. In which case, we need to keep the assignment. */
|
||||||
tree data_arg = gimple_omp_target_data_arg (entry_stmt);
|
tree data_arg = gimple_omp_target_data_arg (entry_stmt);
|
||||||
if (data_arg && !oacc_parallel)
|
if (data_arg && !oacc_explode_args)
|
||||||
{
|
{
|
||||||
basic_block entry_succ_bb = single_succ (entry_bb);
|
basic_block entry_succ_bb = single_succ (entry_bb);
|
||||||
gimple_stmt_iterator gsi;
|
gimple_stmt_iterator gsi;
|
||||||
|
|
@ -7772,11 +7773,6 @@ expand_omp_target (struct omp_region *region)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
args.quick_push (device);
|
args.quick_push (device);
|
||||||
if (start_ix == BUILT_IN_GOACC_PARALLEL)
|
|
||||||
{
|
|
||||||
tree use_params = oacc_parallel ? integer_one_node : integer_zero_node;
|
|
||||||
args.quick_push (use_params);
|
|
||||||
}
|
|
||||||
if (offloaded)
|
if (offloaded)
|
||||||
args.quick_push (build_fold_addr_expr (child_fn));
|
args.quick_push (build_fold_addr_expr (child_fn));
|
||||||
args.quick_push (t1);
|
args.quick_push (t1);
|
||||||
|
|
@ -7885,6 +7881,10 @@ expand_omp_target (struct omp_region *region)
|
||||||
unsigned_type_node, len);
|
unsigned_type_node, len);
|
||||||
args[t_wait_idx] = len;
|
args[t_wait_idx] = len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (tagging && oacc_explode_args)
|
||||||
|
args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ARGS_EXPLODED,
|
||||||
|
NULL_TREE, 0));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
|
||||||
|
|
@ -562,7 +562,7 @@ build_receiver_ref (tree var, bool by_ref, omp_context *ctx)
|
||||||
{
|
{
|
||||||
tree x, field = lookup_field (var, ctx);
|
tree x, field = lookup_field (var, ctx);
|
||||||
|
|
||||||
if (is_oacc_parallel_or_serial (ctx))
|
if (is_oacc_parallel_or_serial (ctx) && targetm.goacc.explode_args ())
|
||||||
x = lookup_parm (var, ctx);
|
x = lookup_parm (var, ctx);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
@ -716,7 +716,7 @@ build_sender_ref (tree var, omp_context *ctx)
|
||||||
static void
|
static void
|
||||||
install_parm_decl (tree var, tree type, omp_context *ctx)
|
install_parm_decl (tree var, tree type, omp_context *ctx)
|
||||||
{
|
{
|
||||||
if (!is_oacc_parallel_or_serial (ctx))
|
if (!is_oacc_parallel_or_serial (ctx) || !targetm.goacc.explode_args ())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
splay_tree_key key = (splay_tree_key) var;
|
splay_tree_key key = (splay_tree_key) var;
|
||||||
|
|
@ -1932,7 +1932,7 @@ create_omp_child_function (omp_context *ctx, bool task_copy,
|
||||||
if (task_copy)
|
if (task_copy)
|
||||||
type = build_function_type_list (void_type_node, ptr_type_node,
|
type = build_function_type_list (void_type_node, ptr_type_node,
|
||||||
ptr_type_node, NULL_TREE);
|
ptr_type_node, NULL_TREE);
|
||||||
else if (is_oacc_parallel_or_serial (ctx))
|
else if (is_oacc_parallel_or_serial (ctx) && targetm.goacc.explode_args ())
|
||||||
{
|
{
|
||||||
tree *arg_types = (tree *) alloca (sizeof (tree) * map_cnt);
|
tree *arg_types = (tree *) alloca (sizeof (tree) * map_cnt);
|
||||||
for (unsigned int i = 0; i < map_cnt; i++)
|
for (unsigned int i = 0; i < map_cnt; i++)
|
||||||
|
|
@ -2012,7 +2012,7 @@ create_omp_child_function (omp_context *ctx, bool task_copy,
|
||||||
DECL_CONTEXT (t) = decl;
|
DECL_CONTEXT (t) = decl;
|
||||||
DECL_RESULT (decl) = t;
|
DECL_RESULT (decl) = t;
|
||||||
|
|
||||||
if (!is_oacc_parallel_or_serial (ctx))
|
if (!is_oacc_parallel_or_serial (ctx) || !targetm.goacc.explode_args ())
|
||||||
{
|
{
|
||||||
tree data_name = get_identifier (".omp_data_i");
|
tree data_name = get_identifier (".omp_data_i");
|
||||||
t = build_decl (DECL_SOURCE_LOCATION (decl), PARM_DECL, data_name,
|
t = build_decl (DECL_SOURCE_LOCATION (decl), PARM_DECL, data_name,
|
||||||
|
|
@ -2947,7 +2947,7 @@ scan_omp_target (gomp_target *stmt, omp_context *outer_ctx)
|
||||||
bool base_pointers_restrict = false;
|
bool base_pointers_restrict = false;
|
||||||
if (offloaded)
|
if (offloaded)
|
||||||
{
|
{
|
||||||
if (!is_oacc_parallel_or_serial (ctx))
|
if (!is_oacc_parallel_or_serial (ctx) || !targetm.goacc.explode_args ())
|
||||||
{
|
{
|
||||||
create_omp_child_function (ctx, false);
|
create_omp_child_function (ctx, false);
|
||||||
gimple_omp_target_set_child_fn (stmt, ctx->cb.dst_fn);
|
gimple_omp_target_set_child_fn (stmt, ctx->cb.dst_fn);
|
||||||
|
|
@ -9826,6 +9826,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
||||||
location_t loc = gimple_location (stmt);
|
location_t loc = gimple_location (stmt);
|
||||||
bool offloaded, data_region;
|
bool offloaded, data_region;
|
||||||
unsigned int map_cnt = 0, init_cnt = 0;
|
unsigned int map_cnt = 0, init_cnt = 0;
|
||||||
|
bool oacc_explode_args = targetm.goacc.explode_args ();
|
||||||
|
|
||||||
offloaded = is_gimple_omp_offloaded (stmt);
|
offloaded = is_gimple_omp_offloaded (stmt);
|
||||||
switch (gimple_omp_target_kind (stmt))
|
switch (gimple_omp_target_kind (stmt))
|
||||||
|
|
@ -9883,7 +9884,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
||||||
|
|
||||||
/* Determine init_cnt to finish initializing ctx. */
|
/* Determine init_cnt to finish initializing ctx. */
|
||||||
|
|
||||||
if (is_oacc_parallel_or_serial (ctx))
|
if (is_oacc_parallel_or_serial (ctx) && oacc_explode_args)
|
||||||
{
|
{
|
||||||
for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
|
for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
|
||||||
switch (OMP_CLAUSE_CODE (c))
|
switch (OMP_CLAUSE_CODE (c))
|
||||||
|
|
@ -10215,7 +10216,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
||||||
|
|
||||||
if (offloaded)
|
if (offloaded)
|
||||||
{
|
{
|
||||||
if (is_oacc_parallel_or_serial (ctx))
|
if (is_oacc_parallel_or_serial (ctx) && oacc_explode_args)
|
||||||
gcc_assert (init_cnt == map_cnt);
|
gcc_assert (init_cnt == map_cnt);
|
||||||
target_nesting_level++;
|
target_nesting_level++;
|
||||||
lower_omp (&tgt_body, ctx);
|
lower_omp (&tgt_body, ctx);
|
||||||
|
|
@ -10459,6 +10460,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
||||||
if (s == NULL_TREE && is_gimple_omp_oacc (ctx->stmt))
|
if (s == NULL_TREE && is_gimple_omp_oacc (ctx->stmt))
|
||||||
s = integer_one_node;
|
s = integer_one_node;
|
||||||
s = fold_convert (size_type_node, s);
|
s = fold_convert (size_type_node, s);
|
||||||
|
if (oacc_explode_args)
|
||||||
decl_args = append_decl_arg (ovar, decl_args, ctx);
|
decl_args = append_decl_arg (ovar, decl_args, ctx);
|
||||||
purpose = size_int (map_idx++);
|
purpose = size_int (map_idx++);
|
||||||
CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
|
CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
|
||||||
|
|
@ -10601,6 +10603,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
||||||
else
|
else
|
||||||
s = TYPE_SIZE_UNIT (TREE_TYPE (ovar));
|
s = TYPE_SIZE_UNIT (TREE_TYPE (ovar));
|
||||||
s = fold_convert (size_type_node, s);
|
s = fold_convert (size_type_node, s);
|
||||||
|
if (oacc_explode_args)
|
||||||
decl_args = append_decl_arg (ovar, decl_args, ctx);
|
decl_args = append_decl_arg (ovar, decl_args, ctx);
|
||||||
purpose = size_int (map_idx++);
|
purpose = size_int (map_idx++);
|
||||||
CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
|
CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
|
||||||
|
|
@ -10674,6 +10677,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
||||||
gimple_build_label (opt_arg_label));
|
gimple_build_label (opt_arg_label));
|
||||||
}
|
}
|
||||||
s = size_int (0);
|
s = size_int (0);
|
||||||
|
if (oacc_explode_args)
|
||||||
decl_args = append_decl_arg (ovar, decl_args, ctx);
|
decl_args = append_decl_arg (ovar, decl_args, ctx);
|
||||||
purpose = size_int (map_idx++);
|
purpose = size_int (map_idx++);
|
||||||
CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
|
CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
|
||||||
|
|
@ -10687,7 +10691,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
gcc_assert (map_idx == map_cnt);
|
gcc_assert (map_idx == map_cnt);
|
||||||
if (is_oacc_parallel_or_serial (ctx))
|
if (is_oacc_parallel_or_serial (ctx) && oacc_explode_args)
|
||||||
DECL_ARGUMENTS (child_fn) = nreverse (decl_args);
|
DECL_ARGUMENTS (child_fn) = nreverse (decl_args);
|
||||||
|
|
||||||
DECL_INITIAL (TREE_VEC_ELT (t, 1))
|
DECL_INITIAL (TREE_VEC_ELT (t, 1))
|
||||||
|
|
@ -10727,7 +10731,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
||||||
{
|
{
|
||||||
t = build_fold_addr_expr_loc (loc, ctx->sender_decl);
|
t = build_fold_addr_expr_loc (loc, ctx->sender_decl);
|
||||||
/* fixup_child_record_type might have changed receiver_decl's type. */
|
/* fixup_child_record_type might have changed receiver_decl's type. */
|
||||||
if (!is_oacc_parallel_or_serial (ctx))
|
if (!is_oacc_parallel_or_serial (ctx) || !oacc_explode_args)
|
||||||
{
|
{
|
||||||
t = fold_convert_loc (loc, TREE_TYPE (ctx->receiver_decl), t);
|
t = fold_convert_loc (loc, TREE_TYPE (ctx->receiver_decl), t);
|
||||||
gimple_seq_add_stmt (&new_body,
|
gimple_seq_add_stmt (&new_body,
|
||||||
|
|
|
||||||
|
|
@ -1729,6 +1729,13 @@ handle this VAR_DECL, and normal RTL expanding is resumed.",
|
||||||
rtx, (tree var),
|
rtx, (tree var),
|
||||||
NULL)
|
NULL)
|
||||||
|
|
||||||
|
DEFHOOK
|
||||||
|
(explode_args,
|
||||||
|
"Define this hook to TRUE if arguments to offload regions should be\n\
|
||||||
|
exploded, i.e. passed as true arguments rather than in an argument array.",
|
||||||
|
bool, (void),
|
||||||
|
hook_bool_void_false)
|
||||||
|
|
||||||
HOOK_VECTOR_END (goacc)
|
HOOK_VECTOR_END (goacc)
|
||||||
|
|
||||||
/* Functions relating to vectorization. */
|
/* Functions relating to vectorization. */
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,8 @@
|
||||||
#include "stringpool.h"
|
#include "stringpool.h"
|
||||||
#include "attribs.h"
|
#include "attribs.h"
|
||||||
#include "tree-ssa.h"
|
#include "tree-ssa.h"
|
||||||
|
#include "target.h"
|
||||||
|
#include "gomp-constants.h"
|
||||||
|
|
||||||
/* The idea behind this analyzer is to generate set constraints from the
|
/* The idea behind this analyzer is to generate set constraints from the
|
||||||
program, then solve the resulting constraints in order to generate the
|
program, then solve the resulting constraints in order to generate the
|
||||||
|
|
@ -4692,10 +4694,10 @@ find_func_aliases_for_builtin_call (struct function *fn, gcall *t)
|
||||||
case BUILT_IN_GOMP_PARALLEL:
|
case BUILT_IN_GOMP_PARALLEL:
|
||||||
case BUILT_IN_GOACC_PARALLEL:
|
case BUILT_IN_GOACC_PARALLEL:
|
||||||
{
|
{
|
||||||
bool oacc_parallel = false;
|
|
||||||
if (in_ipa_mode)
|
if (in_ipa_mode)
|
||||||
{
|
{
|
||||||
unsigned int fnpos, argpos;
|
unsigned int fnpos, argpos;
|
||||||
|
bool oacc_exploded_parallel = false;
|
||||||
switch (DECL_FUNCTION_CODE (fndecl))
|
switch (DECL_FUNCTION_CODE (fndecl))
|
||||||
{
|
{
|
||||||
case BUILT_IN_GOMP_PARALLEL:
|
case BUILT_IN_GOMP_PARALLEL:
|
||||||
|
|
@ -4706,15 +4708,27 @@ find_func_aliases_for_builtin_call (struct function *fn, gcall *t)
|
||||||
case BUILT_IN_GOACC_PARALLEL:
|
case BUILT_IN_GOACC_PARALLEL:
|
||||||
/* __builtin_GOACC_parallel (flags_m, fn, mapnum, hostaddrs,
|
/* __builtin_GOACC_parallel (flags_m, fn, mapnum, hostaddrs,
|
||||||
sizes, kinds, ...). */
|
sizes, kinds, ...). */
|
||||||
fnpos = 2;
|
fnpos = 1;
|
||||||
argpos = 4;
|
argpos = 3;
|
||||||
oacc_parallel = gimple_call_arg (t, 1) == integer_one_node;
|
if (targetm.goacc.explode_args ())
|
||||||
|
for (int i = 6; i < gimple_call_num_args (t); i++)
|
||||||
|
{
|
||||||
|
tree arg = gimple_call_arg (t, i);
|
||||||
|
if (TREE_CODE (arg) == INTEGER_CST
|
||||||
|
&& (tree_to_shwi (arg)
|
||||||
|
== GOMP_LAUNCH_PACK (GOMP_LAUNCH_ARGS_EXPLODED,
|
||||||
|
0, 0)))
|
||||||
|
{
|
||||||
|
oacc_exploded_parallel = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
gcc_unreachable ();
|
gcc_unreachable ();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (oacc_parallel)
|
if (oacc_exploded_parallel)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
tree fnarg = gimple_call_arg (t, fnpos);
|
tree fnarg = gimple_call_arg (t, fnpos);
|
||||||
|
|
@ -5258,7 +5272,7 @@ find_func_clobbers (struct function *fn, gimple *origt)
|
||||||
unsigned int fnpos, argpos;
|
unsigned int fnpos, argpos;
|
||||||
unsigned int implicit_use_args[2];
|
unsigned int implicit_use_args[2];
|
||||||
unsigned int num_implicit_use_args = 0;
|
unsigned int num_implicit_use_args = 0;
|
||||||
bool oacc_parallel = false;
|
bool oacc_exploded_parallel = false;
|
||||||
switch (DECL_FUNCTION_CODE (decl))
|
switch (DECL_FUNCTION_CODE (decl))
|
||||||
{
|
{
|
||||||
case BUILT_IN_GOMP_PARALLEL:
|
case BUILT_IN_GOMP_PARALLEL:
|
||||||
|
|
@ -5269,17 +5283,29 @@ find_func_clobbers (struct function *fn, gimple *origt)
|
||||||
case BUILT_IN_GOACC_PARALLEL:
|
case BUILT_IN_GOACC_PARALLEL:
|
||||||
/* __builtin_GOACC_parallel (flags_m, fn, mapnum, hostaddrs,
|
/* __builtin_GOACC_parallel (flags_m, fn, mapnum, hostaddrs,
|
||||||
sizes, kinds, ...). */
|
sizes, kinds, ...). */
|
||||||
fnpos = 2;
|
fnpos = 1;
|
||||||
argpos = 4;
|
argpos = 3;
|
||||||
|
implicit_use_args[num_implicit_use_args++] = 4;
|
||||||
implicit_use_args[num_implicit_use_args++] = 5;
|
implicit_use_args[num_implicit_use_args++] = 5;
|
||||||
implicit_use_args[num_implicit_use_args++] = 6;
|
if (targetm.goacc.explode_args ())
|
||||||
oacc_parallel = gimple_call_arg (t, 1) == integer_one_node;
|
for (int i = 6; i < gimple_call_num_args (t); i++)
|
||||||
|
{
|
||||||
|
tree arg = gimple_call_arg (t, i);
|
||||||
|
if (TREE_CODE (arg) == INTEGER_CST
|
||||||
|
&& (tree_to_shwi (arg)
|
||||||
|
== GOMP_LAUNCH_PACK (GOMP_LAUNCH_ARGS_EXPLODED,
|
||||||
|
0, 0)))
|
||||||
|
{
|
||||||
|
oacc_exploded_parallel = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
gcc_unreachable ();
|
gcc_unreachable ();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (oacc_parallel)
|
if (oacc_exploded_parallel)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
tree fnarg = gimple_call_arg (t, fnpos);
|
tree fnarg = gimple_call_arg (t, fnpos);
|
||||||
|
|
@ -8216,7 +8242,7 @@ ipa_pta_execute (void)
|
||||||
if (gimple_call_builtin_p (stmt, BUILT_IN_GOMP_PARALLEL))
|
if (gimple_call_builtin_p (stmt, BUILT_IN_GOMP_PARALLEL))
|
||||||
called_decl = TREE_OPERAND (gimple_call_arg (stmt, 0), 0);
|
called_decl = TREE_OPERAND (gimple_call_arg (stmt, 0), 0);
|
||||||
else if (gimple_call_builtin_p (stmt, BUILT_IN_GOACC_PARALLEL))
|
else if (gimple_call_builtin_p (stmt, BUILT_IN_GOACC_PARALLEL))
|
||||||
called_decl = TREE_OPERAND (gimple_call_arg (stmt, 2), 0);
|
called_decl = TREE_OPERAND (gimple_call_arg (stmt, 1), 0);
|
||||||
|
|
||||||
if (called_decl != NULL_TREE
|
if (called_decl != NULL_TREE
|
||||||
&& !fndecl_maybe_in_other_partition (called_decl))
|
&& !fndecl_maybe_in_other_partition (called_decl))
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,7 @@
|
||||||
|
2019-07-31 Julian Brown <julian@codesourcery.com>
|
||||||
|
|
||||||
|
* gomp-constants.h (GOMP_LAUNCH_ARGS_EXPLODED): Define.
|
||||||
|
|
||||||
2019-07-10 Julian Brown <julian@codesourcery.com>
|
2019-07-10 Julian Brown <julian@codesourcery.com>
|
||||||
|
|
||||||
* gomp-constants.h (gomp_map_kind): Add GOMP_MAP_ATTACH_DETACH.
|
* gomp-constants.h (gomp_map_kind): Add GOMP_MAP_ATTACH_DETACH.
|
||||||
|
|
|
||||||
|
|
@ -293,6 +293,7 @@ enum gomp_map_kind
|
||||||
#define GOMP_LAUNCH_DIM 1 /* Launch dimensions, op = mask */
|
#define GOMP_LAUNCH_DIM 1 /* Launch dimensions, op = mask */
|
||||||
#define GOMP_LAUNCH_ASYNC 2 /* Async, op = cst val if not MAX */
|
#define GOMP_LAUNCH_ASYNC 2 /* Async, op = cst val if not MAX */
|
||||||
#define GOMP_LAUNCH_WAIT 3 /* Waits, op = num waits. */
|
#define GOMP_LAUNCH_WAIT 3 /* Waits, op = num waits. */
|
||||||
|
#define GOMP_LAUNCH_ARGS_EXPLODED 4 /* Exploded args, op ignored. */
|
||||||
#define GOMP_LAUNCH_CODE_SHIFT 28
|
#define GOMP_LAUNCH_CODE_SHIFT 28
|
||||||
#define GOMP_LAUNCH_DEVICE_SHIFT 16
|
#define GOMP_LAUNCH_DEVICE_SHIFT 16
|
||||||
#define GOMP_LAUNCH_OP_SHIFT 0
|
#define GOMP_LAUNCH_OP_SHIFT 0
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,12 @@
|
||||||
|
2019-07-31 Julian Brown <julian@codesourcery.com>
|
||||||
|
|
||||||
|
* libgomp.map (GOMP_2.0.GOMP_4_BRANCH): Remove GOACC_parallel_keyed_v2.
|
||||||
|
* libgomp_g.h (GOACC_parallel_keyed_v2): Remove prototype.
|
||||||
|
* oacc-parallel.c (GOACC_parallel_keyed_internal): Rename to...
|
||||||
|
(GOACC_parallel_keyed): ...this. Handle GOMP_LAUNCH_ARGS_EXPLODED
|
||||||
|
launch tag. Remove previous wrapper functions.
|
||||||
|
(GOACC_parallel_keyed_v2): Remove.
|
||||||
|
|
||||||
2019-07-31 Julian Brown <julian@codesourcery.com>
|
2019-07-31 Julian Brown <julian@codesourcery.com>
|
||||||
Andrew Stubbs <ams@codesourcery.com>
|
Andrew Stubbs <ams@codesourcery.com>
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -522,7 +522,6 @@ GOACC_2.0.1 {
|
||||||
GOACC_2.0.GOMP_4_BRANCH {
|
GOACC_2.0.GOMP_4_BRANCH {
|
||||||
global:
|
global:
|
||||||
GOMP_set_offload_targets;
|
GOMP_set_offload_targets;
|
||||||
GOACC_parallel_keyed_v2;
|
|
||||||
} GOACC_2.0.1;
|
} GOACC_2.0.1;
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -362,8 +362,6 @@ extern void GOMP_teams_reg (void (*) (void *), void *, unsigned, unsigned,
|
||||||
|
|
||||||
extern void GOACC_parallel_keyed (int, void (*) (void *), size_t,
|
extern void GOACC_parallel_keyed (int, void (*) (void *), size_t,
|
||||||
void **, size_t *, unsigned short *, ...);
|
void **, size_t *, unsigned short *, ...);
|
||||||
extern void GOACC_parallel_keyed_v2 (int, int, void (*) (void *), size_t,
|
|
||||||
void **, size_t *, unsigned short *, ...);
|
|
||||||
extern void GOACC_parallel (int, void (*) (void *), size_t, void **, size_t *,
|
extern void GOACC_parallel (int, void (*) (void *), size_t, void **, size_t *,
|
||||||
unsigned short *, int, int, int, int, int, ...);
|
unsigned short *, int, int, int, int, int, ...);
|
||||||
extern void GOACC_data_start (int, size_t, void **, size_t *,
|
extern void GOACC_data_start (int, size_t, void **, size_t *,
|
||||||
|
|
|
||||||
|
|
@ -174,13 +174,14 @@ goacc_call_host_fn (void (*fn) (void *), size_t mapnum, void **hostaddrs,
|
||||||
blocks to be copied to/from the device. Varadic arguments are
|
blocks to be copied to/from the device. Varadic arguments are
|
||||||
keyed optional parameters terminated with a zero. */
|
keyed optional parameters terminated with a zero. */
|
||||||
|
|
||||||
static void
|
void
|
||||||
GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
|
GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum,
|
||||||
size_t mapnum, void **hostaddrs, size_t *sizes,
|
void **hostaddrs, size_t *sizes, unsigned short *kinds,
|
||||||
unsigned short *kinds, va_list *ap)
|
...)
|
||||||
{
|
{
|
||||||
int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
|
int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
|
||||||
|
|
||||||
|
va_list ap;
|
||||||
struct goacc_thread *thr;
|
struct goacc_thread *thr;
|
||||||
struct gomp_device_descr *acc_dev;
|
struct gomp_device_descr *acc_dev;
|
||||||
struct target_mem_desc *tgt;
|
struct target_mem_desc *tgt;
|
||||||
|
|
@ -192,6 +193,7 @@ GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
|
||||||
int async = GOMP_ASYNC_SYNC;
|
int async = GOMP_ASYNC_SYNC;
|
||||||
unsigned dims[GOMP_DIM_MAX];
|
unsigned dims[GOMP_DIM_MAX];
|
||||||
unsigned tag;
|
unsigned tag;
|
||||||
|
bool args_exploded = false;
|
||||||
|
|
||||||
#ifdef HAVE_INTTYPES_H
|
#ifdef HAVE_INTTYPES_H
|
||||||
gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
|
gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
|
||||||
|
|
@ -259,31 +261,14 @@ GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
|
||||||
|
|
||||||
handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
|
handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
|
||||||
|
|
||||||
/* Host fallback if "if" clause is false or if the current device is set to
|
|
||||||
the host. */
|
|
||||||
if (flags & GOACC_FLAG_HOST_FALLBACK)
|
|
||||||
{
|
|
||||||
prof_info.device_type = acc_device_host;
|
|
||||||
api_info.device_type = prof_info.device_type;
|
|
||||||
goacc_save_and_set_bind (acc_device_host);
|
|
||||||
goacc_call_host_fn (fn, mapnum, hostaddrs, params);
|
|
||||||
goacc_restore_bind ();
|
|
||||||
goto out_prof;
|
|
||||||
}
|
|
||||||
else if (acc_device_type (acc_dev->type) == acc_device_host)
|
|
||||||
{
|
|
||||||
goacc_call_host_fn (fn, mapnum, hostaddrs, params);
|
|
||||||
goto out_prof;
|
|
||||||
}
|
|
||||||
else if (profiling_p)
|
|
||||||
api_info.device_api = acc_device_api_cuda;
|
|
||||||
|
|
||||||
/* Default: let the runtime choose. */
|
/* Default: let the runtime choose. */
|
||||||
for (i = 0; i != GOMP_DIM_MAX; i++)
|
for (i = 0; i != GOMP_DIM_MAX; i++)
|
||||||
dims[i] = 0;
|
dims[i] = 0;
|
||||||
|
|
||||||
|
va_start (ap, kinds);
|
||||||
|
|
||||||
/* TODO: This will need amending when device_type is implemented. */
|
/* TODO: This will need amending when device_type is implemented. */
|
||||||
while ((tag = va_arg (*ap, unsigned)) != 0)
|
while ((tag = va_arg (ap, unsigned)) != 0)
|
||||||
{
|
{
|
||||||
if (GOMP_LAUNCH_DEVICE (tag))
|
if (GOMP_LAUNCH_DEVICE (tag))
|
||||||
gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
|
gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
|
||||||
|
|
@ -297,7 +282,7 @@ GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
|
||||||
|
|
||||||
for (i = 0; i != GOMP_DIM_MAX; i++)
|
for (i = 0; i != GOMP_DIM_MAX; i++)
|
||||||
if (mask & GOMP_DIM_MASK (i))
|
if (mask & GOMP_DIM_MASK (i))
|
||||||
dims[i] = va_arg (*ap, unsigned);
|
dims[i] = va_arg (ap, unsigned);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
@ -307,7 +292,7 @@ GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
|
||||||
async = GOMP_LAUNCH_OP (tag);
|
async = GOMP_LAUNCH_OP (tag);
|
||||||
|
|
||||||
if (async == GOMP_LAUNCH_OP_MAX)
|
if (async == GOMP_LAUNCH_OP_MAX)
|
||||||
async = va_arg (*ap, unsigned);
|
async = va_arg (ap, unsigned);
|
||||||
|
|
||||||
if (profiling_p)
|
if (profiling_p)
|
||||||
{
|
{
|
||||||
|
|
@ -321,15 +306,39 @@ GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
|
||||||
case GOMP_LAUNCH_WAIT:
|
case GOMP_LAUNCH_WAIT:
|
||||||
{
|
{
|
||||||
unsigned num_waits = GOMP_LAUNCH_OP (tag);
|
unsigned num_waits = GOMP_LAUNCH_OP (tag);
|
||||||
goacc_wait (async, num_waits, ap);
|
goacc_wait (async, num_waits, &ap);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case GOMP_LAUNCH_ARGS_EXPLODED:
|
||||||
|
args_exploded = true;
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
gomp_fatal ("unrecognized offload code '%d',"
|
gomp_fatal ("unrecognized offload code '%d',"
|
||||||
" libgomp is too old", GOMP_LAUNCH_CODE (tag));
|
" libgomp is too old", GOMP_LAUNCH_CODE (tag));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
va_end (ap);
|
||||||
|
|
||||||
|
/* Host fallback if "if" clause is false or if the current device is set to
|
||||||
|
the host. */
|
||||||
|
if (flags & GOACC_FLAG_HOST_FALLBACK)
|
||||||
|
{
|
||||||
|
prof_info.device_type = acc_device_host;
|
||||||
|
api_info.device_type = prof_info.device_type;
|
||||||
|
goacc_save_and_set_bind (acc_device_host);
|
||||||
|
goacc_call_host_fn (fn, mapnum, hostaddrs, args_exploded);
|
||||||
|
goacc_restore_bind ();
|
||||||
|
goto out_prof;
|
||||||
|
}
|
||||||
|
else if (acc_device_type (acc_dev->type) == acc_device_host)
|
||||||
|
{
|
||||||
|
goacc_call_host_fn (fn, mapnum, hostaddrs, args_exploded);
|
||||||
|
goto out_prof;
|
||||||
|
}
|
||||||
|
else if (profiling_p)
|
||||||
|
api_info.device_api = acc_device_api_cuda;
|
||||||
|
|
||||||
if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
|
if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
|
||||||
{
|
{
|
||||||
|
|
@ -392,7 +401,7 @@ GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
|
||||||
|
|
||||||
if (aq == NULL)
|
if (aq == NULL)
|
||||||
{
|
{
|
||||||
if (params)
|
if (args_exploded)
|
||||||
acc_dev->openacc.exec_params_func (tgt_fn, mapnum, hostaddrs, devaddrs,
|
acc_dev->openacc.exec_params_func (tgt_fn, mapnum, hostaddrs, devaddrs,
|
||||||
dims, tgt);
|
dims, tgt);
|
||||||
else
|
else
|
||||||
|
|
@ -401,7 +410,7 @@ GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (params)
|
if (args_exploded)
|
||||||
acc_dev->openacc.async.exec_params_func (tgt_fn, mapnum, hostaddrs,
|
acc_dev->openacc.async.exec_params_func (tgt_fn, mapnum, hostaddrs,
|
||||||
devaddrs, dims, tgt, aq);
|
devaddrs, dims, tgt, aq);
|
||||||
else
|
else
|
||||||
|
|
@ -452,30 +461,6 @@ GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
|
|
||||||
size_t mapnum, void **hostaddrs, size_t *sizes,
|
|
||||||
unsigned short *kinds, ...)
|
|
||||||
{
|
|
||||||
va_list ap;
|
|
||||||
va_start (ap, kinds);
|
|
||||||
GOACC_parallel_keyed_internal (flags_m, 0, fn, mapnum, hostaddrs, sizes,
|
|
||||||
kinds, &ap);
|
|
||||||
va_end (ap);
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
GOACC_parallel_keyed_v2 (int flags_m, int args, void (*fn) (void *),
|
|
||||||
size_t mapnum, void **hostaddrs, size_t *sizes,
|
|
||||||
unsigned short *kinds, ...)
|
|
||||||
{
|
|
||||||
va_list ap;
|
|
||||||
va_start (ap, kinds);
|
|
||||||
GOACC_parallel_keyed_internal (flags_m, args, fn, mapnum, hostaddrs, sizes,
|
|
||||||
kinds, &ap);
|
|
||||||
va_end (ap);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Legacy entry point, only provide host execution. */
|
/* Legacy entry point, only provide host execution. */
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue