mirror of git://gcc.gnu.org/git/gcc.git
openmp: Add support for thread_limit clause on target
OpenMP 5.1 says that thread_limit clause can also appear on target, and similarly to teams should affect the thread-limit-var ICV. On combined target teams, the clause goes to both. We actually passed thread_limit internally on target already before, but only used it for gcn/ptx offloading to hint how many threads should be created and for ptx didn't set thread_limit_var in that case. Similarly for host fallback. Also, I found that we weren't copying the args array that contains encoded thread_limit and num_teams clause for target (etc.) for async target. 2021-11-15 Jakub Jelinek <jakub@redhat.com> gcc/ * gimplify.c (optimize_target_teams): Only add OMP_CLAUSE_THREAD_LIMIT to OMP_TARGET_CLAUSES if it isn't there already. gcc/c-family/ * c-omp.c (c_omp_split_clauses) <case OMP_CLAUSE_THREAD_LIMIT>: Duplicate to both OMP_TARGET and OMP_TEAMS. gcc/c/ * c-parser.c (OMP_TARGET_CLAUSE_MASK): Add PRAGMA_OMP_CLAUSE_THREAD_LIMIT. gcc/cp/ * parser.c (OMP_TARGET_CLAUSE_MASK): Add PRAGMA_OMP_CLAUSE_THREAD_LIMIT. libgomp/ * task.c (gomp_create_target_task): Copy args array as well. * target.c (gomp_target_fallback): Add args argument. Set gomp_icv (true)->thread_limit_var if thread_limit is present. (GOMP_target): Adjust gomp_target_fallback caller. (GOMP_target_ext): Likewise. (gomp_target_task_fn): Likewise. * config/nvptx/team.c (gomp_nvptx_main): Set gomp_global_icv.thread_limit_var. * testsuite/libgomp.c-c++-common/thread-limit-1.c: New test.
This commit is contained in:
parent
fcdf49a0ad
commit
aea7238683
|
@ -1867,7 +1867,6 @@ c_omp_split_clauses (location_t loc, enum tree_code code,
|
||||||
s = C_OMP_CLAUSE_SPLIT_TARGET;
|
s = C_OMP_CLAUSE_SPLIT_TARGET;
|
||||||
break;
|
break;
|
||||||
case OMP_CLAUSE_NUM_TEAMS:
|
case OMP_CLAUSE_NUM_TEAMS:
|
||||||
case OMP_CLAUSE_THREAD_LIMIT:
|
|
||||||
s = C_OMP_CLAUSE_SPLIT_TEAMS;
|
s = C_OMP_CLAUSE_SPLIT_TEAMS;
|
||||||
break;
|
break;
|
||||||
case OMP_CLAUSE_DIST_SCHEDULE:
|
case OMP_CLAUSE_DIST_SCHEDULE:
|
||||||
|
@ -2531,6 +2530,30 @@ c_omp_split_clauses (location_t loc, enum tree_code code,
|
||||||
else
|
else
|
||||||
s = C_OMP_CLAUSE_SPLIT_FOR;
|
s = C_OMP_CLAUSE_SPLIT_FOR;
|
||||||
break;
|
break;
|
||||||
|
/* thread_limit is allowed on target and teams. Distribute it
|
||||||
|
to all. */
|
||||||
|
case OMP_CLAUSE_THREAD_LIMIT:
|
||||||
|
if ((mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_MAP))
|
||||||
|
!= 0)
|
||||||
|
{
|
||||||
|
if ((mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NUM_TEAMS))
|
||||||
|
!= 0)
|
||||||
|
{
|
||||||
|
c = build_omp_clause (OMP_CLAUSE_LOCATION (clauses),
|
||||||
|
OMP_CLAUSE_THREAD_LIMIT);
|
||||||
|
OMP_CLAUSE_THREAD_LIMIT_EXPR (c)
|
||||||
|
= OMP_CLAUSE_THREAD_LIMIT_EXPR (clauses);
|
||||||
|
OMP_CLAUSE_CHAIN (c) = cclauses[C_OMP_CLAUSE_SPLIT_TARGET];
|
||||||
|
cclauses[C_OMP_CLAUSE_SPLIT_TARGET] = c;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
s = C_OMP_CLAUSE_SPLIT_TARGET;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s = C_OMP_CLAUSE_SPLIT_TEAMS;
|
||||||
|
break;
|
||||||
/* Allocate clause is allowed on target, teams, distribute, parallel,
|
/* Allocate clause is allowed on target, teams, distribute, parallel,
|
||||||
for, sections and taskloop. Distribute it to all. */
|
for, sections and taskloop. Distribute it to all. */
|
||||||
case OMP_CLAUSE_ALLOCATE:
|
case OMP_CLAUSE_ALLOCATE:
|
||||||
|
|
|
@ -20963,6 +20963,7 @@ c_parser_omp_target_exit_data (location_t loc, c_parser *parser,
|
||||||
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ALLOCATE) \
|
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ALLOCATE) \
|
||||||
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEFAULTMAP) \
|
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEFAULTMAP) \
|
||||||
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IN_REDUCTION) \
|
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IN_REDUCTION) \
|
||||||
|
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_THREAD_LIMIT) \
|
||||||
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR))
|
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR))
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
|
|
|
@ -44015,6 +44015,7 @@ cp_parser_omp_target_update (cp_parser *parser, cp_token *pragma_tok,
|
||||||
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEFAULTMAP) \
|
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEFAULTMAP) \
|
||||||
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ALLOCATE) \
|
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ALLOCATE) \
|
||||||
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IN_REDUCTION) \
|
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IN_REDUCTION) \
|
||||||
|
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_THREAD_LIMIT) \
|
||||||
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR))
|
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR))
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
|
|
|
@ -13637,10 +13637,13 @@ optimize_target_teams (tree target, gimple_seq *pre_p)
|
||||||
if (!DECL_P (expr) && TREE_CODE (expr) != TARGET_EXPR)
|
if (!DECL_P (expr) && TREE_CODE (expr) != TARGET_EXPR)
|
||||||
OMP_CLAUSE_OPERAND (c, 0) = *p;
|
OMP_CLAUSE_OPERAND (c, 0) = *p;
|
||||||
}
|
}
|
||||||
c = build_omp_clause (thread_limit_loc, OMP_CLAUSE_THREAD_LIMIT);
|
if (!omp_find_clause (OMP_TARGET_CLAUSES (target), OMP_CLAUSE_THREAD_LIMIT))
|
||||||
OMP_CLAUSE_THREAD_LIMIT_EXPR (c) = thread_limit;
|
{
|
||||||
OMP_CLAUSE_CHAIN (c) = OMP_TARGET_CLAUSES (target);
|
c = build_omp_clause (thread_limit_loc, OMP_CLAUSE_THREAD_LIMIT);
|
||||||
OMP_TARGET_CLAUSES (target) = c;
|
OMP_CLAUSE_THREAD_LIMIT_EXPR (c) = thread_limit;
|
||||||
|
OMP_CLAUSE_CHAIN (c) = OMP_TARGET_CLAUSES (target);
|
||||||
|
OMP_TARGET_CLAUSES (target) = c;
|
||||||
|
}
|
||||||
c = build_omp_clause (num_teams_loc, OMP_CLAUSE_NUM_TEAMS);
|
c = build_omp_clause (num_teams_loc, OMP_CLAUSE_NUM_TEAMS);
|
||||||
OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c) = num_teams_upper;
|
OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c) = num_teams_upper;
|
||||||
OMP_CLAUSE_NUM_TEAMS_LOWER_EXPR (c) = num_teams_lower;
|
OMP_CLAUSE_NUM_TEAMS_LOWER_EXPR (c) = num_teams_lower;
|
||||||
|
|
|
@ -55,6 +55,7 @@ gomp_nvptx_main (void (*fn) (void *), void *fn_data)
|
||||||
if (tid == 0)
|
if (tid == 0)
|
||||||
{
|
{
|
||||||
gomp_global_icv.nthreads_var = ntids;
|
gomp_global_icv.nthreads_var = ntids;
|
||||||
|
gomp_global_icv.thread_limit_var = ntids;
|
||||||
/* Starting additional threads is not supported. */
|
/* Starting additional threads is not supported. */
|
||||||
gomp_global_icv.dyn_var = true;
|
gomp_global_icv.dyn_var = true;
|
||||||
|
|
||||||
|
|
|
@ -2362,7 +2362,7 @@ gomp_unload_device (struct gomp_device_descr *devicep)
|
||||||
|
|
||||||
static void
|
static void
|
||||||
gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
|
gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
|
||||||
struct gomp_device_descr *devicep)
|
struct gomp_device_descr *devicep, void **args)
|
||||||
{
|
{
|
||||||
struct gomp_thread old_thr, *thr = gomp_thread ();
|
struct gomp_thread old_thr, *thr = gomp_thread ();
|
||||||
|
|
||||||
|
@ -2378,6 +2378,25 @@ gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
|
||||||
thr->place = old_thr.place;
|
thr->place = old_thr.place;
|
||||||
thr->ts.place_partition_len = gomp_places_list_len;
|
thr->ts.place_partition_len = gomp_places_list_len;
|
||||||
}
|
}
|
||||||
|
if (args)
|
||||||
|
while (*args)
|
||||||
|
{
|
||||||
|
intptr_t id = (intptr_t) *args++, val;
|
||||||
|
if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
|
||||||
|
val = (intptr_t) *args++;
|
||||||
|
else
|
||||||
|
val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
|
||||||
|
if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
|
||||||
|
continue;
|
||||||
|
id &= GOMP_TARGET_ARG_ID_MASK;
|
||||||
|
if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
|
||||||
|
continue;
|
||||||
|
val = val > INT_MAX ? INT_MAX : val;
|
||||||
|
if (val)
|
||||||
|
gomp_icv (true)->thread_limit_var = val;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
fn (hostaddrs);
|
fn (hostaddrs);
|
||||||
gomp_free_thread (thr);
|
gomp_free_thread (thr);
|
||||||
*thr = old_thr;
|
*thr = old_thr;
|
||||||
|
@ -2478,7 +2497,7 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
|
||||||
/* All shared memory devices should use the GOMP_target_ext function. */
|
/* All shared memory devices should use the GOMP_target_ext function. */
|
||||||
|| devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
|
|| devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
|
||||||
|| !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
|
|| !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
|
||||||
return gomp_target_fallback (fn, hostaddrs, devicep);
|
return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
|
||||||
|
|
||||||
htab_t refcount_set = htab_create (mapnum);
|
htab_t refcount_set = htab_create (mapnum);
|
||||||
struct target_mem_desc *tgt_vars
|
struct target_mem_desc *tgt_vars
|
||||||
|
@ -2617,7 +2636,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
|
||||||
tgt_align, tgt_size);
|
tgt_align, tgt_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
gomp_target_fallback (fn, hostaddrs, devicep);
|
gomp_target_fallback (fn, hostaddrs, devicep, args);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3052,7 +3071,8 @@ gomp_target_task_fn (void *data)
|
||||||
|| (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
|
|| (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
|
||||||
{
|
{
|
||||||
ttask->state = GOMP_TARGET_TASK_FALLBACK;
|
ttask->state = GOMP_TARGET_TASK_FALLBACK;
|
||||||
gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep);
|
gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
|
||||||
|
ttask->args);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -745,6 +745,7 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
|
||||||
size_t depend_size = 0;
|
size_t depend_size = 0;
|
||||||
uintptr_t depend_cnt = 0;
|
uintptr_t depend_cnt = 0;
|
||||||
size_t tgt_align = 0, tgt_size = 0;
|
size_t tgt_align = 0, tgt_size = 0;
|
||||||
|
uintptr_t args_cnt = 0;
|
||||||
|
|
||||||
if (depend != NULL)
|
if (depend != NULL)
|
||||||
{
|
{
|
||||||
|
@ -769,10 +770,22 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
|
||||||
tgt_size += tgt_align - 1;
|
tgt_size += tgt_align - 1;
|
||||||
else
|
else
|
||||||
tgt_size = 0;
|
tgt_size = 0;
|
||||||
|
if (args)
|
||||||
|
{
|
||||||
|
void **cargs = args;
|
||||||
|
while (*cargs)
|
||||||
|
{
|
||||||
|
intptr_t id = (intptr_t) *cargs++;
|
||||||
|
if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
|
||||||
|
cargs++;
|
||||||
|
}
|
||||||
|
args_cnt = cargs + 1 - args;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
task = gomp_malloc (sizeof (*task) + depend_size
|
task = gomp_malloc (sizeof (*task) + depend_size
|
||||||
+ sizeof (*ttask)
|
+ sizeof (*ttask)
|
||||||
|
+ args_cnt * sizeof (void *)
|
||||||
+ mapnum * (sizeof (void *) + sizeof (size_t)
|
+ mapnum * (sizeof (void *) + sizeof (size_t)
|
||||||
+ sizeof (unsigned short))
|
+ sizeof (unsigned short))
|
||||||
+ tgt_size);
|
+ tgt_size);
|
||||||
|
@ -785,9 +798,18 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
|
||||||
ttask->devicep = devicep;
|
ttask->devicep = devicep;
|
||||||
ttask->fn = fn;
|
ttask->fn = fn;
|
||||||
ttask->mapnum = mapnum;
|
ttask->mapnum = mapnum;
|
||||||
ttask->args = args;
|
|
||||||
memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
|
memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
|
||||||
ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
|
if (args_cnt)
|
||||||
|
{
|
||||||
|
ttask->args = (void **) &ttask->hostaddrs[mapnum];
|
||||||
|
memcpy (ttask->args, args, args_cnt * sizeof (void *));
|
||||||
|
ttask->sizes = (size_t *) &ttask->args[args_cnt];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ttask->args = args;
|
||||||
|
ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
|
||||||
|
}
|
||||||
memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
|
memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
|
||||||
ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
|
ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
|
||||||
memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
|
memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
#include <omp.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
void
|
||||||
|
foo ()
|
||||||
|
{
|
||||||
|
{
|
||||||
|
#pragma omp target parallel nowait thread_limit (4) num_threads (1)
|
||||||
|
if (omp_get_thread_limit () > 4)
|
||||||
|
abort ();
|
||||||
|
}
|
||||||
|
#pragma omp taskwait
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main ()
|
||||||
|
{
|
||||||
|
#pragma omp target thread_limit (6)
|
||||||
|
if (omp_get_thread_limit () > 6)
|
||||||
|
abort ();
|
||||||
|
foo ();
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in New Issue