mirror of git://gcc.gnu.org/git/gcc.git
Use 'GOMP_MAP_VARS_TARGET' for OpenACC compute constructs [PR90596]
Thereby considerably simplify the device plugins' 'GOMP_OFFLOAD_openacc_exec',
'GOMP_OFFLOAD_openacc_async_exec' functions: in terms of lines of code, but in
particular conceptually: no more device memory allocation, host to device data
copying, device memory deallocation -- 'GOMP_MAP_VARS_TARGET' does all that for
us.
This depends on commit 2b2340e236
"Allow libgomp 'cbuf' buffering with OpenACC 'async' for 'ephemeral' data",
where I said that "a use will emerge later", which is this one here.
PR libgomp/90596
libgomp/
* target.c (gomp_map_vars_internal): Allow for
'param_kind == GOMP_MAP_VARS_OPENACC | GOMP_MAP_VARS_TARGET'.
* oacc-parallel.c (GOACC_parallel_keyed): Pass
'GOMP_MAP_VARS_TARGET' to 'goacc_map_vars'.
* plugin/plugin-gcn.c (alloc_by_agent, gcn_exec)
(GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec):
Adjust, simplify.
(gomp_offload_free): Remove.
* plugin/plugin-nvptx.c (nvptx_exec, GOMP_OFFLOAD_openacc_exec)
(GOMP_OFFLOAD_openacc_async_exec): Adjust, simplify.
(cuda_free_argmem): Remove.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c:
Adjust.
This commit is contained in:
parent
14f5e56a8a
commit
f8332e52a4
|
@ -108,8 +108,6 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
|
||||||
va_list ap;
|
va_list ap;
|
||||||
struct goacc_thread *thr;
|
struct goacc_thread *thr;
|
||||||
struct gomp_device_descr *acc_dev;
|
struct gomp_device_descr *acc_dev;
|
||||||
struct target_mem_desc *tgt;
|
|
||||||
void **devaddrs;
|
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
struct splay_tree_key_s k;
|
struct splay_tree_key_s k;
|
||||||
splay_tree_key tgt_fn_key;
|
splay_tree_key tgt_fn_key;
|
||||||
|
@ -290,8 +288,10 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
|
||||||
|
|
||||||
goacc_aq aq = get_goacc_asyncqueue (async);
|
goacc_aq aq = get_goacc_asyncqueue (async);
|
||||||
|
|
||||||
tgt = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
|
struct target_mem_desc *tgt
|
||||||
true, 0);
|
= goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, true,
|
||||||
|
GOMP_MAP_VARS_TARGET);
|
||||||
|
|
||||||
if (profiling_p)
|
if (profiling_p)
|
||||||
{
|
{
|
||||||
prof_info.event_type = acc_ev_enter_data_end;
|
prof_info.event_type = acc_ev_enter_data_end;
|
||||||
|
@ -301,10 +301,7 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
|
||||||
&api_info);
|
&api_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
devaddrs = gomp_alloca (sizeof (void *) * mapnum);
|
void **devaddrs = (void **) tgt->tgt_start;
|
||||||
for (i = 0; i < mapnum; i++)
|
|
||||||
devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i);
|
|
||||||
|
|
||||||
if (aq == NULL)
|
if (aq == NULL)
|
||||||
acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims,
|
acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims,
|
||||||
tgt);
|
tgt);
|
||||||
|
|
|
@ -1833,13 +1833,6 @@ alloc_by_agent (struct agent_info *agent, size_t size)
|
||||||
{
|
{
|
||||||
GCN_DEBUG ("Allocating %zu bytes on device %d\n", size, agent->device_id);
|
GCN_DEBUG ("Allocating %zu bytes on device %d\n", size, agent->device_id);
|
||||||
|
|
||||||
/* Zero-size allocations are invalid, so in order to return a valid pointer
|
|
||||||
we need to pass a valid size. One source of zero-size allocations is
|
|
||||||
kernargs for kernels that have no inputs or outputs (the kernel may
|
|
||||||
only use console output, for example). */
|
|
||||||
if (size == 0)
|
|
||||||
size = 4;
|
|
||||||
|
|
||||||
void *ptr;
|
void *ptr;
|
||||||
hsa_status_t status = hsa_fns.hsa_memory_allocate_fn (agent->data_region,
|
hsa_status_t status = hsa_fns.hsa_memory_allocate_fn (agent->data_region,
|
||||||
size, &ptr);
|
size, &ptr);
|
||||||
|
@ -2989,15 +2982,6 @@ copy_data (void *data_)
|
||||||
free (data);
|
free (data);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Free device data. This is intended for use as an async callback event. */
|
|
||||||
|
|
||||||
static void
|
|
||||||
gomp_offload_free (void *ptr)
|
|
||||||
{
|
|
||||||
GCN_DEBUG ("Async thread ?:?: Freeing %p\n", ptr);
|
|
||||||
GOMP_OFFLOAD_free (0, ptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Request an asynchronous data copy, to or from a device, on a given queue.
|
/* Request an asynchronous data copy, to or from a device, on a given queue.
|
||||||
The event will be registered as a callback. */
|
The event will be registered as a callback. */
|
||||||
|
|
||||||
|
@ -3064,7 +3048,7 @@ wait_queue (struct goacc_asyncqueue *aq)
|
||||||
/* Execute an OpenACC kernel, synchronously or asynchronously. */
|
/* Execute an OpenACC kernel, synchronously or asynchronously. */
|
||||||
|
|
||||||
static void
|
static void
|
||||||
gcn_exec (struct kernel_info *kernel, size_t mapnum,
|
gcn_exec (struct kernel_info *kernel,
|
||||||
void **devaddrs, unsigned *dims, void *targ_mem_desc, bool async,
|
void **devaddrs, unsigned *dims, void *targ_mem_desc, bool async,
|
||||||
struct goacc_asyncqueue *aq)
|
struct goacc_asyncqueue *aq)
|
||||||
{
|
{
|
||||||
|
@ -3074,11 +3058,6 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum,
|
||||||
/* If we get here then this must be an OpenACC kernel. */
|
/* If we get here then this must be an OpenACC kernel. */
|
||||||
kernel->kind = KIND_OPENACC;
|
kernel->kind = KIND_OPENACC;
|
||||||
|
|
||||||
/* devaddrs must be double-indirect on the target. */
|
|
||||||
void **ind_da = alloc_by_agent (kernel->agent, sizeof (void*) * mapnum);
|
|
||||||
for (size_t i = 0; i < mapnum; i++)
|
|
||||||
hsa_fns.hsa_memory_copy_fn (&ind_da[i], &devaddrs[i], sizeof (void *));
|
|
||||||
|
|
||||||
struct hsa_kernel_description *hsa_kernel_desc = NULL;
|
struct hsa_kernel_description *hsa_kernel_desc = NULL;
|
||||||
for (unsigned i = 0; i < kernel->module->image_desc->kernel_count; i++)
|
for (unsigned i = 0; i < kernel->module->image_desc->kernel_count; i++)
|
||||||
{
|
{
|
||||||
|
@ -3190,9 +3169,9 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!async)
|
if (!async)
|
||||||
run_kernel (kernel, ind_da, &kla, NULL, false);
|
run_kernel (kernel, devaddrs, &kla, NULL, false);
|
||||||
else
|
else
|
||||||
queue_push_launch (aq, kernel, ind_da, &kla);
|
queue_push_launch (aq, kernel, devaddrs, &kla);
|
||||||
|
|
||||||
if (profiling_dispatch_p)
|
if (profiling_dispatch_p)
|
||||||
{
|
{
|
||||||
|
@ -3202,16 +3181,6 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum,
|
||||||
&enqueue_launch_event_info,
|
&enqueue_launch_event_info,
|
||||||
api_info);
|
api_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!async)
|
|
||||||
gomp_offload_free (ind_da);
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (DEBUG_QUEUES)
|
|
||||||
GCN_DEBUG ("queue_push_callback %d:%d gomp_offload_free, %p\n",
|
|
||||||
aq->agent->device_id, aq->id, ind_da);
|
|
||||||
queue_push_callback (aq, gomp_offload_free, ind_da);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
@ -3884,20 +3853,22 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
|
||||||
already-loaded KERNEL. */
|
already-loaded KERNEL. */
|
||||||
|
|
||||||
void
|
void
|
||||||
GOMP_OFFLOAD_openacc_exec (void (*fn_ptr) (void *), size_t mapnum,
|
GOMP_OFFLOAD_openacc_exec (void (*fn_ptr) (void *),
|
||||||
|
size_t mapnum __attribute__((unused)),
|
||||||
void **hostaddrs __attribute__((unused)),
|
void **hostaddrs __attribute__((unused)),
|
||||||
void **devaddrs, unsigned *dims,
|
void **devaddrs, unsigned *dims,
|
||||||
void *targ_mem_desc)
|
void *targ_mem_desc)
|
||||||
{
|
{
|
||||||
struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
|
struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
|
||||||
|
|
||||||
gcn_exec (kernel, mapnum, devaddrs, dims, targ_mem_desc, false, NULL);
|
gcn_exec (kernel, devaddrs, dims, targ_mem_desc, false, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Run an asynchronous OpenACC kernel on the specified queue. */
|
/* Run an asynchronous OpenACC kernel on the specified queue. */
|
||||||
|
|
||||||
void
|
void
|
||||||
GOMP_OFFLOAD_openacc_async_exec (void (*fn_ptr) (void *), size_t mapnum,
|
GOMP_OFFLOAD_openacc_async_exec (void (*fn_ptr) (void *),
|
||||||
|
size_t mapnum __attribute__((unused)),
|
||||||
void **hostaddrs __attribute__((unused)),
|
void **hostaddrs __attribute__((unused)),
|
||||||
void **devaddrs,
|
void **devaddrs,
|
||||||
unsigned *dims, void *targ_mem_desc,
|
unsigned *dims, void *targ_mem_desc,
|
||||||
|
@ -3905,7 +3876,7 @@ GOMP_OFFLOAD_openacc_async_exec (void (*fn_ptr) (void *), size_t mapnum,
|
||||||
{
|
{
|
||||||
struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
|
struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
|
||||||
|
|
||||||
gcn_exec (kernel, mapnum, devaddrs, dims, targ_mem_desc, true, aq);
|
gcn_exec (kernel, devaddrs, dims, targ_mem_desc, true, aq);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Create a new asynchronous thread and queue for running future kernels. */
|
/* Create a new asynchronous thread and queue for running future kernels. */
|
||||||
|
|
|
@ -742,7 +742,7 @@ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
nvptx_exec (void (*fn), size_t mapnum, unsigned *dims, void *targ_mem_desc,
|
nvptx_exec (void (*fn), unsigned *dims, void *targ_mem_desc,
|
||||||
CUdeviceptr dp, CUstream stream)
|
CUdeviceptr dp, CUstream stream)
|
||||||
{
|
{
|
||||||
struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn;
|
struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn;
|
||||||
|
@ -1528,70 +1528,16 @@ GOMP_OFFLOAD_free (int ord, void *ptr)
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum,
|
GOMP_OFFLOAD_openacc_exec (void (*fn) (void *),
|
||||||
|
size_t mapnum __attribute__((unused)),
|
||||||
void **hostaddrs __attribute__((unused)),
|
void **hostaddrs __attribute__((unused)),
|
||||||
void **devaddrs,
|
void **devaddrs,
|
||||||
unsigned *dims, void *targ_mem_desc)
|
unsigned *dims, void *targ_mem_desc)
|
||||||
{
|
{
|
||||||
GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__);
|
GOMP_PLUGIN_debug (0, "nvptx %s\n", __FUNCTION__);
|
||||||
|
|
||||||
struct goacc_thread *thr = GOMP_PLUGIN_goacc_thread ();
|
CUdeviceptr dp = (CUdeviceptr) devaddrs;
|
||||||
acc_prof_info *prof_info = thr->prof_info;
|
nvptx_exec (fn, dims, targ_mem_desc, dp, NULL);
|
||||||
acc_event_info data_event_info;
|
|
||||||
acc_api_info *api_info = thr->api_info;
|
|
||||||
bool profiling_p = __builtin_expect (prof_info != NULL, false);
|
|
||||||
|
|
||||||
void **hp = NULL;
|
|
||||||
CUdeviceptr dp = 0;
|
|
||||||
|
|
||||||
if (mapnum > 0)
|
|
||||||
{
|
|
||||||
size_t s = mapnum * sizeof (void *);
|
|
||||||
hp = alloca (s);
|
|
||||||
for (int i = 0; i < mapnum; i++)
|
|
||||||
hp[i] = devaddrs[i];
|
|
||||||
CUDA_CALL_ASSERT (cuMemAlloc, &dp, s);
|
|
||||||
if (profiling_p)
|
|
||||||
goacc_profiling_acc_ev_alloc (thr, (void *) dp, s);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Copy the (device) pointers to arguments to the device (dp and hp might in
|
|
||||||
fact have the same value on a unified-memory system). */
|
|
||||||
if (mapnum > 0)
|
|
||||||
{
|
|
||||||
if (profiling_p)
|
|
||||||
{
|
|
||||||
prof_info->event_type = acc_ev_enqueue_upload_start;
|
|
||||||
|
|
||||||
data_event_info.data_event.event_type = prof_info->event_type;
|
|
||||||
data_event_info.data_event.valid_bytes
|
|
||||||
= _ACC_DATA_EVENT_INFO_VALID_BYTES;
|
|
||||||
data_event_info.data_event.parent_construct
|
|
||||||
= acc_construct_parallel;
|
|
||||||
data_event_info.data_event.implicit = 1; /* Always implicit. */
|
|
||||||
data_event_info.data_event.tool_info = NULL;
|
|
||||||
data_event_info.data_event.var_name = NULL;
|
|
||||||
data_event_info.data_event.bytes = mapnum * sizeof (void *);
|
|
||||||
data_event_info.data_event.host_ptr = hp;
|
|
||||||
data_event_info.data_event.device_ptr = (const void *) dp;
|
|
||||||
|
|
||||||
api_info->device_api = acc_device_api_cuda;
|
|
||||||
|
|
||||||
GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info,
|
|
||||||
api_info);
|
|
||||||
}
|
|
||||||
CUDA_CALL_ASSERT (cuMemcpyHtoD, dp, (void *) hp,
|
|
||||||
mapnum * sizeof (void *));
|
|
||||||
if (profiling_p)
|
|
||||||
{
|
|
||||||
prof_info->event_type = acc_ev_enqueue_upload_end;
|
|
||||||
data_event_info.data_event.event_type = prof_info->event_type;
|
|
||||||
GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info,
|
|
||||||
api_info);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
nvptx_exec (fn, mapnum, dims, targ_mem_desc, dp, NULL);
|
|
||||||
|
|
||||||
CUresult r = CUDA_CALL_NOCHECK (cuStreamSynchronize, NULL);
|
CUresult r = CUDA_CALL_NOCHECK (cuStreamSynchronize, NULL);
|
||||||
const char *maybe_abort_msg = "(perhaps abort was called)";
|
const char *maybe_abort_msg = "(perhaps abort was called)";
|
||||||
|
@ -1600,98 +1546,20 @@ GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum,
|
||||||
maybe_abort_msg);
|
maybe_abort_msg);
|
||||||
else if (r != CUDA_SUCCESS)
|
else if (r != CUDA_SUCCESS)
|
||||||
GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s", cuda_error (r));
|
GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s", cuda_error (r));
|
||||||
|
|
||||||
CUDA_CALL_ASSERT (cuMemFree, dp);
|
|
||||||
if (profiling_p)
|
|
||||||
goacc_profiling_acc_ev_free (thr, (void *) dp);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
cuda_free_argmem (void *ptr)
|
|
||||||
{
|
|
||||||
void **block = (void **) ptr;
|
|
||||||
nvptx_free (block[0], (struct ptx_device *) block[1]);
|
|
||||||
free (block);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), size_t mapnum,
|
GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *),
|
||||||
|
size_t mapnum __attribute__((unused)),
|
||||||
void **hostaddrs __attribute__((unused)),
|
void **hostaddrs __attribute__((unused)),
|
||||||
void **devaddrs,
|
void **devaddrs,
|
||||||
unsigned *dims, void *targ_mem_desc,
|
unsigned *dims, void *targ_mem_desc,
|
||||||
struct goacc_asyncqueue *aq)
|
struct goacc_asyncqueue *aq)
|
||||||
{
|
{
|
||||||
GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__);
|
GOMP_PLUGIN_debug (0, "nvptx %s\n", __FUNCTION__);
|
||||||
|
|
||||||
struct goacc_thread *thr = GOMP_PLUGIN_goacc_thread ();
|
CUdeviceptr dp = (CUdeviceptr) devaddrs;
|
||||||
acc_prof_info *prof_info = thr->prof_info;
|
nvptx_exec (fn, dims, targ_mem_desc, dp, aq->cuda_stream);
|
||||||
acc_event_info data_event_info;
|
|
||||||
acc_api_info *api_info = thr->api_info;
|
|
||||||
bool profiling_p = __builtin_expect (prof_info != NULL, false);
|
|
||||||
|
|
||||||
void **hp = NULL;
|
|
||||||
CUdeviceptr dp = 0;
|
|
||||||
void **block = NULL;
|
|
||||||
|
|
||||||
if (mapnum > 0)
|
|
||||||
{
|
|
||||||
size_t s = mapnum * sizeof (void *);
|
|
||||||
block = (void **) GOMP_PLUGIN_malloc (2 * sizeof (void *) + s);
|
|
||||||
hp = block + 2;
|
|
||||||
for (int i = 0; i < mapnum; i++)
|
|
||||||
hp[i] = devaddrs[i];
|
|
||||||
CUDA_CALL_ASSERT (cuMemAlloc, &dp, s);
|
|
||||||
if (profiling_p)
|
|
||||||
goacc_profiling_acc_ev_alloc (thr, (void *) dp, s);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Copy the (device) pointers to arguments to the device (dp and hp might in
|
|
||||||
fact have the same value on a unified-memory system). */
|
|
||||||
if (mapnum > 0)
|
|
||||||
{
|
|
||||||
if (profiling_p)
|
|
||||||
{
|
|
||||||
prof_info->event_type = acc_ev_enqueue_upload_start;
|
|
||||||
|
|
||||||
data_event_info.data_event.event_type = prof_info->event_type;
|
|
||||||
data_event_info.data_event.valid_bytes
|
|
||||||
= _ACC_DATA_EVENT_INFO_VALID_BYTES;
|
|
||||||
data_event_info.data_event.parent_construct
|
|
||||||
= acc_construct_parallel;
|
|
||||||
data_event_info.data_event.implicit = 1; /* Always implicit. */
|
|
||||||
data_event_info.data_event.tool_info = NULL;
|
|
||||||
data_event_info.data_event.var_name = NULL;
|
|
||||||
data_event_info.data_event.bytes = mapnum * sizeof (void *);
|
|
||||||
data_event_info.data_event.host_ptr = hp;
|
|
||||||
data_event_info.data_event.device_ptr = (const void *) dp;
|
|
||||||
|
|
||||||
api_info->device_api = acc_device_api_cuda;
|
|
||||||
|
|
||||||
GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info,
|
|
||||||
api_info);
|
|
||||||
}
|
|
||||||
|
|
||||||
CUDA_CALL_ASSERT (cuMemcpyHtoDAsync, dp, (void *) hp,
|
|
||||||
mapnum * sizeof (void *), aq->cuda_stream);
|
|
||||||
block[0] = (void *) dp;
|
|
||||||
|
|
||||||
struct nvptx_thread *nvthd =
|
|
||||||
(struct nvptx_thread *) GOMP_PLUGIN_acc_thread ();
|
|
||||||
block[1] = (void *) nvthd->ptx_dev;
|
|
||||||
|
|
||||||
if (profiling_p)
|
|
||||||
{
|
|
||||||
prof_info->event_type = acc_ev_enqueue_upload_end;
|
|
||||||
data_event_info.data_event.event_type = prof_info->event_type;
|
|
||||||
GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info,
|
|
||||||
api_info);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
nvptx_exec (fn, mapnum, dims, targ_mem_desc, dp, aq->cuda_stream);
|
|
||||||
|
|
||||||
if (mapnum > 0)
|
|
||||||
GOMP_OFFLOAD_openacc_async_queue_callback (aq, cuda_free_argmem, block);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void *
|
void *
|
||||||
|
|
|
@ -983,13 +983,13 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
|
||||||
cbuf.chunk_cnt = -1;
|
cbuf.chunk_cnt = -1;
|
||||||
cbuf.use_cnt = 0;
|
cbuf.use_cnt = 0;
|
||||||
cbuf.buf = NULL;
|
cbuf.buf = NULL;
|
||||||
if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
|
if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
|
||||||
{
|
{
|
||||||
size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
|
size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
|
||||||
cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
|
cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
|
||||||
cbuf.chunk_cnt = 0;
|
cbuf.chunk_cnt = 0;
|
||||||
}
|
}
|
||||||
if (pragma_kind == GOMP_MAP_VARS_TARGET)
|
if (pragma_kind & GOMP_MAP_VARS_TARGET)
|
||||||
{
|
{
|
||||||
size_t align = 4 * sizeof (void *);
|
size_t align = 4 * sizeof (void *);
|
||||||
tgt_align = align;
|
tgt_align = align;
|
||||||
|
@ -1262,7 +1262,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
|
||||||
tgt->tgt_start = (uintptr_t) tgt->to_free;
|
tgt->tgt_start = (uintptr_t) tgt->to_free;
|
||||||
tgt->tgt_end = tgt->tgt_start + sizes[0];
|
tgt->tgt_end = tgt->tgt_start + sizes[0];
|
||||||
}
|
}
|
||||||
else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
|
else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
|
||||||
{
|
{
|
||||||
/* Allocate tgt_align aligned tgt_size block of memory. */
|
/* Allocate tgt_align aligned tgt_size block of memory. */
|
||||||
/* FIXME: Perhaps change interface to allocate properly aligned
|
/* FIXME: Perhaps change interface to allocate properly aligned
|
||||||
|
@ -1300,7 +1300,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
|
||||||
}
|
}
|
||||||
|
|
||||||
tgt_size = 0;
|
tgt_size = 0;
|
||||||
if (pragma_kind == GOMP_MAP_VARS_TARGET)
|
if (pragma_kind & GOMP_MAP_VARS_TARGET)
|
||||||
tgt_size = mapnum * sizeof (void *);
|
tgt_size = mapnum * sizeof (void *);
|
||||||
|
|
||||||
tgt->array = NULL;
|
tgt->array = NULL;
|
||||||
|
@ -1738,7 +1738,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pragma_kind == GOMP_MAP_VARS_TARGET)
|
if (pragma_kind & GOMP_MAP_VARS_TARGET)
|
||||||
{
|
{
|
||||||
for (i = 0; i < mapnum; i++)
|
for (i = 0; i < mapnum; i++)
|
||||||
{
|
{
|
||||||
|
|
|
@ -203,9 +203,7 @@ static void cb_alloc (acc_prof_info *prof_info, acc_event_info *event_info, acc_
|
||||||
# error TODO
|
# error TODO
|
||||||
#else
|
#else
|
||||||
assert (state == 4
|
assert (state == 4
|
||||||
|| state == 6
|
|| state == 104);
|
||||||
|| state == 104
|
|
||||||
|| state == 106);
|
|
||||||
STATE_OP (state, ++);
|
STATE_OP (state, ++);
|
||||||
|
|
||||||
if (state == 5
|
if (state == 5
|
||||||
|
@ -217,13 +215,6 @@ static void cb_alloc (acc_prof_info *prof_info, acc_event_info *event_info, acc_
|
||||||
assert (tool_info->nested->event_info.other_event.event_type == acc_ev_enter_data_start);
|
assert (tool_info->nested->event_info.other_event.event_type == acc_ev_enter_data_start);
|
||||||
assert (tool_info->nested->nested == NULL);
|
assert (tool_info->nested->nested == NULL);
|
||||||
}
|
}
|
||||||
else if (state == 7
|
|
||||||
|| state == 107)
|
|
||||||
{
|
|
||||||
assert (tool_info != NULL);
|
|
||||||
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
|
|
||||||
assert (tool_info->nested == NULL);
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
abort ();
|
abort ();
|
||||||
#endif
|
#endif
|
||||||
|
@ -268,17 +259,10 @@ static void cb_free (acc_prof_info *prof_info, acc_event_info *event_info, acc_a
|
||||||
#if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
|
#if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
|
||||||
# error TODO
|
# error TODO
|
||||||
#else
|
#else
|
||||||
assert (state == 9
|
assert (state == 9);
|
||||||
|| state == 11);
|
|
||||||
STATE_OP (state, ++);
|
STATE_OP (state, ++);
|
||||||
|
|
||||||
if (state == 10)
|
if (state == 10)
|
||||||
{
|
|
||||||
assert (tool_info != NULL);
|
|
||||||
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
|
|
||||||
assert (tool_info->nested == NULL);
|
|
||||||
}
|
|
||||||
else if (state == 12)
|
|
||||||
{
|
{
|
||||||
assert (tool_info != NULL);
|
assert (tool_info != NULL);
|
||||||
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
|
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
|
||||||
|
@ -449,19 +433,9 @@ static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_
|
||||||
{
|
{
|
||||||
DEBUG_printf ("%s\n", __FUNCTION__);
|
DEBUG_printf ("%s\n", __FUNCTION__);
|
||||||
|
|
||||||
|
assert (state == 8
|
||||||
#if ASYNC_EXIT_DATA
|
#if ASYNC_EXIT_DATA
|
||||||
if (acc_async != acc_async_sync)
|
|| state == 108
|
||||||
{
|
|
||||||
/* Compensate for the deferred 'acc_ev_free'. */
|
|
||||||
state += 1;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
# error TODO
|
|
||||||
#endif
|
|
||||||
|
|
||||||
assert (state == 10
|
|
||||||
#if ASYNC_EXIT_DATA
|
|
||||||
|| state == 110
|
|
||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
STATE_OP (state, ++);
|
STATE_OP (state, ++);
|
||||||
|
@ -525,9 +499,9 @@ static void cb_exit_data_end (acc_prof_info *prof_info, acc_event_info *event_in
|
||||||
{
|
{
|
||||||
DEBUG_printf ("%s\n", __FUNCTION__);
|
DEBUG_printf ("%s\n", __FUNCTION__);
|
||||||
|
|
||||||
assert (state == 12
|
assert (state == 10
|
||||||
#if ASYNC_EXIT_DATA
|
#if ASYNC_EXIT_DATA
|
||||||
|| state == 112
|
|| state == 110
|
||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
STATE_OP (state, ++);
|
STATE_OP (state, ++);
|
||||||
|
@ -654,13 +628,9 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
|
||||||
{
|
{
|
||||||
/* Compensate for the missing 'acc_ev_enter_data_end'. */
|
/* Compensate for the missing 'acc_ev_enter_data_end'. */
|
||||||
state += 1;
|
state += 1;
|
||||||
/* Compensate for the missing 'acc_ev_alloc'. */
|
|
||||||
state += 1;
|
|
||||||
/* Compensate for the missing 'acc_ev_enqueue_launch_start' and
|
/* Compensate for the missing 'acc_ev_enqueue_launch_start' and
|
||||||
'acc_ev_enqueue_launch_end'. */
|
'acc_ev_enqueue_launch_end'. */
|
||||||
state += 2;
|
state += 2;
|
||||||
/* Compensate for the missing 'acc_ev_free'. */
|
|
||||||
state += 1;
|
|
||||||
/* Compensate for the missing 'acc_ev_exit_data_start'. */
|
/* Compensate for the missing 'acc_ev_exit_data_start'. */
|
||||||
state += 1;
|
state += 1;
|
||||||
/* Compensate for the missing 'acc_ev_free'. */
|
/* Compensate for the missing 'acc_ev_free'. */
|
||||||
|
@ -676,8 +646,8 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
|
||||||
state += 2;
|
state += 2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
assert (state == 13
|
assert (state == 11
|
||||||
|| state == 113);
|
|| state == 111);
|
||||||
STATE_OP (state, ++);
|
STATE_OP (state, ++);
|
||||||
|
|
||||||
assert (tool_info != NULL);
|
assert (tool_info != NULL);
|
||||||
|
@ -731,8 +701,8 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
|
||||||
|
|
||||||
assert (acc_device_type != acc_device_host);
|
assert (acc_device_type != acc_device_host);
|
||||||
|
|
||||||
assert (state == 7
|
assert (state == 6
|
||||||
|| state == 107);
|
|| state == 106);
|
||||||
STATE_OP (state, ++);
|
STATE_OP (state, ++);
|
||||||
|
|
||||||
assert (tool_info != NULL);
|
assert (tool_info != NULL);
|
||||||
|
@ -800,8 +770,8 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
|
||||||
|
|
||||||
assert (acc_device_type != acc_device_host);
|
assert (acc_device_type != acc_device_host);
|
||||||
|
|
||||||
assert (state == 8
|
assert (state == 7
|
||||||
|| state == 108);
|
|| state == 107);
|
||||||
STATE_OP (state, ++);
|
STATE_OP (state, ++);
|
||||||
|
|
||||||
assert (tool_info != NULL);
|
assert (tool_info != NULL);
|
||||||
|
@ -891,7 +861,7 @@ int main()
|
||||||
}
|
}
|
||||||
assert (state_init == 5);
|
assert (state_init == 5);
|
||||||
}
|
}
|
||||||
assert (state == 14);
|
assert (state == 12);
|
||||||
|
|
||||||
STATE_OP (state, = 100);
|
STATE_OP (state, = 100);
|
||||||
|
|
||||||
|
@ -908,7 +878,7 @@ int main()
|
||||||
#pragma acc wait
|
#pragma acc wait
|
||||||
assert (state_init == 105);
|
assert (state_init == 105);
|
||||||
}
|
}
|
||||||
assert (state == 114);
|
assert (state == 112);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue