mirror of git://gcc.gnu.org/git/gcc.git
libgomp: Add OpenACC's acc_memcpy_device{,_async} routines [PR93226]
libgomp/ChangeLog: PR libgomp/93226 * libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_dev2dev): New prototype. * libgomp.h (struct acc_dispatch_t): Add dev2dev_func. (gomp_copy_dev2dev): New prototype. * libgomp.map (OACC_2.6.1): New; add acc_memcpy_device{,_async}. * libgomp.texi (acc_memcpy_device): New. * oacc-mem.c (memcpy_tofrom_device): Change to take from/to device boolean; use memcpy not memmove; add early return if size == 0 or same device + same ptr. (acc_memcpy_to_device, acc_memcpy_to_device_async, acc_memcpy_from_device, acc_memcpy_from_device_async): Update. (acc_memcpy_device, acc_memcpy_device_async): New. * openacc.f90 (acc_memcpy_device, acc_memcpy_device_async): Add interface. * openacc_lib.h (acc_memcpy_device, acc_memcpy_device_async): Likewise. * openacc.h (acc_memcpy_device, acc_memcpy_device_async): Add prototype. * plugin/plugin-gcn.c (GOMP_OFFLOAD_openacc_async_host2dev): Update comment. (GOMP_OFFLOAD_openacc_async_dev2host): Update call. (GOMP_OFFLOAD_openacc_async_dev2dev): New. * plugin/plugin-nvptx.c (cuda_memcpy_dev_sanity_check): New. (GOMP_OFFLOAD_dev2dev): Call it. (GOMP_OFFLOAD_openacc_async_dev2dev): New. * target.c (gomp_copy_dev2dev): New. (gomp_load_plugin_for_device): Load dev2dev and async_dev2dev. * testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c: New test. * testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90: New test.
This commit is contained in:
parent
2047aa4ce2
commit
f4aa6b5a8d
|
@ -200,6 +200,8 @@ extern bool GOMP_OFFLOAD_openacc_async_dev2host (int, void *, const void *, size
|
|||
struct goacc_asyncqueue *);
|
||||
extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *, size_t,
|
||||
struct goacc_asyncqueue *);
|
||||
extern bool GOMP_OFFLOAD_openacc_async_dev2dev (int, void *, const void *, size_t,
|
||||
struct goacc_asyncqueue *);
|
||||
extern void *GOMP_OFFLOAD_openacc_cuda_get_current_device (void);
|
||||
extern void *GOMP_OFFLOAD_openacc_cuda_get_current_context (void);
|
||||
extern void *GOMP_OFFLOAD_openacc_cuda_get_stream (struct goacc_asyncqueue *);
|
||||
|
|
|
@ -1360,6 +1360,7 @@ typedef struct acc_dispatch_t
|
|||
__typeof (GOMP_OFFLOAD_openacc_async_exec) *exec_func;
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_dev2host) *dev2host_func;
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_host2dev) *host2dev_func;
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_dev2dev) *dev2dev_func;
|
||||
} async;
|
||||
|
||||
__typeof (GOMP_OFFLOAD_openacc_get_property) *get_property_func;
|
||||
|
@ -1467,6 +1468,9 @@ extern void gomp_copy_host2dev (struct gomp_device_descr *,
|
|||
extern void gomp_copy_dev2host (struct gomp_device_descr *,
|
||||
struct goacc_asyncqueue *, void *, const void *,
|
||||
size_t);
|
||||
extern void gomp_copy_dev2dev (struct gomp_device_descr *,
|
||||
struct goacc_asyncqueue *, void *, const void *,
|
||||
size_t);
|
||||
extern uintptr_t gomp_map_val (struct target_mem_desc *, void **, size_t);
|
||||
extern bool gomp_attach_pointer (struct gomp_device_descr *,
|
||||
struct goacc_asyncqueue *, splay_tree,
|
||||
|
|
|
@ -609,6 +609,12 @@ OACC_2.6 {
|
|||
acc_get_property_string_h_;
|
||||
} OACC_2.5.1;
|
||||
|
||||
OACC_2.6.1 {
|
||||
global:
|
||||
acc_memcpy_device;
|
||||
acc_memcpy_device_async;
|
||||
} OACC_2.6;
|
||||
|
||||
GOACC_2.0 {
|
||||
global:
|
||||
GOACC_data_end;
|
||||
|
|
|
@ -4763,6 +4763,7 @@ acceleration device.
|
|||
present on device.
|
||||
* acc_memcpy_to_device:: Copy host memory to device memory.
|
||||
* acc_memcpy_from_device:: Copy device memory to host memory.
|
||||
* acc_memcpy_device:: Copy memory within a device.
|
||||
* acc_attach:: Let device pointer point to device-pointer target.
|
||||
* acc_detach:: Let device pointer point to host-pointer target.
|
||||
|
||||
|
@ -5837,6 +5838,44 @@ This function copies device memory specified by device address of
|
|||
|
||||
|
||||
|
||||
@node acc_memcpy_device
|
||||
@section @code{acc_memcpy_device} -- Copy memory within a device.
|
||||
@table @asis
|
||||
@item @emph{Description}
|
||||
This function copies device memory from one memory location to another
|
||||
on the current device. It copies @var{bytes} bytes of data from the device
|
||||
address, specified by @var{data_dev_src}, to the device address
|
||||
@var{data_dev_dest}. The @code{_async} version performs the transfer
|
||||
asnychronously using the queue associated with @var{async_arg}.
|
||||
|
||||
@item @emph{C/C++}:
|
||||
@multitable @columnfractions .20 .80
|
||||
@item @emph{Prototype}: @tab @code{void acc_memcpy_device(d_void* data_dev_dest,}
|
||||
@item @tab @code{d_void* data_dev_src, size_t bytes);}
|
||||
@item @emph{Prototype}: @tab @code{void acc_memcpy_device_async(d_void* data_dev_dest,}
|
||||
@item @tab @code{d_void* data_dev_src, size_t bytes, int async_arg);}
|
||||
@end multitable
|
||||
|
||||
@item @emph{Fortran}:
|
||||
@multitable @columnfractions .20 .80
|
||||
@item @emph{Interface}: @tab @code{subroutine acc_memcpy_device(data_dev_dest, &}
|
||||
@item @tab @code{data_dev_src, bytes)}
|
||||
@item @emph{Interface}: @tab @code{subroutine acc_memcpy_device_async(data_dev_dest, &}
|
||||
@item @tab @code{data_dev_src, bytes, async_arg)}
|
||||
@item @tab @code{type(c_ptr), value :: data_dev_dest}
|
||||
@item @tab @code{type(c_ptr), value :: data_dev_src}
|
||||
@item @tab @code{integer(c_size_t), value :: bytes}
|
||||
@item @tab @code{integer(acc_handle_kind), value :: async_arg}
|
||||
@end multitable
|
||||
|
||||
@item @emph{Reference}:
|
||||
@uref{https://www.openacc.org, OpenACC specification v2.6}, section
|
||||
3.2.33. @uref{https://www.openacc.org, OpenACC specification v3.3}, section
|
||||
3.2.28.
|
||||
@end table
|
||||
|
||||
|
||||
|
||||
@node acc_attach
|
||||
@section @code{acc_attach} -- Let device pointer point to device-pointer target.
|
||||
@table @asis
|
||||
|
|
|
@ -171,21 +171,22 @@ acc_free (void *d)
|
|||
}
|
||||
|
||||
static void
|
||||
memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
|
||||
const char *libfnname)
|
||||
memcpy_tofrom_device (bool dev_to, bool dev_from, void *dst, void *src,
|
||||
size_t s, int async, const char *libfnname)
|
||||
{
|
||||
/* No need to call lazy open here, as the device pointer must have
|
||||
been obtained from a routine that did that. */
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
assert (thr && thr->dev);
|
||||
if (s == 0)
|
||||
return;
|
||||
|
||||
if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
||||
{
|
||||
if (from)
|
||||
memmove (h, d, s);
|
||||
else
|
||||
memmove (d, h, s);
|
||||
if (src == dst)
|
||||
return;
|
||||
memcpy (dst, src, s);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -199,10 +200,15 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
|
|||
}
|
||||
|
||||
goacc_aq aq = get_goacc_asyncqueue (async);
|
||||
if (from)
|
||||
gomp_copy_dev2host (thr->dev, aq, h, d, s);
|
||||
if (dev_to && dev_from)
|
||||
{
|
||||
if (dst != src)
|
||||
gomp_copy_dev2dev (thr->dev, aq, dst, src, s);
|
||||
}
|
||||
else if (dev_from)
|
||||
gomp_copy_dev2host (thr->dev, aq, dst, src, s);
|
||||
else
|
||||
gomp_copy_host2dev (thr->dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL);
|
||||
gomp_copy_host2dev (thr->dev, aq, dst, src, s, false, /* TODO: cbuf? */ NULL);
|
||||
|
||||
if (profiling_p)
|
||||
{
|
||||
|
@ -214,25 +220,37 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
|
|||
void
|
||||
acc_memcpy_to_device (void *d, void *h, size_t s)
|
||||
{
|
||||
memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
|
||||
memcpy_tofrom_device (true, false, d, h, s, acc_async_sync, __FUNCTION__);
|
||||
}
|
||||
|
||||
void
|
||||
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
|
||||
{
|
||||
memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
|
||||
memcpy_tofrom_device (true, false, d, h, s, async, __FUNCTION__);
|
||||
}
|
||||
|
||||
void
|
||||
acc_memcpy_from_device (void *h, void *d, size_t s)
|
||||
{
|
||||
memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
|
||||
memcpy_tofrom_device (false, true, h, d, s, acc_async_sync, __FUNCTION__);
|
||||
}
|
||||
|
||||
void
|
||||
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
|
||||
{
|
||||
memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
|
||||
memcpy_tofrom_device (false, true, h, d, s, async, __FUNCTION__);
|
||||
}
|
||||
|
||||
void
|
||||
acc_memcpy_device (void *dst, void *src, size_t s)
|
||||
{
|
||||
memcpy_tofrom_device (true, true, dst, src, s, acc_async_sync, __FUNCTION__);
|
||||
}
|
||||
|
||||
void
|
||||
acc_memcpy_device_async (void *dst, void *src, size_t s, int async)
|
||||
{
|
||||
memcpy_tofrom_device (true, true, dst, src, s, async, __FUNCTION__);
|
||||
}
|
||||
|
||||
/* Return the device pointer that corresponds to host data H. Or NULL
|
||||
|
|
|
@ -797,6 +797,7 @@ module openacc
|
|||
public :: acc_copyout_finalize, acc_delete_finalize
|
||||
public :: acc_memcpy_to_device, acc_memcpy_to_device_async
|
||||
public :: acc_memcpy_from_device, acc_memcpy_from_device_async
|
||||
public :: acc_memcpy_device, acc_memcpy_device_async
|
||||
|
||||
integer, parameter :: openacc_version = 201711
|
||||
|
||||
|
@ -1046,6 +1047,27 @@ module openacc
|
|||
end subroutine
|
||||
end interface
|
||||
|
||||
interface
|
||||
subroutine acc_memcpy_device (data_dev_dest, data_dev_src, bytes) bind(C)
|
||||
use iso_c_binding, only: c_ptr, c_size_t
|
||||
type(c_ptr), value :: data_dev_dest
|
||||
type(c_ptr), value :: data_dev_src
|
||||
integer(c_size_t), value :: bytes
|
||||
end subroutine
|
||||
end interface
|
||||
|
||||
interface
|
||||
subroutine acc_memcpy_device_async (data_dev_dest, data_dev_src, &
|
||||
bytes, async_arg) bind(C)
|
||||
use iso_c_binding, only: c_ptr, c_size_t
|
||||
import :: acc_handle_kind
|
||||
type(c_ptr), value :: data_dev_dest
|
||||
type(c_ptr), value :: data_dev_src
|
||||
integer(c_size_t), value :: bytes
|
||||
integer(acc_handle_kind), value :: async_arg
|
||||
end subroutine
|
||||
end interface
|
||||
|
||||
interface acc_copyin_async
|
||||
procedure :: acc_copyin_async_32_h
|
||||
procedure :: acc_copyin_async_64_h
|
||||
|
|
|
@ -123,6 +123,7 @@ void *acc_hostptr (void *) __GOACC_NOTHROW;
|
|||
int acc_is_present (void *, size_t) __GOACC_NOTHROW;
|
||||
void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW;
|
||||
void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW;
|
||||
void acc_memcpy_device (void *, void *, size_t) __GOACC_NOTHROW;
|
||||
void acc_attach (void **) __GOACC_NOTHROW;
|
||||
void acc_attach_async (void **, int) __GOACC_NOTHROW;
|
||||
void acc_detach (void **) __GOACC_NOTHROW;
|
||||
|
@ -136,7 +137,7 @@ void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
|
|||
void acc_detach_finalize (void **) __GOACC_NOTHROW;
|
||||
void acc_detach_finalize_async (void **, int) __GOACC_NOTHROW;
|
||||
|
||||
/* Async functions, specified in OpenACC 2.5. */
|
||||
/* Async functions, specified in OpenACC 2.5, acc_memcpy_device in 2.6. */
|
||||
void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW;
|
||||
void acc_create_async (void *, size_t, int) __GOACC_NOTHROW;
|
||||
void acc_copyout_async (void *, size_t, int) __GOACC_NOTHROW;
|
||||
|
@ -145,6 +146,7 @@ void acc_update_device_async (void *, size_t, int) __GOACC_NOTHROW;
|
|||
void acc_update_self_async (void *, size_t, int) __GOACC_NOTHROW;
|
||||
void acc_memcpy_to_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
|
||||
void acc_memcpy_from_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
|
||||
void acc_memcpy_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
|
||||
|
||||
/* CUDA-specific routines. */
|
||||
void *acc_get_current_cuda_device (void) __GOACC_NOTHROW;
|
||||
|
|
|
@ -528,6 +528,30 @@
|
|||
end subroutine
|
||||
end interface
|
||||
|
||||
interface
|
||||
subroutine acc_memcpy_device(data_dev_dest, data_dev_src, &
|
||||
& bytes) bind(C)
|
||||
use iso_c_binding, only: c_ptr, c_size_t
|
||||
type(c_ptr), value :: data_dev_dest
|
||||
type(c_ptr), value :: data_dev_src
|
||||
integer(c_size_t), value :: bytes
|
||||
end subroutine
|
||||
end interface
|
||||
|
||||
interface
|
||||
subroutine acc_memcpy_device_async(data_dev_dest, &
|
||||
& data_dev_src, bytes, &
|
||||
& async_arg) bind(C)
|
||||
use iso_c_binding, only: c_ptr, c_size_t
|
||||
import :: acc_handle_kind
|
||||
type(c_ptr), value :: data_dev_dest
|
||||
type(c_ptr), value :: data_dev_src
|
||||
integer(c_size_t), value :: bytes
|
||||
integer(acc_handle_kind), value :: async_arg
|
||||
end subroutine
|
||||
end interface
|
||||
|
||||
|
||||
interface acc_copyin_async
|
||||
subroutine acc_copyin_async_32_h (a, len, async)
|
||||
use iso_c_binding, only: c_int32_t
|
||||
|
|
|
@ -5079,7 +5079,8 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq,
|
|||
queue_push_callback (aq, fn, data);
|
||||
}
|
||||
|
||||
/* Queue up an asynchronous data copy from host to DEVICE. */
|
||||
/* Queue up an asynchronous data copy from host to DEVICE.
|
||||
(Also handles dev2host and dev2dev.) */
|
||||
|
||||
bool
|
||||
GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src,
|
||||
|
@ -5097,10 +5098,16 @@ bool
|
|||
GOMP_OFFLOAD_openacc_async_dev2host (int device, void *dst, const void *src,
|
||||
size_t n, struct goacc_asyncqueue *aq)
|
||||
{
|
||||
struct agent_info *agent = get_agent_info (device);
|
||||
assert (agent == aq->agent);
|
||||
queue_push_copy (aq, dst, src, n);
|
||||
return true;
|
||||
return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq);
|
||||
}
|
||||
|
||||
/* Queue up an asynchronous data copy from DEVICE to DEVICE. */
|
||||
|
||||
bool
|
||||
GOMP_OFFLOAD_openacc_async_dev2dev (int device, void *dst, const void *src,
|
||||
size_t n, struct goacc_asyncqueue *aq)
|
||||
{
|
||||
return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq);
|
||||
}
|
||||
|
||||
union goacc_property_value
|
||||
|
|
|
@ -2018,6 +2018,34 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq,
|
|||
cuda_callback_wrapper, (void *) b, 0);
|
||||
}
|
||||
|
||||
static bool
|
||||
cuda_memcpy_dev_sanity_check (const void *d1, const void *d2, size_t s)
|
||||
{
|
||||
CUdeviceptr pb1, pb2;
|
||||
size_t ps1, ps2;
|
||||
if (!s)
|
||||
return true;
|
||||
if (!d1 || !d2)
|
||||
{
|
||||
GOMP_PLUGIN_error ("invalid device address");
|
||||
return false;
|
||||
}
|
||||
CUDA_CALL (cuMemGetAddressRange, &pb1, &ps1, (CUdeviceptr) d1);
|
||||
CUDA_CALL (cuMemGetAddressRange, &pb2, &ps2, (CUdeviceptr) d2);
|
||||
if (!pb1 || !pb2)
|
||||
{
|
||||
GOMP_PLUGIN_error ("invalid device address");
|
||||
return false;
|
||||
}
|
||||
if ((void *)(d1 + s) > (void *)(pb1 + ps1)
|
||||
|| (void *)(d2 + s) > (void *)(pb2 + ps2))
|
||||
{
|
||||
GOMP_PLUGIN_error ("invalid size");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
cuda_memcpy_sanity_check (const void *h, const void *d, size_t s)
|
||||
{
|
||||
|
@ -2077,6 +2105,9 @@ GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
|
|||
bool
|
||||
GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
|
||||
{
|
||||
if (!nvptx_attach_host_thread_to_device (ord)
|
||||
|| !cuda_memcpy_dev_sanity_check (dst, src, n))
|
||||
return false;
|
||||
CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n, NULL);
|
||||
return true;
|
||||
}
|
||||
|
@ -2288,6 +2319,18 @@ GOMP_OFFLOAD_openacc_async_dev2host (int ord, void *dst, const void *src,
|
|||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GOMP_OFFLOAD_openacc_async_dev2dev (int ord, void *dst, const void *src,
|
||||
size_t n, struct goacc_asyncqueue *aq)
|
||||
{
|
||||
if (!nvptx_attach_host_thread_to_device (ord)
|
||||
|| !cuda_memcpy_dev_sanity_check (dst, src, n))
|
||||
return false;
|
||||
CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n,
|
||||
aq->cuda_stream);
|
||||
return true;
|
||||
}
|
||||
|
||||
union goacc_property_value
|
||||
GOMP_OFFLOAD_openacc_get_property (int n, enum goacc_property prop)
|
||||
{
|
||||
|
|
|
@ -461,6 +461,19 @@ gomp_copy_dev2host (struct gomp_device_descr *devicep,
|
|||
gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
|
||||
}
|
||||
|
||||
attribute_hidden void
|
||||
gomp_copy_dev2dev (struct gomp_device_descr *devicep,
|
||||
struct goacc_asyncqueue *aq,
|
||||
void *dst, const void *src, size_t sz)
|
||||
{
|
||||
if (__builtin_expect (aq != NULL, 0))
|
||||
goacc_device_copy_async (devicep, devicep->openacc.async.dev2dev_func,
|
||||
"dev", dst, "dev", src, NULL, sz, aq);
|
||||
else
|
||||
gomp_device_copy (devicep, devicep->dev2dev_func, "dev", dst,
|
||||
"dev", src, sz);
|
||||
}
|
||||
|
||||
static void
|
||||
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
|
||||
{
|
||||
|
@ -5573,6 +5586,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
|
|||
|| !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
|
||||
|| !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
|
||||
|| !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
|
||||
|| !DLSYM_OPT (openacc.async.dev2dev, openacc_async_dev2dev)
|
||||
|| !DLSYM_OPT (openacc.get_property, openacc_get_property))
|
||||
{
|
||||
/* Require all the OpenACC handlers if we have
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
/* { dg-prune-output "using .vector_length \\(32\\)" } */
|
||||
|
||||
/* PR libgomp/93226 */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <openacc.h>
|
||||
|
||||
enum { N = 1024 };
|
||||
|
||||
static int D[N];
|
||||
#pragma acc declare device_resident(D)
|
||||
|
||||
#pragma acc routine
|
||||
intptr_t init_d()
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
D[i] = 27*i;
|
||||
return (intptr_t) &D[0];
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int *a, *b, *e;
|
||||
void *d_a, *d_b, *d_c, *d_d, *d_e, *d_f;
|
||||
intptr_t intptr;
|
||||
bool fail = false;
|
||||
|
||||
a = (int *) malloc (N*sizeof (int));
|
||||
b = (int *) malloc (N*sizeof (int));
|
||||
e = (int *) malloc (N*sizeof (int));
|
||||
d_c = acc_malloc (N*sizeof (int));
|
||||
d_f = acc_malloc (N*sizeof (int));
|
||||
|
||||
memset (e, 0xff, N*sizeof (int));
|
||||
d_e = acc_copyin (e, N*sizeof (int));
|
||||
|
||||
#pragma acc serial copyout(intptr)
|
||||
intptr = init_d ();
|
||||
d_d = (void*) intptr;
|
||||
acc_memcpy_device (d_c, d_d, N*sizeof (int));
|
||||
|
||||
#pragma acc serial copy(fail) deviceptr(d_c) firstprivate(intptr)
|
||||
{
|
||||
int *cc = (int *) d_c;
|
||||
int *dd = (int *) intptr;
|
||||
for (int i = 0; i < N; i++)
|
||||
if (dd[i] != 27*i || cc[i] != 27*i)
|
||||
{
|
||||
fail = true;
|
||||
__builtin_abort ();
|
||||
}
|
||||
}
|
||||
if (fail) __builtin_abort ();
|
||||
|
||||
for (int i = 0; i < N; i++)
|
||||
a[i] = 11*i;
|
||||
for (int i = 0; i < N; i++)
|
||||
b[i] = 31*i;
|
||||
|
||||
d_a = acc_copyin (a, N*sizeof (int));
|
||||
acc_copyin_async (b, N*sizeof (int), acc_async_noval);
|
||||
|
||||
#pragma acc parallel deviceptr(d_c) async
|
||||
{
|
||||
int *cc = (int *) d_c;
|
||||
#pragma acc loop
|
||||
for (int i = 0; i < N; i++)
|
||||
cc[i] = -17*i;
|
||||
}
|
||||
|
||||
acc_memcpy_device_async (d_d, d_a, N*sizeof (int), acc_async_noval);
|
||||
acc_memcpy_device_async (d_f, d_c, N*sizeof (int), acc_async_noval);
|
||||
acc_wait (acc_async_noval);
|
||||
d_b = acc_deviceptr (b);
|
||||
acc_memcpy_device_async (d_e, d_b, N*sizeof (int), acc_async_noval);
|
||||
acc_wait (acc_async_noval);
|
||||
|
||||
#pragma acc serial deviceptr(d_d, d_e, d_f) copy(fail)
|
||||
{
|
||||
int *dd = (int *) d_d;
|
||||
int *ee = (int *) d_e;
|
||||
int *ff = (int *) d_f;
|
||||
for (int i = 0; i < N; i++)
|
||||
if (dd[i] != 11*i
|
||||
|| ee[i] != 31*i
|
||||
|| ff[i] != -17*i)
|
||||
{
|
||||
fail = true;
|
||||
__builtin_abort ();
|
||||
}
|
||||
}
|
||||
if (fail) __builtin_abort ();
|
||||
}
|
|
@ -0,0 +1,113 @@
|
|||
! { dg-prune-output "using .vector_length \\(32\\)" }
|
||||
|
||||
! PR libgomp/93226 */
|
||||
|
||||
module m
|
||||
use iso_c_binding
|
||||
use openacc
|
||||
implicit none (external, type)
|
||||
|
||||
integer, parameter :: N = 1024
|
||||
|
||||
integer :: D(N)
|
||||
!$acc declare device_resident(D)
|
||||
|
||||
contains
|
||||
|
||||
integer(c_intptr_t) function init_d()
|
||||
!$acc routine
|
||||
integer :: i
|
||||
do i = 1, N
|
||||
D(i) = 27*i
|
||||
end do
|
||||
init_d = loc(D)
|
||||
end
|
||||
end module
|
||||
|
||||
program main
|
||||
use m
|
||||
implicit none (external, type)
|
||||
|
||||
integer, allocatable, target :: a(:), b(:), e(:)
|
||||
type(c_ptr) :: d_a, d_b, d_c, d_d, d_e, d_f
|
||||
integer(c_intptr_t) intptr
|
||||
integer :: i
|
||||
logical fail
|
||||
|
||||
fail = .false.
|
||||
|
||||
allocate(a(N), b(N), e(N))
|
||||
d_c = acc_malloc (N*c_sizeof (i))
|
||||
d_f = acc_malloc (N*c_sizeof (i))
|
||||
|
||||
e = huge(e)
|
||||
call acc_copyin (e, N*c_sizeof (i));
|
||||
d_e = acc_deviceptr (e);
|
||||
|
||||
!$acc serial copyout(intptr)
|
||||
intptr = init_d ()
|
||||
!$acc end serial
|
||||
d_d = transfer(intptr, d_d)
|
||||
call acc_memcpy_device (d_c, d_d, N*c_sizeof (i))
|
||||
|
||||
!$acc serial copy(fail) copy(a) deviceptr(d_c, d_d) firstprivate(intptr)
|
||||
block
|
||||
integer, pointer :: cc(:), dd(:)
|
||||
call c_f_pointer (d_c, cc, [N])
|
||||
call c_f_pointer (d_d, dd, [N])
|
||||
a = cc
|
||||
do i = 1, N
|
||||
if (dd(i) /= 27*i .or. cc(i) /= 27*i) then
|
||||
fail = .true.
|
||||
stop 1
|
||||
end if
|
||||
end do
|
||||
end block
|
||||
!$acc end serial
|
||||
if (fail) error stop 1
|
||||
|
||||
do i = 1, N
|
||||
a(i) = 11*i
|
||||
b(i) = 31*i
|
||||
end do
|
||||
|
||||
call acc_copyin (a, N*c_sizeof (i))
|
||||
d_a = acc_deviceptr (a)
|
||||
call acc_copyin_async (b, N*c_sizeof (i), acc_async_noval)
|
||||
|
||||
!$acc parallel deviceptr(d_c) private(i) async
|
||||
block
|
||||
integer, pointer :: cc(:)
|
||||
call c_f_pointer (d_c, cc, [N])
|
||||
!$acc loop
|
||||
do i = 1, N
|
||||
cc(i) = -17*i
|
||||
end do
|
||||
end block
|
||||
!$acc end parallel
|
||||
|
||||
call acc_memcpy_device_async (d_d, d_a, N*c_sizeof (i), acc_async_noval)
|
||||
call acc_memcpy_device_async (d_f, d_c, N*c_sizeof (i), acc_async_noval)
|
||||
call acc_wait (acc_async_noval)
|
||||
d_b = acc_deviceptr (b)
|
||||
call acc_memcpy_device_async (d_e, d_b, N*c_sizeof (i), acc_async_noval)
|
||||
call acc_wait (acc_async_noval)
|
||||
|
||||
!$acc serial deviceptr(d_d, d_e, d_f) private(i) copy(fail)
|
||||
block
|
||||
integer, pointer :: dd(:), ee(:), ff(:)
|
||||
call c_f_pointer (d_d, dd, [N])
|
||||
call c_f_pointer (d_e, ee, [N])
|
||||
call c_f_pointer (d_f, ff, [N])
|
||||
do i = 1, N
|
||||
if (dd(i) /= 11*i &
|
||||
.or. ee(i) /= 31*i &
|
||||
.or. ff(i) /= -17*i) then
|
||||
fail = .true.
|
||||
stop 2
|
||||
end if
|
||||
end do
|
||||
end block
|
||||
!$acc end serial
|
||||
if (fail) error stop 2
|
||||
end
|
Loading…
Reference in New Issue