mirror of git://gcc.gnu.org/git/gcc.git
libgomp: cuda.h and omp_target_memcpy_rect cleanup
Fixes for commit r14-2792-g25072a477a56a727b369bf9b20f4d18198ff5894
"OpenMP: Call cuMemcpy2D/cuMemcpy3D for nvptx for omp_target_memcpy_rect",
namely:
In that commit, the code was changed to handle shared-memory devices;
however, as pointed out, omp_target_memcpy_check already set the pointer
to NULL in that case. Hence, this commit reverts to the prior version.
In cuda.h, it adds cuMemcpyPeer{,Async} for symmetry with cuMemcpy3DPeer
(all currently unused) and, in three structs, fixes the reserved-member
names and removes a bogus 'const'.
It also changes DLSYM to DLSYM_OPT for memcpy2d/memcpy3d, as not all
plugins support the new functions yet.
include/ChangeLog:
* cuda/cuda.h (CUDA_MEMCPY2D, CUDA_MEMCPY3D, CUDA_MEMCPY3D_PEER):
Remove bogus 'const' from 'const void *dstHost' and fix reserved-member
names in those structs.
(cuMemcpyPeer, cuMemcpyPeerAsync): Add.
libgomp/ChangeLog:
* target.c (omp_target_memcpy_rect_worker): Undo dim=1 change for
GOMP_OFFLOAD_CAP_SHARED_MEM.
(omp_target_memcpy_rect_copy): Likewise for lock condition.
(gomp_load_plugin_for_device): Use DLSYM_OPT not DLSYM for
memcpy3d/memcpy2d.
* plugin/plugin-nvptx.c (GOMP_OFFLOAD_memcpy2d,
GOMP_OFFLOAD_memcpy3d): Use memset 0 to nullify reserved and
unused src/dst fields for that mem type; remove '{src,dst}LOD = 0'.
This commit is contained in:
parent
5ffa9d0a5e
commit
8b9e559fe7
|
|
@ -147,7 +147,7 @@ typedef struct {
|
|||
|
||||
size_t dstXInBytes, dstY;
|
||||
CUmemorytype dstMemoryType;
|
||||
const void *dstHost;
|
||||
void *dstHost;
|
||||
CUdeviceptr dstDevice;
|
||||
CUarray dstArray;
|
||||
size_t dstPitch;
|
||||
|
|
@ -162,16 +162,16 @@ typedef struct {
|
|||
const void *srcHost;
|
||||
CUdeviceptr srcDevice;
|
||||
CUarray srcArray;
|
||||
void *dummy;
|
||||
void *reserved0;
|
||||
size_t srcPitch, srcHeight;
|
||||
|
||||
size_t dstXInBytes, dstY, dstZ;
|
||||
size_t dstLOD;
|
||||
CUmemorytype dstMemoryType;
|
||||
const void *dstHost;
|
||||
void *dstHost;
|
||||
CUdeviceptr dstDevice;
|
||||
CUarray dstArray;
|
||||
void *dummy2;
|
||||
void *reserved1;
|
||||
size_t dstPitch, dstHeight;
|
||||
|
||||
size_t WidthInBytes, Height, Depth;
|
||||
|
|
@ -190,7 +190,7 @@ typedef struct {
|
|||
size_t dstXInBytes, dstY, dstZ;
|
||||
size_t dstLOD;
|
||||
CUmemorytype dstMemoryType;
|
||||
const void *dstHost;
|
||||
void *dstHost;
|
||||
CUdeviceptr dstDevice;
|
||||
CUarray dstArray;
|
||||
CUcontext dstContext;
|
||||
|
|
@ -246,6 +246,8 @@ CUresult cuMemAlloc (CUdeviceptr *, size_t);
|
|||
CUresult cuMemAllocHost (void **, size_t);
|
||||
CUresult cuMemHostAlloc (void **, size_t, unsigned int);
|
||||
CUresult cuMemcpy (CUdeviceptr, CUdeviceptr, size_t);
|
||||
CUresult cuMemcpyPeer (CUdeviceptr, CUcontext, CUdeviceptr, CUcontext, size_t);
|
||||
CUresult cuMemcpyPeerAsync (CUdeviceptr, CUcontext, CUdeviceptr, CUcontext, size_t, CUstream);
|
||||
#define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
|
||||
CUresult cuMemcpyDtoDAsync (CUdeviceptr, CUdeviceptr, size_t, CUstream);
|
||||
#define cuMemcpyDtoH cuMemcpyDtoH_v2
|
||||
|
|
|
|||
|
|
@ -1794,6 +1794,8 @@ GOMP_OFFLOAD_memcpy2d (int dst_ord, int src_ord, size_t dim1_size,
|
|||
/* TODO: Consider using CU_MEMORYTYPE_UNIFIED if supported. */
|
||||
|
||||
CUDA_MEMCPY2D data;
|
||||
|
||||
memset (&data, 0, sizeof (data));
|
||||
data.WidthInBytes = dim1_size;
|
||||
data.Height = dim0_len;
|
||||
|
||||
|
|
@ -1855,6 +1857,8 @@ GOMP_OFFLOAD_memcpy3d (int dst_ord, int src_ord, size_t dim2_size,
|
|||
/* TODO: Consider using CU_MEMORYTYPE_UNIFIED if supported. */
|
||||
|
||||
CUDA_MEMCPY3D data;
|
||||
|
||||
memset (&data, 0, sizeof (data));
|
||||
data.WidthInBytes = dim2_size;
|
||||
data.Height = dim1_len;
|
||||
data.Depth = dim0_len;
|
||||
|
|
@ -1874,7 +1878,6 @@ GOMP_OFFLOAD_memcpy3d (int dst_ord, int src_ord, size_t dim2_size,
|
|||
data.dstXInBytes = dst_offset2_size;
|
||||
data.dstY = dst_offset1_len;
|
||||
data.dstZ = dst_offset0_len;
|
||||
data.dstLOD = 0;
|
||||
|
||||
if (src_ord == -1)
|
||||
{
|
||||
|
|
@ -1891,7 +1894,6 @@ GOMP_OFFLOAD_memcpy3d (int dst_ord, int src_ord, size_t dim2_size,
|
|||
data.srcXInBytes = src_offset2_size;
|
||||
data.srcY = src_offset1_len;
|
||||
data.srcZ = src_offset0_len;
|
||||
data.srcLOD = 0;
|
||||
|
||||
CUDA_CALL (cuMemcpy3D, &data);
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -4540,33 +4540,22 @@ omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
|
|||
|| __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
|
||||
|| __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
|
||||
return EINVAL;
|
||||
if (src_devicep != NULL && src_devicep == dst_devicep)
|
||||
ret = src_devicep->dev2dev_func (src_devicep->target_id,
|
||||
(char *) dst + dst_off,
|
||||
(const char *) src + src_off,
|
||||
length);
|
||||
else if (src_devicep != NULL
|
||||
&& (dst_devicep == NULL
|
||||
|| (dst_devicep->capabilities
|
||||
& GOMP_OFFLOAD_CAP_SHARED_MEM)))
|
||||
ret = src_devicep->dev2host_func (src_devicep->target_id,
|
||||
(char *) dst + dst_off,
|
||||
(const char *) src + src_off,
|
||||
length);
|
||||
else if (dst_devicep != NULL
|
||||
&& (src_devicep == NULL
|
||||
|| (src_devicep->capabilities
|
||||
& GOMP_OFFLOAD_CAP_SHARED_MEM)))
|
||||
ret = dst_devicep->host2dev_func (dst_devicep->target_id,
|
||||
(char *) dst + dst_off,
|
||||
(const char *) src + src_off,
|
||||
length);
|
||||
else if (dst_devicep == NULL && src_devicep == NULL)
|
||||
if (dst_devicep == NULL && src_devicep == NULL)
|
||||
{
|
||||
memcpy ((char *) dst + dst_off, (const char *) src + src_off,
|
||||
length);
|
||||
ret = 1;
|
||||
}
|
||||
else if (src_devicep == NULL)
|
||||
ret = dst_devicep->host2dev_func (dst_devicep->target_id,
|
||||
(char *) dst + dst_off,
|
||||
(const char *) src + src_off,
|
||||
length);
|
||||
else if (dst_devicep == NULL)
|
||||
ret = src_devicep->dev2host_func (src_devicep->target_id,
|
||||
(char *) dst + dst_off,
|
||||
(const char *) src + src_off,
|
||||
length);
|
||||
else if (src_devicep == dst_devicep)
|
||||
ret = src_devicep->dev2dev_func (src_devicep->target_id,
|
||||
(char *) dst + dst_off,
|
||||
|
|
@ -4584,7 +4573,8 @@ omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
|
|||
else if (*tmp_size < length)
|
||||
{
|
||||
*tmp_size = length;
|
||||
*tmp = realloc (*tmp, length);
|
||||
free (*tmp);
|
||||
*tmp = malloc (length);
|
||||
if (*tmp == NULL)
|
||||
return ENOMEM;
|
||||
}
|
||||
|
|
@ -4599,7 +4589,7 @@ omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
|
|||
return ret ? 0 : EINVAL;
|
||||
}
|
||||
|
||||
/* host->device, device->host and same-device device->device. */
|
||||
/* host->device, device->host and intra device. */
|
||||
if (num_dims == 2
|
||||
&& ((src_devicep
|
||||
&& src_devicep == dst_devicep
|
||||
|
|
@ -4711,16 +4701,8 @@ omp_target_memcpy_rect_copy (void *dst, const void *src,
|
|||
bool lock_src;
|
||||
bool lock_dst;
|
||||
|
||||
lock_src = (src_devicep
|
||||
&& (!dst_devicep
|
||||
|| src_devicep == dst_devicep
|
||||
|| !(src_devicep->capabilities
|
||||
& GOMP_OFFLOAD_CAP_SHARED_MEM)));
|
||||
lock_dst = (dst_devicep
|
||||
&& (!lock_src
|
||||
|| (src_devicep != dst_devicep
|
||||
&& !(dst_devicep->capabilities
|
||||
& GOMP_OFFLOAD_CAP_SHARED_MEM))));
|
||||
lock_src = src_devicep != NULL;
|
||||
lock_dst = dst_devicep != NULL && src_devicep != dst_devicep;
|
||||
if (lock_src)
|
||||
gomp_mutex_lock (&src_devicep->lock);
|
||||
if (lock_dst)
|
||||
|
|
@ -5076,8 +5058,8 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
|
|||
DLSYM (free);
|
||||
DLSYM (dev2host);
|
||||
DLSYM (host2dev);
|
||||
DLSYM (memcpy2d);
|
||||
DLSYM (memcpy3d);
|
||||
DLSYM_OPT (memcpy2d, memcpy2d);
|
||||
DLSYM_OPT (memcpy3d, memcpy3d);
|
||||
device->capabilities = device->get_caps_func ();
|
||||
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|
||||
{
|
||||
|
|
|
|||
Loading…
Reference in New Issue