mirror of git://gcc.gnu.org/git/gcc.git
libgomp: Init hash table for 'indirect'-clause of 'declare target' on the host [PR114445, PR119857]
Especially with unified-shared memory and especially with C++'s virtual functions, it is not uncommon to have on the device a function pointer that points to the host function - but has an associated device function. If the pointed-to function is (explicitly or implicitly) 'declare target' with the 'indirect' clause, it is added to the lookup table. Before this commit, the conversion of the lookup table into a lookup hash table happened every time a device kernel was launched on the first team - albeit if already converted, the function immediately returned. Ignoring the overhead, there was also a race: If multiple teams were launched, it could happen that another team of the same target region already tried to use the lookup table while it was still being created. Likewise when launching a kernel with 'nowait' and directly afterward another kernel, there could be a race in creating the table. With this commit, the creation of the hash table has been moved to the host-plugin's GOMP_OFFLOAD_load_image. The previous code stored a pointer to the host/device pointer array, which makes it hard when creating the hash table on the host (data is needed for finding the slot) - but accessing it on the device (where the lookup has to work as well). As the hash-table implementation (only) supports integral values as payload (0 and 1 having special meaning), the solution was to move to a uint128_t variable to store both the host and device address. As the host-side library is typically dynamically linked and the device-side one statically, there is the problem of backward compatibility. The current implementation permits both older binaries and newer libgomp and newer binaries with older libgomp. I could imagine us breaking the latter eventually, but for now there is upward and downward compatibility. (Obviously, the race is only fixed if new + new is combined.) Code-wise, on the device there exists GOMP_INDIRECT_ADDR_MAP, which was updated to point to the host/device-address array. 
Now additionally GOMP_INDIRECT_ADDR_HMAP exists, which contains the hash-table map. If the latter exists, libgomp only updates it and the former remains a NULL pointer; it is also untouched if there are no indirect functions. Being NULL therefore avoids the call to the device-side build_indirect_map. The code also currently supports having no hash table and using a linear walk. I think that remained from testing; due to the backward-compat feature, it can actually be turned off on either side. libgomp/ChangeLog: PR libgomp/119857 PR libgomp/114445 * config/accel/target-indirect.c: Change to use uint128_t instead of a struct as data structure and add GOMP_INDIRECT_ADDR_HMAP as host-accessible variable. (struct indirect_map_t): Remove. (USE_HASHTAB_LOOKUP, INDIRECT_DEV_ADDR, INDIRECT_HOST_ADDR, SET_INDIRECT_HOST_ADDR, SET_INDIRECT_ADDRS): Define. (htab_free): Use __builtin_unreachable. (htab_hash, htab_eq, GOMP_target_map_indirect_ptr, build_indirect_map): Update for new representation and new pointer-to-hash variable. * config/gcn/team.c (gomp_gcn_enter_kernel): Only call build_indirect_map when GOMP_INDIRECT_ADDR_MAP. * config/nvptx/team.c (gomp_nvptx_main): Likewise. * libgomp-plugin.h (GOMP_INDIRECT_ADDR_HMAP): Define. * plugin/plugin-gcn.c: Conditionally include build-target-indirect-htab.h. (USE_HASHTAB_LOOKUP_FOR_INDIRECT): Define. (create_target_indirect_map): New prototype. (GOMP_OFFLOAD_load_image): Update to create the device's indirect-function hash table on the host. * plugin/plugin-nvptx.c: Conditionally include build-target-indirect-htab.h. (USE_HASHTAB_LOOKUP_FOR_INDIRECT): Define. (create_target_indirect_map): New prototype. (GOMP_OFFLOAD_load_image): Update to create the device's indirect-function hash table on the host. * plugin/build-target-indirect-htab.h: New file.
This commit is contained in:
parent
16d2b8881c
commit
da5803c794
|
@ -25,73 +25,52 @@
|
|||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include "libgomp.h"
|
||||
|
||||
struct indirect_map_t
|
||||
{
|
||||
void *host_addr;
|
||||
void *target_addr;
|
||||
};
|
||||
void *GOMP_INDIRECT_ADDR_MAP = NULL;
|
||||
|
||||
typedef struct indirect_map_t *hash_entry_type;
|
||||
#define USE_HASHTAB_LOOKUP
|
||||
|
||||
#ifdef USE_HASHTAB_LOOKUP
|
||||
|
||||
#include <string.h> /* For memset. */
|
||||
|
||||
/* Use a hashtab to lookup the target address instead of using a linear
|
||||
search.
|
||||
|
||||
With newer libgomp on the host the hash is already initialized on the host
|
||||
(i.e plugin/plugin-gcn.c). Thus, build_indirect_map is only used as
|
||||
fallback with older libgomp. */
|
||||
|
||||
void *GOMP_INDIRECT_ADDR_HMAP = NULL;
|
||||
|
||||
/* A hash-table entry packs both addresses of an indirect function into one
   128-bit integer: the host address occupies the low 64 bits and the device
   address the high 64 bits.  */
typedef unsigned __int128 hash_entry_type;

/* Every macro argument is parenthesized so that an expression argument
   (e.g. 'a | b') is evaluated as a whole before the shift or cast.  */

/* Device address stored in entry P (high 64 bits).  */
#define INDIRECT_DEV_ADDR(p) ((void *) (uintptr_t) ((p) >> 64))
/* Host address stored in entry P (low 64 bits).  */
#define INDIRECT_HOST_ADDR(p) ((void *) (uintptr_t) (p))
/* Make P a lookup key: host address HOST, device half zero.  */
#define SET_INDIRECT_HOST_ADDR(p, host) \
  ((p) = (unsigned __int128) (uintptr_t) (host))
/* Store host address H and device address D in entry P.  */
#define SET_INDIRECT_ADDRS(p, h, d) \
  ((p) = ((unsigned __int128) (h)) + (((unsigned __int128) (d)) << 64))

/* Besides the sizes, also the endianness either needs to agree or
   host-device memcpy needs to take care of this.  */
_Static_assert (sizeof (unsigned __int128) == 2 * sizeof (void *),
                "indirect_target_map_t size mismatch");
|
||||
|
||||
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
|
||||
static inline void htab_free (void *ptr) { free (ptr); }
|
||||
static inline void htab_free (void *ptr) { __builtin_unreachable (); }
|
||||
|
||||
#include "hashtab.h"
|
||||
|
||||
static inline hashval_t
|
||||
htab_hash (hash_entry_type element)
|
||||
{
|
||||
return hash_pointer (element->host_addr);
|
||||
return hash_pointer (INDIRECT_HOST_ADDR (element));
|
||||
}
|
||||
|
||||
static inline bool
|
||||
htab_eq (hash_entry_type x, hash_entry_type y)
|
||||
{
|
||||
return x->host_addr == y->host_addr;
|
||||
}
|
||||
|
||||
void **GOMP_INDIRECT_ADDR_MAP = NULL;
|
||||
|
||||
/* Use a hashtab to lookup the target address instead of using a linear
|
||||
search. */
|
||||
#define USE_HASHTAB_LOOKUP
|
||||
|
||||
#ifdef USE_HASHTAB_LOOKUP
|
||||
|
||||
static htab_t indirect_htab = NULL;
|
||||
|
||||
/* Build the hashtab used for host->target address lookups. */
|
||||
|
||||
void
|
||||
build_indirect_map (void)
|
||||
{
|
||||
size_t num_ind_funcs = 0;
|
||||
void **map_entry;
|
||||
|
||||
if (!GOMP_INDIRECT_ADDR_MAP)
|
||||
return;
|
||||
|
||||
if (!indirect_htab)
|
||||
{
|
||||
/* Count the number of entries in the NULL-terminated address map. */
|
||||
for (map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry;
|
||||
map_entry += 2, num_ind_funcs++);
|
||||
|
||||
/* Build hashtab for address lookup. */
|
||||
indirect_htab = htab_create (num_ind_funcs);
|
||||
map_entry = GOMP_INDIRECT_ADDR_MAP;
|
||||
|
||||
for (int i = 0; i < num_ind_funcs; i++, map_entry += 2)
|
||||
{
|
||||
struct indirect_map_t element = { *map_entry, NULL };
|
||||
hash_entry_type *slot = htab_find_slot (&indirect_htab, &element,
|
||||
INSERT);
|
||||
*slot = (hash_entry_type) map_entry;
|
||||
}
|
||||
}
|
||||
return INDIRECT_HOST_ADDR (x) == INDIRECT_HOST_ADDR (y);
|
||||
}
|
||||
|
||||
void *
|
||||
|
@ -101,11 +80,42 @@ GOMP_target_map_indirect_ptr (void *ptr)
|
|||
if (!ptr)
|
||||
return ptr;
|
||||
|
||||
assert (indirect_htab);
|
||||
assert (GOMP_INDIRECT_ADDR_HMAP);
|
||||
|
||||
struct indirect_map_t element = { ptr, NULL };
|
||||
hash_entry_type entry = htab_find (indirect_htab, &element);
|
||||
return entry ? entry->target_addr : ptr;
|
||||
hash_entry_type element;
|
||||
SET_INDIRECT_HOST_ADDR (element, ptr);
|
||||
hash_entry_type entry = htab_find ((htab_t) GOMP_INDIRECT_ADDR_HMAP, element);
|
||||
return entry ? INDIRECT_DEV_ADDR (entry) : ptr;
|
||||
}
|
||||
|
||||
/* Build the hashtab used for host->target address lookups. */
|
||||
|
||||
void
|
||||
build_indirect_map (void)
|
||||
{
|
||||
size_t num_ind_funcs = 0;
|
||||
uint64_t *map_entry;
|
||||
|
||||
if (!GOMP_INDIRECT_ADDR_MAP || GOMP_INDIRECT_ADDR_HMAP)
|
||||
return;
|
||||
|
||||
/* Count the number of entries in the NULL-terminated address map. */
|
||||
for (map_entry = (uint64_t *) GOMP_INDIRECT_ADDR_MAP; *map_entry;
|
||||
map_entry += 2, num_ind_funcs++);
|
||||
|
||||
/* Build hashtab for address lookup. */
|
||||
htab_t indirect_htab = htab_create (num_ind_funcs);
|
||||
GOMP_INDIRECT_ADDR_HMAP = (void *) indirect_htab;
|
||||
|
||||
map_entry = GOMP_INDIRECT_ADDR_MAP;
|
||||
for (int i = 0; i < num_ind_funcs; i++, map_entry += 2)
|
||||
{
|
||||
hash_entry_type element;
|
||||
SET_INDIRECT_ADDRS (element, *map_entry, *(map_entry + 1));
|
||||
hash_entry_type *slot = htab_find_slot (&indirect_htab, element,
|
||||
INSERT);
|
||||
*slot = element;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
|
|
@ -32,6 +32,10 @@
|
|||
#define LITTLEENDIAN_CPU
|
||||
#include "hsa.h"
|
||||
|
||||
#define UNLIKELY(x) (__builtin_expect ((x), 0))
|
||||
|
||||
extern void *GOMP_INDIRECT_ADDR_MAP;
|
||||
|
||||
/* Defined in basic-allocator.c via config/amdgcn/allocator.c. */
|
||||
void __gcn_lowlat_init (void *heap, size_t size);
|
||||
|
||||
|
@ -57,8 +61,8 @@ gomp_gcn_enter_kernel (void)
|
|||
int numthreads = __builtin_gcn_dim_size (1);
|
||||
int teamid = __builtin_gcn_dim_pos(0);
|
||||
|
||||
/* Initialize indirect function support. */
|
||||
if (teamid == 0)
|
||||
/* Initialize indirect function support for older libgomp. */
|
||||
if (UNLIKELY (GOMP_INDIRECT_ADDR_MAP != NULL && teamid == 0))
|
||||
build_indirect_map ();
|
||||
|
||||
/* Set up the global state.
|
||||
|
|
|
@ -31,6 +31,10 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define UNLIKELY(x) (__builtin_expect ((x), 0))
|
||||
|
||||
extern void *GOMP_INDIRECT_ADDR_MAP;
|
||||
|
||||
struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));
|
||||
int __gomp_team_num __attribute__((shared,nocommon));
|
||||
|
||||
|
@ -71,10 +75,10 @@ gomp_nvptx_main (void (*fn) (void *), void *fn_data)
|
|||
nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
|
||||
memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));
|
||||
|
||||
/* Initialize indirect function support. */
|
||||
/* Initialize indirect function support for older libgomp. */
|
||||
unsigned int block_id;
|
||||
asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id));
|
||||
if (block_id == 0)
|
||||
if (UNLIKELY (GOMP_INDIRECT_ADDR_MAP != NULL && block_id == 0))
|
||||
build_indirect_map ();
|
||||
|
||||
/* Find the low-latency heap details .... */
|
||||
|
|
|
@ -134,7 +134,11 @@ enum gomp_interop_flag
|
|||
must be stringified). */
|
||||
#define GOMP_ADDITIONAL_ICVS __gomp_additional_icvs
|
||||
|
||||
/* GOMP_INDIRECT_ADDR_HMAP points to a hash table and is to be used by
|
||||
newer libgomp, while GOMP_INDIRECT_ADDR_MAP points to a linear table
|
||||
and exists for backward compatibility. */
|
||||
#define GOMP_INDIRECT_ADDR_MAP __gomp_indirect_addr_map
|
||||
#define GOMP_INDIRECT_ADDR_HMAP __gomp_indirect_addr_hmap
|
||||
|
||||
/* Miscellaneous functions. */
|
||||
extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc));
|
||||
|
|
|
@ -0,0 +1,83 @@
|
|||
/* Copyright (C) 2023-2025 Free Software Foundation, Inc.
|
||||
|
||||
Contributed by Siemens.
|
||||
|
||||
This file is part of the GNU Offloading and Multi Processing Library
|
||||
(libgomp).
|
||||
|
||||
Libgomp is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
|
||||
/* This file is used to create a hash table on the host that is supposed
|
||||
to get used on the device - that's for the 'indirect' clause feature.
|
||||
|
||||
In order to have host initialization work, the pointer sizes must be
|
||||
the same - and either the endianness or the host-device memcopy
|
||||
has to take care of it. */
|
||||
|
||||
/* A hash-table entry packs both addresses of an indirect function into one
   128-bit integer: the host address occupies the low 64 bits and the device
   address the high 64 bits.  */
typedef unsigned __int128 hash_entry_type;

/* Every macro argument is parenthesized so that an expression argument
   (e.g. 'a | b') is evaluated as a whole before the shift or cast.  */

/* Host address stored in entry P (low 64 bits).  */
#define INDIRECT_HOST_ADDR(p) ((void *) (uintptr_t) (p))
/* Device address stored in entry P (high 64 bits).  */
#define INDIRECT_DEV_ADDR(p) ((void *) (uintptr_t) ((p) >> 64))
/* Store host address H and device address D in entry P.  */
#define SET_INDIRECT_ADDRS(p, h, d) \
  ((p) = ((unsigned __int128) (h)) + (((unsigned __int128) (d)) << 64))

/* The layout above only works when a pointer is exactly half an entry.  */
_Static_assert (sizeof (unsigned __int128) == 2 * sizeof (void *),
                "hash_entry_type size mismatch");
|
||||
|
||||
/* Allocation routine for the hash-table code: returns SIZE bytes obtained
   from malloc.  The result is passed on unchecked, i.e. it is NULL when
   malloc fails.  */
static inline void *
htab_alloc (size_t size)
{
  void *mem = malloc (size);
  return mem;
}
|
||||
|
||||
/* Deallocation routine for the hash-table code: releases memory obtained
   from htab_alloc by handing PTR to free.  */
static inline void
htab_free (void *ptr)
{
  free (ptr);
}
|
||||
|
||||
#include "hashtab.h"
|
||||
|
||||
static inline hashval_t
|
||||
htab_hash (hash_entry_type element)
|
||||
{
|
||||
return hash_pointer (INDIRECT_HOST_ADDR (element));
|
||||
}
|
||||
|
||||
static inline bool
|
||||
htab_eq (hash_entry_type x, hash_entry_type y)
|
||||
{
|
||||
return INDIRECT_HOST_ADDR (x) == INDIRECT_HOST_ADDR (x);
|
||||
}
|
||||
|
||||
void*
|
||||
create_target_indirect_map (size_t *h_size, size_t count,
|
||||
uint64_t *host_addrs, uint64_t *device_addrs)
|
||||
{
|
||||
assert (htab_find); /* Silence -Werror=unused-function. */
|
||||
|
||||
htab_t indirect_htab = htab_create (count);
|
||||
|
||||
hash_entry_type element;
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
SET_INDIRECT_ADDRS (element, host_addrs[i], device_addrs[i]);
|
||||
hash_entry_type *slot = htab_find_slot (&indirect_htab, element,
|
||||
INSERT);
|
||||
*slot = element;
|
||||
}
|
||||
*h_size = (sizeof (struct htab)
|
||||
+ htab_size (indirect_htab) * sizeof (hash_entry_type));
|
||||
return (void*) indirect_htab;
|
||||
}
|
|
@ -51,6 +51,14 @@
|
|||
#include "oacc-int.h"
|
||||
#include <assert.h>
|
||||
|
||||
/* Create hash-table for declare target's indirect clause on the host;
|
||||
see build-target-indirect-htab.h for details. */
|
||||
#define USE_HASHTAB_LOOKUP_FOR_INDIRECT
|
||||
#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
|
||||
static void* create_target_indirect_map (size_t *, size_t,
|
||||
uint64_t *, uint64_t *);
|
||||
#endif
|
||||
|
||||
/* These probably won't be in elf.h for a while. */
|
||||
#ifndef R_AMDGPU_NONE
|
||||
#define R_AMDGPU_NONE 0
|
||||
|
@ -3688,37 +3696,28 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
|
|||
(void*) ind_funcs_table_addr,
|
||||
sizeof (ind_funcs_table));
|
||||
|
||||
/* Build host->target address map for indirect functions. */
|
||||
uint64_t ind_fn_map[ind_func_count * 2 + 1];
|
||||
for (unsigned i = 0; i < ind_func_count; i++)
|
||||
{
|
||||
ind_fn_map[i * 2] = host_ind_fn_table[i];
|
||||
ind_fn_map[i * 2 + 1] = ind_funcs_table[i];
|
||||
GCN_DEBUG ("Indirect function %d: %lx->%lx\n",
|
||||
i, host_ind_fn_table[i], ind_funcs_table[i]);
|
||||
}
|
||||
ind_fn_map[ind_func_count * 2] = 0;
|
||||
/* For newer binaries, the hash table for 'indirect' is created on the
|
||||
host. Older binaries don't have GOMP_INDIRECT_ADDR_HMAP on the
|
||||
device side - and have to create the table themselves using
|
||||
GOMP_INDIRECT_ADDR_MAP. */
|
||||
|
||||
/* Write the map onto the target. */
|
||||
void *map_target_addr
|
||||
= GOMP_OFFLOAD_alloc (agent->device_id, sizeof (ind_fn_map));
|
||||
GCN_DEBUG ("Allocated indirect map at %p\n", map_target_addr);
|
||||
|
||||
GOMP_OFFLOAD_host2dev (agent->device_id, map_target_addr,
|
||||
(void*) ind_fn_map,
|
||||
sizeof (ind_fn_map));
|
||||
|
||||
/* Write address of the map onto the target. */
|
||||
hsa_executable_symbol_t symbol;
|
||||
|
||||
bool host_init_htab = true;
|
||||
#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
|
||||
status
|
||||
= hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
|
||||
XSTRING (GOMP_INDIRECT_ADDR_MAP),
|
||||
XSTRING (GOMP_INDIRECT_ADDR_HMAP),
|
||||
agent->id, 0, &symbol);
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
#endif
|
||||
{
|
||||
host_init_htab = false;
|
||||
status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
|
||||
XSTRING (GOMP_INDIRECT_ADDR_MAP), agent->id, 0, &symbol);
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
hsa_fatal ("Could not find GOMP_INDIRECT_ADDR_MAP in code object",
|
||||
status);
|
||||
|
||||
uint64_t varptr;
|
||||
uint32_t varsize;
|
||||
|
||||
|
@ -3734,9 +3733,51 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
|
|||
hsa_fatal ("Could not extract a variable size from its symbol",
|
||||
status);
|
||||
|
||||
GCN_DEBUG ("Found GOMP_INDIRECT_ADDR_MAP at %lx with size %d\n",
|
||||
varptr, varsize);
|
||||
GCN_DEBUG ("Found GOMP_INDIRECT_ADDR_%sMAP at %lx with size %d\n",
|
||||
host_init_htab ? "H" : "", varptr, varsize);
|
||||
|
||||
void *map_target_addr;
|
||||
if (!host_init_htab)
|
||||
{
|
||||
/* Build host->target address map for indirect functions. */
|
||||
uint64_t ind_fn_map[ind_func_count * 2 + 1];
|
||||
for (unsigned i = 0; i < ind_func_count; i++)
|
||||
{
|
||||
ind_fn_map[i * 2] = host_ind_fn_table[i];
|
||||
ind_fn_map[i * 2 + 1] = ind_funcs_table[i];
|
||||
GCN_DEBUG ("Indirect function %d: %lx->%lx\n",
|
||||
i, host_ind_fn_table[i], ind_funcs_table[i]);
|
||||
}
|
||||
ind_fn_map[ind_func_count * 2] = 0;
|
||||
/* Write the map onto the target. */
|
||||
map_target_addr = GOMP_OFFLOAD_alloc (agent->device_id,
|
||||
sizeof (ind_fn_map));
|
||||
GOMP_OFFLOAD_host2dev (agent->device_id, map_target_addr,
|
||||
(void*) ind_fn_map, sizeof (ind_fn_map));
|
||||
}
|
||||
#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
|
||||
else
|
||||
{
|
||||
/* FIXME: Handle multi-kernel load and unload, cf. PR 114690. */
|
||||
size_t host_map_size;
|
||||
void *host_map;
|
||||
host_map = create_target_indirect_map (&host_map_size, ind_func_count,
|
||||
host_ind_fn_table,
|
||||
ind_funcs_table);
|
||||
for (unsigned i = 0; i < ind_func_count; i++)
|
||||
GCN_DEBUG ("Indirect function %d: %lx->%lx\n",
|
||||
i, host_ind_fn_table[i], ind_funcs_table[i]);
|
||||
/* Write the map onto the target. */
|
||||
map_target_addr = GOMP_OFFLOAD_alloc (agent->device_id,
|
||||
host_map_size);
|
||||
GOMP_OFFLOAD_host2dev (agent->device_id, map_target_addr,
|
||||
host_map, host_map_size);
|
||||
}
|
||||
#endif
|
||||
|
||||
GCN_DEBUG ("Allocated indirect map at %p\n", map_target_addr);
|
||||
|
||||
/* Write address of the map onto the target. */
|
||||
GOMP_OFFLOAD_host2dev (agent->device_id, (void *) varptr,
|
||||
&map_target_addr,
|
||||
sizeof (map_target_addr));
|
||||
|
@ -5247,4 +5288,8 @@ GOMP_OFFLOAD_openacc_destroy_thread_data (void *data)
|
|||
free (data);
|
||||
}
|
||||
|
||||
#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
|
||||
#include "build-target-indirect-htab.h"
|
||||
#endif
|
||||
|
||||
/* }}} */
|
||||
|
|
|
@ -60,6 +60,14 @@
|
|||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Create hash-table for declare target's indirect clause on the host;
|
||||
see build-target-indirect-htab.h for details. */
|
||||
#define USE_HASHTAB_LOOKUP_FOR_INDIRECT
|
||||
#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
|
||||
static void* create_target_indirect_map (size_t *, size_t,
|
||||
uint64_t *, uint64_t *);
|
||||
#endif
|
||||
|
||||
/* An arbitrary fixed limit (128MB) for the size of the OpenMP soft stacks
|
||||
block to cache between kernel invocations. For soft-stacks blocks bigger
|
||||
than this, we will free the block before attempting another GPU memory
|
||||
|
@ -1626,39 +1634,71 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
|
|||
if (r != CUDA_SUCCESS)
|
||||
GOMP_PLUGIN_fatal ("cuMemcpyDtoH error: %s", cuda_error (r));
|
||||
|
||||
/* Build host->target address map for indirect functions. */
|
||||
uint64_t ind_fn_map[ind_fn_entries * 2 + 1];
|
||||
for (unsigned k = 0; k < ind_fn_entries; k++)
|
||||
{
|
||||
ind_fn_map[k * 2] = host_ind_fn_table[k];
|
||||
ind_fn_map[k * 2 + 1] = ind_fn_table[k];
|
||||
GOMP_PLUGIN_debug (0, "Indirect function %d: %lx->%lx\n",
|
||||
k, host_ind_fn_table[k], ind_fn_table[k]);
|
||||
}
|
||||
ind_fn_map[ind_fn_entries * 2] = 0;
|
||||
/* For newer binaries, the hash table for 'indirect' is created on the
|
||||
host. Older binaries don't have GOMP_INDIRECT_ADDR_HMAP on the
|
||||
device side - and have to create the table themselves using
|
||||
GOMP_INDIRECT_ADDR_MAP. */
|
||||
|
||||
/* Write the map onto the target. */
|
||||
void *map_target_addr
|
||||
= GOMP_OFFLOAD_alloc (ord, sizeof (ind_fn_map));
|
||||
GOMP_PLUGIN_debug (0, "Allocated indirect map at %p\n", map_target_addr);
|
||||
|
||||
GOMP_OFFLOAD_host2dev (ord, map_target_addr,
|
||||
(void*) ind_fn_map,
|
||||
sizeof (ind_fn_map));
|
||||
|
||||
/* Write address of the map onto the target. */
|
||||
CUdeviceptr varptr;
|
||||
size_t varsize;
|
||||
bool host_init_htab = true;
|
||||
#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
|
||||
r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &varptr, &varsize,
|
||||
module, XSTRING (GOMP_INDIRECT_ADDR_MAP));
|
||||
module, XSTRING (GOMP_INDIRECT_ADDR_HMAP));
|
||||
if (r != CUDA_SUCCESS)
|
||||
#endif
|
||||
{
|
||||
host_init_htab = false;
|
||||
r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &varptr, &varsize,
|
||||
module, XSTRING (GOMP_INDIRECT_ADDR_MAP));
|
||||
}
|
||||
if (r != CUDA_SUCCESS)
|
||||
GOMP_PLUGIN_fatal ("Indirect map variable not found in image: %s",
|
||||
cuda_error (r));
|
||||
|
||||
GOMP_PLUGIN_debug (0,
|
||||
"Indirect map variable found at %llx with size %ld\n",
|
||||
"%s-style indirect map variable found at %llx with "
|
||||
"size %ld\n", host_init_htab ? "New" : "Old",
|
||||
varptr, varsize);
|
||||
|
||||
void *map_target_addr;
|
||||
if (!host_init_htab)
|
||||
{
|
||||
/* Build host->target address map for indirect functions. */
|
||||
uint64_t ind_fn_map[ind_fn_entries * 2 + 1];
|
||||
for (unsigned k = 0; k < ind_fn_entries; k++)
|
||||
{
|
||||
ind_fn_map[k * 2] = host_ind_fn_table[k];
|
||||
ind_fn_map[k * 2 + 1] = ind_fn_table[k];
|
||||
GOMP_PLUGIN_debug (0, "Indirect function %d: %lx->%lx\n",
|
||||
k, host_ind_fn_table[k], ind_fn_table[k]);
|
||||
}
|
||||
ind_fn_map[ind_fn_entries * 2] = 0;
|
||||
/* Write the map onto the target. */
|
||||
map_target_addr = GOMP_OFFLOAD_alloc (ord, sizeof (ind_fn_map));
|
||||
GOMP_OFFLOAD_host2dev (ord, map_target_addr,
|
||||
(void *) ind_fn_map, sizeof (ind_fn_map));
|
||||
}
|
||||
#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
|
||||
else
|
||||
{
|
||||
/* FIXME: Handle multi-kernel load and unload, cf. PR 114690. */
|
||||
size_t host_map_size;
|
||||
void *host_map;
|
||||
host_map = create_target_indirect_map (&host_map_size, ind_fn_entries,
|
||||
host_ind_fn_table,
|
||||
ind_fn_table);
|
||||
for (unsigned k = 0; k < ind_fn_entries; k++)
|
||||
GOMP_PLUGIN_debug (0, "Indirect function %d: %lx->%lx\n",
|
||||
k, host_ind_fn_table[k], ind_fn_table[k]);
|
||||
/* Write the map onto the target. */
|
||||
map_target_addr = GOMP_OFFLOAD_alloc (ord, host_map_size);
|
||||
GOMP_OFFLOAD_host2dev (ord, map_target_addr, host_map, host_map_size);
|
||||
}
|
||||
#endif
|
||||
|
||||
GOMP_PLUGIN_debug (0, "Allocated indirect map at %p\n", map_target_addr);
|
||||
|
||||
/* Write address of the map onto the target. */
|
||||
GOMP_OFFLOAD_host2dev (ord, (void *) varptr, &map_target_addr,
|
||||
sizeof (map_target_addr));
|
||||
}
|
||||
|
@ -2898,3 +2938,7 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
|
|||
}
|
||||
|
||||
/* TODO: Implement GOMP_OFFLOAD_async_run. */
|
||||
|
||||
#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
|
||||
#include "build-target-indirect-htab.h"
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue