mirror of git://gcc.gnu.org/git/gcc.git
Optimize GCN OpenMP malloc performance
2019-11-13 Andrew Stubbs <ams@codesourcery.com> libgomp/ * config/gcn/team.c (gomp_gcn_enter_kernel): Set up the team arena and use team_malloc variants. (gomp_gcn_exit_kernel): Use team_free. * libgomp.h (TEAM_ARENA_SIZE): Define. (TEAM_ARENA_START): Define. (TEAM_ARENA_FREE): Define. (TEAM_ARENA_END): Define. (team_malloc): New function. (team_malloc_cleared): New function. (team_free): New function. * team.c (gomp_new_team): Initialize and use team_malloc. (free_team): Use team_free. (gomp_free_thread): Use team_free. (gomp_pause_host): Use team_free. * work.c (gomp_init_work_share): Use team_malloc. (gomp_fini_work_share): Use team_free. From-SVN: r278136
This commit is contained in:
parent
fa4999953d
commit
cee1645106
|
@ -1,3 +1,22 @@
|
||||||
|
2019-11-13 Andrew Stubbs <ams@codesourcery.com>
|
||||||
|
|
||||||
|
* config/gcn/team.c (gomp_gcn_enter_kernel): Set up the team arena
|
||||||
|
and use team_malloc variants.
|
||||||
|
(gomp_gcn_exit_kernel): Use team_free.
|
||||||
|
* libgomp.h (TEAM_ARENA_SIZE): Define.
|
||||||
|
(TEAM_ARENA_START): Define.
|
||||||
|
(TEAM_ARENA_FREE): Define.
|
||||||
|
(TEAM_ARENA_END): Define.
|
||||||
|
(team_malloc): New function.
|
||||||
|
(team_malloc_cleared): New function.
|
||||||
|
(team_free): New function.
|
||||||
|
* team.c (gomp_new_team): Initialize and use team_malloc.
|
||||||
|
(free_team): Use team_free.
|
||||||
|
(gomp_free_thread): Use team_free.
|
||||||
|
(gomp_pause_host): Use team_free.
|
||||||
|
* work.c (gomp_init_work_share): Use team_malloc.
|
||||||
|
(gomp_fini_work_share): Use team_free.
|
||||||
|
|
||||||
2019-11-13 Andrew Stubbs <ams@codesourcery.com>
|
2019-11-13 Andrew Stubbs <ams@codesourcery.com>
|
||||||
Kwok Cheung Yeung <kcy@codesourcery.com>
|
Kwok Cheung Yeung <kcy@codesourcery.com>
|
||||||
Julian Brown <julian@codesourcery.com>
|
Julian Brown <julian@codesourcery.com>
|
||||||
|
|
|
@ -57,16 +57,28 @@ gomp_gcn_enter_kernel (void)
|
||||||
/* Starting additional threads is not supported. */
|
/* Starting additional threads is not supported. */
|
||||||
gomp_global_icv.dyn_var = true;
|
gomp_global_icv.dyn_var = true;
|
||||||
|
|
||||||
|
/* Initialize the team arena for optimized memory allocation.
|
||||||
|
The arena has been allocated on the host side, and the address
|
||||||
|
passed in via the kernargs. Each team takes a small slice of it. */
|
||||||
|
register void **kernargs asm("s8");
|
||||||
|
void *team_arena = (kernargs[4] + TEAM_ARENA_SIZE*teamid);
|
||||||
|
void * __lds *arena_start = (void * __lds *)TEAM_ARENA_START;
|
||||||
|
void * __lds *arena_free = (void * __lds *)TEAM_ARENA_FREE;
|
||||||
|
void * __lds *arena_end = (void * __lds *)TEAM_ARENA_END;
|
||||||
|
*arena_start = team_arena;
|
||||||
|
*arena_free = team_arena;
|
||||||
|
*arena_end = team_arena + TEAM_ARENA_SIZE;
|
||||||
|
|
||||||
/* Allocate and initialize the team-local-storage data. */
|
/* Allocate and initialize the team-local-storage data. */
|
||||||
struct gomp_thread *thrs = gomp_malloc_cleared (sizeof (*thrs)
|
struct gomp_thread *thrs = team_malloc_cleared (sizeof (*thrs)
|
||||||
* numthreads);
|
* numthreads);
|
||||||
set_gcn_thrs (thrs);
|
set_gcn_thrs (thrs);
|
||||||
|
|
||||||
/* Allocate and initialize a pool of threads in the team.
|
/* Allocate and initialize a pool of threads in the team.
|
||||||
The threads are already running, of course, we just need to manage
|
The threads are already running, of course, we just need to manage
|
||||||
the communication between them. */
|
the communication between them. */
|
||||||
struct gomp_thread_pool *pool = gomp_malloc (sizeof (*pool));
|
struct gomp_thread_pool *pool = team_malloc (sizeof (*pool));
|
||||||
pool->threads = gomp_malloc (sizeof (void *) * numthreads);
|
pool->threads = team_malloc (sizeof (void *) * numthreads);
|
||||||
for (int tid = 0; tid < numthreads; tid++)
|
for (int tid = 0; tid < numthreads; tid++)
|
||||||
pool->threads[tid] = &thrs[tid];
|
pool->threads[tid] = &thrs[tid];
|
||||||
pool->threads_size = numthreads;
|
pool->threads_size = numthreads;
|
||||||
|
@ -91,7 +103,7 @@ void
|
||||||
gomp_gcn_exit_kernel (void)
|
gomp_gcn_exit_kernel (void)
|
||||||
{
|
{
|
||||||
gomp_free_thread (gcn_thrs ());
|
gomp_free_thread (gcn_thrs ());
|
||||||
free (gcn_thrs ());
|
team_free (gcn_thrs ());
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This function contains the idle loop in which a thread waits
|
/* This function contains the idle loop in which a thread waits
|
||||||
|
|
|
@ -106,6 +106,69 @@ extern void gomp_aligned_free (void *);
|
||||||
GCC's builtin alloca(). */
|
GCC's builtin alloca(). */
|
||||||
#define gomp_alloca(x) __builtin_alloca(x)
|
#define gomp_alloca(x) __builtin_alloca(x)
|
||||||
|
|
||||||
|
/* Optimized allocators for team-specific data that will die with the team. */
|
||||||
|
|
||||||
|
#ifdef __AMDGCN__
|
||||||
|
/* The arena is initialized in config/gcn/team.c. */
|
||||||
|
#define TEAM_ARENA_SIZE 64*1024 /* Must match the value in plugin-gcn.c. */
|
||||||
|
#define TEAM_ARENA_START 16 /* LDS offset of start pointer. */
|
||||||
|
#define TEAM_ARENA_FREE 24 /* LDS offset of free pointer. */
|
||||||
|
#define TEAM_ARENA_END 32 /* LDS offset of end pointer. */
|
||||||
|
|
||||||
|
static inline void * __attribute__((malloc))
|
||||||
|
team_malloc (size_t size)
|
||||||
|
{
|
||||||
|
/* 4-byte align the size. */
|
||||||
|
size = (size + 3) & ~3;
|
||||||
|
|
||||||
|
/* Allocate directly from the arena.
|
||||||
|
The compiler does not support DS atomics, yet. */
|
||||||
|
void *result;
|
||||||
|
asm ("ds_add_rtn_u64 %0, %1, %2\n\ts_waitcnt 0"
|
||||||
|
: "=v"(result) : "v"(TEAM_ARENA_FREE), "v"(size), "e"(1L) : "memory");
|
||||||
|
|
||||||
|
/* Handle OOM. */
|
||||||
|
if (result + size > *(void * __lds *)TEAM_ARENA_END)
|
||||||
|
{
|
||||||
|
/* While this is experimental, let's make sure we know when OOM
|
||||||
|
happens. */
|
||||||
|
const char msg[] = "GCN team arena exhausted\n";
|
||||||
|
write (2, msg, sizeof(msg)-1);
|
||||||
|
|
||||||
|
/* Fall back to using the heap (slowly). */
|
||||||
|
result = gomp_malloc (size);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void * __attribute__((malloc))
|
||||||
|
team_malloc_cleared (size_t size)
|
||||||
|
{
|
||||||
|
char *result = team_malloc (size);
|
||||||
|
|
||||||
|
/* Clear the allocated memory. */
|
||||||
|
__builtin_memset (result, 0, size);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
team_free (void *ptr)
|
||||||
|
{
|
||||||
|
/* The whole arena is freed when the kernel exits.
|
||||||
|
However, if we fell back to using heap then we should free it.
|
||||||
|
It would be better if this function could be a no-op, but at least
|
||||||
|
LDS loads are cheap. */
|
||||||
|
if (ptr < *(void * __lds *)TEAM_ARENA_START
|
||||||
|
|| ptr >= *(void * __lds *)TEAM_ARENA_END)
|
||||||
|
free (ptr);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#define team_malloc(...) gomp_malloc (__VA_ARGS__)
|
||||||
|
#define team_malloc_cleared(...) gomp_malloc_cleared (__VA_ARGS__)
|
||||||
|
#define team_free(...) free (__VA_ARGS__)
|
||||||
|
#endif
|
||||||
|
|
||||||
/* error.c */
|
/* error.c */
|
||||||
|
|
||||||
extern void gomp_vdebug (int, const char *, va_list);
|
extern void gomp_vdebug (int, const char *, va_list);
|
||||||
|
|
|
@ -171,7 +171,7 @@ gomp_new_team (unsigned nthreads)
|
||||||
{
|
{
|
||||||
size_t extra = sizeof (team->ordered_release[0])
|
size_t extra = sizeof (team->ordered_release[0])
|
||||||
+ sizeof (team->implicit_task[0]);
|
+ sizeof (team->implicit_task[0]);
|
||||||
team = gomp_malloc (sizeof (*team) + nthreads * extra);
|
team = team_malloc (sizeof (*team) + nthreads * extra);
|
||||||
|
|
||||||
#ifndef HAVE_SYNC_BUILTINS
|
#ifndef HAVE_SYNC_BUILTINS
|
||||||
gomp_mutex_init (&team->work_share_list_free_lock);
|
gomp_mutex_init (&team->work_share_list_free_lock);
|
||||||
|
@ -221,7 +221,7 @@ free_team (struct gomp_team *team)
|
||||||
gomp_barrier_destroy (&team->barrier);
|
gomp_barrier_destroy (&team->barrier);
|
||||||
gomp_mutex_destroy (&team->task_lock);
|
gomp_mutex_destroy (&team->task_lock);
|
||||||
priority_queue_free (&team->task_queue);
|
priority_queue_free (&team->task_queue);
|
||||||
free (team);
|
team_free (team);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -285,8 +285,8 @@ gomp_free_thread (void *arg __attribute__((unused)))
|
||||||
if (pool->last_team)
|
if (pool->last_team)
|
||||||
free_team (pool->last_team);
|
free_team (pool->last_team);
|
||||||
#ifndef __nvptx__
|
#ifndef __nvptx__
|
||||||
free (pool->threads);
|
team_free (pool->threads);
|
||||||
free (pool);
|
team_free (pool);
|
||||||
#endif
|
#endif
|
||||||
thr->thread_pool = NULL;
|
thr->thread_pool = NULL;
|
||||||
}
|
}
|
||||||
|
@ -1082,8 +1082,8 @@ gomp_pause_host (void)
|
||||||
if (pool->last_team)
|
if (pool->last_team)
|
||||||
free_team (pool->last_team);
|
free_team (pool->last_team);
|
||||||
#ifndef __nvptx__
|
#ifndef __nvptx__
|
||||||
free (pool->threads);
|
team_free (pool->threads);
|
||||||
free (pool);
|
team_free (pool);
|
||||||
#endif
|
#endif
|
||||||
thr->thread_pool = NULL;
|
thr->thread_pool = NULL;
|
||||||
}
|
}
|
||||||
|
|
|
@ -120,7 +120,7 @@ gomp_init_work_share (struct gomp_work_share *ws, size_t ordered,
|
||||||
else
|
else
|
||||||
ordered = nthreads * sizeof (*ws->ordered_team_ids);
|
ordered = nthreads * sizeof (*ws->ordered_team_ids);
|
||||||
if (ordered > INLINE_ORDERED_TEAM_IDS_SIZE)
|
if (ordered > INLINE_ORDERED_TEAM_IDS_SIZE)
|
||||||
ws->ordered_team_ids = gomp_malloc (ordered);
|
ws->ordered_team_ids = team_malloc (ordered);
|
||||||
else
|
else
|
||||||
ws->ordered_team_ids = ws->inline_ordered_team_ids;
|
ws->ordered_team_ids = ws->inline_ordered_team_ids;
|
||||||
memset (ws->ordered_team_ids, '\0', ordered);
|
memset (ws->ordered_team_ids, '\0', ordered);
|
||||||
|
@ -142,7 +142,7 @@ gomp_fini_work_share (struct gomp_work_share *ws)
|
||||||
{
|
{
|
||||||
gomp_mutex_destroy (&ws->lock);
|
gomp_mutex_destroy (&ws->lock);
|
||||||
if (ws->ordered_team_ids != ws->inline_ordered_team_ids)
|
if (ws->ordered_team_ids != ws->inline_ordered_team_ids)
|
||||||
free (ws->ordered_team_ids);
|
team_free (ws->ordered_team_ids);
|
||||||
gomp_ptrlock_destroy (&ws->next_ws);
|
gomp_ptrlock_destroy (&ws->next_ws);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue