mirror of git://gcc.gnu.org/git/gcc.git
GCN libgomp port
2019-11-13 Andrew Stubbs <ams@codesourcery.com> Kwok Cheung Yeung <kcy@codesourcery.com> Julian Brown <julian@codesourcery.com> Tom de Vries <tom@codesourcery.com> include/ * gomp-constants.h (GOMP_DEVICE_GCN): Define. (GOMP_VERSION_GCN): Define. libgomp/ * Makefile.am (libgomp_la_SOURCES): Add oacc-target.c. * Makefile.in: Regenerate. * config.h.in (PLUGIN_GCN): Add new undef. * config/accel/openacc.f90 (acc_device_gcn): New parameter. * config/gcn/affinity-fmt.c: New file. * config/gcn/bar.c: New file. * config/gcn/bar.h: New file. * config/gcn/doacross.h: New file. * config/gcn/icv-device.c: New file. * config/gcn/oacc-target.c: New file. * config/gcn/simple-bar.h: New file. * config/gcn/target.c: New file. * config/gcn/task.c: New file. * config/gcn/team.c: New file. * config/gcn/time.c: New file. * configure.ac: Add amdgcn*-*-*. * configure: Regenerate. * configure.tgt: Add amdgcn*-*-*. * libgomp-plugin.h (offload_target_type): Add OFFLOAD_TARGET_TYPE_GCN. * libgomp.h (gcn_thrs): Add amdgcn variant. (set_gcn_thrs): Likewise. (gomp_thread): Likewise. * oacc-int.h (goacc_thread): Likewise. * oacc-target.c: New file. * openacc.f90 (acc_device_gcn): New parameter. * openacc.h (acc_device_t): Add acc_device_gcn. * team.c (gomp_free_pool_helper): Add amdgcn support. Co-Authored-By: Julian Brown <julian@codesourcery.com> Co-Authored-By: Kwok Cheung Yeung <kcy@codesourcery.com> Co-Authored-By: Tom de Vries <tom@codesourcery.com> From-SVN: r278135
This commit is contained in:
parent
d2903ce05b
commit
fa4999953d
|
@ -1,3 +1,11 @@
|
||||||
|
2019-11-13 Andrew Stubbs <ams@codesourcery.com>
|
||||||
|
Kwok Cheung Yeung <kcy@codesourcery.com>
|
||||||
|
Julian Brown <julian@codesourcery.com>
|
||||||
|
Tom de Vries <tom@codesourcery.com>
|
||||||
|
|
||||||
|
* gomp-constants.h (GOMP_DEVICE_GCN): Define.
|
||||||
|
(GOMP_VERSION_GCN): Define.
|
||||||
|
|
||||||
2019-08-08 Martin Liska <mliska@suse.cz>
|
2019-08-08 Martin Liska <mliska@suse.cz>
|
||||||
|
|
||||||
PR bootstrap/91352
|
PR bootstrap/91352
|
||||||
|
|
|
@ -174,6 +174,7 @@ enum gomp_map_kind
|
||||||
#define GOMP_DEVICE_NVIDIA_PTX 5
|
#define GOMP_DEVICE_NVIDIA_PTX 5
|
||||||
#define GOMP_DEVICE_INTEL_MIC 6
|
#define GOMP_DEVICE_INTEL_MIC 6
|
||||||
#define GOMP_DEVICE_HSA 7
|
#define GOMP_DEVICE_HSA 7
|
||||||
|
#define GOMP_DEVICE_GCN 8
|
||||||
|
|
||||||
#define GOMP_DEVICE_ICV -1
|
#define GOMP_DEVICE_ICV -1
|
||||||
#define GOMP_DEVICE_HOST_FALLBACK -2
|
#define GOMP_DEVICE_HOST_FALLBACK -2
|
||||||
|
@ -215,6 +216,7 @@ enum gomp_map_kind
|
||||||
#define GOMP_VERSION_NVIDIA_PTX 1
|
#define GOMP_VERSION_NVIDIA_PTX 1
|
||||||
#define GOMP_VERSION_INTEL_MIC 0
|
#define GOMP_VERSION_INTEL_MIC 0
|
||||||
#define GOMP_VERSION_HSA 0
|
#define GOMP_VERSION_HSA 0
|
||||||
|
#define GOMP_VERSION_GCN 1
|
||||||
|
|
||||||
#define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV))
|
#define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV))
|
||||||
#define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff)
|
#define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff)
|
||||||
|
|
|
@ -1,3 +1,36 @@
|
||||||
|
2019-11-13 Andrew Stubbs <ams@codesourcery.com>
|
||||||
|
Kwok Cheung Yeung <kcy@codesourcery.com>
|
||||||
|
Julian Brown <julian@codesourcery.com>
|
||||||
|
Tom de Vries <tom@codesourcery.com>
|
||||||
|
|
||||||
|
* Makefile.am (libgomp_la_SOURCES): Add oacc-target.c.
|
||||||
|
* Makefile.in: Regenerate.
|
||||||
|
* config.h.in (PLUGIN_GCN): Add new undef.
|
||||||
|
* config/accel/openacc.f90 (acc_device_gcn): New parameter.
|
||||||
|
* config/gcn/affinity-fmt.c: New file.
|
||||||
|
* config/gcn/bar.c: New file.
|
||||||
|
* config/gcn/bar.h: New file.
|
||||||
|
* config/gcn/doacross.h: New file.
|
||||||
|
* config/gcn/icv-device.c: New file.
|
||||||
|
* config/gcn/oacc-target.c: New file.
|
||||||
|
* config/gcn/simple-bar.h: New file.
|
||||||
|
* config/gcn/target.c: New file.
|
||||||
|
* config/gcn/task.c: New file.
|
||||||
|
* config/gcn/team.c: New file.
|
||||||
|
* config/gcn/time.c: New file.
|
||||||
|
* configure.ac: Add amdgcn*-*-*.
|
||||||
|
* configure: Regenerate.
|
||||||
|
* configure.tgt: Add amdgcn*-*-*.
|
||||||
|
* libgomp-plugin.h (offload_target_type): Add OFFLOAD_TARGET_TYPE_GCN.
|
||||||
|
* libgomp.h (gcn_thrs): Add amdgcn variant.
|
||||||
|
(set_gcn_thrs): Likewise.
|
||||||
|
(gomp_thread): Likewise.
|
||||||
|
* oacc-int.h (goacc_thread): Likewise.
|
||||||
|
* oacc-target.c: New file.
|
||||||
|
* openacc.f90 (acc_device_gcn): New parameter.
|
||||||
|
* openacc.h (acc_device_t): Add acc_device_gcn.
|
||||||
|
* team.c (gomp_free_pool_helper): Add amdgcn support.
|
||||||
|
|
||||||
2019-11-13 Andrew Stubbs <ams@codesourcery.com>
|
2019-11-13 Andrew Stubbs <ams@codesourcery.com>
|
||||||
Julian Brown <julian@codesourcery.com>
|
Julian Brown <julian@codesourcery.com>
|
||||||
|
|
||||||
|
|
|
@ -65,7 +65,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
|
||||||
proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \
|
proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \
|
||||||
splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \
|
splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \
|
||||||
oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
|
oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
|
||||||
affinity-fmt.c teams.c oacc-profiling.c
|
affinity-fmt.c teams.c oacc-profiling.c oacc-target.c
|
||||||
|
|
||||||
include $(top_srcdir)/plugin/Makefrag.am
|
include $(top_srcdir)/plugin/Makefrag.am
|
||||||
|
|
||||||
|
|
|
@ -217,7 +217,7 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
|
||||||
target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \
|
target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \
|
||||||
oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \
|
oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \
|
||||||
oacc-plugin.lo oacc-cuda.lo priority_queue.lo affinity-fmt.lo \
|
oacc-plugin.lo oacc-cuda.lo priority_queue.lo affinity-fmt.lo \
|
||||||
teams.lo oacc-profiling.lo $(am__objects_1)
|
teams.lo oacc-profiling.lo oacc-target.lo $(am__objects_1)
|
||||||
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
|
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
|
||||||
AM_V_P = $(am__v_P_@AM_V@)
|
AM_V_P = $(am__v_P_@AM_V@)
|
||||||
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
|
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
|
||||||
|
@ -552,7 +552,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
|
||||||
affinity.c target.c splay-tree.c libgomp-plugin.c \
|
affinity.c target.c splay-tree.c libgomp-plugin.c \
|
||||||
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
|
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
|
||||||
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
|
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
|
||||||
affinity-fmt.c teams.c oacc-profiling.c $(am__append_3)
|
affinity-fmt.c teams.c oacc-profiling.c oacc-target.c \
|
||||||
|
$(am__append_3)
|
||||||
|
|
||||||
# Nvidia PTX OpenACC plugin.
|
# Nvidia PTX OpenACC plugin.
|
||||||
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
|
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
|
||||||
|
@ -755,6 +756,7 @@ distclean-compile:
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@
|
||||||
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-target.Plo@am__quote@
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@
|
||||||
|
|
|
@ -170,6 +170,9 @@
|
||||||
/* Define to the version of this package. */
|
/* Define to the version of this package. */
|
||||||
#undef PACKAGE_VERSION
|
#undef PACKAGE_VERSION
|
||||||
|
|
||||||
|
/* Define to 1 if the GCN plugin is built, 0 if not. */
|
||||||
|
#undef PLUGIN_GCN
|
||||||
|
|
||||||
/* Define to 1 if the HSA plugin is built, 0 if not. */
|
/* Define to 1 if the HSA plugin is built, 0 if not. */
|
||||||
#undef PLUGIN_HSA
|
#undef PLUGIN_HSA
|
||||||
|
|
||||||
|
|
|
@ -51,6 +51,7 @@ module openacc_kinds
|
||||||
! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
|
! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
|
||||||
integer (acc_device_kind), parameter :: acc_device_not_host = 4
|
integer (acc_device_kind), parameter :: acc_device_not_host = 4
|
||||||
integer (acc_device_kind), parameter :: acc_device_nvidia = 5
|
integer (acc_device_kind), parameter :: acc_device_nvidia = 5
|
||||||
|
integer (acc_device_kind), parameter :: acc_device_gcn = 8
|
||||||
|
|
||||||
end module
|
end module
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,51 @@
|
||||||
|
/* Copyright (C) 2018-2019 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
This file is part of the GNU Offloading and Multi Processing Library
|
||||||
|
(libgomp).
|
||||||
|
|
||||||
|
Libgomp is free software; you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
any later version.
|
||||||
|
|
||||||
|
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
more details.
|
||||||
|
|
||||||
|
Under Section 7 of GPL version 3, you are granted additional
|
||||||
|
permissions described in the GCC Runtime Library Exception, version
|
||||||
|
3.1, as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License and
|
||||||
|
a copy of the GCC Runtime Library Exception along with this program;
|
||||||
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include "libgomp.h"
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#ifdef HAVE_UNISTD_H
|
||||||
|
#include <unistd.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_INTTYPES_H
|
||||||
|
# include <inttypes.h> /* For PRIx64. */
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_UNAME
|
||||||
|
#include <sys/utsname.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The HAVE_GETPID and HAVE_GETHOSTNAME configure tests are reported as passing
|
||||||
|
for nvptx although its newlib does not support those functions; this GCN
|
||||||
|
file applies the same override (presumably for the same reason -- confirm). */
|
||||||
|
#undef HAVE_GETPID
|
||||||
|
#undef HAVE_GETHOSTNAME
|
||||||
|
|
||||||
|
/* The GCN newlib implementation does not support fwrite, but it does support
|
||||||
|
write. Map fwrite to write. */
|
||||||
|
#undef fwrite
|
||||||
|
#define fwrite(ptr, size, nmemb, stream) write (1, (ptr), (nmemb) * (size))
|
||||||
|
|
||||||
|
#include "../../affinity-fmt.c"
|
||||||
|
|
|
@ -0,0 +1,232 @@
|
||||||
|
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
|
||||||
|
Contributed by Mentor Embedded.
|
||||||
|
|
||||||
|
This file is part of the GNU Offloading and Multi Processing Library
|
||||||
|
(libgomp).
|
||||||
|
|
||||||
|
Libgomp is free software; you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
any later version.
|
||||||
|
|
||||||
|
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
more details.
|
||||||
|
|
||||||
|
Under Section 7 of GPL version 3, you are granted additional
|
||||||
|
permissions described in the GCC Runtime Library Exception, version
|
||||||
|
3.1, as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License and
|
||||||
|
a copy of the GCC Runtime Library Exception along with this program;
|
||||||
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
/* This is an AMD GCN specific implementation of a barrier synchronization
|
||||||
|
mechanism for libgomp. This type is private to the library. This
|
||||||
|
implementation uses atomic instructions and s_barrier instruction. It
|
||||||
|
uses MEMMODEL_RELAXED here because barriers are within workgroups and
|
||||||
|
therefore don't need to flush caches. */
|
||||||
|
|
||||||
|
#include <limits.h>
|
||||||
|
#include "libgomp.h"
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
|
||||||
|
{
|
||||||
|
if (__builtin_expect (state & BAR_WAS_LAST, 0))
|
||||||
|
{
|
||||||
|
/* Next time we'll be awaiting TOTAL threads again. */
|
||||||
|
bar->awaited = bar->total;
|
||||||
|
__atomic_store_n (&bar->generation, bar->generation + BAR_INCR,
|
||||||
|
MEMMODEL_RELAXED);
|
||||||
|
}
|
||||||
|
asm ("s_barrier" ::: "memory");
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
gomp_barrier_wait (gomp_barrier_t *bar)
|
||||||
|
{
|
||||||
|
gomp_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Like gomp_barrier_wait, except that if the encountering thread
|
||||||
|
is not the last one to hit the barrier, it returns immediately.
|
||||||
|
The intended usage is that a thread which intends to gomp_barrier_destroy
|
||||||
|
this barrier calls gomp_barrier_wait, while all other threads
|
||||||
|
call gomp_barrier_wait_last. When gomp_barrier_wait returns,
|
||||||
|
the barrier can be safely destroyed. */
|
||||||
|
|
||||||
|
void
|
||||||
|
gomp_barrier_wait_last (gomp_barrier_t *bar)
|
||||||
|
{
|
||||||
|
/* Deferring to gomp_barrier_wait does not use the optimization opportunity
|
||||||
|
allowed by the interface contract for all-but-last participants. The
|
||||||
|
original implementation in config/linux/bar.c handles this better. */
|
||||||
|
gomp_barrier_wait (bar);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
|
||||||
|
{
|
||||||
|
asm ("s_barrier" ::: "memory");
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
|
||||||
|
{
|
||||||
|
unsigned int generation, gen;
|
||||||
|
|
||||||
|
if (__builtin_expect (state & BAR_WAS_LAST, 0))
|
||||||
|
{
|
||||||
|
/* Next time we'll be awaiting TOTAL threads again. */
|
||||||
|
struct gomp_thread *thr = gomp_thread ();
|
||||||
|
struct gomp_team *team = thr->ts.team;
|
||||||
|
|
||||||
|
bar->awaited = bar->total;
|
||||||
|
team->work_share_cancelled = 0;
|
||||||
|
if (__builtin_expect (team->task_count, 0))
|
||||||
|
{
|
||||||
|
gomp_barrier_handle_tasks (state);
|
||||||
|
state &= ~BAR_WAS_LAST;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
state &= ~BAR_CANCELLED;
|
||||||
|
state += BAR_INCR - BAR_WAS_LAST;
|
||||||
|
__atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
|
||||||
|
asm ("s_barrier" ::: "memory");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
generation = state;
|
||||||
|
state &= ~BAR_CANCELLED;
|
||||||
|
int retry = 100;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if (retry-- == 0)
|
||||||
|
{
|
||||||
|
/* It really shouldn't happen that barriers get out of sync, but
|
||||||
|
if they do then this will loop until they realign, so we need
|
||||||
|
to avoid an infinite loop where the thread just isn't there. */
|
||||||
|
const char msg[] = ("Barrier sync failed (another thread died?);"
|
||||||
|
" aborting.");
|
||||||
|
write (2, msg, sizeof (msg)-1);
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
asm ("s_barrier" ::: "memory");
|
||||||
|
gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
|
||||||
|
if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
|
||||||
|
{
|
||||||
|
gomp_barrier_handle_tasks (state);
|
||||||
|
gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
|
||||||
|
}
|
||||||
|
generation |= gen & BAR_WAITING_FOR_TASK;
|
||||||
|
}
|
||||||
|
while (gen != state + BAR_INCR);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
gomp_team_barrier_wait (gomp_barrier_t *bar)
|
||||||
|
{
|
||||||
|
gomp_team_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
gomp_team_barrier_wait_final (gomp_barrier_t *bar)
|
||||||
|
{
|
||||||
|
gomp_barrier_state_t state = gomp_barrier_wait_final_start (bar);
|
||||||
|
if (__builtin_expect (state & BAR_WAS_LAST, 0))
|
||||||
|
bar->awaited_final = bar->total;
|
||||||
|
gomp_team_barrier_wait_end (bar, state);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
|
||||||
|
gomp_barrier_state_t state)
|
||||||
|
{
|
||||||
|
unsigned int generation, gen;
|
||||||
|
|
||||||
|
if (__builtin_expect (state & BAR_WAS_LAST, 0))
|
||||||
|
{
|
||||||
|
/* Next time we'll be awaiting TOTAL threads again. */
|
||||||
|
/* BAR_CANCELLED should never be set in state here, because
|
||||||
|
cancellation means that at least one of the threads has been
|
||||||
|
cancelled, thus on a cancellable barrier we should never see
|
||||||
|
all threads to arrive. */
|
||||||
|
struct gomp_thread *thr = gomp_thread ();
|
||||||
|
struct gomp_team *team = thr->ts.team;
|
||||||
|
|
||||||
|
bar->awaited = bar->total;
|
||||||
|
team->work_share_cancelled = 0;
|
||||||
|
if (__builtin_expect (team->task_count, 0))
|
||||||
|
{
|
||||||
|
gomp_barrier_handle_tasks (state);
|
||||||
|
state &= ~BAR_WAS_LAST;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
state += BAR_INCR - BAR_WAS_LAST;
|
||||||
|
__atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
|
||||||
|
asm ("s_barrier" ::: "memory");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (__builtin_expect (state & BAR_CANCELLED, 0))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
generation = state;
|
||||||
|
int retry = 100;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if (retry-- == 0)
|
||||||
|
{
|
||||||
|
/* It really shouldn't happen that barriers get out of sync, but
|
||||||
|
if they do then this will loop until they realign, so we need
|
||||||
|
to avoid an infinite loop where the thread just isn't there. */
|
||||||
|
const char msg[] = ("Barrier sync failed (another thread died?);"
|
||||||
|
" aborting.");
|
||||||
|
write (2, msg, sizeof (msg)-1);
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
asm ("s_barrier" ::: "memory");
|
||||||
|
gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
|
||||||
|
if (__builtin_expect (gen & BAR_CANCELLED, 0))
|
||||||
|
return true;
|
||||||
|
if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
|
||||||
|
{
|
||||||
|
gomp_barrier_handle_tasks (state);
|
||||||
|
gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
|
||||||
|
}
|
||||||
|
generation |= gen & BAR_WAITING_FOR_TASK;
|
||||||
|
}
|
||||||
|
while (gen != state + BAR_INCR);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
|
||||||
|
{
|
||||||
|
return gomp_team_barrier_wait_cancel_end (bar, gomp_barrier_wait_start (bar));
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
gomp_team_barrier_cancel (struct gomp_team *team)
|
||||||
|
{
|
||||||
|
gomp_mutex_lock (&team->task_lock);
|
||||||
|
if (team->barrier.generation & BAR_CANCELLED)
|
||||||
|
{
|
||||||
|
gomp_mutex_unlock (&team->task_lock);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
team->barrier.generation |= BAR_CANCELLED;
|
||||||
|
gomp_mutex_unlock (&team->task_lock);
|
||||||
|
gomp_team_barrier_wake (&team->barrier, INT_MAX);
|
||||||
|
}
|
|
@ -0,0 +1,168 @@
|
||||||
|
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
|
||||||
|
Contributed by Mentor Embedded.
|
||||||
|
|
||||||
|
This file is part of the GNU Offloading and Multi Processing Library
|
||||||
|
(libgomp).
|
||||||
|
|
||||||
|
Libgomp is free software; you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
any later version.
|
||||||
|
|
||||||
|
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
more details.
|
||||||
|
|
||||||
|
Under Section 7 of GPL version 3, you are granted additional
|
||||||
|
permissions described in the GCC Runtime Library Exception, version
|
||||||
|
3.1, as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License and
|
||||||
|
a copy of the GCC Runtime Library Exception along with this program;
|
||||||
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
/* This is an AMD GCN specific implementation of a barrier synchronization
|
||||||
|
mechanism for libgomp. This type is private to the library. This
|
||||||
|
implementation uses atomic instructions and s_barrier instruction. It
|
||||||
|
uses MEMMODEL_RELAXED here because barriers are within workgroups and
|
||||||
|
therefore don't need to flush caches. */
|
||||||
|
|
||||||
|
#ifndef GOMP_BARRIER_H
|
||||||
|
#define GOMP_BARRIER_H 1
|
||||||
|
|
||||||
|
#include "mutex.h"
|
||||||
|
|
||||||
|
/* Barrier state shared by the threads that synchronize on it.  */
typedef struct
{
  /* Number of participating threads, as set by gomp_barrier_init and
     gomp_barrier_reinit.  */
  unsigned int total;
  /* Generation counter in the high bits with BAR_* flags in the low bits.  */
  unsigned int generation;
  /* Arrivals still outstanding; decremented by gomp_barrier_wait_start.  */
  unsigned int awaited;
  /* Like AWAITED, but decremented by gomp_barrier_wait_final_start.  */
  unsigned int awaited_final;
} gomp_barrier_t;
|
||||||
|
|
||||||
|
typedef unsigned int gomp_barrier_state_t;
|
||||||
|
|
||||||
|
/* The generation field contains a counter in the high bits, with a few
|
||||||
|
low bits dedicated to flags. Note that TASK_PENDING and WAS_LAST can
|
||||||
|
share space because WAS_LAST is never stored back to generation. */
|
||||||
|
#define BAR_TASK_PENDING 1
|
||||||
|
#define BAR_WAS_LAST 1
|
||||||
|
#define BAR_WAITING_FOR_TASK 2
|
||||||
|
#define BAR_CANCELLED 4
|
||||||
|
#define BAR_INCR 8
|
||||||
|
|
||||||
|
/* Initialize BAR for COUNT participating threads: both arrival counters
   start at COUNT and the generation word starts at zero.  */
static inline void gomp_barrier_init (gomp_barrier_t *bar, unsigned count)
{
  bar->generation = 0;
  bar->total = bar->awaited = bar->awaited_final = count;
}
|
||||||
|
|
||||||
|
/* Change the number of threads BAR waits for to COUNT.  The outstanding
   arrival counter is adjusted atomically by the difference between the new
   and the old total; awaited_final is not touched here.  */
static inline void gomp_barrier_reinit (gomp_barrier_t *bar, unsigned count)
{
  const unsigned delta = count - bar->total;

  (void) __atomic_add_fetch (&bar->awaited, delta, MEMMODEL_RELAXED);
  bar->total = count;
}
|
||||||
|
|
||||||
|
/* Destroy BAR.  This implementation has nothing to do.  */
static inline void gomp_barrier_destroy (gomp_barrier_t *bar)
{
  /* Intentionally empty.  */
}
|
||||||
|
|
||||||
|
extern void gomp_barrier_wait (gomp_barrier_t *);
|
||||||
|
extern void gomp_barrier_wait_last (gomp_barrier_t *);
|
||||||
|
extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t);
|
||||||
|
extern void gomp_team_barrier_wait (gomp_barrier_t *);
|
||||||
|
extern void gomp_team_barrier_wait_final (gomp_barrier_t *);
|
||||||
|
extern void gomp_team_barrier_wait_end (gomp_barrier_t *,
|
||||||
|
gomp_barrier_state_t);
|
||||||
|
extern bool gomp_team_barrier_wait_cancel (gomp_barrier_t *);
|
||||||
|
extern bool gomp_team_barrier_wait_cancel_end (gomp_barrier_t *,
|
||||||
|
gomp_barrier_state_t);
|
||||||
|
extern void gomp_team_barrier_wake (gomp_barrier_t *, int);
|
||||||
|
struct gomp_team;
|
||||||
|
extern void gomp_team_barrier_cancel (struct gomp_team *);
|
||||||
|
|
||||||
|
static inline gomp_barrier_state_t
|
||||||
|
gomp_barrier_wait_start (gomp_barrier_t *bar)
|
||||||
|
{
|
||||||
|
unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
|
||||||
|
ret &= -BAR_INCR | BAR_CANCELLED;
|
||||||
|
/* A memory barrier is needed before exiting from the various forms
|
||||||
|
of gomp_barrier_wait, to satisfy OpenMP API version 3.1 section
|
||||||
|
2.8.6 flush Construct, which says there is an implicit flush during
|
||||||
|
a barrier region. This is a convenient place to add the barrier,
|
||||||
|
so we use MEMMODEL_ACQ_REL here rather than MEMMODEL_ACQUIRE. */
|
||||||
|
if (__atomic_add_fetch (&bar->awaited, -1, MEMMODEL_RELAXED) == 0)
|
||||||
|
ret |= BAR_WAS_LAST;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline gomp_barrier_state_t
|
||||||
|
gomp_barrier_wait_cancel_start (gomp_barrier_t *bar)
|
||||||
|
{
|
||||||
|
return gomp_barrier_wait_start (bar);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This is like gomp_barrier_wait_start, except it decrements
|
||||||
|
bar->awaited_final rather than bar->awaited and should be used
|
||||||
|
for the gomp_team_end barrier only. */
|
||||||
|
static inline gomp_barrier_state_t
|
||||||
|
gomp_barrier_wait_final_start (gomp_barrier_t *bar)
|
||||||
|
{
|
||||||
|
unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
|
||||||
|
ret &= -BAR_INCR | BAR_CANCELLED;
|
||||||
|
/* See above gomp_barrier_wait_start comment. */
|
||||||
|
if (__atomic_add_fetch (&bar->awaited_final, -1, MEMMODEL_RELAXED) == 0)
|
||||||
|
ret |= BAR_WAS_LAST;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
gomp_barrier_last_thread (gomp_barrier_state_t state)
|
||||||
|
{
|
||||||
|
return state & BAR_WAS_LAST;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* All the inlines below must be called with team->task_lock
|
||||||
|
held. */
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
gomp_team_barrier_set_task_pending (gomp_barrier_t *bar)
|
||||||
|
{
|
||||||
|
bar->generation |= BAR_TASK_PENDING;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
gomp_team_barrier_clear_task_pending (gomp_barrier_t *bar)
|
||||||
|
{
|
||||||
|
bar->generation &= ~BAR_TASK_PENDING;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
gomp_team_barrier_set_waiting_for_tasks (gomp_barrier_t *bar)
|
||||||
|
{
|
||||||
|
bar->generation |= BAR_WAITING_FOR_TASK;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
gomp_team_barrier_waiting_for_tasks (gomp_barrier_t *bar)
|
||||||
|
{
|
||||||
|
return (bar->generation & BAR_WAITING_FOR_TASK) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
gomp_team_barrier_cancelled (gomp_barrier_t *bar)
|
||||||
|
{
|
||||||
|
return __builtin_expect ((bar->generation & BAR_CANCELLED) != 0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
|
||||||
|
{
|
||||||
|
bar->generation = (state & -BAR_INCR) + BAR_INCR;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* GOMP_BARRIER_H */
|
|
@ -0,0 +1,58 @@
|
||||||
|
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
|
||||||
|
Contributed by Mentor Embedded.
|
||||||
|
|
||||||
|
This file is part of the GNU Offloading and Multi Processing Library
|
||||||
|
(libgomp).
|
||||||
|
|
||||||
|
Libgomp is free software; you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
any later version.
|
||||||
|
|
||||||
|
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
more details.
|
||||||
|
|
||||||
|
Under Section 7 of GPL version 3, you are granted additional
|
||||||
|
permissions described in the GCC Runtime Library Exception, version
|
||||||
|
3.1, as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License and
|
||||||
|
a copy of the GCC Runtime Library Exception along with this program;
|
||||||
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
/* This is the AMD GCN implementation of doacross spinning. */
|
||||||
|
|
||||||
|
#ifndef GOMP_DOACROSS_H
|
||||||
|
#define GOMP_DOACROSS_H 1
|
||||||
|
|
||||||
|
#include "libgomp.h"
|
||||||
|
|
||||||
|
static inline int
|
||||||
|
cpu_relax (void)
|
||||||
|
{
|
||||||
|
/* This can be implemented as just a memory barrier, but a sleep seems
|
||||||
|
like it should allow the wavefront to yield (maybe?)
|
||||||
|
Use the shortest possible sleep time of 1*64 cycles. */
|
||||||
|
asm volatile ("s_sleep\t1" ::: "memory");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void doacross_spin (unsigned long *addr, unsigned long expected,
|
||||||
|
unsigned long cur)
|
||||||
|
{
|
||||||
|
/* Prevent compiler from optimizing based on bounds of containing object. */
|
||||||
|
asm ("" : "+r" (addr));
|
||||||
|
do
|
||||||
|
{
|
||||||
|
/* An alternative implementation might use s_setprio to lower the
|
||||||
|
priority temporarily, and then restore it after. */
|
||||||
|
int i = cpu_relax ();
|
||||||
|
cur = addr[i];
|
||||||
|
}
|
||||||
|
while (cur <= expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* GOMP_DOACROSS_H */
|
|
@ -0,0 +1,72 @@
|
||||||
|
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
|
||||||
|
Contributed by Mentor Embedded.
|
||||||
|
|
||||||
|
This file is part of the GNU Offloading and Multi Processing Library
|
||||||
|
(libgomp).
|
||||||
|
|
||||||
|
Libgomp is free software; you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
any later version.
|
||||||
|
|
||||||
|
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
more details.
|
||||||
|
|
||||||
|
Under Section 7 of GPL version 3, you are granted additional
|
||||||
|
permissions described in the GCC Runtime Library Exception, version
|
||||||
|
3.1, as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License and
|
||||||
|
a copy of the GCC Runtime Library Exception along with this program;
|
||||||
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
/* This file defines OpenMP API entry points that accelerator targets are
|
||||||
|
expected to replace. */
|
||||||
|
|
||||||
|
#include "libgomp.h"
|
||||||
|
|
||||||
|
/* Accelerator-side stub: DEVICE_NUM is ignored and no state is modified.  */

void
omp_set_default_device (int device_num __attribute__((unused)))
{
  /* Intentionally empty.  */
}
|
||||||
|
|
||||||
|
/* Accelerator-side stub: always reports device number 0.  */

int
omp_get_default_device (void)
{
  const int default_device = 0;

  return default_device;
}
|
||||||
|
|
||||||
|
/* Accelerator-side stub: always reports that zero devices are available.  */

int
omp_get_num_devices (void)
{
  const int device_count = 0;

  return device_count;
}
|
||||||
|
|
||||||
|
int
|
||||||
|
omp_get_num_teams (void)
|
||||||
|
{
|
||||||
|
return gomp_num_teams_var + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return the calling team's number, taken from the position along GCN
   dimension 0.  The function is always compiled at -O2 via the optimize
   attribute.  */

int __attribute__ ((__optimize__ ("O2")))
omp_get_team_num (void)
{
  int team = __builtin_gcn_dim_pos (0);

  return team;
}
|
||||||
|
|
||||||
|
/* Report whether the caller is running on the initial (host) device.  */

int
omp_is_initial_device (void)
{
  /* AMD GCN is an accelerator-only target.  */
  const int on_initial_device = 0;

  return on_initial_device;
}
|
||||||
|
|
||||||
|
ialias (omp_set_default_device)
|
||||||
|
ialias (omp_get_default_device)
|
||||||
|
ialias (omp_get_num_devices)
|
||||||
|
ialias (omp_get_num_teams)
|
||||||
|
ialias (omp_get_team_num)
|
||||||
|
ialias (omp_is_initial_device)
|
|
@ -0,0 +1,31 @@
|
||||||
|
/* Oversized reductions lock variable
|
||||||
|
Copyright (C) 2017-2019 Free Software Foundation, Inc.
|
||||||
|
Contributed by Mentor Graphics.
|
||||||
|
|
||||||
|
This file is part of the GNU Offloading and Multi Processing Library
|
||||||
|
(libgomp).
|
||||||
|
|
||||||
|
Libgomp is free software; you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
any later version.
|
||||||
|
|
||||||
|
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
more details.
|
||||||
|
|
||||||
|
Under Section 7 of GPL version 3, you are granted additional
|
||||||
|
permissions described in the GCC Runtime Library Exception, version
|
||||||
|
3.1, as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License and
|
||||||
|
a copy of the GCC Runtime Library Exception along with this program;
|
||||||
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
/* We use a global lock variable for reductions on objects larger than
|
||||||
|
64 bits. Until and unless proven that lock contention for
|
||||||
|
different reductions is a problem, a single lock will suffice. */
|
||||||
|
|
||||||
|
/* Starts out as 0.  The protocol for taking and releasing the lock is
   implemented by the code that references this symbol, not in this file.  */
unsigned volatile __reduction_lock = 0;
|
|
@ -0,0 +1,61 @@
|
||||||
|
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
|
||||||
|
Contributed by Mentor Embedded.
|
||||||
|
|
||||||
|
This file is part of the GNU Offloading and Multi Processing Library
|
||||||
|
(libgomp).
|
||||||
|
|
||||||
|
Libgomp is free software; you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
any later version.
|
||||||
|
|
||||||
|
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
more details.
|
||||||
|
|
||||||
|
Under Section 7 of GPL version 3, you are granted additional
|
||||||
|
permissions described in the GCC Runtime Library Exception, version
|
||||||
|
3.1, as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License and
|
||||||
|
a copy of the GCC Runtime Library Exception along with this program;
|
||||||
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
/* This is a simplified barrier that is suitable for thread pool
|
||||||
|
synchronization. Only a subset of full barrier API (bar.h) is exposed.
|
||||||
|
Here in the AMD GCN-specific implementation, we expect that thread pool
|
||||||
|
corresponds to the wavefronts within a work group. */
|
||||||
|
|
||||||
|
#ifndef GOMP_SIMPLE_BARRIER_H
|
||||||
|
#define GOMP_SIMPLE_BARRIER_H 1
|
||||||
|
|
||||||
|
/* AMD GCN has no use for this type. */
|
||||||
|
typedef int gomp_simple_barrier_t;
|
||||||
|
|
||||||
|
/* GCN barriers block all wavefronts, so the count is not interesting. */
|
||||||
|
static inline void
|
||||||
|
gomp_simple_barrier_init (gomp_simple_barrier_t *bar, unsigned count)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
gomp_simple_barrier_destroy (gomp_simple_barrier_t *bar)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
gomp_simple_barrier_wait (gomp_simple_barrier_t *bar)
|
||||||
|
{
|
||||||
|
asm volatile ("s_barrier" ::: "memory");
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
gomp_simple_barrier_wait_last (gomp_simple_barrier_t *bar)
|
||||||
|
{
|
||||||
|
/* GCN has no way to signal a barrier without waiting. */
|
||||||
|
asm volatile ("s_barrier" ::: "memory");
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* GOMP_SIMPLE_BARRIER_H */
|
|
@ -0,0 +1,67 @@
|
||||||
|
/* Copyright (C) 2017-2019 Free Software Foundation, Inc.
|
||||||
|
Contributed by Mentor Embedded.
|
||||||
|
|
||||||
|
This file is part of the GNU Offloading and Multi Processing Library
|
||||||
|
(libgomp).
|
||||||
|
|
||||||
|
Libgomp is free software; you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
any later version.
|
||||||
|
|
||||||
|
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
more details.
|
||||||
|
|
||||||
|
Under Section 7 of GPL version 3, you are granted additional
|
||||||
|
permissions described in the GCC Runtime Library Exception, version
|
||||||
|
3.1, as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License and
|
||||||
|
a copy of the GCC Runtime Library Exception along with this program;
|
||||||
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include "libgomp.h"
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
|
void
|
||||||
|
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
|
||||||
|
{
|
||||||
|
if (thread_limit)
|
||||||
|
{
|
||||||
|
struct gomp_task_icv *icv = gomp_icv (true);
|
||||||
|
icv->thread_limit_var
|
||||||
|
= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
|
||||||
|
}
|
||||||
|
unsigned int num_workgroups, workgroup_id;
|
||||||
|
num_workgroups = __builtin_gcn_dim_size (0);
|
||||||
|
workgroup_id = __builtin_gcn_dim_pos (0);
|
||||||
|
if (!num_teams || num_teams >= num_workgroups)
|
||||||
|
num_teams = num_workgroups;
|
||||||
|
else if (workgroup_id >= num_teams)
|
||||||
|
{
|
||||||
|
gomp_free_thread (gcn_thrs ());
|
||||||
|
exit (0);
|
||||||
|
}
|
||||||
|
gomp_num_teams_var = num_teams - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
omp_pause_resource (omp_pause_resource_t kind, int device_num)
|
||||||
|
{
|
||||||
|
(void) kind;
|
||||||
|
(void) device_num;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
omp_pause_resource_all (omp_pause_resource_t kind)
|
||||||
|
{
|
||||||
|
(void) kind;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
ialias (omp_pause_resource)
|
||||||
|
ialias (omp_pause_resource_all)
|
|
@ -0,0 +1,39 @@
|
||||||
|
/* Copyright (C) 2017-2019 Free Software Foundation, Inc.
|
||||||
|
Contributed by Mentor Embedded.
|
||||||
|
|
||||||
|
This file is part of the GNU Offloading and Multi Processing Library
|
||||||
|
(libgomp).
|
||||||
|
|
||||||
|
Libgomp is free software; you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
any later version.
|
||||||
|
|
||||||
|
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
more details.
|
||||||
|
|
||||||
|
Under Section 7 of GPL version 3, you are granted additional
|
||||||
|
permissions described in the GCC Runtime Library Exception, version
|
||||||
|
3.1, as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License and
|
||||||
|
a copy of the GCC Runtime Library Exception along with this program;
|
||||||
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
/* This file handles the maintenance of tasks in response to task
|
||||||
|
creation and termination. */
|
||||||
|
|
||||||
|
#include "libgomp.h"
|
||||||
|
|
||||||
|
/* AMD GCN is an accelerator-only target, so this should never be called. */
|
||||||
|
|
||||||
|
/* Placeholder required by the shared task code.  DATA is ignored and the
   body is marked unreachable, so the function must never actually be
   called.  */

bool
gomp_target_task_fn (void *data)
{
  (void) data;
  __builtin_unreachable ();
}
|
||||||
|
|
||||||
|
#include "../../task.c"
|
|
@ -0,0 +1,202 @@
|
||||||
|
/* Copyright (C) 2017-2019 Free Software Foundation, Inc.
|
||||||
|
Contributed by Mentor Embedded.
|
||||||
|
|
||||||
|
This file is part of the GNU Offloading and Multi Processing Library
|
||||||
|
(libgomp).
|
||||||
|
|
||||||
|
Libgomp is free software; you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
any later version.
|
||||||
|
|
||||||
|
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
more details.
|
||||||
|
|
||||||
|
Under Section 7 of GPL version 3, you are granted additional
|
||||||
|
permissions described in the GCC Runtime Library Exception, version
|
||||||
|
3.1, as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License and
|
||||||
|
a copy of the GCC Runtime Library Exception along with this program;
|
||||||
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
/* This file handles maintenance of threads on AMD GCN. */
|
||||||
|
|
||||||
|
#include "libgomp.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
static void gomp_thread_start (struct gomp_thread_pool *);
|
||||||
|
|
||||||
|
/* This externally visible function handles target region entry. It
|
||||||
|
sets up a per-team thread pool and transfers control by returning to
|
||||||
|
the kernel in the master thread or gomp_thread_start in other threads.
|
||||||
|
|
||||||
|
The name of this function is part of the interface with the compiler: for
|
||||||
|
each OpenMP kernel the compiler configures the stack, then calls here.
|
||||||
|
|
||||||
|
Likewise, gomp_gcn_exit_kernel is called during the kernel epilogue. */
|
||||||
|
|
||||||
|
void
|
||||||
|
gomp_gcn_enter_kernel (void)
|
||||||
|
{
|
||||||
|
int threadid = __builtin_gcn_dim_pos (1);
|
||||||
|
|
||||||
|
if (threadid == 0)
|
||||||
|
{
|
||||||
|
int numthreads = __builtin_gcn_dim_size (1);
|
||||||
|
int teamid = __builtin_gcn_dim_pos(0);
|
||||||
|
|
||||||
|
/* Set up the global state.
|
||||||
|
Every team will do this, but that should be harmless. */
|
||||||
|
gomp_global_icv.nthreads_var = 16;
|
||||||
|
gomp_global_icv.thread_limit_var = numthreads;
|
||||||
|
/* Starting additional threads is not supported. */
|
||||||
|
gomp_global_icv.dyn_var = true;
|
||||||
|
|
||||||
|
/* Allocate and initialize the team-local-storage data. */
|
||||||
|
struct gomp_thread *thrs = gomp_malloc_cleared (sizeof (*thrs)
|
||||||
|
* numthreads);
|
||||||
|
set_gcn_thrs (thrs);
|
||||||
|
|
||||||
|
/* Allocate and initailize a pool of threads in the team.
|
||||||
|
The threads are already running, of course, we just need to manage
|
||||||
|
the communication between them. */
|
||||||
|
struct gomp_thread_pool *pool = gomp_malloc (sizeof (*pool));
|
||||||
|
pool->threads = gomp_malloc (sizeof (void *) * numthreads);
|
||||||
|
for (int tid = 0; tid < numthreads; tid++)
|
||||||
|
pool->threads[tid] = &thrs[tid];
|
||||||
|
pool->threads_size = numthreads;
|
||||||
|
pool->threads_used = numthreads;
|
||||||
|
pool->threads_busy = 1;
|
||||||
|
pool->last_team = NULL;
|
||||||
|
gomp_simple_barrier_init (&pool->threads_dock, numthreads);
|
||||||
|
thrs->thread_pool = pool;
|
||||||
|
|
||||||
|
asm ("s_barrier" ::: "memory");
|
||||||
|
return; /* Return to kernel. */
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
asm ("s_barrier" ::: "memory");
|
||||||
|
gomp_thread_start (gcn_thrs ()[0].thread_pool);
|
||||||
|
/* gomp_thread_start does not return. */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Kernel exit hook: release the per-thread state via gomp_free_thread and
   then free the gomp_thread array itself.  */

void
gomp_gcn_exit_kernel (void)
{
  /* The array pointer is fetched again for each call rather than cached.  */
  gomp_free_thread (gcn_thrs ());
  free (gcn_thrs ());
}
|
||||||
|
|
||||||
|
/* This function contains the idle loop in which a thread waits
|
||||||
|
to be called up to become part of a team. */
|
||||||
|
|
||||||
|
static void
|
||||||
|
gomp_thread_start (struct gomp_thread_pool *pool)
|
||||||
|
{
|
||||||
|
struct gomp_thread *thr = gomp_thread ();
|
||||||
|
|
||||||
|
gomp_sem_init (&thr->release, 0);
|
||||||
|
thr->thread_pool = pool;
|
||||||
|
|
||||||
|
/* The loop exits only when "fn" is assigned "gomp_free_pool_helper",
|
||||||
|
which contains "s_endpgm", or an infinite no-op loop is
|
||||||
|
suspected (this happens when the thread master crashes). */
|
||||||
|
int nul_limit = 99;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
gomp_simple_barrier_wait (&pool->threads_dock);
|
||||||
|
if (!thr->fn)
|
||||||
|
{
|
||||||
|
if (nul_limit-- > 0)
|
||||||
|
continue;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const char msg[] = ("team master not responding;"
|
||||||
|
" slave thread aborting");
|
||||||
|
write (2, msg, sizeof (msg)-1);
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
thr->fn (thr->data);
|
||||||
|
thr->fn = NULL;
|
||||||
|
|
||||||
|
struct gomp_task *task = thr->task;
|
||||||
|
gomp_team_barrier_wait_final (&thr->ts.team->barrier);
|
||||||
|
gomp_finish_task (task);
|
||||||
|
}
|
||||||
|
while (1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Launch a team. */
|
||||||
|
|
||||||
|
void
|
||||||
|
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
|
||||||
|
unsigned flags, struct gomp_team *team,
|
||||||
|
struct gomp_taskgroup *taskgroup)
|
||||||
|
{
|
||||||
|
struct gomp_thread *thr, *nthr;
|
||||||
|
struct gomp_task *task;
|
||||||
|
struct gomp_task_icv *icv;
|
||||||
|
struct gomp_thread_pool *pool;
|
||||||
|
unsigned long nthreads_var;
|
||||||
|
|
||||||
|
thr = gomp_thread ();
|
||||||
|
pool = thr->thread_pool;
|
||||||
|
task = thr->task;
|
||||||
|
icv = task ? &task->icv : &gomp_global_icv;
|
||||||
|
|
||||||
|
/* Always save the previous state, even if this isn't a nested team.
|
||||||
|
In particular, we should save any work share state from an outer
|
||||||
|
orphaned work share construct. */
|
||||||
|
team->prev_ts = thr->ts;
|
||||||
|
|
||||||
|
thr->ts.team = team;
|
||||||
|
thr->ts.team_id = 0;
|
||||||
|
++thr->ts.level;
|
||||||
|
if (nthreads > 1)
|
||||||
|
++thr->ts.active_level;
|
||||||
|
thr->ts.work_share = &team->work_shares[0];
|
||||||
|
thr->ts.last_work_share = NULL;
|
||||||
|
thr->ts.single_count = 0;
|
||||||
|
thr->ts.static_trip = 0;
|
||||||
|
thr->task = &team->implicit_task[0];
|
||||||
|
nthreads_var = icv->nthreads_var;
|
||||||
|
gomp_init_task (thr->task, task, icv);
|
||||||
|
team->implicit_task[0].icv.nthreads_var = nthreads_var;
|
||||||
|
team->implicit_task[0].taskgroup = taskgroup;
|
||||||
|
|
||||||
|
if (nthreads == 1)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* Release existing idle threads. */
|
||||||
|
for (unsigned i = 1; i < nthreads; ++i)
|
||||||
|
{
|
||||||
|
nthr = pool->threads[i];
|
||||||
|
nthr->ts.team = team;
|
||||||
|
nthr->ts.work_share = &team->work_shares[0];
|
||||||
|
nthr->ts.last_work_share = NULL;
|
||||||
|
nthr->ts.team_id = i;
|
||||||
|
nthr->ts.level = team->prev_ts.level + 1;
|
||||||
|
nthr->ts.active_level = thr->ts.active_level;
|
||||||
|
nthr->ts.single_count = 0;
|
||||||
|
nthr->ts.static_trip = 0;
|
||||||
|
nthr->task = &team->implicit_task[i];
|
||||||
|
gomp_init_task (nthr->task, task, icv);
|
||||||
|
team->implicit_task[i].icv.nthreads_var = nthreads_var;
|
||||||
|
team->implicit_task[i].taskgroup = taskgroup;
|
||||||
|
nthr->fn = fn;
|
||||||
|
nthr->data = data;
|
||||||
|
team->ordered_release[i] = &nthr->release;
|
||||||
|
}
|
||||||
|
|
||||||
|
gomp_simple_barrier_wait (&pool->threads_dock);
|
||||||
|
}
|
||||||
|
|
||||||
|
#include "../../team.c"
|
|
@ -0,0 +1,52 @@
|
||||||
|
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
|
||||||
|
Contributed by Mentor Embedded.
|
||||||
|
|
||||||
|
This file is part of the GNU Offloading and Multi Processing Library
|
||||||
|
(libgomp).
|
||||||
|
|
||||||
|
Libgomp is free software; you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
any later version.
|
||||||
|
|
||||||
|
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
more details.
|
||||||
|
|
||||||
|
Under Section 7 of GPL version 3, you are granted additional
|
||||||
|
permissions described in the GCC Runtime Library Exception, version
|
||||||
|
3.1, as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License and
|
||||||
|
a copy of the GCC Runtime Library Exception along with this program;
|
||||||
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
/* This file implements timer routines for AMD GCN. */
|
||||||
|
|
||||||
|
#include "libgomp.h"
|
||||||
|
|
||||||
|
/* According to AMD:
|
||||||
|
dGPU RTC is 27MHz
|
||||||
|
AGPU RTC is 100MHz
|
||||||
|
FIXME: DTRT on an APU. */
|
||||||
|
#define RTC_TICKS (1.0 / 27000000.0) /* 27MHz */
|
||||||
|
|
||||||
|
double
|
||||||
|
omp_get_wtime (void)
|
||||||
|
{
|
||||||
|
uint64_t clock;
|
||||||
|
asm ("s_memrealtime %0\n\t"
|
||||||
|
"s_waitcnt 0" : "=r" (clock));
|
||||||
|
return clock * RTC_TICKS;
|
||||||
|
}
|
||||||
|
|
||||||
|
double
|
||||||
|
omp_get_wtick (void)
|
||||||
|
{
|
||||||
|
return RTC_TICKS;
|
||||||
|
}
|
||||||
|
|
||||||
|
ialias (omp_get_wtime)
|
||||||
|
ialias (omp_get_wtick)
|
|
@ -14921,7 +14921,7 @@ case "$host" in
|
||||||
*-*-rtems*)
|
*-*-rtems*)
|
||||||
# RTEMS supports Pthreads, but the library is not available at GCC build time.
|
# RTEMS supports Pthreads, but the library is not available at GCC build time.
|
||||||
;;
|
;;
|
||||||
nvptx*-*-*)
|
nvptx*-*-* | amdgcn*-*-*)
|
||||||
# NVPTX does not support Pthreads, has its own code replacement.
|
# NVPTX does not support Pthreads, has its own code replacement.
|
||||||
libgomp_use_pthreads=no
|
libgomp_use_pthreads=no
|
||||||
# NVPTX is an accelerator-only target
|
# NVPTX is an accelerator-only target
|
||||||
|
|
|
@ -176,7 +176,7 @@ case "$host" in
|
||||||
*-*-rtems*)
|
*-*-rtems*)
|
||||||
# RTEMS supports Pthreads, but the library is not available at GCC build time.
|
# RTEMS supports Pthreads, but the library is not available at GCC build time.
|
||||||
;;
|
;;
|
||||||
nvptx*-*-*)
|
nvptx*-*-* | amdgcn*-*-*)
|
||||||
# NVPTX does not support Pthreads, has its own code replacement.
|
# NVPTX does not support Pthreads, has its own code replacement.
|
||||||
libgomp_use_pthreads=no
|
libgomp_use_pthreads=no
|
||||||
# NVPTX is an accelerator-only target
|
# NVPTX is an accelerator-only target
|
||||||
|
|
|
@ -164,6 +164,10 @@ case "${target}" in
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
|
|
||||||
|
amdgcn*-*-*)
|
||||||
|
config_path="gcn accel"
|
||||||
|
;;
|
||||||
|
|
||||||
*)
|
*)
|
||||||
;;
|
;;
|
||||||
|
|
||||||
|
|
|
@ -50,7 +50,8 @@ enum offload_target_type
|
||||||
/* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed. */
|
/* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed. */
|
||||||
OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5,
|
OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5,
|
||||||
OFFLOAD_TARGET_TYPE_INTEL_MIC = 6,
|
OFFLOAD_TARGET_TYPE_INTEL_MIC = 6,
|
||||||
OFFLOAD_TARGET_TYPE_HSA = 7
|
OFFLOAD_TARGET_TYPE_HSA = 7,
|
||||||
|
OFFLOAD_TARGET_TYPE_GCN = 8
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Opaque type to represent plugin-dependent implementation of an
|
/* Opaque type to represent plugin-dependent implementation of an
|
||||||
|
|
|
@ -692,6 +692,24 @@ static inline struct gomp_thread *gomp_thread (void)
|
||||||
asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
|
asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
|
||||||
return nvptx_thrs + tid;
|
return nvptx_thrs + tid;
|
||||||
}
|
}
|
||||||
|
#elif defined __AMDGCN__
|
||||||
|
static inline struct gomp_thread *gcn_thrs (void)
|
||||||
|
{
|
||||||
|
/* The value is at the bottom of LDS. */
|
||||||
|
struct gomp_thread * __lds *thrs = (struct gomp_thread * __lds *)4;
|
||||||
|
return *thrs;
|
||||||
|
}
|
||||||
|
static inline void set_gcn_thrs (struct gomp_thread *val)
|
||||||
|
{
|
||||||
|
/* The value is at the bottom of LDS. */
|
||||||
|
struct gomp_thread * __lds *thrs = (struct gomp_thread * __lds *)4;
|
||||||
|
*thrs = val;
|
||||||
|
}
|
||||||
|
static inline struct gomp_thread *gomp_thread (void)
|
||||||
|
{
|
||||||
|
int tid = __builtin_gcn_dim_pos(1);
|
||||||
|
return gcn_thrs () + tid;
|
||||||
|
}
|
||||||
#elif defined HAVE_TLS || defined USE_EMUTLS
|
#elif defined HAVE_TLS || defined USE_EMUTLS
|
||||||
extern __thread struct gomp_thread gomp_tls_data;
|
extern __thread struct gomp_thread gomp_tls_data;
|
||||||
static inline struct gomp_thread *gomp_thread (void)
|
static inline struct gomp_thread *gomp_thread (void)
|
||||||
|
|
|
@ -82,7 +82,14 @@ struct goacc_thread
|
||||||
void *target_tls;
|
void *target_tls;
|
||||||
};
|
};
|
||||||
|
|
||||||
#if defined HAVE_TLS || defined USE_EMUTLS
|
#ifdef __AMDGCN__
|
||||||
|
/* AMD GCN variant: there is no per-thread OpenACC state on the device, so
   a null pointer is always returned.  */
static inline struct goacc_thread *
goacc_thread (void)
{
  /* Unused in the offload libgomp for OpenACC: return a dummy value.  */
  struct goacc_thread *dummy = 0;

  return dummy;
}
|
||||||
|
#elif defined HAVE_TLS || defined USE_EMUTLS
|
||||||
extern __thread struct goacc_thread *goacc_tls_data;
|
extern __thread struct goacc_thread *goacc_tls_data;
|
||||||
static inline struct goacc_thread *
|
static inline struct goacc_thread *
|
||||||
goacc_thread (void)
|
goacc_thread (void)
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
/* Nothing needed here. */
|
|
@ -46,6 +46,7 @@ module openacc_kinds
|
||||||
! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
|
! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
|
||||||
integer (acc_device_kind), parameter :: acc_device_not_host = 4
|
integer (acc_device_kind), parameter :: acc_device_not_host = 4
|
||||||
integer (acc_device_kind), parameter :: acc_device_nvidia = 5
|
integer (acc_device_kind), parameter :: acc_device_nvidia = 5
|
||||||
|
integer (acc_device_kind), parameter :: acc_device_gcn = 8
|
||||||
|
|
||||||
public :: acc_handle_kind
|
public :: acc_handle_kind
|
||||||
|
|
||||||
|
|
|
@ -55,6 +55,7 @@ typedef enum acc_device_t {
|
||||||
/* acc_device_host_nonshm = 3 removed. */
|
/* acc_device_host_nonshm = 3 removed. */
|
||||||
acc_device_not_host = 4,
|
acc_device_not_host = 4,
|
||||||
acc_device_nvidia = 5,
|
acc_device_nvidia = 5,
|
||||||
|
acc_device_gcn = 8,
|
||||||
_ACC_device_hwm,
|
_ACC_device_hwm,
|
||||||
/* Ensure enumeration is layout compatible with int. */
|
/* Ensure enumeration is layout compatible with int. */
|
||||||
_ACC_highest = __INT_MAX__,
|
_ACC_highest = __INT_MAX__,
|
||||||
|
|
|
@ -239,6 +239,9 @@ gomp_free_pool_helper (void *thread_pool)
|
||||||
pthread_exit (NULL);
|
pthread_exit (NULL);
|
||||||
#elif defined(__nvptx__)
|
#elif defined(__nvptx__)
|
||||||
asm ("exit;");
|
asm ("exit;");
|
||||||
|
#elif defined(__AMDGCN__)
|
||||||
|
asm ("s_dcache_wb\n\t"
|
||||||
|
"s_endpgm");
|
||||||
#else
|
#else
|
||||||
#error gomp_free_pool_helper must terminate the thread
|
#error gomp_free_pool_helper must terminate the thread
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue