GCN, nvptx offloading: Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API [PR119853, PR119854]

'__dso_handle' for '__cxa_atexit', '__cxa_finalize'.  See
<https://itanium-cxx-abi.github.io/cxx-abi/abi.html#dso-dtor>.

	PR target/119853
	PR target/119854
	libgcc/
	* config/gcn/crt0.c (_fini_array): Call
	'__GCC_offload___cxa_finalize'.
	* config/nvptx/gbl-ctors.c (__static_do_global_dtors): Likewise.
	libgomp/
	* target-cxa-dso-dtor.c: New.
	* config/accel/target-cxa-dso-dtor.c: Likewise.
	* Makefile.am (libgomp_la_SOURCES): Add it.
	* Makefile.in: Regenerate.
	* testsuite/libgomp.c++/target-cdtor-1.C: New.
	* testsuite/libgomp.c++/target-cdtor-2.C: Likewise.
This commit is contained in:
Thomas Schwinge 2025-04-23 10:51:48 +02:00
parent 40ce48e87c
commit aafe942227
8 changed files with 363 additions and 3 deletions

View File

@ -24,6 +24,28 @@ typedef long long size_t;
/* Provide an entry point symbol to silence a linker warning. */
void _start() {}
#define PR119369_fixed 0
/* Host/device compatibility: '__cxa_finalize'. Dummy; if necessary,
overridden via libgomp 'target-cxa-dso-dtor.c'. */
#if PR119369_fixed
extern void __GCC_offload___cxa_finalize (void *) __attribute__((weak));
#else
void __GCC_offload___cxa_finalize (void *) __attribute__((weak));
void __attribute__((weak))
__GCC_offload___cxa_finalize (void *dso_handle __attribute__((unused)))
{
}
#endif
/* There are no DSOs; this is the main program. */
static void * const __dso_handle = 0;
#ifdef USE_NEWLIB_INITFINI
extern void __libc_init_array (void) __attribute__((weak));
@ -38,6 +60,11 @@ void _init_array()
__attribute__((amdgpu_hsa_kernel ()))
void _fini_array()
{
#if PR119369_fixed
if (__GCC_offload___cxa_finalize)
#endif
__GCC_offload___cxa_finalize (__dso_handle);
__libc_fini_array ();
}
@ -70,6 +97,11 @@ void _init_array()
__attribute__((amdgpu_hsa_kernel ()))
void _fini_array()
{
#if PR119369_fixed
if (__GCC_offload___cxa_finalize)
#endif
__GCC_offload___cxa_finalize (__dso_handle);
size_t count;
size_t i;

View File

@ -31,6 +31,20 @@
extern int atexit (void (*function) (void));
/* Host/device compatibility: '__cxa_finalize'. Dummy; if necessary,
overridden via libgomp 'target-cxa-dso-dtor.c'. */
extern void __GCC_offload___cxa_finalize (void *);
void __attribute__((weak))
__GCC_offload___cxa_finalize (void *dso_handle __attribute__((unused)))
{
}
/* There are no DSOs; this is the main program. */
static void * const __dso_handle = 0;
/* Handler functions ('static', in contrast to the 'gbl-ctors.h'
prototypes). */
@ -49,6 +63,8 @@ static void __static_do_global_dtors (void);
static void
__static_do_global_dtors (void)
{
__GCC_offload___cxa_finalize (__dso_handle);
func_ptr *p = __DTOR_LIST__;
++p;
for (; *p; ++p)

View File

@ -70,7 +70,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
oacc-target.c target-indirect.c
oacc-target.c target-indirect.c target-cxa-dso-dtor.c
include $(top_srcdir)/plugin/Makefrag.am

View File

@ -219,7 +219,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \
oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \
affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
oacc-target.lo target-indirect.lo $(am__objects_1)
oacc-target.lo target-indirect.lo target-cxa-dso-dtor.lo \
$(am__objects_1)
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@ -552,7 +553,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c allocator.c oacc-profiling.c \
oacc-target.c target-indirect.c $(am__append_3)
oacc-target.c target-indirect.c target-cxa-dso-dtor.c \
$(am__append_3)
# Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@ -780,6 +782,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-cxa-dso-dtor.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-indirect.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@

View File

@ -0,0 +1,62 @@
/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API
Copyright (C) 2025 Free Software Foundation, Inc.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "libgomp.h"
extern void __cxa_finalize (void *);
/* See <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#dso-dtor>.
Even if the device is '!DEFAULT_USE_CXA_ATEXIT', we may see '__cxa_atexit'
calls, referencing '__dso_handle', via a 'DEFAULT_USE_CXA_ATEXIT' host.
'__cxa_atexit' is provided by newlib, but use of '__dso_handle' for nvptx
results in 'ld' error:
unresolved symbol __dso_handle
collect2: error: ld returned 1 exit status
nvptx mkoffload: fatal error: [...]/x86_64-pc-linux-gnu-accel-nvptx-none-gcc returned 1 exit status
..., or for GCN get an implicit definition (running with
'--trace-symbol=__dso_handle'):
./a.xamdgcn-amdhsa.mkoffload.hsaco-a.xamdgcn-amdhsa.mkoffload.2.o: reference to __dso_handle
<internal>: definition of __dso_handle
..., which might be fine, but let's just make it explicit. */
/* There are no DSOs; this is the main program. */
attribute_hidden void * const __dso_handle = 0;
/* If this file gets linked in, that means that '__dso_handle' has been
referenced (for '__cxa_atexit'), and in that case, we also have to run
'__cxa_finalize'. Make that happen by overriding the weak libgcc dummy
function '__GCC_offload___cxa_finalize'. */
void
__GCC_offload___cxa_finalize (void *dso_handle)
{
__cxa_finalize (dso_handle);
}

View File

@ -0,0 +1,3 @@
/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API */
/* Nothing needed here. */

View File

@ -0,0 +1,104 @@
/* Offloaded C++ objects construction and destruction. */
/* { dg-additional-options -fdump-tree-optimized-raw-asmname }
{ dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */
#include <omp.h>
#include <vector>
#pragma omp declare target
struct S
{
int x;
S()
: x(-1)
{
__builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
}
S(int x)
: x(x)
{
__builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
}
~S()
{
__builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
}
};
#pragma omp end declare target
S sH1(7);
#pragma omp declare target
S sHD1(5);
std::vector<S> svHD1(2);
#pragma omp end declare target
S sH2(3);
int main()
{
int c = 0;
__builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
#pragma omp target map(c)
{
__builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
}
#pragma omp target map(c)
{
__builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
}
__builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
return 0;
}
/* Verify '__cxa_atexit' calls.
For the host, there are four expected calls:
{ dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } }
{ dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
{ dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
{ dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
{ dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
For the device, there are two expected calls:
{ dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } }
{ dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
{ dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
*/
/* C++ objects are constructed in order of appearance (..., and destructed in reverse order).
{ dg-output {S, 7, 1[\r\n]+} }
{ dg-output {S, 5, 1[\r\n]+} }
{ dg-output {S, -1, 1[\r\n]+} }
{ dg-output {S, -1, 1[\r\n]+} }
{ dg-output {S, 3, 1[\r\n]+} }
{ dg-output {main:1, 1[\r\n]+} }
{ dg-output {S, 5, 0[\r\n]+} { target offload_device } }
{ dg-output {S, -1, 0[\r\n]+} { target offload_device } }
{ dg-output {S, -1, 0[\r\n]+} { target offload_device } }
{ dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } }
{ dg-output {main:2, 0[\r\n]+} { target offload_device } }
{ dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } }
{ dg-output {main:3, 0[\r\n]+} { target offload_device } }
{ dg-output {main:4, 1[\r\n]+} }
{ dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
{ dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
{ dg-output {~S, 5, 0[\r\n]+} { target offload_device } }
{ dg-output {~S, 3, 1[\r\n]+} }
{ dg-output {~S, -1, 1[\r\n]+} }
{ dg-output {~S, -1, 1[\r\n]+} }
{ dg-output {~S, 5, 1[\r\n]+} }
{ dg-output {~S, 7, 1[\r\n]+} }
*/

View File

@ -0,0 +1,140 @@
/* Offloaded 'constructor' and 'destructor' functions, and C++ objects construction and destruction. */
/* { dg-require-effective-target init_priority } */
/* { dg-additional-options -fdump-tree-optimized-raw-asmname }
{ dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */
#include <omp.h>
#include <vector>
#pragma omp declare target
struct S
{
int x;
S()
: x(-1)
{
__builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
}
S(int x)
: x(x)
{
__builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
}
~S()
{
__builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
}
};
#pragma omp end declare target
S sH1 __attribute__((init_priority(1500))) (7);
#pragma omp declare target
S sHD1 __attribute__((init_priority(2000))) (5);
std::vector<S> svHD1 __attribute__((init_priority(1000))) (2);
static void
__attribute__((constructor(20000)))
initDH1()
{
__builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
}
static void
__attribute__((destructor(20000)))
finiDH1()
{
__builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
}
#pragma omp end declare target
S sH2 __attribute__((init_priority(500))) (3);
static void
__attribute__((constructor(10000)))
initH1()
{
__builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
}
static void
__attribute__((destructor(10000)))
finiH1()
{
__builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
}
int main()
{
int c = 0;
__builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
#pragma omp target map(c)
{
__builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
}
#pragma omp target map(c)
{
__builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
}
__builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
return 0;
}
/* Verify '__cxa_atexit' calls.
For the host, there are four expected calls:
{ dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } }
{ dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
{ dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
{ dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
{ dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
For the device, there are two expected calls:
{ dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } }
{ dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
{ dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
*/
/* Defined order in which 'constructor' functions, and 'destructor' functions are run, and C++ objects are constructed (..., and destructed in reverse order).
{ dg-output {S, 3, 1[\r\n]+} }
{ dg-output {S, -1, 1[\r\n]+} }
{ dg-output {S, -1, 1[\r\n]+} }
{ dg-output {S, 7, 1[\r\n]+} }
{ dg-output {S, 5, 1[\r\n]+} }
{ dg-output {initH1, 1[\r\n]+} }
{ dg-output {initDH1, 1[\r\n]+} }
{ dg-output {main:1, 1[\r\n]+} }
{ dg-output {S, -1, 0[\r\n]+} { target offload_device } }
{ dg-output {S, -1, 0[\r\n]+} { target offload_device } }
{ dg-output {S, 5, 0[\r\n]+} { target offload_device } }
{ dg-output {initDH1, 0[\r\n]+} { target offload_device } }
{ dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } }
{ dg-output {main:2, 0[\r\n]+} { target offload_device } }
{ dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } }
{ dg-output {main:3, 0[\r\n]+} { target offload_device } }
{ dg-output {main:4, 1[\r\n]+} }
{ dg-output {~S, 5, 0[\r\n]+} { target offload_device } }
{ dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
{ dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
{ dg-output {finiDH1, 0[\r\n]+} { target offload_device } }
{ dg-output {~S, 5, 1[\r\n]+} }
{ dg-output {~S, 7, 1[\r\n]+} }
{ dg-output {~S, -1, 1[\r\n]+} }
{ dg-output {~S, -1, 1[\r\n]+} }
{ dg-output {~S, 3, 1[\r\n]+} }
{ dg-output {finiDH1, 1[\r\n]+} }
{ dg-output {finiH1, 1[\r\n]+} }
*/