Fortran/OpenACC: Add Fortran support for acc_attach/acc_detach

While C/++ support the routines acc_attach{,_async} and
acc_detach{,_finalize}{,_async} routines since a long time, the Fortran
API routines where only added in OpenACC 3.3.

Unfortunately, they cannot directly be implemented in the library as
GCC will introduce a temporary array descriptor in some cases, which
causes the attempted attachment to the this temporary variable instead
of to the original one.

Therefore, those API routines are handled in a special way in the compiler.

gcc/fortran/ChangeLog:

	* trans-stmt.cc (gfc_trans_call_acc_attach_detach): New.
	(gfc_trans_call): Call it.

libgomp/ChangeLog:

	* libgomp.texi (acc_attach, acc_detach): Update for Fortran
	version.
	* openacc.f90 (acc_attach{,_async}, acc_detach{,_finalize}{,_async}):
	Add.
	* openacc_lib.h: Likewise.
	* testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90: New test.
	* testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90: New test.
This commit is contained in:
Tobias Burnus 2025-06-24 23:28:57 +02:00
parent 92e1893e01
commit 1e35a51825
6 changed files with 265 additions and 22 deletions

View File

@ -377,6 +377,57 @@ get_intrinsic_for_code (gfc_code *code)
}
/* Handle the OpenACC routines acc_attach{,_async} and
acc_detach{,_finalize}{,_async} explicitly. This is required as the
the corresponding device pointee is attached to the corresponding device
pointer, but if a temporary array descriptor is created for the call,
that one is used as pointer instead of the original pointer. */
tree
gfc_trans_call_acc_attach_detach (gfc_code *code)
{
stmtblock_t block;
gfc_se ptr_addr_se, async_se;
tree fn;
fn = code->resolved_sym->backend_decl;
if (fn == NULL)
{
fn = gfc_get_symbol_decl (code->resolved_sym);
code->resolved_sym->backend_decl = fn;
}
gfc_start_block (&block);
gfc_init_se (&ptr_addr_se, NULL);
ptr_addr_se.descriptor_only = 1;
ptr_addr_se.want_pointer = 1;
gfc_conv_expr (&ptr_addr_se, code->ext.actual->expr);
gfc_add_block_to_block (&block, &ptr_addr_se.pre);
if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (ptr_addr_se.expr)))
ptr_addr_se.expr = gfc_conv_descriptor_data_get (ptr_addr_se.expr);
ptr_addr_se.expr = build_fold_addr_expr (ptr_addr_se.expr);
bool async = code->ext.actual->next != NULL;
if (async)
{
gfc_init_se (&async_se, NULL);
gfc_conv_expr (&async_se, code->ext.actual->next->expr);
fn = build_call_expr_loc (gfc_get_location (&code->loc), fn, 2,
ptr_addr_se.expr, async_se.expr);
}
else
fn = build_call_expr_loc (gfc_get_location (&code->loc),
fn, 1, ptr_addr_se.expr);
gfc_add_expr_to_block (&block, fn);
gfc_add_block_to_block (&block, &ptr_addr_se.post);
if (async)
gfc_add_block_to_block (&block, &async_se.post);
return gfc_finish_block (&block);
}
/* Translate the CALL statement. Builds a call to an F95 subroutine. */
tree
@ -392,13 +443,32 @@ gfc_trans_call (gfc_code * code, bool dependency_check,
tree tmp;
bool is_intrinsic_mvbits;
gcc_assert (code->resolved_sym);
/* Unfortunately, acc_attach* and acc_detach* need some special treatment for
attaching the the pointee to a pointer as GCC might introduce a temporary
array descriptor, whose data component is then used as to be attached to
pointer. */
if (flag_openacc
&& code->resolved_sym->attr.subroutine
&& code->resolved_sym->formal
&& code->resolved_sym->formal->sym->ts.type == BT_ASSUMED
&& code->resolved_sym->formal->sym->attr.dimension
&& code->resolved_sym->formal->sym->as->type == AS_ASSUMED_RANK
&& startswith (code->resolved_sym->name, "acc_")
&& (!strcmp (code->resolved_sym->name + 4, "attach")
|| !strcmp (code->resolved_sym->name + 4, "attach_async")
|| !strcmp (code->resolved_sym->name + 4, "detach")
|| !strcmp (code->resolved_sym->name + 4, "detach_async")
|| !strcmp (code->resolved_sym->name + 4, "detach_finalize")
|| !strcmp (code->resolved_sym->name + 4, "detach_finalize_async")))
return gfc_trans_call_acc_attach_detach (code);
/* A CALL starts a new block because the actual arguments may have to
be evaluated first. */
gfc_init_se (&se, NULL);
gfc_start_block (&se.pre);
gcc_assert (code->resolved_sym);
ss = gfc_ss_terminator;
if (code->resolved_sym->attr.elemental)
ss = gfc_walk_elemental_function_args (ss, code->ext.actual,

View File

@ -5967,19 +5967,19 @@ address to pointing to the corresponding device data.
@item @emph{Prototype}: @tab @code{void acc_attach_async(h_void **ptr_addr, int async);}
@end multitable
@c @item @emph{Fortran}:
@c @multitable @columnfractions .20 .80
@c @item @emph{Interface}: @tab @code{subroutine acc_attach(ptr_addr)}
@c @item @emph{Interface}: @tab @code{subroutine acc_attach_async(ptr_addr, async_arg)}
@c @item @tab @code{type(*), dimension(..) :: ptr_addr}
@c @item @tab @code{integer(acc_handle_kind), value :: async_arg}
@c @end multitable
@item @emph{Fortran}:
@multitable @columnfractions .20 .80
@item @emph{Interface}: @tab @code{subroutine acc_attach(ptr_addr)}
@item @emph{Interface}: @tab @code{subroutine acc_attach_async(ptr_addr, async_arg)}
@item @tab @code{type(*), dimension(..) :: ptr_addr}
@item @tab @code{integer(acc_handle_kind), value :: async_arg}
@end multitable
@item @emph{Reference}:
@uref{https://www.openacc.org, OpenACC specification v2.6}, section
3.2.34.
@c @uref{https://www.openacc.org, OpenACC specification v3.3}, section
@c 3.2.29.
@uref{https://www.openacc.org, OpenACC specification v3.3}, section
3.2.29.
@end table
@ -5999,21 +5999,21 @@ address to pointing to the corresponding host data.
@item @emph{Prototype}: @tab @code{void acc_detach_finalize_async(h_void **ptr_addr, int async);}
@end multitable
@c @item @emph{Fortran}:
@c @multitable @columnfractions .20 .80
@c @item @emph{Interface}: @tab @code{subroutine acc_detach(ptr_addr)}
@c @item @emph{Interface}: @tab @code{subroutine acc_detach_async(ptr_addr, async_arg)}
@c @item @emph{Interface}: @tab @code{subroutine acc_detach_finalize(ptr_addr)}
@c @item @emph{Interface}: @tab @code{subroutine acc_detach_finalize_async(ptr_addr, async_arg)}
@c @item @tab @code{type(*), dimension(..) :: ptr_addr}
@c @item @tab @code{integer(acc_handle_kind), value :: async_arg}
@c @end multitable
@item @emph{Fortran}:
@multitable @columnfractions .20 .80
@item @emph{Interface}: @tab @code{subroutine acc_detach(ptr_addr)}
@item @emph{Interface}: @tab @code{subroutine acc_detach_async(ptr_addr, async_arg)}
@item @emph{Interface}: @tab @code{subroutine acc_detach_finalize(ptr_addr)}
@item @emph{Interface}: @tab @code{subroutine acc_detach_finalize_async(ptr_addr, async_arg)}
@item @tab @code{type(*), dimension(..) :: ptr_addr}
@item @tab @code{integer(acc_handle_kind), value :: async_arg}
@end multitable
@item @emph{Reference}:
@uref{https://www.openacc.org, OpenACC specification v2.6}, section
3.2.35.
@c @uref{https://www.openacc.org, OpenACC specification v3.3}, section
@c 3.2.29.
@uref{https://www.openacc.org, OpenACC specification v3.3}, section
3.2.29.
@end table

View File

@ -798,6 +798,8 @@ module openacc
public :: acc_memcpy_to_device, acc_memcpy_to_device_async
public :: acc_memcpy_from_device, acc_memcpy_from_device_async
public :: acc_memcpy_device, acc_memcpy_device_async
public :: acc_attach, acc_attach_async, acc_detach, acc_detach_async
public :: acc_detach_finalize, acc_detach_finalize_async
integer, parameter :: openacc_version = 201711
@ -1068,6 +1070,48 @@ module openacc
end subroutine
end interface
interface
subroutine acc_attach (ptr_addr) bind(C)
type(*), dimension(..) :: ptr_addr
end subroutine
end interface
interface
subroutine acc_attach_async (ptr_addr, async_arg) bind(C)
import :: acc_handle_kind
type(*), dimension(..) :: ptr_addr
integer(acc_handle_kind), value :: async_arg
end subroutine
end interface
interface
subroutine acc_detach (ptr_addr) bind(C)
type(*), dimension(..) :: ptr_addr
end subroutine
end interface
interface
subroutine acc_detach_async (ptr_addr, async_arg) bind(C)
import :: acc_handle_kind
type(*), dimension(..) :: ptr_addr
integer(acc_handle_kind), value :: async_arg
end subroutine
end interface
interface
subroutine acc_detach_finalize (ptr_addr) bind(C)
type(*), dimension(..) :: ptr_addr
end subroutine
end interface
interface
subroutine acc_detach_finalize_async (ptr_addr, async_arg) bind(C)
import :: acc_handle_kind
type(*), dimension(..) :: ptr_addr
integer(acc_handle_kind), value :: async_arg
end subroutine
end interface
interface acc_copyin_async
procedure :: acc_copyin_async_32_h
procedure :: acc_copyin_async_64_h

View File

@ -707,3 +707,45 @@
integer (acc_handle_kind) async_
end subroutine
end interface
interface
subroutine acc_attach (ptr_addr) bind(C)
type(*), dimension(..) :: ptr_addr
end subroutine
end interface
interface
subroutine acc_attach_async (ptr_addr, async_arg) bind(C)
import :: acc_handle_kind
type(*), dimension(..) :: ptr_addr
integer(acc_handle_kind), value :: async_arg
end subroutine
end interface
interface
subroutine acc_detach (ptr_addr) bind(C)
type(*), dimension(..) :: ptr_addr
end subroutine
end interface
interface
subroutine acc_detach_async (ptr_addr, async_arg) bind(C)
import :: acc_handle_kind
type(*), dimension(..) :: ptr_addr
integer(acc_handle_kind), value :: async_arg
end subroutine
end interface
interface
subroutine acc_detach_finalize (ptr_addr) bind(C)
type(*), dimension(..) :: ptr_addr
end subroutine
end interface
interface
subroutine acc_detach_finalize_async(ptr_addr, async_arg)bind(C)
import :: acc_handle_kind
type(*), dimension(..) :: ptr_addr
integer(acc_handle_kind), value :: async_arg
end subroutine
end interface

View File

@ -0,0 +1,25 @@
! { dg-do compile }
! { dg-additional-options "-fdump-tree-original" }
use openacc
implicit none (type, external)
integer,pointer :: a, b(:)
integer,allocatable :: c, d(:)
call acc_attach(a) ! ICE
call acc_attach_async(b, 4)
call acc_attach(c)
call acc_detach(a)
call acc_detach_async(b, 4)
call acc_detach_finalize(c)
call acc_detach_finalize_async(d,7)
end
! { dg-final { scan-tree-dump-times "acc_attach \\(&a\\);" 1 "original" } }
! { dg-final { scan-tree-dump-times "acc_attach_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) b.data, 4\\);" 1 "original" } }
! { dg-final { scan-tree-dump-times "acc_attach \\(&c\\);" 1 "original" } }
! { dg-final { scan-tree-dump-times "acc_detach \\(&a\\);" 1 "original" } }
! { dg-final { scan-tree-dump-times "acc_detach_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) b.data, 4\\);" 1 "original" } }
! { dg-final { scan-tree-dump-times "acc_detach_finalize \\(&c\\);" 1 "original" } }
! { dg-final { scan-tree-dump-times "acc_detach_finalize_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\* restrict\\) d.data, 7\\);" 1 "original" } }

View File

@ -0,0 +1,62 @@
! { dg-do run }
use openacc
implicit none (type, external)
integer, target :: tgt_a, tgt_b(5)
integer, pointer :: p1, p2(:)
type t
integer,pointer :: a => null ()
integer,pointer :: b(:) => null ()
integer,allocatable :: c, d(:)
end type t
type(t), target :: var
tgt_a = 51
tgt_b = [11,22,33,44,55]
var%b => tgt_b
!$acc enter data copyin(var, tgt_a, tgt_b)
var%a => tgt_a
call acc_attach(var%a)
call acc_attach(var%b)
!$acc serial
! { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 }
if (var%a /= 51) stop 1
if (any (var%b /= [11,22,33,44,55])) stop 2
!$acc end serial
call acc_detach(var%a)
call acc_detach(var%b)
!$acc exit data delete(var, tgt_a, tgt_b)
var%c = 9
var%d = [1,2,3]
p1 => var%c
p2 => var%d
!$acc enter data copyin(p1, p2)
!$acc enter data copyin(var)
call acc_attach(var%c)
call acc_attach(var%d)
!$acc serial
! { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 }
if (var%c /= 9) stop 3
if (any (var%d /= [1,2,3])) stop 4
!$acc end serial
call acc_detach(var%c)
call acc_detach(var%d)
!$acc exit data delete(var, p1, p2)
deallocate(var%d)
end