[OpenACC] Add tests for implied copy of variables in reduction clause.

The OpenACC reduction clause on a compute construct implies a copy clause
for each reduction variable [1]. This patch adds tests that check whether
the implied copy is generated. The tests cover the various types and
operators described in the specification.

gcc/testsuite/ChangeLog:

	* c-c++-common/goacc/implied-copy-1.c: New test.
	* c-c++-common/goacc/implied-copy-2.c: New test.
	* g++.dg/goacc/implied-copy.C: New test.
	* gcc.dg/goacc/implied-copy.c: New test.
	* gfortran.dg/goacc/implied-copy-1.f90: New test.
	* gfortran.dg/goacc/implied-copy-2.f90: New test.

[1] OpenACC 2.7 Specification section 2.5.13
This commit is contained in:
Hafiz Abid Qadeer 2023-12-20 13:50:26 +00:00
parent a6af434d6b
commit 0e7bc3eaa3
6 changed files with 402 additions and 0 deletions

View File

@ -0,0 +1,33 @@
/* { dg-additional-options "-fdump-tree-gimple" } */
/* Test for implied copy of reduction variable on combined construct. */
/* Apply the same two reductions (+ on sum, * on prod) to each of the three
   combined constructs: kernels loop, parallel loop and serial loop.  Neither
   sum nor prod is named in an explicit data clause on any of them.

   The array a is never initialized.
   NOTE(review): presumably harmless because only the gimple dump is
   inspected and the code is not executed -- confirm the test is
   compile-only.  */
void test1 (void)
{
  /* Each loop declares its own index in the for statement, so no
     function-scope index variable is declared here.  */
  int sum = 0, prod = 1, a[100];

  /* Combined kernels construct.  */
  #pragma acc kernels loop reduction(+:sum) reduction(*:prod)
  for (int i = 0; i < 10; ++i)
    {
      sum += a[i];
      prod *= a[i];
    }

  /* Combined parallel construct.  */
  #pragma acc parallel loop reduction(+:sum) reduction(*:prod)
  for (int i = 0; i < 10; ++i)
    {
      sum += a[i];
      prod *= a[i];
    }

  /* Combined serial construct.  */
  #pragma acc serial loop reduction(+:sum) reduction(*:prod)
  for (int i = 0; i < 10; ++i)
    {
      sum += a[i];
      prod *= a[i];
    }
}
/* { dg-final { scan-tree-dump-times "map\\(force_tofrom:sum \\\[len: \[0-9\]+\\\].*\\)" 1 "gimple" } } */
/* { dg-final { scan-tree-dump-times "map\\(force_tofrom:prod \\\[len: \[0-9\]+\\\].*\\)" 1 "gimple" } } */
/* { dg-final { scan-tree-dump-times "map\\(tofrom:sum \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
/* { dg-final { scan-tree-dump-times "map\\(tofrom:prod \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */

View File

@ -0,0 +1,121 @@
/* { dg-additional-options "-fdump-tree-gimple" } */
/* Test that a reduction on a compute construct implies a copy of the
reduction variable. */
#define n 1000
#if __cplusplus
typedef bool BOOL;
#else
typedef _Bool BOOL;
#endif
/* Exercise the reduction operators +, *, max, min, &, |, ^, && and || on
   `parallel' constructs and then again on `serial' constructs, using int,
   float, double, char and BOOL reduction variables.

   The only explicit data clause in the function is copy(tmp); none of the
   reduction variables (sum, sumf, sumd, sumc, prod, result, lres) is named
   in a data clause.  The dump checks that follow this function count the
   tofrom mappings of those variables: `result' is reduced five times per
   construct kind (10 in total) and `lres' twice per kind (4 in total).

   NOTE(review): sumc, lres and all four arrays are read without ever being
   initialized.  Presumably acceptable because only the gimple dump is
   inspected -- confirm the test is never executed.  */
int
main(void)
{
int i;
int sum = 0;
int prod = 1;
int result = 0;
int tmp = 1;
int array[n];
double sumd = 0.0;
double arrayd[n];
float sumf = 0.0;
float arrayf[n];
char sumc;
char arrayc[n];
BOOL lres;
/* Arithmetic reductions: + over int, float, double and char; * over int. */
#pragma acc parallel reduction(+:sum, sumf, sumd, sumc) reduction(*:prod)
for (i = 0; i < n; i++)
{
sum += array[i];
sumf += arrayf[i];
sumd += arrayd[i];
sumc += arrayc[i];
prod *= array[i];
}
/* max and min reductions on the int `result'. */
#pragma acc parallel reduction (max:result)
for (i = 0; i < n; i++)
result = result > array[i] ? result : array[i];
#pragma acc parallel reduction (min:result)
for (i = 0; i < n; i++)
result = result < array[i] ? result : array[i];
/* Bitwise reductions (&, |, ^) on the int `result'. */
#pragma acc parallel reduction (&:result)
for (i = 0; i < n; i++)
result &= array[i];
#pragma acc parallel reduction (|:result)
for (i = 0; i < n; i++)
result |= array[i];
#pragma acc parallel reduction (^:result)
for (i = 0; i < n; i++)
result ^= array[i];
/* Logical reductions (&&, ||) on the BOOL `lres'; tmp is the one variable
   given an explicit copy clause. */
#pragma acc parallel reduction (&&:lres) copy(tmp)
for (i = 0; i < n; i++)
lres = lres && (tmp > array[i]);
#pragma acc parallel reduction (||:lres) copy(tmp)
for (i = 0; i < n; i++)
lres = lres || (tmp > array[i]);
/* Same checks on serial construct. */
#pragma acc serial reduction(+:sum, sumf, sumd, sumc) reduction(*:prod)
for (i = 0; i < n; i++)
{
sum += array[i];
sumf += arrayf[i];
sumd += arrayd[i];
sumc += arrayc[i];
prod *= array[i];
}
/* max and min reductions on `result'. */
#pragma acc serial reduction (max:result)
for (i = 0; i < n; i++)
result = result > array[i] ? result : array[i];
#pragma acc serial reduction (min:result)
for (i = 0; i < n; i++)
result = result < array[i] ? result : array[i];
/* Bitwise reductions on `result'. */
#pragma acc serial reduction (&:result)
for (i = 0; i < n; i++)
result &= array[i];
#pragma acc serial reduction (|:result)
for (i = 0; i < n; i++)
result |= array[i];
#pragma acc serial reduction (^:result)
for (i = 0; i < n; i++)
result ^= array[i];
/* Logical reductions on `lres', again with an explicit copy(tmp). */
#pragma acc serial reduction (&&:lres) copy(tmp)
for (i = 0; i < n; i++)
lres = lres && (tmp > array[i]);
#pragma acc serial reduction (||:lres) copy(tmp)
for (i = 0; i < n; i++)
lres = lres || (tmp > array[i]);
return 0;
}
/* { dg-final { scan-tree-dump-times "map\\(tofrom:sum \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
/* { dg-final { scan-tree-dump-times "map\\(tofrom:sumf \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
/* { dg-final { scan-tree-dump-times "map\\(tofrom:sumd \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
/* { dg-final { scan-tree-dump-times "map\\(tofrom:sumc \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
/* { dg-final { scan-tree-dump-times "map\\(tofrom:lres \\\[len: \[0-9\]+\\\]\\)" 4 "gimple" } } */
/* { dg-final { scan-tree-dump-times "map\\(tofrom:result \\\[len: \[0-9\]+\\\]\\)" 10 "gimple" } } */
/* { dg-final { scan-tree-dump-times "map\\(tofrom:prod \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */

View File

@ -0,0 +1,24 @@
/* { dg-additional-options "-fdump-tree-gimple" } */
/* Test for wchar_t type. */
/* Perform a + reduction on the wchar_t variable `s', once on a `parallel'
   construct and once on a `serial' construct.  `s' is not named in any data
   clause; the dump check after this function expects two tofrom mappings of
   it, one per construct.

   No header is included, so wchar_t must be available as a built-in type.
   NOTE(review): presumably this is the C++ test, where wchar_t is a
   keyword -- confirm.  `s' and `a' are read uninitialized, which should only
   matter if the test is ever executed.  */
int
main(void)
{
int i;
wchar_t a[100], s;
/* + reduction on a parallel construct. */
#pragma acc parallel reduction (+:s)
for (i = 0; i < 10; i++)
{
s += a[i];
}
/* Same reduction on a serial construct. */
#pragma acc serial reduction (+:s)
for (i = 0; i < 10; i++)
{
s += a[i];
}
return 0;
}
/* { dg-final { scan-tree-dump-times "map\\(tofrom:s \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */

View File

@ -0,0 +1,29 @@
/* { dg-additional-options "-fdump-tree-gimple" } */
/* Test for float _Complex and double _Complex types. */
/* Perform + reductions on a float _Complex variable (s1) and a
   double _Complex variable (s2), once on a `parallel' construct and once on
   a `serial' construct.  Neither variable is named in a data clause; the
   dump checks after this function expect two tofrom mappings of each.

   NOTE(review): s1, s2 and both arrays are read uninitialized; presumably
   irrelevant because only the gimple dump is inspected -- confirm.  */
int
main(void)
{
int i;
float _Complex fc[100];
float _Complex s1;
double _Complex dc[100];
double _Complex s2;
/* Both complex types reduced in one clause on a parallel construct. */
#pragma acc parallel reduction (+:s1, s2)
for (i = 0; i < 10; i++)
{
s1 += fc[i];
s2 += dc[i];
}
/* Same reduction on a serial construct. */
#pragma acc serial reduction (+:s1, s2)
for (i = 0; i < 10; i++)
{
s1 += fc[i];
s2 += dc[i];
}
return 0;
}
/* { dg-final { scan-tree-dump-times "map\\(tofrom:s1 \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
/* { dg-final { scan-tree-dump-times "map\\(tofrom:s2 \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */

View File

@ -0,0 +1,35 @@
! { dg-additional-options "-fdump-tree-gimple" }
! Test for implied copy of reduction variable on combined construct.
! Apply the same two reductions (+ on s, * on p) to each combined construct:
! parallel loop, serial loop and kernels loop.  Neither s nor p appears in an
! explicit data clause.  The dump checks after this subroutine expect, for
! each variable, one force_tofrom mapping and two tofrom mappings.
! NOTE(review): presumably the force_tofrom mapping comes from the kernels
! construct and the tofrom mappings from parallel and serial -- confirm.
! NOTE(review): s and a are never initialized (only p is set); presumably
! irrelevant because only the gimple dump is inspected -- confirm.
subroutine test
implicit none
integer a(100), i, s, p
p = 1
! Combined parallel construct.
!$acc parallel loop reduction(+:s) reduction(*:p)
do i = 1, 100
s = s + a(i)
p = p * a(i)
end do
!$acc end parallel loop
! Combined serial construct.
!$acc serial loop reduction(+:s) reduction(*:p)
do i = 1, 100
s = s + a(i)
p = p * a(i)
end do
!$acc end serial loop
! Combined kernels construct.
!$acc kernels loop reduction(+:s) reduction(*:p)
do i = 1, 100
s = s + a(i)
p = p * a(i)
end do
!$acc end kernels loop
end subroutine test
! { dg-final { scan-tree-dump-times "map\\(force_tofrom:s \\\[len: \[0-9\]+\\\].*\\)" 1 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(force_tofrom:p \\\[len: \[0-9\]+\\\].*\\)" 1 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(tofrom:s \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(tofrom:p \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } }

View File

@ -0,0 +1,160 @@
! { dg-additional-options "-fdump-tree-gimple" }
! Test that a reduction on a compute construct implies a copy of the reduction variable.
! Exercise the Fortran reduction operators +, *, max, min, iand, ior, ieor,
! .and., .or., .eqv. and .neqv. on parallel constructs and then again on
! serial constructs.  The reduction variables are integer (s1, p1, r1),
! real (s2), double precision (s3), complex (s4) and logical (r2).
!
! No construct carries an explicit data clause.  The dump checks after this
! subroutine count the tofrom mappings of each reduction variable: r1 is
! reduced five times per construct kind (10 in total) and r2 four times per
! kind (8 in total); the others once per kind (2 in total).
!
! NOTE(review): only p1 is initialized; the other reduction variables and
! all arrays are read uninitialized.  Presumably irrelevant because only the
! gimple dump is inspected -- confirm.
subroutine test
implicit none
integer i
integer a(100), s1, p1
integer r1
real b(100), s2
logical c(100), r2
double precision d(100), s3
complex e(100), s4
p1 = 1
! Arithmetic reductions: + over integer, real, double precision and complex;
! * over integer.
!$acc parallel reduction(+:s1, s2, s3, s4) reduction(*:p1)
do i = 1, 100
s1 = s1 + a(i)
p1 = p1 * a(i)
s2 = s2 + b(i)
s3 = s3 + d(i)
s4 = s4 + e(i)
end do
!$acc end parallel
! max reduction on r1, written as an explicit comparison.
!$acc parallel reduction (max:r1)
do i = 1,10
if (r1 <= a(i)) then
r1 = a(i)
end if
end do
!$acc end parallel
! min reduction on r1, written as an explicit comparison.
!$acc parallel reduction (min:r1)
do i = 1,10
if (r1 >= a(i)) then
r1 = a(i)
end if
end do
!$acc end parallel
! Bitwise reductions (iand, ior, ieor) on r1.
!$acc parallel reduction (iand:r1)
do i = 1,10
r1 = iand (r1, a(i))
end do
!$acc end parallel
!$acc parallel reduction (ior:r1)
do i = 1,10
r1 = ior (r1, a(i))
end do
!$acc end parallel
!$acc parallel reduction (ieor:r1)
do i = 1,10
r1 = ieor (r1, a(i))
end do
!$acc end parallel
! Logical reductions (.and., .or., .eqv., .neqv.) on r2.
!$acc parallel reduction (.and.:r2)
do i = 1,10
r2 = r2 .and. c(i)
end do
!$acc end parallel
!$acc parallel reduction (.or.:r2)
do i = 1,10
r2 = r2 .or. c(i)
end do
!$acc end parallel
!$acc parallel reduction (.eqv.:r2)
do i = 1,10
r2 = r2 .eqv. c(i)
end do
!$acc end parallel
!$acc parallel reduction (.neqv.:r2)
do i = 1,10
r2 = r2 .neqv. c(i)
end do
!$acc end parallel
! The same sequence of reductions, now on serial constructs.
!$acc serial reduction(+:s1, s2, s3, s4) reduction(*:p1)
do i = 1, 100
s1 = s1 + a(i)
p1 = p1 * a(i)
s2 = s2 + b(i)
s3 = s3 + d(i)
s4 = s4 + e(i)
end do
!$acc end serial
! max and min reductions on r1.
!$acc serial reduction (max:r1)
do i = 1,10
if (r1 <= a(i)) then
r1 = a(i)
end if
end do
!$acc end serial
!$acc serial reduction (min:r1)
do i = 1,10
if (r1 >= a(i)) then
r1 = a(i)
end if
end do
!$acc end serial
! Bitwise reductions on r1.
!$acc serial reduction (iand:r1)
do i = 1,10
r1 = iand (r1, a(i))
end do
!$acc end serial
!$acc serial reduction (ior:r1)
do i = 1,10
r1 = ior (r1, a(i))
end do
!$acc end serial
!$acc serial reduction (ieor:r1)
do i = 1,10
r1 = ieor (r1, a(i))
end do
!$acc end serial
! Logical reductions on r2.
!$acc serial reduction (.and.:r2)
do i = 1,10
r2 = r2 .and. c(i)
end do
!$acc end serial
!$acc serial reduction (.or.:r2)
do i = 1,10
r2 = r2 .or. c(i)
end do
!$acc end serial
!$acc serial reduction (.eqv.:r2)
do i = 1,10
r2 = r2 .eqv. c(i)
end do
!$acc end serial
!$acc serial reduction (.neqv.:r2)
do i = 1,10
r2 = r2 .neqv. c(i)
end do
!$acc end serial
end subroutine test
! { dg-final { scan-tree-dump-times "map\\(tofrom:s1 \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(tofrom:r1 \\\[len: \[0-9\]+\\\]\\)" 10 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(tofrom:r2 \\\[len: \[0-9\]+\\\]\\)" 8 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(tofrom:s2 \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(tofrom:s3 \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(tofrom:s4 \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(tofrom:p1 \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } }