mirror of git://gcc.gnu.org/git/gcc.git
re PR middle-end/70895 (OpenACC: loop reduction does not work. Output is zero.)
2016-08-18 Chung-Lin Tang <cltang@codesourcery.com> PR middle-end/70895 gcc/ * gimplify.c (omp_add_variable): Adjust/add variable mapping on enclosing parallel construct for reduction variables on OpenACC loop directives. gcc/testsuite/ * gfortran.dg/goacc/loop-tree-1.f90: Add gimple scan-tree-dump test. * c-c++-common/goacc/reduction-1.c: Likewise. * c-c++-common/goacc/reduction-2.c: Likewise. * c-c++-common/goacc/reduction-3.c: Likewise. * c-c++-common/goacc/reduction-4.c: Likewise. libgomp/ * testsuite/libgomp.oacc-fortran/reduction-7.f90: Add explicit firstprivate clauses. * testsuite/libgomp.oacc-fortran/reduction-6.f90: Remove explicit copy clauses. * testsuite/libgomp.oacc-c-c++-common/reduction-7.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-flt.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/collapse-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/collapse-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-dbl.c: Likewise. From-SVN: r239576
This commit is contained in:
parent
80c5ad359f
commit
2c71d454eb
|
|
@ -1,3 +1,10 @@
|
||||||
|
2016-08-18 Chung-Lin Tang <cltang@codesourcery.com>
|
||||||
|
|
||||||
|
PR middle-end/70895
|
||||||
|
* gimplify.c (omp_add_variable): Adjust/add variable mapping on
|
||||||
|
enclosing parallel construct for reduction variables on OpenACC loop
|
||||||
|
directives.
|
||||||
|
|
||||||
2016-08-18 Pierre-Marie de Rodat <derodat@adacore.com>
|
2016-08-18 Pierre-Marie de Rodat <derodat@adacore.com>
|
||||||
|
|
||||||
* dwarf2out.c (copy_dwarf_procedure): Remove obsolete comment.
|
* dwarf2out.c (copy_dwarf_procedure): Remove obsolete comment.
|
||||||
|
|
|
||||||
|
|
@ -6010,6 +6010,45 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
|
||||||
n->value |= flags;
|
n->value |= flags;
|
||||||
else
|
else
|
||||||
splay_tree_insert (ctx->variables, (splay_tree_key)decl, flags);
|
splay_tree_insert (ctx->variables, (splay_tree_key)decl, flags);
|
||||||
|
|
||||||
|
/* For reduction clauses in OpenACC loop directives, by default create a
|
||||||
|
copy clause on the enclosing parallel construct for carrying back the
|
||||||
|
results. */
|
||||||
|
if (ctx->region_type == ORT_ACC && (flags & GOVD_REDUCTION))
|
||||||
|
{
|
||||||
|
struct gimplify_omp_ctx *outer_ctx = ctx->outer_context;
|
||||||
|
while (outer_ctx)
|
||||||
|
{
|
||||||
|
n = splay_tree_lookup (outer_ctx->variables, (splay_tree_key)decl);
|
||||||
|
if (n != NULL)
|
||||||
|
{
|
||||||
|
/* Ignore local variables and explicitly declared clauses. */
|
||||||
|
if (n->value & (GOVD_LOCAL | GOVD_EXPLICIT))
|
||||||
|
break;
|
||||||
|
else if (outer_ctx->region_type == ORT_ACC_KERNELS)
|
||||||
|
{
|
||||||
|
/* According to the OpenACC spec, such a reduction variable
|
||||||
|
should already have a copy map on a kernels construct,
|
||||||
|
verify that here. */
|
||||||
|
gcc_assert (!(n->value & GOVD_FIRSTPRIVATE)
|
||||||
|
&& (n->value & GOVD_MAP));
|
||||||
|
}
|
||||||
|
else if (outer_ctx->region_type == ORT_ACC_PARALLEL)
|
||||||
|
{
|
||||||
|
/* Remove firstprivate and make it a copy map. */
|
||||||
|
n->value &= ~GOVD_FIRSTPRIVATE;
|
||||||
|
n->value |= GOVD_MAP;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (outer_ctx->region_type == ORT_ACC_PARALLEL)
|
||||||
|
{
|
||||||
|
splay_tree_insert (outer_ctx->variables, (splay_tree_key)decl,
|
||||||
|
GOVD_MAP | GOVD_SEEN);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
outer_ctx = outer_ctx->outer_context;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Notice a threadprivate variable DECL used in OMP context CTX.
|
/* Notice a threadprivate variable DECL used in OMP context CTX.
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,12 @@
|
||||||
|
2016-08-18 Chung-Lin Tang <cltang@codesourcery.com>
|
||||||
|
|
||||||
|
PR middle-end/70895
|
||||||
|
* gfortran.dg/goacc/loop-tree-1.f90: Add gimple scan-tree-dump test.
|
||||||
|
* c-c++-common/goacc/reduction-1.c: Likewise.
|
||||||
|
* c-c++-common/goacc/reduction-2.c: Likewise.
|
||||||
|
* c-c++-common/goacc/reduction-3.c: Likewise.
|
||||||
|
* c-c++-common/goacc/reduction-4.c: Likewise.
|
||||||
|
|
||||||
2016-08-18 Alan Modra <amodra@gmail.com>
|
2016-08-18 Alan Modra <amodra@gmail.com>
|
||||||
|
|
||||||
* gcc.c-torture/compile/pr72771.c: New.
|
* gcc.c-torture/compile/pr72771.c: New.
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
/* { dg-additional-options "-fdump-tree-gimple" } */
|
||||||
/* Integer reductions. */
|
/* Integer reductions. */
|
||||||
|
|
||||||
#define n 1000
|
#define n 1000
|
||||||
|
|
@ -65,3 +66,7 @@ main(void)
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Check that default copy maps are generated for loop reductions. */
|
||||||
|
/* { dg-final { scan-tree-dump-times "map\\(tofrom:result \\\[len: \[0-9\]+\\\]\\)" 7 "gimple" } } */
|
||||||
|
/* { dg-final { scan-tree-dump-times "map\\(tofrom:lresult \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
/* { dg-additional-options "-fdump-tree-gimple" } */
|
||||||
/* float reductions. */
|
/* float reductions. */
|
||||||
|
|
||||||
#define n 1000
|
#define n 1000
|
||||||
|
|
@ -47,3 +48,7 @@ main(void)
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Check that default copy maps are generated for loop reductions. */
|
||||||
|
/* { dg-final { scan-tree-dump-times "map\\(tofrom:result \\\[len: \[0-9\]+\\\]\\)" 4 "gimple" } } */
|
||||||
|
/* { dg-final { scan-tree-dump-times "map\\(tofrom:lresult \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
/* { dg-additional-options "-fdump-tree-gimple" } */
|
||||||
/* double reductions. */
|
/* double reductions. */
|
||||||
|
|
||||||
#define n 1000
|
#define n 1000
|
||||||
|
|
@ -47,3 +48,7 @@ main(void)
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Check that default copy maps are generated for loop reductions. */
|
||||||
|
/* { dg-final { scan-tree-dump-times "map\\(tofrom:result \\\[len: \[0-9\]+\\\]\\)" 4 "gimple" } } */
|
||||||
|
/* { dg-final { scan-tree-dump-times "map\\(tofrom:lresult \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
/* { dg-additional-options "-fdump-tree-gimple" } */
|
||||||
/* complex reductions. */
|
/* complex reductions. */
|
||||||
|
|
||||||
#define n 1000
|
#define n 1000
|
||||||
|
|
@ -35,3 +36,7 @@ main(void)
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Check that default copy maps are generated for loop reductions. */
|
||||||
|
/* { dg-final { scan-tree-dump-times "map\\(tofrom:result \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
|
||||||
|
/* { dg-final { scan-tree-dump-times "map\\(tofrom:lresult \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
! { dg-additional-options "-fdump-tree-original -std=f2008" }
|
! { dg-additional-options "-fdump-tree-original -fdump-tree-gimple -std=f2008" }
|
||||||
|
|
||||||
! test for tree-dump-original and spaces-commas
|
! test for tree-dump-original and spaces-commas
|
||||||
|
|
||||||
|
|
@ -44,3 +44,4 @@ end program test
|
||||||
|
|
||||||
! { dg-final { scan-tree-dump-times "private\\(m\\)" 1 "original" } }
|
! { dg-final { scan-tree-dump-times "private\\(m\\)" 1 "original" } }
|
||||||
! { dg-final { scan-tree-dump-times "reduction\\(\\+:sum\\)" 1 "original" } }
|
! { dg-final { scan-tree-dump-times "reduction\\(\\+:sum\\)" 1 "original" } }
|
||||||
|
! { dg-final { scan-tree-dump-times "map\\(tofrom:sum \\\[len: \[0-9\]+\\\]\\)" 1 "gimple" } }
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,23 @@
|
||||||
|
2016-08-18 Chung-Lin Tang <cltang@codesourcery.com>
|
||||||
|
|
||||||
|
PR middle-end/70895
|
||||||
|
* testsuite/libgomp.oacc-fortran/reduction-7.f90: Add explicit
|
||||||
|
firstprivate clauses.
|
||||||
|
* testsuite/libgomp.oacc-fortran/reduction-6.f90: Remove explicit
|
||||||
|
copy clauses.
|
||||||
|
* testsuite/libgomp.oacc-c-c++-common/reduction-7.c: Likewise.
|
||||||
|
* testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: Likewise.
|
||||||
|
* testsuite/libgomp.oacc-c-c++-common/reduction-flt.c: Likewise.
|
||||||
|
* testsuite/libgomp.oacc-c-c++-common/collapse-2.c: Likewise.
|
||||||
|
* testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Likewise.
|
||||||
|
* testsuite/libgomp.oacc-c-c++-common/collapse-4.c: Likewise.
|
||||||
|
* testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c: Likewise.
|
||||||
|
* testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: Likewise.
|
||||||
|
* testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c: Likewise.
|
||||||
|
* testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Likewise.
|
||||||
|
* testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Likewise.
|
||||||
|
* testsuite/libgomp.oacc-c-c++-common/reduction-dbl.c: Likewise.
|
||||||
|
|
||||||
2016-08-14 Chung-Lin Tang <cltang@codesourcery.com>
|
2016-08-14 Chung-Lin Tang <cltang@codesourcery.com>
|
||||||
|
|
||||||
PR fortran/70598
|
PR fortran/70598
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ main (void)
|
||||||
int i, j, k, l = 0, f = 0, x = 0;
|
int i, j, k, l = 0, f = 0, x = 0;
|
||||||
int m1 = 4, m2 = -5, m3 = 17;
|
int m1 = 4, m2 = -5, m3 = 17;
|
||||||
|
|
||||||
#pragma acc parallel copy(l)
|
#pragma acc parallel
|
||||||
#pragma acc loop seq collapse(3) reduction(+:l)
|
#pragma acc loop seq collapse(3) reduction(+:l)
|
||||||
for (i = -2; i < m1; i++)
|
for (i = -2; i < m1; i++)
|
||||||
for (j = m2; j < -2; j++)
|
for (j = m2; j < -2; j++)
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ main (void)
|
||||||
|
|
||||||
memset (b, '\0', sizeof (b));
|
memset (b, '\0', sizeof (b));
|
||||||
|
|
||||||
#pragma acc parallel copy(b[0:3][0:3]) copy(l)
|
#pragma acc parallel copy(b[0:3][0:3])
|
||||||
{
|
{
|
||||||
#pragma acc loop collapse(2) reduction(+:l)
|
#pragma acc loop collapse(2) reduction(+:l)
|
||||||
for (i = 0; i < 2; i++)
|
for (i = 0; i < 2; i++)
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ int main ()
|
||||||
int ondev = 0;
|
int ondev = 0;
|
||||||
int t = 0, h = 0;
|
int t = 0, h = 0;
|
||||||
|
|
||||||
#pragma acc parallel num_gangs(32) vector_length(32) copy(t) copy(ondev)
|
#pragma acc parallel num_gangs(32) vector_length(32) copy(ondev)
|
||||||
{
|
{
|
||||||
#pragma acc loop gang reduction (+:t)
|
#pragma acc loop gang reduction (+:t)
|
||||||
for (unsigned ix = 0; ix < N; ix++)
|
for (unsigned ix = 0; ix < N; ix++)
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ int main ()
|
||||||
int ondev = 0;
|
int ondev = 0;
|
||||||
int t = 0, h = 0;
|
int t = 0, h = 0;
|
||||||
|
|
||||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(t) copy(ondev)
|
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ondev)
|
||||||
{
|
{
|
||||||
#pragma acc loop gang worker vector reduction(+:t)
|
#pragma acc loop gang worker vector reduction(+:t)
|
||||||
for (unsigned ix = 0; ix < N; ix++)
|
for (unsigned ix = 0; ix < N; ix++)
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ int main ()
|
||||||
int ondev = 0;
|
int ondev = 0;
|
||||||
int t = 0, h = 0;
|
int t = 0, h = 0;
|
||||||
|
|
||||||
#pragma acc parallel vector_length(32) copy(t) copy(ondev)
|
#pragma acc parallel vector_length(32) copy(ondev)
|
||||||
{
|
{
|
||||||
#pragma acc loop vector reduction (+:t)
|
#pragma acc loop vector reduction (+:t)
|
||||||
for (unsigned ix = 0; ix < N; ix++)
|
for (unsigned ix = 0; ix < N; ix++)
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ int main ()
|
||||||
int ondev = 0;
|
int ondev = 0;
|
||||||
int t = 0, h = 0;
|
int t = 0, h = 0;
|
||||||
|
|
||||||
#pragma acc parallel num_workers(32) vector_length(32) copy(t) copy(ondev)
|
#pragma acc parallel num_workers(32) vector_length(32) copy(ondev)
|
||||||
{
|
{
|
||||||
#pragma acc loop worker reduction(+:t)
|
#pragma acc loop worker reduction(+:t)
|
||||||
for (unsigned ix = 0; ix < N; ix++)
|
for (unsigned ix = 0; ix < N; ix++)
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ int main ()
|
||||||
int ondev = 0;
|
int ondev = 0;
|
||||||
int t = 0, h = 0;
|
int t = 0, h = 0;
|
||||||
|
|
||||||
#pragma acc parallel num_workers(32) vector_length(32) copy(t) copy(ondev)
|
#pragma acc parallel num_workers(32) vector_length(32) copy(ondev)
|
||||||
{
|
{
|
||||||
#pragma acc loop worker vector reduction (+:t)
|
#pragma acc loop worker vector reduction (+:t)
|
||||||
for (unsigned ix = 0; ix < N; ix++)
|
for (unsigned ix = 0; ix < N; ix++)
|
||||||
|
|
|
||||||
|
|
@ -13,8 +13,7 @@ void g_np_1()
|
||||||
for (i = 0; i < 1024; i++)
|
for (i = 0; i < 1024; i++)
|
||||||
arr[i] = i;
|
arr[i] = i;
|
||||||
|
|
||||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
|
||||||
copy(res)
|
|
||||||
{
|
{
|
||||||
#pragma acc loop gang reduction(+:res)
|
#pragma acc loop gang reduction(+:res)
|
||||||
for (i = 0; i < 1024; i++)
|
for (i = 0; i < 1024; i++)
|
||||||
|
|
@ -28,8 +27,7 @@ void g_np_1()
|
||||||
|
|
||||||
res = hres = 1;
|
res = hres = 1;
|
||||||
|
|
||||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
|
||||||
copy(res)
|
|
||||||
{
|
{
|
||||||
#pragma acc loop gang reduction(*:res)
|
#pragma acc loop gang reduction(*:res)
|
||||||
for (i = 0; i < 12; i++)
|
for (i = 0; i < 12; i++)
|
||||||
|
|
@ -53,8 +51,7 @@ void gv_np_1()
|
||||||
for (i = 0; i < 1024; i++)
|
for (i = 0; i < 1024; i++)
|
||||||
arr[i] = i;
|
arr[i] = i;
|
||||||
|
|
||||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
|
||||||
copy(res)
|
|
||||||
{
|
{
|
||||||
#pragma acc loop gang vector reduction(+:res)
|
#pragma acc loop gang vector reduction(+:res)
|
||||||
for (i = 0; i < 1024; i++)
|
for (i = 0; i < 1024; i++)
|
||||||
|
|
@ -78,8 +75,7 @@ void gw_np_1()
|
||||||
for (i = 0; i < 1024; i++)
|
for (i = 0; i < 1024; i++)
|
||||||
arr[i] = i;
|
arr[i] = i;
|
||||||
|
|
||||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
|
||||||
copy(res)
|
|
||||||
{
|
{
|
||||||
#pragma acc loop gang worker reduction(+:res)
|
#pragma acc loop gang worker reduction(+:res)
|
||||||
for (i = 0; i < 1024; i++)
|
for (i = 0; i < 1024; i++)
|
||||||
|
|
@ -103,8 +99,7 @@ void gwv_np_1()
|
||||||
for (i = 0; i < 1024; i++)
|
for (i = 0; i < 1024; i++)
|
||||||
arr[i] = i;
|
arr[i] = i;
|
||||||
|
|
||||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
|
||||||
copy(res)
|
|
||||||
{
|
{
|
||||||
#pragma acc loop gang worker vector reduction(+:res)
|
#pragma acc loop gang worker vector reduction(+:res)
|
||||||
for (i = 0; i < 1024; i++)
|
for (i = 0; i < 1024; i++)
|
||||||
|
|
@ -128,8 +123,7 @@ void gwv_np_2()
|
||||||
for (i = 0; i < 32768; i++)
|
for (i = 0; i < 32768; i++)
|
||||||
arr[i] = i;
|
arr[i] = i;
|
||||||
|
|
||||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
|
||||||
copy(res)
|
|
||||||
{
|
{
|
||||||
#pragma acc loop gang reduction(+:res)
|
#pragma acc loop gang reduction(+:res)
|
||||||
for (j = 0; j < 32; j++)
|
for (j = 0; j < 32; j++)
|
||||||
|
|
@ -161,7 +155,7 @@ void gwv_np_3()
|
||||||
arr[i] = i;
|
arr[i] = i;
|
||||||
|
|
||||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
||||||
copyin(arr) copy(res)
|
copyin(arr)
|
||||||
{
|
{
|
||||||
#pragma acc loop gang reduction(+:res)
|
#pragma acc loop gang reduction(+:res)
|
||||||
for (j = 0; j < 32; j++)
|
for (j = 0; j < 32; j++)
|
||||||
|
|
@ -191,8 +185,7 @@ void gwv_np_4()
|
||||||
for (i = 0; i < 32768; i++)
|
for (i = 0; i < 32768; i++)
|
||||||
arr[i] = i;
|
arr[i] = i;
|
||||||
|
|
||||||
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
|
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
|
||||||
copy(res, mres)
|
|
||||||
{
|
{
|
||||||
#pragma acc loop gang reduction(+:res) reduction(max:mres)
|
#pragma acc loop gang reduction(+:res) reduction(max:mres)
|
||||||
for (j = 0; j < 32; j++)
|
for (j = 0; j < 32; j++)
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ vector (Type ary[N], Type sum, Type prod)
|
||||||
{
|
{
|
||||||
Type tsum = 0, tprod = 1;
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
|
#pragma acc parallel vector_length(32) copyin(ary[0:N])
|
||||||
{
|
{
|
||||||
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
|
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
|
||||||
for (int ix = 0; ix < N; ix++)
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
|
@ -46,7 +46,7 @@ worker (Type ary[N], Type sum, Type prod)
|
||||||
{
|
{
|
||||||
Type tsum = 0, tprod = 1;
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
|
#pragma acc parallel num_workers(32) copyin(ary[0:N])
|
||||||
{
|
{
|
||||||
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
|
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
|
||||||
for (int ix = 0; ix < N; ix++)
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
|
@ -70,7 +70,7 @@ gang (Type ary[N], Type sum, Type prod)
|
||||||
{
|
{
|
||||||
Type tsum = 0, tprod = 1;
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
|
#pragma acc parallel num_gangs (32) copyin(ary[0:N])
|
||||||
{
|
{
|
||||||
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
|
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
|
||||||
for (int ix = 0; ix < N; ix++)
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ vector (Type ary[N], Type sum, Type prod)
|
||||||
{
|
{
|
||||||
Type tsum = 0, tprod = 1;
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
|
#pragma acc parallel vector_length(32) copyin(ary[0:N])
|
||||||
{
|
{
|
||||||
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
|
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
|
||||||
for (int ix = 0; ix < N; ix++)
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
|
@ -46,7 +46,7 @@ worker (Type ary[N], Type sum, Type prod)
|
||||||
{
|
{
|
||||||
Type tsum = 0, tprod = 1;
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
|
#pragma acc parallel num_workers(32) copyin(ary[0:N])
|
||||||
{
|
{
|
||||||
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
|
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
|
||||||
for (int ix = 0; ix < N; ix++)
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
|
@ -70,7 +70,7 @@ gang (Type ary[N], Type sum, Type prod)
|
||||||
{
|
{
|
||||||
Type tsum = 0, tprod = 1;
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
|
#pragma acc parallel num_gangs (32) copyin(ary[0:N])
|
||||||
{
|
{
|
||||||
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
|
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
|
||||||
for (int ix = 0; ix < N; ix++)
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@ vector (Type ary[N], Type sum, Type prod)
|
||||||
{
|
{
|
||||||
Type tsum = 0, tprod = 1;
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
|
#pragma acc parallel vector_length(32) copyin(ary[0:N])
|
||||||
{
|
{
|
||||||
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
|
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
|
||||||
for (int ix = 0; ix < N; ix++)
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
|
@ -43,7 +43,7 @@ worker (Type ary[N], Type sum, Type prod)
|
||||||
{
|
{
|
||||||
Type tsum = 0, tprod = 1;
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
|
#pragma acc parallel num_workers(32) copyin(ary[0:N])
|
||||||
{
|
{
|
||||||
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
|
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
|
||||||
for (int ix = 0; ix < N; ix++)
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
|
@ -67,7 +67,7 @@ gang (Type ary[N], Type sum, Type prod)
|
||||||
{
|
{
|
||||||
Type tsum = 0, tprod = 1;
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
|
#pragma acc parallel num_gangs (32) copyin(ary[0:N])
|
||||||
{
|
{
|
||||||
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
|
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
|
||||||
for (int ix = 0; ix < N; ix++)
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@ vector (Type ary[N], Type sum, Type prod)
|
||||||
{
|
{
|
||||||
Type tsum = 0, tprod = 1;
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
|
#pragma acc parallel vector_length(32) copyin(ary[0:N])
|
||||||
{
|
{
|
||||||
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
|
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
|
||||||
for (int ix = 0; ix < N; ix++)
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
|
@ -43,7 +43,7 @@ worker (Type ary[N], Type sum, Type prod)
|
||||||
{
|
{
|
||||||
Type tsum = 0, tprod = 1;
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
|
#pragma acc parallel num_workers(32) copyin(ary[0:N])
|
||||||
{
|
{
|
||||||
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
|
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
|
||||||
for (int ix = 0; ix < N; ix++)
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
|
@ -67,7 +67,7 @@ gang (Type ary[N], Type sum, Type prod)
|
||||||
{
|
{
|
||||||
Type tsum = 0, tprod = 1;
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
|
#pragma acc parallel num_gangs (32) copyin(ary[0:N])
|
||||||
{
|
{
|
||||||
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
|
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
|
||||||
for (int ix = 0; ix < N; ix++)
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@ program reduction
|
||||||
hs1 = 0
|
hs1 = 0
|
||||||
hs2 = 0
|
hs2 = 0
|
||||||
|
|
||||||
!$acc parallel num_gangs (1000) copy(gs1, gs2)
|
!$acc parallel num_gangs (1000)
|
||||||
!$acc loop reduction(+:gs1, gs2) gang
|
!$acc loop reduction(+:gs1, gs2) gang
|
||||||
do i = 1, n
|
do i = 1, n
|
||||||
gs1 = gs1 + 1
|
gs1 = gs1 + 1
|
||||||
|
|
@ -27,7 +27,7 @@ program reduction
|
||||||
end do
|
end do
|
||||||
!$acc end parallel
|
!$acc end parallel
|
||||||
|
|
||||||
!$acc parallel num_workers (4) vector_length (32) copy(ws1, ws2)
|
!$acc parallel num_workers (4) vector_length (32)
|
||||||
!$acc loop reduction(+:ws1, ws2) worker
|
!$acc loop reduction(+:ws1, ws2) worker
|
||||||
do i = 1, n
|
do i = 1, n
|
||||||
ws1 = ws1 + 1
|
ws1 = ws1 + 1
|
||||||
|
|
@ -35,7 +35,7 @@ program reduction
|
||||||
end do
|
end do
|
||||||
!$acc end parallel
|
!$acc end parallel
|
||||||
|
|
||||||
!$acc parallel vector_length (32) copy(vs1, vs2)
|
!$acc parallel vector_length (32)
|
||||||
!$acc loop reduction(+:vs1, vs2) vector
|
!$acc loop reduction(+:vs1, vs2) vector
|
||||||
do i = 1, n
|
do i = 1, n
|
||||||
vs1 = vs1 + 1
|
vs1 = vs1 + 1
|
||||||
|
|
@ -43,7 +43,7 @@ program reduction
|
||||||
end do
|
end do
|
||||||
!$acc end parallel
|
!$acc end parallel
|
||||||
|
|
||||||
!$acc parallel num_gangs(8) num_workers(4) vector_length(32) copy(cs1, cs2)
|
!$acc parallel num_gangs(8) num_workers(4) vector_length(32)
|
||||||
!$acc loop reduction(+:cs1, cs2) gang worker vector
|
!$acc loop reduction(+:cs1, cs2) gang worker vector
|
||||||
do i = 1, n
|
do i = 1, n
|
||||||
cs1 = cs1 + 1
|
cs1 = cs1 + 1
|
||||||
|
|
@ -74,7 +74,7 @@ program reduction
|
||||||
red = 0
|
red = 0
|
||||||
vred = 0
|
vred = 0
|
||||||
|
|
||||||
!$acc parallel num_gangs(10) vector_length(32) copy(red)
|
!$acc parallel num_gangs(10) vector_length(32)
|
||||||
!$acc loop reduction(+:red) gang
|
!$acc loop reduction(+:red) gang
|
||||||
do i = 1, n/chunksize
|
do i = 1, n/chunksize
|
||||||
!$acc loop reduction(+:red) vector
|
!$acc loop reduction(+:red) vector
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,7 @@ subroutine redsub_private(sum, n, arr)
|
||||||
end subroutine redsub_private
|
end subroutine redsub_private
|
||||||
|
|
||||||
|
|
||||||
! Bogus reduction on an implicitly firstprivate variable. The results do
|
! Bogus reduction on a firstprivate variable. The results do
|
||||||
! survive the parallel region. The goal here is to ensure that gfortran
|
! survive the parallel region. The goal here is to ensure that gfortran
|
||||||
! doesn't ICE.
|
! doesn't ICE.
|
||||||
|
|
||||||
|
|
@ -58,7 +58,7 @@ subroutine redsub_bogus(sum, n)
|
||||||
integer :: sum, n, arr(n)
|
integer :: sum, n, arr(n)
|
||||||
integer :: i
|
integer :: i
|
||||||
|
|
||||||
!$acc parallel
|
!$acc parallel firstprivate(sum)
|
||||||
!$acc loop gang worker vector reduction (+:sum)
|
!$acc loop gang worker vector reduction (+:sum)
|
||||||
do i = 1, n
|
do i = 1, n
|
||||||
sum = sum + 1
|
sum = sum + 1
|
||||||
|
|
@ -72,7 +72,7 @@ subroutine redsub_combined(sum, n, arr)
|
||||||
integer :: sum, n, arr(n)
|
integer :: sum, n, arr(n)
|
||||||
integer :: i, j
|
integer :: i, j
|
||||||
|
|
||||||
!$acc parallel copy (arr)
|
!$acc parallel copy (arr) firstprivate(sum)
|
||||||
!$acc loop gang
|
!$acc loop gang
|
||||||
do i = 1, n
|
do i = 1, n
|
||||||
sum = i;
|
sum = i;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue