mirror of git://gcc.gnu.org/git/gcc.git
reduction-dbl.c: New.
* libgomp.oacc-c-c++-common/reduction-dbl.c: New. * libgomp.oacc-c-c++-common/reduction-flt.c: New. * libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: Use typedef. * libgomp.oacc-c-c++-common/reduction-cplx-flt.c: Use typedef. * libgomp.oacc-c-c++-common/reduction-2.c: Uncomment broken tests and fix. * libgomp.oacc-c-c++-common/reduction-3.c: Likewise. * libgomp.oacc-c-c++-common/reduction-4.c: Likewise. From-SVN: r230621
This commit is contained in:
parent
0398c18360
commit
2d25681660
|
|
@ -1,3 +1,14 @@
|
||||||
|
2015-11-19 Nathan Sidwell <nathan@codesourcery.com>
|
||||||
|
|
||||||
|
* libgomp.oacc-c-c++-common/reduction-dbl.c: New.
|
||||||
|
* libgomp.oacc-c-c++-common/reduction-flt.c: New.
|
||||||
|
* libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: Use typedef.
|
||||||
|
* libgomp.oacc-c-c++-common/reduction-cplx-flt.c: Use typedef.
|
||||||
|
* libgomp.oacc-c-c++-common/reduction-2.c: Uncomment broken tests
|
||||||
|
and fix.
|
||||||
|
* libgomp.oacc-c-c++-common/reduction-3.c: Likewise.
|
||||||
|
* libgomp.oacc-c-c++-common/reduction-4.c: Likewise.
|
||||||
|
|
||||||
2015-11-18 Nathan Sidwell <nathan@codesourcery.com>
|
2015-11-18 Nathan Sidwell <nathan@codesourcery.com>
|
||||||
|
|
||||||
* testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: Add
|
* testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: Add
|
||||||
|
|
|
||||||
|
|
@ -50,39 +50,37 @@ main(void)
|
||||||
|
|
||||||
if (fabs(result - vresult) > .0001)
|
if (fabs(result - vresult) > .0001)
|
||||||
abort ();
|
abort ();
|
||||||
// result = 0;
|
result = 0;
|
||||||
// vresult = 0;
|
vresult = 0;
|
||||||
//
|
|
||||||
// /* 'max' reductions. */
|
/* 'max' reductions. */
|
||||||
// #pragma acc parallel vector_length (vl)
|
#pragma acc parallel vector_length (vl) copy(result)
|
||||||
// #pragma acc loop reduction (+:result)
|
#pragma acc loop reduction (max:result)
|
||||||
// for (i = 0; i < n; i++)
|
for (i = 0; i < n; i++)
|
||||||
// result = result > array[i] ? result : array[i];
|
result = result > array[i] ? result : array[i];
|
||||||
//
|
|
||||||
// /* Verify the reduction. */
|
/* Verify the reduction. */
|
||||||
// for (i = 0; i < n; i++)
|
for (i = 0; i < n; i++)
|
||||||
// vresult = vresult > array[i] ? vresult : array[i];
|
vresult = vresult > array[i] ? vresult : array[i];
|
||||||
//
|
|
||||||
// printf("%d != %d\n", result, vresult);
|
if (result != vresult)
|
||||||
// if (result != vresult)
|
abort ();
|
||||||
// abort ();
|
|
||||||
//
|
result = 0;
|
||||||
// result = 0;
|
vresult = 0;
|
||||||
// vresult = 0;
|
|
||||||
//
|
/* 'min' reductions. */
|
||||||
// /* 'min' reductions. */
|
#pragma acc parallel vector_length (vl) copy(result)
|
||||||
// #pragma acc parallel vector_length (vl)
|
#pragma acc loop reduction (min:result)
|
||||||
// #pragma acc loop reduction (+:result)
|
for (i = 0; i < n; i++)
|
||||||
// for (i = 0; i < n; i++)
|
result = result < array[i] ? result : array[i];
|
||||||
// result = result < array[i] ? result : array[i];
|
|
||||||
//
|
/* Verify the reduction. */
|
||||||
// /* Verify the reduction. */
|
for (i = 0; i < n; i++)
|
||||||
// for (i = 0; i < n; i++)
|
vresult = vresult < array[i] ? vresult : array[i];
|
||||||
// vresult = vresult < array[i] ? vresult : array[i];
|
|
||||||
//
|
if (result != vresult)
|
||||||
// printf("%d != %d\n", result, vresult);
|
abort ();
|
||||||
// if (result != vresult)
|
|
||||||
// abort ();
|
|
||||||
|
|
||||||
result = 5;
|
result = 5;
|
||||||
vresult = 5;
|
vresult = 5;
|
||||||
|
|
|
||||||
|
|
@ -22,15 +22,15 @@ main(void)
|
||||||
result = 0;
|
result = 0;
|
||||||
vresult = 0;
|
vresult = 0;
|
||||||
|
|
||||||
/* '+' reductions. */
|
/* 'max' reductions. */
|
||||||
#pragma acc parallel vector_length (vl) copy(result)
|
#pragma acc parallel vector_length (vl) copy(result)
|
||||||
#pragma acc loop reduction (+:result)
|
#pragma acc loop reduction (max:result)
|
||||||
for (i = 0; i < n; i++)
|
for (i = 0; i < n; i++)
|
||||||
result += array[i];
|
result = result > array[i] ? result : array[i];
|
||||||
|
|
||||||
/* Verify the reduction. */
|
/* Verify the reduction. */
|
||||||
for (i = 0; i < n; i++)
|
for (i = 0; i < n; i++)
|
||||||
vresult += array[i];
|
vresult = vresult > array[i] ? vresult : array[i];
|
||||||
|
|
||||||
if (result != vresult)
|
if (result != vresult)
|
||||||
abort ();
|
abort ();
|
||||||
|
|
@ -38,51 +38,18 @@ main(void)
|
||||||
result = 0;
|
result = 0;
|
||||||
vresult = 0;
|
vresult = 0;
|
||||||
|
|
||||||
/* '*' reductions. */
|
/* 'min' reductions. */
|
||||||
#pragma acc parallel vector_length (vl) copy(result)
|
#pragma acc parallel vector_length (vl) copy(result)
|
||||||
#pragma acc loop reduction (*:result)
|
#pragma acc loop reduction (min:result)
|
||||||
for (i = 0; i < n; i++)
|
for (i = 0; i < n; i++)
|
||||||
result *= array[i];
|
result = result < array[i] ? result : array[i];
|
||||||
|
|
||||||
/* Verify the reduction. */
|
/* Verify the reduction. */
|
||||||
for (i = 0; i < n; i++)
|
for (i = 0; i < n; i++)
|
||||||
vresult *= array[i];
|
vresult = vresult < array[i] ? vresult : array[i];
|
||||||
|
|
||||||
if (fabs(result - vresult) > .0001)
|
if (result != vresult)
|
||||||
abort ();
|
abort ();
|
||||||
// result = 0;
|
|
||||||
// vresult = 0;
|
|
||||||
//
|
|
||||||
// /* 'max' reductions. */
|
|
||||||
// #pragma acc parallel vector_length (vl)
|
|
||||||
// #pragma acc loop reduction (+:result)
|
|
||||||
// for (i = 0; i < n; i++)
|
|
||||||
// result = result > array[i] ? result : array[i];
|
|
||||||
//
|
|
||||||
// /* Verify the reduction. */
|
|
||||||
// for (i = 0; i < n; i++)
|
|
||||||
// vresult = vresult > array[i] ? vresult : array[i];
|
|
||||||
//
|
|
||||||
// printf("%d != %d\n", result, vresult);
|
|
||||||
// if (result != vresult)
|
|
||||||
// abort ();
|
|
||||||
//
|
|
||||||
// result = 0;
|
|
||||||
// vresult = 0;
|
|
||||||
//
|
|
||||||
// /* 'min' reductions. */
|
|
||||||
// #pragma acc parallel vector_length (vl)
|
|
||||||
// #pragma acc loop reduction (+:result)
|
|
||||||
// for (i = 0; i < n; i++)
|
|
||||||
// result = result < array[i] ? result : array[i];
|
|
||||||
//
|
|
||||||
// /* Verify the reduction. */
|
|
||||||
// for (i = 0; i < n; i++)
|
|
||||||
// vresult = vresult < array[i] ? vresult : array[i];
|
|
||||||
//
|
|
||||||
// printf("%d != %d\n", result, vresult);
|
|
||||||
// if (result != vresult)
|
|
||||||
// abort ();
|
|
||||||
|
|
||||||
result = 5;
|
result = 5;
|
||||||
vresult = 5;
|
vresult = 5;
|
||||||
|
|
|
||||||
|
|
@ -23,76 +23,6 @@ main(void)
|
||||||
result = 0;
|
result = 0;
|
||||||
vresult = 0;
|
vresult = 0;
|
||||||
|
|
||||||
/* '+' reductions. */
|
|
||||||
#pragma acc parallel vector_length (vl) copy(result)
|
|
||||||
#pragma acc loop reduction (+:result)
|
|
||||||
for (i = 0; i < n; i++)
|
|
||||||
result += array[i];
|
|
||||||
|
|
||||||
/* Verify the reduction. */
|
|
||||||
for (i = 0; i < n; i++)
|
|
||||||
vresult += array[i];
|
|
||||||
|
|
||||||
if (result != vresult)
|
|
||||||
abort ();
|
|
||||||
|
|
||||||
result = 0;
|
|
||||||
vresult = 0;
|
|
||||||
|
|
||||||
/* Needs support for complex multiplication. */
|
|
||||||
|
|
||||||
// /* '*' reductions. */
|
|
||||||
// #pragma acc parallel vector_length (vl)
|
|
||||||
// #pragma acc loop reduction (*:result)
|
|
||||||
// for (i = 0; i < n; i++)
|
|
||||||
// result *= array[i];
|
|
||||||
//
|
|
||||||
// /* Verify the reduction. */
|
|
||||||
// for (i = 0; i < n; i++)
|
|
||||||
// vresult *= array[i];
|
|
||||||
//
|
|
||||||
// if (fabs(result - vresult) > .0001)
|
|
||||||
// abort ();
|
|
||||||
// result = 0;
|
|
||||||
// vresult = 0;
|
|
||||||
|
|
||||||
// /* 'max' reductions. */
|
|
||||||
// #pragma acc parallel vector_length (vl)
|
|
||||||
// #pragma acc loop reduction (+:result)
|
|
||||||
// for (i = 0; i < n; i++)
|
|
||||||
// result = result > array[i] ? result : array[i];
|
|
||||||
//
|
|
||||||
// /* Verify the reduction. */
|
|
||||||
// for (i = 0; i < n; i++)
|
|
||||||
// vresult = vresult > array[i] ? vresult : array[i];
|
|
||||||
//
|
|
||||||
// printf("%d != %d\n", result, vresult);
|
|
||||||
// if (result != vresult)
|
|
||||||
// abort ();
|
|
||||||
//
|
|
||||||
// result = 0;
|
|
||||||
// vresult = 0;
|
|
||||||
//
|
|
||||||
// /* 'min' reductions. */
|
|
||||||
// #pragma acc parallel vector_length (vl)
|
|
||||||
// #pragma acc loop reduction (+:result)
|
|
||||||
// for (i = 0; i < n; i++)
|
|
||||||
// result = result < array[i] ? result : array[i];
|
|
||||||
//
|
|
||||||
// /* Verify the reduction. */
|
|
||||||
// for (i = 0; i < n; i++)
|
|
||||||
// vresult = vresult < array[i] ? vresult : array[i];
|
|
||||||
//
|
|
||||||
// printf("%d != %d\n", result, vresult);
|
|
||||||
// if (result != vresult)
|
|
||||||
// abort ();
|
|
||||||
|
|
||||||
result = 5;
|
|
||||||
vresult = 5;
|
|
||||||
|
|
||||||
lresult = false;
|
|
||||||
lvresult = false;
|
|
||||||
|
|
||||||
/* '&&' reductions. */
|
/* '&&' reductions. */
|
||||||
#pragma acc parallel vector_length (vl) copy(lresult)
|
#pragma acc parallel vector_length (vl) copy(lresult)
|
||||||
#pragma acc loop reduction (&&:lresult)
|
#pragma acc loop reduction (&&:lresult)
|
||||||
|
|
|
||||||
|
|
@ -3,10 +3,11 @@
|
||||||
|
|
||||||
/* Double float has 53 bits of fraction. */
|
/* Double float has 53 bits of fraction. */
|
||||||
#define FRAC (1.0 / (1LL << 48))
|
#define FRAC (1.0 / (1LL << 48))
|
||||||
|
typedef double _Complex Type;
|
||||||
|
|
||||||
int close_enough (double _Complex a, double _Complex b)
|
int close_enough (Type a, Type b)
|
||||||
{
|
{
|
||||||
double _Complex diff = a - b;
|
Type diff = a - b;
|
||||||
double mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a);
|
double mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a);
|
||||||
double mag2_diff = (__real__(diff) * __real__ (diff)
|
double mag2_diff = (__real__(diff) * __real__ (diff)
|
||||||
+ __imag__ (diff) * __imag__ (diff));
|
+ __imag__ (diff) * __imag__ (diff));
|
||||||
|
|
@ -17,9 +18,9 @@ int close_enough (double _Complex a, double _Complex b)
|
||||||
#define N 100
|
#define N 100
|
||||||
|
|
||||||
static int __attribute__ ((noinline))
|
static int __attribute__ ((noinline))
|
||||||
vector (double _Complex ary[N], double _Complex sum, double _Complex prod)
|
vector (Type ary[N], Type sum, Type prod)
|
||||||
{
|
{
|
||||||
double _Complex tsum = 0, tprod = 1;
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
|
#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
|
||||||
{
|
{
|
||||||
|
|
@ -41,9 +42,9 @@ vector (double _Complex ary[N], double _Complex sum, double _Complex prod)
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __attribute__ ((noinline))
|
static int __attribute__ ((noinline))
|
||||||
worker (double _Complex ary[N], double _Complex sum, double _Complex prod)
|
worker (Type ary[N], Type sum, Type prod)
|
||||||
{
|
{
|
||||||
double _Complex tsum = 0, tprod = 1;
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
|
#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
|
||||||
{
|
{
|
||||||
|
|
@ -65,9 +66,9 @@ worker (double _Complex ary[N], double _Complex sum, double _Complex prod)
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __attribute__ ((noinline))
|
static int __attribute__ ((noinline))
|
||||||
gang (double _Complex ary[N], double _Complex sum, double _Complex prod)
|
gang (Type ary[N], Type sum, Type prod)
|
||||||
{
|
{
|
||||||
double _Complex tsum = 0, tprod = 1;
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
|
#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
|
||||||
{
|
{
|
||||||
|
|
@ -90,7 +91,7 @@ gang (double _Complex ary[N], double _Complex sum, double _Complex prod)
|
||||||
|
|
||||||
int main (void)
|
int main (void)
|
||||||
{
|
{
|
||||||
double _Complex ary[N], sum = 0, prod = 1;
|
Type ary[N], sum = 0, prod = 1;
|
||||||
|
|
||||||
for (int ix = 0; ix < N; ix++)
|
for (int ix = 0; ix < N; ix++)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -3,10 +3,11 @@
|
||||||
|
|
||||||
/* Single float has 23 bits of fraction. */
|
/* Single float has 23 bits of fraction. */
|
||||||
#define FRAC (1.0f / (1 << 20))
|
#define FRAC (1.0f / (1 << 20))
|
||||||
|
typedef float _Complex Type;
|
||||||
|
|
||||||
int close_enough (float _Complex a, float _Complex b)
|
int close_enough (Type a, Type b)
|
||||||
{
|
{
|
||||||
float _Complex diff = a - b;
|
Type diff = a - b;
|
||||||
float mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a);
|
float mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a);
|
||||||
float mag2_diff = (__real__(diff) * __real__ (diff)
|
float mag2_diff = (__real__(diff) * __real__ (diff)
|
||||||
+ __imag__ (diff) * __imag__ (diff));
|
+ __imag__ (diff) * __imag__ (diff));
|
||||||
|
|
@ -17,9 +18,9 @@ int close_enough (float _Complex a, float _Complex b)
|
||||||
#define N 100
|
#define N 100
|
||||||
|
|
||||||
static int __attribute__ ((noinline))
|
static int __attribute__ ((noinline))
|
||||||
vector (float _Complex ary[N], float _Complex sum, float _Complex prod)
|
vector (Type ary[N], Type sum, Type prod)
|
||||||
{
|
{
|
||||||
float _Complex tsum = 0, tprod = 1;
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
|
#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
|
||||||
{
|
{
|
||||||
|
|
@ -41,9 +42,9 @@ vector (float _Complex ary[N], float _Complex sum, float _Complex prod)
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __attribute__ ((noinline))
|
static int __attribute__ ((noinline))
|
||||||
worker (float _Complex ary[N], float _Complex sum, float _Complex prod)
|
worker (Type ary[N], Type sum, Type prod)
|
||||||
{
|
{
|
||||||
float _Complex tsum = 0, tprod = 1;
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
|
#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
|
||||||
{
|
{
|
||||||
|
|
@ -65,9 +66,9 @@ worker (float _Complex ary[N], float _Complex sum, float _Complex prod)
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __attribute__ ((noinline))
|
static int __attribute__ ((noinline))
|
||||||
gang (float _Complex ary[N], float _Complex sum, float _Complex prod)
|
gang (Type ary[N], Type sum, Type prod)
|
||||||
{
|
{
|
||||||
float _Complex tsum = 0, tprod = 1;
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
|
#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
|
||||||
{
|
{
|
||||||
|
|
@ -90,7 +91,7 @@ gang (float _Complex ary[N], float _Complex sum, float _Complex prod)
|
||||||
|
|
||||||
int main (void)
|
int main (void)
|
||||||
{
|
{
|
||||||
float _Complex ary[N], sum = 0, prod = 1;
|
Type ary[N], sum = 0, prod = 1;
|
||||||
|
|
||||||
for (int ix = 0; ix < N; ix++)
|
for (int ix = 0; ix < N; ix++)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,112 @@
|
||||||
|
|
||||||
|
/* Double float has 53 bits of fraction.  FRAC is the relative
   tolerance used when comparing reduction results, 2^-48.  */
#define FRAC (1.0 / (1LL << 48))
typedef double Type;

/* Return nonzero when B agrees with the reference value A to within a
   relative error of FRAC, zero otherwise.

   The error is measured against the magnitude of A, so a negative
   reference is held to the same tolerance as a positive one.  Two
   identical values (including two zeros) always match; a zero
   reference never matches a different value.  */
int close_enough (Type a, Type b)
{
  Type diff = a - b;
  Type mag = a;

  if (diff < 0)
    diff = -diff;
  if (mag < 0)
    mag = -mag;

  /* Identical values match, even when the reference is zero.  */
  if (diff == 0)
    return 1;

  /* Nothing non-zero is relatively close to a zero reference; this
     also avoids dividing by zero below.  */
  if (mag == 0)
    return 0;

  return diff / mag < FRAC;
}
|
||||||
|
|
||||||
|
#define N 100
|
||||||
|
|
||||||
|
static int __attribute__ ((noinline))
|
||||||
|
vector (Type ary[N], Type sum, Type prod)
|
||||||
|
{
|
||||||
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
|
#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
|
||||||
|
{
|
||||||
|
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
|
||||||
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
{
|
||||||
|
tsum += ary[ix];
|
||||||
|
tprod *= ary[ix];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!close_enough (sum, tsum))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (!close_enough (prod, tprod))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int __attribute__ ((noinline))
|
||||||
|
worker (Type ary[N], Type sum, Type prod)
|
||||||
|
{
|
||||||
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
|
#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
|
||||||
|
{
|
||||||
|
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
|
||||||
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
{
|
||||||
|
tsum += ary[ix];
|
||||||
|
tprod *= ary[ix];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!close_enough (sum, tsum))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (!close_enough (prod, tprod))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int __attribute__ ((noinline))
|
||||||
|
gang (Type ary[N], Type sum, Type prod)
|
||||||
|
{
|
||||||
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
|
#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
|
||||||
|
{
|
||||||
|
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
|
||||||
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
{
|
||||||
|
tsum += ary[ix];
|
||||||
|
tprod *= ary[ix];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!close_enough (sum, tsum))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (!close_enough (prod, tprod))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main (void)
|
||||||
|
{
|
||||||
|
Type ary[N], sum = 0, prod = 1;
|
||||||
|
|
||||||
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
{
|
||||||
|
float frac = ix * (1.0f / 1024) + 1.0f;
|
||||||
|
|
||||||
|
ary[ix] = frac;
|
||||||
|
sum += ary[ix];
|
||||||
|
prod *= ary[ix];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (vector (ary, sum, prod))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (worker (ary, sum, prod))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (gang (ary, sum, prod))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,112 @@
|
||||||
|
|
||||||
|
/* Single float has 23 bits of fraction.  FRAC is the relative
   tolerance used when comparing reduction results, 2^-20.  */
#define FRAC (1.0f / (1 << 20))
typedef float Type;

/* Return nonzero when B agrees with the reference value A to within a
   relative error of FRAC, zero otherwise.

   The error is measured against the magnitude of A, so a negative
   reference is held to the same tolerance as a positive one.  Two
   identical values (including two zeros) always match; a zero
   reference never matches a different value.  */
int close_enough (Type a, Type b)
{
  Type diff = a - b;
  Type mag = a;

  if (diff < 0)
    diff = -diff;
  if (mag < 0)
    mag = -mag;

  /* Identical values match, even when the reference is zero.  */
  if (diff == 0)
    return 1;

  /* Nothing non-zero is relatively close to a zero reference; this
     also avoids dividing by zero below.  */
  if (mag == 0)
    return 0;

  return diff / mag < FRAC;
}
|
||||||
|
|
||||||
|
#define N 100
|
||||||
|
|
||||||
|
static int __attribute__ ((noinline))
|
||||||
|
vector (Type ary[N], Type sum, Type prod)
|
||||||
|
{
|
||||||
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
|
#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
|
||||||
|
{
|
||||||
|
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
|
||||||
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
{
|
||||||
|
tsum += ary[ix];
|
||||||
|
tprod *= ary[ix];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!close_enough (sum, tsum))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (!close_enough (prod, tprod))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int __attribute__ ((noinline))
|
||||||
|
worker (Type ary[N], Type sum, Type prod)
|
||||||
|
{
|
||||||
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
|
#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
|
||||||
|
{
|
||||||
|
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
|
||||||
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
{
|
||||||
|
tsum += ary[ix];
|
||||||
|
tprod *= ary[ix];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!close_enough (sum, tsum))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (!close_enough (prod, tprod))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int __attribute__ ((noinline))
|
||||||
|
gang (Type ary[N], Type sum, Type prod)
|
||||||
|
{
|
||||||
|
Type tsum = 0, tprod = 1;
|
||||||
|
|
||||||
|
#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
|
||||||
|
{
|
||||||
|
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
|
||||||
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
{
|
||||||
|
tsum += ary[ix];
|
||||||
|
tprod *= ary[ix];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!close_enough (sum, tsum))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (!close_enough (prod, tprod))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main (void)
|
||||||
|
{
|
||||||
|
Type ary[N], sum = 0, prod = 1;
|
||||||
|
|
||||||
|
for (int ix = 0; ix < N; ix++)
|
||||||
|
{
|
||||||
|
float frac = ix * (1.0f / 1024) + 1.0f;
|
||||||
|
|
||||||
|
ary[ix] = frac;
|
||||||
|
sum += ary[ix];
|
||||||
|
prod *= ary[ix];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (vector (ary, sum, prod))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (worker (ary, sum, prod))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (gang (ary, sum, prod))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue