mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			Merge libgomp.oacc-c-c++-common/loop-reduction-*.c into libgomp.oacc-c-c++-common/reduction-7.c
libgomp/ * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c: Merge this file, and... * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gv-np-1.c: ... this file, and... * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c: ... this file, and... * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c: ... this file, and... * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c: ... this file, and... * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c: ... this file, and... * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c: ... this file, and... * testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c: ... this file, and... * testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c: ... this file, and... * testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c: ... this file, and... * testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c: ... this file, and... * testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c: ... this file, and... * testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c: ... this file into... * testsuite/libgomp.oacc-c-c++-common/reduction-7.c: ... this file. From-SVN: r234899
This commit is contained in:
		
							parent
							
								
									27f9add31b
								
							
						
					
					
						commit
						d97633ef65
					
				|  | @ -1,5 +1,34 @@ | |||
| 2016-04-12  Thomas Schwinge  <thomas@codesourcery.com> | ||||
| 
 | ||||
| 	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c: | ||||
| 	Merge this file, and... | ||||
| 	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gv-np-1.c: | ||||
| 	... this file, and... | ||||
| 	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c: | ||||
| 	... this file, and... | ||||
| 	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c: | ||||
| 	... this file, and... | ||||
| 	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c: | ||||
| 	... this file, and... | ||||
| 	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c: | ||||
| 	... this file, and... | ||||
| 	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c: | ||||
| 	... this file, and... | ||||
| 	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c: | ||||
| 	... this file, and... | ||||
| 	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c: | ||||
| 	... this file, and... | ||||
| 	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c: | ||||
| 	... this file, and... | ||||
| 	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c: | ||||
| 	... this file, and... | ||||
| 	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c: | ||||
| 	... this file, and... | ||||
| 	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c: | ||||
| 	... this file into... | ||||
| 	* testsuite/libgomp.oacc-c-c++-common/reduction-7.c: ... this | ||||
| 	file. | ||||
| 
 | ||||
| 	* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c: | ||||
| 	Make failure observable. | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,45 +0,0 @@ | |||
| /* { dg-additional-options "-w" } */ | ||||
| 
 | ||||
| #include <assert.h> | ||||
| 
 | ||||
| /* Test of reduction on loop directive (gangs, non-private reduction
 | ||||
|    variable).  */ | ||||
| 
 | ||||
| int | ||||
| main (int argc, char *argv[]) | ||||
| { | ||||
|   int i, arr[1024], res = 0, hres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 1024; i++) | ||||
|     arr[i] = i; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       copy(res) | ||||
|   { | ||||
|     #pragma acc loop gang reduction(+:res) | ||||
|     for (i = 0; i < 1024; i++) | ||||
|       res += arr[i]; | ||||
|   } | ||||
| 
 | ||||
|   for (i = 0; i < 1024; i++) | ||||
|     hres += arr[i]; | ||||
| 
 | ||||
|   assert (res == hres); | ||||
| 
 | ||||
|   res = hres = 1; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       copy(res) | ||||
|   { | ||||
|     #pragma acc loop gang reduction(*:res) | ||||
|     for (i = 0; i < 12; i++) | ||||
|       res *= arr[i]; | ||||
|   } | ||||
| 
 | ||||
|   for (i = 0; i < 12; i++) | ||||
|     hres *= arr[i]; | ||||
| 
 | ||||
|   assert (res == hres); | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
|  | @ -1,30 +0,0 @@ | |||
| /* { dg-additional-options "-w" } */ | ||||
| 
 | ||||
| #include <assert.h> | ||||
| 
 | ||||
| /* Test of reduction on loop directive (gangs and vectors, non-private
 | ||||
|    reduction variable).  */ | ||||
| 
 | ||||
| int | ||||
| main (int argc, char *argv[]) | ||||
| { | ||||
|   int i, arr[1024], res = 0, hres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 1024; i++) | ||||
|     arr[i] = i; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       copy(res) | ||||
|   { | ||||
|     #pragma acc loop gang vector reduction(+:res) | ||||
|     for (i = 0; i < 1024; i++) | ||||
|       res += arr[i]; | ||||
|   } | ||||
| 
 | ||||
|   for (i = 0; i < 1024; i++) | ||||
|     hres += arr[i]; | ||||
| 
 | ||||
|   assert (res == hres); | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
|  | @ -1,30 +0,0 @@ | |||
| /* { dg-additional-options "-w" } */ | ||||
| 
 | ||||
| #include <assert.h> | ||||
| 
 | ||||
| /* Test of reduction on loop directive (gangs and workers, non-private
 | ||||
|    reduction variable).  */ | ||||
| 
 | ||||
| int | ||||
| main (int argc, char *argv[]) | ||||
| { | ||||
|   int i, arr[1024], res = 0, hres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 1024; i++) | ||||
|     arr[i] = i; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       copy(res) | ||||
|   { | ||||
|     #pragma acc loop gang worker reduction(+:res) | ||||
|     for (i = 0; i < 1024; i++) | ||||
|       res += arr[i]; | ||||
|   } | ||||
| 
 | ||||
|   for (i = 0; i < 1024; i++) | ||||
|     hres += arr[i]; | ||||
| 
 | ||||
|   assert (res == hres); | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
|  | @ -1,28 +0,0 @@ | |||
| #include <assert.h> | ||||
| 
 | ||||
| /* Test of reduction on loop directive (gangs, workers and vectors, non-private
 | ||||
|    reduction variable).  */ | ||||
| 
 | ||||
| int | ||||
| main (int argc, char *argv[]) | ||||
| { | ||||
|   int i, arr[1024], res = 0, hres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 1024; i++) | ||||
|     arr[i] = i; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       copy(res) | ||||
|   { | ||||
|     #pragma acc loop gang worker vector reduction(+:res) | ||||
|     for (i = 0; i < 1024; i++) | ||||
|       res += arr[i]; | ||||
|   } | ||||
| 
 | ||||
|   for (i = 0; i < 1024; i++) | ||||
|     hres += arr[i]; | ||||
| 
 | ||||
|   assert (res == hres); | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
|  | @ -1,34 +0,0 @@ | |||
| #include <assert.h> | ||||
| 
 | ||||
| /* Test of reduction on loop directive (gangs, workers and vectors, non-private
 | ||||
|    reduction variable: separate gang and worker/vector loops).  */ | ||||
| 
 | ||||
| int | ||||
| main (int argc, char *argv[]) | ||||
| { | ||||
|   int i, j, arr[32768], res = 0, hres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 32768; i++) | ||||
|     arr[i] = i; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       copy(res) | ||||
|   { | ||||
|     #pragma acc loop gang reduction(+:res) | ||||
|     for (j = 0; j < 32; j++) | ||||
|       { | ||||
|         #pragma acc loop worker vector reduction(+:res) | ||||
|         for (i = 0; i < 1024; i++) | ||||
| 	  res += arr[j * 1024 + i]; | ||||
|       } | ||||
|     /* "res" is non-private, and is not available until after the parallel
 | ||||
|        region.  */ | ||||
|   } | ||||
| 
 | ||||
|   for (i = 0; i < 32768; i++) | ||||
|     hres += arr[i]; | ||||
| 
 | ||||
|   assert (res == hres); | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
|  | @ -1,33 +0,0 @@ | |||
| #include <assert.h> | ||||
| 
 | ||||
| /* Test of reduction on loop directive (gangs, workers and vectors, non-private
 | ||||
|    reduction variable: separate gang and worker/vector loops).  */ | ||||
| 
 | ||||
| int | ||||
| main (int argc, char *argv[]) | ||||
| { | ||||
|   int i, j; | ||||
|   double arr[32768], res = 0, hres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 32768; i++) | ||||
|     arr[i] = i; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       copyin(arr) copy(res) | ||||
|   { | ||||
|     #pragma acc loop gang reduction(+:res) | ||||
|     for (j = 0; j < 32; j++) | ||||
|       { | ||||
|         #pragma acc loop worker vector reduction(+:res) | ||||
|         for (i = 0; i < 1024; i++) | ||||
| 	  res += arr[j * 1024 + i]; | ||||
|       } | ||||
|   } | ||||
| 
 | ||||
|   for (i = 0; i < 32768; i++) | ||||
|     hres += arr[i]; | ||||
| 
 | ||||
|   assert (res == hres); | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
|  | @ -1,55 +0,0 @@ | |||
| #include <assert.h> | ||||
| 
 | ||||
| /* Test of reduction on loop directive (gangs, workers and vectors, multiple
 | ||||
|    non-private reduction variables, float type).  */ | ||||
| 
 | ||||
| int | ||||
| main (int argc, char *argv[]) | ||||
| { | ||||
|   int i, j; | ||||
|   float arr[32768]; | ||||
|   float res = 0, mres = 0, hres = 0, hmres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 32768; i++) | ||||
|     arr[i] = i; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       copy(res, mres) | ||||
|   { | ||||
|     #pragma acc loop gang reduction(+:res) reduction(max:mres) | ||||
|     for (j = 0; j < 32; j++) | ||||
|       { | ||||
| 	#pragma acc loop worker vector reduction(+:res) reduction(max:mres) | ||||
| 	for (i = 0; i < 1024; i++) | ||||
| 	  { | ||||
| 	    res += arr[j * 1024 + i]; | ||||
| 	    if (arr[j * 1024 + i] > mres) | ||||
| 	      mres = arr[j * 1024 + i]; | ||||
| 	  } | ||||
| 
 | ||||
| 	#pragma acc loop worker vector reduction(+:res) reduction(max:mres) | ||||
| 	for (i = 0; i < 1024; i++) | ||||
| 	  { | ||||
| 	    res += arr[j * 1024 + (1023 - i)]; | ||||
| 	    if (arr[j * 1024 + (1023 - i)] > mres) | ||||
| 	      mres = arr[j * 1024 + (1023 - i)]; | ||||
| 	  } | ||||
|       } | ||||
|   } | ||||
| 
 | ||||
|   for (j = 0; j < 32; j++) | ||||
|     for (i = 0; i < 1024; i++) | ||||
|       { | ||||
|         hres += arr[j * 1024 + i]; | ||||
| 	hres += arr[j * 1024 + (1023 - i)]; | ||||
| 	if (arr[j * 1024 + i] > hmres) | ||||
| 	  hmres = arr[j * 1024 + i]; | ||||
| 	if (arr[j * 1024 + (1023 - i)] > hmres) | ||||
| 	  hmres = arr[j * 1024 + (1023 - i)]; | ||||
|       } | ||||
| 
 | ||||
|   assert (res == hres); | ||||
|   assert (mres == hmres); | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
|  | @ -1,43 +0,0 @@ | |||
| /* { dg-additional-options "-w" } */ | ||||
| 
 | ||||
| #include <assert.h> | ||||
| 
 | ||||
| /* Test of reduction on loop directive (vectors, private reduction
 | ||||
|    variable).  */ | ||||
| 
 | ||||
| int | ||||
| main (int argc, char *argv[]) | ||||
| { | ||||
|   int i, j, arr[1024], out[32], res = 0, hres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 1024; i++) | ||||
|     arr[i] = i; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       private(res) copyout(out) | ||||
|   { | ||||
|     #pragma acc loop gang | ||||
|     for (j = 0; j < 32; j++) | ||||
|       { | ||||
|         res = 0; | ||||
| 
 | ||||
| 	#pragma acc loop vector reduction(+:res) | ||||
| 	for (i = 0; i < 32; i++) | ||||
| 	  res += arr[j * 32 + i]; | ||||
| 
 | ||||
| 	out[j] = res; | ||||
|       } | ||||
|   } | ||||
| 
 | ||||
|   for (j = 0; j < 32; j++) | ||||
|     { | ||||
|       hres = 0; | ||||
| 
 | ||||
|       for (i = 0; i < 32; i++) | ||||
| 	hres += arr[j * 32 + i]; | ||||
| 
 | ||||
|       assert (out[j] == hres); | ||||
|     } | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
|  | @ -1,41 +0,0 @@ | |||
| #include <assert.h> | ||||
| 
 | ||||
| /* Test of reduction on loop directive (vector reduction in
 | ||||
|    gang-partitioned/worker-partitioned mode, private reduction variable).  */ | ||||
| 
 | ||||
| int | ||||
| main (int argc, char *argv[]) | ||||
| { | ||||
|   int i, j, k; | ||||
|   double ina[1024], inb[1024], out[1024], acc; | ||||
| 
 | ||||
|   for (j = 0; j < 32; j++) | ||||
|     for (i = 0; i < 32; i++) | ||||
|       { | ||||
|         ina[j * 32 + i] = (i == j) ? 2.0 : 0.0; | ||||
| 	inb[j * 32 + i] = (double) (i + j); | ||||
|       } | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       private(acc) copyin(ina, inb) copyout(out) | ||||
|   { | ||||
|     #pragma acc loop gang worker | ||||
|     for (k = 0; k < 32; k++) | ||||
|       for (j = 0; j < 32; j++) | ||||
|         { | ||||
| 	  acc = 0; | ||||
| 
 | ||||
| 	  #pragma acc loop vector reduction(+:acc) | ||||
| 	  for (i = 0; i < 32; i++) | ||||
| 	    acc += ina[k * 32 + i] * inb[i * 32 + j]; | ||||
| 
 | ||||
| 	  out[k * 32 + j] = acc; | ||||
| 	} | ||||
|   } | ||||
| 
 | ||||
|   for (j = 0; j < 32; j++) | ||||
|     for (i = 0; i < 32; i++) | ||||
|       assert (out[j * 32 + i] == (i + j) * 2); | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
|  | @ -1,43 +0,0 @@ | |||
| /* { dg-additional-options "-w" } */ | ||||
| 
 | ||||
| #include <assert.h> | ||||
| 
 | ||||
| /* Test of reduction on loop directive (workers, private reduction
 | ||||
|    variable).  */ | ||||
| 
 | ||||
| int | ||||
| main (int argc, char *argv[]) | ||||
| { | ||||
|   int i, j, arr[1024], out[32], res = 0, hres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 1024; i++) | ||||
|     arr[i] = i; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       private(res) copyout(out) | ||||
|   { | ||||
|     #pragma acc loop gang | ||||
|     for (j = 0; j < 32; j++) | ||||
|       { | ||||
|         res = 0; | ||||
| 
 | ||||
| 	#pragma acc loop worker reduction(+:res) | ||||
| 	for (i = 0; i < 32; i++) | ||||
| 	  res += arr[j * 32 + i]; | ||||
| 
 | ||||
| 	out[j] = res; | ||||
|       } | ||||
|   } | ||||
| 
 | ||||
|   for (j = 0; j < 32; j++) | ||||
|     { | ||||
|       hres = 0; | ||||
| 
 | ||||
|       for (i = 0; i < 32; i++) | ||||
| 	hres += arr[j * 32 + i]; | ||||
| 
 | ||||
|       assert (out[j] == hres); | ||||
|     } | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
|  | @ -1,41 +0,0 @@ | |||
| #include <assert.h> | ||||
| 
 | ||||
| /* Test of reduction on loop directive (workers and vectors, private reduction
 | ||||
|    variable).  */ | ||||
| 
 | ||||
| int | ||||
| main (int argc, char *argv[]) | ||||
| { | ||||
|   int i, j, arr[1024], out[32], res = 0, hres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 1024; i++) | ||||
|     arr[i] = i; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       private(res) copyout(out) | ||||
|   { | ||||
|     #pragma acc loop gang | ||||
|     for (j = 0; j < 32; j++) | ||||
|       { | ||||
|         res = 0; | ||||
| 
 | ||||
| 	#pragma acc loop worker vector reduction(+:res) | ||||
| 	for (i = 0; i < 32; i++) | ||||
| 	  res += arr[j * 32 + i]; | ||||
| 
 | ||||
| 	out[j] = res; | ||||
|       } | ||||
|   } | ||||
| 
 | ||||
|   for (j = 0; j < 32; j++) | ||||
|     { | ||||
|       hres = 0; | ||||
| 
 | ||||
|       for (i = 0; i < 32; i++) | ||||
| 	hres += arr[j * 32 + i]; | ||||
| 
 | ||||
|       assert (out[j] == hres); | ||||
|     } | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
|  | @ -1,45 +0,0 @@ | |||
| #include <assert.h> | ||||
| 
 | ||||
| /* Test of reduction on loop directive (workers and vectors, private reduction
 | ||||
|    variable).  */ | ||||
| 
 | ||||
| int | ||||
| main (int argc, char *argv[]) | ||||
| { | ||||
|   int i, j, arr[32768], out[32], res = 0, hres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 32768; i++) | ||||
|     arr[i] = i; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       private(res) copyout(out) | ||||
|   { | ||||
|     #pragma acc loop gang | ||||
|     for (j = 0; j < 32; j++) | ||||
|       { | ||||
|         res = j; | ||||
| 
 | ||||
| 	#pragma acc loop worker reduction(+:res) | ||||
| 	for (i = 0; i < 1024; i++) | ||||
| 	  res += arr[j * 1024 + i]; | ||||
| 
 | ||||
| 	#pragma acc loop vector reduction(+:res) | ||||
| 	for (i = 1023; i >= 0; i--) | ||||
| 	  res += arr[j * 1024 + i]; | ||||
| 
 | ||||
| 	out[j] = res; | ||||
|       } | ||||
|   } | ||||
| 
 | ||||
|   for (j = 0; j < 32; j++) | ||||
|     { | ||||
|       hres = j; | ||||
| 
 | ||||
|       for (i = 0; i < 1024; i++) | ||||
| 	hres += arr[j * 1024 + i] * 2; | ||||
| 
 | ||||
|       assert (out[j] == hres); | ||||
|     } | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
|  | @ -1,38 +0,0 @@ | |||
| #include <assert.h> | ||||
| 
 | ||||
| /* Test of reduction on loop directive (workers and vectors, private reduction
 | ||||
|    variable: gang-redundant mode).  */ | ||||
| 
 | ||||
| int | ||||
| main (int argc, char *argv[]) | ||||
| { | ||||
|   int i, arr[1024], out[32], res = 0, hres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 1024; i++) | ||||
|     arr[i] = i ^ 33; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       private(res) copyin(arr) copyout(out) | ||||
|   { | ||||
|     /* Private variables aren't initialized by default in openacc.  */ | ||||
|     res = 0; | ||||
| 
 | ||||
|     /* "res" should be available at the end of the following loop (and should
 | ||||
|        have the same value redundantly in each gang).  */ | ||||
|     #pragma acc loop worker vector reduction(+:res) | ||||
|     for (i = 0; i < 1024; i++) | ||||
|       res += arr[i]; | ||||
| 
 | ||||
|     #pragma acc loop gang (static: 1) | ||||
|     for (i = 0; i < 32; i++) | ||||
|       out[i] = res; | ||||
|   } | ||||
| 
 | ||||
|   for (i = 0; i < 1024; i++) | ||||
|     hres += arr[i]; | ||||
| 
 | ||||
|   for (i = 0; i < 32; i++) | ||||
|     assert (out[i] == hres); | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
|  | @ -118,12 +118,363 @@ void gwv_np_1() | |||
| } | ||||
| 
 | ||||
| 
 | ||||
| /* Test of reduction on loop directive (gangs, workers and vectors, non-private
 | ||||
|    reduction variable: separate gang and worker/vector loops).  */ | ||||
| 
 | ||||
| void gwv_np_2() | ||||
| { | ||||
|   int i, j, arr[32768], res = 0, hres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 32768; i++) | ||||
|     arr[i] = i; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       copy(res) | ||||
|   { | ||||
|     #pragma acc loop gang reduction(+:res) | ||||
|     for (j = 0; j < 32; j++) | ||||
|       { | ||||
|         #pragma acc loop worker vector reduction(+:res) | ||||
|         for (i = 0; i < 1024; i++) | ||||
| 	  res += arr[j * 1024 + i]; | ||||
|       } | ||||
|     /* "res" is non-private, and is not available until after the parallel
 | ||||
|        region.  */ | ||||
|   } | ||||
| 
 | ||||
|   for (i = 0; i < 32768; i++) | ||||
|     hres += arr[i]; | ||||
| 
 | ||||
|   assert (res == hres); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /* Test of reduction on loop directive (gangs, workers and vectors, non-private
 | ||||
|    reduction variable: separate gang and worker/vector loops).  */ | ||||
| 
 | ||||
| void gwv_np_3() | ||||
| { | ||||
|   int i, j; | ||||
|   double arr[32768], res = 0, hres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 32768; i++) | ||||
|     arr[i] = i; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       copyin(arr) copy(res) | ||||
|   { | ||||
|     #pragma acc loop gang reduction(+:res) | ||||
|     for (j = 0; j < 32; j++) | ||||
|       { | ||||
|         #pragma acc loop worker vector reduction(+:res) | ||||
|         for (i = 0; i < 1024; i++) | ||||
| 	  res += arr[j * 1024 + i]; | ||||
|       } | ||||
|   } | ||||
| 
 | ||||
|   for (i = 0; i < 32768; i++) | ||||
|     hres += arr[i]; | ||||
| 
 | ||||
|   assert (res == hres); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /* Test of reduction on loop directive (gangs, workers and vectors, multiple
 | ||||
|    non-private reduction variables, float type).  */ | ||||
| 
 | ||||
| void gwv_np_4() | ||||
| { | ||||
|   int i, j; | ||||
|   float arr[32768]; | ||||
|   float res = 0, mres = 0, hres = 0, hmres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 32768; i++) | ||||
|     arr[i] = i; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       copy(res, mres) | ||||
|   { | ||||
|     #pragma acc loop gang reduction(+:res) reduction(max:mres) | ||||
|     for (j = 0; j < 32; j++) | ||||
|       { | ||||
| 	#pragma acc loop worker vector reduction(+:res) reduction(max:mres) | ||||
| 	for (i = 0; i < 1024; i++) | ||||
| 	  { | ||||
| 	    res += arr[j * 1024 + i]; | ||||
| 	    if (arr[j * 1024 + i] > mres) | ||||
| 	      mres = arr[j * 1024 + i]; | ||||
| 	  } | ||||
| 
 | ||||
| 	#pragma acc loop worker vector reduction(+:res) reduction(max:mres) | ||||
| 	for (i = 0; i < 1024; i++) | ||||
| 	  { | ||||
| 	    res += arr[j * 1024 + (1023 - i)]; | ||||
| 	    if (arr[j * 1024 + (1023 - i)] > mres) | ||||
| 	      mres = arr[j * 1024 + (1023 - i)]; | ||||
| 	  } | ||||
|       } | ||||
|   } | ||||
| 
 | ||||
|   for (j = 0; j < 32; j++) | ||||
|     for (i = 0; i < 1024; i++) | ||||
|       { | ||||
|         hres += arr[j * 1024 + i]; | ||||
| 	hres += arr[j * 1024 + (1023 - i)]; | ||||
| 	if (arr[j * 1024 + i] > hmres) | ||||
| 	  hmres = arr[j * 1024 + i]; | ||||
| 	if (arr[j * 1024 + (1023 - i)] > hmres) | ||||
| 	  hmres = arr[j * 1024 + (1023 - i)]; | ||||
|       } | ||||
| 
 | ||||
|   assert (res == hres); | ||||
|   assert (mres == hmres); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /* Test of reduction on loop directive (vectors, private reduction
 | ||||
|    variable).  */ | ||||
| 
 | ||||
| void v_p_1() | ||||
| { | ||||
|   int i, j, arr[1024], out[32], res = 0, hres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 1024; i++) | ||||
|     arr[i] = i; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       private(res) copyout(out) | ||||
|   { | ||||
|     #pragma acc loop gang | ||||
|     for (j = 0; j < 32; j++) | ||||
|       { | ||||
|         res = 0; | ||||
| 
 | ||||
| 	#pragma acc loop vector reduction(+:res) | ||||
| 	for (i = 0; i < 32; i++) | ||||
| 	  res += arr[j * 32 + i]; | ||||
| 
 | ||||
| 	out[j] = res; | ||||
|       } | ||||
|   } | ||||
| 
 | ||||
|   for (j = 0; j < 32; j++) | ||||
|     { | ||||
|       hres = 0; | ||||
| 
 | ||||
|       for (i = 0; i < 32; i++) | ||||
| 	hres += arr[j * 32 + i]; | ||||
| 
 | ||||
|       assert (out[j] == hres); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /* Test of reduction on loop directive (vector reduction in
 | ||||
|    gang-partitioned/worker-partitioned mode, private reduction variable).  */ | ||||
| 
 | ||||
| void v_p_2() | ||||
| { | ||||
|   int i, j, k; | ||||
|   double ina[1024], inb[1024], out[1024], acc; | ||||
| 
 | ||||
|   for (j = 0; j < 32; j++) | ||||
|     for (i = 0; i < 32; i++) | ||||
|       { | ||||
|         ina[j * 32 + i] = (i == j) ? 2.0 : 0.0; | ||||
| 	inb[j * 32 + i] = (double) (i + j); | ||||
|       } | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       private(acc) copyin(ina, inb) copyout(out) | ||||
|   { | ||||
|     #pragma acc loop gang worker | ||||
|     for (k = 0; k < 32; k++) | ||||
|       for (j = 0; j < 32; j++) | ||||
|         { | ||||
| 	  acc = 0; | ||||
| 
 | ||||
| 	  #pragma acc loop vector reduction(+:acc) | ||||
| 	  for (i = 0; i < 32; i++) | ||||
| 	    acc += ina[k * 32 + i] * inb[i * 32 + j]; | ||||
| 
 | ||||
| 	  out[k * 32 + j] = acc; | ||||
| 	} | ||||
|   } | ||||
| 
 | ||||
|   for (j = 0; j < 32; j++) | ||||
|     for (i = 0; i < 32; i++) | ||||
|       assert (out[j * 32 + i] == (i + j) * 2); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /* Test of reduction on loop directive (workers, private reduction
 | ||||
|    variable).  */ | ||||
| 
 | ||||
| void w_p_1() | ||||
| { | ||||
|   int i, j, arr[1024], out[32], res = 0, hres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 1024; i++) | ||||
|     arr[i] = i; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       private(res) copyout(out) | ||||
|   { | ||||
|     #pragma acc loop gang | ||||
|     for (j = 0; j < 32; j++) | ||||
|       { | ||||
|         res = 0; | ||||
| 
 | ||||
| 	#pragma acc loop worker reduction(+:res) | ||||
| 	for (i = 0; i < 32; i++) | ||||
| 	  res += arr[j * 32 + i]; | ||||
| 
 | ||||
| 	out[j] = res; | ||||
|       } | ||||
|   } | ||||
| 
 | ||||
|   for (j = 0; j < 32; j++) | ||||
|     { | ||||
|       hres = 0; | ||||
| 
 | ||||
|       for (i = 0; i < 32; i++) | ||||
| 	hres += arr[j * 32 + i]; | ||||
| 
 | ||||
|       assert (out[j] == hres); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /* Test of reduction on loop directive (workers and vectors, private reduction
 | ||||
|    variable).  */ | ||||
| 
 | ||||
| void wv_p_1() | ||||
| { | ||||
|   int i, j, arr[1024], out[32], res = 0, hres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 1024; i++) | ||||
|     arr[i] = i; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       private(res) copyout(out) | ||||
|   { | ||||
|     #pragma acc loop gang | ||||
|     for (j = 0; j < 32; j++) | ||||
|       { | ||||
|         res = 0; | ||||
| 
 | ||||
| 	#pragma acc loop worker vector reduction(+:res) | ||||
| 	for (i = 0; i < 32; i++) | ||||
| 	  res += arr[j * 32 + i]; | ||||
| 
 | ||||
| 	out[j] = res; | ||||
|       } | ||||
|   } | ||||
| 
 | ||||
|   for (j = 0; j < 32; j++) | ||||
|     { | ||||
|       hres = 0; | ||||
| 
 | ||||
|       for (i = 0; i < 32; i++) | ||||
| 	hres += arr[j * 32 + i]; | ||||
| 
 | ||||
|       assert (out[j] == hres); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /* Test of reduction on loop directive (workers and vectors, private reduction
 | ||||
|    variable).  */ | ||||
| 
 | ||||
| void wv_p_2() | ||||
| { | ||||
|   int i, j, arr[32768], out[32], res = 0, hres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 32768; i++) | ||||
|     arr[i] = i; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       private(res) copyout(out) | ||||
|   { | ||||
|     #pragma acc loop gang | ||||
|     for (j = 0; j < 32; j++) | ||||
|       { | ||||
|         res = j; | ||||
| 
 | ||||
| 	#pragma acc loop worker reduction(+:res) | ||||
| 	for (i = 0; i < 1024; i++) | ||||
| 	  res += arr[j * 1024 + i]; | ||||
| 
 | ||||
| 	#pragma acc loop vector reduction(+:res) | ||||
| 	for (i = 1023; i >= 0; i--) | ||||
| 	  res += arr[j * 1024 + i]; | ||||
| 
 | ||||
| 	out[j] = res; | ||||
|       } | ||||
|   } | ||||
| 
 | ||||
|   for (j = 0; j < 32; j++) | ||||
|     { | ||||
|       hres = j; | ||||
| 
 | ||||
|       for (i = 0; i < 1024; i++) | ||||
| 	hres += arr[j * 1024 + i] * 2; | ||||
| 
 | ||||
|       assert (out[j] == hres); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /* Test of reduction on loop directive (workers and vectors, private reduction
 | ||||
|    variable: gang-redundant mode).  */ | ||||
| 
 | ||||
| void wv_p_3() | ||||
| { | ||||
|   int i, arr[1024], out[32], res = 0, hres = 0; | ||||
| 
 | ||||
|   for (i = 0; i < 1024; i++) | ||||
|     arr[i] = i ^ 33; | ||||
| 
 | ||||
|   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ | ||||
| 		       private(res) copyin(arr) copyout(out) | ||||
|   { | ||||
|     /* Private variables aren't initialized by default in openacc.  */ | ||||
|     res = 0; | ||||
| 
 | ||||
|     /* "res" should be available at the end of the following loop (and should
 | ||||
|        have the same value redundantly in each gang).  */ | ||||
|     #pragma acc loop worker vector reduction(+:res) | ||||
|     for (i = 0; i < 1024; i++) | ||||
|       res += arr[i]; | ||||
| 
 | ||||
|     #pragma acc loop gang (static: 1) | ||||
|     for (i = 0; i < 32; i++) | ||||
|       out[i] = res; | ||||
|   } | ||||
| 
 | ||||
|   for (i = 0; i < 1024; i++) | ||||
|     hres += arr[i]; | ||||
| 
 | ||||
|   for (i = 0; i < 32; i++) | ||||
|     assert (out[i] == hres); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| int main() | ||||
| { | ||||
|   g_np_1(); | ||||
|   gv_np_1(); | ||||
|   gw_np_1(); | ||||
|   gwv_np_1(); | ||||
|   gwv_np_2(); | ||||
|   gwv_np_3(); | ||||
|   gwv_np_4(); | ||||
|   v_p_1(); | ||||
|   v_p_2(); | ||||
|   w_p_1(); | ||||
|   wv_p_1(); | ||||
|   wv_p_2(); | ||||
|   wv_p_3(); | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Thomas Schwinge
						Thomas Schwinge