mirror of git://gcc.gnu.org/git/gcc.git
omp-low.c (struct oacc_loop): Add 'inner' field.
gcc/
	* omp-low.c (struct oacc_loop): Add 'inner' field.
	(new_oacc_loop_raw): Initialize it to zero.
	(oacc_loop_fixed_partitions): Initialize it.
	(oacc_loop_auto_partitions): Partition outermost loop to outermost
	available partitioning.

gcc/testsuite/
	* c-c++-common/goacc/loop-auto-1.c: Adjust expected warnings.

libgomp/
	* testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Adjust
	expected partitioning.

From-SVN: r235756
This commit is contained in:
parent
87cdf04b1a
commit
5b37e8664b
|
|
@ -1,3 +1,11 @@
|
||||||
|
2016-05-02 Nathan Sidwell <nathan@codesourcery.com>
|
||||||
|
|
||||||
|
* omp-low.c (struct oacc_loop): Add 'inner' field.
|
||||||
|
(new_oacc_loop_raw): Initialize it to zero.
|
||||||
|
(oacc_loop_fixed_partitions): Initialize it.
|
||||||
|
(oacc_loop_auto_partitions): Partition outermost loop to outermost
|
||||||
|
available partitioning.
|
||||||
|
|
||||||
2016-05-02 Claudiu Zissulescu <claziss@synopsys.com>
|
2016-05-02 Claudiu Zissulescu <claziss@synopsys.com>
|
||||||
|
|
||||||
* config/arc/arc.md (mulsidi3): Change operand 0 predicate to
|
* config/arc/arc.md (mulsidi3): Change operand 0 predicate to
|
||||||
|
|
|
||||||
|
|
@ -241,6 +241,7 @@ struct oacc_loop
|
||||||
tree routine; /* Pseudo-loop enclosing a routine. */
|
tree routine; /* Pseudo-loop enclosing a routine. */
|
||||||
|
|
||||||
unsigned mask; /* Partitioning mask. */
|
unsigned mask; /* Partitioning mask. */
|
||||||
|
unsigned inner; /* Partitioning of inner loops. */
|
||||||
unsigned flags; /* Partitioning flags. */
|
unsigned flags; /* Partitioning flags. */
|
||||||
unsigned ifns; /* Contained loop abstraction functions. */
|
unsigned ifns; /* Contained loop abstraction functions. */
|
||||||
tree chunk_size; /* Chunk size. */
|
tree chunk_size; /* Chunk size. */
|
||||||
|
|
@ -18921,7 +18922,7 @@ new_oacc_loop_raw (oacc_loop *parent, location_t loc)
|
||||||
memset (loop->tails, 0, sizeof (loop->tails));
|
memset (loop->tails, 0, sizeof (loop->tails));
|
||||||
loop->routine = NULL_TREE;
|
loop->routine = NULL_TREE;
|
||||||
|
|
||||||
loop->mask = loop->flags = 0;
|
loop->mask = loop->flags = loop->inner = 0;
|
||||||
loop->ifns = 0;
|
loop->ifns = 0;
|
||||||
loop->chunk_size = 0;
|
loop->chunk_size = 0;
|
||||||
loop->head_end = NULL;
|
loop->head_end = NULL;
|
||||||
|
|
@ -19449,8 +19450,11 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
|
||||||
mask_all |= this_mask;
|
mask_all |= this_mask;
|
||||||
|
|
||||||
if (loop->child)
|
if (loop->child)
|
||||||
mask_all |= oacc_loop_fixed_partitions (loop->child,
|
{
|
||||||
outer_mask | this_mask);
|
loop->inner = oacc_loop_fixed_partitions (loop->child,
|
||||||
|
outer_mask | this_mask);
|
||||||
|
mask_all |= loop->inner;
|
||||||
|
}
|
||||||
|
|
||||||
if (loop->sibling)
|
if (loop->sibling)
|
||||||
mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);
|
mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);
|
||||||
|
|
@ -19466,7 +19470,7 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
|
||||||
static unsigned
|
static unsigned
|
||||||
oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
|
oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
|
||||||
{
|
{
|
||||||
unsigned inner_mask = 0;
|
bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
|
||||||
bool noisy = true;
|
bool noisy = true;
|
||||||
|
|
||||||
#ifdef ACCEL_COMPILER
|
#ifdef ACCEL_COMPILER
|
||||||
|
|
@ -19475,16 +19479,33 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
|
||||||
noisy = false;
|
noisy = false;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (loop->child)
|
if (assign && outer_mask < GOMP_DIM_MASK (GOMP_DIM_MAX - 1))
|
||||||
inner_mask |= oacc_loop_auto_partitions (loop->child,
|
|
||||||
outer_mask | loop->mask);
|
|
||||||
|
|
||||||
if ((loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT))
|
|
||||||
{
|
{
|
||||||
|
/* Allocate the outermost loop at the outermost available
|
||||||
|
level. */
|
||||||
|
unsigned this_mask = outer_mask + 1;
|
||||||
|
|
||||||
|
if (!(this_mask & loop->inner))
|
||||||
|
loop->mask = this_mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (loop->child)
|
||||||
|
{
|
||||||
|
unsigned child_mask = outer_mask | loop->mask;
|
||||||
|
|
||||||
|
if (loop->mask || assign)
|
||||||
|
child_mask |= GOMP_DIM_MASK (GOMP_DIM_MAX);
|
||||||
|
|
||||||
|
loop->inner = oacc_loop_auto_partitions (loop->child, child_mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (assign && !loop->mask)
|
||||||
|
{
|
||||||
|
/* Allocate the loop at the innermost available level. */
|
||||||
unsigned this_mask = 0;
|
unsigned this_mask = 0;
|
||||||
|
|
||||||
/* Determine the outermost partitioning used within this loop. */
|
/* Determine the outermost partitioning used within this loop. */
|
||||||
this_mask = inner_mask | GOMP_DIM_MASK (GOMP_DIM_MAX);
|
this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
|
||||||
this_mask = (this_mask & -this_mask);
|
this_mask = (this_mask & -this_mask);
|
||||||
|
|
||||||
/* Pick the partitioning just inside that one. */
|
/* Pick the partitioning just inside that one. */
|
||||||
|
|
@ -19497,17 +19518,20 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
|
||||||
warning_at (loop->loc, 0,
|
warning_at (loop->loc, 0,
|
||||||
"insufficient partitioning available to parallelize loop");
|
"insufficient partitioning available to parallelize loop");
|
||||||
|
|
||||||
if (dump_file)
|
|
||||||
fprintf (dump_file, "Auto loop %s:%d assigned %d\n",
|
|
||||||
LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
|
|
||||||
this_mask);
|
|
||||||
|
|
||||||
loop->mask = this_mask;
|
loop->mask = this_mask;
|
||||||
}
|
}
|
||||||
inner_mask |= loop->mask;
|
|
||||||
|
if (assign && dump_file)
|
||||||
|
fprintf (dump_file, "Auto loop %s:%d assigned %d\n",
|
||||||
|
LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
|
||||||
|
loop->mask);
|
||||||
|
|
||||||
|
unsigned inner_mask = 0;
|
||||||
|
|
||||||
if (loop->sibling)
|
if (loop->sibling)
|
||||||
inner_mask |= oacc_loop_auto_partitions (loop->sibling, outer_mask);
|
inner_mask |= oacc_loop_auto_partitions (loop->sibling, outer_mask);
|
||||||
|
|
||||||
|
inner_mask |= loop->inner | loop->mask;
|
||||||
|
|
||||||
return inner_mask;
|
return inner_mask;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,7 @@
|
||||||
|
2016-05-02 Nathan Sidwell <nathan@codesourcery.com>
|
||||||
|
|
||||||
|
* c-c++-common/goacc/loop-auto-1.c: Adjust expected warnings.
|
||||||
|
|
||||||
2016-05-02 Marek Polacek <polacek@redhat.com>
|
2016-05-02 Marek Polacek <polacek@redhat.com>
|
||||||
|
|
||||||
PR c/70851
|
PR c/70851
|
||||||
|
|
|
||||||
|
|
@ -186,10 +186,10 @@ void Worker (void)
|
||||||
for (int jx = 0; jx < 10; jx++) {}
|
for (int jx = 0; jx < 10; jx++) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
|
#pragma acc loop auto
|
||||||
for (int ix = 0; ix < 10; ix++)
|
for (int ix = 0; ix < 10; ix++)
|
||||||
{
|
{
|
||||||
#pragma acc loop auto
|
#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
|
||||||
for (int jx = 0; jx < 10; jx++)
|
for (int jx = 0; jx < 10; jx++)
|
||||||
{
|
{
|
||||||
#pragma acc loop auto
|
#pragma acc loop auto
|
||||||
|
|
@ -214,10 +214,10 @@ void Vector (void)
|
||||||
#pragma acc loop auto
|
#pragma acc loop auto
|
||||||
for (int ix = 0; ix < 10; ix++) {}
|
for (int ix = 0; ix < 10; ix++) {}
|
||||||
|
|
||||||
#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
|
#pragma acc loop auto
|
||||||
for (int ix = 0; ix < 10; ix++)
|
for (int ix = 0; ix < 10; ix++)
|
||||||
{
|
{
|
||||||
#pragma acc loop auto
|
#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
|
||||||
for (int jx = 0; jx < 10; jx++) {}
|
for (int jx = 0; jx < 10; jx++) {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,8 @@
|
||||||
|
2016-05-02 Nathan Sidwell <nathan@codesourcery.com>
|
||||||
|
|
||||||
|
* testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Adjust
|
||||||
|
expected partitioning.
|
||||||
|
|
||||||
2016-04-29 Cesar Philippidis <cesar@codesourcery.com>
|
2016-04-29 Cesar Philippidis <cesar@codesourcery.com>
|
||||||
|
|
||||||
PR middle-end/70626
|
PR middle-end/70626
|
||||||
|
|
|
||||||
|
|
@ -103,9 +103,11 @@ int vector_1 (int *ary, int size)
|
||||||
|
|
||||||
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
|
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
|
||||||
{
|
{
|
||||||
|
#pragma acc loop gang
|
||||||
|
for (int jx = 0; jx < 1; jx++)
|
||||||
#pragma acc loop auto
|
#pragma acc loop auto
|
||||||
for (int ix = 0; ix < size; ix++)
|
for (int ix = 0; ix < size; ix++)
|
||||||
ary[ix] = place ();
|
ary[ix] = place ();
|
||||||
}
|
}
|
||||||
|
|
||||||
return check (ary, size, 0, 0, 1);
|
return check (ary, size, 0, 0, 1);
|
||||||
|
|
@ -118,7 +120,7 @@ int vector_2 (int *ary, int size)
|
||||||
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
|
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
|
||||||
{
|
{
|
||||||
#pragma acc loop worker
|
#pragma acc loop worker
|
||||||
for (int jx = 0; jx < size / 64; jx++)
|
for (int jx = 0; jx < size / 64; jx++)
|
||||||
#pragma acc loop auto
|
#pragma acc loop auto
|
||||||
for (int ix = 0; ix < 64; ix++)
|
for (int ix = 0; ix < 64; ix++)
|
||||||
ary[ix + jx * 64] = place ();
|
ary[ix + jx * 64] = place ();
|
||||||
|
|
@ -133,30 +135,16 @@ int worker_1 (int *ary, int size)
|
||||||
|
|
||||||
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
|
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
|
||||||
{
|
{
|
||||||
|
#pragma acc loop gang
|
||||||
|
for (int kx = 0; kx < 1; kx++)
|
||||||
#pragma acc loop auto
|
#pragma acc loop auto
|
||||||
for (int jx = 0; jx < size / 64; jx++)
|
for (int jx = 0; jx < size / 64; jx++)
|
||||||
#pragma acc loop vector
|
#pragma acc loop vector
|
||||||
for (int ix = 0; ix < 64; ix++)
|
for (int ix = 0; ix < 64; ix++)
|
||||||
ary[ix + jx * 64] = place ();
|
ary[ix + jx * 64] = place ();
|
||||||
}
|
}
|
||||||
|
|
||||||
return check (ary, size, 0, 1, 1);
|
return check (ary, size, 0, 1, 1);
|
||||||
}
|
|
||||||
|
|
||||||
int worker_2 (int *ary, int size)
|
|
||||||
{
|
|
||||||
clear (ary, size);
|
|
||||||
|
|
||||||
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
|
|
||||||
{
|
|
||||||
#pragma acc loop auto
|
|
||||||
for (int jx = 0; jx < size / 64; jx++)
|
|
||||||
#pragma acc loop auto
|
|
||||||
for (int ix = 0; ix < 64; ix++)
|
|
||||||
ary[ix + jx * 64] = place ();
|
|
||||||
}
|
|
||||||
|
|
||||||
return check (ary, size, 0, 1, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int gang_1 (int *ary, int size)
|
int gang_1 (int *ary, int size)
|
||||||
|
|
@ -193,6 +181,22 @@ int gang_2 (int *ary, int size)
|
||||||
return check (ary, size, 1, 1, 1);
|
return check (ary, size, 1, 1, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int gang_3 (int *ary, int size)
|
||||||
|
{
|
||||||
|
clear (ary, size);
|
||||||
|
|
||||||
|
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
|
||||||
|
{
|
||||||
|
#pragma acc loop auto
|
||||||
|
for (int jx = 0; jx < size / 64; jx++)
|
||||||
|
#pragma acc loop auto
|
||||||
|
for (int ix = 0; ix < 64; ix++)
|
||||||
|
ary[ix + jx * 64] = place ();
|
||||||
|
}
|
||||||
|
|
||||||
|
return check (ary, size, 1, 0, 1);
|
||||||
|
}
|
||||||
|
|
||||||
#define N (32*32*32)
|
#define N (32*32*32)
|
||||||
int main ()
|
int main ()
|
||||||
{
|
{
|
||||||
|
|
@ -214,13 +218,13 @@ int main ()
|
||||||
|
|
||||||
if (worker_1 (ary, N))
|
if (worker_1 (ary, N))
|
||||||
return 1;
|
return 1;
|
||||||
if (worker_2 (ary, N))
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
if (gang_1 (ary, N))
|
if (gang_1 (ary, N))
|
||||||
return 1;
|
return 1;
|
||||||
if (gang_2 (ary, N))
|
if (gang_2 (ary, N))
|
||||||
return 1;
|
return 1;
|
||||||
|
if (gang_3 (ary, N))
|
||||||
|
return 1;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue