mirror of git://gcc.gnu.org/git/gcc.git
Add changes to profiling interface from OG8 branch
This bundles up the parts of the profiling code from the OG8 branch that were not included in the upstream patch. libgomp/ChangeLog * Makefile.am (libgomp_la_SOURCES): Add oacc-profiling-acc_register_library.c. * Makefile.in: Regenerate. * libgomp.texi: Remove paragraph about acc_register_library. * oacc-init.c (get_property_any): Add profiling code. * oacc-parallel.c (GOACC_parallel_keyed_internal): Set device_api for profiling. * oacc-profiling-acc_register_library.c: New file. * oacc-profiling.c (goacc_profiling_initialize): Call acc_register_library. Avoid duplicate registration. (acc_register_library): Remove. * config/nvptx/oacc-profiling-acc_register_library.c: New empty file. * config/nvptx/oacc-profiling.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: Remove call to acc_register_library. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. Co-Authored-By: Maciej W. Rozycki <macro@codesourcery.com>
This commit is contained in:
parent
4566c9843f
commit
63caf6bc2f
|
|
@ -70,7 +70,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
|
|||
target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
|
||||
oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
|
||||
priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
|
||||
oacc-target.c target-indirect.c
|
||||
oacc-target.c target-indirect.c oacc-profiling-acc_register_library.c
|
||||
|
||||
include $(top_srcdir)/plugin/Makefrag.am
|
||||
|
||||
|
|
|
|||
|
|
@ -219,7 +219,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
|
|||
oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \
|
||||
oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \
|
||||
affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
|
||||
oacc-target.lo target-indirect.lo $(am__objects_1)
|
||||
oacc-target.lo oacc-profiling-acc_register_library.lo \
|
||||
target-indirect.lo $(am__objects_1)
|
||||
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
|
||||
AM_V_P = $(am__v_P_@AM_V@)
|
||||
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
|
||||
|
|
@ -552,7 +553,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
|
|||
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
|
||||
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
|
||||
affinity-fmt.c teams.c allocator.c oacc-profiling.c \
|
||||
oacc-target.c target-indirect.c $(am__append_3)
|
||||
oacc-target.c oacc-profiling-acc_register_library.c \
|
||||
target-indirect.c $(am__append_3)
|
||||
|
||||
# Nvidia PTX OpenACC plugin.
|
||||
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
|
||||
|
|
@ -768,6 +770,7 @@ distclean-compile:
|
|||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-mem.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling-acc_register_library.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-target.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
|
||||
|
|
|
|||
|
|
@ -6377,14 +6377,6 @@ We just handle one case specially, as required by CUDA 9.0
|
|||
@code{acc_ev_device_init_start}, @code{acc_ev_device_init_end}
|
||||
callbacks.
|
||||
|
||||
We're not yet implementing initialization via a
|
||||
@code{acc_register_library} function that is either statically linked
|
||||
in, or dynamically via @env{LD_PRELOAD}.
|
||||
Initialization via @code{acc_register_library} functions dynamically
|
||||
loaded via the @env{ACC_PROFLIB} environment variable does work, as
|
||||
does directly calling @code{acc_prof_register},
|
||||
@code{acc_prof_unregister}, @code{acc_prof_lookup}.
|
||||
|
||||
As currently there are no inquiry functions defined, calls to
|
||||
@code{acc_prof_lookup} always returns @code{NULL}.
|
||||
|
||||
|
|
|
|||
|
|
@ -810,6 +810,16 @@ get_property_any (int ord, acc_device_t d, acc_device_property_t prop)
|
|||
if (d == acc_device_current && thr && thr->dev)
|
||||
return thr->dev->openacc.get_property_func (thr->dev->target_id, prop);
|
||||
|
||||
acc_prof_info prof_info;
|
||||
acc_api_info api_info;
|
||||
bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
||||
|
||||
if (profiling_p)
|
||||
{
|
||||
prof_info.device_type = d;
|
||||
prof_info.device_number = ord;
|
||||
}
|
||||
|
||||
gomp_mutex_lock (&acc_device_lock);
|
||||
|
||||
struct gomp_device_descr *dev = resolve_device (d, true);
|
||||
|
|
@ -830,7 +840,16 @@ get_property_any (int ord, acc_device_t d, acc_device_property_t prop)
|
|||
|
||||
assert (dev);
|
||||
|
||||
return dev->openacc.get_property_func (dev->target_id, prop);
|
||||
union goacc_property_value propval =
|
||||
dev->openacc.get_property_func (dev->target_id, prop);
|
||||
|
||||
if (profiling_p)
|
||||
{
|
||||
thr->prof_info = NULL;
|
||||
thr->api_info = NULL;
|
||||
}
|
||||
|
||||
return propval;
|
||||
}
|
||||
|
||||
size_t
|
||||
|
|
|
|||
|
|
@ -367,6 +367,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
|
|||
fn (hostaddrs);
|
||||
goto out_prof;
|
||||
}
|
||||
else if (profiling_p)
|
||||
api_info.device_api = acc_device_api_cuda;
|
||||
|
||||
/* Default: let the runtime choose. */
|
||||
for (i = 0; i != GOMP_DIM_MAX; i++)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,39 @@
|
|||
/* Copyright (C) 2017 Free Software Foundation, Inc.
|
||||
|
||||
Contributed by Mentor Embedded.
|
||||
|
||||
This file is part of the GNU Offloading and Multi Processing Library
|
||||
(libgomp).
|
||||
|
||||
Libgomp is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* This file provides an stub acc_register_library function. It's in a
|
||||
separate file so that this function can easily be overridden when linking
|
||||
statically. */
|
||||
|
||||
#include "libgomp.h"
|
||||
#include "acc_prof.h"
|
||||
|
||||
void
|
||||
acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
|
||||
acc_prof_lookup_func lookup)
|
||||
{
|
||||
gomp_debug (0, "dummy %s\n", __FUNCTION__);
|
||||
}
|
||||
|
|
@ -104,7 +104,12 @@ goacc_profiling_initialize (void)
|
|||
for (int i = 0; i < acc_ev_last; ++i)
|
||||
goacc_prof_callbacks_enabled[i] = true;
|
||||
|
||||
|
||||
/* We are to invoke an external acc_register_library routine, defaulting to
|
||||
our stub oacc-profiling-acc_register_library.c:acc_register_library
|
||||
implementation. */
|
||||
gomp_debug (0, "%s: calling acc_register_library\n", __FUNCTION__);
|
||||
//TODO.
|
||||
acc_register_library (acc_prof_register, acc_prof_unregister, NULL);
|
||||
#ifdef PLUGIN_SUPPORT
|
||||
char *acc_proflibs = secure_getenv ("ACC_PROFLIB");
|
||||
while (acc_proflibs != NULL && acc_proflibs[0] != '\0')
|
||||
|
|
@ -141,10 +146,20 @@ goacc_profiling_initialize (void)
|
|||
= dlsym (dl_handle, "acc_register_library");
|
||||
if (a_r_l == NULL)
|
||||
goto dl_fail;
|
||||
gomp_debug (0, " %s: calling %s:acc_register_library\n",
|
||||
__FUNCTION__, acc_proflib);
|
||||
a_r_l (acc_prof_register, acc_prof_unregister,
|
||||
acc_prof_lookup);
|
||||
/* Avoid duplicate registration, for example if the same shared
|
||||
library is specified in LD_PRELOAD and ACC_PROFLIB -- which
|
||||
TAU 2.26 does when using "tau_exec -openacc". */
|
||||
if (a_r_l != acc_register_library)
|
||||
{
|
||||
gomp_debug (0, " %s: calling %s:acc_register_library\n",
|
||||
__FUNCTION__, acc_proflib);
|
||||
//TODO.
|
||||
a_r_l (acc_prof_register, acc_prof_unregister, NULL);
|
||||
}
|
||||
else
|
||||
gomp_debug (0, " %s: skipping duplicate"
|
||||
" %s:acc_register_library\n",
|
||||
__FUNCTION__, acc_proflib);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -487,13 +502,6 @@ acc_prof_lookup (const char *name)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
void
|
||||
acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
|
||||
acc_prof_lookup_func lookup)
|
||||
{
|
||||
gomp_fatal ("TODO");
|
||||
}
|
||||
|
||||
/* Prepare to dispatch events? */
|
||||
|
||||
bool
|
||||
|
|
|
|||
|
|
@ -114,8 +114,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look
|
|||
|
||||
int main()
|
||||
{
|
||||
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
|
||||
|
||||
STATE_OP (state, = 0);
|
||||
reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);
|
||||
reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);
|
||||
|
|
|
|||
|
|
@ -270,8 +270,6 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
|
|||
|
||||
int main()
|
||||
{
|
||||
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
|
||||
|
||||
STATE_OP (state, = 0);
|
||||
reg (acc_ev_device_init_start, cb_device_init_start, acc_reg);
|
||||
reg (acc_ev_device_init_end, cb_device_init_end, acc_reg);
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ static int state = -1;
|
|||
static acc_device_t acc_device_type;
|
||||
static int acc_device_num;
|
||||
static int num_gangs, num_workers, vector_length;
|
||||
static int async;
|
||||
|
||||
|
||||
static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
|
||||
|
|
@ -76,7 +77,7 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
|
|||
assert (prof_info->device_type == acc_device_type);
|
||||
assert (prof_info->device_number == acc_device_num);
|
||||
assert (prof_info->thread_id == -1);
|
||||
assert (prof_info->async == acc_async_noval);
|
||||
assert (prof_info->async == async);
|
||||
assert (prof_info->async_queue == prof_info->async);
|
||||
assert (prof_info->src_file == NULL);
|
||||
assert (prof_info->func_name == NULL);
|
||||
|
|
@ -166,8 +167,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look
|
|||
|
||||
int main()
|
||||
{
|
||||
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
|
||||
|
||||
STATE_OP (state, = 0);
|
||||
reg (acc_ev_enqueue_launch_start, cb_enqueue_launch_start, acc_reg);
|
||||
assert (state == 0);
|
||||
|
|
@ -176,8 +175,10 @@ int main()
|
|||
acc_device_num = acc_get_device_num (acc_device_type);
|
||||
assert (state == 0);
|
||||
|
||||
/* Parallelism dimensions: compiler/runtime decides. */
|
||||
STATE_OP (state, = 0);
|
||||
/* Implicit async. */
|
||||
async = acc_async_noval;
|
||||
/* Parallelism dimensions: compiler/runtime decides. */
|
||||
num_gangs = num_workers = vector_length = 0;
|
||||
{
|
||||
#define N 100
|
||||
|
|
@ -203,8 +204,10 @@ int main()
|
|||
#undef N
|
||||
}
|
||||
|
||||
/* Parallelism dimensions: literal. */
|
||||
STATE_OP (state, = 0);
|
||||
/* Explicit async: without argument. */
|
||||
async = acc_async_noval;
|
||||
/* Parallelism dimensions: literal. */
|
||||
num_gangs = 30;
|
||||
num_workers = 3;
|
||||
vector_length = 5;
|
||||
|
|
@ -212,6 +215,7 @@ int main()
|
|||
#define N 100
|
||||
int x[N];
|
||||
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \
|
||||
async \
|
||||
num_gangs (30) num_workers (3) vector_length (5)
|
||||
/* { dg-note {OpenACC 'kernels' decomposition: variable 'i' declared in block requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
|
||||
{ dg-note {variable 'i' made addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||
|
|
@ -234,8 +238,10 @@ int main()
|
|||
#undef N
|
||||
}
|
||||
|
||||
/* Parallelism dimensions: variable. */
|
||||
STATE_OP (state, = 0);
|
||||
/* Explicit async: variable. */
|
||||
async = 123;
|
||||
/* Parallelism dimensions: variable. */
|
||||
num_gangs = 22;
|
||||
num_workers = 5;
|
||||
vector_length = 7;
|
||||
|
|
@ -243,6 +249,7 @@ int main()
|
|||
#define N 100
|
||||
int x[N];
|
||||
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \
|
||||
async (async) \
|
||||
num_gangs (num_gangs) num_workers (num_workers) vector_length (vector_length)
|
||||
/* { dg-note {OpenACC 'kernels' decomposition: variable 'i' declared in block requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
|
||||
{ dg-note {variable 'i' made addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||
|
|
|
|||
|
|
@ -830,8 +830,6 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
|
|||
|
||||
int main()
|
||||
{
|
||||
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
|
||||
|
||||
STATE_OP (state, = 0);
|
||||
reg (acc_ev_device_init_start, cb_device_init_start, acc_reg);
|
||||
reg (acc_ev_device_init_end, cb_device_init_end, acc_reg);
|
||||
|
|
|
|||
|
|
@ -143,8 +143,6 @@ typedef struct E
|
|||
|
||||
int main()
|
||||
{
|
||||
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
|
||||
|
||||
A A1;
|
||||
DEBUG_printf ("s=%zd, vb=%zd\n", sizeof A1, VALID_BYTES_A);
|
||||
assert (VALID_BYTES_A <= sizeof A1);
|
||||
|
|
|
|||
|
|
@ -56,8 +56,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look
|
|||
|
||||
int main()
|
||||
{
|
||||
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
|
||||
|
||||
ev_count = 0;
|
||||
|
||||
/* Trigger tests done in 'cb_*' functions. */
|
||||
|
|
|
|||
Loading…
Reference in New Issue