Add changes to profiling interface from OG8 branch

This bundles up the parts of the profiling code from the OG8 branch that were
not included in the upstream patch.

libgomp/ChangeLog
	* Makefile.am (libgomp_la_SOURCES): Add
	oacc-profiling-acc_register_library.c.
	* Makefile.in: Regenerate.
	* libgomp.texi: Remove paragraph about acc_register_library.
	* oacc-init.c (get_property_any): Add profiling code.
	* oacc-parallel.c (GOACC_parallel_keyed_internal): Set device_api for
	profiling.
	* oacc-profiling-acc_register_library.c: New file.
	* oacc-profiling.c (goacc_profiling_initialize): Call
	acc_register_library.  Avoid duplicate registration.
	(acc_register_library): Remove.
	* config/nvptx/oacc-profiling-acc_register_library.c:
	New empty file.
	* config/nvptx/oacc-profiling.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: Remove
	call to acc_register_library.
	* testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise.

Co-Authored-By: Maciej W. Rozycki  <macro@codesourcery.com>
This commit is contained in:
Thomas Schwinge 2019-06-21 10:40:38 -07:00 committed by Sandra Loosemore
parent 4566c9843f
commit 63caf6bc2f
15 changed files with 100 additions and 40 deletions

View File

@ -70,7 +70,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
oacc-target.c target-indirect.c
oacc-target.c target-indirect.c oacc-profiling-acc_register_library.c
include $(top_srcdir)/plugin/Makefrag.am

View File

@ -219,7 +219,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \
oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \
affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
oacc-target.lo target-indirect.lo $(am__objects_1)
oacc-target.lo oacc-profiling-acc_register_library.lo \
target-indirect.lo $(am__objects_1)
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@ -552,7 +553,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c allocator.c oacc-profiling.c \
oacc-target.c target-indirect.c $(am__append_3)
oacc-target.c oacc-profiling-acc_register_library.c \
target-indirect.c $(am__append_3)
# Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@ -768,6 +770,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-mem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling-acc_register_library.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-target.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@

View File

View File

@ -6377,14 +6377,6 @@ We just handle one case specially, as required by CUDA 9.0
@code{acc_ev_device_init_start}, @code{acc_ev_device_init_end}
callbacks.
We're not yet implementing initialization via a
@code{acc_register_library} function that is either statically linked
in, or dynamically via @env{LD_PRELOAD}.
Initialization via @code{acc_register_library} functions dynamically
loaded via the @env{ACC_PROFLIB} environment variable does work, as
does directly calling @code{acc_prof_register},
@code{acc_prof_unregister}, @code{acc_prof_lookup}.
As currently there are no inquiry functions defined, calls to
@code{acc_prof_lookup} always return @code{NULL}.

View File

@ -810,6 +810,16 @@ get_property_any (int ord, acc_device_t d, acc_device_property_t prop)
if (d == acc_device_current && thr && thr->dev)
return thr->dev->openacc.get_property_func (thr->dev->target_id, prop);
acc_prof_info prof_info;
acc_api_info api_info;
bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
if (profiling_p)
{
prof_info.device_type = d;
prof_info.device_number = ord;
}
gomp_mutex_lock (&acc_device_lock);
struct gomp_device_descr *dev = resolve_device (d, true);
@ -830,7 +840,16 @@ get_property_any (int ord, acc_device_t d, acc_device_property_t prop)
assert (dev);
return dev->openacc.get_property_func (dev->target_id, prop);
union goacc_property_value propval =
dev->openacc.get_property_func (dev->target_id, prop);
if (profiling_p)
{
thr->prof_info = NULL;
thr->api_info = NULL;
}
return propval;
}
size_t

View File

@ -367,6 +367,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
fn (hostaddrs);
goto out_prof;
}
else if (profiling_p)
api_info.device_api = acc_device_api_cuda;
/* Default: let the runtime choose. */
for (i = 0; i != GOMP_DIM_MAX; i++)

View File

@ -0,0 +1,39 @@
/* Copyright (C) 2017 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file provides a stub acc_register_library function. It's in a
separate file so that this function can easily be overridden when linking
statically. */
#include "libgomp.h"
#include "acc_prof.h"
/* Default acc_register_library: a do-nothing stub.  REG, UNREG and LOOKUP
   are accepted but not used; the only action taken is to emit a debug
   message that contains this function's own name.  */
void
acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
acc_prof_lookup_func lookup)
{
/* Leave a trace in the debug output showing that this stub was the
   acc_register_library that got called.  */
gomp_debug (0, "dummy %s\n", __FUNCTION__);
}

View File

@ -104,7 +104,12 @@ goacc_profiling_initialize (void)
for (int i = 0; i < acc_ev_last; ++i)
goacc_prof_callbacks_enabled[i] = true;
/* We are to invoke an external acc_register_library routine, defaulting to
our stub oacc-profiling-acc_register_library.c:acc_register_library
implementation. */
gomp_debug (0, "%s: calling acc_register_library\n", __FUNCTION__);
//TODO.
acc_register_library (acc_prof_register, acc_prof_unregister, NULL);
#ifdef PLUGIN_SUPPORT
char *acc_proflibs = secure_getenv ("ACC_PROFLIB");
while (acc_proflibs != NULL && acc_proflibs[0] != '\0')
@ -141,10 +146,20 @@ goacc_profiling_initialize (void)
= dlsym (dl_handle, "acc_register_library");
if (a_r_l == NULL)
goto dl_fail;
gomp_debug (0, " %s: calling %s:acc_register_library\n",
__FUNCTION__, acc_proflib);
a_r_l (acc_prof_register, acc_prof_unregister,
acc_prof_lookup);
/* Avoid duplicate registration, for example if the same shared
library is specified in LD_PRELOAD and ACC_PROFLIB -- which
TAU 2.26 does when using "tau_exec -openacc". */
if (a_r_l != acc_register_library)
{
gomp_debug (0, " %s: calling %s:acc_register_library\n",
__FUNCTION__, acc_proflib);
//TODO.
a_r_l (acc_prof_register, acc_prof_unregister, NULL);
}
else
gomp_debug (0, " %s: skipping duplicate"
" %s:acc_register_library\n",
__FUNCTION__, acc_proflib);
}
else
{
@ -487,13 +502,6 @@ acc_prof_lookup (const char *name)
return NULL;
}
/* Placeholder acc_register_library: REG, UNREG and LOOKUP are not used.
   Any call ends up in gomp_fatal with the message "TODO".  */
void
acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
acc_prof_lookup_func lookup)
{
/* Not implemented here; report that through gomp_fatal.  */
gomp_fatal ("TODO");
}
/* Prepare to dispatch events? */
bool

View File

@ -114,8 +114,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look
int main()
{
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
STATE_OP (state, = 0);
reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);
reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);

View File

@ -270,8 +270,6 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
int main()
{
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
STATE_OP (state, = 0);
reg (acc_ev_device_init_start, cb_device_init_start, acc_reg);
reg (acc_ev_device_init_end, cb_device_init_end, acc_reg);

View File

@ -59,6 +59,7 @@ static int state = -1;
static acc_device_t acc_device_type;
static int acc_device_num;
static int num_gangs, num_workers, vector_length;
static int async;
static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
@ -76,7 +77,7 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == acc_async_noval);
assert (prof_info->async == async);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
@ -166,8 +167,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look
int main()
{
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
STATE_OP (state, = 0);
reg (acc_ev_enqueue_launch_start, cb_enqueue_launch_start, acc_reg);
assert (state == 0);
@ -176,8 +175,10 @@ int main()
acc_device_num = acc_get_device_num (acc_device_type);
assert (state == 0);
/* Parallelism dimensions: compiler/runtime decides. */
STATE_OP (state, = 0);
/* Implicit async. */
async = acc_async_noval;
/* Parallelism dimensions: compiler/runtime decides. */
num_gangs = num_workers = vector_length = 0;
{
#define N 100
@ -203,8 +204,10 @@ int main()
#undef N
}
/* Parallelism dimensions: literal. */
STATE_OP (state, = 0);
/* Explicit async: without argument. */
async = acc_async_noval;
/* Parallelism dimensions: literal. */
num_gangs = 30;
num_workers = 3;
vector_length = 5;
@ -212,6 +215,7 @@ int main()
#define N 100
int x[N];
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \
async \
num_gangs (30) num_workers (3) vector_length (5)
/* { dg-note {OpenACC 'kernels' decomposition: variable 'i' declared in block requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
{ dg-note {variable 'i' made addressable} {} { target *-*-* } l_compute$c_compute } */
@ -234,8 +238,10 @@ int main()
#undef N
}
/* Parallelism dimensions: variable. */
STATE_OP (state, = 0);
/* Explicit async: variable. */
async = 123;
/* Parallelism dimensions: variable. */
num_gangs = 22;
num_workers = 5;
vector_length = 7;
@ -243,6 +249,7 @@ int main()
#define N 100
int x[N];
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \
async (async) \
num_gangs (num_gangs) num_workers (num_workers) vector_length (vector_length)
/* { dg-note {OpenACC 'kernels' decomposition: variable 'i' declared in block requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
{ dg-note {variable 'i' made addressable} {} { target *-*-* } l_compute$c_compute } */

View File

@ -830,8 +830,6 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
int main()
{
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
STATE_OP (state, = 0);
reg (acc_ev_device_init_start, cb_device_init_start, acc_reg);
reg (acc_ev_device_init_end, cb_device_init_end, acc_reg);

View File

@ -143,8 +143,6 @@ typedef struct E
int main()
{
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
A A1;
DEBUG_printf ("s=%zd, vb=%zd\n", sizeof A1, VALID_BYTES_A);
assert (VALID_BYTES_A <= sizeof A1);

View File

@ -56,8 +56,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look
int main()
{
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
ev_count = 0;
/* Trigger tests done in 'cb_*' functions. */