/* OpenACC Runtime Library Definitions. Copyright (C) 2013-2019 Free Software Foundation, Inc. Contributed by Mentor Embedded. This file is part of the GNU Offloading and Multi Processing Library (libgomp). Libgomp is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ #include #include #include "openacc.h" #include "libgomp.h" #include "oacc-int.h" static struct goacc_thread * get_goacc_thread (void) { struct goacc_thread *thr = goacc_thread (); if (!thr || !thr->dev) gomp_fatal ("no device active"); return thr; } static struct gomp_device_descr * get_goacc_thread_device (void) { struct goacc_thread *thr = goacc_thread (); if (!thr || !thr->dev) gomp_fatal ("no device active"); return thr->dev; } static int validate_async_val (int async) { if (!async_valid_p (async)) gomp_fatal ("invalid async-argument: %d", async); if (async == acc_async_sync) return -1; if (async == acc_async_noval) return 0; if (async >= 0) /* TODO: we reserve 0 for acc_async_noval before we can clarify the semantics of "default_async". */ return 1 + async; else __builtin_unreachable (); } /* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This might return NULL if no asyncqueue is to be used. Otherwise, if CREATE, create the asyncqueue if it doesn't exist yet. */ attribute_hidden struct goacc_asyncqueue * lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async) { async = validate_async_val (async); if (async < 0) return NULL; struct goacc_asyncqueue *ret_aq = NULL; struct gomp_device_descr *dev = thr->dev; gomp_mutex_lock (&dev->openacc.async.lock); if (!create && (async >= dev->openacc.async.nasyncqueue || !dev->openacc.async.asyncqueue[async])) goto end; if (async >= dev->openacc.async.nasyncqueue) { int diff = async + 1 - dev->openacc.async.nasyncqueue; dev->openacc.async.asyncqueue = gomp_realloc (dev->openacc.async.asyncqueue, sizeof (goacc_aq) * (async + 1)); memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue, 0, sizeof (goacc_aq) * diff); dev->openacc.async.nasyncqueue = async + 1; } if (!dev->openacc.async.asyncqueue[async]) { dev->openacc.async.asyncqueue[async] = dev->openacc.async.construct_func (); if (!dev->openacc.async.asyncqueue[async]) { gomp_mutex_unlock (&dev->openacc.async.lock); gomp_fatal ("async %d creation failed", async); } /* Link new async queue into active list. */ goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list)); n->aq = dev->openacc.async.asyncqueue[async]; n->next = dev->openacc.async.active; dev->openacc.async.active = n; } ret_aq = dev->openacc.async.asyncqueue[async]; end: gomp_mutex_unlock (&dev->openacc.async.lock); return ret_aq; } /* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This might return NULL if no asyncqueue is to be used. Otherwise, create the asyncqueue if it doesn't exist yet. */ attribute_hidden struct goacc_asyncqueue * get_goacc_asyncqueue (int async) { struct goacc_thread *thr = get_goacc_thread (); return lookup_goacc_asyncqueue (thr, true, async); } int acc_async_test (int async) { struct goacc_thread *thr = goacc_thread (); if (!thr || !thr->dev) gomp_fatal ("no device active"); goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async); if (!aq) return 1; else return thr->dev->openacc.async.test_func (aq); } int acc_async_test_all (void) { struct goacc_thread *thr = get_goacc_thread (); int ret = 1; gomp_mutex_lock (&thr->dev->openacc.async.lock); for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next) if (!thr->dev->openacc.async.test_func (l->aq)) { ret = 0; break; } gomp_mutex_unlock (&thr->dev->openacc.async.lock); return ret; } void acc_wait (int async) { struct goacc_thread *thr = get_goacc_thread (); goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async); if (aq && !thr->dev->openacc.async.synchronize_func (aq)) gomp_fatal ("wait on %d failed", async); } /* acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait. */ #ifdef HAVE_ATTRIBUTE_ALIAS strong_alias (acc_wait, acc_async_wait) #else void acc_async_wait (int async) { acc_wait (async); } #endif void acc_wait_async (int async1, int async2) { struct goacc_thread *thr = get_goacc_thread (); goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1); /* TODO: Is this also correct for acc_async_sync, assuming that in this case, we'll always be synchronous anyways? */ if (!aq1) return; goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2); /* An async queue is always synchronized with itself. */ if (aq1 == aq2) return; if (aq2) { if (!thr->dev->openacc.async.serialize_func (aq1, aq2)) gomp_fatal ("ordering of async ids %d and %d failed", async1, async2); } else { /* TODO: Local thread synchronization. Necessary for the "async2 == acc_async_sync" case, or can just skip? */ if (!thr->dev->openacc.async.synchronize_func (aq1)) gomp_fatal ("wait on %d failed", async1); } } void acc_wait_all (void) { struct gomp_device_descr *dev = get_goacc_thread_device (); bool ret = true; gomp_mutex_lock (&dev->openacc.async.lock); for (goacc_aq_list l = dev->openacc.async.active; l; l = l->next) ret &= dev->openacc.async.synchronize_func (l->aq); gomp_mutex_unlock (&dev->openacc.async.lock); if (!ret) gomp_fatal ("wait all failed"); } /* acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all. */ #ifdef HAVE_ATTRIBUTE_ALIAS strong_alias (acc_wait_all, acc_async_wait_all) #else void acc_async_wait_all (void) { acc_wait_all (); } #endif void acc_wait_all_async (int async) { struct goacc_thread *thr = get_goacc_thread (); goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async); bool ret = true; gomp_mutex_lock (&thr->dev->openacc.async.lock); for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next) { if (waiting_queue) ret &= thr->dev->openacc.async.serialize_func (l->aq, waiting_queue); else /* TODO: Local thread synchronization. Necessary for the "async2 == acc_async_sync" case, or can just skip? */ ret &= thr->dev->openacc.async.synchronize_func (l->aq); } gomp_mutex_unlock (&thr->dev->openacc.async.lock); if (!ret) gomp_fatal ("wait all async(%d) failed", async); } attribute_hidden void goacc_async_free (struct gomp_device_descr *devicep, struct goacc_asyncqueue *aq, void *ptr) { if (!aq) free (ptr); else devicep->openacc.async.queue_callback_func (aq, free, ptr); } /* This function initializes the asyncqueues for the device specified by DEVICEP. TODO DEVICEP must be locked on entry, and remains locked on return. */ attribute_hidden void goacc_init_asyncqueues (struct gomp_device_descr *devicep) { devicep->openacc.async.nasyncqueue = 0; devicep->openacc.async.asyncqueue = NULL; devicep->openacc.async.active = NULL; gomp_mutex_init (&devicep->openacc.async.lock); } /* This function finalizes the asyncqueues for the device specified by DEVICEP. TODO DEVICEP must be locked on entry, and remains locked on return. */ attribute_hidden bool goacc_fini_asyncqueues (struct gomp_device_descr *devicep) { bool ret = true; gomp_mutex_lock (&devicep->openacc.async.lock); if (devicep->openacc.async.nasyncqueue > 0) { goacc_aq_list next; for (goacc_aq_list l = devicep->openacc.async.active; l; l = next) { ret &= devicep->openacc.async.destruct_func (l->aq); next = l->next; free (l); } free (devicep->openacc.async.asyncqueue); devicep->openacc.async.nasyncqueue = 0; devicep->openacc.async.asyncqueue = NULL; devicep->openacc.async.active = NULL; } gomp_mutex_unlock (&devicep->openacc.async.lock); gomp_mutex_destroy (&devicep->openacc.async.lock); return ret; }