// Mirror of git://gcc.gnu.org/git/gcc.git (542 lines, 17 KiB, C++).
/*
    Copyright (c) 2014-2016 Intel Corporation.  All Rights Reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

      * Redistributions of source code must retain the above copyright
        notice, this list of conditions and the following disclaimer.
      * Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.
      * Neither the name of Intel Corporation nor the names of its
        contributors may be used to endorse or promote products derived
        from this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*! \file
    \brief The parts of the runtime library used only on the host
*/

#ifndef OFFLOAD_HOST_H_INCLUDED
#define OFFLOAD_HOST_H_INCLUDED

#ifndef TARGET_WINNT
#include <unistd.h>
#endif // TARGET_WINNT
#include <stdlib.h>
#include <list>
#include "offload_common.h"
#include "offload_util.h"
#include "offload_engine.h"
#include "offload_env.h"
#include "offload_orsl.h"
#include "coi/coi_client.h"

| // MIC engines.
 | |
| DLL_LOCAL extern Engine*  mic_engines;
 | |
| DLL_LOCAL extern uint32_t mic_engines_total;
 | |
| 
 | |
| // DMA channel count used by COI and set via
 | |
| // OFFLOAD_DMA_CHANNEL_COUNT environment variable
 | |
| DLL_LOCAL extern uint32_t mic_dma_channel_count;
 | |
| 
 | |
//! The target image is packed as follows.
/*!      1. 8 bytes containing the size of the target binary          */
/*!      2. a null-terminated string which is the binary name         */
/*!      3. <size> number of bytes that are the contents of the image */
/*!      The address of symbol __offload_target_image
             is the address of this structure.                        */
// NOTE(review): item 1 above describes `size` as the size of the binary,
// while the field comment below says it covers name and contents; confirm
// which one the image packer actually writes.
struct Image {
     int64_t size; //!< Size in bytes of the target binary name and contents
     char data[];  //!< The name and contents of the target image
};

| // The offload descriptor.
 | |
| class OffloadDescriptor
 | |
| {
 | |
| public:
 | |
|     enum  OmpAsyncLastEventType {
 | |
|         c_last_not,     // not last event
 | |
|         c_last_write,   // the last event that is write
 | |
|         c_last_read,    // the last event that is read
 | |
|         c_last_runfunc  // the last event that is runfunction
 | |
|     };
 | |
|     
 | |
|     OffloadDescriptor(
 | |
|         int index,
 | |
|         _Offload_status *status,
 | |
|         bool is_mandatory,
 | |
|         bool is_openmp,
 | |
|         OffloadHostTimerData * timer_data
 | |
|     ) :
 | |
|         m_device(mic_engines[index == -1 ? 0 : index % mic_engines_total]),
 | |
|         m_is_mandatory(is_mandatory),
 | |
|         m_is_openmp(is_openmp),
 | |
|         m_inout_buf(0),
 | |
|         m_func_desc(0),
 | |
|         m_func_desc_size(0),
 | |
|         m_num_in_dependencies(0),
 | |
|         m_p_in_dependencies(0),
 | |
|         m_in_deps(0),
 | |
|         m_in_deps_total(0),
 | |
|         m_in_deps_allocated(0),        
 | |
|         m_out_deps(0),
 | |
|         m_out_deps_total(0),
 | |
|         m_out_deps_allocated(0),
 | |
|         m_vars(0),
 | |
|         m_vars_extra(0),
 | |
|         m_status(status),
 | |
|         m_timer_data(timer_data),
 | |
|         m_out_with_preallocated(false),
 | |
|         m_preallocated_alloc(false),
 | |
|         m_traceback_called(false),
 | |
|         m_stream(-1),
 | |
|         m_signal(0),
 | |
|         m_has_signal(0),
 | |
|         m_omp_async_last_event_type(c_last_not)
 | |
|     {
 | |
|         m_wait_all_devices = index == -1;
 | |
|     }
 | |
| 
 | |
|     ~OffloadDescriptor()
 | |
|     {
 | |
|         if (m_in_deps != 0) {
 | |
|             free(m_in_deps);
 | |
|         }
 | |
|         if (m_out_deps != 0) {
 | |
|             free(m_out_deps);
 | |
|         }
 | |
|         if (m_func_desc != 0) {
 | |
|             free(m_func_desc);
 | |
|         }
 | |
|         if (m_vars != 0) {
 | |
|             free(m_vars);
 | |
|             free(m_vars_extra);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     bool offload(const char *name, bool is_empty,
 | |
|                  VarDesc *vars, VarDesc2 *vars2, int vars_total,
 | |
|                  const void **waits, int num_waits, const void **signal,
 | |
|                  int entry_id, const void *stack_addr,
 | |
|                  OffloadFlags offload_flags);
 | |
| 
 | |
|     bool offload_finish(bool is_traceback);
 | |
| 
 | |
|     bool is_signaled();
 | |
| 
 | |
|     OffloadHostTimerData* get_timer_data() const {
 | |
|         return m_timer_data;
 | |
|     }
 | |
| 
 | |
|     void set_stream(_Offload_stream stream) {
 | |
|         m_stream = stream;
 | |
|     }
 | |
| 
 | |
|     _Offload_stream get_stream() {
 | |
|         return(m_stream);
 | |
|     }
 | |
| 
 | |
|     Engine& get_device() {
 | |
|         return m_device;
 | |
|     }
 | |
| 
 | |
|     void* get_signal() {
 | |
|         return(m_signal);
 | |
|     }
 | |
| 
 | |
|     void set_signal(const void* signal) {
 | |
|         m_has_signal = 1;
 | |
|         m_signal = const_cast<void*>(signal);
 | |
|     }
 | |
| 
 | |
|     void cleanup();
 | |
| 
 | |
|     uint32_t  m_event_count;
 | |
|     bool      m_has_signal;
 | |
| 
 | |
| private:
 | |
|     bool offload_wrap(const char *name, bool is_empty,
 | |
|                  VarDesc *vars, VarDesc2 *vars2, int vars_total,
 | |
|                  const void **waits, int num_waits, const void **signal,
 | |
|                  int entry_id, const void *stack_addr,
 | |
|                  OffloadFlags offload_flags);
 | |
|     bool wait_dependencies(const void **waits, int num_waits,
 | |
|                            _Offload_stream stream);
 | |
|     bool setup_descriptors(VarDesc *vars, VarDesc2 *vars2, int vars_total,
 | |
|                            int entry_id, const void *stack_addr);
 | |
|     bool setup_misc_data(const char *name);
 | |
|     bool send_pointer_data(bool is_async, void* info);
 | |
|     bool send_noncontiguous_pointer_data(
 | |
|         int i,
 | |
|         PtrData* src_buf,
 | |
|         PtrData* dst_buf,
 | |
|         COIEVENT *event,
 | |
|         uint64_t  &sent_data,
 | |
|         uint32_t in_deps_amount,
 | |
|         COIEVENT *in_deps
 | |
|         );
 | |
|     bool receive_noncontiguous_pointer_data(
 | |
|         int i,
 | |
|         COIBUFFER dst_buf,
 | |
|         COIEVENT *event,
 | |
|         uint64_t  &received_data,
 | |
|         uint32_t in_deps_amount,
 | |
|         COIEVENT *in_deps
 | |
|         );
 | |
| 
 | |
|     bool gather_copyin_data();
 | |
| 
 | |
|     bool compute(void *);
 | |
| 
 | |
|     bool receive_pointer_data(bool is_async, bool first_run, void * info);
 | |
|     bool scatter_copyout_data();
 | |
| 
 | |
|     bool find_ptr_data(PtrData* &ptr_data, void *base, int64_t disp,
 | |
|                        int64_t length, bool is_targptr,
 | |
|                        bool error_does_not_exist = true);
 | |
| 
 | |
|     void find_device_ptr( int64_t* &device_ptr,
 | |
|                        void *host_ptr);
 | |
| 
 | |
|     bool alloc_ptr_data(PtrData* &ptr_data, void *base, int64_t disp,
 | |
|                         int64_t length, int64_t alloc_disp, int align,
 | |
|                         bool is_targptr, bool is_prealloc, bool pin);
 | |
|     bool create_preallocated_buffer(PtrData* ptr_data, void *base);
 | |
|     bool init_static_ptr_data(PtrData *ptr_data);
 | |
|     bool init_mic_address(PtrData *ptr_data);
 | |
|     bool offload_stack_memory_manager(
 | |
|         const void * stack_begin,
 | |
|         int routine_id,
 | |
|         int buf_size,
 | |
|         int align,
 | |
|         bool thread_specific_function_locals,
 | |
|         bool *is_new);
 | |
|     char *get_this_threads_cpu_stack_addr(
 | |
|         const void * stack_begin,
 | |
|         int routine_id,
 | |
|         bool thread_specific_function_locals);
 | |
|     PtrData *get_this_threads_mic_stack_addr(
 | |
|         const void * stack_begin,
 | |
|         int routine_id,
 | |
|         bool thread_specific_function_locals);
 | |
|     bool nullify_target_stack(COIBUFFER targ_buf, uint64_t size);
 | |
| 
 | |
|     bool gen_var_descs_for_pointer_array(int i);
 | |
| 
 | |
|     void get_stream_in_dependencies(uint32_t &in_deps_amount,
 | |
|                                     COIEVENT* &in_deps);
 | |
| 
 | |
|     void report_coi_error(error_types msg, COIRESULT res);
 | |
|     _Offload_result translate_coi_error(COIRESULT res) const;
 | |
| 
 | |
|     void setup_omp_async_info();
 | |
| 
 | |
|     void setup_use_device_ptr(int i);
 | |
| 
 | |
|     void register_event_call_back(void (*)(
 | |
|                                       COIEVENT,
 | |
|                                       const COIRESULT,
 | |
|                                       const void*),
 | |
|                                   const COIEVENT *event,
 | |
|                                   const void *info);
 | |
| 
 | |
|     void register_omp_event_call_back(const COIEVENT *event, const void *info);
 | |
| 
 | |
| private:
 | |
|     typedef std::list<COIBUFFER> BufferList;
 | |
| 
 | |
|     // extra data associated with each variable descriptor
 | |
|     struct VarExtra {
 | |
|         PtrData* src_data;
 | |
|         PtrData* dst_data;
 | |
|         AutoData* auto_data;
 | |
|         int64_t cpu_disp;
 | |
|         int64_t cpu_offset;
 | |
|         void *alloc;
 | |
|         union {
 | |
|             CeanReadRanges *read_rng_src;
 | |
|             NonContigDesc  *noncont_desc;
 | |
|         };
 | |
|         CeanReadRanges *read_rng_dst;
 | |
|         int64_t ptr_arr_offset;
 | |
|         bool is_arr_ptr_el;
 | |
|         OmpAsyncLastEventType omp_last_event_type;
 | |
|         int64_t pointer_offset;
 | |
|         uint16_t type_src;
 | |
|         uint16_t type_dst;
 | |
|     };
 | |
| 
 | |
|     template<typename T> class ReadArrElements {
 | |
|     public:
 | |
|         ReadArrElements():
 | |
|             ranges(NULL),
 | |
|             el_size(sizeof(T)),
 | |
|             offset(0),
 | |
|             count(0),
 | |
|             is_empty(true),
 | |
|             base(NULL)
 | |
|         {}
 | |
| 
 | |
|         bool read_next(bool flag)
 | |
|         {
 | |
|             if (flag != 0) {
 | |
|                 if (is_empty) {
 | |
|                     if (ranges) {
 | |
|                         if (!get_next_range(ranges, &offset)) {
 | |
|                             // ranges are over
 | |
|                             return false;
 | |
|                         }
 | |
|                     }
 | |
|                     // all contiguous elements are over
 | |
|                     else if (count != 0) {
 | |
|                         return false;
 | |
|                     }
 | |
| 
 | |
|                     length_cur = size;
 | |
|                 }
 | |
|                 else {
 | |
|                     offset += el_size;
 | |
|                 }
 | |
|                 val = (T)get_el_value(base, offset, el_size);
 | |
|                 length_cur -= el_size;
 | |
|                 count++;
 | |
|                 is_empty = length_cur == 0;
 | |
|             }
 | |
|             return true;
 | |
|         }
 | |
|     public:
 | |
|         CeanReadRanges * ranges;
 | |
|         T       val;
 | |
|         int     el_size;
 | |
|         int64_t size,
 | |
|                 offset,
 | |
|                 length_cur;
 | |
|         bool    is_empty;
 | |
|         int     count;
 | |
|         char   *base;
 | |
|     };
 | |
| 
 | |
|     // ptr_data for persistent auto objects
 | |
|     PtrData*    m_stack_ptr_data;
 | |
|     PtrDataList m_destroy_stack;
 | |
| 
 | |
|     // Engine
 | |
|     Engine& m_device;
 | |
| 
 | |
|     // true for offload_wait target(mic) stream(0)
 | |
|     bool m_wait_all_devices;
 | |
| 
 | |
|     // if true offload is mandatory
 | |
|     bool m_is_mandatory;
 | |
| 
 | |
|     // if true offload has openmp origin
 | |
|     const bool m_is_openmp;
 | |
| 
 | |
|     // The Marshaller for the inputs of the offloaded region.
 | |
|     Marshaller m_in;
 | |
| 
 | |
|     // The Marshaller for the outputs of the offloaded region.
 | |
|     Marshaller m_out;
 | |
| 
 | |
|     // List of buffers that are passed to dispatch call
 | |
|     BufferList m_compute_buffers;
 | |
| 
 | |
|     // List of buffers that need to be destroyed at the end of offload
 | |
|     BufferList m_destroy_buffers;
 | |
| 
 | |
|     // Variable descriptors
 | |
|     VarDesc*  m_vars;
 | |
|     VarExtra* m_vars_extra;
 | |
|     int       m_vars_total;
 | |
| 
 | |
|     // Pointer to a user-specified status variable
 | |
|     _Offload_status *m_status;
 | |
| 
 | |
|     // Function descriptor
 | |
|     FunctionDescriptor* m_func_desc;
 | |
|     uint32_t            m_func_desc_size;
 | |
| 
 | |
|     // Buffer for transferring copyin/copyout data
 | |
|     COIBUFFER m_inout_buf;
 | |
| 
 | |
| 
 | |
|     // Dependencies
 | |
|     COIEVENT *m_in_deps;
 | |
|     uint32_t  m_in_deps_total;
 | |
|     uint32_t  m_in_deps_allocated;    
 | |
|     COIEVENT *m_out_deps;
 | |
|     uint32_t  m_out_deps_total;
 | |
|     uint32_t  m_out_deps_allocated;
 | |
| 
 | |
|     // 2 variables defines input dependencies for current COI API.
 | |
|     // The calls to routines as BufferWrite/PipelineRunFunction/BufferRead
 | |
|     // is supposed to have input dependencies.
 | |
|     // 2 variables below defines the number and vector of dependencies
 | |
|     // in every current moment of offload.
 | |
|     // So any phase of offload can use its values as input dependencies 
 | |
|     // for the COI API that the phase calls.
 | |
|     // It means that all phases (of Write, RunFunction,Read) must keep
 | |
|     // the variables correct to be used by following phase.
 | |
|     // If some consequent offloads are connected (i.e. by the same stream)
 | |
|     // the final 2 variables of the offload is used as initial inputs
 | |
|     // for the next offload.
 | |
|     uint32_t  m_num_in_dependencies;
 | |
|     COIEVENT *m_p_in_dependencies;
 | |
| 
 | |
|     // Stream
 | |
|     _Offload_stream m_stream;
 | |
| 
 | |
|     // Signal
 | |
|     void* m_signal;
 | |
| 
 | |
|     // Timer data
 | |
|     OffloadHostTimerData *m_timer_data;
 | |
| 
 | |
|     // copyin/copyout data length
 | |
|     uint64_t m_in_datalen;
 | |
|     uint64_t m_out_datalen;
 | |
| 
 | |
|     // a boolean value calculated in setup_descriptors. If true we need to do
 | |
|     // a run function on the target. Otherwise it may be optimized away.
 | |
|     bool m_need_runfunction;
 | |
| 
 | |
|     // initialized value of m_need_runfunction;
 | |
|     // is used to recognize offload_transfer
 | |
|     bool m_initial_need_runfunction;
 | |
| 
 | |
|     // a Boolean value set to true when OUT clauses with preallocated targetptr
 | |
|     // is encountered to indicate that call receive_pointer_data needs to be
 | |
|     // invoked again after call to scatter_copyout_data.
 | |
|     bool m_out_with_preallocated;
 | |
| 
 | |
|     // a Boolean value set to true if an alloc_if(1) is used with preallocated
 | |
|     // targetptr to indicate the need to scatter_copyout_data even for
 | |
|     // async offload
 | |
|     bool m_preallocated_alloc;
 | |
| 
 | |
|     // a Boolean value set to true if traceback routine is called
 | |
|     bool m_traceback_called;  
 | |
| 
 | |
|     OmpAsyncLastEventType m_omp_async_last_event_type;
 | |
| };
 | |
| 
 | |
// Initialization types for MIC: when the devices get initialized.
enum OffloadInitType {
    c_init_on_start,         // all devices before entering main
    c_init_on_offload,       // single device before starting the first offload
    c_init_on_offload_all    // all devices before starting the first offload
};

| // Determines if MIC code is an executable or a shared library
 | |
| extern "C" bool __offload_target_image_is_executable(const void *target_image);
 | |
| 
 | |
| // Initializes library and registers specified offload image.
 | |
| extern "C" bool __offload_register_image(const void* image);
 | |
| extern "C" void __offload_unregister_image(const void* image);
 | |
| 
 | |
| // Registers asynchronous task completion callback
 | |
| extern "C" void __offload_register_task_callback(void (*cb)(void *));
 | |
| 
 | |
| // Initializes offload runtime library.
 | |
| DLL_LOCAL extern int __offload_init_library(void);
 | |
| 
 | |
| // thread data for associating pipelines with threads
 | |
| DLL_LOCAL extern pthread_key_t mic_thread_key;
 | |
| 
 | |
| // location of offload_main executable
 | |
| // To be used if the main application has no offload and is not built
 | |
| // with -offload but dynamic library linked in has offload pragma
 | |
| DLL_LOCAL extern char* mic_device_main;
 | |
| 
 | |
| // Environment variables for devices
 | |
| DLL_LOCAL extern MicEnvVar mic_env_vars;
 | |
| 
 | |
| // CPU frequency
 | |
| DLL_LOCAL extern uint64_t cpu_frequency;
 | |
| 
 | |
| // LD_LIBRARY_PATH for KNC libraries
 | |
| DLL_LOCAL extern char* knc_library_path;
 | |
| 
 | |
| // LD_LIBRARY_PATH for KNL libraries
 | |
| DLL_LOCAL extern char* knl_library_path;
 | |
| 
 | |
| // stack size for target
 | |
| DLL_LOCAL extern uint32_t mic_stack_size;
 | |
| 
 | |
| // Preallocated memory size for buffers on MIC
 | |
| DLL_LOCAL extern uint64_t mic_buffer_size;
 | |
| 
 | |
| // Preallocated 4K page memory size for buffers on MIC
 | |
| DLL_LOCAL extern uint64_t mic_4k_buffer_size;
 | |
| 
 | |
| // Preallocated 2M page memory size for buffers on MIC
 | |
| DLL_LOCAL extern uint64_t mic_2m_buffer_size;
 | |
| 
 | |
| // Setting controlling inout proxy
 | |
| DLL_LOCAL extern bool  mic_proxy_io;
 | |
| DLL_LOCAL extern char* mic_proxy_fs_root;
 | |
| 
 | |
| // Threshold for creating buffers with large pages
 | |
| DLL_LOCAL extern uint64_t __offload_use_2mb_buffers;
 | |
| 
 | |
| // offload initialization type
 | |
| DLL_LOCAL extern OffloadInitType __offload_init_type;
 | |
| 
 | |
| // Device number to offload to when device is not explicitly specified.
 | |
| DLL_LOCAL extern int __omp_device_num;
 | |
| 
 | |
| // target executable
 | |
| DLL_LOCAL extern TargetImage* __target_exe;
 | |
| 
 | |
| // is true if last loaded image is dll
 | |
| DLL_LOCAL extern bool __current_image_is_dll;
 | |
| // is true if myo library is loaded when dll is loaded
 | |
| DLL_LOCAL extern bool __myo_init_in_so;
 | |
| 
 | |
| // IDB support
 | |
| 
 | |
| // Called by the offload runtime after initialization of offload infrastructure
 | |
| // has been completed.
 | |
| extern "C" void  __dbg_target_so_loaded();
 | |
| 
 | |
| // Called by the offload runtime when the offload infrastructure is about to be
 | |
| // shut down, currently at application exit.
 | |
| extern "C" void  __dbg_target_so_unloaded();
 | |
| 
 | |
| // Null-terminated string containing path to the process image of the hosting
 | |
| // application (offload_main)
 | |
| #define MAX_TARGET_NAME 512
 | |
| extern "C" char  __dbg_target_exe_name[MAX_TARGET_NAME];
 | |
| 
 | |
| // Integer specifying the process id
 | |
| extern "C" pid_t __dbg_target_so_pid;
 | |
| 
 | |
| // Integer specifying the 0-based device number
 | |
| extern "C" int   __dbg_target_id;
 | |
| 
 | |
| // Set to non-zero by the host-side debugger to enable offload debugging
 | |
| // support
 | |
| extern "C" int   __dbg_is_attached;
 | |
| 
 | |
| // Major version of the debugger support API
 | |
| extern "C" const int __dbg_api_major_version;
 | |
| 
 | |
| // Minor version of the debugger support API
 | |
| extern "C" const int __dbg_api_minor_version;
 | |
| 
 | |
| #endif // OFFLOAD_HOST_H_INCLUDED
 |