mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			
		
			
				
	
	
		
			1129 lines
		
	
	
		
			38 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			1129 lines
		
	
	
		
			38 KiB
		
	
	
	
		
			C++
		
	
	
	
/*
 | 
						|
    Copyright (c) 2014-2016 Intel Corporation.  All Rights Reserved.
 | 
						|
 | 
						|
    Redistribution and use in source and binary forms, with or without
 | 
						|
    modification, are permitted provided that the following conditions
 | 
						|
    are met:
 | 
						|
 | 
						|
      * Redistributions of source code must retain the above copyright
 | 
						|
        notice, this list of conditions and the following disclaimer.
 | 
						|
      * Redistributions in binary form must reproduce the above copyright
 | 
						|
        notice, this list of conditions and the following disclaimer in the
 | 
						|
        documentation and/or other materials provided with the distribution.
 | 
						|
      * Neither the name of Intel Corporation nor the names of its
 | 
						|
        contributors may be used to endorse or promote products derived
 | 
						|
        from this software without specific prior written permission.
 | 
						|
 | 
						|
    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 | 
						|
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 | 
						|
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 | 
						|
    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 | 
						|
    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 | 
						|
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 | 
						|
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | 
						|
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | 
						|
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
						|
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
						|
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
						|
*/
 | 
						|
 | 
						|
 | 
						|
#include "offload_engine.h"
 | 
						|
#include <signal.h>
 | 
						|
#include <errno.h>
 | 
						|
#include <sys/stat.h>
 | 
						|
#include <sys/types.h>
 | 
						|
 | 
						|
#include <algorithm>
 | 
						|
#include <vector>
 | 
						|
 | 
						|
#include "offload_host.h"
 | 
						|
#include "offload_table.h"
 | 
						|
#include "offload_iterator.h"
 | 
						|
 | 
						|
#if defined(HOST_WINNT)
 | 
						|
#define PATH_SEPARATOR ";"
 | 
						|
#else
 | 
						|
#define PATH_SEPARATOR ":"
 | 
						|
#endif
 | 
						|
 | 
						|
// Static members of Stream class must be described somewhere.
 | 
						|
// This members describe the list of all streams defined in programm
 | 
						|
// via call to _Offload_stream_create.
 | 
						|
uint64_t  Stream::m_streams_count = 0;
 | 
						|
StreamMap Stream::all_streams;
 | 
						|
mutex_t   Stream::m_stream_lock;
 | 
						|
char*     mic_library_path = 0;
 | 
						|
 | 
						|
const char* Engine::m_func_names[Engine::c_funcs_total] =
 | 
						|
{
 | 
						|
    "server_compute",
 | 
						|
#ifdef MYO_SUPPORT
 | 
						|
    "server_myoinit",
 | 
						|
    "server_myofini",
 | 
						|
#endif // MYO_SUPPORT
 | 
						|
    "server_init",
 | 
						|
    "server_var_table_size",
 | 
						|
    "server_var_table_copy",
 | 
						|
    "server_set_stream_affinity"
 | 
						|
};
 | 
						|
 | 
						|
// Symbolic representation of system signals. Fix for CQ233593
 | 
						|
const char* Engine::c_signal_names[Engine::c_signal_max] =
 | 
						|
{
 | 
						|
    "Unknown SIGNAL",
 | 
						|
    "SIGHUP",    /*  1, Hangup (POSIX).  */
 | 
						|
    "SIGINT",    /*  2, Interrupt (ANSI).  */
 | 
						|
    "SIGQUIT",   /*  3, Quit (POSIX).  */
 | 
						|
    "SIGILL",    /*  4, Illegal instruction (ANSI).  */
 | 
						|
    "SIGTRAP",   /*  5, Trace trap (POSIX).  */
 | 
						|
    "SIGABRT",   /*  6, Abort (ANSI).  */
 | 
						|
    "SIGBUS",    /*  7, BUS error (4.2 BSD).  */
 | 
						|
    "SIGFPE",    /*  8, Floating-point exception (ANSI).  */
 | 
						|
    "SIGKILL",   /*  9, Kill, unblockable (POSIX).  */
 | 
						|
    "SIGUSR1",   /* 10, User-defined signal 1 (POSIX).  */
 | 
						|
    "SIGSEGV",   /* 11, Segmentation violation (ANSI).  */
 | 
						|
    "SIGUSR2",   /* 12, User-defined signal 2 (POSIX).  */
 | 
						|
    "SIGPIPE",   /* 13, Broken pipe (POSIX).  */
 | 
						|
    "SIGALRM",   /* 14, Alarm clock (POSIX).  */
 | 
						|
    "SIGTERM",   /* 15, Termination (ANSI).  */
 | 
						|
    "SIGSTKFLT", /* 16, Stack fault.  */
 | 
						|
    "SIGCHLD",   /* 17, Child status has changed (POSIX).  */
 | 
						|
    "SIGCONT",   /* 18, Continue (POSIX).  */
 | 
						|
    "SIGSTOP",   /* 19, Stop, unblockable (POSIX).  */
 | 
						|
    "SIGTSTP",   /* 20, Keyboard stop (POSIX).  */
 | 
						|
    "SIGTTIN",   /* 21, Background read from tty (POSIX).  */
 | 
						|
    "SIGTTOU",   /* 22, Background write to tty (POSIX).  */
 | 
						|
    "SIGURG",    /* 23, Urgent condition on socket (4.2 BSD).  */
 | 
						|
    "SIGXCPU",   /* 24, CPU limit exceeded (4.2 BSD).  */
 | 
						|
    "SIGXFSZ",   /* 25, File size limit exceeded (4.2 BSD).  */
 | 
						|
    "SIGVTALRM", /* 26, Virtual alarm clock (4.2 BSD).  */
 | 
						|
    "SIGPROF",   /* 27, Profiling alarm clock (4.2 BSD).  */
 | 
						|
    "SIGWINCH",  /* 28, Window size change (4.3 BSD, Sun).  */
 | 
						|
    "SIGIO",     /* 29, I/O now possible (4.2 BSD).  */
 | 
						|
    "SIGPWR",    /* 30, Power failure restart (System V).  */
 | 
						|
    "SIGSYS"     /* 31, Bad system call.  */
 | 
						|
};
 | 
						|
 | 
						|
void Engine::init(void)
 | 
						|
{
 | 
						|
    if (!m_ready) {
 | 
						|
        mutex_locker_t locker(m_lock);
 | 
						|
 | 
						|
        if (!m_ready) {
 | 
						|
            // start process if not done yet
 | 
						|
            if (m_process == 0) {
 | 
						|
                init_process();
 | 
						|
            }
 | 
						|
 | 
						|
            // load penging images
 | 
						|
            load_libraries();
 | 
						|
 | 
						|
            // and (re)build pointer table
 | 
						|
            init_ptr_data();
 | 
						|
 | 
						|
            // it is ready now
 | 
						|
            m_ready = true;
 | 
						|
           
 | 
						|
            //  Inform the debugger 
 | 
						|
            if (__dbg_is_attached) {
 | 
						|
                __dbg_target_so_loaded();
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
void Engine::print_stream_cpu_list(const char * str)
 | 
						|
{
 | 
						|
    int count = 0;
 | 
						|
    char buffer[1024];
 | 
						|
    CpuEl* cpu_el = m_cpu_head;
 | 
						|
       
 | 
						|
    OFFLOAD_DEBUG_TRACE(3,
 | 
						|
                  "%s : cpu list as Index(Count) for the streams is :\n", str);
 | 
						|
    buffer[0] = 0;
 | 
						|
    for (int i = 0; i < m_num_threads; i++) {
 | 
						|
         cpu_el = m_cpus + i;
 | 
						|
         if (m_assigned_cpus == 0 || (*m_assigned_cpus)[i]) {
 | 
						|
             count++;
 | 
						|
             sprintf(buffer + strlen(buffer), "%d(%d) ", CPU_INDEX(cpu_el), cpu_el->count);
 | 
						|
             if (count % 20 == 0) {
 | 
						|
                 OFFLOAD_DEBUG_TRACE(3, "%s\n", buffer);
 | 
						|
                 buffer[0] = 0;           
 | 
						|
             } 
 | 
						|
         }
 | 
						|
    }
 | 
						|
    if (count % 20 != 0) {
 | 
						|
        OFFLOAD_DEBUG_TRACE(3, "%s\n", buffer);
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
void Engine::init_process(void)
 | 
						|
{
 | 
						|
    COIENGINE engine;
 | 
						|
    COIRESULT res;
 | 
						|
    const char **environ;
 | 
						|
    char buf[4096];  // For exe path name
 | 
						|
    char* mic_device_main = 0;
 | 
						|
 | 
						|
    // create environment for the target process
 | 
						|
    environ = (const char**) mic_env_vars.create_environ_for_card(m_index);
 | 
						|
    if (environ != 0) {
 | 
						|
        for (const char **p = environ; *p != 0; p++) {
 | 
						|
            OFFLOAD_DEBUG_TRACE(3, "Env Var for card %d: %s\n", m_index, *p);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    // Create execution context in the specified device
 | 
						|
    OFFLOAD_DEBUG_TRACE(2, "Getting device %d (engine %d) handle\n", m_index,
 | 
						|
                        m_physical_index);
 | 
						|
    res = COI::EngineGetHandle(COI_ISA_MIC, m_physical_index, &engine);
 | 
						|
    check_result(res, c_get_engine_handle, m_index, res);
 | 
						|
 | 
						|
    // Get engine info on threads and cores.
 | 
						|
    // The values of core number and thread number will be used later at stream
 | 
						|
    // creation by call to _Offload_stream_create(device,number_of_cpus).
 | 
						|
 | 
						|
    COI_ENGINE_INFO engine_info;
 | 
						|
 | 
						|
    res = COI::EngineGetInfo(engine, sizeof(COI_ENGINE_INFO), &engine_info);
 | 
						|
    check_result(res, c_get_engine_info, m_index, res);
 | 
						|
    if (mic_library_path == 0 ) {
 | 
						|
       if (engine_info.ISA == COI_DEVICE_KNC) {
 | 
						|
          mic_library_path = knc_library_path;
 | 
						|
       }
 | 
						|
       else if (engine_info.ISA == COI_DEVICE_KNL) {
 | 
						|
          mic_library_path = knl_library_path;
 | 
						|
       }
 | 
						|
       else {
 | 
						|
          LIBOFFLOAD_ERROR(c_unknown_mic_device_type);
 | 
						|
       }
 | 
						|
    }
 | 
						|
 | 
						|
    // m_cpus is the list of all available threads.
 | 
						|
    // At the begining all threads made available through OFFLOAD_DEVICES
 | 
						|
    // or all threads existed at the engine if OFFLOAD_DEVICES isn't set.
 | 
						|
    // m_cpu_head points to the head of the m_cpus list.
 | 
						|
    // m_cpus is ordered by number of streams using the thread.
 | 
						|
    // m_cpu_head points to the least used thread.
 | 
						|
    // After creating and destroying a stream the m_cpus list must be fixed
 | 
						|
    // to be ordered.
 | 
						|
 | 
						|
    m_cpus = (CpuEl*)malloc(engine_info.NumThreads * sizeof(CpuEl));
 | 
						|
    if (m_cpus == NULL)
 | 
						|
        LIBOFFLOAD_ERROR(c_malloc);
 | 
						|
    memset(m_cpus, 0, engine_info.NumThreads * sizeof(CpuEl));
 | 
						|
    CpuEl* prev_cpu = NULL;
 | 
						|
 | 
						|
    for (int i = 0; i < engine_info.NumThreads; i++) {
 | 
						|
         if (m_assigned_cpus == 0 || (*m_assigned_cpus)[i]) {
 | 
						|
             if (prev_cpu) {
 | 
						|
                 prev_cpu->next = m_cpus + i;
 | 
						|
             }
 | 
						|
             else {
 | 
						|
                 m_cpu_head = m_cpus + i;
 | 
						|
             }
 | 
						|
             m_cpus[i].prev = prev_cpu;
 | 
						|
             m_cpus[i].count = 0;
 | 
						|
             prev_cpu = m_cpus + i;
 | 
						|
         }
 | 
						|
    }
 | 
						|
 | 
						|
    // The following values will be used at pipeline creation for streams
 | 
						|
    m_num_cores = engine_info.NumCores;
 | 
						|
    m_num_threads = engine_info.NumThreads;
 | 
						|
 | 
						|
    print_stream_cpu_list("init_process");
 | 
						|
 | 
						|
    // Check if OFFLOAD_DMA_CHANNEL_COUNT is set to 2
 | 
						|
    // Only the value 2 is supported in 16.0
 | 
						|
    if (mic_dma_channel_count == 2) {
 | 
						|
        if (COI::ProcessConfigureDMA) {
 | 
						|
            // Set DMA channels using COI API
 | 
						|
            COI::ProcessConfigureDMA(2, COI::DMA_MODE_READ_WRITE);
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            // Set environment variable COI_DMA_CHANNEL_COUNT
 | 
						|
            // use putenv instead of setenv as Windows has no setenv.
 | 
						|
            // Note: putenv requires its argument can't be freed or modified.
 | 
						|
            // So no free after call to putenv or elsewhere.
 | 
						|
            char * env_var = strdup("COI_DMA_CHANNEL_COUNT=2");
 | 
						|
            if (env_var == NULL)
 | 
						|
                LIBOFFLOAD_ERROR(c_malloc);
 | 
						|
            putenv(env_var);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    // Target executable is not available then use compiler provided offload_main
 | 
						|
    if (__target_exe == 0) {
 | 
						|
       // find target executable to be used if main application is not an
 | 
						|
       // offload build application.
 | 
						|
       const char *base_name = "offload_main";
 | 
						|
       if (mic_library_path != 0) {
 | 
						|
          char *buf = strdup(mic_library_path);
 | 
						|
          if (buf == NULL)
 | 
						|
              LIBOFFLOAD_ERROR(c_malloc);
 | 
						|
          char *try_name = (char*) alloca(strlen(mic_library_path) +
 | 
						|
                                          strlen(base_name) + 2);
 | 
						|
          char *dir, *ptr;
 | 
						|
 | 
						|
          for (dir = strtok_r(buf, PATH_SEPARATOR, &ptr); dir != 0;
 | 
						|
               dir = strtok_r(0, PATH_SEPARATOR, &ptr)) {
 | 
						|
              // compose a full path
 | 
						|
              sprintf(try_name, "%s/%s", dir, base_name);
 | 
						|
 | 
						|
              // check if such file exists
 | 
						|
              struct stat st;
 | 
						|
              if (stat(try_name, &st) == 0 && S_ISREG(st.st_mode)) {
 | 
						|
                  mic_device_main = strdup(try_name);
 | 
						|
                  if (mic_device_main == NULL)
 | 
						|
                      LIBOFFLOAD_ERROR(c_malloc);
 | 
						|
                  break;
 | 
						|
              }
 | 
						|
          }
 | 
						|
          free(buf);
 | 
						|
       }
 | 
						|
       if (mic_device_main == 0) {
 | 
						|
          LIBOFFLOAD_ERROR(c_report_no_target_exe, "offload_main");
 | 
						|
          exit(1);
 | 
						|
       }
 | 
						|
 | 
						|
       OFFLOAD_DEBUG_TRACE(2,
 | 
						|
           "Loading target executable %s\n",mic_device_main);
 | 
						|
 | 
						|
       res = COI::ProcessCreateFromFile(
 | 
						|
           engine,                 // in_Engine
 | 
						|
           mic_device_main,        // in_pBinaryName
 | 
						|
           0,                      // in_Argc
 | 
						|
           0,                      // in_ppArgv
 | 
						|
           environ == 0,           // in_DupEnv
 | 
						|
           environ,                // in_ppAdditionalEnv
 | 
						|
           mic_proxy_io,           // in_ProxyActive
 | 
						|
           mic_proxy_fs_root,      // in_ProxyfsRoot
 | 
						|
           mic_buffer_size,        // in_BufferSpace
 | 
						|
           mic_library_path,       // in_LibrarySearchPath
 | 
						|
           &m_process              // out_pProcess
 | 
						|
       );
 | 
						|
    }
 | 
						|
    else {
 | 
						|
    // Target executable should be available by the time when we
 | 
						|
    // attempt to initialize the device
 | 
						|
 | 
						|
       //  Need the full path of the FAT exe for VTUNE
 | 
						|
       {
 | 
						|
#ifndef TARGET_WINNT
 | 
						|
          ssize_t len = readlink("/proc/self/exe", buf,1000);
 | 
						|
#else
 | 
						|
          int len = GetModuleFileName(NULL, buf,1000);
 | 
						|
#endif // TARGET_WINNT
 | 
						|
          if  (len == -1) {
 | 
						|
             LIBOFFLOAD_ERROR(c_report_no_host_exe);
 | 
						|
             exit(1);
 | 
						|
          }
 | 
						|
          else if (len > 999) {
 | 
						|
             LIBOFFLOAD_ERROR(c_report_path_buff_overflow);
 | 
						|
             exit(1);
 | 
						|
          }
 | 
						|
          buf[len] = '\0';
 | 
						|
       }
 | 
						|
 | 
						|
       OFFLOAD_DEBUG_TRACE(2,
 | 
						|
           "Loading target executable \"%s\" from %p, size %lld, host file %s\n",
 | 
						|
           __target_exe->name, __target_exe->data, __target_exe->size,
 | 
						|
           buf);
 | 
						|
 | 
						|
       res = COI::ProcessCreateFromMemory(
 | 
						|
           engine,                 // in_Engine
 | 
						|
           __target_exe->name,     // in_pBinaryName
 | 
						|
           __target_exe->data,     // in_pBinaryBuffer
 | 
						|
           __target_exe->size,     // in_BinaryBufferLength,
 | 
						|
           0,                      // in_Argc
 | 
						|
           0,                      // in_ppArgv
 | 
						|
           environ == 0,           // in_DupEnv
 | 
						|
           environ,                // in_ppAdditionalEnv
 | 
						|
           mic_proxy_io,           // in_ProxyActive
 | 
						|
           mic_proxy_fs_root,      // in_ProxyfsRoot
 | 
						|
           mic_buffer_size,        // in_BufferSpace
 | 
						|
           mic_library_path,       // in_LibrarySearchPath
 | 
						|
           buf,                    // in_FileOfOrigin
 | 
						|
           -1,                     // in_FileOfOriginOffset use -1 to indicate to
 | 
						|
                                   // COI that is is a FAT binary
 | 
						|
           &m_process              // out_pProcess
 | 
						|
       );
 | 
						|
    }
 | 
						|
    check_result(res, c_process_create, m_index, res);
 | 
						|
 | 
						|
    if ((mic_4k_buffer_size != 0) || (mic_2m_buffer_size !=0)) {
 | 
						|
       // available only in MPSS 4.2 and greater
 | 
						|
       if (COI::ProcessSetCacheSize != 0 ) { 
 | 
						|
          int flags;
 | 
						|
          //  Need compiler to use MPSS 3.2 or greater to get these
 | 
						|
          // definition so currently hardcoding it
 | 
						|
          //  COI_CACHE_ACTION_GROW_NOW && COI_CACHE_MODE_ONDEMAND_SYNC;
 | 
						|
          flags = 0x00020002; 
 | 
						|
          res = COI::ProcessSetCacheSize(
 | 
						|
               m_process,             // in_Process
 | 
						|
               mic_2m_buffer_size,    // in_HugePagePoolSize
 | 
						|
               flags,                 // inHugeFlags
 | 
						|
               mic_4k_buffer_size,    // in_SmallPagePoolSize
 | 
						|
               flags,                 // inSmallFlags
 | 
						|
               0,                     // in_NumDependencies
 | 
						|
               0,                     // in_pDependencies
 | 
						|
               0                      // out_PCompletion
 | 
						|
          );
 | 
						|
          OFFLOAD_DEBUG_TRACE(2,
 | 
						|
              "Reserve target buffers 4K pages = %d  2M pages = %d\n",
 | 
						|
                  mic_4k_buffer_size, mic_2m_buffer_size);
 | 
						|
           check_result(res, c_process_set_cache_size, m_index, res);
 | 
						|
       }
 | 
						|
       else {
 | 
						|
             OFFLOAD_DEBUG_TRACE(2,
 | 
						|
                 "Reserve target buffers not supported in current MPSS\n");
 | 
						|
       }
 | 
						|
    }
 | 
						|
 | 
						|
    // get function handles
 | 
						|
    res = COI::ProcessGetFunctionHandles(m_process, c_funcs_total,
 | 
						|
                                         m_func_names, m_funcs);
 | 
						|
    check_result(res, c_process_get_func_handles, m_index, res);
 | 
						|
 | 
						|
    // initialize device side
 | 
						|
    pid_t pid = init_device();
 | 
						|
 | 
						|
    // For IDB
 | 
						|
    if (__dbg_is_attached) {
 | 
						|
        // TODO: we have in-memory executable now.
 | 
						|
        // Check with IDB team what should we provide them now?
 | 
						|
        if (__target_exe == 0) {
 | 
						|
            strcpy(__dbg_target_exe_name, "offload_main");
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            if (strlen(__target_exe->name) < MAX_TARGET_NAME) {
 | 
						|
                strcpy(__dbg_target_exe_name, __target_exe->name);
 | 
						|
            }
 | 
						|
        }
 | 
						|
        __dbg_target_so_pid = pid;
 | 
						|
        __dbg_target_id = m_physical_index;
 | 
						|
       // The call to __dbg_target_so_loaded() is moved
 | 
						|
       // to Engine:init so all the libraries are loaded before
 | 
						|
       // informing debugger so debugger can access them.
 | 
						|
       // __dbg_target_so_loaded();
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
void Engine::fini_process(bool verbose)
 | 
						|
{
 | 
						|
    if (m_process != 0) {
 | 
						|
        uint32_t sig;
 | 
						|
        int8_t ret;
 | 
						|
 | 
						|
        // destroy target process
 | 
						|
        OFFLOAD_DEBUG_TRACE(2, "Destroying process on the device %d\n",
 | 
						|
                            m_index);
 | 
						|
 | 
						|
        COIRESULT res = COI::ProcessDestroy(m_process, -1, 0, &ret, &sig);
 | 
						|
        m_process = 0;
 | 
						|
 | 
						|
        if (res == COI_SUCCESS) {
 | 
						|
            OFFLOAD_DEBUG_TRACE(3, "Device process: signal %d, exit code %d\n",
 | 
						|
                                sig, ret);
 | 
						|
            if (verbose) {
 | 
						|
                if (sig != 0) {
 | 
						|
                    LIBOFFLOAD_ERROR(
 | 
						|
                        c_mic_process_exit_sig, m_index, sig,
 | 
						|
                        c_signal_names[sig >= c_signal_max ? 0 : sig]);
 | 
						|
                }
 | 
						|
                else {
 | 
						|
                    LIBOFFLOAD_ERROR(c_mic_process_exit_ret, m_index, ret);
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            // for idb
 | 
						|
            if (__dbg_is_attached) {
 | 
						|
                __dbg_target_so_unloaded();
 | 
						|
            }
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            if (verbose) {
 | 
						|
                LIBOFFLOAD_ERROR(c_mic_process_exit, m_index);
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
void Engine::load_libraries()
 | 
						|
{
 | 
						|
    // load libraries collected so far
 | 
						|
    for (TargetImageList::iterator it = m_images.begin();
 | 
						|
         it != m_images.end(); it++) {
 | 
						|
        OFFLOAD_DEBUG_TRACE(2,
 | 
						|
            "Loading library \"%s\" from %p, size %llu, host file %s\n",
 | 
						|
                                    it->name, it->data, it->size, it->origin);
 | 
						|
 | 
						|
        // load library to the device
 | 
						|
        COILIBRARY lib;
 | 
						|
        COIRESULT res;
 | 
						|
        res = COI::ProcessLoadLibraryFromMemory(m_process,
 | 
						|
                                                it->data,
 | 
						|
                                                it->size,
 | 
						|
                                                it->name,
 | 
						|
                                                mic_library_path,
 | 
						|
                                                it->origin,
 | 
						|
                                                (it->origin) ? -1 : 0,
 | 
						|
                                                COI_LOADLIBRARY_V1_FLAGS,
 | 
						|
                                                &lib);
 | 
						|
        m_dyn_libs.push_front(DynLib(it->name, it->data, lib));
 | 
						|
 | 
						|
        if (res != COI_SUCCESS && res != COI_ALREADY_EXISTS) {
 | 
						|
            check_result(res, c_load_library, it->origin, m_index, res);
 | 
						|
        }
 | 
						|
    }
 | 
						|
    m_images.clear();
 | 
						|
}
 | 
						|
 | 
						|
void Engine::unload_library(const void *data, const char *name)
 | 
						|
{
 | 
						|
    if (m_process == 0) {
 | 
						|
       return;
 | 
						|
    }
 | 
						|
    for (DynLibList::iterator it = m_dyn_libs.begin();
 | 
						|
         it != m_dyn_libs.end(); it++) {
 | 
						|
         if (it->data == data) {
 | 
						|
            COIRESULT res;
 | 
						|
            OFFLOAD_DEBUG_TRACE(2,
 | 
						|
               "Unloading library \"%s\"\n",name);
 | 
						|
            res = COI::ProcessUnloadLibrary(m_process,it->lib);
 | 
						|
            m_dyn_libs.erase(it);
 | 
						|
            if (res != COI_SUCCESS) {
 | 
						|
                check_result(res, c_unload_library, m_index, res);
 | 
						|
            }
 | 
						|
            return;
 | 
						|
         }
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
static bool target_entry_cmp(
 | 
						|
    const VarList::BufEntry &l,
 | 
						|
    const VarList::BufEntry &r
 | 
						|
)
 | 
						|
{
 | 
						|
    const char *l_name = reinterpret_cast<const char*>(l.name);
 | 
						|
    const char *r_name = reinterpret_cast<const char*>(r.name);
 | 
						|
    return strcmp(l_name, r_name) < 0;
 | 
						|
}
 | 
						|
 | 
						|
static bool host_entry_cmp(
 | 
						|
    const VarTable::Entry *l,
 | 
						|
    const VarTable::Entry *r
 | 
						|
)
 | 
						|
{
 | 
						|
    return strcmp(l->name, r->name) < 0;
 | 
						|
}
 | 
						|
 | 
						|
void Engine::init_ptr_data(void)
 | 
						|
{
 | 
						|
    COIRESULT res;
 | 
						|
    COIEVENT event;
 | 
						|
 | 
						|
    // Prepare table of host entries
 | 
						|
    std::vector<const VarTable::Entry*> host_table(
 | 
						|
                                         Iterator(__offload_vars.get_head()),
 | 
						|
                                         Iterator());
 | 
						|
 | 
						|
    // no need to do anything further is host table is empty
 | 
						|
    if (host_table.size() <= 0) {
 | 
						|
        return;
 | 
						|
    }
 | 
						|
 | 
						|
    // Get var table entries from the target.
 | 
						|
    // First we need to get size for the buffer to copy data
 | 
						|
    struct {
 | 
						|
        int64_t nelems;
 | 
						|
        int64_t length;
 | 
						|
    } params;
 | 
						|
 | 
						|
    res = COI::PipelineRunFunction(get_pipeline(),
 | 
						|
                                   m_funcs[c_func_var_table_size],
 | 
						|
                                   0, 0, 0,
 | 
						|
                                   0, 0,
 | 
						|
                                   0, 0,
 | 
						|
                                   ¶ms, sizeof(params),
 | 
						|
                                   &event);
 | 
						|
    check_result(res, c_pipeline_run_func, m_index, res);
 | 
						|
 | 
						|
    res = COI::EventWait(1, &event, -1, 1, 0, 0);
 | 
						|
    check_result(res, c_event_wait, res);
 | 
						|
 | 
						|
    if (params.length == 0) {
 | 
						|
        return;
 | 
						|
    }
 | 
						|
 | 
						|
    // create buffer for target entries and copy data to host
 | 
						|
    COIBUFFER buffer;
 | 
						|
    res = COI::BufferCreate(params.length, COI_BUFFER_NORMAL, 0, 0, 1,
 | 
						|
                            &m_process, &buffer);
 | 
						|
    check_result(res, c_buf_create, m_index, res);
 | 
						|
 | 
						|
    COI_ACCESS_FLAGS flags = COI_SINK_WRITE;
 | 
						|
    res = COI::PipelineRunFunction(get_pipeline(),
 | 
						|
                                   m_funcs[c_func_var_table_copy],
 | 
						|
                                   1, &buffer, &flags,
 | 
						|
                                   0, 0,
 | 
						|
                                   ¶ms.nelems, sizeof(params.nelems),
 | 
						|
                                   0, 0,
 | 
						|
                                   &event);
 | 
						|
    check_result(res, c_pipeline_run_func, m_index, res);
 | 
						|
 | 
						|
    res = COI::EventWait(1, &event, -1, 1, 0, 0);
 | 
						|
    check_result(res, c_event_wait, res);
 | 
						|
 | 
						|
    // patch names in target data
 | 
						|
    VarList::BufEntry *target_table;
 | 
						|
    COIMAPINSTANCE map_inst;
 | 
						|
    res = COI::BufferMap(buffer, 0, params.length, COI_MAP_READ_ONLY, 0, 0,
 | 
						|
                         0, &map_inst,
 | 
						|
                         reinterpret_cast<void**>(&target_table));
 | 
						|
    check_result(res, c_buf_map, res);
 | 
						|
 | 
						|
    VarList::table_patch_names(target_table, params.nelems);
 | 
						|
 | 
						|
    // and sort entries
 | 
						|
    std::sort(target_table, target_table + params.nelems, target_entry_cmp);
 | 
						|
    std::sort(host_table.begin(), host_table.end(), host_entry_cmp);
 | 
						|
 | 
						|
    // merge host and target entries and enter matching vars map
 | 
						|
    std::vector<const VarTable::Entry*>::const_iterator hi =
 | 
						|
        host_table.begin();
 | 
						|
    std::vector<const VarTable::Entry*>::const_iterator he =
 | 
						|
        host_table.end();
 | 
						|
    const VarList::BufEntry *ti = target_table;
 | 
						|
    const VarList::BufEntry *te = target_table + params.nelems;
 | 
						|
 | 
						|
    while (hi != he && ti != te) {
 | 
						|
        int res = strcmp((*hi)->name, reinterpret_cast<const char*>(ti->name));
 | 
						|
        if (res == 0) {
 | 
						|
            bool is_new;
 | 
						|
            // add matching entry to var map
 | 
						|
            PtrData *ptr = insert_ptr_data((*hi)->addr, (*hi)->size, is_new);
 | 
						|
 | 
						|
            // store address for new entries
 | 
						|
            if (is_new) {
 | 
						|
                ptr->mic_addr = ti->addr;
 | 
						|
                ptr->is_static = true;
 | 
						|
                ptr->var_alloc_type = (*hi)->var_alloc_type;
 | 
						|
            }
 | 
						|
            ptr->alloc_ptr_data_lock.unlock();
 | 
						|
            hi++;
 | 
						|
            ti++;
 | 
						|
        }
 | 
						|
        else if (res < 0) {
 | 
						|
            hi++;
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            ti++;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    // cleanup
 | 
						|
    res = COI::BufferUnmap(map_inst, 0, 0, 0);
 | 
						|
    check_result(res, c_buf_unmap, res);
 | 
						|
 | 
						|
    res = COI::BufferDestroy(buffer);
 | 
						|
    check_result(res, c_buf_destroy, res);
 | 
						|
}
 | 
						|
 | 
						|
COIRESULT Engine::compute(
 | 
						|
    _Offload_stream stream,
 | 
						|
    const std::list<COIBUFFER> &buffers,
 | 
						|
    const void*         data,
 | 
						|
    uint16_t            data_size,
 | 
						|
    void*               ret,
 | 
						|
    uint16_t            ret_size,
 | 
						|
    uint32_t            num_deps,
 | 
						|
    const COIEVENT*     deps,
 | 
						|
    COIEVENT*           event
 | 
						|
) /* const */
 | 
						|
{
 | 
						|
    COIBUFFER *bufs;
 | 
						|
    COI_ACCESS_FLAGS *flags;
 | 
						|
    COIRESULT res;
 | 
						|
 | 
						|
    // convert buffers list to array
 | 
						|
    int num_bufs = buffers.size();
 | 
						|
    if (num_bufs > 0) {
 | 
						|
        bufs = (COIBUFFER*) alloca(num_bufs * sizeof(COIBUFFER));
 | 
						|
        flags = (COI_ACCESS_FLAGS*) alloca(num_bufs *
 | 
						|
                                           sizeof(COI_ACCESS_FLAGS));
 | 
						|
 | 
						|
        int i = 0;
 | 
						|
        for (std::list<COIBUFFER>::const_iterator it = buffers.begin();
 | 
						|
             it != buffers.end(); it++) {
 | 
						|
            bufs[i] = *it;
 | 
						|
 | 
						|
            // TODO: this should be fixed
 | 
						|
            flags[i++] = COI_SINK_WRITE;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        bufs = 0;
 | 
						|
        flags = 0;
 | 
						|
    }
 | 
						|
    COIPIPELINE pipeline = (stream == no_stream) ?
 | 
						|
                           get_pipeline() :
 | 
						|
                           get_pipeline(stream);
 | 
						|
    // start computation
 | 
						|
    res = COI::PipelineRunFunction(pipeline,
 | 
						|
                                   m_funcs[c_func_compute],
 | 
						|
                                   num_bufs, bufs, flags,
 | 
						|
                                   num_deps, deps,
 | 
						|
                                   data, data_size,
 | 
						|
                                   ret, ret_size,
 | 
						|
                                   event);
 | 
						|
    return res;
 | 
						|
}
 | 
						|
 | 
						|
pid_t Engine::init_device(void)
 | 
						|
{
 | 
						|
    struct init_data {
 | 
						|
        int  device_index;
 | 
						|
        int  devices_total;
 | 
						|
        int  console_level;
 | 
						|
        int  offload_report_level;
 | 
						|
    } data;
 | 
						|
    COIRESULT res;
 | 
						|
    COIEVENT event;
 | 
						|
    pid_t pid;
 | 
						|
 | 
						|
    OFFLOAD_DEBUG_TRACE_1(2, 0, c_offload_init,
 | 
						|
                          "Initializing device with logical index %d "
 | 
						|
                          "and physical index %d\n",
 | 
						|
                           m_index, m_physical_index);
 | 
						|
 | 
						|
    // setup misc data
 | 
						|
    data.device_index = m_index;
 | 
						|
    data.devices_total = mic_engines_total;
 | 
						|
    data.console_level = console_enabled;
 | 
						|
    data.offload_report_level = offload_report_level;
 | 
						|
 | 
						|
    res = COI::PipelineRunFunction(get_pipeline(),
 | 
						|
                                   m_funcs[c_func_init],
 | 
						|
                                   0, 0, 0, 0, 0,
 | 
						|
                                   &data, sizeof(data),
 | 
						|
                                   &pid, sizeof(pid),
 | 
						|
                                   &event);
 | 
						|
    check_result(res, c_pipeline_run_func, m_index, res);
 | 
						|
 | 
						|
    res = COI::EventWait(1, &event, -1, 1, 0, 0);
 | 
						|
    check_result(res, c_event_wait, res);
 | 
						|
 | 
						|
    OFFLOAD_DEBUG_TRACE(2, "Device process pid is %d\n", pid);
 | 
						|
 | 
						|
    return pid;
 | 
						|
}
 | 
						|
 | 
						|
// data associated with each thread
 | 
						|
struct Thread {
 | 
						|
    Thread(long* addr_coipipe_counter) {
 | 
						|
        m_addr_coipipe_counter = addr_coipipe_counter;
 | 
						|
        memset(m_pipelines, 0, sizeof(m_pipelines));
 | 
						|
    }
 | 
						|
 | 
						|
    ~Thread() {
 | 
						|
#ifndef TARGET_WINNT
 | 
						|
        __sync_sub_and_fetch(m_addr_coipipe_counter, 1);
 | 
						|
#else // TARGET_WINNT
 | 
						|
        _InterlockedDecrement(m_addr_coipipe_counter);
 | 
						|
#endif // TARGET_WINNT
 | 
						|
        for (int i = 0; i < mic_engines_total; i++) {
 | 
						|
            if (m_pipelines[i] != 0) {
 | 
						|
                COI::PipelineDestroy(m_pipelines[i]);
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    COIPIPELINE get_pipeline(int index) const {
 | 
						|
        return m_pipelines[index];
 | 
						|
    }
 | 
						|
 | 
						|
    void set_pipeline(int index, COIPIPELINE pipeline) {
 | 
						|
        m_pipelines[index] = pipeline;
 | 
						|
    }
 | 
						|
 | 
						|
    AutoSet& get_auto_vars() {
 | 
						|
        return m_auto_vars;
 | 
						|
    }
 | 
						|
 | 
						|
private:
 | 
						|
    long*       m_addr_coipipe_counter;
 | 
						|
    AutoSet     m_auto_vars;
 | 
						|
    COIPIPELINE m_pipelines[MIC_ENGINES_MAX];
 | 
						|
};
 | 
						|
 | 
						|
COIPIPELINE Engine::get_pipeline(void)
 | 
						|
{
 | 
						|
    Thread* thread = (Thread*) thread_getspecific(mic_thread_key);
 | 
						|
    if (thread == 0) {
 | 
						|
        thread = new Thread(&m_proc_number);
 | 
						|
        thread_setspecific(mic_thread_key, thread);
 | 
						|
    }
 | 
						|
 | 
						|
    COIPIPELINE pipeline = thread->get_pipeline(m_index);
 | 
						|
    if (pipeline == 0) {
 | 
						|
        COIRESULT res;
 | 
						|
        int proc_num;
 | 
						|
 | 
						|
#ifndef TARGET_WINNT
 | 
						|
        proc_num = __sync_fetch_and_add(&m_proc_number, 1);
 | 
						|
#else // TARGET_WINNT
 | 
						|
        proc_num = _InterlockedIncrement(&m_proc_number);
 | 
						|
#endif // TARGET_WINNT
 | 
						|
 | 
						|
        if (proc_num > COI_PIPELINE_MAX_PIPELINES) {
 | 
						|
            LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES);
 | 
						|
            LIBOFFLOAD_ABORT;
 | 
						|
        }
 | 
						|
 | 
						|
        // Create pipeline for this thread
 | 
						|
        if (m_assigned_cpus == 0) {
 | 
						|
            // If m_assigned_cpus is NULL, it implies all threads
 | 
						|
            // Create the pipeline with no CPU mask
 | 
						|
            res = COI::PipelineCreate(m_process, 0, mic_stack_size, &pipeline);
 | 
						|
        } else {
 | 
						|
            // Create COI CPU mask
 | 
						|
            COI_CPU_MASK  in_Mask;
 | 
						|
            res = COI::PipelineClearCPUMask(in_Mask);
 | 
						|
            check_result(res, c_clear_cpu_mask, m_index, res);
 | 
						|
 | 
						|
            int threads_per_core = m_num_threads / m_num_cores;
 | 
						|
 | 
						|
            // Available threads are defined by examining of m_assigned_cpus bitset.
 | 
						|
            // We skip thread 0.
 | 
						|
            for (int i = 1; i < m_num_threads; i++) {
 | 
						|
                // For available thread i m_assigned_cpus[i] is equal to 1
 | 
						|
                if ((*m_assigned_cpus)[i]) {
 | 
						|
                    COI_CPU_MASK_SET(i, in_Mask);
 | 
						|
                }
 | 
						|
            }
 | 
						|
            OFFLOAD_DEBUG_TRACE(2, "COIPipelineCreate Mask for this CPU thread\n"
 | 
						|
                               "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n"
 | 
						|
                               "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n",
 | 
						|
                               in_Mask[0], in_Mask[1], in_Mask[2], in_Mask[3],
 | 
						|
                               in_Mask[4], in_Mask[5], in_Mask[6], in_Mask[7],
 | 
						|
                               in_Mask[8], in_Mask[9], in_Mask[10], in_Mask[11],
 | 
						|
                               in_Mask[12], in_Mask[13], in_Mask[14], in_Mask[15]);
 | 
						|
 | 
						|
            // Create the pipeline with allowable CPUs
 | 
						|
            res = COI::PipelineCreate(m_process, in_Mask, mic_stack_size, &pipeline);
 | 
						|
        }
 | 
						|
        check_result(res, c_pipeline_create, m_index, res);
 | 
						|
        thread->set_pipeline(m_index, pipeline);
 | 
						|
    }
 | 
						|
    return pipeline;
 | 
						|
}
 | 
						|
 | 
						|
Stream* Stream::find_stream(uint64_t handle, bool remove)
 | 
						|
{
 | 
						|
    Stream *stream = 0;
 | 
						|
 | 
						|
    m_stream_lock.lock();
 | 
						|
    {
 | 
						|
        StreamMap::iterator it = all_streams.find(handle);
 | 
						|
        if (it != all_streams.end()) {
 | 
						|
            stream = it->second;
 | 
						|
            if (remove) {
 | 
						|
                all_streams.erase(it);
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
    m_stream_lock.unlock();
 | 
						|
    return stream;
 | 
						|
}
 | 
						|
 | 
						|
void Engine::move_cpu_el_after(CpuEl* cpu_what, CpuEl* cpu_after)
 | 
						|
{
 | 
						|
    if (cpu_what == cpu_after) {
 | 
						|
        return;
 | 
						|
    }
 | 
						|
    CpuEl* cpu_prev = cpu_what->prev;
 | 
						|
 | 
						|
    // remove cpu_what
 | 
						|
    if (!cpu_prev) {
 | 
						|
        m_cpu_head = cpu_what->next;
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        cpu_prev->next = cpu_what->next;
 | 
						|
    }
 | 
						|
    if (cpu_what->next) {
 | 
						|
        cpu_what->next->prev = cpu_prev;
 | 
						|
    }
 | 
						|
 | 
						|
    // insert cpu_what after cpu_after
 | 
						|
    cpu_what->prev = cpu_after;
 | 
						|
    cpu_what->next = cpu_after->next;
 | 
						|
    if (cpu_after->next) {
 | 
						|
        cpu_after->next->prev = cpu_what;
 | 
						|
    }
 | 
						|
    cpu_after->next = cpu_what;
 | 
						|
}
 | 
						|
 | 
						|
COIPIPELINE Engine::get_pipeline(_Offload_stream handle)
 | 
						|
{
 | 
						|
    Stream * stream = Stream::find_stream(handle, false);
 | 
						|
 | 
						|
    if (!stream) {
 | 
						|
        LIBOFFLOAD_ERROR(c_offload_no_stream, m_index);
 | 
						|
        LIBOFFLOAD_ABORT;
 | 
						|
    }
 | 
						|
 | 
						|
    COIPIPELINE pipeline = stream->get_pipeline();
 | 
						|
 | 
						|
    if (pipeline == 0) {
 | 
						|
        COIRESULT     res;
 | 
						|
        int           proc_num;
 | 
						|
        COI_CPU_MASK  in_Mask ;
 | 
						|
 | 
						|
#ifndef TARGET_WINNT
 | 
						|
        proc_num = __sync_fetch_and_add(&m_proc_number, 1);
 | 
						|
#else // TARGET_WINNT
 | 
						|
        proc_num = _InterlockedIncrement(&m_proc_number);
 | 
						|
#endif // TARGET_WINNT
 | 
						|
 | 
						|
        if (proc_num > COI_PIPELINE_MAX_PIPELINES) {
 | 
						|
            LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES);
 | 
						|
            LIBOFFLOAD_ABORT;
 | 
						|
        }
 | 
						|
 | 
						|
        m_stream_lock.lock();
 | 
						|
 | 
						|
        // start process if not done yet
 | 
						|
        if (m_process == 0) {
 | 
						|
            init_process();
 | 
						|
        }
 | 
						|
 | 
						|
        // create CPUmask
 | 
						|
        res = COI::PipelineClearCPUMask(in_Mask);
 | 
						|
        check_result(res, c_clear_cpu_mask, m_index, res);
 | 
						|
 | 
						|
        int stream_cpu_num = stream->get_cpu_number();
 | 
						|
 | 
						|
        stream->m_stream_cpus.reset();
 | 
						|
 | 
						|
        int threads_per_core = m_num_threads / m_num_cores;
 | 
						|
 | 
						|
 | 
						|
        // Available threads is taken from m_cpus list.
 | 
						|
        // m_cpu_head points to the head of m_cpus.
 | 
						|
        // the elements of m_cpus is ordered by the number of usage in streams.
 | 
						|
 | 
						|
        CpuEl *cpu_el = m_cpu_head;
 | 
						|
        CpuEl *cpu_used_el, *cpu_used_prev, *cpu_prev;
 | 
						|
 | 
						|
        for (int i = 0; i < stream_cpu_num; i++) {
 | 
						|
            COI_CPU_MASK_SET(CPU_INDEX(cpu_el), in_Mask);
 | 
						|
            stream->m_stream_cpus.set(CPU_INDEX(cpu_el));
 | 
						|
            //If the number of availabale threads is less than stream_cpu_num,
 | 
						|
            // the stream_cpu_num is restricted to this number.
 | 
						|
            if (!cpu_el->next) {
 | 
						|
                break;
 | 
						|
            }
 | 
						|
            if (i + 1 < stream_cpu_num) {
 | 
						|
                cpu_el = cpu_el->next;
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        // assertion : cpu_el points to the last used thread
 | 
						|
        cpu_used_el = cpu_el;
 | 
						|
        while (cpu_used_el) {
 | 
						|
            cpu_used_el->count++;
 | 
						|
            cpu_el = cpu_prev = cpu_used_el;
 | 
						|
            cpu_used_prev = cpu_used_el->prev;
 | 
						|
            if (!cpu_el->next) {
 | 
						|
                cpu_used_el = cpu_used_prev;
 | 
						|
                continue;
 | 
						|
            }
 | 
						|
            
 | 
						|
            while (cpu_el) {
 | 
						|
                if (cpu_used_el->count < cpu_el->count) {
 | 
						|
                    break;
 | 
						|
                }
 | 
						|
                // Equal used threads are ordered by thread number to
 | 
						|
                // assign to a stream as contiguous threads as possible.
 | 
						|
                else if (cpu_used_el->count == cpu_el->count &&
 | 
						|
                         CPU_INDEX(cpu_used_el) <  CPU_INDEX(cpu_el)) {
 | 
						|
                     break;               
 | 
						|
                }
 | 
						|
                cpu_prev = cpu_el;
 | 
						|
                cpu_el = cpu_el->next;
 | 
						|
            }
 | 
						|
            if (cpu_used_el != cpu_prev) {
 | 
						|
                move_cpu_el_after(cpu_used_el, cpu_prev);
 | 
						|
            }
 | 
						|
            cpu_used_el = cpu_used_prev;
 | 
						|
        }
 | 
						|
        print_stream_cpu_list("get_pipeline");
 | 
						|
 | 
						|
        // create pipeline for this thread
 | 
						|
        OFFLOAD_DEBUG_TRACE(2, "COIPipelineCreate Mask for this Stream\n"
 | 
						|
                               "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n"
 | 
						|
                               "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n",
 | 
						|
                               in_Mask[0], in_Mask[1], in_Mask[2], in_Mask[3],
 | 
						|
                               in_Mask[4], in_Mask[5], in_Mask[6], in_Mask[7],
 | 
						|
                               in_Mask[8], in_Mask[9], in_Mask[10], in_Mask[11],
 | 
						|
                               in_Mask[12], in_Mask[13], in_Mask[14], in_Mask[15]);
 | 
						|
        res = COI::PipelineCreate(m_process, in_Mask,
 | 
						|
                                  mic_stack_size, &pipeline);
 | 
						|
        check_result(res, c_pipeline_create, m_index, res);
 | 
						|
 | 
						|
        // Set stream's affinities
 | 
						|
        {
 | 
						|
            struct affinity_spec affinity_spec;
 | 
						|
            char* affinity_type;
 | 
						|
            int i;
 | 
						|
 | 
						|
            // "compact" by default
 | 
						|
            affinity_spec.affinity_type = affinity_compact;
 | 
						|
 | 
						|
            // Check if user has specified type of affinity
 | 
						|
            if ((affinity_type = getenv("OFFLOAD_STREAM_AFFINITY")) !=
 | 
						|
                                        NULL)
 | 
						|
            {
 | 
						|
                char affinity_str[16];
 | 
						|
                int affinity_str_len;
 | 
						|
 | 
						|
                OFFLOAD_DEBUG_TRACE(2,
 | 
						|
                    "User has specified OFFLOAD_STREAM_AFFINITY=%s\n",
 | 
						|
                    affinity_type);
 | 
						|
 | 
						|
                // Set type of affinity requested
 | 
						|
                affinity_str_len = strlen(affinity_type);
 | 
						|
                for (i=0; i<affinity_str_len && i<15; i++)
 | 
						|
                {
 | 
						|
                    affinity_str[i] = tolower(affinity_type[i]);
 | 
						|
                }
 | 
						|
                affinity_str[i] = '\0';
 | 
						|
                if (strcmp(affinity_str, "compact") == 0) {
 | 
						|
                    affinity_spec.affinity_type = affinity_compact;
 | 
						|
                    OFFLOAD_DEBUG_TRACE(2, "Setting affinity=compact\n");
 | 
						|
                } else if (strcmp(affinity_str, "scatter") == 0) {
 | 
						|
                    affinity_spec.affinity_type = affinity_scatter;
 | 
						|
                    OFFLOAD_DEBUG_TRACE(2, "Setting affinity=scatter\n");
 | 
						|
                } else {
 | 
						|
                    LIBOFFLOAD_ERROR(c_incorrect_affinity, affinity_str);
 | 
						|
                    affinity_spec.affinity_type = affinity_compact;
 | 
						|
                    OFFLOAD_DEBUG_TRACE(2, "Setting affinity=compact\n");
 | 
						|
                }
 | 
						|
            }
 | 
						|
            // Make flat copy of sink mask because COI's mask is opaque
 | 
						|
            for (i=0; i<16; i++) {
 | 
						|
                affinity_spec.sink_mask[i] = in_Mask[i];
 | 
						|
            }
 | 
						|
            // Set number of cores and threads
 | 
						|
            affinity_spec.num_cores = m_num_cores;
 | 
						|
            affinity_spec.num_threads = m_num_threads;
 | 
						|
 | 
						|
            COIEVENT event;
 | 
						|
            res = COI::PipelineRunFunction(pipeline,
 | 
						|
                                   m_funcs[c_func_set_stream_affinity],
 | 
						|
                                   0, 0, 0,
 | 
						|
                                   0, 0,
 | 
						|
                                   &affinity_spec, sizeof(affinity_spec),
 | 
						|
                                   0, 0,
 | 
						|
                                   &event);
 | 
						|
            check_result(res, c_pipeline_run_func, m_index, res);
 | 
						|
    
 | 
						|
            res = COI::EventWait(1, &event, -1, 1, 0, 0);
 | 
						|
            check_result(res, c_event_wait, res);
 | 
						|
        }
 | 
						|
 | 
						|
        m_stream_lock.unlock();
 | 
						|
        stream->set_pipeline(pipeline);
 | 
						|
    }
 | 
						|
    return pipeline;
 | 
						|
}
 | 
						|
 | 
						|
void Engine::stream_destroy(_Offload_stream handle)
 | 
						|
{
 | 
						|
    // get stream
 | 
						|
    Stream * stream =  Stream::find_stream(handle, true);
 | 
						|
 | 
						|
    if (stream) {
 | 
						|
        // return cpus for future use
 | 
						|
        for (int i = 0; i < m_num_threads; i++) {
 | 
						|
            if (stream->m_stream_cpus.test(i)) {
 | 
						|
                CpuEl *cpu_el = m_cpus + i;
 | 
						|
                CpuEl *cpu_first_el = cpu_el;
 | 
						|
                // decrease count of thread "i" and move its CpuEl to the
 | 
						|
                // proper place into the ordered list
 | 
						|
                cpu_el->count--;
 | 
						|
                while (cpu_el->prev) {
 | 
						|
                    if (cpu_first_el->count > cpu_el->prev->count) {
 | 
						|
                        break;
 | 
						|
                    }
 | 
						|
                    else if (cpu_first_el->count == cpu_el->prev->count &&
 | 
						|
                             CPU_INDEX(cpu_first_el) > CPU_INDEX(cpu_el->prev)) {
 | 
						|
                        break;
 | 
						|
                    }
 | 
						|
                    cpu_el = cpu_el->prev;
 | 
						|
                }
 | 
						|
                cpu_el = cpu_el->prev;
 | 
						|
                // If cpu_el for thread "i" must be moved in the list
 | 
						|
                if (cpu_first_el != cpu_el) {
 | 
						|
                    // Thread "i" is used the least times. It must be set as
 | 
						|
                    // the m_cpu_head.
 | 
						|
                    if (!cpu_el) {
 | 
						|
                        if (!cpu_first_el->prev) {
 | 
						|
                            continue;
 | 
						|
                        }
 | 
						|
                        // remove cpu_el.
 | 
						|
                        cpu_first_el->prev->next = cpu_first_el->next;
 | 
						|
                        if (cpu_first_el->next) {
 | 
						|
                            cpu_first_el->next->prev = cpu_first_el->prev;
 | 
						|
                        }
 | 
						|
                        // make cpu_first_el as new m_cpu_head
 | 
						|
                        cpu_first_el->prev = NULL;
 | 
						|
                        cpu_first_el->next = m_cpu_head;
 | 
						|
                        m_cpu_head->prev = cpu_first_el;
 | 
						|
                        m_cpu_head = cpu_first_el;
 | 
						|
                    }
 | 
						|
                    else {
 | 
						|
                        move_cpu_el_after(cpu_first_el, cpu_el);
 | 
						|
                    }
 | 
						|
                }
 | 
						|
            }
 | 
						|
        }
 | 
						|
        print_stream_cpu_list("stream_destroy");        
 | 
						|
        delete stream;
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        LIBOFFLOAD_ERROR(c_offload_no_stream, m_index);
 | 
						|
        LIBOFFLOAD_ABORT;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
uint64_t Engine::get_thread_id(void)
 | 
						|
{
 | 
						|
    Thread* thread = (Thread*) thread_getspecific(mic_thread_key);
 | 
						|
    if (thread == 0) {
 | 
						|
        thread = new Thread(&m_proc_number);
 | 
						|
        thread_setspecific(mic_thread_key, thread);
 | 
						|
    }
 | 
						|
 | 
						|
    return reinterpret_cast<uint64_t>(thread);
 | 
						|
}
 | 
						|
 | 
						|
AutoSet& Engine::get_auto_vars(void)
 | 
						|
{
 | 
						|
    Thread* thread = (Thread*) thread_getspecific(mic_thread_key);
 | 
						|
    if (thread == 0) {
 | 
						|
        thread = new Thread(&m_proc_number);
 | 
						|
        thread_setspecific(mic_thread_key, thread);
 | 
						|
    }
 | 
						|
 | 
						|
    return thread->get_auto_vars();
 | 
						|
}
 | 
						|
 | 
						|
void Engine::destroy_thread_data(void *data)
 | 
						|
{
 | 
						|
    delete static_cast<Thread*>(data);
 | 
						|
}
 |