mirror of git://gcc.gnu.org/git/gcc.git
Move gcn-run heap into GPU memory.
2019-11-13 Andrew Stubbs <ams@codesourcery.com> gcc/ * config/gcn/gcn-run.c (heap_region): New global variable. (struct hsa_runtime_fn_info): Add hsa_memory_assign_agent_fn. (init_hsa_runtime_functions): Initialize hsa_memory_assign_agent. (get_kernarg_region): Move contents to .... (get_memory_region): .... here. (get_heap_region): New function. (init_device): Initialize the heap_region. (device_malloc): Add region parameter. (struct kernargs): Move heap .... (heap): ... to global scope. (main): Allocate heap separate to kernargs. From-SVN: r278131
This commit is contained in:
parent
2dbad62da9
commit
e3d0ee4acf
|
@ -1,3 +1,17 @@
|
|||
2019-11-13 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
* config/gcn/gcn-run.c (heap_region): New global variable.
|
||||
(struct hsa_runtime_fn_info): Add hsa_memory_assign_agent_fn.
|
||||
(init_hsa_runtime_functions): Initialize hsa_memory_assign_agent.
|
||||
(get_kernarg_region): Move contents to ....
|
||||
(get_memory_region): .... here.
|
||||
(get_heap_region): New function.
|
||||
(init_device): Initialize the heap_region.
|
||||
(device_malloc): Add region parameter.
|
||||
(struct kernargs): Move heap ....
|
||||
(heap): ... to global scope.
|
||||
(main): Allocate heap separate to kernargs.
|
||||
|
||||
2019-11-13 Jan Hubicka <hubicka@ucw.cz>
|
||||
|
||||
* ipa-prop.c (ipa_print_node_jump_functions,
|
||||
|
|
|
@ -72,6 +72,7 @@ uint64_t main_kernel = 0;
|
|||
hsa_executable_t executable = { 0 };
|
||||
|
||||
hsa_region_t kernargs_region = { 0 };
|
||||
hsa_region_t heap_region = { 0 };
|
||||
uint32_t kernarg_segment_size = 0;
|
||||
uint32_t group_segment_size = 0;
|
||||
uint32_t private_segment_size = 0;
|
||||
|
@ -135,6 +136,8 @@ struct hsa_runtime_fn_info
|
|||
hsa_signal_t *signal);
|
||||
hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size,
|
||||
void **ptr);
|
||||
hsa_status_t (*hsa_memory_assign_agent_fn) (void *ptr, hsa_agent_t agent,
|
||||
hsa_access_permission_t access);
|
||||
hsa_status_t (*hsa_memory_copy_fn) (void *dst, const void *src,
|
||||
size_t size);
|
||||
hsa_status_t (*hsa_memory_free_fn) (void *ptr);
|
||||
|
@ -204,6 +207,7 @@ init_hsa_runtime_functions (void)
|
|||
DLSYM_FN (hsa_executable_freeze)
|
||||
DLSYM_FN (hsa_signal_create)
|
||||
DLSYM_FN (hsa_memory_allocate)
|
||||
DLSYM_FN (hsa_memory_assign_agent)
|
||||
DLSYM_FN (hsa_memory_copy)
|
||||
DLSYM_FN (hsa_memory_free)
|
||||
DLSYM_FN (hsa_signal_destroy)
|
||||
|
@ -282,7 +286,8 @@ get_gpu_agent (hsa_agent_t agent, void *data __attribute__ ((unused)))
|
|||
suitable one has been found. */
|
||||
|
||||
static hsa_status_t
|
||||
get_kernarg_region (hsa_region_t region, void *data __attribute__ ((unused)))
|
||||
get_memory_region (hsa_region_t region, hsa_region_t *retval,
|
||||
hsa_region_global_flag_t kind)
|
||||
{
|
||||
/* Reject non-global regions. */
|
||||
hsa_region_segment_t segment;
|
||||
|
@ -294,9 +299,9 @@ get_kernarg_region (hsa_region_t region, void *data __attribute__ ((unused)))
|
|||
hsa_region_global_flag_t flags;
|
||||
hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS,
|
||||
&flags);
|
||||
if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG)
|
||||
if (flags & kind)
|
||||
{
|
||||
kernargs_region = region;
|
||||
*retval = region;
|
||||
return HSA_STATUS_INFO_BREAK;
|
||||
}
|
||||
|
||||
|
@ -304,6 +309,20 @@ get_kernarg_region (hsa_region_t region, void *data __attribute__ ((unused)))
|
|||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
static hsa_status_t
|
||||
get_kernarg_region (hsa_region_t region, void *data __attribute__((unused)))
|
||||
{
|
||||
return get_memory_region (region, &kernargs_region,
|
||||
HSA_REGION_GLOBAL_FLAG_KERNARG);
|
||||
}
|
||||
|
||||
static hsa_status_t
|
||||
get_heap_region (hsa_region_t region, void *data __attribute__((unused)))
|
||||
{
|
||||
return get_memory_region (region, &heap_region,
|
||||
HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED);
|
||||
}
|
||||
|
||||
/* Initialize the HSA Runtime library and GPU device. */
|
||||
|
||||
static void
|
||||
|
@ -338,6 +357,13 @@ init_device ()
|
|||
NULL),
|
||||
status == HSA_STATUS_SUCCESS || status == HSA_STATUS_INFO_BREAK,
|
||||
"Locate kernargs memory");
|
||||
|
||||
/* Select a memory region for the kernel heap.
|
||||
The call-back function, get_heap_region, does the selection. */
|
||||
XHSA_CMP (hsa_fns.hsa_agent_iterate_regions_fn (device, get_heap_region,
|
||||
NULL),
|
||||
status == HSA_STATUS_SUCCESS || status == HSA_STATUS_INFO_BREAK,
|
||||
"Locate device memory");
|
||||
}
|
||||
|
||||
|
||||
|
@ -593,10 +619,10 @@ found_main:;
|
|||
__flat_scalar GCN address space). */
|
||||
|
||||
static void *
|
||||
device_malloc (size_t size)
|
||||
device_malloc (size_t size, hsa_region_t region)
|
||||
{
|
||||
void *result;
|
||||
XHSA (hsa_fns.hsa_memory_allocate_fn (kernargs_region, size, &result),
|
||||
XHSA (hsa_fns.hsa_memory_allocate_fn (region, size, &result),
|
||||
"Allocate device memory");
|
||||
return result;
|
||||
}
|
||||
|
@ -634,14 +660,14 @@ struct kernargs
|
|||
} queue[1024];
|
||||
unsigned int consumed;
|
||||
} output_data;
|
||||
|
||||
struct heap
|
||||
{
|
||||
int64_t size;
|
||||
char data[0];
|
||||
} heap;
|
||||
};
|
||||
|
||||
struct heap
|
||||
{
|
||||
int64_t size;
|
||||
char data[0];
|
||||
} heap;
|
||||
|
||||
/* Print any console output from the kernel.
|
||||
We print all entries from "consumed" to the next entry without a "written"
|
||||
flag, or "next_output" is reached. The buffer is circular, but the
|
||||
|
@ -811,13 +837,19 @@ main (int argc, char *argv[])
|
|||
|
||||
/* Allocate device memory for both function parameters and the argv
|
||||
data. */
|
||||
size_t heap_size = 10 * 1024 * 1024; /* 10MB. */
|
||||
struct kernargs *kernargs = device_malloc (sizeof (*kernargs) + heap_size);
|
||||
struct kernargs *kernargs = device_malloc (sizeof (*kernargs),
|
||||
kernargs_region);
|
||||
struct argdata
|
||||
{
|
||||
int64_t argv_data[kernel_argc];
|
||||
char strings[args_size];
|
||||
} *args = device_malloc (sizeof (struct argdata));
|
||||
} *args = device_malloc (sizeof (struct argdata), kernargs_region);
|
||||
|
||||
size_t heap_size = 10 * 1024 * 1024; /* 10MB. */
|
||||
struct heap *heap = device_malloc (heap_size, heap_region);
|
||||
XHSA (hsa_fns.hsa_memory_assign_agent_fn (heap, device,
|
||||
HSA_ACCESS_PERMISSION_RW),
|
||||
"Assign heap to device agent");
|
||||
|
||||
/* Write the data to the target. */
|
||||
kernargs->argc = kernel_argc;
|
||||
|
@ -837,8 +869,8 @@ main (int argc, char *argv[])
|
|||
memcpy (&args->strings[offset], kernel_argv[i], arg_len + 1);
|
||||
offset += arg_len;
|
||||
}
|
||||
kernargs->heap_ptr = (int64_t) &kernargs->heap;
|
||||
kernargs->heap.size = heap_size;
|
||||
kernargs->heap_ptr = (int64_t) heap;
|
||||
hsa_fns.hsa_memory_copy_fn (&heap->size, &heap_size, sizeof (heap_size));
|
||||
|
||||
/* Run constructors on the GPU. */
|
||||
run (init_array_kernel, kernargs);
|
||||
|
|
Loading…
Reference in New Issue