mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			[BRIGFE] phsa-specific optimizations
Add flag -fassume-phsa that is on by default. If -fno-assume-phsa is given, these optimizations are disabled. With this flag, gccbrig can generate GENERIC that assumes we are targeting a phsa-runtime based implementation, which allows us to expose the work-item context accesses to retrieve WI IDs etc. which helps optimizers. First optimization that takes advantage of this is to get rid of the setworkitemid calls whenever we have non-inlined calls that use IDs internally. Other optimizations added in this commit: - expand absoluteid to similar level of simplicity as workitemid. At the moment absoluteid is the best indexing ID to end up with WG vectorization. - propagate ID variables closer to their uses. This is mainly to avoid known useless casts, which confuse at least scalar evolution analysis. - use signed long long for storing IDs. Unsigned integers have defined wraparound semantics, which confuse at least scalar evolution analysis, leading to unvectorizable WI loops. - also refactor some BRIG function generation helpers to brig_function. - no point in having the wi-loop as a for-loop. It's really a do...while and SCEV can analyze it just fine still. - add consts to ptrs etc. in BRIG builtin defs. Improves optimization opportunities. - add qualifiers to generated function parameters. Const and restrict on the hidden local/private pointers, the arg buffer and the context pointer help some optimizations. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@259957 138bc75d-0d04-0410-961f-82ee72b054a4
This commit is contained in:
		
							parent
							
								
									cc0d58a44a
								
							
						
					
					
						commit
						4ac4c32386
					
				|  | @ -1,3 +1,9 @@ | |||
| 2018-05-04  Pekka Jääskeläinen  <pekka.jaaskelainen@parmance.com> | ||||
| 
 | ||||
| 	* brig-builtins.def: Add consts to ptrs etc. in BRIG builtin defs. | ||||
| 	To improve optimization opportunities. | ||||
| 	* builtin-types.def: The new needed builtin types for the above. | ||||
| 
 | ||||
| 2018-05-04  Richard Biener  <rguenther@suse.de> | ||||
| 
 | ||||
| 	* bb-reorder.c (sanitize_hot_paths): Release hot_bbs_to_check. | ||||
|  |  | |||
|  | @ -45,25 +45,25 @@ DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_GRIDSIZE, BRIG_OPCODE_GRIDSIZE, | |||
| 
 | ||||
| DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_WORKITEMFLATABSID_U32, | ||||
| 		  BRIG_OPCODE_WORKITEMFLATABSID, BRIG_TYPE_U32, | ||||
| 		  "__hsail_workitemflatabsid_u32", BT_FN_UINT_PTR, | ||||
| 		  ATTR_NOTHROW_LEAF_LIST) | ||||
| 		  "__hsail_workitemflatabsid_u32", BT_FN_UINT_CONST_PTR, | ||||
| 		  ATTR_PURE_NOTHROW_LEAF_LIST) | ||||
| 
 | ||||
| DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_WORKITEMFLATABSID_U64, | ||||
| 		  BRIG_OPCODE_WORKITEMFLATABSID, BRIG_TYPE_U64, | ||||
| 		  "__hsail_workitemflatabsid_u64", BT_FN_ULONG_PTR, | ||||
| 		  ATTR_NOTHROW_LEAF_LIST) | ||||
| 		  "__hsail_workitemflatabsid_u64", BT_FN_ULONG_CONST_PTR, | ||||
| 		  ATTR_PURE_NOTHROW_LEAF_LIST) | ||||
| 
 | ||||
| DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_WORKITEMFLATID, BRIG_OPCODE_WORKITEMFLATID, | ||||
| 		  BRIG_TYPE_U32, "__hsail_workitemflatid", BT_FN_UINT_PTR, | ||||
| 		  ATTR_NOTHROW_LEAF_LIST) | ||||
| 		  BRIG_TYPE_U32, "__hsail_workitemflatid", BT_FN_UINT_CONST_PTR, | ||||
| 		  ATTR_PURE_NOTHROW_LEAF_LIST) | ||||
| 
 | ||||
| DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_WORKITEMID, BRIG_OPCODE_WORKITEMID, | ||||
| 		  BRIG_TYPE_U32, "__hsail_workitemid", BT_FN_UINT_UINT_PTR, | ||||
| 		  ATTR_NOTHROW_LEAF_LIST) | ||||
| 		  BRIG_TYPE_U32, "__hsail_workitemid", | ||||
| 		  BT_FN_UINT_UINT_CONST_PTR, ATTR_PURE_NOTHROW_LEAF_LIST) | ||||
| 
 | ||||
| DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_WORKGROUPID, BRIG_OPCODE_WORKGROUPID, | ||||
| 		  BRIG_TYPE_U32, "__hsail_workgroupid", BT_FN_UINT_UINT_PTR, | ||||
| 		  ATTR_PURE_NOTHROW_LEAF_LIST) | ||||
| 		  BRIG_TYPE_U32, "__hsail_workgroupid", | ||||
| 		  BT_FN_UINT_UINT_CONST_PTR, ATTR_PURE_NOTHROW_LEAF_LIST) | ||||
| 
 | ||||
| DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_CURRENTWORKITEMFLATID, | ||||
| 		  BRIG_OPCODE_CURRENTWORKITEMFLATID, | ||||
|  | @ -90,11 +90,12 @@ DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_PACKETCOMPLETIONSIG_SIG32, | |||
| 
 | ||||
| DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_CURRENTWORKGROUPSIZE, | ||||
| 		  BRIG_OPCODE_CURRENTWORKGROUPSIZE, BRIG_TYPE_U32, | ||||
| 		  "__hsail_currentworkgroupsize", BT_FN_UINT_UINT_PTR, | ||||
| 		  "__hsail_currentworkgroupsize", BT_FN_UINT_UINT_CONST_PTR, | ||||
| 		  ATTR_PURE_NOTHROW_LEAF_LIST) | ||||
| 
 | ||||
| DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_WORKGROUPSIZE, BRIG_OPCODE_WORKGROUPSIZE, | ||||
| 		  BRIG_TYPE_U32, "__hsail_workgroupsize", BT_FN_UINT_UINT_PTR, | ||||
| 		  BRIG_TYPE_U32, "__hsail_workgroupsize", | ||||
| 		  BT_FN_UINT_UINT_CONST_PTR, | ||||
| 		  ATTR_PURE_NOTHROW_LEAF_LIST) | ||||
| 
 | ||||
| DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_DIM, BRIG_OPCODE_DIM, | ||||
|  | @ -565,7 +566,7 @@ DEF_HSAIL_INTR_BUILTIN (BUILT_IN_HSAIL_SETWORKITEMID, "__hsail_setworkitemid", | |||
| 
 | ||||
| DEF_HSAIL_INTR_BUILTIN (BUILT_IN_HSAIL_LAUNCH_WG_FUNC, | ||||
| 		       "__hsail_launch_wg_function", | ||||
| 		       BT_FN_VOID_PTR_PTR_PTR, ATTR_NOTHROW_LEAF_LIST) | ||||
| 		       BT_FN_VOID_PTR_PTR_UINT32, ATTR_NOTHROW_LEAF_LIST) | ||||
| 
 | ||||
| DEF_HSAIL_INTR_BUILTIN (BUILT_IN_HSAIL_LAUNCH_KERNEL, | ||||
| 		       "__hsail_launch_kernel", | ||||
|  |  | |||
|  | @ -1,3 +1,46 @@ | |||
| 2018-05-04  Pekka Jääskeläinen  <pekka.jaaskelainen@parmance.com> | ||||
| 
 | ||||
| 	Add flag -fassume-phsa that is on by default. If -fno-assume-phsa | ||||
| 	is given, these optimizations are disabled.  With this flag, gccbrig | ||||
| 	can generate GENERIC that assumes we are targeting a phsa-runtime | ||||
| 	based implementation, which allows us to expose the work-item context | ||||
| 	accesses to retrieve WI IDs etc.  which helps optimizers. | ||||
| 	First optimization that takes advantage of this is to get rid of | ||||
| 	the setworkitemid calls whenever we have non-inlined calls that | ||||
| 	use IDs internally.  Other optimizations added in this commit: | ||||
| 	- expand absoluteid to similar level of simplicity as workitemid. | ||||
| 	At the moment absoluteid is the best indexing ID to end up with | ||||
| 	WG vectorization. | ||||
| 	- propagate ID variables closer to their uses. This is mainly | ||||
| 	to avoid known useless casts, which confuse at least scalar | ||||
| 	evolution analysis. | ||||
| 	- use signed long long for storing IDs. Unsigned integers have | ||||
| 	defined wraparound semantics, which confuse at least scalar | ||||
| 	evolution analysis, leading to unvectorizable WI loops. | ||||
| 	- also refactor some BRIG function generation helpers to brig_function. | ||||
| 	- no point in having the wi-loop as a for-loop. It's really | ||||
| 	a do...while and SCEV can analyze it just fine still. | ||||
| 	- add consts to ptrs etc. in BRIG builtin defs. | ||||
| 	Improves optimization opportunities. | ||||
| 	- add qualifiers to generated function parameters. | ||||
| 	Const and restrict on the hidden local/private pointers, | ||||
| 	the arg buffer and the context pointer help some optimizations. | ||||
| 	* brig/brigfrontend/brig-basic-inst-handler.cc: See above. | ||||
| 	* brig/brigfrontend/brig-branch-inst-handler.cc: See above. | ||||
| 	* brig/brigfrontend/brig-cmp-inst-handler.cc: See above. | ||||
| 	* brig/brigfrontend/brig-code-entry-handler.cc: See above. | ||||
| 	* brig/brigfrontend/brig-code-entry-handler.h: See above. | ||||
| 	* brig/brigfrontend/brig-control-handler.cc: See above. | ||||
| 	* brig/brigfrontend/brig-cvt-inst-handler.cc: See above. | ||||
| 	* brig/brigfrontend/brig-function-handler.cc: See above. | ||||
| 	* brig/brigfrontend/brig-function.cc: See above. | ||||
| 	* brig/brigfrontend/brig-function.h: See above. | ||||
| 	* brig/brigfrontend/brig-label-handler.cc: See above. | ||||
| 	* brig/brigfrontend/brig-lane-inst-handler.cc: See above. | ||||
| 	* brig/brigfrontend/brig-mem-inst-handler.cc: See above. | ||||
| 	* brig/brigfrontend/phsa.h: See above. | ||||
| 	* brig/lang.opt: See above. | ||||
| 
 | ||||
| 2018-05-04  Pekka Jääskeläinen  <pekka.jaaskelainen@parmance.com> | ||||
| 
 | ||||
| 	* brig/brigfrontend/brig-function-handler.cc: Skip multiple forward | ||||
|  |  | |||
|  | @ -105,7 +105,8 @@ brig_basic_inst_handler::build_shuffle (tree arith_type, | |||
|   /* Unpack the tightly packed mask elements to BIT_FIELD_REFs
 | ||||
|      from which to construct the mask vector as understood by | ||||
|      VEC_PERM_EXPR.  */ | ||||
|   tree mask_operand = add_temp_var ("shuffle_mask", operands[2]); | ||||
|   tree mask_operand | ||||
|     = m_parent.m_cf->add_temp_var ("shuffle_mask", operands[2]); | ||||
| 
 | ||||
|   tree mask_element_type | ||||
|     = build_nonstandard_integer_type (input_mask_element_size, true); | ||||
|  | @ -219,10 +220,11 @@ brig_basic_inst_handler::build_pack (tree_stl_vec &operands) | |||
|   tree wide_type = build_nonstandard_integer_type (vecsize, 1); | ||||
| 
 | ||||
|   tree src_vect = build_resize_convert_view (wide_type, operands[0]); | ||||
|   src_vect = add_temp_var ("src_vect", src_vect); | ||||
|   src_vect = m_parent.m_cf->add_temp_var ("src_vect", src_vect); | ||||
| 
 | ||||
|   tree scalar = operands[1]; | ||||
|   scalar = add_temp_var ("scalar", convert_to_integer (wide_type, scalar)); | ||||
|   scalar = m_parent.m_cf->add_temp_var ("scalar", | ||||
| 					convert_to_integer (wide_type, scalar)); | ||||
| 
 | ||||
|   tree pos = operands[2]; | ||||
| 
 | ||||
|  | @ -230,21 +232,22 @@ brig_basic_inst_handler::build_pack (tree_stl_vec &operands) | |||
|      Zero them for well-defined semantics.  */ | ||||
|   tree t = build2 (BIT_AND_EXPR, TREE_TYPE (pos), operands[2], | ||||
| 		   build_int_cstu (TREE_TYPE (pos), ecount - 1)); | ||||
|   pos = add_temp_var ("pos", convert (wide_type, t)); | ||||
|   pos = m_parent.m_cf->add_temp_var ("pos", convert (wide_type, t)); | ||||
| 
 | ||||
|   tree element_type = TREE_TYPE (TREE_TYPE (operands[0])); | ||||
|   size_t element_width = int_size_in_bytes (element_type) * BITS_PER_UNIT; | ||||
|   tree ewidth = build_int_cstu (wide_type, element_width); | ||||
| 
 | ||||
|   tree bitoffset = build2 (MULT_EXPR, wide_type, ewidth, pos); | ||||
|   bitoffset = add_temp_var ("offset", bitoffset); | ||||
|   bitoffset = m_parent.m_cf->add_temp_var ("offset", bitoffset); | ||||
| 
 | ||||
|   uint64_t mask_int | ||||
|     = element_width == 64 ? (uint64_t) -1 : ((uint64_t) 1 << element_width) - 1; | ||||
| 
 | ||||
|   tree mask = build_int_cstu (wide_type, mask_int); | ||||
| 
 | ||||
|   mask = add_temp_var ("mask", convert_to_integer (wide_type, mask)); | ||||
|   mask = m_parent.m_cf->add_temp_var ("mask", | ||||
| 				      convert_to_integer (wide_type, mask)); | ||||
| 
 | ||||
|   tree clearing_mask | ||||
|     = build1 (BIT_NOT_EXPR, wide_type, | ||||
|  | @ -311,7 +314,8 @@ brig_basic_inst_handler::build_inst_expr (BrigOpcode16_t brig_opcode, | |||
| 					  tree arith_type, | ||||
| 					  tree_stl_vec &operands) | ||||
| { | ||||
|   tree_code opcode = get_tree_code_for_hsa_opcode (brig_opcode, brig_type); | ||||
|   tree_code opcode | ||||
|     = brig_function::get_tree_code_for_hsa_opcode (brig_opcode, brig_type); | ||||
| 
 | ||||
|   BrigType16_t inner_type = brig_type & BRIG_TYPE_BASE_MASK; | ||||
| 
 | ||||
|  | @ -388,8 +392,8 @@ brig_basic_inst_handler::build_inst_expr (BrigOpcode16_t brig_opcode, | |||
| 	     on which cannot be used in general to remain HSAIL compliant. | ||||
| 	     Perhaps a builtin call would be better option here.  */ | ||||
| 	  return build2 (RDIV_EXPR, arith_type, build_one_cst (arith_type), | ||||
| 			 expand_or_call_builtin (BRIG_OPCODE_SQRT, brig_type, | ||||
| 						 arith_type, operands)); | ||||
| 			 m_parent.m_cf->expand_or_call_builtin | ||||
| 			 (BRIG_OPCODE_SQRT, brig_type, arith_type, operands)); | ||||
| 	} | ||||
|       else if (brig_opcode == BRIG_OPCODE_NRCP) | ||||
| 	{ | ||||
|  | @ -410,8 +414,8 @@ brig_basic_inst_handler::build_inst_expr (BrigOpcode16_t brig_opcode, | |||
| 	gcc_unreachable (); | ||||
|     } | ||||
|   else if (opcode == CALL_EXPR) | ||||
|     return expand_or_call_builtin (brig_opcode, brig_type, arith_type, | ||||
| 				   operands); | ||||
|     return m_parent.m_cf->expand_or_call_builtin (brig_opcode, brig_type, | ||||
| 						  arith_type, operands); | ||||
|   else if (output_count == 1) | ||||
|     { | ||||
|       if (input_count == 1) | ||||
|  | @ -520,7 +524,8 @@ brig_basic_inst_handler::operator () (const BrigBase *base) | |||
|     in_operands[0] = build_lower_element_broadcast (in_operands[0]); | ||||
| 
 | ||||
|   tree_code opcode | ||||
|     = get_tree_code_for_hsa_opcode (brig_inst->opcode, brig_inst_type); | ||||
|     = brig_function::get_tree_code_for_hsa_opcode (brig_inst->opcode, | ||||
| 						   brig_inst_type); | ||||
| 
 | ||||
|   if (p >= BRIG_PACK_PPSAT && p <= BRIG_PACK_PSAT) | ||||
|     { | ||||
|  | @ -566,11 +571,11 @@ brig_basic_inst_handler::operator () (const BrigBase *base) | |||
|       */ | ||||
|       tree_stl_vec operand0_elements; | ||||
|       if (input_count > 0) | ||||
| 	unpack (in_operands[0], operand0_elements); | ||||
| 	m_parent.m_cf->unpack (in_operands[0], operand0_elements); | ||||
| 
 | ||||
|       tree_stl_vec operand1_elements; | ||||
|       if (input_count > 1) | ||||
| 	unpack (in_operands[1], operand1_elements); | ||||
| 	m_parent.m_cf->unpack (in_operands[1], operand1_elements); | ||||
| 
 | ||||
|       tree_stl_vec result_elements; | ||||
| 
 | ||||
|  | @ -617,7 +622,7 @@ brig_basic_inst_handler::operator () (const BrigBase *base) | |||
| 
 | ||||
| 	  result_elements.push_back (convert (scalar_type, scalar_expr)); | ||||
| 	} | ||||
|       instr_expr = pack (result_elements); | ||||
|       instr_expr = m_parent.m_cf->pack (result_elements); | ||||
|     } | ||||
|   else | ||||
|     { | ||||
|  | @ -728,140 +733,3 @@ brig_basic_inst_handler::build_lower_element_broadcast (tree vec_operand) | |||
| 		 vec_operand, mask); | ||||
| } | ||||
| 
 | ||||
| /* Returns the tree code that should be used to implement the given
 | ||||
|    HSA instruction opcode (BRIG_OPCODE) for the given type of instruction | ||||
|    (BRIG_TYPE).  In case the opcode cannot be mapped to a TREE node directly, | ||||
|    returns TREE_LIST (if it can be emulated with a simple chain of tree | ||||
|    nodes) or CALL_EXPR if the opcode should be implemented using a builtin | ||||
|    call.  */ | ||||
| 
 | ||||
| tree_code | ||||
| brig_basic_inst_handler::get_tree_code_for_hsa_opcode | ||||
|   (BrigOpcode16_t brig_opcode, BrigType16_t brig_type) const | ||||
| { | ||||
|   BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK; | ||||
|   switch (brig_opcode) | ||||
|     { | ||||
|     case BRIG_OPCODE_NOP: | ||||
|       return NOP_EXPR; | ||||
|     case BRIG_OPCODE_ADD: | ||||
|       return PLUS_EXPR; | ||||
|     case BRIG_OPCODE_CMOV: | ||||
|       if (brig_inner_type == brig_type) | ||||
| 	return COND_EXPR; | ||||
|       else | ||||
| 	return VEC_COND_EXPR; | ||||
|     case BRIG_OPCODE_SUB: | ||||
|       return MINUS_EXPR; | ||||
|     case BRIG_OPCODE_MUL: | ||||
|     case BRIG_OPCODE_MUL24: | ||||
|       return MULT_EXPR; | ||||
|     case BRIG_OPCODE_MULHI: | ||||
|     case BRIG_OPCODE_MUL24HI: | ||||
|       return MULT_HIGHPART_EXPR; | ||||
|     case BRIG_OPCODE_DIV: | ||||
|       if (gccbrig_is_float_type (brig_inner_type)) | ||||
| 	return RDIV_EXPR; | ||||
|       else | ||||
| 	return TRUNC_DIV_EXPR; | ||||
|     case BRIG_OPCODE_NEG: | ||||
|       return NEGATE_EXPR; | ||||
|     case BRIG_OPCODE_MIN: | ||||
|       if (gccbrig_is_float_type (brig_inner_type)) | ||||
| 	return CALL_EXPR; | ||||
|       else | ||||
| 	return MIN_EXPR; | ||||
|     case BRIG_OPCODE_MAX: | ||||
|       if (gccbrig_is_float_type (brig_inner_type)) | ||||
| 	return CALL_EXPR; | ||||
|       else | ||||
| 	return MAX_EXPR; | ||||
|     case BRIG_OPCODE_FMA: | ||||
|       return FMA_EXPR; | ||||
|     case BRIG_OPCODE_ABS: | ||||
|       return ABS_EXPR; | ||||
|     case BRIG_OPCODE_SHL: | ||||
|       return LSHIFT_EXPR; | ||||
|     case BRIG_OPCODE_SHR: | ||||
|       return RSHIFT_EXPR; | ||||
|     case BRIG_OPCODE_OR: | ||||
|       return BIT_IOR_EXPR; | ||||
|     case BRIG_OPCODE_XOR: | ||||
|       return BIT_XOR_EXPR; | ||||
|     case BRIG_OPCODE_AND: | ||||
|       return BIT_AND_EXPR; | ||||
|     case BRIG_OPCODE_NOT: | ||||
|       return BIT_NOT_EXPR; | ||||
|     case BRIG_OPCODE_RET: | ||||
|       return RETURN_EXPR; | ||||
|     case BRIG_OPCODE_MOV: | ||||
|     case BRIG_OPCODE_LDF: | ||||
|       return MODIFY_EXPR; | ||||
|     case BRIG_OPCODE_LD: | ||||
|     case BRIG_OPCODE_ST: | ||||
|       return MEM_REF; | ||||
|     case BRIG_OPCODE_BR: | ||||
|       return GOTO_EXPR; | ||||
|     case BRIG_OPCODE_REM: | ||||
|       if (brig_type == BRIG_TYPE_U64 || brig_type == BRIG_TYPE_U32) | ||||
| 	return TRUNC_MOD_EXPR; | ||||
|       else | ||||
| 	return CALL_EXPR; | ||||
|     case BRIG_OPCODE_NRCP: | ||||
|     case BRIG_OPCODE_NRSQRT: | ||||
|       /* Implement as 1/f (x).  gcc should pattern detect that and
 | ||||
| 	 use a native instruction, if available, for it.  */ | ||||
|       return TREE_LIST; | ||||
|     case BRIG_OPCODE_FLOOR: | ||||
|     case BRIG_OPCODE_CEIL: | ||||
|     case BRIG_OPCODE_SQRT: | ||||
|     case BRIG_OPCODE_NSQRT: | ||||
|     case BRIG_OPCODE_RINT: | ||||
|     case BRIG_OPCODE_TRUNC: | ||||
|     case BRIG_OPCODE_POPCOUNT: | ||||
|     case BRIG_OPCODE_COPYSIGN: | ||||
|     case BRIG_OPCODE_NCOS: | ||||
|     case BRIG_OPCODE_NSIN: | ||||
|     case BRIG_OPCODE_NLOG2: | ||||
|     case BRIG_OPCODE_NEXP2: | ||||
|     case BRIG_OPCODE_NFMA: | ||||
|       /* Class has type B1 regardless of the float type, thus
 | ||||
| 	 the below builtin map search cannot find it.  */ | ||||
|     case BRIG_OPCODE_CLASS: | ||||
|     case BRIG_OPCODE_WORKITEMABSID: | ||||
|       return CALL_EXPR; | ||||
|     default: | ||||
| 
 | ||||
|       /* Some BRIG opcodes can use the same builtins for unsigned and
 | ||||
| 	 signed types.  Force these cases to unsigned types. | ||||
|       */ | ||||
| 
 | ||||
|       if (brig_opcode == BRIG_OPCODE_BORROW | ||||
| 	  || brig_opcode == BRIG_OPCODE_CARRY | ||||
| 	  || brig_opcode == BRIG_OPCODE_LASTBIT | ||||
| 	  || brig_opcode == BRIG_OPCODE_BITINSERT) | ||||
| 	{ | ||||
| 	  if (brig_type == BRIG_TYPE_S32) | ||||
| 	    brig_type = BRIG_TYPE_U32; | ||||
| 	  else if (brig_type == BRIG_TYPE_S64) | ||||
| 	    brig_type = BRIG_TYPE_U64; | ||||
| 	} | ||||
| 
 | ||||
| 
 | ||||
|       builtin_map::const_iterator i | ||||
| 	= s_custom_builtins.find (std::make_pair (brig_opcode, brig_type)); | ||||
|       if (i != s_custom_builtins.end ()) | ||||
| 	return CALL_EXPR; | ||||
|       else if (s_custom_builtins.find | ||||
| 	       (std::make_pair (brig_opcode, brig_inner_type)) | ||||
| 	       != s_custom_builtins.end ()) | ||||
| 	return CALL_EXPR; | ||||
|       if (brig_inner_type == BRIG_TYPE_F16 | ||||
| 	  && s_custom_builtins.find | ||||
| 	  (std::make_pair (brig_opcode, BRIG_TYPE_F32)) | ||||
| 	  != s_custom_builtins.end ()) | ||||
| 	return CALL_EXPR; | ||||
|       break; | ||||
|     } | ||||
|   return TREE_LIST; /* Emulate using a chain of nodes.  */ | ||||
| } | ||||
|  |  | |||
|  | @ -119,10 +119,11 @@ brig_branch_inst_handler::operator () (const BrigBase *base) | |||
| 	 memory.  */ | ||||
| 
 | ||||
|       tree group_local_offset | ||||
| 	= add_temp_var ("group_local_offset", | ||||
| 			build_int_cst | ||||
| 			(uint32_type_node, | ||||
| 			 m_parent.m_cf->m_local_group_variables.size())); | ||||
| 	= m_parent.m_cf->add_temp_var ("group_local_offset", | ||||
| 				       build_int_cst | ||||
| 				       (uint32_type_node, | ||||
| 					m_parent.m_cf-> | ||||
| 					m_local_group_variables.size())); | ||||
| 
 | ||||
|       /* TODO: ensure the callee's frame is aligned!  */ | ||||
| 
 | ||||
|  | @ -152,6 +153,7 @@ brig_branch_inst_handler::operator () (const BrigBase *base) | |||
|       m_parent.m_cf->m_called_functions.push_back (func_ref); | ||||
|       if (DECL_EXTERNAL (func_ref)) | ||||
| 	m_parent.add_decl_call (call); | ||||
|       m_parent.m_cf->start_new_bb (); | ||||
| 
 | ||||
|       return base->byteCount; | ||||
|     } | ||||
|  | @ -216,18 +218,21 @@ brig_branch_inst_handler::operator () (const BrigBase *base) | |||
| 	 ensure the barrier won't be duplicated or moved out of loops etc. | ||||
| 	 Like the 'noduplicate' of LLVM.  Same goes for fbarriers.  */ | ||||
|       m_parent.m_cf->append_statement | ||||
| 	(expand_or_call_builtin (brig_inst->opcode, BRIG_TYPE_NONE, NULL_TREE, | ||||
| 				 call_operands)); | ||||
| 	(m_parent.m_cf->expand_or_call_builtin (brig_inst->opcode, | ||||
| 						BRIG_TYPE_NONE, NULL_TREE, | ||||
| 						call_operands)); | ||||
|     } | ||||
|   else if (brig_inst->opcode >= BRIG_OPCODE_ARRIVEFBAR | ||||
| 	   && brig_inst->opcode <= BRIG_OPCODE_WAITFBAR) | ||||
|     { | ||||
|       m_parent.m_cf->m_has_barriers = true; | ||||
|       m_parent.m_cf->append_statement | ||||
| 	(expand_or_call_builtin (brig_inst->opcode, BRIG_TYPE_NONE, | ||||
| 				 uint32_type_node, operands)); | ||||
| 	(m_parent.m_cf->expand_or_call_builtin (brig_inst->opcode, | ||||
| 						BRIG_TYPE_NONE, | ||||
| 						uint32_type_node, operands)); | ||||
|     } | ||||
|   else | ||||
|     gcc_unreachable (); | ||||
|   m_parent.m_cf->start_new_bb (); | ||||
|   return base->byteCount; | ||||
| } | ||||
|  |  | |||
|  | @ -180,17 +180,17 @@ brig_cmp_inst_handler::operator () (const BrigBase *base) | |||
| 	 results, we must now truncate the result vector to S16s so it | ||||
| 	 fits to the destination register.  We can build the target vector | ||||
| 	 type from the f16 storage type (unsigned ints).  */ | ||||
|       expr = add_temp_var ("wide_cmp_result", expr); | ||||
|       expr = m_parent.m_cf->add_temp_var ("wide_cmp_result", expr); | ||||
|       tree_stl_vec wide_elements; | ||||
|       tree_stl_vec shrunk_elements; | ||||
|       unpack (expr, wide_elements); | ||||
|       m_parent.m_cf->unpack (expr, wide_elements); | ||||
|       for (size_t i = 0; i < wide_elements.size (); ++i) | ||||
| 	{ | ||||
| 	  tree wide = wide_elements.at (i); | ||||
| 	  shrunk_elements.push_back | ||||
| 	    (convert_to_integer (short_integer_type_node, wide)); | ||||
| 	} | ||||
|       expr = pack (shrunk_elements); | ||||
|       expr = m_parent.m_cf->pack (shrunk_elements); | ||||
|     } | ||||
|   build_output_assignment (*inst_base, operands[0], expr); | ||||
| 
 | ||||
|  |  | |||
|  | @ -41,24 +41,9 @@ | |||
| #include "brig-builtins.h" | ||||
| #include "fold-const.h" | ||||
| 
 | ||||
| brig_code_entry_handler::builtin_map brig_code_entry_handler::s_custom_builtins; | ||||
| 
 | ||||
| brig_code_entry_handler::brig_code_entry_handler (brig_to_generic &parent) | ||||
|   : brig_entry_handler (parent) | ||||
| { | ||||
|   if (s_custom_builtins.size () > 0) return; | ||||
| 
 | ||||
|   /* Populate the builtin index.  */ | ||||
| #undef DEF_HSAIL_ATOMIC_BUILTIN | ||||
| #undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN | ||||
| #undef DEF_HSAIL_INTR_BUILTIN | ||||
| #undef DEF_HSAIL_SAT_BUILTIN | ||||
| #undef DEF_HSAIL_BUILTIN | ||||
| #define DEF_HSAIL_BUILTIN(ENUM, HSAIL_OPCODE, HSAIL_TYPE, NAME, TYPE, ATTRS) \ | ||||
|   s_custom_builtins[std::make_pair (HSAIL_OPCODE, HSAIL_TYPE)]		\ | ||||
|     = builtin_decl_explicit (ENUM); | ||||
| 
 | ||||
| #include "brig-builtins.def" | ||||
| } | ||||
| 
 | ||||
| /* Build a tree operand which is a reference to a piece of code.  REF is the
 | ||||
|  | @ -301,18 +286,18 @@ brig_code_entry_handler::build_address_operand | |||
| 
 | ||||
| 	  tree local_size | ||||
| 	    = build2 (MULT_EXPR, uint32_type_node, | ||||
| 		      expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE, | ||||
| 					      BRIG_TYPE_U32, | ||||
| 					      uint32_type_node, uint32_0), | ||||
| 		      expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE, | ||||
| 					      BRIG_TYPE_U32, | ||||
| 					      uint32_type_node, uint32_1)); | ||||
| 		      m_parent.m_cf->expand_or_call_builtin | ||||
| 		      (BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32, | ||||
| 		       uint32_type_node, uint32_0), | ||||
| 		      m_parent.m_cf->expand_or_call_builtin | ||||
| 		      (BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32, | ||||
| 		       uint32_type_node, uint32_1)); | ||||
| 
 | ||||
| 	  local_size | ||||
| 	    = build2 (MULT_EXPR, uint32_type_node, | ||||
| 		      expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE, | ||||
| 					      BRIG_TYPE_U32, | ||||
| 					      uint32_type_node, uint32_2), | ||||
| 		      m_parent.m_cf->expand_or_call_builtin | ||||
| 		      (BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32, | ||||
| 		       uint32_type_node, uint32_2), | ||||
| 		      local_size); | ||||
| 
 | ||||
| 	  tree var_region | ||||
|  | @ -324,9 +309,9 @@ brig_code_entry_handler::build_address_operand | |||
| 	    = build2 (MULT_EXPR, uint32_type_node, | ||||
| 		      build_int_cst (uint32_type_node, | ||||
| 				     m_parent.private_variable_size (var_name)), | ||||
| 		      expand_or_call_builtin (BRIG_OPCODE_WORKITEMFLATID, | ||||
| 					      BRIG_TYPE_U32, | ||||
| 					      uint32_type_node, operands)); | ||||
| 		      m_parent.m_cf->expand_or_call_builtin | ||||
| 		      (BRIG_OPCODE_WORKITEMFLATID, BRIG_TYPE_U32, | ||||
| 		       uint32_type_node, operands)); | ||||
| 
 | ||||
| 	  tree var_offset | ||||
| 	    = build2 (PLUS_EXPR, uint32_type_node, var_region, pos); | ||||
|  | @ -336,8 +321,9 @@ brig_code_entry_handler::build_address_operand | |||
| 	     offset to a flat address by adding it as an offset to a (private | ||||
| 	     or group) base pointer later on.  Same applies to group_var_offset.  */ | ||||
| 	  symbol_base | ||||
| 	    = add_temp_var ("priv_var_offset", | ||||
| 			    convert (size_type_node, var_offset)); | ||||
| 	    = m_parent.m_cf->add_temp_var ("priv_var_offset", | ||||
| 					   convert (size_type_node, | ||||
| 						    var_offset)); | ||||
| 	} | ||||
|       else if (segment == BRIG_SEGMENT_ARG) | ||||
| 	{ | ||||
|  | @ -699,138 +685,6 @@ brig_code_entry_handler::get_tree_expr_type_for_hsa_type | |||
|     return gccbrig_tree_type_for_hsa_type (brig_type); | ||||
| } | ||||
| 
 | ||||
| /* In case the HSA instruction must be implemented using a builtin,
 | ||||
|    this function is called to get the correct builtin function. | ||||
|    TYPE is the instruction tree type, BRIG_OPCODE the opcode of the | ||||
|    brig instruction and BRIG_TYPE the brig instruction's type.  */ | ||||
| 
 | ||||
| tree | ||||
| brig_code_entry_handler::get_builtin_for_hsa_opcode | ||||
|   (tree type, BrigOpcode16_t brig_opcode, BrigType16_t brig_type) const | ||||
| { | ||||
|   tree builtin = NULL_TREE; | ||||
|   tree builtin_type = type; | ||||
| 
 | ||||
|   /* For vector types, first find the scalar version of the builtin.  */ | ||||
|   if (type != NULL_TREE && VECTOR_TYPE_P (type)) | ||||
|     builtin_type = TREE_TYPE (type); | ||||
|   BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK; | ||||
| 
 | ||||
|   /* Some BRIG opcodes can use the same builtins for unsigned and
 | ||||
|      signed types.  Force these cases to unsigned types.  */ | ||||
| 
 | ||||
|   if (brig_opcode == BRIG_OPCODE_BORROW | ||||
|       || brig_opcode == BRIG_OPCODE_CARRY | ||||
|       || brig_opcode == BRIG_OPCODE_LASTBIT | ||||
|       || brig_opcode == BRIG_OPCODE_BITINSERT) | ||||
|     { | ||||
|       if (brig_type == BRIG_TYPE_S32) | ||||
| 	brig_type = BRIG_TYPE_U32; | ||||
|       else if (brig_type == BRIG_TYPE_S64) | ||||
| 	brig_type = BRIG_TYPE_U64; | ||||
|     } | ||||
| 
 | ||||
|   switch (brig_opcode) | ||||
|     { | ||||
|     case BRIG_OPCODE_FLOOR: | ||||
|       builtin = mathfn_built_in (builtin_type, BUILT_IN_FLOOR); | ||||
|       break; | ||||
|     case BRIG_OPCODE_CEIL: | ||||
|       builtin = mathfn_built_in (builtin_type, BUILT_IN_CEIL); | ||||
|       break; | ||||
|     case BRIG_OPCODE_SQRT: | ||||
|     case BRIG_OPCODE_NSQRT: | ||||
|       builtin = mathfn_built_in (builtin_type, BUILT_IN_SQRT); | ||||
|       break; | ||||
|     case BRIG_OPCODE_RINT: | ||||
|       builtin = mathfn_built_in (builtin_type, BUILT_IN_RINT); | ||||
|       break; | ||||
|     case BRIG_OPCODE_TRUNC: | ||||
|       builtin = mathfn_built_in (builtin_type, BUILT_IN_TRUNC); | ||||
|       break; | ||||
|     case BRIG_OPCODE_COPYSIGN: | ||||
|       builtin = mathfn_built_in (builtin_type, BUILT_IN_COPYSIGN); | ||||
|       break; | ||||
|     case BRIG_OPCODE_NSIN: | ||||
|       builtin = mathfn_built_in (builtin_type, BUILT_IN_SIN); | ||||
|       break; | ||||
|     case BRIG_OPCODE_NLOG2: | ||||
|       builtin = mathfn_built_in (builtin_type, BUILT_IN_LOG2); | ||||
|       break; | ||||
|     case BRIG_OPCODE_NEXP2: | ||||
|       builtin = mathfn_built_in (builtin_type, BUILT_IN_EXP2); | ||||
|       break; | ||||
|     case BRIG_OPCODE_NFMA: | ||||
|       builtin = mathfn_built_in (builtin_type, BUILT_IN_FMA); | ||||
|       break; | ||||
|     case BRIG_OPCODE_NCOS: | ||||
|       builtin = mathfn_built_in (builtin_type, BUILT_IN_COS); | ||||
|       break; | ||||
|     case BRIG_OPCODE_POPCOUNT: | ||||
|       /* Popcount should be typed by its argument type (the return value
 | ||||
| 	 is always u32).  Let's use a b64 version for also for b32 for now.  */ | ||||
|       return builtin_decl_explicit (BUILT_IN_POPCOUNTL); | ||||
|     case BRIG_OPCODE_BORROW: | ||||
|       /* Borrow uses the same builtin for unsigned and signed types.  */ | ||||
|       if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32) | ||||
| 	return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U32); | ||||
|       else | ||||
| 	return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U64); | ||||
|     case BRIG_OPCODE_CARRY: | ||||
|       /* Carry also uses the same builtin for unsigned and signed types.  */ | ||||
|       if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32) | ||||
| 	return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U32); | ||||
|       else | ||||
| 	return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U64); | ||||
|     default: | ||||
| 
 | ||||
|       /* Use our builtin index for finding a proper builtin for the BRIG
 | ||||
| 	 opcode and BRIG type.  This takes care most of the builtin cases, | ||||
| 	 the special cases are handled in the separate 'case' statements | ||||
| 	 above.  */ | ||||
|       builtin_map::const_iterator i | ||||
| 	= s_custom_builtins.find (std::make_pair (brig_opcode, brig_type)); | ||||
|       if (i != s_custom_builtins.end ()) | ||||
| 	return (*i).second; | ||||
| 
 | ||||
|       if (brig_inner_type != brig_type) | ||||
| 	{ | ||||
| 	  /* Try to find a scalar built-in we could use.  */ | ||||
| 	  i = s_custom_builtins.find | ||||
| 	    (std::make_pair (brig_opcode, brig_inner_type)); | ||||
| 	  if (i != s_custom_builtins.end ()) | ||||
| 	    return (*i).second; | ||||
| 	} | ||||
| 
 | ||||
|       /* In case this is an fp16 operation that is promoted to fp32,
 | ||||
| 	 try to find a fp32 scalar built-in.  */ | ||||
|       if (brig_inner_type == BRIG_TYPE_F16) | ||||
| 	{ | ||||
| 	  i = s_custom_builtins.find | ||||
| 	    (std::make_pair (brig_opcode, BRIG_TYPE_F32)); | ||||
| 	  if (i != s_custom_builtins.end ()) | ||||
| 	    return (*i).second; | ||||
| 	} | ||||
|       gcc_unreachable (); | ||||
|     } | ||||
| 
 | ||||
|   if (VECTOR_TYPE_P (type) && builtin != NULL_TREE) | ||||
|     { | ||||
|       /* Try to find a vectorized version of the built-in.
 | ||||
| 	 TODO: properly assert that builtin is a mathfn builtin? */ | ||||
|       tree vec_builtin | ||||
| 	= targetm.vectorize.builtin_vectorized_function | ||||
| 	(builtin_mathfn_code (builtin), type, type); | ||||
|       if (vec_builtin != NULL_TREE) | ||||
| 	return vec_builtin; | ||||
|       else | ||||
| 	return builtin; | ||||
|     } | ||||
|   if (builtin == NULL_TREE) | ||||
|     gcc_unreachable (); | ||||
|   return builtin; | ||||
| } | ||||
| 
 | ||||
| /* Return the correct GENERIC type for storing comparison results
 | ||||
|    of operand with the type given in SOURCE_TYPE.  */ | ||||
| 
 | ||||
|  | @ -848,264 +702,6 @@ brig_code_entry_handler::get_comparison_result_type (tree source_type) | |||
|     return gccbrig_tree_type_for_hsa_type (BRIG_TYPE_B1); | ||||
| } | ||||
| 
 | ||||
| /* Returns true in case the given opcode needs to know about work-item context
 | ||||
|    data.  In such case the context data is passed as a pointer to a work-item | ||||
|    context object, as the last argument in the builtin call.  */ | ||||
| 
 | ||||
| bool | ||||
| brig_code_entry_handler::needs_workitem_context_data | ||||
|   (BrigOpcode16_t brig_opcode) const | ||||
| { | ||||
|   switch (brig_opcode) | ||||
|     { | ||||
|     case BRIG_OPCODE_WORKITEMABSID: | ||||
|     case BRIG_OPCODE_WORKITEMFLATABSID: | ||||
|     case BRIG_OPCODE_WORKITEMFLATID: | ||||
|     case BRIG_OPCODE_CURRENTWORKITEMFLATID: | ||||
|     case BRIG_OPCODE_WORKITEMID: | ||||
|     case BRIG_OPCODE_WORKGROUPID: | ||||
|     case BRIG_OPCODE_WORKGROUPSIZE: | ||||
|     case BRIG_OPCODE_CURRENTWORKGROUPSIZE: | ||||
|     case BRIG_OPCODE_GRIDGROUPS: | ||||
|     case BRIG_OPCODE_GRIDSIZE: | ||||
|     case BRIG_OPCODE_DIM: | ||||
|     case BRIG_OPCODE_PACKETID: | ||||
|     case BRIG_OPCODE_PACKETCOMPLETIONSIG: | ||||
|     case BRIG_OPCODE_BARRIER: | ||||
|     case BRIG_OPCODE_WAVEBARRIER: | ||||
|     case BRIG_OPCODE_ARRIVEFBAR: | ||||
|     case BRIG_OPCODE_INITFBAR: | ||||
|     case BRIG_OPCODE_JOINFBAR: | ||||
|     case BRIG_OPCODE_LEAVEFBAR: | ||||
|     case BRIG_OPCODE_RELEASEFBAR: | ||||
|     case BRIG_OPCODE_WAITFBAR: | ||||
|     case BRIG_OPCODE_CUID: | ||||
|     case BRIG_OPCODE_MAXCUID: | ||||
|     case BRIG_OPCODE_DEBUGTRAP: | ||||
|     case BRIG_OPCODE_GROUPBASEPTR: | ||||
|     case BRIG_OPCODE_KERNARGBASEPTR: | ||||
|     case BRIG_OPCODE_ALLOCA: | ||||
|       return true; | ||||
|     default: | ||||
|       return false; | ||||
|     }; | ||||
| } | ||||
| 
 | ||||
| /* Returns true in case the given opcode that would normally be generated
 | ||||
|    as a builtin call can be expanded to tree nodes.  */ | ||||
| 
 | ||||
| bool | ||||
| brig_code_entry_handler::can_expand_builtin (BrigOpcode16_t brig_opcode) const | ||||
| { | ||||
|   switch (brig_opcode) | ||||
|     { | ||||
|     case BRIG_OPCODE_WORKITEMFLATABSID: | ||||
|     case BRIG_OPCODE_WORKITEMFLATID: | ||||
|     case BRIG_OPCODE_WORKITEMABSID: | ||||
|     case BRIG_OPCODE_WORKGROUPSIZE: | ||||
|     case BRIG_OPCODE_CURRENTWORKGROUPSIZE: | ||||
|       /* TODO: expand more builtins.  */ | ||||
|       return true; | ||||
|     default: | ||||
|       return false; | ||||
|     }; | ||||
| } | ||||
| 
 | ||||
| /* Try to expand the given builtin call to reuse a previously generated
 | ||||
|    variable, if possible.  If not, just call the given builtin. | ||||
|    BRIG_OPCODE and BRIG_TYPE identify the builtin's BRIG opcode/type, | ||||
|    ARITH_TYPE its GENERIC type, and OPERANDS contains the builtin's | ||||
|    input operands.  */ | ||||
| 
 | ||||
| tree | ||||
| brig_code_entry_handler::expand_or_call_builtin (BrigOpcode16_t brig_opcode, | ||||
| 						 BrigType16_t brig_type, | ||||
| 						 tree arith_type, | ||||
| 						 tree_stl_vec &operands) | ||||
| { | ||||
|   if (m_parent.m_cf->m_is_kernel && can_expand_builtin (brig_opcode)) | ||||
|     return expand_builtin (brig_opcode, operands); | ||||
| 
 | ||||
|   tree built_in | ||||
|     = get_builtin_for_hsa_opcode (arith_type, brig_opcode, brig_type); | ||||
| 
 | ||||
|   if (!VECTOR_TYPE_P (TREE_TYPE (TREE_TYPE (built_in))) | ||||
|       && arith_type != NULL_TREE && VECTOR_TYPE_P (arith_type) | ||||
|       && brig_opcode != BRIG_OPCODE_LERP | ||||
|       && brig_opcode != BRIG_OPCODE_PACKCVT | ||||
|       && brig_opcode != BRIG_OPCODE_SAD | ||||
|       && brig_opcode != BRIG_OPCODE_SADHI) | ||||
|     { | ||||
|       /* Call the scalar built-in for all elements in the vector.  */ | ||||
|       tree_stl_vec operand0_elements; | ||||
|       if (operands.size () > 0) | ||||
| 	unpack (operands[0], operand0_elements); | ||||
| 
 | ||||
|       tree_stl_vec operand1_elements; | ||||
|       if (operands.size () > 1) | ||||
| 	unpack (operands[1], operand1_elements); | ||||
| 
 | ||||
|       tree_stl_vec result_elements; | ||||
| 
 | ||||
|       size_t element_count = gccbrig_type_vector_subparts (arith_type); | ||||
|       for (size_t i = 0; i < element_count; ++i) | ||||
| 	{ | ||||
| 	  tree_stl_vec call_operands; | ||||
| 	  if (operand0_elements.size () > 0) | ||||
| 	    call_operands.push_back (operand0_elements.at (i)); | ||||
| 
 | ||||
| 	  if (operand1_elements.size () > 0) | ||||
| 	    call_operands.push_back (operand1_elements.at (i)); | ||||
| 
 | ||||
| 	  result_elements.push_back | ||||
| 	    (expand_or_call_builtin (brig_opcode, brig_type, | ||||
| 				     TREE_TYPE (arith_type), | ||||
| 				     call_operands)); | ||||
| 	} | ||||
|       return pack (result_elements); | ||||
|     } | ||||
| 
 | ||||
|   tree_stl_vec call_operands; | ||||
|   tree_stl_vec operand_types; | ||||
| 
 | ||||
|   tree arg_type_chain = TYPE_ARG_TYPES (TREE_TYPE (built_in)); | ||||
| 
 | ||||
|   for (size_t i = 0; i < operands.size (); ++i) | ||||
|     { | ||||
|       tree operand_type = TREE_VALUE (arg_type_chain); | ||||
|       call_operands.push_back (convert (operand_type, operands[i])); | ||||
|       operand_types.push_back (operand_type); | ||||
|       arg_type_chain = TREE_CHAIN (arg_type_chain); | ||||
|     } | ||||
| 
 | ||||
|   if (needs_workitem_context_data (brig_opcode)) | ||||
|     { | ||||
|       call_operands.push_back (m_parent.m_cf->m_context_arg); | ||||
|       operand_types.push_back (ptr_type_node); | ||||
|       m_parent.m_cf->m_has_unexpanded_dp_builtins = true; | ||||
|     } | ||||
| 
 | ||||
|   size_t operand_count = call_operands.size (); | ||||
| 
 | ||||
|   call_operands.resize (4, NULL_TREE); | ||||
|   operand_types.resize (4, NULL_TREE); | ||||
|   for (size_t i = 0; i < operand_count; ++i) | ||||
|     call_operands.at (i) = build_resize_convert_view (operand_types.at (i), | ||||
| 						      call_operands.at (i)); | ||||
| 
 | ||||
|   tree fnptr = build_fold_addr_expr (built_in); | ||||
|   return build_call_array (TREE_TYPE (TREE_TYPE (built_in)), fnptr, | ||||
| 			   operand_count, &call_operands[0]); | ||||
| } | ||||
| 
 | ||||
| /* Instead of calling a built-in, reuse a previously returned value known to
 | ||||
|    be still valid.  This is beneficial especially for the work-item | ||||
|    identification related builtins as not having them as calls can lead to | ||||
|    more easily vectorizable parallel loops for multi work-item work-groups. | ||||
|    BRIG_OPCODE identifies the builtin and OPERANDS store the operands.  */ | ||||
| 
 | ||||
| tree | ||||
| brig_code_entry_handler::expand_builtin (BrigOpcode16_t brig_opcode, | ||||
| 					 tree_stl_vec &operands) | ||||
| { | ||||
|   tree_stl_vec uint32_0 = tree_stl_vec (1, build_int_cst (uint32_type_node, 0)); | ||||
| 
 | ||||
|   tree_stl_vec uint32_1 = tree_stl_vec (1, build_int_cst (uint32_type_node, 1)); | ||||
| 
 | ||||
|   tree_stl_vec uint32_2 = tree_stl_vec (1, build_int_cst (uint32_type_node, 2)); | ||||
| 
 | ||||
|   if (brig_opcode == BRIG_OPCODE_WORKITEMFLATABSID) | ||||
|     { | ||||
|       tree id0 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_0); | ||||
|       id0 = convert (uint64_type_node, id0); | ||||
| 
 | ||||
|       tree id1 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_1); | ||||
|       id1 = convert (uint64_type_node, id1); | ||||
| 
 | ||||
|       tree id2 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_2); | ||||
|       id2 = convert (uint64_type_node, id2); | ||||
| 
 | ||||
|       tree max0 = convert (uint64_type_node, | ||||
| 			   m_parent.m_cf->m_grid_size_vars[0]); | ||||
|       tree max1 = convert (uint64_type_node, | ||||
| 			   m_parent.m_cf->m_grid_size_vars[1]); | ||||
| 
 | ||||
|       tree id2_x_max0_x_max1 = build2 (MULT_EXPR, uint64_type_node, id2, max0); | ||||
|       id2_x_max0_x_max1 | ||||
| 	= build2 (MULT_EXPR, uint64_type_node, id2_x_max0_x_max1, max1); | ||||
| 
 | ||||
|       tree id1_x_max0 = build2 (MULT_EXPR, uint64_type_node, id1, max0); | ||||
| 
 | ||||
|       tree sum = build2 (PLUS_EXPR, uint64_type_node, id0, id1_x_max0); | ||||
|       sum = build2 (PLUS_EXPR, uint64_type_node, sum, id2_x_max0_x_max1); | ||||
| 
 | ||||
|       return add_temp_var ("workitemflatabsid", sum); | ||||
|     } | ||||
|   else if (brig_opcode == BRIG_OPCODE_WORKITEMABSID) | ||||
|     { | ||||
|       HOST_WIDE_INT dim = int_constant_value (operands[0]); | ||||
| 
 | ||||
|       tree local_id_var = m_parent.m_cf->m_local_id_vars[dim]; | ||||
|       tree wg_id_var = m_parent.m_cf->m_wg_id_vars[dim]; | ||||
|       tree wg_size_var = m_parent.m_cf->m_wg_size_vars[dim]; | ||||
| 
 | ||||
|       tree wg_id_x_wg_size = build2 (MULT_EXPR, uint32_type_node, | ||||
| 				     convert (uint32_type_node, wg_id_var), | ||||
| 				     convert (uint32_type_node, wg_size_var)); | ||||
|       tree sum | ||||
| 	= build2 (PLUS_EXPR, uint32_type_node, wg_id_x_wg_size, local_id_var); | ||||
| 
 | ||||
|       return add_temp_var (std::string ("workitemabsid_") | ||||
| 			   + (char) ((int) 'x' + dim), sum); | ||||
|     } | ||||
|   else if (brig_opcode == BRIG_OPCODE_WORKITEMFLATID) | ||||
|     { | ||||
|       tree z_x_wgsx_wgsy | ||||
| 	= build2 (MULT_EXPR, uint32_type_node, | ||||
| 		  m_parent.m_cf->m_local_id_vars[2], | ||||
| 		  m_parent.m_cf->m_wg_size_vars[0]); | ||||
|       z_x_wgsx_wgsy = build2 (MULT_EXPR, uint32_type_node, z_x_wgsx_wgsy, | ||||
| 			      m_parent.m_cf->m_wg_size_vars[1]); | ||||
| 
 | ||||
|       tree y_x_wgsx | ||||
| 	= build2 (MULT_EXPR, uint32_type_node, | ||||
| 		  m_parent.m_cf->m_local_id_vars[1], | ||||
| 		  m_parent.m_cf->m_wg_size_vars[0]); | ||||
| 
 | ||||
|       tree sum = build2 (PLUS_EXPR, uint32_type_node, y_x_wgsx, z_x_wgsx_wgsy); | ||||
|       sum = build2 (PLUS_EXPR, uint32_type_node, | ||||
| 		    m_parent.m_cf->m_local_id_vars[0], | ||||
| 		    sum); | ||||
|       return add_temp_var ("workitemflatid", sum); | ||||
|     } | ||||
|   else if (brig_opcode == BRIG_OPCODE_WORKGROUPSIZE) | ||||
|     { | ||||
|       HOST_WIDE_INT dim = int_constant_value (operands[0]); | ||||
|       return m_parent.m_cf->m_wg_size_vars[dim]; | ||||
|     } | ||||
|   else if (brig_opcode == BRIG_OPCODE_CURRENTWORKGROUPSIZE) | ||||
|     { | ||||
|       HOST_WIDE_INT dim = int_constant_value (operands[0]); | ||||
|       return m_parent.m_cf->m_cur_wg_size_vars[dim]; | ||||
|     } | ||||
|   else | ||||
|     gcc_unreachable (); | ||||
| 
 | ||||
|   return NULL_TREE; | ||||
| } | ||||
| 
 | ||||
| /* Appends and returns a new temp variable and an accompanying assignment
 | ||||
|    statement that stores the value of the given EXPR and has the given NAME.  */ | ||||
| 
 | ||||
| tree | ||||
| brig_code_entry_handler::add_temp_var (std::string name, tree expr) | ||||
| { | ||||
|   tree temp_var = create_tmp_var (TREE_TYPE (expr), name.c_str ()); | ||||
|   tree assign = build2 (MODIFY_EXPR, TREE_TYPE (temp_var), temp_var, expr); | ||||
|   m_parent.m_cf->append_statement (assign); | ||||
|   return temp_var; | ||||
| } | ||||
| 
 | ||||
| /* Creates a FP32 to FP16 conversion call, assuming the source and destination
 | ||||
|    are FP32 type variables.  */ | ||||
| 
 | ||||
|  | @ -1387,7 +983,6 @@ brig_code_entry_handler::build_output_assignment (const BrigInstBase &brig_inst, | |||
|      variable type (can be any type; see get_m_var_declfor_reg @ | ||||
|      brig-function.cc).  */ | ||||
|   tree output_type = TREE_TYPE (output); | ||||
|   tree input_type = TREE_TYPE (inst_expr); | ||||
|   bool is_fp16 = (brig_inst.type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16 | ||||
| 		 && brig_inst.base.kind != BRIG_KIND_INST_MEM | ||||
| 		 && !gccbrig_is_bit_operation (brig_inst.opcode); | ||||
|  | @ -1396,6 +991,13 @@ brig_code_entry_handler::build_output_assignment (const BrigInstBase &brig_inst, | |||
|   bool ftz = false; | ||||
|   const BrigBase *base = &brig_inst.base; | ||||
| 
 | ||||
|   if (m_parent.m_cf->is_id_val (inst_expr)) | ||||
|     inst_expr = m_parent.m_cf->id_val (inst_expr); | ||||
| 
 | ||||
|   tree input_type = TREE_TYPE (inst_expr); | ||||
| 
 | ||||
|   m_parent.m_cf->add_reg_var_update (output, inst_expr); | ||||
| 
 | ||||
|   if (base->kind == BRIG_KIND_INST_MOD) | ||||
|     { | ||||
|       const BrigInstMod *mod = (const BrigInstMod *) base; | ||||
|  | @ -1418,13 +1020,13 @@ brig_code_entry_handler::build_output_assignment (const BrigInstBase &brig_inst, | |||
|     { | ||||
|       /* Ensure we don't duplicate the arithmetics to the arguments of the bit
 | ||||
| 	 field reference operators.  */ | ||||
|       inst_expr = add_temp_var ("before_ftz", inst_expr); | ||||
|       inst_expr = m_parent.m_cf->add_temp_var ("before_ftz", inst_expr); | ||||
|       inst_expr = flush_to_zero (is_fp16) (*this, inst_expr); | ||||
|     } | ||||
| 
 | ||||
|   if (is_fp16) | ||||
|     { | ||||
|       inst_expr = add_temp_var ("before_f2h", inst_expr); | ||||
|       inst_expr = m_parent.m_cf->add_temp_var ("before_f2h", inst_expr); | ||||
|       tree f2h_output = build_f2h_conversion (inst_expr); | ||||
|       tree conv = build_resize_convert_view (output_type, f2h_output); | ||||
|       tree assign = build2 (MODIFY_EXPR, output_type, output, conv); | ||||
|  | @ -1486,62 +1088,6 @@ brig_code_entry_handler::append_statement (tree stmt) | |||
|   m_parent.m_cf->append_statement (stmt); | ||||
| } | ||||
| 
 | ||||
| /* Unpacks the elements of the vector in VALUE to scalars (bit field
 | ||||
|    references) in ELEMENTS.  */ | ||||
| 
 | ||||
| void | ||||
| brig_code_entry_handler::unpack (tree value, tree_stl_vec &elements) | ||||
| { | ||||
|   size_t vec_size = int_size_in_bytes (TREE_TYPE (value)); | ||||
|   size_t element_size | ||||
|     = int_size_in_bytes (TREE_TYPE (TREE_TYPE (value))) * BITS_PER_UNIT; | ||||
|   size_t element_count | ||||
|     = vec_size * BITS_PER_UNIT / element_size; | ||||
| 
 | ||||
|   tree input_element_type = TREE_TYPE (TREE_TYPE (value)); | ||||
| 
 | ||||
|   value = add_temp_var ("unpack_input", value); | ||||
| 
 | ||||
|   for (size_t i = 0; i < element_count; ++i) | ||||
|     { | ||||
|       tree element | ||||
| 	= build3 (BIT_FIELD_REF, input_element_type, value, | ||||
| 		  TYPE_SIZE (input_element_type), | ||||
| 		  bitsize_int(i * element_size)); | ||||
| 
 | ||||
|       element = add_temp_var ("scalar", element); | ||||
|       elements.push_back (element); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /* Pack the elements of the scalars in ELEMENTS to the returned vector.  */ | ||||
| 
 | ||||
| tree | ||||
| brig_code_entry_handler::pack (tree_stl_vec &elements) | ||||
| { | ||||
|   size_t element_count = elements.size (); | ||||
| 
 | ||||
|   gcc_assert (element_count > 1); | ||||
| 
 | ||||
|   tree output_element_type = TREE_TYPE (elements.at (0)); | ||||
| 
 | ||||
|   vec<constructor_elt, va_gc> *constructor_vals = NULL; | ||||
|   for (size_t i = 0; i < element_count; ++i) | ||||
|     CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, elements.at (i)); | ||||
| 
 | ||||
|   tree vec_type = build_vector_type (output_element_type, element_count); | ||||
| 
 | ||||
|   /* build_constructor creates a vector type which is not a vector_cst
 | ||||
|      that requires compile time constant elements.  */ | ||||
|   tree vec = build_constructor (vec_type, constructor_vals); | ||||
| 
 | ||||
|   /* Add a temp variable for readability.  */ | ||||
|   tree tmp_var = create_tmp_var (vec_type, "vec_out"); | ||||
|   tree vec_tmp_assign = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec); | ||||
|   m_parent.m_cf->append_statement (vec_tmp_assign); | ||||
|   return tmp_var; | ||||
| } | ||||
| 
 | ||||
| /* Visits the element(s) in the OPERAND, calling HANDLER to each of them.  */ | ||||
| 
 | ||||
| tree | ||||
|  | @ -1757,4 +1303,3 @@ brig_code_entry_handler::int_constant_value (tree node) | |||
|     n = TREE_OPERAND (n, 0); | ||||
|   return int_cst_value (n); | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -35,8 +35,6 @@ class tree_element_unary_visitor; | |||
| class brig_code_entry_handler : public brig_entry_handler | ||||
| { | ||||
| public: | ||||
|   typedef std::map<std::pair<BrigOpcode16_t, BrigType16_t>, tree> builtin_map; | ||||
| 
 | ||||
|   brig_code_entry_handler (brig_to_generic &parent); | ||||
| 
 | ||||
|   /* Handles the brig_code data at the given pointer and adds it to the
 | ||||
|  | @ -51,8 +49,6 @@ protected: | |||
|   tree get_tree_expr_type_for_hsa_type (BrigType16_t brig_type) const; | ||||
|   tree get_tree_cst_for_hsa_operand (const BrigOperandConstantBytes *brigConst, | ||||
| 				     tree type) const; | ||||
|   tree get_builtin_for_hsa_opcode (tree type, BrigOpcode16_t brig_opcode, | ||||
| 				   BrigType16_t brig_type) const; | ||||
|   tree get_comparison_result_type (tree source_type); | ||||
| 
 | ||||
|   tree build_code_ref (const BrigBase &ref); | ||||
|  | @ -73,16 +69,6 @@ protected: | |||
| 
 | ||||
|   bool needs_workitem_context_data (BrigOpcode16_t brig_opcode) const; | ||||
| 
 | ||||
|   void unpack (tree value, tree_stl_vec &elements); | ||||
|   tree pack (tree_stl_vec &elements); | ||||
| 
 | ||||
|   bool can_expand_builtin (BrigOpcode16_t brig_opcode) const; | ||||
|   tree expand_builtin (BrigOpcode16_t brig_opcode, tree_stl_vec &operands); | ||||
| 
 | ||||
|   tree expand_or_call_builtin (BrigOpcode16_t brig_opcode, | ||||
| 			       BrigType16_t brig_type, tree arith_type, | ||||
| 			       tree_stl_vec &operands); | ||||
| 
 | ||||
|   tree add_temp_var (std::string name, tree expr); | ||||
| 
 | ||||
|   tree build_f2h_conversion (tree source); | ||||
|  | @ -100,10 +86,6 @@ protected: | |||
| 
 | ||||
|   tree extend_int (tree input, tree dest_type, tree src_type); | ||||
| 
 | ||||
|   /* HSAIL-specific builtin functions not yet integrated to gcc.  */ | ||||
| 
 | ||||
|   static builtin_map s_custom_builtins; | ||||
| 
 | ||||
| private: | ||||
| 
 | ||||
|   tree_stl_vec build_or_analyze_operands (const BrigInstBase &brig_inst, | ||||
|  | @ -299,9 +281,6 @@ private: | |||
| 
 | ||||
|   tree build_unpack_lo_or_hi (BrigOpcode16_t brig_opcode, tree arith_type, | ||||
| 			      tree_stl_vec &operands); | ||||
| 
 | ||||
|   tree_code get_tree_code_for_hsa_opcode (BrigOpcode16_t brig_opcode, | ||||
| 					  BrigType16_t brig_type) const; | ||||
| }; | ||||
| 
 | ||||
| class brig_cvt_inst_handler : public brig_inst_mod_handler | ||||
|  |  | |||
|  | @ -53,45 +53,45 @@ brig_directive_control_handler::operator () (const BrigBase *base) | |||
|     case BRIG_CONTROL_MAXDYNAMICGROUPSIZE: | ||||
|       { | ||||
| 	m_parent.m_cf->m_descriptor.max_dynamic_group_size | ||||
| 	  = int_constant_value (operands.at (0)); | ||||
| 	  = brig_function::int_constant_value (operands.at (0)); | ||||
| 	break; | ||||
|       } | ||||
|     case BRIG_CONTROL_MAXFLATGRIDSIZE: | ||||
|       { | ||||
| 	m_parent.m_cf->m_descriptor.max_flat_grid_size | ||||
| 	  = int_constant_value (operands.at (0)); | ||||
| 	  = brig_function::int_constant_value (operands.at (0)); | ||||
| 	break; | ||||
|       } | ||||
|     case BRIG_CONTROL_MAXFLATWORKGROUPSIZE: | ||||
|       { | ||||
| 	m_parent.m_cf->m_descriptor.max_flat_workgroup_size | ||||
| 	  = int_constant_value (operands.at (0)); | ||||
| 	  = brig_function::int_constant_value (operands.at (0)); | ||||
| 	break; | ||||
|       } | ||||
|     case BRIG_CONTROL_REQUIREDDIM: | ||||
|       { | ||||
| 	m_parent.m_cf->m_descriptor.required_dim | ||||
| 	  = int_constant_value (operands.at (0)); | ||||
| 	  = brig_function::int_constant_value (operands.at (0)); | ||||
| 	break; | ||||
|       } | ||||
|     case BRIG_CONTROL_REQUIREDGRIDSIZE: | ||||
|       { | ||||
| 	m_parent.m_cf->m_descriptor.required_grid_size[0] | ||||
| 	  = int_constant_value (operands.at (0)); | ||||
| 	  = brig_function::int_constant_value (operands.at (0)); | ||||
| 	m_parent.m_cf->m_descriptor.required_grid_size[1] | ||||
| 	  = int_constant_value (operands.at (1)); | ||||
| 	  = brig_function::int_constant_value (operands.at (1)); | ||||
| 	m_parent.m_cf->m_descriptor.required_grid_size[2] | ||||
| 	  = int_constant_value (operands.at (2)); | ||||
| 	  = brig_function::int_constant_value (operands.at (2)); | ||||
| 	break; | ||||
|       } | ||||
|     case BRIG_CONTROL_REQUIREDWORKGROUPSIZE: | ||||
|       { | ||||
| 	m_parent.m_cf->m_descriptor.required_workgroup_size[0] | ||||
| 	  = int_constant_value (operands.at (0)); | ||||
| 	  = brig_function::int_constant_value (operands.at (0)); | ||||
| 	m_parent.m_cf->m_descriptor.required_workgroup_size[1] | ||||
| 	  = int_constant_value (operands.at (1)); | ||||
| 	  = brig_function::int_constant_value (operands.at (1)); | ||||
| 	m_parent.m_cf->m_descriptor.required_workgroup_size[2] | ||||
| 	  = int_constant_value (operands.at (2)); | ||||
| 	  = brig_function::int_constant_value (operands.at (2)); | ||||
| 	break; | ||||
|       } | ||||
|     case BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS: | ||||
|  |  | |||
|  | @ -83,6 +83,12 @@ brig_cvt_inst_handler::generate (const BrigBase *base) | |||
|   tree &input = operands.at (1); | ||||
|   tree &output = operands.at (0); | ||||
| 
 | ||||
|   if (m_parent.m_cf->is_id_val (input)) | ||||
|     { | ||||
|       input = m_parent.m_cf->id_val (input); | ||||
|       src_type = TREE_TYPE (input); | ||||
|     } | ||||
| 
 | ||||
|   size_t conv_src_size = int_size_in_bytes (src_type); | ||||
|   size_t conv_dst_size = int_size_in_bytes (dest_type); | ||||
|   size_t src_reg_size = int_size_in_bytes (TREE_TYPE (input)); | ||||
|  |  | |||
|  | @ -93,6 +93,25 @@ brig_directive_function_handler::operator () (const BrigBase *base) | |||
|      represent HSAIL registers.  */ | ||||
|   tree bind_expr = build3 (BIND_EXPR, void_type_node, NULL, stmt_list, NULL); | ||||
| 
 | ||||
|   tree restrict_char_ptr | ||||
|     = build_qualified_type (build_pointer_type (char_type_node), | ||||
| 			    TYPE_QUAL_RESTRICT); | ||||
|   tree restrict_void_ptr | ||||
|     = build_qualified_type (build_pointer_type (void_type_node), | ||||
| 			    TYPE_QUAL_RESTRICT); | ||||
| 
 | ||||
|   tree restrict_const_char_ptr | ||||
|     = build_qualified_type (build_pointer_type | ||||
| 			    (build_qualified_type (char_type_node, | ||||
| 						   TYPE_QUAL_CONST)), | ||||
| 			    TYPE_QUAL_RESTRICT); | ||||
| 
 | ||||
|   tree restrict_const_void_ptr | ||||
|     = build_qualified_type (build_pointer_type | ||||
| 			    (build_qualified_type (void_type_node, | ||||
| 						   TYPE_QUAL_CONST)), | ||||
| 			    TYPE_QUAL_RESTRICT); | ||||
| 
 | ||||
|   if (is_kernel) | ||||
|     { | ||||
|       tree name_identifier | ||||
|  | @ -107,12 +126,11 @@ brig_directive_function_handler::operator () (const BrigBase *base) | |||
| 	 3) a void* parameter that contains the first flat address of the group | ||||
| 	 region allocated to the current work-group.  */ | ||||
| 
 | ||||
|       tree char_ptr_type_node = build_pointer_type (char_type_node); | ||||
|       fndecl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name_identifier, | ||||
| 			   build_function_type_list (void_type_node, | ||||
| 						     char_ptr_type_node, | ||||
| 						     ptr_type_node, | ||||
| 						     ptr_type_node, NULL_TREE)); | ||||
| 						     restrict_const_char_ptr, | ||||
| 						     restrict_void_ptr, | ||||
| 						     restrict_char_ptr, NULL_TREE)); | ||||
| 
 | ||||
|       SET_DECL_ASSEMBLER_NAME (fndecl, name_identifier); | ||||
| 
 | ||||
|  | @ -125,9 +143,10 @@ brig_directive_function_handler::operator () (const BrigBase *base) | |||
| 	= gccbrig_get_target_addr_space_id (BRIG_SEGMENT_KERNARG); | ||||
| 
 | ||||
|       tree arg_arg = build_decl (UNKNOWN_LOCATION, PARM_DECL, | ||||
| 				 get_identifier ("__args"), char_ptr_type_node); | ||||
| 				 get_identifier ("__args"), | ||||
| 				 restrict_const_char_ptr); | ||||
|       DECL_ARGUMENTS (fndecl) = arg_arg; | ||||
|       DECL_ARG_TYPE (arg_arg) = char_ptr_type_node; | ||||
|       DECL_ARG_TYPE (arg_arg) = restrict_const_char_ptr; | ||||
|       DECL_CONTEXT (arg_arg) = fndecl; | ||||
|       DECL_ARTIFICIAL (arg_arg) = 1; | ||||
|       TREE_READONLY (arg_arg) = 1; | ||||
|  | @ -189,7 +208,7 @@ brig_directive_function_handler::operator () (const BrigBase *base) | |||
| 	      if (arg_decls == NULL_TREE) | ||||
| 		arg_decls = arg_var; | ||||
| 	      else | ||||
| 		chainon (arg_decls, arg_var); | ||||
| 		arg_decls = chainon (arg_decls, arg_var); | ||||
| 
 | ||||
| 	      m_parent.m_cf->add_arg_variable (brigVar, arg_var); | ||||
| 
 | ||||
|  | @ -230,18 +249,13 @@ brig_directive_function_handler::operator () (const BrigBase *base) | |||
| 	      vec_safe_push (args, TREE_TYPE (arg_var)); | ||||
| 
 | ||||
| 	      m_parent.m_cf->add_arg_variable (brigVar, arg_var); | ||||
| 
 | ||||
| 	      if (arg_decls == NULL_TREE) | ||||
| 		arg_decls = arg_var; | ||||
| 	      else | ||||
| 		chainon (arg_decls, arg_var); | ||||
| 	      arg_decls = chainon (arg_decls, arg_var); | ||||
| 	    } | ||||
| 	} | ||||
| 
 | ||||
|       vec_safe_push (args, ptr_type_node); | ||||
|       vec_safe_push (args, ptr_type_node); | ||||
|       vec_safe_push (args, ptr_type_node); | ||||
|       vec_safe_push (args, ptr_type_node); | ||||
|       vec_safe_push (args, restrict_void_ptr); | ||||
|       vec_safe_push (args, restrict_char_ptr); | ||||
|       vec_safe_push (args, uint32_type_node); | ||||
|       vec_safe_push (args, restrict_char_ptr); | ||||
| 
 | ||||
|       fndecl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name_identifier, | ||||
| 			   build_function_type_vec (ret_type, args)); | ||||
|  | @ -254,26 +268,30 @@ brig_directive_function_handler::operator () (const BrigBase *base) | |||
| 
 | ||||
|   /* All functions need the hidden __context argument passed on
 | ||||
|      because they might call WI-specific functions which need | ||||
|      the context info.  */ | ||||
|      the context info.  Only kernels can write it, if they need | ||||
|      to update the local ids in the work-item loop.  */ | ||||
| 
 | ||||
|   tree context_arg_type | ||||
|     = true ? restrict_void_ptr : restrict_const_void_ptr; | ||||
|   tree context_arg = build_decl (UNKNOWN_LOCATION, PARM_DECL, | ||||
| 				 get_identifier ("__context"), ptr_type_node); | ||||
|   if (DECL_ARGUMENTS (fndecl) == NULL_TREE) | ||||
|     DECL_ARGUMENTS (fndecl) = context_arg; | ||||
|   else | ||||
|     chainon (DECL_ARGUMENTS (fndecl), context_arg); | ||||
| 				 get_identifier ("__context"), | ||||
| 				 context_arg_type); | ||||
|   DECL_ARGUMENTS (fndecl) = chainon (DECL_ARGUMENTS (fndecl), context_arg); | ||||
|   DECL_CONTEXT (context_arg) = fndecl; | ||||
|   DECL_ARG_TYPE (context_arg) = ptr_type_node; | ||||
|   DECL_ARG_TYPE (context_arg) = context_arg_type; | ||||
|   DECL_ARTIFICIAL (context_arg) = 1; | ||||
|   TREE_READONLY (context_arg) = 1; | ||||
|   TREE_USED (context_arg) = 1; | ||||
|   m_parent.m_cf->m_context_arg = context_arg; | ||||
| 
 | ||||
|   /* They can also access group memory, so we need to pass the
 | ||||
|      group pointer along too.  */ | ||||
|   tree group_base_arg | ||||
|     = build_decl (UNKNOWN_LOCATION, PARM_DECL, | ||||
| 		  get_identifier ("__group_base_addr"), ptr_type_node); | ||||
|   chainon (DECL_ARGUMENTS (fndecl), group_base_arg); | ||||
|   DECL_ARG_TYPE (group_base_arg) = ptr_type_node; | ||||
| 		  get_identifier ("__group_base_addr"), | ||||
| 		  restrict_char_ptr); | ||||
|   DECL_ARGUMENTS (fndecl) = chainon (DECL_ARGUMENTS (fndecl), group_base_arg); | ||||
|   DECL_ARG_TYPE (group_base_arg) = restrict_char_ptr; | ||||
|   DECL_CONTEXT (group_base_arg) = fndecl; | ||||
|   DECL_ARTIFICIAL (group_base_arg) = 1; | ||||
|   TREE_READONLY (group_base_arg) = 1; | ||||
|  | @ -288,7 +306,7 @@ brig_directive_function_handler::operator () (const BrigBase *base) | |||
|   tree group_local_offset_arg | ||||
|     = build_decl (UNKNOWN_LOCATION, PARM_DECL, | ||||
| 		  get_identifier ("__group_local_offset"), uint32_type_node); | ||||
|   chainon (DECL_ARGUMENTS (fndecl), group_local_offset_arg); | ||||
|   DECL_ARGUMENTS (fndecl) = chainon (DECL_ARGUMENTS (fndecl), group_local_offset_arg); | ||||
|   DECL_ARG_TYPE (group_local_offset_arg) = uint32_type_node; | ||||
|   DECL_CONTEXT (group_local_offset_arg) = fndecl; | ||||
|   DECL_ARTIFICIAL (group_local_offset_arg) = 1; | ||||
|  | @ -299,24 +317,25 @@ brig_directive_function_handler::operator () (const BrigBase *base) | |||
|   /* Same for private.  */ | ||||
|   tree private_base_arg | ||||
|     = build_decl (UNKNOWN_LOCATION, PARM_DECL, | ||||
| 		  get_identifier ("__private_base_addr"), ptr_type_node); | ||||
|   chainon (DECL_ARGUMENTS (fndecl), private_base_arg); | ||||
|   DECL_ARG_TYPE (private_base_arg) = ptr_type_node; | ||||
| 		  get_identifier ("__private_base_addr"), restrict_char_ptr); | ||||
|   DECL_ARGUMENTS (fndecl) = chainon (DECL_ARGUMENTS (fndecl), private_base_arg); | ||||
|   DECL_ARG_TYPE (private_base_arg) = restrict_char_ptr; | ||||
|   DECL_CONTEXT (private_base_arg) = fndecl; | ||||
|   DECL_ARTIFICIAL (private_base_arg) = 1; | ||||
|   TREE_READONLY (private_base_arg) = 1; | ||||
|   TREE_USED (private_base_arg) = 1; | ||||
|   m_parent.m_cf->m_private_base_arg = private_base_arg; | ||||
| 
 | ||||
|   DECL_SAVED_TREE (fndecl) = bind_expr; | ||||
| 
 | ||||
|   set_externally_visible (fndecl); | ||||
| 
 | ||||
|   if (base->kind == BRIG_KIND_DIRECTIVE_FUNCTION) | ||||
|     { | ||||
|       TREE_STATIC (fndecl) = 0; | ||||
|       TREE_PUBLIC (fndecl) = 1; | ||||
|       DECL_EXTERNAL (fndecl) = 0; | ||||
|       DECL_DECLARED_INLINE_P (fndecl) = 1; | ||||
|       set_inline (fndecl); | ||||
|       set_externally_visible (fndecl); | ||||
|     } | ||||
|   else if (base->kind == BRIG_KIND_DIRECTIVE_KERNEL) | ||||
|     { | ||||
|  | @ -330,6 +349,7 @@ brig_directive_function_handler::operator () (const BrigBase *base) | |||
|       TREE_STATIC (fndecl) = 0; | ||||
|       TREE_PUBLIC (fndecl) = 1; | ||||
|       DECL_EXTERNAL (fndecl) = 1; | ||||
|       set_inline (fndecl); | ||||
|     } | ||||
|   else if (base->kind == BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION) | ||||
|     { | ||||
|  | @ -371,11 +391,8 @@ brig_directive_function_handler::operator () (const BrigBase *base) | |||
|     } | ||||
| 
 | ||||
|   m_parent.start_function (fndecl); | ||||
| 
 | ||||
|   m_parent.m_cf->m_func_decl = fndecl; | ||||
|   m_parent.m_cf->m_current_bind_expr = bind_expr; | ||||
|   m_parent.m_cf->m_context_arg = context_arg; | ||||
|   m_parent.m_cf->m_private_base_arg = private_base_arg; | ||||
| 
 | ||||
|   if (ret_value != NULL_TREE && TREE_TYPE (ret_value) != void_type_node) | ||||
|     { | ||||
|  |  | |||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							|  | @ -105,6 +105,30 @@ public: | |||
| 
 | ||||
|   void analyze_calls (); | ||||
| 
 | ||||
|   tree expand_builtin (BrigOpcode16_t brig_opcode, tree_stl_vec &operands); | ||||
| 
 | ||||
|   tree expand_or_call_builtin (BrigOpcode16_t brig_opcode, | ||||
| 			       BrigType16_t brig_type, tree arith_type, | ||||
| 			       tree_stl_vec &operands); | ||||
|   bool can_expand_builtin (BrigOpcode16_t brig_opcode) const; | ||||
| 
 | ||||
|   tree get_builtin_for_hsa_opcode (tree type, BrigOpcode16_t brig_opcode, | ||||
| 				   BrigType16_t brig_type) const; | ||||
| 
 | ||||
|   void unpack (tree value, tree_stl_vec &elements); | ||||
|   tree pack (tree_stl_vec &elements); | ||||
|   tree add_temp_var (std::string name, tree expr); | ||||
| 
 | ||||
|   static bool needs_workitem_context_data (BrigOpcode16_t brig_opcode); | ||||
|   static HOST_WIDE_INT int_constant_value (tree node); | ||||
|   static tree_code get_tree_code_for_hsa_opcode (BrigOpcode16_t brig_opcode, | ||||
| 						 BrigType16_t brig_type); | ||||
| 
 | ||||
|   void start_new_bb (); | ||||
|   void add_reg_var_update (tree reg_var, tree val); | ||||
|   bool is_id_val (tree reg_var); | ||||
|   tree id_val (tree reg_var); | ||||
| 
 | ||||
|   const BrigDirectiveExecutable *m_brig_def; | ||||
| 
 | ||||
|   bool m_is_kernel; | ||||
|  | @ -183,6 +207,11 @@ public: | |||
|   tree m_wg_id_vars[3]; | ||||
|   tree m_wg_size_vars[3]; | ||||
|   tree m_grid_size_vars[3]; | ||||
|   /* Explicitly computed WG base for the absolute IDs which is used
 | ||||
|      as the initial value when looping that dimension.   We update | ||||
|      the abs id with ++ to make it easy for the vectorizer.  */ | ||||
|   tree m_abs_id_base_vars[3]; | ||||
|   tree m_abs_id_vars[3]; | ||||
| 
 | ||||
|   /* Set to true in case the kernel contains at least one dispatch packet
 | ||||
|      (work-item ID-related) builtin call that could not be expanded to | ||||
|  | @ -219,6 +248,20 @@ private: | |||
|   /* Bookkeeping for the different HSA registers and their tree declarations
 | ||||
|      for the currently generated function.  */ | ||||
|   reg_decl_index_entry *m_regs[BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT]; | ||||
| 
 | ||||
|   /* Map for keeping book reads of ID variables, which can be propagated
 | ||||
|      to uses in address expressions to produce cleaner indexing functions | ||||
|      with unnecessary casts stripped off, etc.  */ | ||||
|   typedef std::map<tree, tree> id_val_map; | ||||
| 
 | ||||
|   /* Keeps track of ID values alive in registers in the currently
 | ||||
|      processed BB.  */ | ||||
|   id_val_map m_id_val_defs; | ||||
| 
 | ||||
|   /* HSAIL-specific builtin functions not yet integrated to gcc.  */ | ||||
|   typedef std::map<std::pair<BrigOpcode16_t, BrigType16_t>, tree> builtin_map; | ||||
| 
 | ||||
|   static builtin_map s_custom_builtins; | ||||
| }; | ||||
| 
 | ||||
| #endif | ||||
|  |  | |||
|  | @ -31,7 +31,10 @@ brig_directive_label_handler::operator () (const BrigBase *base) | |||
|   std::string label_str ((const char *) (label_name->bytes), | ||||
| 			 label_name->byteCount); | ||||
| 
 | ||||
|   m_parent.m_cf->start_new_bb (); | ||||
| 
 | ||||
|   tree stmt = build_stmt (LABEL_EXPR, m_parent.m_cf->label (label_str)); | ||||
|   m_parent.m_cf->append_statement (stmt); | ||||
| 
 | ||||
|   return base->byteCount; | ||||
| } | ||||
|  |  | |||
|  | @ -59,7 +59,7 @@ brig_lane_inst_handler::operator () (const BrigBase *base) | |||
|       elements.push_back (zero_cst); | ||||
|       elements.push_back (zero_cst); | ||||
| 
 | ||||
|       expr = pack (elements); | ||||
|       expr = m_parent.m_cf->pack (elements); | ||||
|     } | ||||
|   else if (inst.base.opcode == BRIG_OPCODE_ACTIVELANEPERMUTE) | ||||
|     { | ||||
|  |  | |||
|  | @ -63,7 +63,7 @@ brig_mem_inst_handler::build_mem_access (const BrigInstBase *brig_inst, | |||
|     { | ||||
|       /* Add a temporary variable so there won't be multiple
 | ||||
| 	 reads in case of vector unpack.  */ | ||||
|       mem_ref = add_temp_var ("mem_read", mem_ref); | ||||
|       mem_ref = m_parent.m_cf->add_temp_var ("mem_read", mem_ref); | ||||
|       return build_output_assignment (*brig_inst, data, mem_ref); | ||||
|     } | ||||
|   else | ||||
|  | @ -95,8 +95,9 @@ brig_mem_inst_handler::operator () (const BrigBase *base) | |||
|       inputs.push_back (operands[1]); | ||||
|       inputs.push_back (align_opr); | ||||
|       tree builtin_call | ||||
| 	= expand_or_call_builtin (BRIG_OPCODE_ALLOCA, BRIG_TYPE_U32, | ||||
| 				  uint32_type_node, inputs); | ||||
| 	= m_parent.m_cf->expand_or_call_builtin (BRIG_OPCODE_ALLOCA, | ||||
| 						 BRIG_TYPE_U32, | ||||
| 						 uint32_type_node, inputs); | ||||
|       build_output_assignment (*brig_inst, operands[0], builtin_call); | ||||
|       m_parent.m_cf->m_has_allocas = true; | ||||
|       return base->byteCount; | ||||
|  |  | |||
|  | @ -58,13 +58,22 @@ typedef struct __attribute__((__packed__)) | |||
| 
 | ||||
| /* The prefix to use in the ELF section containing descriptor for
 | ||||
|    a function.  */ | ||||
| 
 | ||||
| #define PHSA_DESC_SECTION_PREFIX "phsa.desc." | ||||
| #define PHSA_HOST_DEF_PTR_PREFIX "__phsa.host_def." | ||||
| 
 | ||||
| /* The frontend error messages are parsed by the host runtime.  Known
 | ||||
|    prefix strings are used to separate the different runtime error | ||||
|    codes.  */ | ||||
| 
 | ||||
| #define PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE "Incompatible module: " | ||||
| #define PHSA_ERROR_PREFIX_CORRUPTED_MODULE "Corrupted module: " | ||||
| 
 | ||||
| /* Offsets of attributes in the PHSA context structs.
 | ||||
|    Used by -fphsa-wi-context-opt.  */ | ||||
| #define PHSA_CONTEXT_OFFS_WI_IDS 0 | ||||
| #define PHSA_CONTEXT_OFFS_WG_IDS (PHSA_CONTEXT_OFFS_WI_IDS + 3 * 4) | ||||
| #define PHSA_CONTEXT_WG_SIZES (PHSA_CONTEXT_OFFS_WG_IDS + 3 * 4) | ||||
| #define PHSA_CONTEXT_CURRENT_WG_SIZES (PHSA_CONTEXT_WG_SIZES + 3 * 4) | ||||
| 
 | ||||
| #endif | ||||
|  |  | |||
|  | @ -31,6 +31,11 @@ BRIG Separate Alias(d) | |||
| -dump= | ||||
| BRIG Joined Alias(d) | ||||
| 
 | ||||
| fassume-phsa | ||||
| BRIG Report Var(flag_assume_phsa) Init(1) Optimization | ||||
| Assume we are finalizing for phsa and its libhsail-rt.  Enables additional | ||||
| phsa-specific optimizations (default). | ||||
| 
 | ||||
| L | ||||
| BRIG Joined Separate | ||||
| ; Not documented | ||||
|  |  | |||
|  | @ -283,7 +283,9 @@ DEF_FUNCTION_TYPE_1 (BT_FN_UINT_INT, BT_UINT, BT_INT) | |||
| DEF_FUNCTION_TYPE_1 (BT_FN_UINT_ULONG, BT_UINT, BT_ULONG) | ||||
| DEF_FUNCTION_TYPE_1 (BT_FN_UINT_LONG, BT_UINT, BT_LONG) | ||||
| DEF_FUNCTION_TYPE_1 (BT_FN_UINT_PTR, BT_UINT, BT_PTR) | ||||
| DEF_FUNCTION_TYPE_1 (BT_FN_UINT_CONST_PTR, BT_UINT, BT_CONST_PTR) | ||||
| DEF_FUNCTION_TYPE_1 (BT_FN_ULONG_PTR, BT_ULONG, BT_PTR) | ||||
| DEF_FUNCTION_TYPE_1 (BT_FN_ULONG_CONST_PTR, BT_ULONG, BT_CONST_PTR) | ||||
| DEF_FUNCTION_TYPE_1 (BT_FN_ULONG_ULONG, BT_ULONG, BT_ULONG) | ||||
| DEF_FUNCTION_TYPE_1 (BT_FN_ULONGLONG_ULONGLONG, BT_ULONGLONG, BT_ULONGLONG) | ||||
| DEF_FUNCTION_TYPE_1 (BT_FN_INT8_FLOAT, BT_INT8, BT_FLOAT) | ||||
|  | @ -480,6 +482,7 @@ DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_SIZE_CONST_VPTR, BT_BOOL, BT_SIZE, | |||
| DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_INT_BOOL, BT_BOOL, BT_INT, BT_BOOL) | ||||
| DEF_FUNCTION_TYPE_2 (BT_FN_VOID_UINT_UINT, BT_VOID, BT_UINT, BT_UINT) | ||||
| DEF_FUNCTION_TYPE_2 (BT_FN_UINT_UINT_PTR, BT_UINT, BT_UINT, BT_PTR) | ||||
| DEF_FUNCTION_TYPE_2 (BT_FN_UINT_UINT_CONST_PTR, BT_UINT, BT_UINT, BT_CONST_PTR) | ||||
| DEF_FUNCTION_TYPE_2 (BT_FN_PTR_CONST_PTR_SIZE, BT_PTR, BT_CONST_PTR, BT_SIZE) | ||||
| DEF_FUNCTION_TYPE_2 (BT_FN_PTR_CONST_PTR_CONST_PTR, BT_PTR, BT_CONST_PTR, BT_CONST_PTR) | ||||
| DEF_FUNCTION_TYPE_2 (BT_FN_VOID_PTRPTR_CONST_PTR, BT_VOID, BT_PTR_PTR, BT_CONST_PTR) | ||||
|  | @ -569,6 +572,7 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_DOUBLE_DOUBLEPTR_DOUBLEPTR, | |||
| DEF_FUNCTION_TYPE_3 (BT_FN_VOID_LONGDOUBLE_LONGDOUBLEPTR_LONGDOUBLEPTR, | ||||
| 		     BT_VOID, BT_LONGDOUBLE, BT_LONGDOUBLE_PTR, BT_LONGDOUBLE_PTR) | ||||
| DEF_FUNCTION_TYPE_3 (BT_FN_VOID_PTR_PTR_PTR, BT_VOID, BT_PTR, BT_PTR, BT_PTR) | ||||
| DEF_FUNCTION_TYPE_3 (BT_FN_VOID_PTR_PTR_UINT32, BT_VOID, BT_PTR, BT_PTR, BT_UINT32) | ||||
| DEF_FUNCTION_TYPE_3 (BT_FN_INT_CONST_STRING_PTR_CONST_STRING_PTR_CONST_STRING, | ||||
| 		     BT_INT, BT_CONST_STRING, BT_PTR_CONST_STRING, BT_PTR_CONST_STRING) | ||||
| DEF_FUNCTION_TYPE_3 (BT_FN_INT_INT_CONST_STRING_VALIST_ARG, | ||||
|  |  | |||
|  | @ -1,4 +1,9 @@ | |||
| 2018-05-04 Carl Love  <cel@us.ibm.com> | ||||
| 2018-05-04  Pekka Jääskeläinen  <pekka.jaaskelainen@parmance.com> | ||||
| 
 | ||||
| 	* testsuite/brig.dg/test/gimple/smoke_test.hsail: Fix the test | ||||
| 	to match the currently produced gimple. | ||||
| 
 | ||||
| 2018-05-04  Carl Love  <cel@us.ibm.com> | ||||
| 	* gcc.target/powerpc/vsx-vector-6.h (foo): Add test for vec_max, | ||||
| 	vec_trunc. | ||||
| 	* gcc.target/powerpc/vsx-vector-6-le.c (dg-final): Update xvcmpeqdp, | ||||
|  |  | |||
|  | @ -41,15 +41,15 @@ prog kernel &KernelWithBarrier(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) | |||
| }; | ||||
| 
 | ||||
| /* The kernel function itself should have a fingerprint as follows */ | ||||
| /* _Kernel (unsigned char * __args, void * __context, void * __group_base_addr, void * __private_base_addr) */ | ||||
| /* { dg-final { scan-tree-dump "_Kernel \\\(unsigned char \\\* __args, void \\\* __context, void \\\* __group_base_addr, unsigned int __group_local_offset, void \\\* __private_base_addr\\\)" "gimple"} } */ | ||||
| /* _Kernel (const unsigned char * restrict __args, void * restrict __context, unsigned char * restrict __group_base_addr, unsigned int __group_local_offset, unsigned char * restrict __private_base_addr) */ | ||||
| /* { dg-final { scan-tree-dump "_Kernel \\\(const unsigned char \\\* restrict __args, void \\\* restrict __context, unsigned char \\\* restrict __group_base_addr, unsigned int __group_local_offset, unsigned char \\\* restrict __private_base_addr\\\)" "gimple"} } */ | ||||
| 
 | ||||
| /* ld_kernarg: mem_read.0 = MEM[(unsigned long *)__args]; */ | ||||
| /* { dg-final { scan-tree-dump "mem_read.\[0-9\] = MEM\\\[\\\(unsigned long \\\*\\\)__args\\\];" "gimple"} } */ | ||||
| 
 | ||||
| /* The latter ld_global_u32 should be visible as a pointer dereference (after pointer arithmetics on a temporary var): */ | ||||
| /* mem_read.2 = *D.1691; */ | ||||
| /* { dg-final { scan-tree-dump "mem_read.\[0-9\] = \\\*\[_0-9\]+;" "gimple"} } */ | ||||
| /* { dg-final { scan-tree-dump "mem_read.\[0-9\]+ = \\\*\[_0-9\]+;" "gimple"} } */ | ||||
| 
 | ||||
| /* add_u32s should generate +operators */ | ||||
| /* { dg-final { scan-tree-dump "s2 = s0 \\\+ s1;" "gimple"} } */ | ||||
|  | @ -71,8 +71,8 @@ prog kernel &KernelWithBarrier(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) | |||
| /* { dg-final { scan-tree-dump "if \\\(__local_z < __cur_wg_size_z\\\) goto __wi_loop_z; else goto" "gimple"} } */ | ||||
| 
 | ||||
| /* The launcher should call __hsail_launch_wg_function in this case: */ | ||||
| /* Kernel (void * __context, void * __group_base_addr) */ | ||||
| /* { dg-final { scan-tree-dump "Kernel \\\(void \\\* __context, void \\\* __group_base_addr\\\)" "gimple"} } */ | ||||
| /* Kernel (void * restrict __context, unsigned char * restrict __group_base_addr) */ | ||||
| /* { dg-final { scan-tree-dump "Kernel \\\(void \\\* restrict __context, unsigned char \\\* restrict __group_base_addr\\\)" "gimple"} } */ | ||||
| /* { dg-final { scan-tree-dump "__hsail_launch_wg_function \\\(_Kernel, __context, __group_base_addr, group_local_offset.*\\\);" "gimple"} }*/ | ||||
| 
 | ||||
| /* The kernel should have the magic metadata section injected to the ELF. */ | ||||
|  |  | |||
|  | @ -1,3 +1,10 @@ | |||
| 2018-05-04  Pekka Jääskeläinen  <pekka.jaaskelainen@parmance.com> | ||||
| 
 | ||||
| 	* include/internal/phsa-rt.h: Whitespace cleanup. | ||||
| 	* include/internal/workitems.h: Store work item ID data to easily | ||||
| 	accessible locations. | ||||
| 	* rt/workitems.c: Same. | ||||
| 
 | ||||
| 2018-05-04  Pekka Jääskeläinen  <pekka.jaaskelainen@parmance.com> | ||||
| 
 | ||||
| 	* rt/workitems.c: Fix an alloca stack underflow. | ||||
|  |  | |||
|  | @ -54,7 +54,6 @@ typedef void (*gccbrigKernelFunc) (unsigned char *, void *, void *, uint32_t, | |||
| */ | ||||
| typedef struct | ||||
| { | ||||
| 
 | ||||
|   /* Data set by the HSA Runtime's kernel launcher.  */ | ||||
|   hsa_kernel_dispatch_packet_t *dp; | ||||
| 
 | ||||
|  |  | |||
|  | @ -45,11 +45,6 @@ | |||
| 
 | ||||
| typedef struct | ||||
| { | ||||
|   /* The group id of the currently executed WG.  */ | ||||
|   size_t x; | ||||
|   size_t y; | ||||
|   size_t z; | ||||
| 
 | ||||
|   /* This is 1 in case there are more work groups to execute.
 | ||||
|      If 0, the work-item threads should finish themselves.  */ | ||||
|   int more_wgs; | ||||
|  | @ -89,6 +84,16 @@ typedef struct | |||
|      stack frame.  Initialized to point outside the private segment.  */ | ||||
|   uint32_t alloca_frame_p; | ||||
| 
 | ||||
|   /* The group id of the currently executed WG.  This is for fiber based
 | ||||
|      execution.  The group ids are duplicated also to the per WI context | ||||
|      struct for simplified single pointer access in the GCCBRIG produced | ||||
|      code. | ||||
|    */ | ||||
| 
 | ||||
|   uint32_t x; | ||||
|   uint32_t y; | ||||
|   uint32_t z; | ||||
| 
 | ||||
| } PHSAWorkGroup; | ||||
| 
 | ||||
| /* Data identifying a single work-item, passed to the work-item thread in case
 | ||||
|  | @ -96,17 +101,42 @@ typedef struct | |||
| 
 | ||||
| typedef struct | ||||
| { | ||||
|   /* NOTE: These members STARTing here should not be moved as they are
 | ||||
|      accessed directly by code emitted by BRIG FE.   */ | ||||
| 
 | ||||
|   /* The local id of the current WI. */ | ||||
| 
 | ||||
|   uint32_t x; | ||||
|   uint32_t y; | ||||
|   uint32_t z; | ||||
| 
 | ||||
|   /* The group id of the currently executed WG.  */ | ||||
| 
 | ||||
|   uint32_t group_x; | ||||
|   uint32_t group_y; | ||||
|   uint32_t group_z; | ||||
| 
 | ||||
|   /* The local size of a complete WG.  */ | ||||
| 
 | ||||
|   uint32_t wg_size_x; | ||||
|   uint32_t wg_size_y; | ||||
|   uint32_t wg_size_z; | ||||
| 
 | ||||
|   /* The local size of the current WG.  */ | ||||
| 
 | ||||
|   uint32_t cur_wg_size_x; | ||||
|   uint32_t cur_wg_size_y; | ||||
|   uint32_t cur_wg_size_z; | ||||
| 
 | ||||
|   /* NOTE: Fixed members END here.  */ | ||||
| 
 | ||||
|   PHSAKernelLaunchData *launch_data; | ||||
|   /* Identifies and keeps book of the currently executed WG of the WI swarm.  */ | ||||
|   volatile PHSAWorkGroup *wg; | ||||
|   /* The local id of the current WI.  */ | ||||
|   size_t x; | ||||
|   size_t y; | ||||
|   size_t z; | ||||
| #ifdef HAVE_FIBERS | ||||
|   fiber_t fiber; | ||||
| #endif | ||||
| } PHSAWorkItem; | ||||
| } __attribute__((packed)) PHSAWorkItem; | ||||
| 
 | ||||
| 
 | ||||
| #endif | ||||
|  |  | |||
|  | @ -107,11 +107,20 @@ phsa_work_item_thread (int arg0, int arg1) | |||
| 	 the current_work_group_* is set to point to the WG executed next.  */ | ||||
|       if (!wi->wg->more_wgs) | ||||
| 	break; | ||||
| 
 | ||||
|       wi->group_x = wg->x; | ||||
|       wi->group_y = wg->y; | ||||
|       wi->group_z = wg->z; | ||||
| 
 | ||||
|       wi->cur_wg_size_x = __hsail_currentworkgroupsize (0, wi); | ||||
|       wi->cur_wg_size_y = __hsail_currentworkgroupsize (1, wi); | ||||
|       wi->cur_wg_size_z = __hsail_currentworkgroupsize (2, wi); | ||||
| 
 | ||||
| #ifdef DEBUG_PHSA_RT | ||||
|       printf ( | ||||
| 	"Running work-item %lu/%lu/%lu for wg %lu/%lu/%lu / %lu/%lu/%lu...\n", | ||||
| 	wi->x, wi->y, wi->z, wg->x, wg->y, wg->z, l_data->wg_max_x, | ||||
| 	l_data->wg_max_y, l_data->wg_max_z); | ||||
| 	wi->x, wi->y, wi->z, wi->group_x, wi->group_y, wi->group_z, | ||||
| 	l_data->wg_max_x, l_data->wg_max_y, l_data->wg_max_z); | ||||
| #endif | ||||
| 
 | ||||
|       if (wi->x < __hsail_currentworkgroupsize (0, wi) | ||||
|  | @ -180,6 +189,13 @@ phsa_work_item_thread (int arg0, int arg1) | |||
| 	  else | ||||
| 	    wg->x++; | ||||
| #endif | ||||
| 	  wi->group_x = wg->x; | ||||
| 	  wi->group_y = wg->y; | ||||
| 	  wi->group_z = wg->z; | ||||
| 
 | ||||
| 	  wi->cur_wg_size_x = __hsail_currentworkgroupsize (0, wi); | ||||
| 	  wi->cur_wg_size_y = __hsail_currentworkgroupsize (1, wi); | ||||
| 	  wi->cur_wg_size_z = __hsail_currentworkgroupsize (2, wi); | ||||
| 
 | ||||
| 	  /* Reinitialize the work-group barrier according to the new WG's
 | ||||
| 	     size, which might not be the same as the previous ones, due | ||||
|  | @ -233,6 +249,7 @@ phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr, | |||
|   PHSAWorkItem *wi_threads = NULL; | ||||
|   PHSAWorkGroup wg; | ||||
|   size_t flat_wi_id = 0, x, y, z, max_x, max_y, max_z; | ||||
|   uint32_t group_x, group_y, group_z; | ||||
|   fiber_barrier_t wg_start_barrier; | ||||
|   fiber_barrier_t wg_completion_barrier; | ||||
|   fiber_barrier_t wg_sync_barrier; | ||||
|  | @ -257,13 +274,13 @@ phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr, | |||
|   wg.initial_group_offset = group_local_offset; | ||||
| 
 | ||||
| #ifdef EXECUTE_WGS_BACKWARDS | ||||
|   wg.x = context->wg_max_x - 1; | ||||
|   wg.y = context->wg_max_y - 1; | ||||
|   wg.z = context->wg_max_z - 1; | ||||
|   group_x = context->wg_max_x - 1; | ||||
|   group_y = context->wg_max_y - 1; | ||||
|   group_z = context->wg_max_z - 1; | ||||
| #else | ||||
|   wg.x = context->wg_min_x; | ||||
|   wg.y = context->wg_min_y; | ||||
|   wg.z = context->wg_min_z; | ||||
|   group_x = context->wg_min_x; | ||||
|   group_y = context->wg_min_y; | ||||
|   group_z = context->wg_min_z; | ||||
| #endif | ||||
| 
 | ||||
|   fiber_barrier_init (&wg_sync_barrier, wg_size); | ||||
|  | @ -290,6 +307,19 @@ phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr, | |||
| 	  PHSAWorkItem *wi = &wi_threads[flat_wi_id]; | ||||
| 	  wi->launch_data = context; | ||||
| 	  wi->wg = &wg; | ||||
| 
 | ||||
| 	  wg.x = wi->group_x = group_x; | ||||
| 	  wg.y = wi->group_y = group_y; | ||||
| 	  wg.z = wi->group_z = group_z; | ||||
| 
 | ||||
| 	  wi->wg_size_x = context->dp->workgroup_size_x; | ||||
| 	  wi->wg_size_y = context->dp->workgroup_size_y; | ||||
| 	  wi->wg_size_z = context->dp->workgroup_size_z; | ||||
| 
 | ||||
| 	  wi->cur_wg_size_x = __hsail_currentworkgroupsize (0, wi); | ||||
| 	  wi->cur_wg_size_y = __hsail_currentworkgroupsize (1, wi); | ||||
| 	  wi->cur_wg_size_z = __hsail_currentworkgroupsize (2, wi); | ||||
| 
 | ||||
| 	  wi->x = x; | ||||
| 	  wi->y = y; | ||||
| 	  wi->z = z; | ||||
|  | @ -467,9 +497,17 @@ phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr, | |||
|     for (wg_y = context->wg_min_y; wg_y < context->wg_max_y; ++wg_y) | ||||
|       for (wg_x = context->wg_min_x; wg_x < context->wg_max_x; ++wg_x) | ||||
| 	{ | ||||
| 	  wi.wg->x = wg_x; | ||||
| 	  wi.wg->y = wg_y; | ||||
| 	  wi.wg->z = wg_z; | ||||
| 	  wi.group_x = wg_x; | ||||
| 	  wi.group_y = wg_y; | ||||
| 	  wi.group_z = wg_z; | ||||
| 
 | ||||
| 	  wi.wg_size_x = context->dp->workgroup_size_x; | ||||
| 	  wi.wg_size_y = context->dp->workgroup_size_y; | ||||
| 	  wi.wg_size_z = context->dp->workgroup_size_z; | ||||
| 
 | ||||
| 	  wi.cur_wg_size_x = __hsail_currentworkgroupsize (0, &wi); | ||||
| 	  wi.cur_wg_size_y = __hsail_currentworkgroupsize (1, &wi); | ||||
| 	  wi.cur_wg_size_z = __hsail_currentworkgroupsize (2, &wi); | ||||
| 
 | ||||
| 	  context->kernel (context->kernarg_addr, &wi, group_base_ptr, | ||||
| 			   group_local_offset, private_base_ptr); | ||||
|  | @ -564,15 +602,15 @@ __hsail_workitemabsid (uint32_t dim, PHSAWorkItem *context) | |||
|     default: | ||||
|     case 0: | ||||
|       /* Overflow semantics in the case of WG dim > grid dim.  */ | ||||
|       id = ((uint64_t) context->wg->x * dp->workgroup_size_x + context->x) | ||||
|       id = ((uint64_t) context->group_x * dp->workgroup_size_x + context->x) | ||||
| 	   % dp->grid_size_x; | ||||
|       break; | ||||
|     case 1: | ||||
|       id = ((uint64_t) context->wg->y * dp->workgroup_size_y + context->y) | ||||
|       id = ((uint64_t) context->group_y * dp->workgroup_size_y + context->y) | ||||
| 	   % dp->grid_size_y; | ||||
|       break; | ||||
|     case 2: | ||||
|       id = ((uint64_t) context->wg->z * dp->workgroup_size_z + context->z) | ||||
|       id = ((uint64_t) context->group_z * dp->workgroup_size_z + context->z) | ||||
| 	   % dp->grid_size_z; | ||||
|       break; | ||||
|     } | ||||
|  | @ -590,15 +628,15 @@ __hsail_workitemabsid_u64 (uint32_t dim, PHSAWorkItem *context) | |||
|     default: | ||||
|     case 0: | ||||
|       /* Overflow semantics in the case of WG dim > grid dim.  */ | ||||
|       id = ((uint64_t) context->wg->x * dp->workgroup_size_x + context->x) | ||||
|       id = ((uint64_t) context->group_x * dp->workgroup_size_x + context->x) | ||||
| 	   % dp->grid_size_x; | ||||
|       break; | ||||
|     case 1: | ||||
|       id = ((uint64_t) context->wg->y * dp->workgroup_size_y + context->y) | ||||
|       id = ((uint64_t) context->group_y * dp->workgroup_size_y + context->y) | ||||
| 	   % dp->grid_size_y; | ||||
|       break; | ||||
|     case 2: | ||||
|       id = ((uint64_t) context->wg->z * dp->workgroup_size_z + context->z) | ||||
|       id = ((uint64_t) context->group_z * dp->workgroup_size_z + context->z) | ||||
| 	   % dp->grid_size_z; | ||||
|       break; | ||||
|     } | ||||
|  | @ -738,19 +776,19 @@ __hsail_currentworkgroupsize (uint32_t dim, PHSAWorkItem *wi) | |||
|     { | ||||
|     default: | ||||
|     case 0: | ||||
|       if ((uint64_t) wi->wg->x < dp->grid_size_x / dp->workgroup_size_x) | ||||
|       if ((uint64_t) wi->group_x < dp->grid_size_x / dp->workgroup_size_x) | ||||
| 	wg_size = dp->workgroup_size_x; /* Full WG.  */ | ||||
|       else | ||||
| 	wg_size = dp->grid_size_x % dp->workgroup_size_x; /* Partial WG.  */ | ||||
|       break; | ||||
|     case 1: | ||||
|       if ((uint64_t) wi->wg->y < dp->grid_size_y / dp->workgroup_size_y) | ||||
|       if ((uint64_t) wi->group_y < dp->grid_size_y / dp->workgroup_size_y) | ||||
| 	wg_size = dp->workgroup_size_y; /* Full WG.  */ | ||||
|       else | ||||
| 	wg_size = dp->grid_size_y % dp->workgroup_size_y; /* Partial WG.  */ | ||||
|       break; | ||||
|     case 2: | ||||
|       if ((uint64_t) wi->wg->z < dp->grid_size_z / dp->workgroup_size_z) | ||||
|       if ((uint64_t) wi->group_z < dp->grid_size_z / dp->workgroup_size_z) | ||||
| 	wg_size = dp->workgroup_size_z; /* Full WG.  */ | ||||
|       else | ||||
| 	wg_size = dp->grid_size_z % dp->workgroup_size_z; /* Partial WG.  */ | ||||
|  | @ -798,11 +836,11 @@ __hsail_workgroupid (uint32_t dim, PHSAWorkItem *wi) | |||
|     { | ||||
|     default: | ||||
|     case 0: | ||||
|       return wi->wg->x; | ||||
|       return wi->group_x; | ||||
|     case 1: | ||||
|       return wi->wg->y; | ||||
|       return wi->group_y; | ||||
|     case 2: | ||||
|       return wi->wg->z; | ||||
|       return wi->group_z; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 visit0r
						visit0r