/* Lowering and expansion of OpenMP directives for HSA GPU agents.

   Copyright (C) 2013-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "omp-general.h"
#include "omp-low.h"
#include "omp-grid.h"
#include "gimple-pretty-print.h"

/* Return the lastprivate predicate for a given gridified loop described by
   FD.  */

tree
omp_grid_lastprivate_predicate (struct omp_for_data *fd)
{
  /* When dealing with a gridified loop, we need to check up to three collapsed
     iteration variables but they are not actually captured in this fd.
     Fortunately, we can easily rely on HSA builtins to get this
     information.  */

  tree id, size;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
      && gimple_omp_for_grid_intra_group (fd->for_stmt))
    {
      id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID);
      size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE);
    }
  else
    {
      id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID);
      size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE);
    }
  tree cond = NULL;
  for (int dim = 0; dim < fd->collapse; dim++)
    {
      tree dim_tree = build_int_cstu (unsigned_type_node, dim);
      tree u1 = build_int_cstu (unsigned_type_node, 1);
      tree c2
	= build2 (EQ_EXPR, boolean_type_node,
		  build2 (PLUS_EXPR, unsigned_type_node,
			  build_call_expr (id, 1, dim_tree), u1),
		  build_call_expr (size, 1, dim_tree));
      if (cond)
	cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2);
      else
	cond = c2;
    }
  return cond;
}
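
/* Illustrative sketch (an editorial addition, not part of the original
   sources): for a loop that is not an intra-group grid loop and has
   collapse == 2, the condition built above amounts to

     workitemabsid (0) + 1 == gridsize (0)
     && workitemabsid (1) + 1 == gridsize (1)

   where workitemabsid and gridsize stand for calls to the builtins selected
   through BUILT_IN_HSA_WORKITEMABSID and BUILT_IN_HSA_GRIDSIZE, i.e. the
   predicate is true only in the work-item executing the last iteration of
   every collapsed dimension.  */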

/* Structure describing the basic properties of the loop we are analyzing
   whether it can be gridified and when it is gridified.  */

struct grid_prop
{
  /* True when we are doing tiling gridification, i.e. when there is a distinct
     distribute loop over groups and a loop construct over work-items.  False
     when distribute and parallel for loops form a combined construct.  */
  bool tiling;
  /* Location of the target construct for optimization information
     messages.  */
  dump_user_location_t target_loc;
  /* The collapse clause of the involved loops.  Collapse value of all of them
     must be the same for gridification to take place.  */
  size_t collapse;
  /* Group sizes, if requested by the user or NULL if not requested.  */
  tree group_sizes[3];
};

#define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \
  "gridified HSA kernel because "
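
/* A hypothetical example (an editorial illustration, not part of the original
   sources) of a target region that the analysis below accepts as the simple,
   combined pattern:

     #pragma omp target teams
     #pragma omp distribute parallel for collapse (2)
     for (int i = 0; i < n; i++)
       for (int j = 0; j < m; j++)
	 a[i][j] = f (i, j);

   i.e. a teams construct containing only a distribute construct combined with
   a parallel for, with no clauses that would prevent gridification.  The
   alternative, tiling pattern is sketched further below.  */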

/* Return true if STMT is an assignment of a register-type into a local
   VAR_DECL.  If GRID is non-NULL, the assignment additionally must not be to
   any of the trees specifying group sizes there.  */

static bool
grid_safe_assignment_p (gimple *stmt, grid_prop *grid)
{
  gassign *assign = dyn_cast <gassign *> (stmt);
  if (!assign)
    return false;
  if (gimple_clobber_p (assign))
    return true;
  tree lhs = gimple_assign_lhs (assign);
  if (!VAR_P (lhs)
      || !is_gimple_reg_type (TREE_TYPE (lhs))
      || is_global_var (lhs))
    return false;
  if (grid)
    for (unsigned i = 0; i < grid->collapse; i++)
      if (lhs == grid->group_sizes[i])
	return false;
  return true;
}

/* Return true if all statements in SEQ are assignments to local register-type
   variables that do not hold group size information.  */

static bool
grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid)
{
  if (!seq)
    return true;

  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    if (!grid_safe_assignment_p (gsi_stmt (gsi), grid))
      return false;
  return true;
}

/* Scan statements in SEQ and call itself recursively on any bind.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  If during whole search only assignments to
   register-type local variables (that do not overwrite group size information)
   and one single OMP statement is encountered, return true, otherwise return
   false.  RET is where we store any OMP statement encountered.  */

static bool
grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid,
					  const char *name, gimple **ret)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (grid_safe_assignment_p (stmt, grid))
	continue;
      if (gbind *bind = dyn_cast <gbind *> (stmt))
	{
	  gimple_seq bind_body = gimple_bind_body (bind);
	  if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name,
							 ret))
	    return false;
	}
      else if (is_gimple_omp (stmt))
	{
	  if (*ret)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "%s construct "
				   "contains multiple OpenMP constructs\n",
				   name);
		  dump_printf_loc (MSG_NOTE, *ret,
				   "The first OpenMP construct within "
				   "a parallel\n");
		  dump_printf_loc (MSG_NOTE, stmt,
				   "The second OpenMP construct within "
				   "a parallel\n");
		}
	      return false;
	    }
	  *ret = stmt;
	}
      else
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "%s construct contains "
			       "a complex statement\n", name);
	      dump_printf_loc (MSG_NOTE, stmt,
			       "This statement cannot be analyzed for "
			       "gridification\n");
	    }
	  return false;
	}
    }
  return true;
}

/* Scan statements in SEQ and make sure that it and any binds in it contain
   only assignments to local register-type variables (that do not overwrite
   group size information) and one OMP construct.  If so, return that
   construct, otherwise return NULL.  GRID describes hitherto discovered
   properties of the loop that is evaluated for possible gridification.  If
   dumping is enabled and function fails, use NAME to dump a note with the
   reason for failure.  */

static gimple *
grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid,
					const char *name)
{
  if (!seq)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			 GRID_MISSED_MSG_PREFIX "%s construct has empty body\n",
			 name);
      return NULL;
    }

  gimple *ret = NULL;
  if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret))
    {
      if (!ret && dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			 GRID_MISSED_MSG_PREFIX "%s construct does not contain"
			 " any other OpenMP construct\n", name);
      return ret;
    }
  else
    return NULL;
}

/* Walker function looking for statements that there is no point gridifying
   (and for noreturn function calls which we cannot do).  Return non-NULL if
   such a statement is found.  */

static tree
grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
				   bool *handled_ops_p,
				   struct walk_stmt_info *wi)
{
  *handled_ops_p = false;
  gimple *stmt = gsi_stmt (*gsi);
  switch (gimple_code (stmt))
    {
    case GIMPLE_CALL:
      if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
	{
	  *handled_ops_p = true;
	  wi->info = stmt;
	  return error_mark_node;
	}
      break;

    /* We may reduce the following list if we find a way to implement the
       clauses, but now there is no point trying further.  */
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_TASK:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_SECTIONS:
    case GIMPLE_OMP_SECTIONS_SWITCH:
    case GIMPLE_OMP_TARGET:
    case GIMPLE_OMP_ORDERED:
      *handled_ops_p = true;
      wi->info = stmt;
      return error_mark_node;
    default:
      break;
    }
  return NULL;
}

/* Examine clauses of omp parallel statement PAR and if any prevents
   gridification, issue a missed-optimization diagnostic and return false,
   otherwise return true.  TLOC is the dump location of the target construct
   that is evaluated for possible gridification.  */

static bool
grid_parallel_clauses_gridifiable (gomp_parallel *par, dump_user_location_t tloc)
{
  tree clauses = gimple_omp_parallel_clauses (par);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
	{
	case OMP_CLAUSE_NUM_THREADS:
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			       GRID_MISSED_MSG_PREFIX "there is "
			       "a num_threads clause of the parallel "
			       "construct\n");
	      dump_printf_loc (MSG_NOTE, par,
			       "Parallel construct has a num_threads clause\n");
	    }
	  return false;

	case OMP_CLAUSE_REDUCTION:
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			       GRID_MISSED_MSG_PREFIX "a reduction clause "
			       "is present\n ");
	      dump_printf_loc (MSG_NOTE, par,
			       "Parallel construct has a reduction clause\n");
	    }
	  return false;

	default:
	  break;
	}
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }
  return true;
}

/* Examine clauses and the body of omp loop statement GFOR and if something
   prevents gridification, issue a missed-optimization diagnostic and return
   false, otherwise return true.  GRID describes hitherto discovered properties
   of the loop that is evaluated for possible gridification.  */

static bool
grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid)
{
  if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor),
						 grid))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			   GRID_MISSED_MSG_PREFIX "the inner loop "
			   "bounds computation contains a complex "
			   "statement\n");
	  dump_printf_loc (MSG_NOTE, gfor,
			   "Loop construct cannot be analyzed for "
			   "gridification\n");
	}
      return false;
    }

  tree clauses = gimple_omp_for_clauses (gfor);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
	{
	case OMP_CLAUSE_SCHEDULE:
	  if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "the inner loop "
				   "has a non-automatic schedule clause\n");
		  dump_printf_loc (MSG_NOTE, gfor,
				   "Loop construct has a non-automatic "
				   "schedule clause\n");
		}
	      return false;
	    }
	  break;

	case OMP_CLAUSE_REDUCTION:
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "a reduction "
			       "clause is present\n ");
	      dump_printf_loc (MSG_NOTE, gfor,
			       "Loop construct has a reduction "
			       "clause\n");
	    }
	  return false;

	default:
	  break;
	}
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (wi));
  if (walk_gimple_seq (gimple_omp_body (gfor),
		       grid_find_ungridifiable_statement,
		       NULL, &wi))
    {
      gimple *bad = (gimple *) wi.info;
      if (dump_enabled_p ())
	{
	  if (is_gimple_call (bad))
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			     GRID_MISSED_MSG_PREFIX "the inner loop contains "
			     "call to a noreturn function\n");
	  else
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			     GRID_MISSED_MSG_PREFIX "the inner loop contains "
			     "statement %s which cannot be transformed\n",
			     gimple_code_name[(int) gimple_code (bad)]);
	  dump_printf_loc (MSG_NOTE, bad,
			   "This statement cannot be analyzed for "
			   "gridification\n");
	}
      return false;
    }
  return true;
}

/* Given a distribute omp construct represented by DIST, which in the original
   source forms a combined construct with a looping construct, return true if
   it can be turned into a gridified HSA kernel.  Otherwise return false.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  */

static bool
grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
{
  dump_user_location_t tloc = grid->target_loc;
  gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
							 grid, "distribute");
  gomp_parallel *par;
  if (!stmt
      || !(par = dyn_cast <gomp_parallel *> (stmt))
      || !grid_parallel_clauses_gridifiable (par, tloc))
    return false;

  stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
						 "parallel");
  gomp_for *gfor;
  if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
    return false;

  if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			 GRID_MISSED_MSG_PREFIX "the inner loop is not "
			 "a simple for loop\n");
      return false;
    }
  gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);

  if (!grid_inner_loop_gridifiable_p (gfor, grid))
    return false;

  return true;
}

/* Given an omp loop statement GFOR, return true if it can participate in
   tiling gridification, i.e. in one where the distribute and parallel for
   loops do not form a combined construct.  GRID describes hitherto discovered
   properties of the loop that is evaluated for possible gridification.  */

static bool
grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid)
{
  if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			   GRID_MISSED_MSG_PREFIX "an inner loop is not "
			   "a simple for loop\n");
	  dump_printf_loc (MSG_NOTE, gfor,
			   "This statement is not a simple for loop\n");
	}
      return false;
    }

  if (!grid_inner_loop_gridifiable_p (gfor, grid))
    return false;

  if (gimple_omp_for_collapse (gfor) != grid->collapse)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			   GRID_MISSED_MSG_PREFIX "an inner loop does not "
			   "use the same collapse clause\n");
	  dump_printf_loc (MSG_NOTE, gfor,
			   "Loop construct uses a different collapse clause\n");
	}
      return false;
    }

  struct omp_for_data fd;
  struct omp_for_data_loop *loops
    = (struct omp_for_data_loop *)alloca (grid->collapse
					  * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (gfor, &fd, loops);
  for (unsigned i = 0; i < grid->collapse; i++)
    {
      tree itype, type = TREE_TYPE (fd.loops[i].v);
      if (POINTER_TYPE_P (type))
	itype = signed_type_for (type);
      else
	itype = type;

      tree n1 = fold_convert (itype, fd.loops[i].n1);
      tree n2 = fold_convert (itype, fd.loops[i].n2);
      tree t = build_int_cst (itype,
			      (fd.loops[i].cond_code == LT_EXPR ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t);
      t = fold_build2 (PLUS_EXPR, itype, t, n2);
      t = fold_build2 (MINUS_EXPR, itype, t, n1);
      if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR)
	t = fold_build2 (TRUNC_DIV_EXPR, itype,
			 fold_build1 (NEGATE_EXPR, itype, t),
			 fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step));
      else
	t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step);

      if (!operand_equal_p (grid->group_sizes[i], t, 0))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "the distribute and "
			       "an internal loop do not agree on tile size\n");
	      dump_printf_loc (MSG_NOTE, gfor,
			       "Loop construct does not seem to loop over "
			       "a tile size\n");
	    }
	  return false;
	}
    }
  return true;
}

/* Facing a call to FNDECL in the body of a distribute construct, return true
   if we can handle it or false if it precludes gridification.  */

static bool
grid_call_permissible_in_distribute_p (tree fndecl)
{
  if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
    return true;

  const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  if (strstr (name, "omp_") != name)
    return false;

  if ((strcmp (name, "omp_get_thread_num") == 0)
      || (strcmp (name, "omp_get_num_threads") == 0)
      || (strcmp (name, "omp_get_num_teams") == 0)
      || (strcmp (name, "omp_get_team_num") == 0)
      || (strcmp (name, "omp_get_level") == 0)
      || (strcmp (name, "omp_get_active_level") == 0)
      || (strcmp (name, "omp_in_parallel") == 0))
    return true;

  return false;
}

/* Facing a call satisfying grid_call_permissible_in_distribute_p in the body
   of a distribute construct that is pointed at by GSI, modify it as necessary
   for gridification.  If the statement itself got removed, return true.  */

static bool
grid_handle_call_in_distribute (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);
  tree fndecl = gimple_call_fndecl (stmt);
  gcc_checking_assert (stmt);
  if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
    return false;

  const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  if ((strcmp (name, "omp_get_thread_num") == 0)
      || (strcmp (name, "omp_get_level") == 0)
      || (strcmp (name, "omp_get_active_level") == 0)
      || (strcmp (name, "omp_in_parallel") == 0))
    {
      tree lhs = gimple_call_lhs (stmt);
      if (lhs)
	{
	  gassign *assign
	    = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
	  gsi_insert_before (gsi, assign, GSI_SAME_STMT);
	}
      gsi_remove (gsi, true);
      return true;
    }

  /* The rest of the omp functions can stay as they are, HSA back-end will
     handle them correctly.  */
  gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0)
		       || (strcmp (name, "omp_get_num_teams") == 0)
		       || (strcmp (name, "omp_get_team_num") == 0));
  return false;
}
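
/* For instance (an editorial note, not part of the original sources), a
   pre-existing "tid = omp_get_thread_num ();" statement in the distribute
   part of a prospective kernel is rewritten by the function above into
   "tid = 0;", which is what the call would return there anyway because the
   distribute part executes outside of any parallel region.  */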

/* Given a sequence of statements within a distribute omp construct or a
   parallel construct, which in the original source does not form a combined
   construct with a looping construct, return true if it does not prevent us
   from turning it into a gridified HSA kernel.  Otherwise return false.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  IN_PARALLEL must be true if SEQ is within a
   parallel construct and false if it is only within a distribute
   construct.  */

static bool
grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid,
				  bool in_parallel)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (grid_safe_assignment_p (stmt, grid)
	  || gimple_code (stmt) == GIMPLE_GOTO
	  || gimple_code (stmt) == GIMPLE_LABEL
	  || gimple_code (stmt) == GIMPLE_COND)
	continue;
      else if (gbind *bind = dyn_cast <gbind *> (stmt))
	{
	  if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind),
						 grid, in_parallel))
	    return false;
	  continue;
	}
      else if (gtry *try_stmt = dyn_cast <gtry *> (stmt))
	{
	  if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "the distribute "
				   "construct contains a try..catch region\n");
		  dump_printf_loc (MSG_NOTE, try_stmt,
				   "This statement cannot be analyzed for "
				   "tiled gridification\n");
		}
	      return false;
	    }
	  if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt),
						 grid, in_parallel))
	    return false;
	  if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt),
						 grid, in_parallel))
	    return false;
	  continue;
	}
      else if (is_gimple_call (stmt))
	{
	  tree fndecl = gimple_call_fndecl (stmt);
	  if (fndecl && grid_call_permissible_in_distribute_p (fndecl))
	    continue;

	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "the distribute "
			       "construct contains a call\n");
	      dump_printf_loc (MSG_NOTE, stmt,
			       "This statement cannot be analyzed for "
			       "tiled gridification\n");
	    }
	  return false;
	}
      else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt))
	{
	  if (in_parallel)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "a parallel "
				   "construct contains another parallel "
				   "construct\n");
		  dump_printf_loc (MSG_NOTE, stmt,
				   "This parallel construct is nested in "
				   "another one\n");
		}
	      return false;
	    }
	  if (!grid_parallel_clauses_gridifiable (par, grid->target_loc)
	      || !grid_dist_follows_tiling_pattern (gimple_omp_body (par),
						    grid, true))
	    return false;
	}
      else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt))
	{
	  if (!in_parallel)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "a loop "
				   "construct is not nested within a parallel "
				   "construct\n");
		  dump_printf_loc (MSG_NOTE, stmt,
				   "This loop construct is not nested in "
				   "a parallel construct\n");
		}
	      return false;
	    }
	  if (!grid_gfor_follows_tiling_pattern (gfor, grid))
	    return false;
	}
      else
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "the distribute "
			       "construct contains a complex statement\n");
	      dump_printf_loc (MSG_NOTE, stmt,
			       "This statement cannot be analyzed for "
			       "tiled gridification\n");
	    }
	  return false;
	}
    }
  return true;
}
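
/* A hypothetical sketch (an editorial illustration, not part of the original
   sources) of source code following the tiling pattern checked above, with a
   tile of 64 iterations per work-group:

     #pragma omp target teams
     #pragma omp distribute
     for (int i = 0; i < n; i += 64)
       {
	 #pragma omp parallel for
	 for (int j = i; j < i + 64; j++)
	   a[j] = f (j);
       }

   Here the parallel construct is nested inside the distribute body rather
   than combined with it, the distribute step (64) becomes the group size and
   the inner loop iterates over exactly one tile.  */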

/* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
   return true, otherwise return false.  In the case of success, also fill in
   GRID with information describing the kernel grid.  */

static bool
grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid)
{
  if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
    return false;

  dump_user_location_t tloc = target;
  grid->target_loc = tloc;
  gimple *stmt
    = grid_find_single_omp_among_assignments (gimple_omp_body (target),
					      grid, "target");
  if (!stmt)
    return false;
  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
  tree group_size = NULL;
  if (!teams)
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
		       GRID_MISSED_MSG_PREFIX "it does not have a sole teams "
		       "construct in it.\n");
      return false;
    }

  tree clauses = gimple_omp_teams_clauses (teams);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
	{
	case OMP_CLAUSE_NUM_TEAMS:
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			     GRID_MISSED_MSG_PREFIX "the teams construct "
			     "contains a num_teams clause\n ");
	  return false;

	case OMP_CLAUSE_REDUCTION:
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			     GRID_MISSED_MSG_PREFIX "a reduction "
			     "clause is present\n ");
	  return false;

	case OMP_CLAUSE_THREAD_LIMIT:
	  if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0)))
	    group_size = OMP_CLAUSE_OPERAND (clauses, 0);
	  break;

	default:
	  break;
	}
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }

  stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid,
						 "teams");
  if (!stmt)
    return false;
  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
  if (!dist)
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
		       GRID_MISSED_MSG_PREFIX "the teams construct does not "
		       "have a single distribute construct in it.\n");
      return false;
    }

  gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);

  grid->collapse = gimple_omp_for_collapse (dist);
  if (grid->collapse > 3)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			 GRID_MISSED_MSG_PREFIX "the distribute construct "
			 "contains a collapse clause with parameter greater "
			 "than 3\n");
      return false;
    }

  struct omp_for_data fd;
  struct omp_for_data_loop *dist_loops
    = (struct omp_for_data_loop *)alloca (grid->collapse
					  * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (dist, &fd, dist_loops);
  if (fd.chunk_size)
    {
      if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			     GRID_MISSED_MSG_PREFIX "the teams "
			     "thread limit is different from distribute "
			     "schedule chunk\n");
	  return false;
	}
      group_size = fd.chunk_size;
    }
  if (group_size && grid->collapse > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			 GRID_MISSED_MSG_PREFIX "group size cannot be "
			 "set using thread_limit or schedule clauses "
			 "when also using a collapse clause greater than 1\n");
      return false;
    }

  if (gimple_omp_for_combined_p (dist))
    {
      grid->tiling = false;
      grid->group_sizes[0] = group_size;
      for (unsigned i = 1; i < grid->collapse; i++)
	grid->group_sizes[i] = NULL;
      return grid_dist_follows_simple_pattern (dist, grid);
    }
  else
    {
      grid->tiling = true;
      if (group_size)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			     GRID_MISSED_MSG_PREFIX "group size cannot be set "
			     "using thread_limit or schedule clauses when "
			     "distribute and loop constructs do not form "
			     "one combined construct\n");
	  return false;
	}
      for (unsigned i = 0; i < grid->collapse; i++)
	{
	  if (fd.loops[i].cond_code == GT_EXPR)
	    grid->group_sizes[i] = fold_build1 (NEGATE_EXPR,
						TREE_TYPE (fd.loops[i].step),
						fd.loops[i].step);
	  else
	    grid->group_sizes[i] = fd.loops[i].step;
	}
      return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid,
					       false);
    }
}

/* Operand walker, used to remap pre-body declarations according to a hash map
   provided in DATA.  */

static tree
grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
{
  tree t = *tp;

  if (DECL_P (t) || TYPE_P (t))
    *walk_subtrees = 0;
  else
    *walk_subtrees = 1;

  if (VAR_P (t))
    {
      struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
      hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
      tree *repl = declmap->get (t);
      if (repl)
	*tp = *repl;
    }
  return NULL_TREE;
}

/* Identifiers of segments into which a particular variable should be placed
   when gridifying.  */

enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP,
		       GRID_SEGMENT_GLOBAL};

/* Mark VAR so that it is eventually placed into SEGMENT.  */

static void
grid_mark_variable_segment (tree var, enum grid_var_segment segment)
{
  /* Placing non-addressable variables into a segment would require that we
     re-gimplify all their uses.  Fortunately, we do not have to do this
     because if they are not addressable, it means they are not used in atomic
     or parallel statements and so relaxed GPU consistency rules mean we can
     just keep them private.  */
  if (!TREE_ADDRESSABLE (var))
    return;

  switch (segment)
    {
    case GRID_SEGMENT_GROUP:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"),
					 NULL, DECL_ATTRIBUTES (var));
      break;
    case GRID_SEGMENT_GLOBAL:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"),
					 NULL, DECL_ATTRIBUTES (var));
      break;
    default:
      gcc_unreachable ();
    }

  if (!TREE_STATIC (var))
    {
      TREE_STATIC (var) = 1;
      varpool_node::finalize_decl (var);
    }
}

/* Copy leading register-type assignments to local variables in SRC to just
   before DST, creating temporaries, adjusting mapping of operands in WI and
   remapping operands as necessary.  Add any new temporaries to TGT_BIND.
   Return the first statement that does not conform to grid_safe_assignment_p
   or NULL.  If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
   variables in traversed bind statements so that they are put into the
   appropriate segment.  */

static gimple *
grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
				     gbind *tgt_bind,
				     enum grid_var_segment var_segment,
				     struct walk_stmt_info *wi)
{
  hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (gbind *bind = dyn_cast <gbind *> (stmt))
	{
	  gimple *r = grid_copy_leading_local_assignments
	    (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi);

	  if (var_segment != GRID_SEGMENT_PRIVATE)
	    for (tree var = gimple_bind_vars (bind);
		 var;
		 var = DECL_CHAIN (var))
	      grid_mark_variable_segment (var, var_segment);
	  if (r)
	    return r;
	  else
	    continue;
	}
      if (!grid_safe_assignment_p (stmt, NULL))
	return stmt;
      tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
      tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
				 TREE_TYPE (lhs));
      DECL_CONTEXT (repl) = current_function_decl;
      gimple_bind_append_vars (tgt_bind, repl);

      declmap->put (lhs, repl);
      gassign *copy = as_a <gassign *> (gimple_copy (stmt));
      walk_gimple_op (copy, grid_remap_prebody_decls, wi);
      gsi_insert_before (dst, copy, GSI_SAME_STMT);
    }
  return NULL;
}

/* Statement walker function to make adjustments to statements within the
   gridified kernel copy.  */

static tree
grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p,
			struct walk_stmt_info *)
{
  *handled_ops_p = false;
  gimple *stmt = gsi_stmt (*gsi);
  if (gimple_code (stmt) == GIMPLE_OMP_FOR
      && (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD))
  {
    gomp_for *loop = as_a <gomp_for *> (stmt);
    tree clauses = gimple_omp_for_clauses (loop);
    tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN);
    if (cl)
      OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node;
    else
      {
	tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
	OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node;
	OMP_CLAUSE_CHAIN (c) = clauses;
	gimple_omp_for_set_clauses (loop, c);
      }
  }
  return NULL_TREE;
}

/* Given a PARLOOP that is a normal for looping construct but also a part of a
   combined construct with a simd loop, eliminate the simd loop.  */

static void
grid_eliminate_combined_simd_part (gomp_for *parloop)
{
  struct walk_stmt_info wi;

  memset (&wi, 0, sizeof (wi));
  wi.val_only = true;
  enum gf_mask msk = GF_OMP_FOR_SIMD;
  wi.info = (void *) &msk;
  walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi);
  gimple *stmt = (gimple *) wi.info;
  /* We expect that the SIMD is the only statement in the parallel loop.  */
  gcc_assert (stmt
	      && gimple_code (stmt) == GIMPLE_OMP_FOR
	      && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_SIMD)
	      && gimple_omp_for_combined_into_p (stmt)
	      && !gimple_omp_for_combined_p (stmt));
  gomp_for *simd = as_a <gomp_for *> (stmt);

  /* Copy over the iteration properties because the body refers to the index in
     the bottom-most loop.  */
  unsigned i, collapse = gimple_omp_for_collapse (parloop);
  gcc_checking_assert (collapse == gimple_omp_for_collapse (simd));
  for (i = 0; i < collapse; i++)
    {
      gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i));
      gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i));
      gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i));
      gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i));
    }

  tree *tgt = gimple_omp_for_clauses_ptr (parloop);
  while (*tgt)
    tgt = &OMP_CLAUSE_CHAIN (*tgt);

  /* Copy over all clauses, except for linear clauses, which are turned into
     private clauses, and all other simd-specific clauses, which are
     ignored.  */
  tree *pc = gimple_omp_for_clauses_ptr (simd);
  while (*pc)
    {
      tree c = *pc;
      switch (TREE_CODE (c))
	{
	case OMP_CLAUSE_LINEAR:
	  {
	    tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE);
	    OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c);
	    OMP_CLAUSE_CHAIN (priv) = NULL;
	    *tgt = priv;
	    tgt = &OMP_CLAUSE_CHAIN (priv);
	    pc = &OMP_CLAUSE_CHAIN (c);
	    break;
	  }

	case OMP_CLAUSE_SAFELEN:
	case OMP_CLAUSE_SIMDLEN:
	case OMP_CLAUSE_ALIGNED:
	  pc = &OMP_CLAUSE_CHAIN (c);
	  break;

	default:
	  *pc = OMP_CLAUSE_CHAIN (c);
	  OMP_CLAUSE_CHAIN (c) = NULL;
	  *tgt = c;
	  tgt = &OMP_CLAUSE_CHAIN (c);
	  break;
	}
    }

  /* Finally, throw away the simd and mark the parallel loop as not
     combined.  */
  gimple_omp_set_body (parloop, gimple_omp_body (simd));
  gimple_omp_for_set_combined_p (parloop, false);
}
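
/* For illustration (an editorial note, not part of the original sources): if
   the kernel body contains a combined construct such as

     #pragma omp parallel for simd linear (p)

   the function above keeps only the parallel loop, reuses the simd body for
   it, rewrites the linear (p) clause into private (p) and drops the
   simd-specific safelen, simdlen and aligned clauses.  */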

/* Statement walker function marking all parallels as grid_phony and loops as
   grid ones representing threads of a particular thread group.  */

static tree
grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p,
			struct walk_stmt_info *wi_in)
{
  *handled_ops_p = false;
  if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi)))
    {
      *handled_ops_p = true;
      gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_intra_group (loop, true);
      if (gimple_omp_for_combined_p (loop))
	grid_eliminate_combined_simd_part (loop);

      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (loop),
			   grid_process_grid_body, NULL, &body_wi);

      gbind *bind = (gbind *) wi_in->info;
      tree c;
      for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
	  {
	    push_gimplify_context ();
	    tree ov = OMP_CLAUSE_DECL (c);
	    tree gv = copy_var_decl (ov, create_tmp_var_name (NULL),
				     TREE_TYPE (ov));

	    grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP);
	    DECL_CONTEXT (gv) = current_function_decl;
	    gimple_bind_append_vars (bind, gv);
	    tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov);
	    gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
	    x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv);
	    gimple_seq l = NULL;
	    gimplify_and_add (x, &l);
	    gsi_insert_seq_after (gsi, l, GSI_SAME_STMT);
	    pop_gimplify_context (bind);
	  }
    }
  return NULL_TREE;
}

/* Statement walker function marking all parallels as grid_phony and loops as
   grid ones representing threads of a particular thread group.  */

static tree
grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi,
				      bool *handled_ops_p,
				      struct walk_stmt_info *wi_in)
{
  *handled_ops_p = false;
  wi_in->removed_stmt = false;
  gimple *stmt = gsi_stmt (*gsi);
  if (gbind *bind = dyn_cast <gbind *> (stmt))
    {
      for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
	grid_mark_variable_segment (var, GRID_SEGMENT_GROUP);
    }
  else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt))
    {
      *handled_ops_p = true;
      gimple_omp_parallel_set_grid_phony (parallel, true);

      gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
      gimple_bind_set_body (new_bind, gimple_omp_body (parallel));
      gimple_seq s = NULL;
      gimple_seq_add_stmt (&s, new_bind);
      gimple_omp_set_body (parallel, s);

      struct walk_stmt_info wi_par;
      memset (&wi_par, 0, sizeof (wi_par));
      wi_par.info = new_bind;
      walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind),
			   grid_mark_tiling_loops, NULL, &wi_par);
    }
  else if (is_a <gcall *> (stmt))
    wi_in->removed_stmt = grid_handle_call_in_distribute (gsi);
  return NULL_TREE;
}

/* Given freshly copied top level kernel SEQ, identify the individual OMP
   components, mark them as part of kernel, copy assignments leading to them
   just before DST, remapping them using WI and adding new temporaries to
   TGT_BIND, and return the loop that will be used for kernel dispatch.  */

static gomp_for *
grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq,
			       gimple_stmt_iterator *dst,
			       gbind *tgt_bind, struct walk_stmt_info *wi)
{
  gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind,
						      GRID_SEGMENT_GLOBAL, wi);
  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
  gcc_assert (teams);
  gimple_omp_teams_set_grid_phony (teams, true);
  stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
					      tgt_bind, GRID_SEGMENT_GLOBAL,
					      wi);
  gcc_checking_assert (stmt);
  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
  gcc_assert (dist);
  gimple_seq prebody = gimple_omp_for_pre_body (dist);
  if (prebody)
    grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
					 GRID_SEGMENT_GROUP, wi);

  if (grid->tiling)
    {
      gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_group_iter (dist, true);

      struct walk_stmt_info wi_tiled;
      memset (&wi_tiled, 0, sizeof (wi_tiled));
      walk_gimple_seq_mod (gimple_omp_body_ptr (dist),
			   grid_mark_tiling_parallels_and_loops, NULL,
			   &wi_tiled);
      return dist;
    }
  else
    {
      gimple_omp_for_set_grid_phony (dist, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
						  tgt_bind,
						  GRID_SEGMENT_PRIVATE, wi);
      gcc_checking_assert (stmt);
      gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
      gimple_omp_parallel_set_grid_phony (parallel, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel),
						  dst, tgt_bind,
						  GRID_SEGMENT_PRIVATE, wi);
      gomp_for *inner_loop = as_a <gomp_for *> (stmt);
      gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
      prebody = gimple_omp_for_pre_body (inner_loop);
      if (prebody)
	grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
					     GRID_SEGMENT_PRIVATE, wi);

      if (gimple_omp_for_combined_p (inner_loop))
	grid_eliminate_combined_simd_part (inner_loop);
      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop),
			   grid_process_grid_body, NULL, &body_wi);

      return inner_loop;
    }
}
/* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
   create a GPU kernel for it.  GSI must point to the same statement, TGT_BIND
   is the bind into which temporaries inserted before TARGET should be
   added.  */

static void
grid_attempt_target_gridification (gomp_target *target,
				   gimple_stmt_iterator *gsi,
				   gbind *tgt_bind)
{
  /* removed group_size */
  grid_prop grid = {};
  if (!target || !grid_target_follows_gridifiable_pattern (target, &grid))
    return;

  location_t loc = gimple_location (target);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, target,
		     "Target construct will be turned into a gridified HSA "
		     "kernel\n");

  /* Copy target body to a GPUKERNEL construct:  */
  gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
    (gimple_omp_body (target));

  hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (struct walk_stmt_info));
  wi.info = declmap;

  /* Copy assignments in between OMP statements before target, mark OMP
     statements within copy appropriately.  */
  gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi,
							tgt_bind, &wi);

  gbind *old_bind
    = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
  gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
  tree new_block = gimple_bind_block (new_bind);
  tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
  BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
  BLOCK_SUBBLOCKS (enc_block) = new_block;
  BLOCK_SUPERCONTEXT (new_block) = enc_block;
  gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
  gimple_seq_add_stmt
    (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
     gpukernel);

  for (size_t i = 0; i < grid.collapse; i++)
    walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL);
  push_gimplify_context ();
  for (size_t i = 0; i < grid.collapse; i++)
    {
      tree itype, type = TREE_TYPE (gimple_omp_for_index (inner_loop, i));
      if (POINTER_TYPE_P (type))
	itype = signed_type_for (type);
      else
	itype = type;

      enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
      tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
      walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
      tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
      walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
      omp_adjust_for_condition (loc, &cond_code, &n2);
      n1 = fold_convert (itype, n1);
      n2 = fold_convert (itype, n2);

      tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2);
      tree step
	= omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));

      tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype, step, t);
      t = fold_build2 (PLUS_EXPR, itype, t, n2);
      t = fold_build2 (MINUS_EXPR, itype, t, n1);
      if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
	t = fold_build2 (TRUNC_DIV_EXPR, itype,
			 fold_build1 (NEGATE_EXPR, itype, t),
			 fold_build1 (NEGATE_EXPR, itype, step));
      else
	t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
      t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype));
      if (grid.tiling)
	{
	  if (cond_code == GT_EXPR)
	    step = fold_build1 (NEGATE_EXPR, itype, step);
	  t = fold_build2 (MULT_EXPR, itype, t, step);
	}

      tree gs = fold_convert (uint32_type_node, t);
      gimple_seq tmpseq = NULL;
      gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
      if (!gimple_seq_empty_p (tmpseq))
	gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);

      tree ws;
      if (grid.group_sizes[i])
	{
	  ws = fold_convert (uint32_type_node, grid.group_sizes[i]);
	  tmpseq = NULL;
	  gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
	  if (!gimple_seq_empty_p (tmpseq))
	    gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
	}
      else
	ws = build_zero_cst (uint32_type_node);

      tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
      OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
      OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
      OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
      OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
      gimple_omp_target_set_clauses (target, c);
    }
  pop_gimplify_context (tgt_bind);
  delete declmap;
  return;
}
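
/* Worked example (an editorial addition, not part of the original sources):
   for an inner loop of the form "for (i = 0; i < 100; i += 3)" the code above
   computes the grid size of dimension 0 as

     (3 + (-1) + 100 - 0) / 3 = 34

   iterations, records it in the OMP_CLAUSE__GRIDDIM__SIZE operand of the new
   clause and, in the tiling case, multiplies the count by the step so that
   the grid covers all original iterations.  */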

/* Walker function doing all the work for omp_grid_gridify_all_targets.  */

static tree
grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
			       bool *handled_ops_p,
			       struct walk_stmt_info *incoming)
{
  *handled_ops_p = false;

  gimple *stmt = gsi_stmt (*gsi);
  gomp_target *target = dyn_cast <gomp_target *> (stmt);
  if (target)
    {
      gbind *tgt_bind = (gbind *) incoming->info;
      gcc_checking_assert (tgt_bind);
      grid_attempt_target_gridification (target, gsi, tgt_bind);
      return NULL_TREE;
    }
  gbind *bind = dyn_cast <gbind *> (stmt);
  if (bind)
    {
      *handled_ops_p = true;
      struct walk_stmt_info wi;
      memset (&wi, 0, sizeof (wi));
      wi.info = bind;
      walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
			   grid_gridify_all_targets_stmt, NULL, &wi);
    }
  return NULL_TREE;
}

/* Attempt to gridify all target constructs in BODY_P.  All such targets will
   have their bodies duplicated, with the new copy being put into a
   gimple_omp_grid_body statement.  All kernel-related constructs within the
   grid_body will be marked with phony flags or kernel kinds.  Moreover, some
   re-structuring is often needed, such as copying pre-bodies before the target
   construct so that kernel grid sizes can be computed.  */

void
omp_grid_gridify_all_targets (gimple_seq *body_p)
{
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (wi));
  walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
}