mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			re PR fortran/29550 (Optimize -fexternal-blas calls for conjg())
2018-09-18 Thomas Koenig <tkoenig@gcc.gnu.org> PR fortran/29550 * gfortran.h (gfc_expr): Add external_blas flag. * frontend-passes.c (matrix_case): Add case A2TB2T. (optimize_namespace): Handle flag_external_blas by calling call_external_blas. (get_array_inq_function): Add argument okind. If it is nonzero, use it as the kind of argument to be used. (inline_limit_check): Remove m_case argument, add limit argument instead. Remove assert about m_case. Set the limit for inlining from the limit argument. (matmul_lhs_realloc): Handle case A2TB2T. (inline_matmul_assign): Handle inline limit for other cases with two rank-two matrices. Remove no-op calls to inline_limit_check. (call_external_blas): New function. * trans-intrinsic.c (gfc_conv_intrinsic_funcall): Do not add argument to external BLAS if external_blas is already set. 2018-09-18 Thomas Koenig <tkoenig@gcc.gnu.org> PR fortran/29550 * gfortran.dg/inline_matmul_13.f90: Adjust count for _gfortran_matmul. * gfortran.dg/inline_matmul_16.f90: Likewise. * gfortran.dg/promotion_2.f90: Add -fblas-matmul-limit=1. Scan for dgemm instead of dgemm_. Add call to random_number to make standard conforming. * gfortran.dg/matmul_blas_1.f90: New test. * gfortran.dg/matmul_bounds_14.f: New test. * gfortran.dg/matmul_bounds_15.f: New test. * gfortran.dg/matmul_bounds_16.f: New test. * gfortran.dg/blas_gemm_routines.f: New test / additional file for preceding tests. From-SVN: r264412
This commit is contained in:
		
							parent
							
								
									5c470e0f07
								
							
						
					
					
						commit
						998511a610
					
				|  | @ -53,6 +53,7 @@ static gfc_code * create_do_loop (gfc_expr *, gfc_expr *, gfc_expr *, | |||
| 				  char *vname=NULL); | ||||
| static gfc_expr* check_conjg_transpose_variable (gfc_expr *, bool *, | ||||
| 						 bool *); | ||||
| static int call_external_blas (gfc_code **, int *, void *); | ||||
| static bool has_dimen_vector_ref (gfc_expr *); | ||||
| static int matmul_temp_args (gfc_code **, int *,void *data); | ||||
| static int index_interchange (gfc_code **, int*, void *); | ||||
|  | @ -131,7 +132,7 @@ static int var_num = 1; | |||
| 
 | ||||
| /* What sort of matrix we are dealing with when inlining MATMUL.  */ | ||||
| 
 | ||||
| enum matrix_case { none=0, A2B2, A2B1, A1B2, A2B2T, A2TB2 }; | ||||
| enum matrix_case { none=0, A2B2, A2B1, A1B2, A2B2T, A2TB2, A2TB2T }; | ||||
| 
 | ||||
| /* Keep track of the number of expressions we have inserted so far
 | ||||
|    using create_var.  */ | ||||
|  | @ -1428,7 +1429,7 @@ optimize_namespace (gfc_namespace *ns) | |||
|       gfc_code_walker (&ns->code, convert_elseif, dummy_expr_callback, NULL); | ||||
|       gfc_code_walker (&ns->code, cfe_code, cfe_expr_0, NULL); | ||||
|       gfc_code_walker (&ns->code, optimize_code, optimize_expr, NULL); | ||||
|       if (flag_inline_matmul_limit != 0) | ||||
|       if (flag_inline_matmul_limit != 0 || flag_external_blas) | ||||
| 	{ | ||||
| 	  bool found; | ||||
| 	  do | ||||
|  | @ -1441,9 +1442,15 @@ optimize_namespace (gfc_namespace *ns) | |||
| 
 | ||||
| 	  gfc_code_walker (&ns->code, matmul_temp_args, dummy_expr_callback, | ||||
| 			   NULL); | ||||
| 	  gfc_code_walker (&ns->code, inline_matmul_assign, dummy_expr_callback, | ||||
| 			   NULL); | ||||
| 	} | ||||
| 
 | ||||
|       if (flag_external_blas) | ||||
| 	gfc_code_walker (&ns->code, call_external_blas, dummy_expr_callback, | ||||
| 			 NULL); | ||||
| 
 | ||||
|       if (flag_inline_matmul_limit != 0) | ||||
| 	gfc_code_walker (&ns->code, inline_matmul_assign, dummy_expr_callback, | ||||
| 			 NULL); | ||||
|     } | ||||
| 
 | ||||
|   if (flag_frontend_loop_interchange) | ||||
|  | @ -2938,7 +2945,7 @@ matmul_temp_args (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED, | |||
|    dim is zero-based.  */ | ||||
| 
 | ||||
| static gfc_expr * | ||||
| get_array_inq_function (gfc_isym_id id, gfc_expr *e, int dim) | ||||
| get_array_inq_function (gfc_isym_id id, gfc_expr *e, int dim, int okind = 0) | ||||
| { | ||||
|   gfc_expr *fcn; | ||||
|   gfc_expr *dim_arg, *kind; | ||||
|  | @ -2964,8 +2971,12 @@ get_array_inq_function (gfc_isym_id id, gfc_expr *e, int dim) | |||
|     } | ||||
| 
 | ||||
|   dim_arg =  gfc_get_int_expr (gfc_default_integer_kind, &e->where, dim); | ||||
|   kind = gfc_get_int_expr (gfc_default_integer_kind, &e->where, | ||||
| 			   gfc_index_integer_kind); | ||||
|   if (okind != 0) | ||||
|     kind = gfc_get_int_expr (gfc_default_integer_kind, &e->where, | ||||
| 			     okind); | ||||
|   else | ||||
|     kind = gfc_get_int_expr (gfc_default_integer_kind, &e->where, | ||||
| 			     gfc_index_integer_kind); | ||||
| 
 | ||||
|   ec = gfc_copy_expr (e); | ||||
| 
 | ||||
|  | @ -3026,7 +3037,7 @@ get_operand (gfc_intrinsic_op op, gfc_expr *e1, gfc_expr *e2) | |||
|    removed by DCE. Only called for rank-two matrices A and B.  */ | ||||
| 
 | ||||
| static gfc_code * | ||||
| inline_limit_check (gfc_expr *a, gfc_expr *b, enum matrix_case m_case) | ||||
| inline_limit_check (gfc_expr *a, gfc_expr *b, int limit) | ||||
| { | ||||
|   gfc_expr *inline_limit; | ||||
|   gfc_code *if_1, *if_2, *else_2; | ||||
|  | @ -3034,14 +3045,11 @@ inline_limit_check (gfc_expr *a, gfc_expr *b, enum matrix_case m_case) | |||
|   gfc_typespec ts; | ||||
|   gfc_expr *cond; | ||||
| 
 | ||||
|   gcc_assert (m_case == A2B2 || m_case == A2B2T || m_case == A2TB2); | ||||
| 
 | ||||
|   /* Calculation is done in real to avoid integer overflow.  */ | ||||
| 
 | ||||
|   inline_limit = gfc_get_constant_expr (BT_REAL, gfc_default_real_kind, | ||||
| 					&a->where); | ||||
|   mpfr_set_si (inline_limit->value.real, flag_inline_matmul_limit, | ||||
| 	       GFC_RND_MODE); | ||||
|   mpfr_set_si (inline_limit->value.real, limit, GFC_RND_MODE); | ||||
|   mpfr_pow_ui (inline_limit->value.real, inline_limit->value.real, 3, | ||||
| 	       GFC_RND_MODE); | ||||
| 
 | ||||
|  | @ -3235,6 +3243,22 @@ matmul_lhs_realloc (gfc_expr *c, gfc_expr *a, gfc_expr *b, | |||
| 				 get_array_inq_function (GFC_ISYM_SIZE, b, 2)); | ||||
|       break; | ||||
| 
 | ||||
|     case A2TB2T: | ||||
|       /* This can only happen for BLAS, we do not handle that case in
 | ||||
| 	 inline mamtul.  */ | ||||
|       ar->start[0] = get_array_inq_function (GFC_ISYM_SIZE, a, 2); | ||||
|       ar->start[1] = get_array_inq_function (GFC_ISYM_SIZE, b, 1); | ||||
| 
 | ||||
|       ne1 = build_logical_expr (INTRINSIC_NE, | ||||
| 				get_array_inq_function (GFC_ISYM_SIZE, c, 1), | ||||
| 				get_array_inq_function (GFC_ISYM_SIZE, a, 2)); | ||||
|       ne2 = build_logical_expr (INTRINSIC_NE, | ||||
| 				get_array_inq_function (GFC_ISYM_SIZE, c, 2), | ||||
| 				get_array_inq_function (GFC_ISYM_SIZE, b, 1)); | ||||
| 
 | ||||
|       cond = build_logical_expr (INTRINSIC_OR, ne1, ne2); | ||||
|       break; | ||||
| 
 | ||||
|     default: | ||||
|       gcc_unreachable(); | ||||
| 
 | ||||
|  | @ -3946,9 +3970,11 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees, | |||
|   /* Take care of the inline flag.  If the limit check evaluates to a
 | ||||
|      constant, dead code elimination will eliminate the unneeded branch.  */ | ||||
| 
 | ||||
|   if (m_case == A2B2 && flag_inline_matmul_limit > 0) | ||||
|   if (flag_inline_matmul_limit > 0 && matrix_a->rank == 2 | ||||
|       && matrix_b->rank == 2) | ||||
|     { | ||||
|       if_limit = inline_limit_check (matrix_a, matrix_b, m_case); | ||||
|       if_limit = inline_limit_check (matrix_a, matrix_b, | ||||
| 				     flag_inline_matmul_limit); | ||||
| 
 | ||||
|       /* Insert the original statement into the else branch.  */ | ||||
|       if_limit->block->block->next = co; | ||||
|  | @ -4118,7 +4144,6 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees, | |||
|   switch (m_case) | ||||
|     { | ||||
|     case A2B2: | ||||
|       inline_limit_check (matrix_a, matrix_b, m_case); | ||||
| 
 | ||||
|       u1 = get_size_m1 (matrix_b, 2); | ||||
|       u2 = get_size_m1 (matrix_a, 2); | ||||
|  | @ -4151,7 +4176,6 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees, | |||
|       break; | ||||
| 
 | ||||
|     case A2B2T: | ||||
|       inline_limit_check (matrix_a, matrix_b, m_case); | ||||
| 
 | ||||
|       u1 = get_size_m1 (matrix_b, 1); | ||||
|       u2 = get_size_m1 (matrix_a, 2); | ||||
|  | @ -4184,7 +4208,6 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees, | |||
|       break; | ||||
| 
 | ||||
|     case A2TB2: | ||||
|       inline_limit_check (matrix_a, matrix_b, m_case); | ||||
| 
 | ||||
|       u1 = get_size_m1 (matrix_a, 2); | ||||
|       u2 = get_size_m1 (matrix_b, 2); | ||||
|  | @ -4311,6 +4334,405 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees, | |||
|   return 0; | ||||
| } | ||||
| 
 | ||||
| /* Change matmul function calls in the form of
 | ||||
| 
 | ||||
|    c = matmul(a,b) | ||||
| 
 | ||||
|    to the corresponding call to a BLAS routine, if applicable.  */ | ||||
| 
 | ||||
| static int | ||||
| call_external_blas (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED, | ||||
| 		    void *data ATTRIBUTE_UNUSED) | ||||
| { | ||||
|   gfc_code *co, *co_next; | ||||
|   gfc_expr *expr1, *expr2; | ||||
|   gfc_expr *matrix_a, *matrix_b; | ||||
|   gfc_code *if_limit = NULL; | ||||
|   gfc_actual_arglist *a, *b; | ||||
|   bool conjg_a, conjg_b, transpose_a, transpose_b; | ||||
|   gfc_code *call; | ||||
|   const char *blas_name; | ||||
|   const char *transa, *transb; | ||||
|   gfc_expr *c1, *c2, *b1; | ||||
|   gfc_actual_arglist *actual, *next; | ||||
|   bt type; | ||||
|   int kind; | ||||
|   enum matrix_case m_case; | ||||
|   bool realloc_c; | ||||
|   gfc_code **next_code_point; | ||||
| 
 | ||||
|   /* Many of the tests for inline matmul also apply here.  */ | ||||
| 
 | ||||
|   co = *c; | ||||
| 
 | ||||
|   if (co->op != EXEC_ASSIGN) | ||||
|     return 0; | ||||
| 
 | ||||
|   if (in_where || in_assoc_list) | ||||
|     return 0; | ||||
| 
 | ||||
|   /* The BLOCKS generated for the temporary variables and FORALL don't
 | ||||
|      mix.  */ | ||||
|   if (forall_level > 0) | ||||
|     return 0; | ||||
| 
 | ||||
|   /* For now don't do anything in OpenMP workshare, it confuses
 | ||||
|      its translation, which expects only the allowed statements in there. */ | ||||
| 
 | ||||
|   if (in_omp_workshare) | ||||
|     return 0; | ||||
| 
 | ||||
|   expr1 = co->expr1; | ||||
|   expr2 = co->expr2; | ||||
|   if (expr2->expr_type != EXPR_FUNCTION | ||||
|       || expr2->value.function.isym == NULL | ||||
|       || expr2->value.function.isym->id != GFC_ISYM_MATMUL) | ||||
|     return 0; | ||||
| 
 | ||||
|   type = expr2->ts.type; | ||||
|   kind = expr2->ts.kind; | ||||
| 
 | ||||
|   /* Guard against recursion. */ | ||||
| 
 | ||||
|   if (expr2->external_blas) | ||||
|     return 0; | ||||
| 
 | ||||
|   if (type != expr1->ts.type || kind != expr1->ts.kind) | ||||
|     return 0; | ||||
| 
 | ||||
|   if (type == BT_REAL) | ||||
|     { | ||||
|       if (kind == 4) | ||||
| 	blas_name = "sgemm"; | ||||
|       else if (kind == 8) | ||||
| 	blas_name = "dgemm"; | ||||
|       else | ||||
| 	return 0; | ||||
|     } | ||||
|   else if (type == BT_COMPLEX) | ||||
|     { | ||||
|       if (kind == 4) | ||||
| 	blas_name = "cgemm"; | ||||
|       else if (kind == 8) | ||||
| 	blas_name = "zgemm"; | ||||
|       else | ||||
| 	return 0; | ||||
|     } | ||||
|   else | ||||
|     return 0; | ||||
| 
 | ||||
|   a = expr2->value.function.actual; | ||||
|   if (a->expr->rank != 2) | ||||
|     return 0; | ||||
| 
 | ||||
|   b = a->next; | ||||
|   if (b->expr->rank != 2) | ||||
|     return 0; | ||||
| 
 | ||||
|   matrix_a = check_conjg_transpose_variable (a->expr, &conjg_a, &transpose_a); | ||||
|   if (matrix_a == NULL) | ||||
|     return 0; | ||||
| 
 | ||||
|   if (transpose_a) | ||||
|     { | ||||
|       if (conjg_a) | ||||
| 	transa = "C"; | ||||
|       else | ||||
| 	transa = "T"; | ||||
|     } | ||||
|   else | ||||
|     transa = "N"; | ||||
| 
 | ||||
|   matrix_b = check_conjg_transpose_variable (b->expr, &conjg_b, &transpose_b); | ||||
|   if (matrix_b == NULL) | ||||
|     return 0; | ||||
| 
 | ||||
|   if (transpose_b) | ||||
|     { | ||||
|       if (conjg_b) | ||||
| 	transb = "C"; | ||||
|       else | ||||
| 	transb = "T"; | ||||
|     } | ||||
|   else | ||||
|     transb = "N"; | ||||
| 
 | ||||
|   if (transpose_a) | ||||
|     { | ||||
|       if (transpose_b) | ||||
| 	m_case = A2TB2T; | ||||
|       else | ||||
| 	m_case = A2TB2; | ||||
|     } | ||||
|   else | ||||
|     { | ||||
|       if (transpose_b) | ||||
| 	m_case = A2B2T; | ||||
|       else | ||||
| 	m_case = A2B2; | ||||
|     } | ||||
| 
 | ||||
|   current_code = c; | ||||
|   inserted_block = NULL; | ||||
|   changed_statement = NULL; | ||||
| 
 | ||||
|   expr2->external_blas = 1; | ||||
| 
 | ||||
|   /* We do not handle data dependencies yet.  */ | ||||
|   if (gfc_check_dependency (expr1, matrix_a, true) | ||||
|       || gfc_check_dependency (expr1, matrix_b, true)) | ||||
|     return 0; | ||||
| 
 | ||||
|   /* Generate the if statement and hang it into the tree.  */ | ||||
|   if_limit = inline_limit_check (matrix_a, matrix_b, flag_blas_matmul_limit); | ||||
|   co_next = co->next; | ||||
|   (*current_code) = if_limit; | ||||
|   co->next = NULL; | ||||
|   if_limit->block->next = co; | ||||
| 
 | ||||
|   call = XCNEW (gfc_code); | ||||
|   call->loc = co->loc; | ||||
| 
 | ||||
|   /* Bounds checking - a bit simpler than for inlining since we only
 | ||||
|      have to take care of two-dimensional arrays here.  */ | ||||
| 
 | ||||
|   realloc_c = flag_realloc_lhs && gfc_is_reallocatable_lhs (expr1); | ||||
|   next_code_point = &(if_limit->block->block->next); | ||||
| 
 | ||||
|   if (gfc_option.rtcheck & GFC_RTCHECK_BOUNDS) | ||||
|     { | ||||
|       gfc_code *test; | ||||
|       //      gfc_expr *a2, *b1, *c1, *c2, *a1, *b2;
 | ||||
|       gfc_expr *c1, *a1, *c2, *b2, *a2; | ||||
|       switch (m_case) | ||||
| 	{ | ||||
| 	case A2B2: | ||||
| 	  b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1); | ||||
| 	  a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2); | ||||
| 	  test = runtime_error_ne (b1, a2, B_ERROR(1)); | ||||
| 	  *next_code_point = test; | ||||
| 	  next_code_point = &test->next; | ||||
| 
 | ||||
| 	  if (!realloc_c) | ||||
| 	    { | ||||
| 	      c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1); | ||||
| 	      a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1); | ||||
| 	      test = runtime_error_ne (c1, a1, C_ERROR(1)); | ||||
| 	      *next_code_point = test; | ||||
| 	      next_code_point = &test->next; | ||||
| 
 | ||||
| 	      c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2); | ||||
| 	      b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2); | ||||
| 	      test = runtime_error_ne (c2, b2, C_ERROR(2)); | ||||
| 	      *next_code_point = test; | ||||
| 	      next_code_point = &test->next; | ||||
| 	    } | ||||
| 	  break; | ||||
| 
 | ||||
| 	case A2B2T: | ||||
| 
 | ||||
| 	  b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2); | ||||
| 	  a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2); | ||||
| 	  /* matrix_b is transposed, hence dimension 1 for the error message.  */ | ||||
| 	  test = runtime_error_ne (b2, a2, B_ERROR(1)); | ||||
| 	  *next_code_point = test; | ||||
| 	  next_code_point = &test->next; | ||||
| 
 | ||||
| 	  if (!realloc_c) | ||||
| 	    { | ||||
| 	      c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1); | ||||
| 	      a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1); | ||||
| 	      test = runtime_error_ne (c1, a1, C_ERROR(1)); | ||||
| 	      *next_code_point = test; | ||||
| 	      next_code_point = &test->next; | ||||
| 
 | ||||
| 	      c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2); | ||||
| 	      b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1); | ||||
| 	      test = runtime_error_ne (c2, b1, C_ERROR(2)); | ||||
| 	      *next_code_point = test; | ||||
| 	      next_code_point = &test->next; | ||||
| 	    } | ||||
| 	  break; | ||||
| 
 | ||||
| 	case A2TB2: | ||||
| 
 | ||||
| 	  b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1); | ||||
| 	  a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1); | ||||
| 	  test = runtime_error_ne (b1, a1, B_ERROR(1)); | ||||
| 	  *next_code_point = test; | ||||
| 	  next_code_point = &test->next; | ||||
| 
 | ||||
| 	  if (!realloc_c) | ||||
| 	    { | ||||
| 	      c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1); | ||||
| 	      a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2); | ||||
| 	      test = runtime_error_ne (c1, a2, C_ERROR(1)); | ||||
| 	      *next_code_point = test; | ||||
| 	      next_code_point = &test->next; | ||||
| 
 | ||||
| 	      c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2); | ||||
| 	      b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2); | ||||
| 	      test = runtime_error_ne (c2, b2, C_ERROR(2)); | ||||
| 	      *next_code_point = test; | ||||
| 	      next_code_point = &test->next; | ||||
| 	    } | ||||
| 	  break; | ||||
| 
 | ||||
| 	case A2TB2T: | ||||
| 	  b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2); | ||||
| 	  a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1); | ||||
| 	  test = runtime_error_ne (b2, a1, B_ERROR(1)); | ||||
| 	  *next_code_point = test; | ||||
| 	  next_code_point = &test->next; | ||||
| 
 | ||||
| 	  if (!realloc_c) | ||||
| 	    { | ||||
| 	      c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1); | ||||
| 	      a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2); | ||||
| 	      test = runtime_error_ne (c1, a2, C_ERROR(1)); | ||||
| 	      *next_code_point = test; | ||||
| 	      next_code_point = &test->next; | ||||
| 
 | ||||
| 	      c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2); | ||||
| 	      b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1); | ||||
| 	      test = runtime_error_ne (c2, b1, C_ERROR(2)); | ||||
| 	      *next_code_point = test; | ||||
| 	      next_code_point = &test->next; | ||||
| 	    } | ||||
| 	  break; | ||||
| 
 | ||||
| 	default: | ||||
| 	  gcc_unreachable (); | ||||
| 	} | ||||
|     }     | ||||
| 
 | ||||
|   /* Handle the reallocation, if needed.  */ | ||||
| 
 | ||||
|   if (realloc_c) | ||||
|     { | ||||
|       gfc_code *lhs_alloc; | ||||
| 
 | ||||
|       lhs_alloc = matmul_lhs_realloc (expr1, matrix_a, matrix_b, m_case); | ||||
|       *next_code_point = lhs_alloc; | ||||
|       next_code_point = &lhs_alloc->next; | ||||
|     } | ||||
| 
 | ||||
|   *next_code_point = call; | ||||
|   if_limit->next = co_next; | ||||
| 
 | ||||
|   /* Set up the BLAS call.  */ | ||||
| 
 | ||||
|   call->op = EXEC_CALL; | ||||
| 
 | ||||
|   gfc_get_sym_tree (blas_name, current_ns, &(call->symtree), true); | ||||
|   call->symtree->n.sym->attr.subroutine = 1; | ||||
|   call->symtree->n.sym->attr.procedure = 1; | ||||
|   call->symtree->n.sym->attr.flavor = FL_PROCEDURE; | ||||
|   call->resolved_sym = call->symtree->n.sym; | ||||
| 
 | ||||
|   /* Argument TRANSA.  */ | ||||
|   next = gfc_get_actual_arglist (); | ||||
|   next->expr = gfc_get_character_expr (gfc_default_character_kind, &co->loc, | ||||
| 				       transa, 1); | ||||
| 
 | ||||
|   call->ext.actual = next; | ||||
| 
 | ||||
|   /* Argument TRANSB.  */ | ||||
|   actual = next; | ||||
|   next = gfc_get_actual_arglist (); | ||||
|   next->expr = gfc_get_character_expr (gfc_default_character_kind, &co->loc, | ||||
| 				       transb, 1); | ||||
|   actual->next = next; | ||||
| 
 | ||||
|   c1 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (a->expr), 1, | ||||
| 			       gfc_integer_4_kind); | ||||
|   c2 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (b->expr), 2, | ||||
| 			       gfc_integer_4_kind); | ||||
| 
 | ||||
|   b1 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (b->expr), 1, | ||||
| 			       gfc_integer_4_kind); | ||||
| 
 | ||||
|   /* Argument M. */ | ||||
|   actual = next; | ||||
|   next = gfc_get_actual_arglist (); | ||||
|   next->expr = c1; | ||||
|   actual->next = next; | ||||
| 
 | ||||
|   /* Argument N. */ | ||||
|   actual = next; | ||||
|   next = gfc_get_actual_arglist (); | ||||
|   next->expr = c2; | ||||
|   actual->next = next; | ||||
| 
 | ||||
|   /* Argument K.  */ | ||||
|   actual = next; | ||||
|   next = gfc_get_actual_arglist (); | ||||
|   next->expr = b1; | ||||
|   actual->next = next; | ||||
| 
 | ||||
|   /* Argument ALPHA - set to one.  */ | ||||
|   actual = next; | ||||
|   next = gfc_get_actual_arglist (); | ||||
|   next->expr = gfc_get_constant_expr (type, kind, &co->loc); | ||||
|   if (type == BT_REAL) | ||||
|     mpfr_set_ui (next->expr->value.real, 1, GFC_RND_MODE); | ||||
|   else | ||||
|     mpc_set_ui (next->expr->value.complex, 1, GFC_MPC_RND_MODE); | ||||
|   actual->next = next; | ||||
| 
 | ||||
|   /* Argument A.  */ | ||||
|   actual = next; | ||||
|   next = gfc_get_actual_arglist (); | ||||
|   next->expr = gfc_copy_expr (matrix_a); | ||||
|   actual->next = next; | ||||
| 
 | ||||
|   /* Argument LDA.  */ | ||||
|   actual = next; | ||||
|   next = gfc_get_actual_arglist (); | ||||
|   next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (matrix_a), | ||||
| 				       1, gfc_integer_4_kind); | ||||
|   actual->next = next; | ||||
| 
 | ||||
|   /* Argument B.  */ | ||||
|   actual = next; | ||||
|   next = gfc_get_actual_arglist (); | ||||
|   next->expr = gfc_copy_expr (matrix_b); | ||||
|   actual->next = next; | ||||
| 
 | ||||
|   /* Argument LDB.  */ | ||||
|   actual = next; | ||||
|   next = gfc_get_actual_arglist (); | ||||
|   next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (matrix_b), | ||||
| 				       1, gfc_integer_4_kind); | ||||
|   actual->next = next; | ||||
| 
 | ||||
|   /* Argument BETA - set to zero.  */ | ||||
|   actual = next; | ||||
|   next = gfc_get_actual_arglist (); | ||||
|   next->expr = gfc_get_constant_expr (type, kind, &co->loc); | ||||
|   if (type == BT_REAL) | ||||
|     mpfr_set_ui (next->expr->value.real, 0, GFC_RND_MODE); | ||||
|   else | ||||
|     mpc_set_ui (next->expr->value.complex, 0, GFC_MPC_RND_MODE); | ||||
|   actual->next = next; | ||||
| 
 | ||||
|   /* Argument C.  */ | ||||
| 
 | ||||
|   actual = next; | ||||
|   next = gfc_get_actual_arglist (); | ||||
|   next->expr = gfc_copy_expr (expr1); | ||||
|   actual->next = next; | ||||
| 
 | ||||
|   /* Argument LDC.  */ | ||||
|   actual = next; | ||||
|   next = gfc_get_actual_arglist (); | ||||
|   next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (expr1), | ||||
| 				       1, gfc_integer_4_kind); | ||||
|   actual->next = next; | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /* Code for index interchange for loops which are grouped together in DO
 | ||||
|    CONCURRENT or FORALL statements.  This is currently only applied if the | ||||
|  |  | |||
|  | @ -2143,6 +2143,11 @@ typedef struct gfc_expr | |||
| 
 | ||||
|   unsigned int no_bounds_check : 1; | ||||
| 
 | ||||
|   /* Set this if a matmul expression has already been evaluated for conversion
 | ||||
|      to a BLAS call.  */ | ||||
| 
 | ||||
|   unsigned int external_blas : 1; | ||||
| 
 | ||||
|   /* If an expression comes from a Hollerith constant or compile-time
 | ||||
|      evaluation of a transfer statement, it may have a prescribed target- | ||||
|      memory representation, and these cannot always be backformed from | ||||
|  |  | |||
|  | @ -4055,6 +4055,7 @@ gfc_conv_intrinsic_funcall (gfc_se * se, gfc_expr * expr) | |||
|      to be able to call the BLAS ?gemm functions if required and possible.  */ | ||||
|   append_args = NULL; | ||||
|   if (expr->value.function.isym->id == GFC_ISYM_MATMUL | ||||
|       && !expr->external_blas | ||||
|       && sym->ts.type != BT_LOGICAL) | ||||
|     { | ||||
|       tree cint = gfc_get_int_type (gfc_c_int_kind); | ||||
|  |  | |||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							|  | @ -44,4 +44,4 @@ program main | |||
|   deallocate(calloc) | ||||
| 
 | ||||
| end program main | ||||
| ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "original" } } | ||||
| ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 1 "original" } } | ||||
|  |  | |||
|  | @ -58,4 +58,4 @@ program main | |||
|      end do | ||||
|   end do | ||||
| end program main | ||||
| ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } } | ||||
| ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 1 "optimized" } } | ||||
|  |  | |||
|  | @ -0,0 +1,240 @@ | |||
| C { dg-do  run } | ||||
| C { dg-options "-fcheck=bounds -fdump-tree-optimized -fblas-matmul-limit=1 -O -fexternal-blas" } | ||||
| C { dg-additional-sources blas_gemm_routines.f } | ||||
| C Test calling of BLAS routines | ||||
| 
 | ||||
|       program main | ||||
|       call sub_s | ||||
|       call sub_d | ||||
|       call sub_c | ||||
|       call sub_z | ||||
|       end | ||||
| 
 | ||||
|       subroutine sub_d | ||||
|       implicit none | ||||
|       real(8), dimension(3,2) :: a | ||||
|       real(8), dimension(2,3) :: at | ||||
|       real(8), dimension(2,4) :: b | ||||
|       real(8), dimension(4,2) :: bt | ||||
|       real(8), dimension(3,4) :: c | ||||
|       real(8), dimension(3,4) :: cres | ||||
|       real(8), dimension(:,:), allocatable :: c_alloc | ||||
|       data a / 2.,  -3.,  5.,  -7., 11., -13./ | ||||
|       data b /17., -23., 29., -31., 37., -39., 41., -47./ | ||||
|       data cres /195., -304.,  384.,  275., -428.,  548.,  347., -540., | ||||
|      &           692.,  411., -640.,  816./ | ||||
| 
 | ||||
|       c = matmul(a,b) | ||||
|       if (any (c /= cres)) stop 31 | ||||
| 
 | ||||
|       at = transpose(a) | ||||
|       c = (1.2,-2.2) | ||||
|       c = matmul(transpose(at), b) | ||||
|       if (any (c /= cres)) stop 32 | ||||
| 
 | ||||
|       bt = transpose(b) | ||||
|       c = (1.2,-2.1) | ||||
|       c = matmul(a, transpose(bt)) | ||||
|       if (any (c /= cres)) stop 33 | ||||
| 
 | ||||
|       c_alloc = matmul(a,b) | ||||
|       if (any (c /= cres)) stop 34 | ||||
| 
 | ||||
|       at = transpose(a) | ||||
|       deallocate (c_alloc) | ||||
|       c = matmul(transpose(at), b) | ||||
|       if (any (c /= cres)) stop 35 | ||||
| 
 | ||||
|       bt = transpose(b) | ||||
|       allocate (c_alloc(20,20)) | ||||
|       c = (1.2,-2.1) | ||||
|       c = matmul(a, transpose(bt)) | ||||
|       if (any (c /= cres)) stop 36 | ||||
| 
 | ||||
|       end | ||||
| 
 | ||||
|       subroutine sub_s | ||||
|       implicit none | ||||
|       real, dimension(3,2) :: a | ||||
|       real, dimension(2,3) :: at | ||||
|       real, dimension(2,4) :: b | ||||
|       real, dimension(4,2) :: bt | ||||
|       real, dimension(3,4) :: c | ||||
|       real, dimension(3,4) :: cres | ||||
|       real, dimension(:,:), allocatable :: c_alloc | ||||
|       data a / 2.,  -3.,  5.,  -7., 11., -13./ | ||||
|       data b /17., -23., 29., -31., 37., -39., 41., -47./ | ||||
|       data cres /195., -304.,  384.,  275., -428.,  548.,  347., -540., | ||||
|      &           692.,  411., -640.,  816./ | ||||
| 
 | ||||
|       c = matmul(a,b) | ||||
|       if (any (c /= cres)) stop 21 | ||||
| 
 | ||||
|       at = transpose(a) | ||||
|       c = (1.2,-2.2) | ||||
|       c = matmul(transpose(at), b) | ||||
|       if (any (c /= cres)) stop 22 | ||||
| 
 | ||||
|       bt = transpose(b) | ||||
|       c = (1.2,-2.1) | ||||
|       c = matmul(a, transpose(bt)) | ||||
|       if (any (c /= cres)) stop 23 | ||||
| 
 | ||||
|       c_alloc = matmul(a,b) | ||||
|       if (any (c /= cres)) stop 24 | ||||
| 
 | ||||
|       at = transpose(a) | ||||
|       deallocate (c_alloc) | ||||
|       c = matmul(transpose(at), b) | ||||
|       if (any (c /= cres)) stop 25 | ||||
| 
 | ||||
|       bt = transpose(b) | ||||
|       allocate (c_alloc(20,20)) | ||||
|       c = (1.2,-2.1) | ||||
|       c = matmul(a, transpose(bt)) | ||||
|       if (any (c /= cres)) stop 26 | ||||
| 
 | ||||
|       end | ||||
| 
 | ||||
|       subroutine sub_c | ||||
|       implicit none | ||||
|       complex, dimension(3,2) :: a | ||||
|       complex, dimension(2,3) :: at, ah | ||||
|       complex, dimension(2,4) :: b | ||||
|       complex, dimension(4,2) :: bt, bh | ||||
|       complex, dimension(3,4) :: c | ||||
|       complex, dimension(3,4) :: cres | ||||
|       complex, dimension(:,:), allocatable :: c_alloc | ||||
|        | ||||
|       data a / (2.,-3.), (-5.,7.), (11.,-13.), (17.,19), (-23., -29), | ||||
|      &     (-31., 37.)/ | ||||
| 
 | ||||
|       data b / (-41., 43.), (-47., 53.), (-59.,-61.), (-67., 71), | ||||
|      &     ( 73.,79. ), (83.,-89.), (97.,-101.), (-107.,-109.)/ | ||||
|       data cres /(-1759.,217.), (2522.,-358.), (-396.,-2376.), | ||||
|      &     (-2789.,-11.), | ||||
|      &     (4322.,202.), (-1992.,-4584.), (3485.,3.), (-5408.,-244.), | ||||
|      &     (2550.,5750.), (143.,-4379.), (-478.,6794.), (7104.,-2952.) / | ||||
| 
 | ||||
|       c = matmul(a,b) | ||||
|       if (any (c /= cres)) stop 1 | ||||
| 
 | ||||
|       at = transpose(a) | ||||
|       c = (1.2,-2.2) | ||||
|       c = matmul(transpose(at), b) | ||||
|       if (any (c /= cres)) stop 2 | ||||
| 
 | ||||
|       bt = transpose(b) | ||||
|       c = (1.2,-2.1) | ||||
|       c = matmul(a, transpose(bt)) | ||||
|       if (any (c /= cres)) stop 3 | ||||
| 
 | ||||
|       ah = transpose(conjg(a)) | ||||
|       c = (1.2,-2.2) | ||||
|       c = matmul(conjg(transpose(ah)), b) | ||||
|       if (any (c /= cres)) stop 4 | ||||
| 
 | ||||
|       bh = transpose(conjg(b)) | ||||
|       c = (1.2,-2.2) | ||||
|       c = matmul(a, transpose(conjg(bh))) | ||||
|       if (any (c /= cres)) stop 5 | ||||
| 
 | ||||
|       c_alloc = matmul(a,b) | ||||
|       if (any (c /= cres)) stop 6 | ||||
| 
 | ||||
|       at = transpose(a) | ||||
|       deallocate (c_alloc) | ||||
|       c = matmul(transpose(at), b) | ||||
|       if (any (c /= cres)) stop 7 | ||||
| 
 | ||||
|       bt = transpose(b) | ||||
|       allocate (c_alloc(20,20)) | ||||
|       c = (1.2,-2.1) | ||||
|       c = matmul(a, transpose(bt)) | ||||
|       if (any (c /= cres)) stop 8 | ||||
| 
 | ||||
|       ah = transpose(conjg(a)) | ||||
|       c = (1.2,-2.2) | ||||
|       c = matmul(conjg(transpose(ah)), b) | ||||
|       if (any (c /= cres)) stop 9 | ||||
| 
 | ||||
|       deallocate (c_alloc) | ||||
|       allocate (c_alloc(0,0)) | ||||
|       bh = transpose(conjg(b)) | ||||
|       c = (1.2,-2.2) | ||||
|       c = matmul(a, transpose(conjg(bh))) | ||||
|       if (any (c /= cres)) stop 10 | ||||
| 
 | ||||
|       end | ||||
| 
 | ||||
|       subroutine sub_z | ||||
|       implicit none | ||||
|       complex(8), dimension(3,2) :: a | ||||
|       complex(8), dimension(2,3) :: at, ah | ||||
|       complex(8), dimension(2,4) :: b | ||||
|       complex(8), dimension(4,2) :: bt, bh | ||||
|       complex(8), dimension(3,4) :: c | ||||
|       complex(8), dimension(3,4) :: cres | ||||
|       complex(8), dimension(:,:), allocatable :: c_alloc | ||||
|        | ||||
|       data a / (2.,-3.), (-5._8,7.), (11.,-13.), (17.,19), | ||||
|      &         (-23., -29), (-31., 37.)/ | ||||
| 
 | ||||
|       data b / (-41., 43.), (-47., 53.), (-59.,-61.), (-67., 71), | ||||
|      &     ( 73.,79. ), (83.,-89.), (97.,-101.), (-107.,-109.)/ | ||||
|       data cres /(-1759.,217.), (2522.,-358.), (-396.,-2376.), | ||||
|      &     (-2789.,-11.), | ||||
|      &     (4322.,202.), (-1992.,-4584.), (3485.,3.), (-5408.,-244.), | ||||
|      &     (2550.,5750.), (143.,-4379.), (-478.,6794.), (7104.,-2952.) / | ||||
| 
 | ||||
|       c = matmul(a,b) | ||||
|       if (any (c /= cres)) stop 11 | ||||
| 
 | ||||
|       at = transpose(a) | ||||
|       c = (1.2,-2.2) | ||||
|       c = matmul(transpose(at), b) | ||||
|       if (any (c /= cres)) stop 12 | ||||
| 
 | ||||
|       bt = transpose(b) | ||||
|       c = (1.2,-2.1) | ||||
|       c = matmul(a, transpose(bt)) | ||||
|       if (any (c /= cres)) stop 13 | ||||
| 
 | ||||
|       ah = transpose(conjg(a)) | ||||
|       c = (1.2,-2.2) | ||||
|       c = matmul(conjg(transpose(ah)), b) | ||||
|       if (any (c /= cres)) stop 14 | ||||
| 
 | ||||
|       bh = transpose(conjg(b)) | ||||
|       c = (1.2,-2.2) | ||||
|       c = matmul(a, transpose(conjg(bh))) | ||||
|       if (any (c /= cres)) stop 15 | ||||
| 
 | ||||
|       c_alloc = matmul(a,b) | ||||
|       if (any (c /= cres)) stop 16 | ||||
| 
 | ||||
|       at = transpose(a) | ||||
|       deallocate (c_alloc) | ||||
|       c = matmul(transpose(at), b) | ||||
|       if (any (c /= cres)) stop 17 | ||||
| 
 | ||||
|       bt = transpose(b) | ||||
|       allocate (c_alloc(20,20)) | ||||
|       c = (1.2,-2.1) | ||||
|       c = matmul(a, transpose(bt)) | ||||
|       if (any (c /= cres)) stop 18 | ||||
| 
 | ||||
|       ah = transpose(conjg(a)) | ||||
|       c = (1.2,-2.2) | ||||
|       c = matmul(conjg(transpose(ah)), b) | ||||
|       if (any (c /= cres)) stop 19 | ||||
| 
 | ||||
|       deallocate (c_alloc) | ||||
|       allocate (c_alloc(0,0)) | ||||
|       bh = transpose(conjg(b)) | ||||
|       c = (1.2,-2.2) | ||||
|       c = matmul(a, transpose(conjg(bh))) | ||||
|       if (any (c /= cres)) stop 20 | ||||
| 
 | ||||
|       end | ||||
| ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } } | ||||
|  | @ -0,0 +1,16 @@ | |||
| C { dg-do  run } | ||||
| C { dg-options "-fno-realloc-lhs -fdump-tree-optimized -fcheck=bounds -fblas-matmul-limit=1 -O -fexternal-blas" } | ||||
| C { dg-shouldfail "Fortran runtime error: Array bound mismatch for dimension 2 of array." } | ||||
| C { dg-additional-sources blas_gemm_routines.f } | ||||
| 
 | ||||
|       program main | ||||
|       real, dimension(3,2) :: a | ||||
|       real, dimension(2,3) :: b | ||||
|       real, dimension(:,:), allocatable :: ret | ||||
|       a = 1.0 | ||||
|       b = 2.3 | ||||
|       allocate(ret(3,2)) | ||||
|       ret = matmul(a,b)         ! This should throw an error. | ||||
|       end | ||||
| ! { dg-output "Fortran runtime error: Array bound mismatch for dimension 2 of array.*" } | ||||
| ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } } | ||||
|  | @ -0,0 +1,19 @@ | |||
| C { dg-do  run } | ||||
| C { dg-options "-fdump-tree-optimized -fcheck=bounds -fblas-matmul-limit=1 -O -fexternal-blas" } | ||||
| C { dg-shouldfail "Fortran runtime error: Incorrect extent in argument B in MATMUL intrinsic in dimension 1.*" } | ||||
| C { dg-additional-sources blas_gemm_routines.f } | ||||
|       program main | ||||
|       character(len=20) :: line | ||||
|       integer :: n, m | ||||
|       real, dimension(3,2) :: a | ||||
|       real, dimension(:,:), allocatable :: b | ||||
|       real, dimension(:,:), allocatable :: ret | ||||
|       a = 1.0 | ||||
|       line = '3 3' | ||||
|       read (unit=line,fmt=*) n, m | ||||
|       allocate (b(n,m)) | ||||
|       b = 2.3 | ||||
|       ret = matmul(a,b)         ! This should throw an error. | ||||
|       end | ||||
| ! { dg-output "Fortran runtime error: Incorrect extent in argument B in MATMUL intrinsic in dimension 1.*" } | ||||
| ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } } | ||||
|  | @ -0,0 +1,20 @@ | |||
| C { dg-do  run } | ||||
| C { dg-options "-fdump-tree-optimized -fcheck=bounds -fblas-matmul-limit=1 -O -fexternal-blas" } | ||||
| C { dg-shouldfail "Fortran runtime error: Incorrect extent in argument B in MATMUL intrinsic in dimension 1" } | ||||
| C { dg-additional-sources blas_gemm_routines.f } | ||||
| 
 | ||||
|       program main | ||||
|       character(len=20) :: line | ||||
|       integer :: n, m | ||||
|       real, dimension(3,2) :: a | ||||
|       real, dimension(:,:), allocatable :: b | ||||
|       real, dimension(:,:), allocatable :: ret | ||||
|       a = 1.0 | ||||
|       line = '4 3' | ||||
|       read (unit=line,fmt=*) n, m | ||||
|       allocate (b(n,m)) | ||||
|       b = 2.3 | ||||
|       ret = matmul(transpose(a),b)         ! This should throw an error. | ||||
|       end | ||||
| ! { dg-output "Fortran runtime error: Incorrect extent in argument B in MATMUL intrinsic in dimension 1.*" } | ||||
| ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } } | ||||
|  | @ -1,5 +1,5 @@ | |||
| ! { dg-do compile } | ||||
| ! { dg-options "-fdefault-real-8 -fexternal-blas -fdump-tree-original -finline-matmul-limit=0" } | ||||
| ! { dg-options "-fdefault-real-8 -fexternal-blas -fblas-matmul-limit=1 -fdump-tree-original -finline-matmul-limit=0" } | ||||
| ! | ||||
| ! PR fortran/54463 | ||||
| ! | ||||
|  | @ -8,8 +8,9 @@ | |||
| program test | ||||
|   implicit none | ||||
|   real, dimension(3,3) :: A | ||||
|   call random_number(a) | ||||
|   A = matmul(A,A) | ||||
| end program test | ||||
| 
 | ||||
| ! { dg-final { scan-tree-dump-times "sgemm_" 0 "original" } } | ||||
| ! { dg-final { scan-tree-dump-times "dgemm_" 1 "original" } } | ||||
| ! { dg-final { scan-tree-dump-times "sgemm" 0 "original" } } | ||||
| ! { dg-final { scan-tree-dump-times "dgemm" 1 "original" } } | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Thomas Koenig
						Thomas Koenig