mirror of git://gcc.gnu.org/git/gcc.git
re PR fortran/29550 (Optimize -fexternal-blas calls for conjg())
2018-09-18 Thomas Koenig <tkoenig@gcc.gnu.org> PR fortran/29550 * gfortran.h (gfc_expr): Add external_blas flag. * frontend-passes.c (matrix_case): Add case A2TB2T. (optimize_namespace): Handle flag_external_blas by calling call_external_blas. (get_array_inq_function): Add argument okind. If it is nonzero, use it as the kind of argument to be used. (inline_limit_check): Remove m_case argument, add limit argument instead. Remove assert about m_case. Set the limit for inlining from the limit argument. (matmul_lhs_realloc): Handle case A2TB2T. (inline_matmul_assign): Handle inline limit for other cases with two rank-two matrices. Remove no-op calls to inline_limit_check. (call_external_blas): New function. * trans-intrinsic.c (gfc_conv_intrinsic_funcall): Do not add argument to external BLAS if external_blas is already set. 2018-09-18 Thomas Koenig <tkoenig@gcc.gnu.org> PR fortran/29550 * gfortran.dg/inline_matmul_13.f90: Adjust count for _gfortran_matmul. * gfortran.dg/inline_matmul_16.f90: Likewise. * gfortran.dg/promotion_2.f90: Add -fblas-matmul-limit=1. Scan for dgemm instead of dgemm_. Add call to random_number to make standard conforming. * gfortran.dg/matmul_blas_1.f90: New test. * gfortran.dg/matmul_bounds_14.f: New test. * gfortran.dg/matmul_bounds_15.f: New test. * gfortran.dg/matmul_bounds_16.f: New test. * gfortran.dg/blas_gemm_routines.f: New test / additional file for preceding tests. From-SVN: r264412
This commit is contained in:
parent
5c470e0f07
commit
998511a610
|
|
@ -53,6 +53,7 @@ static gfc_code * create_do_loop (gfc_expr *, gfc_expr *, gfc_expr *,
|
||||||
char *vname=NULL);
|
char *vname=NULL);
|
||||||
static gfc_expr* check_conjg_transpose_variable (gfc_expr *, bool *,
|
static gfc_expr* check_conjg_transpose_variable (gfc_expr *, bool *,
|
||||||
bool *);
|
bool *);
|
||||||
|
static int call_external_blas (gfc_code **, int *, void *);
|
||||||
static bool has_dimen_vector_ref (gfc_expr *);
|
static bool has_dimen_vector_ref (gfc_expr *);
|
||||||
static int matmul_temp_args (gfc_code **, int *,void *data);
|
static int matmul_temp_args (gfc_code **, int *,void *data);
|
||||||
static int index_interchange (gfc_code **, int*, void *);
|
static int index_interchange (gfc_code **, int*, void *);
|
||||||
|
|
@ -131,7 +132,7 @@ static int var_num = 1;
|
||||||
|
|
||||||
/* What sort of matrix we are dealing with when inlining MATMUL. */
|
/* What sort of matrix we are dealing with when inlining MATMUL. */
|
||||||
|
|
||||||
enum matrix_case { none=0, A2B2, A2B1, A1B2, A2B2T, A2TB2 };
|
enum matrix_case { none=0, A2B2, A2B1, A1B2, A2B2T, A2TB2, A2TB2T };
|
||||||
|
|
||||||
/* Keep track of the number of expressions we have inserted so far
|
/* Keep track of the number of expressions we have inserted so far
|
||||||
using create_var. */
|
using create_var. */
|
||||||
|
|
@ -1428,7 +1429,7 @@ optimize_namespace (gfc_namespace *ns)
|
||||||
gfc_code_walker (&ns->code, convert_elseif, dummy_expr_callback, NULL);
|
gfc_code_walker (&ns->code, convert_elseif, dummy_expr_callback, NULL);
|
||||||
gfc_code_walker (&ns->code, cfe_code, cfe_expr_0, NULL);
|
gfc_code_walker (&ns->code, cfe_code, cfe_expr_0, NULL);
|
||||||
gfc_code_walker (&ns->code, optimize_code, optimize_expr, NULL);
|
gfc_code_walker (&ns->code, optimize_code, optimize_expr, NULL);
|
||||||
if (flag_inline_matmul_limit != 0)
|
if (flag_inline_matmul_limit != 0 || flag_external_blas)
|
||||||
{
|
{
|
||||||
bool found;
|
bool found;
|
||||||
do
|
do
|
||||||
|
|
@ -1441,9 +1442,15 @@ optimize_namespace (gfc_namespace *ns)
|
||||||
|
|
||||||
gfc_code_walker (&ns->code, matmul_temp_args, dummy_expr_callback,
|
gfc_code_walker (&ns->code, matmul_temp_args, dummy_expr_callback,
|
||||||
NULL);
|
NULL);
|
||||||
gfc_code_walker (&ns->code, inline_matmul_assign, dummy_expr_callback,
|
|
||||||
NULL);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (flag_external_blas)
|
||||||
|
gfc_code_walker (&ns->code, call_external_blas, dummy_expr_callback,
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
if (flag_inline_matmul_limit != 0)
|
||||||
|
gfc_code_walker (&ns->code, inline_matmul_assign, dummy_expr_callback,
|
||||||
|
NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flag_frontend_loop_interchange)
|
if (flag_frontend_loop_interchange)
|
||||||
|
|
@ -2938,7 +2945,7 @@ matmul_temp_args (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
|
||||||
dim is zero-based. */
|
dim is zero-based. */
|
||||||
|
|
||||||
static gfc_expr *
|
static gfc_expr *
|
||||||
get_array_inq_function (gfc_isym_id id, gfc_expr *e, int dim)
|
get_array_inq_function (gfc_isym_id id, gfc_expr *e, int dim, int okind = 0)
|
||||||
{
|
{
|
||||||
gfc_expr *fcn;
|
gfc_expr *fcn;
|
||||||
gfc_expr *dim_arg, *kind;
|
gfc_expr *dim_arg, *kind;
|
||||||
|
|
@ -2964,8 +2971,12 @@ get_array_inq_function (gfc_isym_id id, gfc_expr *e, int dim)
|
||||||
}
|
}
|
||||||
|
|
||||||
dim_arg = gfc_get_int_expr (gfc_default_integer_kind, &e->where, dim);
|
dim_arg = gfc_get_int_expr (gfc_default_integer_kind, &e->where, dim);
|
||||||
kind = gfc_get_int_expr (gfc_default_integer_kind, &e->where,
|
if (okind != 0)
|
||||||
gfc_index_integer_kind);
|
kind = gfc_get_int_expr (gfc_default_integer_kind, &e->where,
|
||||||
|
okind);
|
||||||
|
else
|
||||||
|
kind = gfc_get_int_expr (gfc_default_integer_kind, &e->where,
|
||||||
|
gfc_index_integer_kind);
|
||||||
|
|
||||||
ec = gfc_copy_expr (e);
|
ec = gfc_copy_expr (e);
|
||||||
|
|
||||||
|
|
@ -3026,7 +3037,7 @@ get_operand (gfc_intrinsic_op op, gfc_expr *e1, gfc_expr *e2)
|
||||||
removed by DCE. Only called for rank-two matrices A and B. */
|
removed by DCE. Only called for rank-two matrices A and B. */
|
||||||
|
|
||||||
static gfc_code *
|
static gfc_code *
|
||||||
inline_limit_check (gfc_expr *a, gfc_expr *b, enum matrix_case m_case)
|
inline_limit_check (gfc_expr *a, gfc_expr *b, int limit)
|
||||||
{
|
{
|
||||||
gfc_expr *inline_limit;
|
gfc_expr *inline_limit;
|
||||||
gfc_code *if_1, *if_2, *else_2;
|
gfc_code *if_1, *if_2, *else_2;
|
||||||
|
|
@ -3034,14 +3045,11 @@ inline_limit_check (gfc_expr *a, gfc_expr *b, enum matrix_case m_case)
|
||||||
gfc_typespec ts;
|
gfc_typespec ts;
|
||||||
gfc_expr *cond;
|
gfc_expr *cond;
|
||||||
|
|
||||||
gcc_assert (m_case == A2B2 || m_case == A2B2T || m_case == A2TB2);
|
|
||||||
|
|
||||||
/* Calculation is done in real to avoid integer overflow. */
|
/* Calculation is done in real to avoid integer overflow. */
|
||||||
|
|
||||||
inline_limit = gfc_get_constant_expr (BT_REAL, gfc_default_real_kind,
|
inline_limit = gfc_get_constant_expr (BT_REAL, gfc_default_real_kind,
|
||||||
&a->where);
|
&a->where);
|
||||||
mpfr_set_si (inline_limit->value.real, flag_inline_matmul_limit,
|
mpfr_set_si (inline_limit->value.real, limit, GFC_RND_MODE);
|
||||||
GFC_RND_MODE);
|
|
||||||
mpfr_pow_ui (inline_limit->value.real, inline_limit->value.real, 3,
|
mpfr_pow_ui (inline_limit->value.real, inline_limit->value.real, 3,
|
||||||
GFC_RND_MODE);
|
GFC_RND_MODE);
|
||||||
|
|
||||||
|
|
@ -3235,6 +3243,22 @@ matmul_lhs_realloc (gfc_expr *c, gfc_expr *a, gfc_expr *b,
|
||||||
get_array_inq_function (GFC_ISYM_SIZE, b, 2));
|
get_array_inq_function (GFC_ISYM_SIZE, b, 2));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case A2TB2T:
|
||||||
|
/* This can only happen for BLAS, we do not handle that case in
|
||||||
|
inline mamtul. */
|
||||||
|
ar->start[0] = get_array_inq_function (GFC_ISYM_SIZE, a, 2);
|
||||||
|
ar->start[1] = get_array_inq_function (GFC_ISYM_SIZE, b, 1);
|
||||||
|
|
||||||
|
ne1 = build_logical_expr (INTRINSIC_NE,
|
||||||
|
get_array_inq_function (GFC_ISYM_SIZE, c, 1),
|
||||||
|
get_array_inq_function (GFC_ISYM_SIZE, a, 2));
|
||||||
|
ne2 = build_logical_expr (INTRINSIC_NE,
|
||||||
|
get_array_inq_function (GFC_ISYM_SIZE, c, 2),
|
||||||
|
get_array_inq_function (GFC_ISYM_SIZE, b, 1));
|
||||||
|
|
||||||
|
cond = build_logical_expr (INTRINSIC_OR, ne1, ne2);
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
gcc_unreachable();
|
gcc_unreachable();
|
||||||
|
|
||||||
|
|
@ -3946,9 +3970,11 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees,
|
||||||
/* Take care of the inline flag. If the limit check evaluates to a
|
/* Take care of the inline flag. If the limit check evaluates to a
|
||||||
constant, dead code elimination will eliminate the unneeded branch. */
|
constant, dead code elimination will eliminate the unneeded branch. */
|
||||||
|
|
||||||
if (m_case == A2B2 && flag_inline_matmul_limit > 0)
|
if (flag_inline_matmul_limit > 0 && matrix_a->rank == 2
|
||||||
|
&& matrix_b->rank == 2)
|
||||||
{
|
{
|
||||||
if_limit = inline_limit_check (matrix_a, matrix_b, m_case);
|
if_limit = inline_limit_check (matrix_a, matrix_b,
|
||||||
|
flag_inline_matmul_limit);
|
||||||
|
|
||||||
/* Insert the original statement into the else branch. */
|
/* Insert the original statement into the else branch. */
|
||||||
if_limit->block->block->next = co;
|
if_limit->block->block->next = co;
|
||||||
|
|
@ -4118,7 +4144,6 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees,
|
||||||
switch (m_case)
|
switch (m_case)
|
||||||
{
|
{
|
||||||
case A2B2:
|
case A2B2:
|
||||||
inline_limit_check (matrix_a, matrix_b, m_case);
|
|
||||||
|
|
||||||
u1 = get_size_m1 (matrix_b, 2);
|
u1 = get_size_m1 (matrix_b, 2);
|
||||||
u2 = get_size_m1 (matrix_a, 2);
|
u2 = get_size_m1 (matrix_a, 2);
|
||||||
|
|
@ -4151,7 +4176,6 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees,
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case A2B2T:
|
case A2B2T:
|
||||||
inline_limit_check (matrix_a, matrix_b, m_case);
|
|
||||||
|
|
||||||
u1 = get_size_m1 (matrix_b, 1);
|
u1 = get_size_m1 (matrix_b, 1);
|
||||||
u2 = get_size_m1 (matrix_a, 2);
|
u2 = get_size_m1 (matrix_a, 2);
|
||||||
|
|
@ -4184,7 +4208,6 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees,
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case A2TB2:
|
case A2TB2:
|
||||||
inline_limit_check (matrix_a, matrix_b, m_case);
|
|
||||||
|
|
||||||
u1 = get_size_m1 (matrix_a, 2);
|
u1 = get_size_m1 (matrix_a, 2);
|
||||||
u2 = get_size_m1 (matrix_b, 2);
|
u2 = get_size_m1 (matrix_b, 2);
|
||||||
|
|
@ -4311,6 +4334,405 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Change matmul function calls in the form of
|
||||||
|
|
||||||
|
c = matmul(a,b)
|
||||||
|
|
||||||
|
to the corresponding call to a BLAS routine, if applicable. */
|
||||||
|
|
||||||
|
static int
|
||||||
|
call_external_blas (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
|
||||||
|
void *data ATTRIBUTE_UNUSED)
|
||||||
|
{
|
||||||
|
gfc_code *co, *co_next;
|
||||||
|
gfc_expr *expr1, *expr2;
|
||||||
|
gfc_expr *matrix_a, *matrix_b;
|
||||||
|
gfc_code *if_limit = NULL;
|
||||||
|
gfc_actual_arglist *a, *b;
|
||||||
|
bool conjg_a, conjg_b, transpose_a, transpose_b;
|
||||||
|
gfc_code *call;
|
||||||
|
const char *blas_name;
|
||||||
|
const char *transa, *transb;
|
||||||
|
gfc_expr *c1, *c2, *b1;
|
||||||
|
gfc_actual_arglist *actual, *next;
|
||||||
|
bt type;
|
||||||
|
int kind;
|
||||||
|
enum matrix_case m_case;
|
||||||
|
bool realloc_c;
|
||||||
|
gfc_code **next_code_point;
|
||||||
|
|
||||||
|
/* Many of the tests for inline matmul also apply here. */
|
||||||
|
|
||||||
|
co = *c;
|
||||||
|
|
||||||
|
if (co->op != EXEC_ASSIGN)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (in_where || in_assoc_list)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* The BLOCKS generated for the temporary variables and FORALL don't
|
||||||
|
mix. */
|
||||||
|
if (forall_level > 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* For now don't do anything in OpenMP workshare, it confuses
|
||||||
|
its translation, which expects only the allowed statements in there. */
|
||||||
|
|
||||||
|
if (in_omp_workshare)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
expr1 = co->expr1;
|
||||||
|
expr2 = co->expr2;
|
||||||
|
if (expr2->expr_type != EXPR_FUNCTION
|
||||||
|
|| expr2->value.function.isym == NULL
|
||||||
|
|| expr2->value.function.isym->id != GFC_ISYM_MATMUL)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
type = expr2->ts.type;
|
||||||
|
kind = expr2->ts.kind;
|
||||||
|
|
||||||
|
/* Guard against recursion. */
|
||||||
|
|
||||||
|
if (expr2->external_blas)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (type != expr1->ts.type || kind != expr1->ts.kind)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (type == BT_REAL)
|
||||||
|
{
|
||||||
|
if (kind == 4)
|
||||||
|
blas_name = "sgemm";
|
||||||
|
else if (kind == 8)
|
||||||
|
blas_name = "dgemm";
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
else if (type == BT_COMPLEX)
|
||||||
|
{
|
||||||
|
if (kind == 4)
|
||||||
|
blas_name = "cgemm";
|
||||||
|
else if (kind == 8)
|
||||||
|
blas_name = "zgemm";
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
a = expr2->value.function.actual;
|
||||||
|
if (a->expr->rank != 2)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
b = a->next;
|
||||||
|
if (b->expr->rank != 2)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
matrix_a = check_conjg_transpose_variable (a->expr, &conjg_a, &transpose_a);
|
||||||
|
if (matrix_a == NULL)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (transpose_a)
|
||||||
|
{
|
||||||
|
if (conjg_a)
|
||||||
|
transa = "C";
|
||||||
|
else
|
||||||
|
transa = "T";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
transa = "N";
|
||||||
|
|
||||||
|
matrix_b = check_conjg_transpose_variable (b->expr, &conjg_b, &transpose_b);
|
||||||
|
if (matrix_b == NULL)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (transpose_b)
|
||||||
|
{
|
||||||
|
if (conjg_b)
|
||||||
|
transb = "C";
|
||||||
|
else
|
||||||
|
transb = "T";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
transb = "N";
|
||||||
|
|
||||||
|
if (transpose_a)
|
||||||
|
{
|
||||||
|
if (transpose_b)
|
||||||
|
m_case = A2TB2T;
|
||||||
|
else
|
||||||
|
m_case = A2TB2;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (transpose_b)
|
||||||
|
m_case = A2B2T;
|
||||||
|
else
|
||||||
|
m_case = A2B2;
|
||||||
|
}
|
||||||
|
|
||||||
|
current_code = c;
|
||||||
|
inserted_block = NULL;
|
||||||
|
changed_statement = NULL;
|
||||||
|
|
||||||
|
expr2->external_blas = 1;
|
||||||
|
|
||||||
|
/* We do not handle data dependencies yet. */
|
||||||
|
if (gfc_check_dependency (expr1, matrix_a, true)
|
||||||
|
|| gfc_check_dependency (expr1, matrix_b, true))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Generate the if statement and hang it into the tree. */
|
||||||
|
if_limit = inline_limit_check (matrix_a, matrix_b, flag_blas_matmul_limit);
|
||||||
|
co_next = co->next;
|
||||||
|
(*current_code) = if_limit;
|
||||||
|
co->next = NULL;
|
||||||
|
if_limit->block->next = co;
|
||||||
|
|
||||||
|
call = XCNEW (gfc_code);
|
||||||
|
call->loc = co->loc;
|
||||||
|
|
||||||
|
/* Bounds checking - a bit simpler than for inlining since we only
|
||||||
|
have to take care of two-dimensional arrays here. */
|
||||||
|
|
||||||
|
realloc_c = flag_realloc_lhs && gfc_is_reallocatable_lhs (expr1);
|
||||||
|
next_code_point = &(if_limit->block->block->next);
|
||||||
|
|
||||||
|
if (gfc_option.rtcheck & GFC_RTCHECK_BOUNDS)
|
||||||
|
{
|
||||||
|
gfc_code *test;
|
||||||
|
// gfc_expr *a2, *b1, *c1, *c2, *a1, *b2;
|
||||||
|
gfc_expr *c1, *a1, *c2, *b2, *a2;
|
||||||
|
switch (m_case)
|
||||||
|
{
|
||||||
|
case A2B2:
|
||||||
|
b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
|
||||||
|
a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2);
|
||||||
|
test = runtime_error_ne (b1, a2, B_ERROR(1));
|
||||||
|
*next_code_point = test;
|
||||||
|
next_code_point = &test->next;
|
||||||
|
|
||||||
|
if (!realloc_c)
|
||||||
|
{
|
||||||
|
c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1);
|
||||||
|
a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1);
|
||||||
|
test = runtime_error_ne (c1, a1, C_ERROR(1));
|
||||||
|
*next_code_point = test;
|
||||||
|
next_code_point = &test->next;
|
||||||
|
|
||||||
|
c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2);
|
||||||
|
b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2);
|
||||||
|
test = runtime_error_ne (c2, b2, C_ERROR(2));
|
||||||
|
*next_code_point = test;
|
||||||
|
next_code_point = &test->next;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case A2B2T:
|
||||||
|
|
||||||
|
b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2);
|
||||||
|
a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2);
|
||||||
|
/* matrix_b is transposed, hence dimension 1 for the error message. */
|
||||||
|
test = runtime_error_ne (b2, a2, B_ERROR(1));
|
||||||
|
*next_code_point = test;
|
||||||
|
next_code_point = &test->next;
|
||||||
|
|
||||||
|
if (!realloc_c)
|
||||||
|
{
|
||||||
|
c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1);
|
||||||
|
a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1);
|
||||||
|
test = runtime_error_ne (c1, a1, C_ERROR(1));
|
||||||
|
*next_code_point = test;
|
||||||
|
next_code_point = &test->next;
|
||||||
|
|
||||||
|
c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2);
|
||||||
|
b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
|
||||||
|
test = runtime_error_ne (c2, b1, C_ERROR(2));
|
||||||
|
*next_code_point = test;
|
||||||
|
next_code_point = &test->next;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case A2TB2:
|
||||||
|
|
||||||
|
b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
|
||||||
|
a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1);
|
||||||
|
test = runtime_error_ne (b1, a1, B_ERROR(1));
|
||||||
|
*next_code_point = test;
|
||||||
|
next_code_point = &test->next;
|
||||||
|
|
||||||
|
if (!realloc_c)
|
||||||
|
{
|
||||||
|
c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1);
|
||||||
|
a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2);
|
||||||
|
test = runtime_error_ne (c1, a2, C_ERROR(1));
|
||||||
|
*next_code_point = test;
|
||||||
|
next_code_point = &test->next;
|
||||||
|
|
||||||
|
c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2);
|
||||||
|
b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2);
|
||||||
|
test = runtime_error_ne (c2, b2, C_ERROR(2));
|
||||||
|
*next_code_point = test;
|
||||||
|
next_code_point = &test->next;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case A2TB2T:
|
||||||
|
b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2);
|
||||||
|
a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1);
|
||||||
|
test = runtime_error_ne (b2, a1, B_ERROR(1));
|
||||||
|
*next_code_point = test;
|
||||||
|
next_code_point = &test->next;
|
||||||
|
|
||||||
|
if (!realloc_c)
|
||||||
|
{
|
||||||
|
c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1);
|
||||||
|
a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2);
|
||||||
|
test = runtime_error_ne (c1, a2, C_ERROR(1));
|
||||||
|
*next_code_point = test;
|
||||||
|
next_code_point = &test->next;
|
||||||
|
|
||||||
|
c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2);
|
||||||
|
b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
|
||||||
|
test = runtime_error_ne (c2, b1, C_ERROR(2));
|
||||||
|
*next_code_point = test;
|
||||||
|
next_code_point = &test->next;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
gcc_unreachable ();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Handle the reallocation, if needed. */
|
||||||
|
|
||||||
|
if (realloc_c)
|
||||||
|
{
|
||||||
|
gfc_code *lhs_alloc;
|
||||||
|
|
||||||
|
lhs_alloc = matmul_lhs_realloc (expr1, matrix_a, matrix_b, m_case);
|
||||||
|
*next_code_point = lhs_alloc;
|
||||||
|
next_code_point = &lhs_alloc->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
*next_code_point = call;
|
||||||
|
if_limit->next = co_next;
|
||||||
|
|
||||||
|
/* Set up the BLAS call. */
|
||||||
|
|
||||||
|
call->op = EXEC_CALL;
|
||||||
|
|
||||||
|
gfc_get_sym_tree (blas_name, current_ns, &(call->symtree), true);
|
||||||
|
call->symtree->n.sym->attr.subroutine = 1;
|
||||||
|
call->symtree->n.sym->attr.procedure = 1;
|
||||||
|
call->symtree->n.sym->attr.flavor = FL_PROCEDURE;
|
||||||
|
call->resolved_sym = call->symtree->n.sym;
|
||||||
|
|
||||||
|
/* Argument TRANSA. */
|
||||||
|
next = gfc_get_actual_arglist ();
|
||||||
|
next->expr = gfc_get_character_expr (gfc_default_character_kind, &co->loc,
|
||||||
|
transa, 1);
|
||||||
|
|
||||||
|
call->ext.actual = next;
|
||||||
|
|
||||||
|
/* Argument TRANSB. */
|
||||||
|
actual = next;
|
||||||
|
next = gfc_get_actual_arglist ();
|
||||||
|
next->expr = gfc_get_character_expr (gfc_default_character_kind, &co->loc,
|
||||||
|
transb, 1);
|
||||||
|
actual->next = next;
|
||||||
|
|
||||||
|
c1 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (a->expr), 1,
|
||||||
|
gfc_integer_4_kind);
|
||||||
|
c2 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (b->expr), 2,
|
||||||
|
gfc_integer_4_kind);
|
||||||
|
|
||||||
|
b1 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (b->expr), 1,
|
||||||
|
gfc_integer_4_kind);
|
||||||
|
|
||||||
|
/* Argument M. */
|
||||||
|
actual = next;
|
||||||
|
next = gfc_get_actual_arglist ();
|
||||||
|
next->expr = c1;
|
||||||
|
actual->next = next;
|
||||||
|
|
||||||
|
/* Argument N. */
|
||||||
|
actual = next;
|
||||||
|
next = gfc_get_actual_arglist ();
|
||||||
|
next->expr = c2;
|
||||||
|
actual->next = next;
|
||||||
|
|
||||||
|
/* Argument K. */
|
||||||
|
actual = next;
|
||||||
|
next = gfc_get_actual_arglist ();
|
||||||
|
next->expr = b1;
|
||||||
|
actual->next = next;
|
||||||
|
|
||||||
|
/* Argument ALPHA - set to one. */
|
||||||
|
actual = next;
|
||||||
|
next = gfc_get_actual_arglist ();
|
||||||
|
next->expr = gfc_get_constant_expr (type, kind, &co->loc);
|
||||||
|
if (type == BT_REAL)
|
||||||
|
mpfr_set_ui (next->expr->value.real, 1, GFC_RND_MODE);
|
||||||
|
else
|
||||||
|
mpc_set_ui (next->expr->value.complex, 1, GFC_MPC_RND_MODE);
|
||||||
|
actual->next = next;
|
||||||
|
|
||||||
|
/* Argument A. */
|
||||||
|
actual = next;
|
||||||
|
next = gfc_get_actual_arglist ();
|
||||||
|
next->expr = gfc_copy_expr (matrix_a);
|
||||||
|
actual->next = next;
|
||||||
|
|
||||||
|
/* Argument LDA. */
|
||||||
|
actual = next;
|
||||||
|
next = gfc_get_actual_arglist ();
|
||||||
|
next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (matrix_a),
|
||||||
|
1, gfc_integer_4_kind);
|
||||||
|
actual->next = next;
|
||||||
|
|
||||||
|
/* Argument B. */
|
||||||
|
actual = next;
|
||||||
|
next = gfc_get_actual_arglist ();
|
||||||
|
next->expr = gfc_copy_expr (matrix_b);
|
||||||
|
actual->next = next;
|
||||||
|
|
||||||
|
/* Argument LDB. */
|
||||||
|
actual = next;
|
||||||
|
next = gfc_get_actual_arglist ();
|
||||||
|
next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (matrix_b),
|
||||||
|
1, gfc_integer_4_kind);
|
||||||
|
actual->next = next;
|
||||||
|
|
||||||
|
/* Argument BETA - set to zero. */
|
||||||
|
actual = next;
|
||||||
|
next = gfc_get_actual_arglist ();
|
||||||
|
next->expr = gfc_get_constant_expr (type, kind, &co->loc);
|
||||||
|
if (type == BT_REAL)
|
||||||
|
mpfr_set_ui (next->expr->value.real, 0, GFC_RND_MODE);
|
||||||
|
else
|
||||||
|
mpc_set_ui (next->expr->value.complex, 0, GFC_MPC_RND_MODE);
|
||||||
|
actual->next = next;
|
||||||
|
|
||||||
|
/* Argument C. */
|
||||||
|
|
||||||
|
actual = next;
|
||||||
|
next = gfc_get_actual_arglist ();
|
||||||
|
next->expr = gfc_copy_expr (expr1);
|
||||||
|
actual->next = next;
|
||||||
|
|
||||||
|
/* Argument LDC. */
|
||||||
|
actual = next;
|
||||||
|
next = gfc_get_actual_arglist ();
|
||||||
|
next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (expr1),
|
||||||
|
1, gfc_integer_4_kind);
|
||||||
|
actual->next = next;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Code for index interchange for loops which are grouped together in DO
|
/* Code for index interchange for loops which are grouped together in DO
|
||||||
CONCURRENT or FORALL statements. This is currently only applied if the
|
CONCURRENT or FORALL statements. This is currently only applied if the
|
||||||
|
|
|
||||||
|
|
@ -2143,6 +2143,11 @@ typedef struct gfc_expr
|
||||||
|
|
||||||
unsigned int no_bounds_check : 1;
|
unsigned int no_bounds_check : 1;
|
||||||
|
|
||||||
|
/* Set this if a matmul expression has already been evaluated for conversion
|
||||||
|
to a BLAS call. */
|
||||||
|
|
||||||
|
unsigned int external_blas : 1;
|
||||||
|
|
||||||
/* If an expression comes from a Hollerith constant or compile-time
|
/* If an expression comes from a Hollerith constant or compile-time
|
||||||
evaluation of a transfer statement, it may have a prescribed target-
|
evaluation of a transfer statement, it may have a prescribed target-
|
||||||
memory representation, and these cannot always be backformed from
|
memory representation, and these cannot always be backformed from
|
||||||
|
|
|
||||||
|
|
@ -4055,6 +4055,7 @@ gfc_conv_intrinsic_funcall (gfc_se * se, gfc_expr * expr)
|
||||||
to be able to call the BLAS ?gemm functions if required and possible. */
|
to be able to call the BLAS ?gemm functions if required and possible. */
|
||||||
append_args = NULL;
|
append_args = NULL;
|
||||||
if (expr->value.function.isym->id == GFC_ISYM_MATMUL
|
if (expr->value.function.isym->id == GFC_ISYM_MATMUL
|
||||||
|
&& !expr->external_blas
|
||||||
&& sym->ts.type != BT_LOGICAL)
|
&& sym->ts.type != BT_LOGICAL)
|
||||||
{
|
{
|
||||||
tree cint = gfc_get_int_type (gfc_c_int_kind);
|
tree cint = gfc_get_int_type (gfc_c_int_kind);
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -44,4 +44,4 @@ program main
|
||||||
deallocate(calloc)
|
deallocate(calloc)
|
||||||
|
|
||||||
end program main
|
end program main
|
||||||
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "original" } }
|
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 1 "original" } }
|
||||||
|
|
|
||||||
|
|
@ -58,4 +58,4 @@ program main
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end program main
|
end program main
|
||||||
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
|
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 1 "optimized" } }
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,240 @@
|
||||||
|
C { dg-do run }
|
||||||
|
C { dg-options "-fcheck=bounds -fdump-tree-optimized -fblas-matmul-limit=1 -O -fexternal-blas" }
|
||||||
|
C { dg-additional-sources blas_gemm_routines.f }
|
||||||
|
C Test calling of BLAS routines
|
||||||
|
|
||||||
|
program main
|
||||||
|
call sub_s
|
||||||
|
call sub_d
|
||||||
|
call sub_c
|
||||||
|
call sub_z
|
||||||
|
end
|
||||||
|
|
||||||
|
subroutine sub_d
|
||||||
|
implicit none
|
||||||
|
real(8), dimension(3,2) :: a
|
||||||
|
real(8), dimension(2,3) :: at
|
||||||
|
real(8), dimension(2,4) :: b
|
||||||
|
real(8), dimension(4,2) :: bt
|
||||||
|
real(8), dimension(3,4) :: c
|
||||||
|
real(8), dimension(3,4) :: cres
|
||||||
|
real(8), dimension(:,:), allocatable :: c_alloc
|
||||||
|
data a / 2., -3., 5., -7., 11., -13./
|
||||||
|
data b /17., -23., 29., -31., 37., -39., 41., -47./
|
||||||
|
data cres /195., -304., 384., 275., -428., 548., 347., -540.,
|
||||||
|
& 692., 411., -640., 816./
|
||||||
|
|
||||||
|
c = matmul(a,b)
|
||||||
|
if (any (c /= cres)) stop 31
|
||||||
|
|
||||||
|
at = transpose(a)
|
||||||
|
c = (1.2,-2.2)
|
||||||
|
c = matmul(transpose(at), b)
|
||||||
|
if (any (c /= cres)) stop 32
|
||||||
|
|
||||||
|
bt = transpose(b)
|
||||||
|
c = (1.2,-2.1)
|
||||||
|
c = matmul(a, transpose(bt))
|
||||||
|
if (any (c /= cres)) stop 33
|
||||||
|
|
||||||
|
c_alloc = matmul(a,b)
|
||||||
|
if (any (c /= cres)) stop 34
|
||||||
|
|
||||||
|
at = transpose(a)
|
||||||
|
deallocate (c_alloc)
|
||||||
|
c = matmul(transpose(at), b)
|
||||||
|
if (any (c /= cres)) stop 35
|
||||||
|
|
||||||
|
bt = transpose(b)
|
||||||
|
allocate (c_alloc(20,20))
|
||||||
|
c = (1.2,-2.1)
|
||||||
|
c = matmul(a, transpose(bt))
|
||||||
|
if (any (c /= cres)) stop 36
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
subroutine sub_s
|
||||||
|
implicit none
|
||||||
|
real, dimension(3,2) :: a
|
||||||
|
real, dimension(2,3) :: at
|
||||||
|
real, dimension(2,4) :: b
|
||||||
|
real, dimension(4,2) :: bt
|
||||||
|
real, dimension(3,4) :: c
|
||||||
|
real, dimension(3,4) :: cres
|
||||||
|
real, dimension(:,:), allocatable :: c_alloc
|
||||||
|
data a / 2., -3., 5., -7., 11., -13./
|
||||||
|
data b /17., -23., 29., -31., 37., -39., 41., -47./
|
||||||
|
data cres /195., -304., 384., 275., -428., 548., 347., -540.,
|
||||||
|
& 692., 411., -640., 816./
|
||||||
|
|
||||||
|
c = matmul(a,b)
|
||||||
|
if (any (c /= cres)) stop 21
|
||||||
|
|
||||||
|
at = transpose(a)
|
||||||
|
c = (1.2,-2.2)
|
||||||
|
c = matmul(transpose(at), b)
|
||||||
|
if (any (c /= cres)) stop 22
|
||||||
|
|
||||||
|
bt = transpose(b)
|
||||||
|
c = (1.2,-2.1)
|
||||||
|
c = matmul(a, transpose(bt))
|
||||||
|
if (any (c /= cres)) stop 23
|
||||||
|
|
||||||
|
c_alloc = matmul(a,b)
|
||||||
|
if (any (c /= cres)) stop 24
|
||||||
|
|
||||||
|
at = transpose(a)
|
||||||
|
deallocate (c_alloc)
|
||||||
|
c = matmul(transpose(at), b)
|
||||||
|
if (any (c /= cres)) stop 25
|
||||||
|
|
||||||
|
bt = transpose(b)
|
||||||
|
allocate (c_alloc(20,20))
|
||||||
|
c = (1.2,-2.1)
|
||||||
|
c = matmul(a, transpose(bt))
|
||||||
|
if (any (c /= cres)) stop 26
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
subroutine sub_c
|
||||||
|
implicit none
|
||||||
|
complex, dimension(3,2) :: a
|
||||||
|
complex, dimension(2,3) :: at, ah
|
||||||
|
complex, dimension(2,4) :: b
|
||||||
|
complex, dimension(4,2) :: bt, bh
|
||||||
|
complex, dimension(3,4) :: c
|
||||||
|
complex, dimension(3,4) :: cres
|
||||||
|
complex, dimension(:,:), allocatable :: c_alloc
|
||||||
|
|
||||||
|
data a / (2.,-3.), (-5.,7.), (11.,-13.), (17.,19), (-23., -29),
|
||||||
|
& (-31., 37.)/
|
||||||
|
|
||||||
|
data b / (-41., 43.), (-47., 53.), (-59.,-61.), (-67., 71),
|
||||||
|
& ( 73.,79. ), (83.,-89.), (97.,-101.), (-107.,-109.)/
|
||||||
|
data cres /(-1759.,217.), (2522.,-358.), (-396.,-2376.),
|
||||||
|
& (-2789.,-11.),
|
||||||
|
& (4322.,202.), (-1992.,-4584.), (3485.,3.), (-5408.,-244.),
|
||||||
|
& (2550.,5750.), (143.,-4379.), (-478.,6794.), (7104.,-2952.) /
|
||||||
|
|
||||||
|
c = matmul(a,b)
|
||||||
|
if (any (c /= cres)) stop 1
|
||||||
|
|
||||||
|
at = transpose(a)
|
||||||
|
c = (1.2,-2.2)
|
||||||
|
c = matmul(transpose(at), b)
|
||||||
|
if (any (c /= cres)) stop 2
|
||||||
|
|
||||||
|
bt = transpose(b)
|
||||||
|
c = (1.2,-2.1)
|
||||||
|
c = matmul(a, transpose(bt))
|
||||||
|
if (any (c /= cres)) stop 3
|
||||||
|
|
||||||
|
ah = transpose(conjg(a))
|
||||||
|
c = (1.2,-2.2)
|
||||||
|
c = matmul(conjg(transpose(ah)), b)
|
||||||
|
if (any (c /= cres)) stop 4
|
||||||
|
|
||||||
|
bh = transpose(conjg(b))
|
||||||
|
c = (1.2,-2.2)
|
||||||
|
c = matmul(a, transpose(conjg(bh)))
|
||||||
|
if (any (c /= cres)) stop 5
|
||||||
|
|
||||||
|
c_alloc = matmul(a,b)
|
||||||
|
if (any (c /= cres)) stop 6
|
||||||
|
|
||||||
|
at = transpose(a)
|
||||||
|
deallocate (c_alloc)
|
||||||
|
c = matmul(transpose(at), b)
|
||||||
|
if (any (c /= cres)) stop 7
|
||||||
|
|
||||||
|
bt = transpose(b)
|
||||||
|
allocate (c_alloc(20,20))
|
||||||
|
c = (1.2,-2.1)
|
||||||
|
c = matmul(a, transpose(bt))
|
||||||
|
if (any (c /= cres)) stop 8
|
||||||
|
|
||||||
|
ah = transpose(conjg(a))
|
||||||
|
c = (1.2,-2.2)
|
||||||
|
c = matmul(conjg(transpose(ah)), b)
|
||||||
|
if (any (c /= cres)) stop 9
|
||||||
|
|
||||||
|
deallocate (c_alloc)
|
||||||
|
allocate (c_alloc(0,0))
|
||||||
|
bh = transpose(conjg(b))
|
||||||
|
c = (1.2,-2.2)
|
||||||
|
c = matmul(a, transpose(conjg(bh)))
|
||||||
|
if (any (c /= cres)) stop 10
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
subroutine sub_z
|
||||||
|
implicit none
|
||||||
|
complex(8), dimension(3,2) :: a
|
||||||
|
complex(8), dimension(2,3) :: at, ah
|
||||||
|
complex(8), dimension(2,4) :: b
|
||||||
|
complex(8), dimension(4,2) :: bt, bh
|
||||||
|
complex(8), dimension(3,4) :: c
|
||||||
|
complex(8), dimension(3,4) :: cres
|
||||||
|
complex(8), dimension(:,:), allocatable :: c_alloc
|
||||||
|
|
||||||
|
data a / (2.,-3.), (-5._8,7.), (11.,-13.), (17.,19),
|
||||||
|
& (-23., -29), (-31., 37.)/
|
||||||
|
|
||||||
|
data b / (-41., 43.), (-47., 53.), (-59.,-61.), (-67., 71),
|
||||||
|
& ( 73.,79. ), (83.,-89.), (97.,-101.), (-107.,-109.)/
|
||||||
|
data cres /(-1759.,217.), (2522.,-358.), (-396.,-2376.),
|
||||||
|
& (-2789.,-11.),
|
||||||
|
& (4322.,202.), (-1992.,-4584.), (3485.,3.), (-5408.,-244.),
|
||||||
|
& (2550.,5750.), (143.,-4379.), (-478.,6794.), (7104.,-2952.) /
|
||||||
|
|
||||||
|
c = matmul(a,b)
|
||||||
|
if (any (c /= cres)) stop 11
|
||||||
|
|
||||||
|
at = transpose(a)
|
||||||
|
c = (1.2,-2.2)
|
||||||
|
c = matmul(transpose(at), b)
|
||||||
|
if (any (c /= cres)) stop 12
|
||||||
|
|
||||||
|
bt = transpose(b)
|
||||||
|
c = (1.2,-2.1)
|
||||||
|
c = matmul(a, transpose(bt))
|
||||||
|
if (any (c /= cres)) stop 13
|
||||||
|
|
||||||
|
ah = transpose(conjg(a))
|
||||||
|
c = (1.2,-2.2)
|
||||||
|
c = matmul(conjg(transpose(ah)), b)
|
||||||
|
if (any (c /= cres)) stop 14
|
||||||
|
|
||||||
|
bh = transpose(conjg(b))
|
||||||
|
c = (1.2,-2.2)
|
||||||
|
c = matmul(a, transpose(conjg(bh)))
|
||||||
|
if (any (c /= cres)) stop 15
|
||||||
|
|
||||||
|
c_alloc = matmul(a,b)
|
||||||
|
if (any (c /= cres)) stop 16
|
||||||
|
|
||||||
|
at = transpose(a)
|
||||||
|
deallocate (c_alloc)
|
||||||
|
c = matmul(transpose(at), b)
|
||||||
|
if (any (c /= cres)) stop 17
|
||||||
|
|
||||||
|
bt = transpose(b)
|
||||||
|
allocate (c_alloc(20,20))
|
||||||
|
c = (1.2,-2.1)
|
||||||
|
c = matmul(a, transpose(bt))
|
||||||
|
if (any (c /= cres)) stop 18
|
||||||
|
|
||||||
|
ah = transpose(conjg(a))
|
||||||
|
c = (1.2,-2.2)
|
||||||
|
c = matmul(conjg(transpose(ah)), b)
|
||||||
|
if (any (c /= cres)) stop 19
|
||||||
|
|
||||||
|
deallocate (c_alloc)
|
||||||
|
allocate (c_alloc(0,0))
|
||||||
|
bh = transpose(conjg(b))
|
||||||
|
c = (1.2,-2.2)
|
||||||
|
c = matmul(a, transpose(conjg(bh)))
|
||||||
|
if (any (c /= cres)) stop 20
|
||||||
|
|
||||||
|
end
|
||||||
|
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
|
||||||
|
|
@ -0,0 +1,16 @@
|
||||||
|
C { dg-do run }
|
||||||
|
C { dg-options "-fno-realloc-lhs -fdump-tree-optimized -fcheck=bounds -fblas-matmul-limit=1 -O -fexternal-blas" }
|
||||||
|
C { dg-shouldfail "Fortran runtime error: Array bound mismatch for dimension 2 of array." }
|
||||||
|
C { dg-additional-sources blas_gemm_routines.f }
|
||||||
|
|
||||||
|
program main
|
||||||
|
real, dimension(3,2) :: a
|
||||||
|
real, dimension(2,3) :: b
|
||||||
|
real, dimension(:,:), allocatable :: ret
|
||||||
|
a = 1.0
|
||||||
|
b = 2.3
|
||||||
|
allocate(ret(3,2))
|
||||||
|
ret = matmul(a,b) ! This should throw an error.
|
||||||
|
end
|
||||||
|
! { dg-output "Fortran runtime error: Array bound mismatch for dimension 2 of array.*" }
|
||||||
|
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
|
||||||
|
|
@ -0,0 +1,19 @@
|
||||||
|
C { dg-do run }
|
||||||
|
C { dg-options "-fdump-tree-optimized -fcheck=bounds -fblas-matmul-limit=1 -O -fexternal-blas" }
|
||||||
|
C { dg-shouldfail "Fortran runtime error: Incorrect extent in argument B in MATMUL intrinsic in dimension 1.*" }
|
||||||
|
C { dg-additional-sources blas_gemm_routines.f }
|
||||||
|
program main
|
||||||
|
character(len=20) :: line
|
||||||
|
integer :: n, m
|
||||||
|
real, dimension(3,2) :: a
|
||||||
|
real, dimension(:,:), allocatable :: b
|
||||||
|
real, dimension(:,:), allocatable :: ret
|
||||||
|
a = 1.0
|
||||||
|
line = '3 3'
|
||||||
|
read (unit=line,fmt=*) n, m
|
||||||
|
allocate (b(n,m))
|
||||||
|
b = 2.3
|
||||||
|
ret = matmul(a,b) ! This should throw an error.
|
||||||
|
end
|
||||||
|
! { dg-output "Fortran runtime error: Incorrect extent in argument B in MATMUL intrinsic in dimension 1.*" }
|
||||||
|
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
C { dg-do run }
|
||||||
|
C { dg-options "-fdump-tree-optimized -fcheck=bounds -fblas-matmul-limit=1 -O -fexternal-blas" }
|
||||||
|
C { dg-shouldfail "Fortran runtime error: Incorrect extent in argument B in MATMUL intrinsic in dimension 1" }
|
||||||
|
C { dg-additional-sources blas_gemm_routines.f }
|
||||||
|
|
||||||
|
program main
|
||||||
|
character(len=20) :: line
|
||||||
|
integer :: n, m
|
||||||
|
real, dimension(3,2) :: a
|
||||||
|
real, dimension(:,:), allocatable :: b
|
||||||
|
real, dimension(:,:), allocatable :: ret
|
||||||
|
a = 1.0
|
||||||
|
line = '4 3'
|
||||||
|
read (unit=line,fmt=*) n, m
|
||||||
|
allocate (b(n,m))
|
||||||
|
b = 2.3
|
||||||
|
ret = matmul(transpose(a),b) ! This should throw an error.
|
||||||
|
end
|
||||||
|
! { dg-output "Fortran runtime error: Incorrect extent in argument B in MATMUL intrinsic in dimension 1.*" }
|
||||||
|
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
! { dg-do compile }
|
! { dg-do compile }
|
||||||
! { dg-options "-fdefault-real-8 -fexternal-blas -fdump-tree-original -finline-matmul-limit=0" }
|
! { dg-options "-fdefault-real-8 -fexternal-blas -fblas-matmul-limit=1 -fdump-tree-original -finline-matmul-limit=0" }
|
||||||
!
|
!
|
||||||
! PR fortran/54463
|
! PR fortran/54463
|
||||||
!
|
!
|
||||||
|
|
@ -8,8 +8,9 @@
|
||||||
program test
|
program test
|
||||||
implicit none
|
implicit none
|
||||||
real, dimension(3,3) :: A
|
real, dimension(3,3) :: A
|
||||||
|
call random_number(a)
|
||||||
A = matmul(A,A)
|
A = matmul(A,A)
|
||||||
end program test
|
end program test
|
||||||
|
|
||||||
! { dg-final { scan-tree-dump-times "sgemm_" 0 "original" } }
|
! { dg-final { scan-tree-dump-times "sgemm" 0 "original" } }
|
||||||
! { dg-final { scan-tree-dump-times "dgemm_" 1 "original" } }
|
! { dg-final { scan-tree-dump-times "dgemm" 1 "original" } }
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue