re PR libfortran/51119 (MATMUL slow for large matrices)

2017-02-26  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/51119
	* options.c (gfc_post_options): Set default limit for matmul
	inlining to 30.
	* invoke.texi: Document change.

2017-02-26  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/51119
	* gfortran.dg/inline_matmul_1.f90: Scan optimized dump instead
	of original.
	* gfortran.dg/inline_matmul_11.f90: Likewise.
	* gfortran.dg/inline_matmul_9.f90: Likewise.
	* gfortran.dg/matmul_13.f90: New test.
	* gfortran.dg/matmul_14.f90: New test.

From-SVN: r245745
This commit is contained in:
Thomas Koenig 2017-02-26 13:22:43 +00:00
parent 462a7b5685
commit c7b608a9f7
9 changed files with 58 additions and 12 deletions

View File

@ -1,3 +1,10 @@
2017-02-26 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/51119
* options.c (gfc_post_options): Set default limit for matmul
inlining to 30.
* invoke.texi: Document change.
2017-02-25 Dominique d'Humieres <dominiq@lps.ens.fr> 2017-02-25 Dominique d'Humieres <dominiq@lps.ens.fr>
PR fortran/79601 PR fortran/79601

View File

@ -1629,9 +1629,8 @@ for matrices with size up to @var{n}. If the matrices involved are not
square, the size comparison is performed using the geometric mean of square, the size comparison is performed using the geometric mean of
the dimensions of the argument and result matrices. the dimensions of the argument and result matrices.
The default value for @var{n} is the value specified for The default value for @var{n} is 30. The @code{-fblas-matmul-limit}
@code{-fblas-matmul-limit} if this option is specified, or unlimited can be used to change this value.
otherwise.
@item -frecursive @item -frecursive
@opindex @code{frecursive} @opindex @code{frecursive}

View File

@ -388,10 +388,16 @@ gfc_post_options (const char **pfilename)
if (!flag_automatic) if (!flag_automatic)
flag_max_stack_var_size = 0; flag_max_stack_var_size = 0;
/* If we call BLAS directly, only inline up to the BLAS limit. */ /* If the user did not specify an inline matmul limit, inline up to the BLAS
limit or up to 30 if no external BLAS is specified. */
if (flag_external_blas && flag_inline_matmul_limit < 0) if (flag_inline_matmul_limit < 0)
flag_inline_matmul_limit = flag_blas_matmul_limit; {
if (flag_external_blas)
flag_inline_matmul_limit = flag_blas_matmul_limit;
else
flag_inline_matmul_limit = 30;
}
/* Optimization implies front end optimization, unless the user /* Optimization implies front end optimization, unless the user
specified it directly. */ specified it directly. */

View File

@ -1,3 +1,13 @@
2017-02-26 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/51119
* gfortran.dg/inline_matmul_1.f90: Scan optimized dump instead
of original.
* gfortran.dg/inline_matmul_11.f90: Likewise.
* gfortran.dg/inline_matmul_9.f90: Likewise.
* gfortran.dg/matmul_13.f90: New test.
* gfortran.dg/matmul_14.f90: New test.
2017-02-25 Jakub Jelinek <jakub@redhat.com> 2017-02-25 Jakub Jelinek <jakub@redhat.com>
PR middle-end/79396 PR middle-end/79396

View File

@ -1,5 +1,5 @@
! { dg-do run } ! { dg-do run }
! { dg-options "-ffrontend-optimize -fdump-tree-original -Wrealloc-lhs" } ! { dg-options "-ffrontend-optimize -fdump-tree-optimized -Wrealloc-lhs" }
! PR 37131 - check basic functionality of inlined matmul, making ! PR 37131 - check basic functionality of inlined matmul, making
! sure that the library is not called, with and without reallocation. ! sure that the library is not called, with and without reallocation.
@ -149,4 +149,4 @@ program main
end program main end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "original" } } ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }

View File

@ -1,5 +1,5 @@
! { dg-do run } ! { dg-do run }
! { dg-additional-options "-ffrontend-optimize -fdump-tree-original" } ! { dg-additional-options "-ffrontend-optimize -fdump-tree-optimized" }
! PR fortran/66176 - inline conjg for matmul. ! PR fortran/66176 - inline conjg for matmul.
program main program main
complex, dimension(3,2) :: a complex, dimension(3,2) :: a
@ -29,4 +29,4 @@ program main
c = matmul(conjg(a), b) c = matmul(conjg(a), b)
if (any(conjg(c) /= res2)) call abort if (any(conjg(c) /= res2)) call abort
end program main end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "original" } } ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }

View File

@ -1,5 +1,5 @@
! { dg-do run } ! { dg-do run }
! { dg-options "-ffrontend-optimize -fdump-tree-original" } ! { dg-options "-ffrontend-optimize -fdump-tree-optimized" }
! PR 66041 - this used to ICE with an incomplete fix for the PR. ! PR 66041 - this used to ICE with an incomplete fix for the PR.
program main program main
implicit none implicit none
@ -21,4 +21,4 @@ program main
if (any (c2-reshape([248., -749.],shape(c2)) /= 0.)) call abort if (any (c2-reshape([248., -749.],shape(c2)) /= 0.)) call abort
end program main end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "original" } } ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }

View File

@ -0,0 +1,12 @@
! { dg-do compile }
! { dg-options "-O3 -fdump-tree-optimized" }
! Check that the default matmul inlining limit of 30 applies: n = 31 exceeds it, so one library call must remain.
program main
integer, parameter :: n = 31  ! one above the default inlining limit of 30
real, dimension(n,n) :: a, b, c
call random_number(a)
call random_number(b)
c = matmul(a,b)  ! the only matmul here; the final scan expects exactly one _gfortran_matmul_r4 match
print *,sum(c)  ! presumably keeps c live so the product is not optimized away at -O3 -- TODO confirm
end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul_r4" 1 "optimized" } }

View File

@ -0,0 +1,12 @@
! { dg-do compile }
! { dg-options "-O3 -fdump-tree-optimized" }
! Check that the default matmul inlining limit of 30 applies: n = 30 is within it, so no library call may remain.
program main
integer, parameter :: n = 30  ! exactly the default inlining limit of 30
real, dimension(n,n) :: a, b, c
call random_number(a)
call random_number(b)
c = matmul(a,b)  ! the only matmul here; the final scan expects zero _gfortran_matmul_r4 matches
print *,sum(c)  ! presumably keeps c live so the product is not optimized away at -O3 -- TODO confirm
end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul_r4" 0 "optimized" } }