mirror of git://gcc.gnu.org/git/gcc.git
PR libfortran/99218 - matmul on temporary array accesses invalid memory
Do not invoke tuned rank-2 times rank-2 matmul if rank(b) == 1. libgfortran/ChangeLog: PR libfortran/99218 * m4/matmul_internal.m4: Invoke tuned matmul only for rank(b)>1. * generated/matmul_c10.c: Regenerated. * generated/matmul_c16.c: Likewise. * generated/matmul_c4.c: Likewise. * generated/matmul_c8.c: Likewise. * generated/matmul_i1.c: Likewise. * generated/matmul_i16.c: Likewise. * generated/matmul_i2.c: Likewise. * generated/matmul_i4.c: Likewise. * generated/matmul_i8.c: Likewise. * generated/matmul_r10.c: Likewise. * generated/matmul_r16.c: Likewise. * generated/matmul_r4.c: Likewise. * generated/matmul_r8.c: Likewise. * generated/matmulavx128_c10.c: Likewise. * generated/matmulavx128_c16.c: Likewise. * generated/matmulavx128_c4.c: Likewise. * generated/matmulavx128_c8.c: Likewise. * generated/matmulavx128_i1.c: Likewise. * generated/matmulavx128_i16.c: Likewise. * generated/matmulavx128_i2.c: Likewise. * generated/matmulavx128_i4.c: Likewise. * generated/matmulavx128_i8.c: Likewise. * generated/matmulavx128_r10.c: Likewise. * generated/matmulavx128_r16.c: Likewise. * generated/matmulavx128_r4.c: Likewise. * generated/matmulavx128_r8.c: Likewise. gcc/testsuite/ChangeLog: PR libfortran/99218 * gfortran.dg/matmul_21.f90: New test.
This commit is contained in:
parent
1e5cdb9f89
commit
b1bee29167
|
|
@ -0,0 +1,15 @@
|
|||
! { dg-do run }
|
||||
! PR libfortran/99218 - matmul on temporary array accesses invalid memory
|
||||
|
||||
! Regression test: multiply the transpose of a rank-2 array by a rank-1
! vector.  According to the PR, the library must not take the tuned
! rank-2 times rank-2 matmul path when the second argument has rank 1.
program p
|
||||
implicit none
|
||||
! Extent of the long dimension (length of the result vector f).
integer, parameter :: nState = 300000
|
||||
! Extent of the short dimension; transpose (G) therefore has a single column.
integer, parameter :: nCon = 1
|
||||
real, parameter :: ZERO = 0.0
|
||||
! All arrays are zero-initialised, so the product below must be zero.
real :: G(nCon,nState) = ZERO
|
||||
! H is declared and initialised but is not referenced by any statement below.
real :: H(nState,nCon) = ZERO
|
||||
real :: lambda(nCon) = ZERO
|
||||
real :: f(nState) = ZERO
|
||||
! (nState x nCon) transposed operand times a rank-1 vector of size nCon
! yields a rank-1 result of size nState.
f = matmul (transpose (G), lambda)
|
||||
! Only the first element is checked; a non-zero value fails with stop code 1.
if (f(1) /= ZERO) stop 1
|
||||
end program
|
||||
|
|
@ -276,7 +276,8 @@ matmul_c10_avx (gfc_array_c10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -844,7 +845,8 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1412,7 +1414,8 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1994,7 +1997,8 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -2635,7 +2639,8 @@ matmul_c10 (gfc_array_c10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -276,7 +276,8 @@ matmul_c16_avx (gfc_array_c16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -844,7 +845,8 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1412,7 +1414,8 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1994,7 +1997,8 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -2635,7 +2639,8 @@ matmul_c16 (gfc_array_c16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -276,7 +276,8 @@ matmul_c4_avx (gfc_array_c4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -844,7 +845,8 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1412,7 +1414,8 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1994,7 +1997,8 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -2635,7 +2639,8 @@ matmul_c4 (gfc_array_c4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -276,7 +276,8 @@ matmul_c8_avx (gfc_array_c8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -844,7 +845,8 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1412,7 +1414,8 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1994,7 +1997,8 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -2635,7 +2639,8 @@ matmul_c8 (gfc_array_c8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -276,7 +276,8 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -844,7 +845,8 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1412,7 +1414,8 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1994,7 +1997,8 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -2635,7 +2639,8 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -276,7 +276,8 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -844,7 +845,8 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1412,7 +1414,8 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1994,7 +1997,8 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -2635,7 +2639,8 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -276,7 +276,8 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -844,7 +845,8 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1412,7 +1414,8 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1994,7 +1997,8 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -2635,7 +2639,8 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -276,7 +276,8 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -844,7 +845,8 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1412,7 +1414,8 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1994,7 +1997,8 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -2635,7 +2639,8 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -276,7 +276,8 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -844,7 +845,8 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1412,7 +1414,8 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1994,7 +1997,8 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -2635,7 +2639,8 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -276,7 +276,8 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -844,7 +845,8 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1412,7 +1414,8 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1994,7 +1997,8 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -2635,7 +2639,8 @@ matmul_r10 (gfc_array_r10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -276,7 +276,8 @@ matmul_r16_avx (gfc_array_r16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -844,7 +845,8 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1412,7 +1414,8 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1994,7 +1997,8 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -2635,7 +2639,8 @@ matmul_r16 (gfc_array_r16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -276,7 +276,8 @@ matmul_r4_avx (gfc_array_r4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -844,7 +845,8 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1412,7 +1414,8 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1994,7 +1997,8 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -2635,7 +2639,8 @@ matmul_r4 (gfc_array_r4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -276,7 +276,8 @@ matmul_r8_avx (gfc_array_r8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -844,7 +845,8 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1412,7 +1414,8 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -1994,7 +1997,8 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -2635,7 +2639,8 @@ matmul_r8 (gfc_array_r8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -241,7 +241,8 @@ matmul_c10_avx128_fma3 (gfc_array_c10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -810,7 +811,8 @@ matmul_c10_avx128_fma4 (gfc_array_c10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -241,7 +241,8 @@ matmul_c16_avx128_fma3 (gfc_array_c16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -810,7 +811,8 @@ matmul_c16_avx128_fma4 (gfc_array_c16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -241,7 +241,8 @@ matmul_c4_avx128_fma3 (gfc_array_c4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -810,7 +811,8 @@ matmul_c4_avx128_fma4 (gfc_array_c4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -241,7 +241,8 @@ matmul_c8_avx128_fma3 (gfc_array_c8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -810,7 +811,8 @@ matmul_c8_avx128_fma4 (gfc_array_c8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -241,7 +241,8 @@ matmul_i1_avx128_fma3 (gfc_array_i1 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -810,7 +811,8 @@ matmul_i1_avx128_fma4 (gfc_array_i1 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -241,7 +241,8 @@ matmul_i16_avx128_fma3 (gfc_array_i16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -810,7 +811,8 @@ matmul_i16_avx128_fma4 (gfc_array_i16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -241,7 +241,8 @@ matmul_i2_avx128_fma3 (gfc_array_i2 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -810,7 +811,8 @@ matmul_i2_avx128_fma4 (gfc_array_i2 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -241,7 +241,8 @@ matmul_i4_avx128_fma3 (gfc_array_i4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -810,7 +811,8 @@ matmul_i4_avx128_fma4 (gfc_array_i4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -241,7 +241,8 @@ matmul_i8_avx128_fma3 (gfc_array_i8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -810,7 +811,8 @@ matmul_i8_avx128_fma4 (gfc_array_i8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -241,7 +241,8 @@ matmul_r10_avx128_fma3 (gfc_array_r10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -810,7 +811,8 @@ matmul_r10_avx128_fma4 (gfc_array_r10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -241,7 +241,8 @@ matmul_r16_avx128_fma3 (gfc_array_r16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -810,7 +811,8 @@ matmul_r16_avx128_fma4 (gfc_array_r16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -241,7 +241,8 @@ matmul_r4_avx128_fma3 (gfc_array_r4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -810,7 +811,8 @@ matmul_r4_avx128_fma4 (gfc_array_r4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -241,7 +241,8 @@ matmul_r8_avx128_fma3 (gfc_array_r8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
@ -810,7 +811,8 @@ matmul_r8_avx128_fma4 (gfc_array_r8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
|
|
@ -192,7 +192,8 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
|
|||
}
|
||||
}
|
||||
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1)
|
||||
if (rxstride == 1 && axstride == 1 && bxstride == 1
|
||||
&& GFC_DESCRIPTOR_RANK (b) != 1)
|
||||
{
|
||||
/* This block of code implements a tuned matmul, derived from
|
||||
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
|
||||
|
|
|
|||
Loading…
Reference in New Issue