mirror of git://gcc.gnu.org/git/gcc.git
Add BMI2 support.
gcc/ 2011-08-23 Uros Bizjak <ubizjak@gmail.com> * config/i386/i386.md (type): Add imulx, ishiftx and rotatex. (length_immediate): Handle imulx, ishiftx and rotatex. (imm_disp): Ditto. (isa): Add bmi2. (enabled): Handle bmi2. (w): New mode attribute. (*mul<mode><dwi>3): Split from *<u>mul<mode><dwi>3. (*umul<mode><dwi>3): Ditto. Add imulx BMI2 alternative. (*bmi2_umulditi3_1): New insn pattern. (*bmi2_umulsidi3_1): Ditto. (*umul<mode><dwi>3 splitter): New splitter to avoid flags dependency. (*bmi2_ashl<mode>3_1): New insn pattern. (*ashl<mode>3_1): Add ishiftx BMI2 alternative. (*ashl<mode>3_1 splitter): New splitter to avoid flags dependency. (*bmi2_ashlsi3_1_zext): New insn pattern. (*ashlsi3_1_zext): Add ishiftx BMI2 alternative. (*ashlsi3_1_zext splitter): New splitter to avoid flags dependency. (*bmi2_<shiftrt_insn><mode>3_1): New insn pattern. (*<shiftrt_insn><mode>3_1): Add ishiftx BMI2 alternative. (*<shiftrt_insn><mode>3_1 splitter): New splitter to avoid flags dependency. (*bmi2_<shiftrt_insn>si3_1_zext): New insn pattern. (*<shiftrt_insn>si3_1_zext): Add ishiftx BMI2 alternative. (*<shiftrt_insn>si3_1_zext splitter): New splitter to avoid flags dependency. (*bmi2_rorx<mode>3_1): New insn pattern. (*<rotate_insn><mode>3_1): Add rotatex BMI2 alternative. (*rotate<mode>3_1 splitter): New splitter to avoid flags dependency. (*rotatert<mode>3_1 splitter): Ditto. (*bmi2_rorxsi3_1_zext): New insn pattern. (*<rotate_insn>si3_1_zext): Add rotatex BMI2 alternative. (*rotatesi3_1_zext splitter): New splitter to avoid flags dependency. (*rotatertsi3_1_zext splitter): Ditto. 2011-08-23 Kirill Yukhin <kirill.yukhin@intel.com> * common/config/i386/i386-common.c (OPTION_MASK_ISA_BMI2_SET): New. (OPTION_MASK_ISA_BMI2_UNSET): Likewise. (ix86_handle_option): Handle OPT_mbmi2 case. * config.gcc (i[34567]86-*-*): Add bmi2intrin.h. (x86_64-*-*): Likewise. * config/i386/bmi2intrin.h: New file. * config/i386/cpuid.h (bit_BMI2): New. * config/i386/driver-i386.c (host_detect_local_cpu): Detect BMI2 feature. * config/i386/i386-c.c (ix86_target_macros_internal): Conditionally define __BMI2__. * config/i386/i386.c (ix86_option_override_internal): Define PTA_BMI2. Handle BMI2 option. (ix86_valid_target_attribute_inner_p): Handle BMI2 option. (print_reg): New code. (ix86_print_operand): Likewise. (ix86_builtins): Add IX86_BUILTIN_BZHI32, IX86_BUILTIN_BZHI64, IX86_BUILTIN_PDEP32, IX86_BUILTIN_PDEP64, IX86_BUILTIN_PEXT32, IX86_BUILTIN_PEXT64. (bdesc_args): Add IX86_BUILTIN_BZHI32, IX86_BUILTIN_BZHI64, IX86_BUILTIN_PDEP32, IX86_BUILTIN_PDEP64, IX86_BUILTIN_PEXT32, IX86_BUILTIN_PEXT64. * config/i386/i386.h (TARGET_BMI2): New. * config/i386/i386.md (UNSPEC_PDEP): New. (UNSPEC_PEXT): Likewise. (*bmi2_bzhi_<mode>3): Likewise. (*bmi2_pdep_<mode>3): Likewise. (*bmi2_pext_<mode>3): Likewise. * config/i386/i386.opt (mbmi2): New. * config/i386/x86intrin.h: Include bmi2intrin.h when __BMI2__ is defined. * doc/extend.texi: Document BMI2 built-in functions. * doc/invoke.texi: Document -mbmi2. gcc/testsuite/ 2011-08-23 Kirill Yukhin <kirill.yukhin@intel.com> * g++.dg/other/i386-2.C: Add -mbmi2 check. * g++.dg/other/i386-3.C: Likewise. * gcc.target/i386/bmi2-bzhi32-1.c: New testcase. * gcc.target/i386/bmi2-bzhi32-1a.c: Likewise. * gcc.target/i386/bmi2-bzhi64-1.c: Likewise. * gcc.target/i386/bmi2-bzhi64-1a.c: Likewise. * gcc.target/i386/bmi2-mulx32-1.c: Likewise. * gcc.target/i386/bmi2-mulx32-1a.c: Likewise. * gcc.target/i386/bmi2-mulx64-1.c: Likewise. * gcc.target/i386/bmi2-mulx64-1a.c: Likewise. * gcc.target/i386/bmi2-pdep32-1.c: Likewise. * gcc.target/i386/bmi2-pdep32-1a.c: Likewise. * gcc.target/i386/bmi2-pdep64-1.c: Likewise. * gcc.target/i386/bmi2-pdep64-1a.c: Likewise. * gcc.target/i386/bmi2-pext32-1.c: Likewise. * gcc.target/i386/bmi2-pext32-1a.c: Likewise. * gcc.target/i386/bmi2-pext64-1.c: Likewise. * gcc.target/i386/bmi2-pext64-1a.c: Likewise. * gcc.target/i386/bmi2-rorx32-1.c: Likewise. * gcc.target/i386/bmi2-rorx32-1a.c: Likewise. * gcc.target/i386/bmi2-rorx64-1.c: Likewise. * gcc.target/i386/bmi2-rorx64-1a.c: Likewise. * gcc.target/i386/bmi2-sarx32-1.c: Likewise. * gcc.target/i386/bmi2-sarx32-1a.c: Likewise. * gcc.target/i386/bmi2-sarx64-1.c: Likewise. * gcc.target/i386/bmi2-sarx64-1a.c: Likewise. * gcc.target/i386/bmi2-shlx32-1.c: Likewise. * gcc.target/i386/bmi2-shlx32-1a.c: Likewise. * gcc.target/i386/bmi2-shlx64-1.c: Likewise. * gcc.target/i386/bmi2-shlx64-1a.c: Likewise. * gcc.target/i386/bmi2-shrx32-1.c: Likewise. * gcc.target/i386/bmi2-shrx32-1a.c: Likewise. * gcc.target/i386/bmi2-shrx64-1.c: Likewise. * gcc.target/i386/bmi2-shrx64-1a.c: Likewise. * gcc.target/i386/i386.exp (check_effective_target_bmi2): New. * gcc.target/i386/sse-12.c: Add BMI2. * gcc.target/i386/sse-13.c: Likewise. * gcc.target/i386/sse-14.c: Likewise. * gcc.target/i386/sse-22.c: Likewise. * gcc.target/i386/sse-23.c: Likewise. From-SVN: r178001
This commit is contained in:
parent
30d1352ecc
commit
82feeb8d68
|
|
@ -1,3 +1,81 @@
|
|||
2011-08-23 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* config/i386/i386.md (type): Add imulx, ishiftx and rotatex.
|
||||
(length_immediate): Handle imulx, ishiftx and rotatex.
|
||||
(imm_disp): Ditto.
|
||||
(isa): Add bmi2.
|
||||
(enabled): Handle bmi2.
|
||||
(w): New mode attribute.
|
||||
(*mul<mode><dwi>3): Split from *<u>mul<mode><dwi>3.
|
||||
(*umul<mode><dwi>3): Ditto. Add imulx BMI2 alternative.
|
||||
(*bmi2_umulditi3_1): New insn pattern.
|
||||
(*bmi2_umulsidi3_1): Ditto.
|
||||
(*umul<mode><dwi>3 splitter): New splitter to avoid flags
|
||||
dependency.
|
||||
(*bmi2_ashl<mode>3_1): New insn pattern.
|
||||
(*ashl<mode>3_1): Add ishiftx BMI2 alternative.
|
||||
(*ashl<mode>3_1 splitter): New splitter to avoid flags
|
||||
dependency.
|
||||
(*bmi2_ashlsi3_1_zext): New insn pattern.
|
||||
(*ashlsi3_1_zext): Add ishiftx BMI2 alternative.
|
||||
(*ashlsi3_1_zext splitter): New splitter to avoid flags
|
||||
dependency.
|
||||
(*bmi2_<shiftrt_insn><mode>3_1): New insn pattern.
|
||||
(*<shiftrt_insn><mode>3_1): Add ishiftx BMI2 alternative.
|
||||
(*<shiftrt_insn><mode>3_1 splitter): New splitter to avoid
|
||||
flags dependency.
|
||||
(*bmi2_<shiftrt_insn>si3_1_zext): New insn pattern.
|
||||
(*<shiftrt_insn>si3_1_zext): Add ishiftx BMI2 alternative.
|
||||
(*<shiftrt_insn>si3_1_zext splitter): New splitter to avoid
|
||||
flags dependency.
|
||||
(*bmi2_rorx<mode>3_1): New insn pattern.
|
||||
(*<rotate_insn><mode>3_1): Add rotatex BMI2 alternative.
|
||||
(*rotate<mode>3_1 splitter): New splitter to avoid flags
|
||||
dependency.
|
||||
(*rotatert<mode>3_1 splitter): Ditto.
|
||||
(*bmi2_rorxsi3_1_zext): New insn pattern.
|
||||
(*<rotate_insn>si3_1_zext): Add rotatex BMI2 alternative.
|
||||
(*rotatesi3_1_zext splitter): New splitter to avoid flags
|
||||
dependency.
|
||||
(*rotatertsi3_1_zext splitter): Ditto.
|
||||
|
||||
2011-08-23 Kirill Yukhin <kirill.yukhin@intel.com>
|
||||
|
||||
* common/config/i386/i386-common.c (OPTION_MASK_ISA_BMI2_SET):
|
||||
New.
|
||||
(OPTION_MASK_ISA_BMI2_UNSET): Likewise.
|
||||
(ix86_handle_option): Handle OPT_mbmi2 case.
|
||||
* config.gcc (i[34567]86-*-*): Add bmi2intrin.h.
|
||||
(x86_64-*-*): Likewise.
|
||||
* config/i386/bmi2intrin.h: New file.
|
||||
* config/i386/cpuid.h (bit_BMI2): New.
|
||||
* config/i386/driver-i386.c (host_detect_local_cpu): Detect
|
||||
BMI2 feature.
|
||||
* config/i386/i386-c.c (ix86_target_macros_internal):
|
||||
Conditionally define __BMI2__.
|
||||
* config/i386/i386.c (ix86_option_override_internal): Define PTA_BMI2.
|
||||
Handle BMI2 option.
|
||||
(ix86_valid_target_attribute_inner_p): Handle BMI2 option.
|
||||
(print_reg): New code.
|
||||
(ix86_print_operand): Likewise.
|
||||
(ix86_builtins): Add IX86_BUILTIN_BZHI32, IX86_BUILTIN_BZHI64,
|
||||
IX86_BUILTIN_PDEP32, IX86_BUILTIN_PDEP64, IX86_BUILTIN_PEXT32,
|
||||
IX86_BUILTIN_PEXT64.
|
||||
(bdesc_args): Add IX86_BUILTIN_BZHI32, IX86_BUILTIN_BZHI64,
|
||||
IX86_BUILTIN_PDEP32, IX86_BUILTIN_PDEP64, IX86_BUILTIN_PEXT32,
|
||||
IX86_BUILTIN_PEXT64.
|
||||
* config/i386/i386.h (TARGET_BMI2): New.
|
||||
* config/i386/i386.md (UNSPEC_PDEP): New.
|
||||
(UNSPEC_PEXT): Likewise.
|
||||
(*bmi2_bzhi_<mode>3): Likewise.
|
||||
(*bmi2_pdep_<mode>3): Likewise.
|
||||
(*bmi2_pext_<mode>3): Likewise.
|
||||
* config/i386/i386.opt (mbmi2): New.
|
||||
* config/i386/x86intrin.h: Include bmi2intrin.h when __BMI2__
|
||||
is defined.
|
||||
* doc/extend.texi: Document BMI2 built-in functions.
|
||||
* doc/invoke.texi: Document -mbmi2.
|
||||
|
||||
2011-08-23 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR middle-end/50161
|
||||
|
|
|
|||
|
|
@ -79,6 +79,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
(OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
|
||||
|
||||
#define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
|
||||
#define OPTION_MASK_ISA_BMI2_SET OPTION_MASK_ISA_BMI2
|
||||
#define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
|
||||
#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
|
||||
#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
|
||||
|
|
@ -137,6 +138,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
|
||||
#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
|
||||
#define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
|
||||
#define OPTION_MASK_ISA_BMI2_UNSET OPTION_MASK_ISA_BMI2
|
||||
#define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
|
||||
#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
|
||||
#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
|
||||
|
|
@ -395,6 +397,19 @@ ix86_handle_option (struct gcc_options *opts,
|
|||
}
|
||||
return true;
|
||||
|
||||
case OPT_mbmi2:
|
||||
if (value)
|
||||
{
|
||||
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2_SET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI2_SET;
|
||||
}
|
||||
else
|
||||
{
|
||||
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_BMI2_UNSET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI2_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_mtbm:
|
||||
if (value)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -352,7 +352,8 @@ i[34567]86-*-*)
|
|||
nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
|
||||
immintrin.h x86intrin.h avxintrin.h xopintrin.h
|
||||
ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
|
||||
lzcntintrin.h bmiintrin.h tbmintrin.h avx2intrin.h"
|
||||
lzcntintrin.h bmiintrin.h bmi2intrin.h tbmintrin.h
|
||||
avx2intrin.h"
|
||||
;;
|
||||
x86_64-*-*)
|
||||
cpu_type=i386
|
||||
|
|
@ -364,7 +365,8 @@ x86_64-*-*)
|
|||
nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
|
||||
immintrin.h x86intrin.h avxintrin.h xopintrin.h
|
||||
ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
|
||||
lzcntintrin.h bmiintrin.h tbmintrin.h avx2intrin.h"
|
||||
lzcntintrin.h bmiintrin.h tbmintrin.h bmi2intrin.h
|
||||
avx2intrin.h"
|
||||
need_64bit_hwint=yes
|
||||
;;
|
||||
ia64-*-*)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,81 @@
|
|||
/* Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86INTRIN_H_INCLUDED
|
||||
# error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __BMI2__
|
||||
# error "BMI2 instruction set not enabled"
|
||||
#endif /* __BMI2__ */
|
||||
|
||||
#ifndef _BMI2INTRIN_H_INCLUDED
|
||||
#define _BMI2INTRIN_H_INCLUDED
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_bzhi_u32 (unsigned int __X, unsigned int __Y)
|
||||
{
|
||||
return __builtin_ia32_bzhi_si (__X, __Y);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_pdep_u32 (unsigned int __X, unsigned int __Y)
|
||||
{
|
||||
return __builtin_ia32_pdep_si (__X, __Y);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_pext_u32 (unsigned int __X, unsigned int __Y)
|
||||
{
|
||||
return __builtin_ia32_pext_si (__X, __Y);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_bzhi_u64 (unsigned long long __X, unsigned long long __Y)
|
||||
{
|
||||
return __builtin_ia32_bzhi_di (__X, __Y);
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_pdep_u64 (unsigned long long __X, unsigned long long __Y)
|
||||
{
|
||||
return __builtin_ia32_pdep_di (__X, __Y);
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_pext_u64 (unsigned long long __X, unsigned long long __Y)
|
||||
{
|
||||
return __builtin_ia32_pext_di (__X, __Y);
|
||||
}
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
#endif /* _BMI2INTRIN_H_INCLUDED */
|
||||
|
|
@ -67,6 +67,7 @@
|
|||
#define bit_FSGSBASE (1 << 0)
|
||||
#define bit_BMI (1 << 3)
|
||||
#define bit_AVX2 (1 << 5)
|
||||
#define bit_BMI2 (1 << 8)
|
||||
|
||||
#if defined(__i386__) && defined(__PIC__)
|
||||
/* %ebx may be the PIC register. */
|
||||
|
|
|
|||
|
|
@ -396,7 +396,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
|
|||
unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
|
||||
unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
|
||||
unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
|
||||
unsigned int has_bmi = 0, has_tbm = 0, has_lzcnt = 0;
|
||||
unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
|
||||
|
||||
bool arch;
|
||||
|
||||
|
|
@ -475,6 +475,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
|
|||
|
||||
has_bmi = ebx & bit_BMI;
|
||||
has_avx2 = ebx & bit_AVX2;
|
||||
has_bmi2 = ebx & bit_BMI2;
|
||||
}
|
||||
|
||||
if (!arch)
|
||||
|
|
@ -715,6 +716,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
|
|||
const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
|
||||
const char *xop = has_xop ? " -mxop" : " -mno-xop";
|
||||
const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
|
||||
const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
|
||||
const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
|
||||
const char *avx = has_avx ? " -mavx" : " -mno-avx";
|
||||
const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
|
||||
|
|
@ -723,8 +725,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
|
|||
const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
|
||||
|
||||
options = concat (options, cx16, sahf, movbe, ase, pclmul,
|
||||
popcnt, abm, lwp, fma, fma4, xop, bmi, tbm,
|
||||
avx2, avx, sse4_2, sse4_1, lzcnt, NULL);
|
||||
popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2,
|
||||
tbm, avx, avx2, sse4_2, sse4_1, lzcnt, NULL);
|
||||
}
|
||||
|
||||
done:
|
||||
|
|
|
|||
|
|
@ -273,6 +273,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
|||
def_or_undef (parse_in, "__ABM__");
|
||||
if (isa_flag & OPTION_MASK_ISA_BMI)
|
||||
def_or_undef (parse_in, "__BMI__");
|
||||
if (isa_flag & OPTION_MASK_ISA_BMI2)
|
||||
def_or_undef (parse_in, "__BMI2__");
|
||||
if (isa_flag & OPTION_MASK_ISA_LZCNT)
|
||||
def_or_undef (parse_in, "__LZCNT__");
|
||||
if (isa_flag & OPTION_MASK_ISA_TBM)
|
||||
|
|
|
|||
|
|
@ -2664,6 +2664,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
|
|||
{ "-mmmx", OPTION_MASK_ISA_MMX },
|
||||
{ "-mabm", OPTION_MASK_ISA_ABM },
|
||||
{ "-mbmi", OPTION_MASK_ISA_BMI },
|
||||
{ "-mbmi2", OPTION_MASK_ISA_BMI2 },
|
||||
{ "-mlzcnt", OPTION_MASK_ISA_LZCNT },
|
||||
{ "-mtbm", OPTION_MASK_ISA_TBM },
|
||||
{ "-mpopcnt", OPTION_MASK_ISA_POPCNT },
|
||||
|
|
@ -2921,6 +2922,7 @@ ix86_option_override_internal (bool main_args_p)
|
|||
#define PTA_TBM (HOST_WIDE_INT_1 << 28)
|
||||
#define PTA_XOP (HOST_WIDE_INT_1 << 29)
|
||||
#define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
|
||||
#define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
|
||||
/* if this reaches 64, need to widen struct pta flags below */
|
||||
|
||||
static struct pta
|
||||
|
|
@ -2978,8 +2980,8 @@ ix86_option_override_internal (bool main_args_p)
|
|||
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
|
||||
| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
|
||||
| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
|
||||
| PTA_RDRND | PTA_F16C | PTA_BMI | PTA_LZCNT | PTA_FMA
|
||||
| PTA_MOVBE},
|
||||
| PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
|
||||
| PTA_FMA | PTA_MOVBE},
|
||||
{"atom", PROCESSOR_ATOM, CPU_ATOM,
|
||||
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
|
||||
| PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
|
||||
|
|
@ -3300,6 +3302,9 @@ ix86_option_override_internal (bool main_args_p)
|
|||
if (processor_alias_table[i].flags & PTA_TBM
|
||||
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
|
||||
ix86_isa_flags |= OPTION_MASK_ISA_TBM;
|
||||
if (processor_alias_table[i].flags & PTA_BMI2
|
||||
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
|
||||
ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
|
||||
if (processor_alias_table[i].flags & PTA_CX16
|
||||
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
|
||||
ix86_isa_flags |= OPTION_MASK_ISA_CX16;
|
||||
|
|
@ -4053,6 +4058,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
|
|||
IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
|
||||
IX86_ATTR_ISA ("abm", OPT_mabm),
|
||||
IX86_ATTR_ISA ("bmi", OPT_mbmi),
|
||||
IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
|
||||
IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
|
||||
IX86_ATTR_ISA ("tbm", OPT_mtbm),
|
||||
IX86_ATTR_ISA ("aes", OPT_maes),
|
||||
|
|
@ -24242,6 +24248,13 @@ enum ix86_builtins
|
|||
IX86_BUILTIN_BEXTRI32,
|
||||
IX86_BUILTIN_BEXTRI64,
|
||||
|
||||
/* BMI2 instructions. */
|
||||
IX86_BUILTIN_BZHI32,
|
||||
IX86_BUILTIN_BZHI64,
|
||||
IX86_BUILTIN_PDEP32,
|
||||
IX86_BUILTIN_PDEP64,
|
||||
IX86_BUILTIN_PEXT32,
|
||||
IX86_BUILTIN_PEXT64,
|
||||
|
||||
/* FSGSBASE instructions. */
|
||||
IX86_BUILTIN_RDFSBASE32,
|
||||
|
|
@ -25375,6 +25388,14 @@ static const struct builtin_description bdesc_args[] =
|
|||
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
|
||||
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
|
||||
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
|
||||
|
||||
/* BMI2 */
|
||||
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
|
||||
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
|
||||
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
|
||||
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
|
||||
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
|
||||
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
|
||||
};
|
||||
|
||||
/* FMA4 and XOP. */
|
||||
|
|
|
|||
|
|
@ -62,6 +62,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|||
#define TARGET_ROUND OPTION_ISA_ROUND
|
||||
#define TARGET_ABM OPTION_ISA_ABM
|
||||
#define TARGET_BMI OPTION_ISA_BMI
|
||||
#define TARGET_BMI2 OPTION_ISA_BMI2
|
||||
#define TARGET_LZCNT OPTION_ISA_LZCNT
|
||||
#define TARGET_TBM OPTION_ISA_TBM
|
||||
#define TARGET_POPCNT OPTION_ISA_POPCNT
|
||||
|
|
|
|||
|
|
@ -244,6 +244,10 @@
|
|||
|
||||
;; For RDRAND support
|
||||
UNSPEC_RDRAND
|
||||
|
||||
;; For BMI2 support
|
||||
UNSPEC_PDEP
|
||||
UNSPEC_PEXT
|
||||
])
|
||||
|
||||
(define_c_enum "unspecv" [
|
||||
|
|
@ -385,7 +389,7 @@
|
|||
(define_attr "type"
|
||||
"other,multi,
|
||||
alu,alu1,negnot,imov,imovx,lea,
|
||||
incdec,ishift,ishift1,rotate,rotate1,imul,idiv,
|
||||
incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1,imul,imulx,idiv,
|
||||
icmp,test,ibr,setcc,icmov,
|
||||
push,pop,call,callv,leave,
|
||||
str,bitmanip,
|
||||
|
|
@ -418,12 +422,12 @@
|
|||
;; The (bounding maximum) length of an instruction immediate.
|
||||
(define_attr "length_immediate" ""
|
||||
(cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
|
||||
bitmanip")
|
||||
bitmanip,imulx")
|
||||
(const_int 0)
|
||||
(eq_attr "unit" "i387,sse,mmx")
|
||||
(const_int 0)
|
||||
(eq_attr "type" "alu,alu1,negnot,imovx,ishift,rotate,ishift1,rotate1,
|
||||
imul,icmp,push,pop")
|
||||
(eq_attr "type" "alu,alu1,negnot,imovx,ishift,ishiftx,ishift1,
|
||||
rotate,rotatex,rotate1,imul,icmp,push,pop")
|
||||
(symbol_ref "ix86_attr_length_immediate_default (insn, true)")
|
||||
(eq_attr "type" "imov,test")
|
||||
(symbol_ref "ix86_attr_length_immediate_default (insn, false)")
|
||||
|
|
@ -683,7 +687,7 @@
|
|||
(and (match_operand 0 "memory_displacement_operand" "")
|
||||
(match_operand 1 "immediate_operand" "")))
|
||||
(const_string "true")
|
||||
(and (eq_attr "type" "alu,ishift,rotate,imul,idiv")
|
||||
(and (eq_attr "type" "alu,ishift,ishiftx,rotate,rotatex,imul,idiv")
|
||||
(and (match_operand 0 "memory_displacement_operand" "")
|
||||
(match_operand 2 "immediate_operand" "")))
|
||||
(const_string "true")
|
||||
|
|
@ -707,12 +711,13 @@
|
|||
(define_attr "movu" "0,1" (const_string "0"))
|
||||
|
||||
;; Used to control the "enabled" attribute on a per-instruction basis.
|
||||
(define_attr "isa" "base,noavx,avx"
|
||||
(define_attr "isa" "base,noavx,avx,bmi2"
|
||||
(const_string "base"))
|
||||
|
||||
(define_attr "enabled" ""
|
||||
(cond [(eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
|
||||
(eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
|
||||
(eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
|
||||
]
|
||||
(const_int 1)))
|
||||
|
||||
|
|
@ -6853,16 +6858,103 @@
|
|||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"TARGET_QIMODE_MATH")
|
||||
|
||||
(define_insn "*<u>mul<mode><dwi>3_1"
|
||||
(define_insn "*bmi2_umulditi3_1"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(mult:DI
|
||||
(match_operand:DI 2 "nonimmediate_operand" "%d")
|
||||
(match_operand:DI 3 "nonimmediate_operand" "rm")))
|
||||
(set (match_operand:DI 1 "register_operand" "=r")
|
||||
(truncate:DI
|
||||
(lshiftrt:TI
|
||||
(mult:TI (zero_extend:TI (match_dup 2))
|
||||
(zero_extend:TI (match_dup 3)))
|
||||
(const_int 64))))]
|
||||
"TARGET_64BIT && TARGET_BMI2
|
||||
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
|
||||
"mulx\t{%3, %0, %1|%1, %0, %3}"
|
||||
[(set_attr "type" "imulx")
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(define_insn "*bmi2_umulsidi3_1"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r")
|
||||
(mult:SI
|
||||
(match_operand:SI 2 "nonimmediate_operand" "%d")
|
||||
(match_operand:SI 3 "nonimmediate_operand" "rm")))
|
||||
(set (match_operand:SI 1 "register_operand" "=r")
|
||||
(truncate:SI
|
||||
(lshiftrt:DI
|
||||
(mult:DI (zero_extend:DI (match_dup 2))
|
||||
(zero_extend:DI (match_dup 3)))
|
||||
(const_int 32))))]
|
||||
"!TARGET_64BIT && TARGET_BMI2
|
||||
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
|
||||
"mulx\t{%3, %0, %1|%1, %0, %3}"
|
||||
[(set_attr "type" "imulx")
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "*umul<mode><dwi>3_1"
|
||||
[(set (match_operand:<DWI> 0 "register_operand" "=A,r")
|
||||
(mult:<DWI>
|
||||
(zero_extend:<DWI>
|
||||
(match_operand:DWIH 1 "nonimmediate_operand" "%0,d"))
|
||||
(zero_extend:<DWI>
|
||||
(match_operand:DWIH 2 "nonimmediate_operand" "rm,rm"))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"!(MEM_P (operands[1]) && MEM_P (operands[2]))"
|
||||
"@
|
||||
mul{<imodesuffix>}\t%2
|
||||
#"
|
||||
[(set_attr "isa" "base,bmi2")
|
||||
(set_attr "type" "imul,imulx")
|
||||
(set_attr "length_immediate" "0,*")
|
||||
(set (attr "athlon_decode")
|
||||
(cond [(eq_attr "alternative" "0")
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "double"))]
|
||||
(const_string "*")))
|
||||
(set_attr "amdfam10_decode" "double,*")
|
||||
(set_attr "bdver1_decode" "direct,*")
|
||||
(set_attr "prefix" "orig,vex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
;; Convert mul to the mulx pattern to avoid flags dependency.
|
||||
(define_split
|
||||
[(set (match_operand:<DWI> 0 "register_operand" "")
|
||||
(mult:<DWI>
|
||||
(zero_extend:<DWI>
|
||||
(match_operand:DWIH 1 "register_operand" ""))
|
||||
(zero_extend:<DWI>
|
||||
(match_operand:DWIH 2 "nonimmediate_operand" ""))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_BMI2 && reload_completed
|
||||
&& true_regnum (operands[1]) == DX_REG"
|
||||
[(parallel [(set (match_dup 3)
|
||||
(mult:DWIH (match_dup 1) (match_dup 2)))
|
||||
(set (match_dup 4)
|
||||
(truncate:DWIH
|
||||
(lshiftrt:<DWI>
|
||||
(mult:<DWI> (zero_extend:<DWI> (match_dup 1))
|
||||
(zero_extend:<DWI> (match_dup 2)))
|
||||
(match_dup 5))))])]
|
||||
{
|
||||
split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
|
||||
|
||||
operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
|
||||
})
|
||||
|
||||
(define_insn "*mul<mode><dwi>3_1"
|
||||
[(set (match_operand:<DWI> 0 "register_operand" "=A")
|
||||
(mult:<DWI>
|
||||
(any_extend:<DWI>
|
||||
(sign_extend:<DWI>
|
||||
(match_operand:DWIH 1 "nonimmediate_operand" "%0"))
|
||||
(any_extend:<DWI>
|
||||
(sign_extend:<DWI>
|
||||
(match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"!(MEM_P (operands[1]) && MEM_P (operands[2]))"
|
||||
"<sgnprefix>mul{<imodesuffix>}\t%2"
|
||||
"imul{<imodesuffix>}\t%2"
|
||||
[(set_attr "type" "imul")
|
||||
(set_attr "length_immediate" "0")
|
||||
(set (attr "athlon_decode")
|
||||
|
|
@ -9060,16 +9152,26 @@
|
|||
[(set_attr "type" "ishift")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*bmi2_ashl<mode>3_1"
|
||||
[(set (match_operand:SWI48 0 "register_operand" "=r")
|
||||
(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
|
||||
(match_operand:SWI48 2 "register_operand" "r")))]
|
||||
"TARGET_BMI2"
|
||||
"shlx\t{%2, %1, %0|%0, %1, %2}"
|
||||
[(set_attr "type" "ishiftx")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*ashl<mode>3_1"
|
||||
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
|
||||
(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l")
|
||||
(match_operand:QI 2 "nonmemory_operand" "c<S>,M")))
|
||||
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
|
||||
(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm")
|
||||
(match_operand:QI 2 "nonmemory_operand" "c<S>,M,r")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
|
||||
{
|
||||
switch (get_attr_type (insn))
|
||||
{
|
||||
case TYPE_LEA:
|
||||
case TYPE_ISHIFTX:
|
||||
return "#";
|
||||
|
||||
case TYPE_ALU:
|
||||
|
|
@ -9085,9 +9187,12 @@
|
|||
return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
|
||||
}
|
||||
}
|
||||
[(set (attr "type")
|
||||
[(set_attr "isa" "base,base,bmi2")
|
||||
(set (attr "type")
|
||||
(cond [(eq_attr "alternative" "1")
|
||||
(const_string "lea")
|
||||
(eq_attr "alternative" "2")
|
||||
(const_string "ishiftx")
|
||||
(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
|
||||
(const_int 0))
|
||||
(match_operand 0 "register_operand" ""))
|
||||
|
|
@ -9106,17 +9211,39 @@
|
|||
(const_string "*")))
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*ashlsi3_1_zext"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r,r")
|
||||
;; Convert shift to the shiftx pattern to avoid flags dependency.
|
||||
(define_split
|
||||
[(set (match_operand:SWI48 0 "register_operand" "")
|
||||
(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
|
||||
(match_operand:QI 2 "register_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_BMI2 && reload_completed"
|
||||
[(set (match_dup 0)
|
||||
(ashift:SWI48 (match_dup 1) (match_dup 2)))]
|
||||
"operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
|
||||
|
||||
(define_insn "*bmi2_ashlsi3_1_zext"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(zero_extend:DI
|
||||
(ashift:SI (match_operand:SI 1 "register_operand" "0,l")
|
||||
(match_operand:QI 2 "nonmemory_operand" "cI,M"))))
|
||||
(ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
|
||||
(match_operand:SI 2 "register_operand" "r"))))]
|
||||
"TARGET_64BIT && TARGET_BMI2"
|
||||
"shlx\t{%2, %1, %k0|%k0, %1, %2}"
|
||||
[(set_attr "type" "ishiftx")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "*ashlsi3_1_zext"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r,r,r")
|
||||
(zero_extend:DI
|
||||
(ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm")
|
||||
(match_operand:QI 2 "nonmemory_operand" "cI,M,r"))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
|
||||
{
|
||||
switch (get_attr_type (insn))
|
||||
{
|
||||
case TYPE_LEA:
|
||||
case TYPE_ISHIFTX:
|
||||
return "#";
|
||||
|
||||
case TYPE_ALU:
|
||||
|
|
@ -9131,9 +9258,12 @@
|
|||
return "sal{l}\t{%2, %k0|%k0, %2}";
|
||||
}
|
||||
}
|
||||
[(set (attr "type")
|
||||
[(set_attr "isa" "base,base,bmi2")
|
||||
(set (attr "type")
|
||||
(cond [(eq_attr "alternative" "1")
|
||||
(const_string "lea")
|
||||
(eq_attr "alternative" "2")
|
||||
(const_string "ishiftx")
|
||||
(and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
|
||||
(const_int 0))
|
||||
(match_operand 2 "const1_operand" ""))
|
||||
|
|
@ -9151,6 +9281,18 @@
|
|||
(const_string "*")))
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
;; Convert shift to the shiftx pattern to avoid flags dependency.
|
||||
(define_split
|
||||
[(set (match_operand:DI 0 "register_operand" "")
|
||||
(zero_extend:DI
|
||||
(ashift:SI (match_operand:SI 1 "nonimmediate_operand" "")
|
||||
(match_operand:QI 2 "register_operand" ""))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_64BIT && TARGET_BMI2 && reload_completed"
|
||||
[(set (match_dup 0)
|
||||
(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
|
||||
"operands[2] = gen_lowpart (SImode, operands[2]);")
|
||||
|
||||
(define_insn "*ashlhi3_1"
|
||||
[(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
|
||||
(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
|
||||
|
|
@ -9767,10 +9909,117 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "*bmi2_<shiftrt_insn><mode>3_1"
|
||||
[(set (match_operand:SWI48 0 "register_operand" "=r")
|
||||
(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
|
||||
(match_operand:SWI48 2 "register_operand" "r")))]
|
||||
"TARGET_BMI2"
|
||||
"<shiftrt>x\t{%2, %1, %0|%0, %1, %2}"
|
||||
[(set_attr "type" "ishiftx")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*<shiftrt_insn><mode>3_1"
|
||||
[(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
|
||||
(any_shiftrt:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
|
||||
(match_operand:QI 2 "nonmemory_operand" "c<S>")))
|
||||
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
|
||||
(any_shiftrt:SWI48
|
||||
(match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
|
||||
(match_operand:QI 2 "nonmemory_operand" "c<S>,r")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
|
||||
{
|
||||
switch (get_attr_type (insn))
|
||||
{
|
||||
case TYPE_ISHIFTX:
|
||||
return "#";
|
||||
|
||||
default:
|
||||
if (operands[2] == const1_rtx
|
||||
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
|
||||
return "<shiftrt>{<imodesuffix>}\t%0";
|
||||
else
|
||||
return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
|
||||
}
|
||||
}
|
||||
[(set_attr "isa" "base,bmi2")
|
||||
(set_attr "type" "ishift,ishiftx")
|
||||
(set (attr "length_immediate")
|
||||
(if_then_else
|
||||
(and (match_operand 2 "const1_operand" "")
|
||||
(ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
|
||||
(const_int 0)))
|
||||
(const_string "0")
|
||||
(const_string "*")))
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
;; Convert shift to the shiftx pattern to avoid flags dependency.
|
||||
(define_split
|
||||
[(set (match_operand:SWI48 0 "register_operand" "")
|
||||
(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
|
||||
(match_operand:QI 2 "register_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_BMI2 && reload_completed"
|
||||
[(set (match_dup 0)
|
||||
(any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))]
|
||||
"operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
|
||||
|
||||
(define_insn "*bmi2_<shiftrt_insn>si3_1_zext"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(zero_extend:DI
|
||||
(any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
|
||||
(match_operand:SI 2 "register_operand" "r"))))]
|
||||
"TARGET_64BIT && TARGET_BMI2"
|
||||
"<shiftrt>x\t{%2, %1, %k0|%k0, %1, %2}"
|
||||
[(set_attr "type" "ishiftx")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "*<shiftrt_insn>si3_1_zext"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r,r")
|
||||
(zero_extend:DI
|
||||
(any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
|
||||
(match_operand:QI 2 "nonmemory_operand" "cI,r"))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
|
||||
{
|
||||
switch (get_attr_type (insn))
|
||||
{
|
||||
case TYPE_ISHIFTX:
|
||||
return "#";
|
||||
|
||||
default:
|
||||
if (operands[2] == const1_rtx
|
||||
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
|
||||
return "<shiftrt>{l}\t%k0";
|
||||
else
|
||||
return "<shiftrt>{l}\t{%2, %k0|%k0, %2}";
|
||||
}
|
||||
}
|
||||
[(set_attr "isa" "base,bmi2")
|
||||
(set_attr "type" "ishift,ishiftx")
|
||||
(set (attr "length_immediate")
|
||||
(if_then_else
|
||||
(and (match_operand 2 "const1_operand" "")
|
||||
(ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
|
||||
(const_int 0)))
|
||||
(const_string "0")
|
||||
(const_string "*")))
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
;; Convert shift to the shiftx pattern to avoid flags dependency.
|
||||
(define_split
|
||||
[(set (match_operand:DI 0 "register_operand" "")
|
||||
(zero_extend:DI
|
||||
(any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "")
|
||||
(match_operand:QI 2 "register_operand" ""))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_64BIT && TARGET_BMI2 && reload_completed"
|
||||
[(set (match_dup 0)
|
||||
(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
|
||||
"operands[2] = gen_lowpart (SImode, operands[2]);")
|
||||
|
||||
(define_insn "*<shiftrt_insn><mode>3_1"
|
||||
[(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
|
||||
(any_shiftrt:SWI12
|
||||
(match_operand:SWI12 1 "nonimmediate_operand" "0")
|
||||
(match_operand:QI 2 "nonmemory_operand" "c<S>")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
|
||||
{
|
||||
|
|
@ -9790,30 +10039,6 @@
|
|||
(const_string "*")))
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*<shiftrt_insn>si3_1_zext"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(zero_extend:DI
|
||||
(any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
|
||||
(match_operand:QI 2 "nonmemory_operand" "cI"))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
|
||||
{
|
||||
if (operands[2] == const1_rtx
|
||||
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
|
||||
return "<shiftrt>{l}\t%k0";
|
||||
else
|
||||
return "<shiftrt>{l}\t{%2, %k0|%k0, %2}";
|
||||
}
|
||||
[(set_attr "type" "ishift")
|
||||
(set (attr "length_immediate")
|
||||
(if_then_else
|
||||
(and (match_operand 2 "const1_operand" "")
|
||||
(ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
|
||||
(const_int 0)))
|
||||
(const_string "0")
|
||||
(const_string "*")))
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "*<shiftrt_insn>qi3_1_slp"
|
||||
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
|
||||
(any_shiftrt:QI (match_dup 0)
|
||||
|
|
@ -10064,10 +10289,143 @@
|
|||
split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
|
||||
})
|
||||
|
||||
(define_insn "*bmi2_rorx<mode>3_1"
|
||||
[(set (match_operand:SWI48 0 "register_operand" "=r")
|
||||
(rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
|
||||
(match_operand:QI 2 "immediate_operand" "<S>")))]
|
||||
"TARGET_BMI2"
|
||||
"rorx\t{%2, %1, %0|%0, %1, %2}"
|
||||
[(set_attr "type" "rotatex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*<rotate_insn><mode>3_1"
|
||||
[(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
|
||||
(any_rotate:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
|
||||
(match_operand:QI 2 "nonmemory_operand" "c<S>")))
|
||||
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
|
||||
(any_rotate:SWI48
|
||||
(match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
|
||||
(match_operand:QI 2 "nonmemory_operand" "c<S>,<S>")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
|
||||
{
|
||||
switch (get_attr_type (insn))
|
||||
{
|
||||
case TYPE_ROTATEX:
|
||||
return "#";
|
||||
|
||||
default:
|
||||
if (operands[2] == const1_rtx
|
||||
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
|
||||
return "<rotate>{<imodesuffix>}\t%0";
|
||||
else
|
||||
return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
|
||||
}
|
||||
}
|
||||
[(set_attr "isa" "base,bmi2")
|
||||
(set_attr "type" "rotate,rotatex")
|
||||
(set (attr "length_immediate")
|
||||
(if_then_else
|
||||
(and (eq_attr "type" "rotate")
|
||||
(and (match_operand 2 "const1_operand" "")
|
||||
(ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
|
||||
(const_int 0))))
|
||||
(const_string "0")
|
||||
(const_string "*")))
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
;; Convert rotate to the rotatex pattern to avoid flags dependency.
|
||||
(define_split
|
||||
[(set (match_operand:SWI48 0 "register_operand" "")
|
||||
(rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
|
||||
(match_operand:QI 2 "immediate_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_BMI2 && reload_completed"
|
||||
[(set (match_dup 0)
|
||||
(rotatert:SWI48 (match_dup 1) (match_dup 2)))]
|
||||
{
|
||||
operands[2]
|
||||
= GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - INTVAL (operands[2]));
|
||||
})
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:SWI48 0 "register_operand" "")
|
||||
(rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
|
||||
(match_operand:QI 2 "immediate_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_BMI2 && reload_completed"
|
||||
[(set (match_dup 0)
|
||||
(rotatert:SWI48 (match_dup 1) (match_dup 2)))])
|
||||
|
||||
(define_insn "*bmi2_rorxsi3_1_zext"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(zero_extend:DI
|
||||
(rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
|
||||
(match_operand:QI 2 "immediate_operand" "I"))))]
|
||||
"TARGET_64BIT && TARGET_BMI2"
|
||||
"rorx\t{%2, %1, %k0|%k0, %1, %2}"
|
||||
[(set_attr "type" "rotatex")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "*<rotate_insn>si3_1_zext"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r,r")
|
||||
(zero_extend:DI
|
||||
(any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
|
||||
(match_operand:QI 2 "nonmemory_operand" "cI,I"))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
|
||||
{
|
||||
switch (get_attr_type (insn))
|
||||
{
|
||||
case TYPE_ROTATEX:
|
||||
return "#";
|
||||
|
||||
default:
|
||||
if (operands[2] == const1_rtx
|
||||
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
|
||||
return "<rotate>{l}\t%k0";
|
||||
else
|
||||
return "<rotate>{l}\t{%2, %k0|%k0, %2}";
|
||||
}
|
||||
}
|
||||
[(set_attr "isa" "base,bmi2")
|
||||
(set_attr "type" "rotate,rotatex")
|
||||
(set (attr "length_immediate")
|
||||
(if_then_else
|
||||
(and (eq_attr "type" "rotate")
|
||||
(and (match_operand 2 "const1_operand" "")
|
||||
(ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
|
||||
(const_int 0))))
|
||||
(const_string "0")
|
||||
(const_string "*")))
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
;; Convert rotate to the rotatex pattern to avoid flags dependency.
|
||||
(define_split
|
||||
[(set (match_operand:DI 0 "register_operand" "")
|
||||
(zero_extend:DI
|
||||
(rotate:SI (match_operand:SI 1 "nonimmediate_operand" "")
|
||||
(match_operand:QI 2 "immediate_operand" ""))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_64BIT && TARGET_BMI2 && reload_completed"
|
||||
[(set (match_dup 0)
|
||||
(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]
|
||||
{
|
||||
operands[2]
|
||||
= GEN_INT (GET_MODE_BITSIZE (SImode) - INTVAL (operands[2]));
|
||||
})
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:DI 0 "register_operand" "")
|
||||
(zero_extend:DI
|
||||
(rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "")
|
||||
(match_operand:QI 2 "immediate_operand" ""))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_64BIT && TARGET_BMI2 && reload_completed"
|
||||
[(set (match_dup 0)
|
||||
(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
|
||||
|
||||
(define_insn "*<rotate_insn><mode>3_1"
|
||||
[(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
|
||||
(any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0")
|
||||
(match_operand:QI 2 "nonmemory_operand" "c<S>")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
|
||||
{
|
||||
|
|
@ -10087,30 +10445,6 @@
|
|||
(const_string "*")))
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*<rotate_insn>si3_1_zext"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(zero_extend:DI
|
||||
(any_rotate:SI (match_operand:SI 1 "register_operand" "0")
|
||||
(match_operand:QI 2 "nonmemory_operand" "cI"))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
|
||||
{
|
||||
if (operands[2] == const1_rtx
|
||||
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
|
||||
return "<rotate>{l}\t%k0";
|
||||
else
|
||||
return "<rotate>{l}\t{%2, %k0|%k0, %2}";
|
||||
}
|
||||
[(set_attr "type" "rotate")
|
||||
(set (attr "length_immediate")
|
||||
(if_then_else
|
||||
(and (match_operand 2 "const1_operand" "")
|
||||
(ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
|
||||
(const_int 0)))
|
||||
(const_string "0")
|
||||
(const_string "*")))
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "*<rotate_insn>qi3_1_slp"
|
||||
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
|
||||
(any_rotate:QI (match_dup 0)
|
||||
|
|
@ -11951,6 +12285,41 @@
|
|||
[(set_attr "type" "bitmanip")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
;; BMI2 instructions.
|
||||
(define_insn "bmi2_bzhi_<mode>3"
|
||||
[(set (match_operand:SWI48 0 "register_operand" "=r")
|
||||
(and:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
|
||||
(lshiftrt:SWI48 (const_int -1)
|
||||
(match_operand:SWI48 2 "register_operand" "r"))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_BMI2"
|
||||
"bzhi\t{%2, %1, %0|%0, %1, %2}"
|
||||
[(set_attr "type" "bitmanip")
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "bmi2_pdep_<mode>3"
|
||||
[(set (match_operand:SWI48 0 "register_operand" "=r")
|
||||
(unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")
|
||||
(match_operand:SWI48 2 "register_operand" "r")]
|
||||
UNSPEC_PDEP))]
|
||||
"TARGET_BMI2"
|
||||
"pdep\t{%2, %1, %0|%0, %1, %2}"
|
||||
[(set_attr "type" "bitmanip")
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "bmi2_pext_<mode>3"
|
||||
[(set (match_operand:SWI48 0 "register_operand" "=r")
|
||||
(unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")
|
||||
(match_operand:SWI48 2 "register_operand" "r")]
|
||||
UNSPEC_PEXT))]
|
||||
"TARGET_BMI2"
|
||||
"pext\t{%2, %1, %0|%0, %1, %2}"
|
||||
[(set_attr "type" "bitmanip")
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
;; TBM instructions.
|
||||
(define_insn "tbm_bextri_<mode>"
|
||||
[(set (match_operand:SWI48 0 "register_operand" "=r")
|
||||
|
|
@ -12350,6 +12719,7 @@
|
|||
"xor{b}\t{%h0, %b0|%b0, %h0}"
|
||||
[(set_attr "length" "2")
|
||||
(set_attr "mode" "HI")])
|
||||
|
||||
|
||||
;; Thread-local storage patterns for ELF.
|
||||
;;
|
||||
|
|
|
|||
|
|
@ -493,6 +493,10 @@ mbmi
|
|||
Target Report Mask(ISA_BMI) Var(ix86_isa_flags) Save
|
||||
Support BMI built-in functions and code generation
|
||||
|
||||
mbmi2
|
||||
Target Report Mask(ISA_BMI2) Var(ix86_isa_flags) Save
|
||||
Support BMI2 built-in functions and code generation
|
||||
|
||||
mlzcnt
|
||||
Target Report Mask(ISA_LZCNT) Var(ix86_isa_flags) Save
|
||||
Support LZCNT built-in function and code generation
|
||||
|
|
|
|||
|
|
@ -81,6 +81,10 @@
|
|||
#include <bmiintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef __BMI2__
|
||||
#include <bmi2intrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef __TBM__
|
||||
#include <tbmintrin.h>
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -9883,6 +9883,17 @@ unsigned int __builtin_ia32_bextr_u32(unsigned int, unsigned int);
|
|||
unsigned long long __builtin_ia32_bextr_u64 (unsigned long long, unsigned long long);
|
||||
@end smallexample
|
||||
|
||||
The following built-in functions are available when @option{-mbmi2} is used.
|
||||
All of them generate the machine instruction that is part of the name.
|
||||
@smallexample
|
||||
unsigned int _bzhi_u32 (unsigned int, unsigned int)
|
||||
unsigned int _pdep_u32 (unsigned int, unsigned int)
|
||||
unsigned int _pext_u32 (unsigned int, unsigned int)
|
||||
unsigned long long _bzhi_u64 (unsigned long long, unsigned long long)
|
||||
unsigned long long _pdep_u64 (unsigned long long, unsigned long long)
|
||||
unsigned long long _pext_u64 (unsigned long long, unsigned long long)
|
||||
@end smallexample
|
||||
|
||||
The following built-in functions are available when @option{-mlzcnt} is used.
|
||||
All of them generate the machine instruction that is part of the name.
|
||||
@smallexample
|
||||
|
|
|
|||
|
|
@ -607,7 +607,7 @@ Objective-C and Objective-C++ Dialects}.
|
|||
-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
|
||||
-mavx2 -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfma @gol
|
||||
-msse4a -m3dnow -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop -mlzcnt @gol
|
||||
-mlwp -mthreads -mno-align-stringops -minline-all-stringops @gol
|
||||
-mbmi2 -mlwp -mthreads -mno-align-stringops -minline-all-stringops @gol
|
||||
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
|
||||
-mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
|
||||
-m96bit-long-double -mregparm=@var{num} -msseregparm @gol
|
||||
|
|
@ -12697,7 +12697,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
|||
@itemx -mabm
|
||||
@itemx -mno-abm
|
||||
@itemx -mbmi
|
||||
@itemx -mbmi2
|
||||
@itemx -mno-bmi
|
||||
@itemx -mno-bmi2
|
||||
@itemx -mlzcnt
|
||||
@itemx -mno-lzcnt
|
||||
@itemx -mtbm
|
||||
|
|
@ -12709,8 +12711,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
|||
@opindex m3dnow
|
||||
@opindex mno-3dnow
|
||||
These switches enable or disable the use of instructions in the MMX, SSE,
|
||||
SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, F16C, FMA,
|
||||
SSE4A, FMA4, XOP, LWP, ABM, BMI, LZCNT or 3DNow!@: extended instruction sets.
|
||||
SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, F16C,
|
||||
FMA, SSE4A, FMA4, XOP, LWP, ABM, BMI, BMI2, LZCNT or 3DNow!
|
||||
@: extended instruction sets.
|
||||
These extensions are also available as built-in functions: see
|
||||
@ref{X86 Built-in Functions}, for details of the functions enabled and
|
||||
disabled by these switches.
|
||||
|
|
|
|||
|
|
@ -1,3 +1,46 @@
|
|||
2011-08-23 Kirill Yukhin <kirill.yukhin@intel.com>
|
||||
|
||||
* g++.dg/other/i386-2.C: Add -mbmi2 check.
|
||||
* g++.dg/other/i386-3.C: Likewise.
|
||||
* gcc.target/i386/bmi2-bzhi32-1.c: New testcase.
|
||||
* gcc.target/i386/bmi2-bzhi32-1a.c: Likewise.
|
||||
* gcc.target/i386/bmi2-bzhi64-1.c: Likewise.
|
||||
* gcc.target/i386/bmi2-bzhi64-1a.c: Likewise.
|
||||
* gcc.target/i386/bmi2-mulx32-1.c: Likewise.
|
||||
* gcc.target/i386/bmi2-mulx32-1a.c: Likewise.
|
||||
* gcc.target/i386/bmi2-mulx64-1.c: Likewise.
|
||||
* gcc.target/i386/bmi2-mulx64-1a.c: Likewise.
|
||||
* gcc.target/i386/bmi2-pdep32-1.c: Likewise.
|
||||
* gcc.target/i386/bmi2-pdep32-1a.c: Likewise.
|
||||
* gcc.target/i386/bmi2-pdep64-1.c: Likewise.
|
||||
* gcc.target/i386/bmi2-pdep64-1a.c: Likewise.
|
||||
* gcc.target/i386/bmi2-pext32-1.c: Likewise.
|
||||
* gcc.target/i386/bmi2-pext32-1a.c: Likewise.
|
||||
* gcc.target/i386/bmi2-pext64-1.c: Likewise.
|
||||
* gcc.target/i386/bmi2-pext64-1a.c: Likewise.
|
||||
* gcc.target/i386/bmi2-rorx32-1.c: Likewise.
|
||||
* gcc.target/i386/bmi2-rorx32-1a.c: Likewise.
|
||||
* gcc.target/i386/bmi2-rorx64-1.c: Likewise.
|
||||
* gcc.target/i386/bmi2-rorx64-1a.c: Likewise.
|
||||
* gcc.target/i386/bmi2-sarx32-1.c: Likewise.
|
||||
* gcc.target/i386/bmi2-sarx32-1a.c: Likewise.
|
||||
* gcc.target/i386/bmi2-sarx64-1.c: Likewise.
|
||||
* gcc.target/i386/bmi2-sarx64-1a.c: Likewise.
|
||||
* gcc.target/i386/bmi2-shlx32-1.c: Likewise.
|
||||
* gcc.target/i386/bmi2-shlx32-1a.c: Likewise.
|
||||
* gcc.target/i386/bmi2-shlx64-1.c: Likewise.
|
||||
* gcc.target/i386/bmi2-shlx64-1a.c: Likewise.
|
||||
* gcc.target/i386/bmi2-shrx32-1.c: Likewise.
|
||||
* gcc.target/i386/bmi2-shrx32-1a.c: Likewise.
|
||||
* gcc.target/i386/bmi2-shrx64-1.c: Likewise.
|
||||
* gcc.target/i386/bmi2-shrx64-1a.c: Likewise.
|
||||
* gcc.target/i386/i386.exp (check_effective_target_bmi2): New.
|
||||
* gcc.target/i386/sse-12.c: Add BMI2.
|
||||
* gcc.target/i386/sse-13.c: Likewise.
|
||||
* gcc.target/i386/sse-14.c: Likewise.
|
||||
* gcc.target/i386/sse-22.c: Likewise.
|
||||
* gcc.target/i386/sse-23.c: Likewise.
|
||||
|
||||
2011-08-23 Jason Merrill <jason@redhat.com>
|
||||
|
||||
* g++.dg/template/crash7.C: Adjust expected errors.
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
|
||||
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
|
||||
|
||||
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||||
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
|
||||
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
|
||||
|
||||
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||||
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,35 @@
|
|||
/* { dg-do run { target { bmi2 } } } */
|
||||
/* { dg-options "-mbmi2 -O2" } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include "bmi2-check.h"
|
||||
|
||||
__attribute__((noinline))
|
||||
unsigned
|
||||
calc_bzhi_u32 (unsigned a, int l)
|
||||
{
|
||||
unsigned res = a;
|
||||
int i;
|
||||
for (i = 0; i < 32 - l; ++i)
|
||||
res &= ~(1 << (31 - i));
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
bmi2_test ()
|
||||
{
|
||||
unsigned i;
|
||||
unsigned src = 0xce7ace0f;
|
||||
unsigned res, res_ref;
|
||||
|
||||
for (i = 0; i < 5; ++i) {
|
||||
src = src * (i + 1);
|
||||
|
||||
res_ref = calc_bzhi_u32 (src, i * 2);
|
||||
res = _bzhi_u32 (src, i * 2);
|
||||
|
||||
if (res != res_ref)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mbmi2 -O2 -dp" } */
|
||||
|
||||
#include "bmi2-bzhi32-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "bmi2_bzhi_si3" 1 } } */
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
|
||||
/* { dg-options "-mbmi2 -O2" } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include "bmi2-check.h"
|
||||
|
||||
__attribute__((noinline))
|
||||
unsigned long long
|
||||
calc_bzhi_u64 (unsigned long long a, int l)
|
||||
{
|
||||
unsigned long long res = a;
|
||||
int i;
|
||||
for (i = 0; i < 64 - l; ++i)
|
||||
res &= ~(1LL << (63 - i));
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
bmi2_test ()
|
||||
{
|
||||
unsigned i;
|
||||
unsigned long long src = 0xce7ace0ce7ace0ff;
|
||||
unsigned long long res, res_ref;
|
||||
|
||||
for (i = 0; i < 5; ++i) {
|
||||
src = src * (i + 1);
|
||||
|
||||
res_ref = calc_bzhi_u64 (src, i * 2);
|
||||
res = _bzhi_u64 (src, i * 2);
|
||||
|
||||
if (res != res_ref)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-mbmi2 -O2 -dp" } */
|
||||
|
||||
#include "bmi2-bzhi64-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "bmi2_bzhi_di3" 1 } } */
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "cpuid.h"
|
||||
|
||||
static void bmi2_test (void);
|
||||
|
||||
static void
|
||||
__attribute__ ((noinline))
|
||||
do_test (void)
|
||||
{
|
||||
bmi2_test ();
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
|
||||
__cpuid_count (7, 0, eax, ebx, ecx, edx);
|
||||
|
||||
/* Run BMI2 test only if host has BMI2 support. */
|
||||
if (ebx & bit_BMI2)
|
||||
{
|
||||
do_test ();
|
||||
#ifdef DEBUG
|
||||
printf ("PASSED\n");
|
||||
#endif
|
||||
}
|
||||
#ifdef DEBUG
|
||||
else
|
||||
printf ("SKIPPED\n");
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
/* { dg-do run { target { bmi2 && { ia32 } } } } */
|
||||
/* { dg-options "-mbmi2 -O2" } */
|
||||
|
||||
#include "bmi2-check.h"
|
||||
|
||||
__attribute__((noinline))
|
||||
unsigned long long
|
||||
calc_mul_u32 (unsigned volatile a, unsigned b)
|
||||
{
|
||||
unsigned long long res = 0;
|
||||
int i;
|
||||
for (i = 0; i < b; ++i)
|
||||
res += a;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
__attribute__((noinline))
|
||||
unsigned long long
|
||||
gen_mulx (unsigned a, unsigned b)
|
||||
{
|
||||
unsigned long long res;
|
||||
|
||||
res = (unsigned long long)a * b;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
bmi2_test ()
|
||||
{
|
||||
unsigned i;
|
||||
unsigned a = 0xce7ace0;
|
||||
unsigned b = 0xfacefff;
|
||||
unsigned long long res, res_ref;
|
||||
|
||||
for (i = 0; i < 5; ++i) {
|
||||
a = a * (i + 1);
|
||||
b = b / (i + 1);
|
||||
|
||||
res_ref = calc_mul_u32 (a, b);
|
||||
res = gen_mulx (a, b);
|
||||
|
||||
if (res != res_ref)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do compile { target { ia32 } } } */
|
||||
/* { dg-options "-O2 -mbmi2 -dp" } */
|
||||
|
||||
#include "bmi2-mulx32-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "bmi2_umulsidi3_1" 1 } } */
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
|
||||
/* { dg-options "-mbmi2 -O2" } */
|
||||
|
||||
#include "bmi2-check.h"
|
||||
|
||||
__attribute__((noinline))
|
||||
unsigned __int128
|
||||
calc_mul_u64 (unsigned long long volatile a, unsigned long long b)
|
||||
{
|
||||
unsigned __int128 res = 0;
|
||||
int i;
|
||||
for (i = 0; i < b; ++i)
|
||||
res += (unsigned __int128) a;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
bmi2_test ()
|
||||
{
|
||||
unsigned i;
|
||||
unsigned long long a = 0xce7ace0ce7ace0;
|
||||
unsigned long long b = 0xface;
|
||||
unsigned __int128 res, res_ref;
|
||||
|
||||
for (i=0; i<5; ++i) {
|
||||
a = a * (i + 1);
|
||||
b = b / (i + 1);
|
||||
|
||||
res_ref = calc_mul_u64 (a, b);
|
||||
res = (unsigned __int128) a * b;
|
||||
|
||||
if (res != res_ref)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mbmi2 -dp" } */
|
||||
|
||||
#include "bmi2-mulx64-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "bmi2_umulditi3_1" 1 } } */
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
/* { dg-do run { target { bmi2 } } } */
|
||||
/* { dg-options "-mbmi2 -O2" } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include "bmi2-check.h"
|
||||
|
||||
__attribute__((noinline))
|
||||
unsigned
|
||||
calc_pdep_u32 (unsigned a, int mask)
|
||||
{
|
||||
unsigned res = 0;
|
||||
int i, k = 0;
|
||||
|
||||
for (i = 0; i < 32; ++i)
|
||||
if (mask & (1 << i)) {
|
||||
res |= ((a & (1 << k)) >> k) << i;
|
||||
++k;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
bmi2_test ()
|
||||
{
|
||||
unsigned i;
|
||||
unsigned src = 0xce7acc;
|
||||
unsigned res, res_ref;
|
||||
|
||||
for (i = 0; i < 5; ++i) {
|
||||
src = src * (i + 1);
|
||||
|
||||
res_ref = calc_pdep_u32 (src, i * 3);
|
||||
res = _pdep_u32 (src, i * 3);
|
||||
|
||||
if (res != res_ref)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mbmi2 -O2 -dp" } */
|
||||
|
||||
#include "bmi2-pdep32-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "bmi2_pdep_si3" 1 } } */
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
|
||||
/* { dg-options "-mbmi2 -O2" } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include "bmi2-check.h"
|
||||
|
||||
__attribute__((noinline))
|
||||
unsigned long long
|
||||
calc_pdep_u64 (unsigned long long a, unsigned long long mask)
|
||||
{
|
||||
unsigned long long res = 0;
|
||||
unsigned long long i, k = 0;
|
||||
|
||||
for (i = 0; i < 64; ++i)
|
||||
if (mask & (1LL << i)) {
|
||||
res |= ((a & (1LL << k)) >> k) << i;
|
||||
++k;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
bmi2_test ()
|
||||
{
|
||||
unsigned long long i;
|
||||
unsigned long long src = 0xce7acce7acce7ac;
|
||||
unsigned long long res, res_ref;
|
||||
|
||||
for (i = 0; i < 5; ++i) {
|
||||
src = src * (i + 1);
|
||||
|
||||
res_ref = calc_pdep_u64 (src, ~(i * 3));
|
||||
res = _pdep_u64 (src, ~(i * 3));
|
||||
|
||||
if (res != res_ref)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-mbmi2 -O2 -dp" } */
|
||||
|
||||
#include "bmi2-pdep64-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "bmi2_pdep_di3" 1 } } */
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
/* { dg-do run { target { bmi2 } } } */
|
||||
/* { dg-options "-mbmi2 -O2" } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include "bmi2-check.h"
|
||||
|
||||
__attribute__((noinline))
|
||||
unsigned
|
||||
calc_pext_u32 (unsigned a, unsigned mask)
|
||||
{
|
||||
unsigned res = 0;
|
||||
int i, k = 0;
|
||||
|
||||
for (i = 0; i < 32; ++i)
|
||||
if (mask & (1 << i)) {
|
||||
res |= ((a & (1 << i)) >> i) << k;
|
||||
++k;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
bmi2_test ()
|
||||
{
|
||||
unsigned i;
|
||||
unsigned src = 0xce7acc;
|
||||
unsigned res, res_ref;
|
||||
|
||||
for (i = 0; i < 5; ++i) {
|
||||
src = src * (i + 1);
|
||||
|
||||
res_ref = calc_pext_u32 (src, ~(i * 3));
|
||||
res = _pext_u32 (src, ~(i * 3));
|
||||
|
||||
if (res != res_ref)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mbmi2 -O2 -dp" } */
|
||||
|
||||
#include "bmi2-pext32-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "bmi2_pext_si3" 1 } } */
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
|
||||
/* { dg-options "-mbmi2 -O2" } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include "bmi2-check.h"
|
||||
|
||||
__attribute__((noinline))
|
||||
unsigned long long
|
||||
calc_pext_u64 (unsigned long long a, unsigned long long mask)
|
||||
{
|
||||
unsigned long long res = 0;
|
||||
int i, k = 0;
|
||||
|
||||
for (i = 0; i < 64; ++i)
|
||||
if (mask & (1LL << i)) {
|
||||
res |= ((a & (1LL << i)) >> i) << k;
|
||||
++k;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
bmi2_test ()
|
||||
{
|
||||
unsigned long long i;
|
||||
unsigned long long src = 0xce7acce7acce7ac;
|
||||
unsigned long long res, res_ref;
|
||||
|
||||
for (i = 0; i < 5; ++i) {
|
||||
src = src * (i + 1);
|
||||
|
||||
res_ref = calc_pext_u64 (src, ~(i * 3));
|
||||
res = _pext_u64 (src, ~(i * 3));
|
||||
|
||||
if (res != res_ref)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-mbmi2 -O2 -dp" } */
|
||||
|
||||
#include "bmi2-pext64-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "bmi2_pext_di3" 1 } } */
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
/* { dg-do run { target { bmi2 } } } */
|
||||
/* { dg-options "-mbmi2 -O2 -dp" } */
|
||||
|
||||
#include "bmi2-check.h"
|
||||
|
||||
__attribute__((noinline))
|
||||
unsigned
|
||||
calc_rorx_u32 (unsigned a, int l)
|
||||
{
|
||||
unsigned volatile res = a;
|
||||
int i;
|
||||
for (i = 0; i < l; ++i)
|
||||
res = (res >> 1) | ((res & 1) << 31);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
#define SHIFT_VAL 0x0e
|
||||
|
||||
static void
|
||||
bmi2_test ()
|
||||
{
|
||||
unsigned i;
|
||||
unsigned src = 0xce7ace0;
|
||||
unsigned res, res_ref;
|
||||
|
||||
for (i = 0; i < 5; ++i) {
|
||||
src = src * (i + 1);
|
||||
|
||||
res_ref = calc_rorx_u32 (src, SHIFT_VAL);
|
||||
res = (src >> SHIFT_VAL) | (src << (32 - SHIFT_VAL));
|
||||
|
||||
if (res != res_ref)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mbmi2 -dp" } */
|
||||
|
||||
#include "bmi2-rorx32-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "bmi2_rorxsi3_1" 1 } } */
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
|
||||
/* { dg-options "-mbmi2 -O2 -dp" } */
|
||||
|
||||
#include "bmi2-check.h"
|
||||
|
||||
__attribute__((noinline))
|
||||
unsigned long long
|
||||
calc_rorx_u64 (unsigned long long a, int l)
|
||||
{
|
||||
unsigned long long volatile res = a;
|
||||
int i;
|
||||
for (i = 0; i < l; ++i)
|
||||
res = (res >> 1) | ((res&1)<< 63);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
#define SHIFT_VAL 0x1e
|
||||
|
||||
static void
|
||||
bmi2_test ()
|
||||
{
|
||||
unsigned i;
|
||||
unsigned long long src = 0xce7ace0ce7ace0;
|
||||
unsigned long long res, res_ref;
|
||||
|
||||
for (i = 0; i < 5; ++i) {
|
||||
src = src * (i + 1);
|
||||
|
||||
res_ref = calc_rorx_u64 (src, SHIFT_VAL);
|
||||
res = (src >> SHIFT_VAL) | (src << (64 - SHIFT_VAL));
|
||||
|
||||
if (res != res_ref)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mbmi2 -dp" } */
|
||||
|
||||
#include "bmi2-rorx64-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "bmi2_rorxdi3_1" 1 } } */
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
/* { dg-do run { target { bmi2 } } } */
|
||||
/* { dg-options "-mbmi2 -O2 -dp" } */
|
||||
|
||||
#include "bmi2-check.h"
|
||||
|
||||
__attribute__((noinline))
|
||||
int
|
||||
calc_sarx_u32 (int a, int l)
|
||||
{
|
||||
int volatile res = a;
|
||||
int i;
|
||||
for (i = 0; i < l; ++i)
|
||||
res >>= 1;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
bmi2_test ()
|
||||
{
|
||||
unsigned i;
|
||||
int src = 0xfce7ace0;
|
||||
int res, res_ref;
|
||||
|
||||
for (i = 0; i < 5; ++i) {
|
||||
src = src * (i + 1);
|
||||
|
||||
res_ref = calc_sarx_u32 (src, i + 1);
|
||||
res = src >> (i + 1);
|
||||
|
||||
if (res != res_ref)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mbmi2 -dp" } */
|
||||
|
||||
#include "bmi2-sarx32-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "bmi2_ashrsi3" 1 } } */
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
|
||||
/* { dg-options "-mbmi2 -O2 -dp" } */
|
||||
|
||||
#include "bmi2-check.h"
|
||||
|
||||
__attribute__((noinline))
|
||||
long long
|
||||
calc_sarx_u64 (long long a, int l)
|
||||
{
|
||||
long long volatile res = a;
|
||||
int i;
|
||||
for (i = 0; i < l; ++i)
|
||||
res >>= 1;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
bmi2_test ()
|
||||
{
|
||||
unsigned i;
|
||||
long long src = 0xfce7ace0ce7ace0;
|
||||
long long res, res_ref;
|
||||
|
||||
for (i = 0; i < 5; ++i) {
|
||||
src = src * (i + 1);
|
||||
|
||||
res_ref = calc_sarx_u64 (src, i + 1);
|
||||
res = src >> (i + 1);
|
||||
|
||||
if (res != res_ref)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mbmi2 -dp" } */
|
||||
|
||||
#include "bmi2-sarx64-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "bmi2_ashrdi3" 1 } } */
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
/* { dg-do run { target { bmi2 } } } */
|
||||
/* { dg-options "-mbmi2 -O2 -dp" } */
|
||||
|
||||
#include "bmi2-check.h"
|
||||
|
||||
__attribute__((noinline))
|
||||
int
|
||||
calc_shlx_u32 (int a, int l)
|
||||
{
|
||||
int volatile res = a;
|
||||
int i;
|
||||
for (i = 0; i < l; ++i)
|
||||
res <<= 1;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
bmi2_test ()
|
||||
{
|
||||
unsigned i;
|
||||
int src = 0xfce7ace0;
|
||||
int res, res_ref;
|
||||
|
||||
for (i = 0; i < 5; ++i) {
|
||||
src = src * (i + 1);
|
||||
|
||||
res_ref = calc_shlx_u32 (src, i + 1);
|
||||
res = src << (i + 1);
|
||||
|
||||
if (res != res_ref)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mbmi2 -dp" } */
|
||||
|
||||
#include "bmi2-shlx32-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "bmi2_ashlsi3" 1 } } */
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
/* { dg-do run { target { bmi2 } } } */
|
||||
/* { dg-options "-mbmi2 -O2 -dp" } */
|
||||
|
||||
#include "bmi2-check.h"
|
||||
|
||||
__attribute__((noinline))
|
||||
unsigned
|
||||
calc_shrx_u32 (unsigned a, int l)
|
||||
{
|
||||
unsigned volatile res = a;
|
||||
int i;
|
||||
for (i = 0; i < l; ++i)
|
||||
res >>= 1;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
bmi2_test ()
|
||||
{
|
||||
unsigned i;
|
||||
unsigned src = 0xce7ace0;
|
||||
unsigned res, res_ref;
|
||||
|
||||
for (i = 0; i < 5; ++i) {
|
||||
src = src * (i + 1);
|
||||
|
||||
res_ref = calc_shrx_u32 (src, i + 1);
|
||||
res = src >> (i + 1);
|
||||
|
||||
if (res != res_ref)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mbmi2 -dp" } */
|
||||
|
||||
#include "bmi2-shrx32-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "bmi2_lshrsi3" 1 } } */
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
|
||||
/* { dg-options "-mbmi2 -O2 -dp" } */
|
||||
|
||||
#include "bmi2-check.h"
|
||||
|
||||
__attribute__((noinline))
|
||||
unsigned long long
|
||||
calc_shrx_u64 (unsigned long long a, int l)
|
||||
{
|
||||
unsigned long long volatile res = a;
|
||||
int i;
|
||||
for (i = 0; i < l; ++i)
|
||||
res >>= 1;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
bmi2_test ()
|
||||
{
|
||||
unsigned i;
|
||||
unsigned long long src = 0xce7ace0ce7ace0;
|
||||
unsigned long long res, res_ref;
|
||||
|
||||
for (i = 0; i < 5; ++i) {
|
||||
src = src * (i + 1);
|
||||
|
||||
res_ref = calc_shrx_u64 (src, i + 1);
|
||||
res = src >> (i + 1);
|
||||
|
||||
if (res != res_ref)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mbmi2 -dp" } */
|
||||
|
||||
#include "bmi2-shrx64-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "bmi2_lshrdi3" 1 } } */
|
||||
|
|
@ -206,6 +206,17 @@ proc check_effective_target_bmi { } {
|
|||
} "-mbmi" ]
|
||||
}
|
||||
|
||||
# Return 1 if bmi2 instructions can be compiled.
|
||||
proc check_effective_target_bmi2 { } {
|
||||
return [check_no_compiler_messages bmi2 object {
|
||||
unsigned int
|
||||
_bzhi_u32 (unsigned int __X, unsigned int __Y)
|
||||
{
|
||||
return __builtin_ia32_bzhi_si (__X, __Y);
|
||||
}
|
||||
} "-mbmi2" ]
|
||||
}
|
||||
|
||||
# If the linker used understands -M <mapfile>, pass it to clear hardware
|
||||
# capabilities set by the Sun assembler.
|
||||
set clearcap_ldflags "-Wl,-M,$srcdir/$subdir/clearcap.map"
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
popcntintrin.h and mm_malloc.h are usable
|
||||
with -O -std=c89 -pedantic-errors. */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
|
||||
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
|
||||
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
|
||||
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@
|
|||
|
||||
|
||||
#ifndef DIFFERENT_PRAGMAS
|
||||
#pragma GCC target ("sse4a,3dnow,avx,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,tbm,lwp,fsgsbase,rdrnd,f16c")
|
||||
#pragma GCC target ("sse4a,3dnow,avx,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c")
|
||||
#endif
|
||||
|
||||
/* Following intrinsics require immediate arguments. They
|
||||
|
|
@ -220,9 +220,9 @@ test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1)
|
|||
#endif
|
||||
#include <popcntintrin.h>
|
||||
|
||||
/* x86intrin.h (FMA4/XOP/LWP/BMI/TBM/LZCNT). */
|
||||
/* x86intrin.h (FMA4/XOP/LWP/BMI/BMI2/TBM/LZCNT). */
|
||||
#ifdef DIFFERENT_PRAGMAS
|
||||
#pragma GCC target ("fma4,xop,lwp,bmi,tbm,lzcnt")
|
||||
#pragma GCC target ("fma4,xop,lwp,bmi,bmi2,tbm,lzcnt")
|
||||
#endif
|
||||
#include <x86intrin.h>
|
||||
/* xopintrin.h */
|
||||
|
|
|
|||
|
|
@ -147,7 +147,7 @@
|
|||
#define __builtin_ia32_bextri_u32(X, Y) __builtin_ia32_bextr_u32 (X, 1)
|
||||
#define __builtin_ia32_bextri_u64(X, Y) __builtin_ia32_bextr_u64 (X, 1)
|
||||
|
||||
#pragma GCC target ("sse4a,3dnow,avx,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,tbm,lwp,fsgsbase,rdrnd,f16c")
|
||||
#pragma GCC target ("sse4a,3dnow,avx,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c")
|
||||
#include <wmmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
#include <mm3dnow.h>
|
||||
|
|
|
|||
Loading…
Reference in New Issue