config.gcc: Support "goldmont".

2018-05-08  Olga Makhotina  <olga.makhotina@intel.com>

gcc/

	* config.gcc: Support "goldmont".
	* config/i386/driver-i386.c (host_detect_local_cpu): Detect "goldmont".
	* config/i386/i386-c.c (ix86_target_macros_internal): Handle
	PROCESSOR_GOLDMONT.
	* config/i386/i386.c (m_GOLDMONT): Define.
	(processor_target_table): Add "goldmont".
	(PTA_GOLDMONT): Define.
	(ix86_lea_outperforms): Add TARGET_GOLDMONT.
	(get_builtin_code_for_version): Handle PROCESSOR_GOLDMONT.
	(fold_builtin_cpu): Add M_INTEL_GOLDMONT.
	(fold_builtin_cpu): Add "goldmont".
	(ix86_add_stmt_cost): Add TARGET_GOLDMONT.
	(ix86_option_override_internal): Add "goldmont".
	* config/i386/i386.h (processor_costs): Define TARGET_GOLDMONT.
	(processor_type): Add PROCESSOR_GOLDMONT.
	* config/i386/i386.md: Add CPU "glm".
	* config/i386/glm.md: New file.
	* config/i386/x86-tune.def: Add m_GOLDMONT.
	* doc/invoke.texi: Add goldmont as x86 -march=/-mtune= CPU type.

libgcc/
	* config/i386/cpuinfo.h (processor_types): Add INTEL_GOLDMONT.
	* config/i386/cpuinfo.c (get_intel_cpu): Detect Goldmont.

gcc/testsuite/

	* gcc.target/i386/builtin_target.c: Test goldmont.
	* gcc.target/i386/funcspec-56.inc: Tests for arch=goldmont and
	arch=silvermont.

From-SVN: r260042
This commit is contained in:
Olga Makhotina 2018-05-08 12:23:08 +00:00 committed by Sebastian Peryt
parent 4d4015db89
commit 50e461dfe3
16 changed files with 829 additions and 29 deletions

View File

@ -1,3 +1,25 @@
2018-05-08 Olga Makhotina <olga.makhotina@intel.com>
* config.gcc: Support "goldmont".
* config/i386/driver-i386.c (host_detect_local_cpu): Detect "goldmont".
* config/i386/i386-c.c (ix86_target_macros_internal): Handle
PROCESSOR_GOLDMONT.
* config/i386/i386.c (m_GOLDMONT): Define.
(processor_target_table): Add "goldmont".
(PTA_GOLDMONT): Define.
(ix86_lea_outperforms): Add TARGET_GOLDMONT.
(get_builtin_code_for_version): Handle PROCESSOR_GOLDMONT.
(fold_builtin_cpu): Add M_INTEL_GOLDMONT.
(fold_builtin_cpu): Add "goldmont".
(ix86_add_stmt_cost): Add TARGET_GOLDMONT.
(ix86_option_override_internal): Add "goldmont".
* config/i386/i386.h (processor_costs): Define TARGET_GOLDMONT.
(processor_type): Add PROCESSOR_GOLDMONT.
* config/i386/i386.md: Add CPU "glm".
* config/i386/glm.md: New file.
* config/i386/x86-tune.def: Add m_GOLDMONT.
* doc/invoke.texi: Add goldmont as x86 -march=/-mtune= CPU type.
2018-05-08 Jakub Jelinek <jakub@redhat.com> 2018-05-08 Jakub Jelinek <jakub@redhat.com>
PR target/85572 PR target/85572

View File

@ -637,7 +637,8 @@ x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \
bdver3 bdver4 znver1 btver1 btver2 k8 k8-sse3 opteron opteron-sse3 nocona \ bdver3 bdver4 znver1 btver1 btver2 k8 k8-sse3 opteron opteron-sse3 nocona \
core2 corei7 corei7-avx core-avx-i core-avx2 atom slm nehalem westmere \ core2 corei7 corei7-avx core-avx-i core-avx2 atom slm nehalem westmere \
sandybridge ivybridge haswell broadwell bonnell silvermont knl knm \ sandybridge ivybridge haswell broadwell bonnell silvermont knl knm \
skylake-avx512 cannonlake icelake-client icelake-server skylake x86-64 native" skylake-avx512 cannonlake icelake-client icelake-server skylake goldmont \
x86-64 native"
# Additional x86 processors supported by --with-cpu=. Each processor # Additional x86 processors supported by --with-cpu=. Each processor
# MUST be separated by exactly one space. # MUST be separated by exactly one space.

View File

@ -755,6 +755,11 @@ const char *host_detect_local_cpu (int argc, const char **argv)
/* Silvermont. */ /* Silvermont. */
cpu = "silvermont"; cpu = "silvermont";
break; break;
case 0x5c:
case 0x5f:
/* Goldmont. */
cpu = "goldmont";
break;
case 0x0f: case 0x0f:
/* Merom. */ /* Merom. */
case 0x17: case 0x17:
@ -859,7 +864,10 @@ const char *host_detect_local_cpu (int argc, const char **argv)
cpu = "sandybridge"; cpu = "sandybridge";
else if (has_sse4_2) else if (has_sse4_2)
{ {
if (has_movbe) if (has_xsave)
/* Assume Goldmont. */
cpu = "goldmont";
else if (has_movbe)
/* Assume Silvermont. */ /* Assume Silvermont. */
cpu = "silvermont"; cpu = "silvermont";
else else

711
gcc/config/i386/glm.md Normal file
View File

@ -0,0 +1,711 @@
;; Goldmont(GLM) Scheduling
;; Copyright (C) 2018 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;;
;; Goldmont has 3 out-of-order IEC ports, 2 out-of-order FEC ports and an
;; out-of-order MEC.  All units below belong to the single "glm" automaton.
(define_automaton "glm")
;; EU: Execution Unit
;; Goldmont has 3 clusters - IEC, FEC, MEC.
;; IEC has three execution ports - IEC-0, IEC-1 and IEC-2.
;; FEC has two execution ports - FEC-0 and FEC-1.
;; MEC has two execution ports - MEC-0 (load) and MEC-1 (store).
(define_cpu_unit "glm-iec-0,glm-iec-1,glm-iec-2" "glm")
(define_cpu_unit "glm-fec-0,glm-fec-1,glm-load,glm-store" "glm")
;; Reservation notation used throughout this file: "|" picks one alternative,
;; "+" reserves units in the same cycle, "," advances to the next cycle,
;; "*N" repeats for N cycles and "nothing" reserves no unit.
;; Some EUs have duplicated copies and can be accessed via either port 0, 1 or 2.
(define_reservation "glm-iec-any" "(glm-iec-0 | glm-iec-1 | glm-iec-2)")
;; Any one integer port together with the load unit, the store unit, or both.
(define_reservation "glm-iec-any-load" "(glm-iec-0|glm-iec-1|glm-iec-2)+glm-load")
(define_reservation "glm-iec-any-store" "(glm-iec-0|glm-iec-1|glm-iec-2)+glm-store")
(define_reservation "glm-iec-any-both" "(glm-iec-0 | glm-iec-1 | glm-iec-2) + glm-load + glm-store")
;; Both FEC ports in the same cycle.
(define_reservation "glm-fec-all" "(glm-fec-0 + glm-fec-1)")
;; Every unit of the automaton in the same cycle.
(define_reservation "glm-all" "(glm-iec-0+glm-iec-1+glm-iec-2)+(glm-fec-0+glm-fec-1)+(glm-load+glm-store)")
;; Port-specific integer reservations.  Note that the "-mem" variants for
;; ports 1 and 2 reserve the load unit only, not the store unit.
(define_reservation "glm-int-0" "glm-iec-0")
(define_reservation "glm-int-0-load" "glm-iec-0 + glm-load")
(define_reservation "glm-int-0-both" "glm-iec-0 + glm-load + glm-store")
(define_reservation "glm-int-1" "glm-iec-1")
(define_reservation "glm-int-1-mem" "glm-iec-1 + glm-load")
(define_reservation "glm-int-2" "glm-iec-2")
(define_reservation "glm-int-2-mem" "glm-iec-2 + glm-load")
;; FEC port 0 only, and either FEC port.
(define_reservation "glm-fp-0" "glm-fec-0")
(define_reservation "glm-fec-any" "(glm-fec-0 | glm-fec-1)")
;;; fmul insn can have 4 or 5 cycles latency for scalar and vector types.
;;; FEC-0 is reserved for the first cycle only; "nothing" pads the remaining
;;; cycles so the unit is free for another insn.
(define_reservation "glm-fmul-4c" "glm-fec-0, nothing*3")
(define_reservation "glm-fmul-4c-mem" "glm-fec-0+glm-load, nothing*3")
(define_reservation "glm-fmul-5c" "glm-fec-0, nothing*4")
;;; fadd has 3 cycles latency and is reserved on FEC-1.
(define_reservation "glm-fadd-3c" "glm-fec-1, nothing*2")
(define_reservation "glm-fadd-3c-mem" "glm-fec-1+glm-load, nothing*2")
;;; imul insn has 3 cycles latency for SI operands; reserved on IEC-1.
(define_reservation "glm-imul-32" "glm-iec-1, nothing*2")
(define_reservation "glm-imul-mem-32"
"(glm-iec-1+glm-load), nothing*2")
;;; imul has 5 cycles latency for DI operands with 1/2 tput:
;;; IEC-1 is held for two consecutive cycles.
(define_reservation "glm-imul-64"
"glm-iec-1, glm-iec-1, nothing*3")
(define_reservation "glm-imul-mem-64"
"glm-iec-1+glm-load, glm-iec-1, nothing*3")
;; Type "other" insns whose atom_unit attribute is not "jeu": latency 9,
;; reserving every unit for all 9 cycles.
(define_insn_reservation "glm_other" 9
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "other")
(eq_attr "atom_unit" "!jeu")))
"glm-all*9")
;; return has type "other" with atom_unit "jeu": latency 1, every unit
;; reserved for that single cycle.
(define_insn_reservation "glm_other_2" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "other")
(eq_attr "atom_unit" "jeu")))
"glm-all")
;; Type "multi" insns are modelled like glm_other: 9 cycles on every unit.
(define_insn_reservation "glm_multi" 9
(and (eq_attr "cpu" "glm")
(eq_attr "type" "multi"))
"glm-all*9")
;; Normal alu insns without carry, register-only: any IEC port, latency 1.
(define_insn_reservation "glm_alu" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "alu")
(and (eq_attr "memory" "none")
(eq_attr "use_carry" "0"))))
"glm-iec-any")
;; Normal alu insns without carry, but use MEC (load operand).
(define_insn_reservation "glm_alu_load" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "alu")
(and (eq_attr "memory" "load")
(eq_attr "use_carry" "0"))))
"glm-iec-any-load")
;; Normal alu insns without carry that both load and store
;; (memory attribute "both").
(define_insn_reservation "glm_alu_mem" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "alu")
(and (eq_attr "memory" "both")
(eq_attr "use_carry" "0"))))
"glm-iec-any-both")
;; Alu insn consuming CF, such as add/sbb; register-only form.
;; Restricted to IEC-2, latency 2.
(define_insn_reservation "glm_alu_carry" 2
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "alu")
(and (eq_attr "memory" "none")
(eq_attr "use_carry" "1"))))
"glm-int-2, nothing")
;; Alu insn consuming CF, such as add/sbb; any memory form.
;; IEC-2 plus the load unit, latency 2.
(define_insn_reservation "glm_alu_carry_mem" 2
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "alu")
(and (eq_attr "memory" "!none")
(eq_attr "use_carry" "1"))))
"glm-int-2-mem, nothing")
;; Register-only alu1 insns without the 0f prefix: IEC-1, latency 1.
;; (The inner "and" takes three operands here.)
(define_insn_reservation "glm_alu1" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "alu1")
(eq_attr "memory" "none") (eq_attr "prefix_0f" "0")))
"glm-int-1")
;; Register-only alu1 insns with the 0f prefix, e.g. bsf (and presumably
;; bsr -- TODO confirm): latency 10, IEC-1 busy for the first 8 cycles.
(define_insn_reservation "glm_alu1_1" 10
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "alu1")
(eq_attr "memory" "none") (eq_attr "prefix_0f" "1")))
"glm-int-1*8,nothing*2")
;; alu1 insns with any memory operand, regardless of prefix:
;; IEC-1 plus the load unit, latency 1.
(define_insn_reservation "glm_alu1_mem" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "alu1")
(eq_attr "memory" "!none")))
"glm-int-1-mem")
;; neg/not on registers: any IEC port, latency 1.
(define_insn_reservation "glm_negnot" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "negnot")
(eq_attr "memory" "none")))
"glm-iec-any")
;; neg/not with any memory operand: any IEC port plus both the load and
;; store units, latency 1.
(define_insn_reservation "glm_negnot_mem" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "negnot")
(eq_attr "memory" "!none")))
"glm-iec-any-both")
;; Integer register-to-register move: any IEC port, latency 1.
(define_insn_reservation "glm_imov" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "imov")
(eq_attr "memory" "none")))
"glm-iec-any")
;; Integer load: any IEC port plus the load unit, latency 2.
(define_insn_reservation "glm_imov_load" 2
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "imov")
(eq_attr "memory" "load")))
"glm-iec-any-load,nothing")
;; Integer store: any IEC port plus the store unit, latency 1.
(define_insn_reservation "glm_imov_store" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "imov")
(eq_attr "memory" "store")))
"glm-iec-any-store")
;; 16<-16, 32<-32 (operands 0 and 1 have the same HI or SI mode), no memory.
(define_insn_reservation "glm_imovx" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "imovx")
(and (eq_attr "memory" "none")
(ior (and (match_operand:HI 0 "register_operand")
(match_operand:HI 1 "general_operand"))
(and (match_operand:SI 0 "register_operand")
(match_operand:SI 1 "general_operand"))))))
"glm-iec-any")
;; 16<-16, 32<-32, mem: any IEC port plus the load unit.
(define_insn_reservation "glm_imovx_mem" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "imovx")
(and (eq_attr "memory" "!none")
(ior (and (match_operand:HI 0 "register_operand")
(match_operand:HI 1 "general_operand"))
(and (match_operand:SI 0 "register_operand")
(match_operand:SI 1 "general_operand"))))))
"glm-iec-any-load")
;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, no memory.
;; Matched as: QI destination, or SI destination with a non-SI source,
;; or DI destination.
(define_insn_reservation "glm_imovx_2" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "imovx")
(and (eq_attr "memory" "none")
(ior (match_operand:QI 0 "register_operand")
(ior (and (match_operand:SI 0 "register_operand")
(not (match_operand:SI 1 "general_operand")))
(match_operand:DI 0 "register_operand"))))))
"glm-iec-any")
;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, load: latency 2.
(define_insn_reservation "glm_imovx_2_load" 2
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "imovx")
(and (eq_attr "memory" "load")
(ior (match_operand:QI 0 "register_operand")
(ior (and (match_operand:SI 0 "register_operand")
(not (match_operand:SI 1 "general_operand")))
(match_operand:DI 0 "register_operand"))))))
"glm-iec-any-load,nothing")
;; Same operand shapes with any memory attribute other than "none".
;; NOTE(review): "!none" also covers "load", so this condition overlaps with
;; glm_imovx_2_load above; confirm the load form is meant to use the
;; earlier reservation.
(define_insn_reservation "glm_imovx_2_mem" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "imovx")
(and (eq_attr "memory" "!none")
(ior (match_operand:QI 0 "register_operand")
(ior (and (match_operand:SI 0 "register_operand")
(not (match_operand:SI 1 "general_operand")))
(match_operand:DI 0 "register_operand"))))))
"glm-iec-any-both")
;; 16<-8: IEC-0 only, latency 3.  No memory attribute is tested here.
(define_insn_reservation "glm_imovx_3" 3
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "imovx")
(and (match_operand:HI 0 "register_operand")
(match_operand:QI 1 "general_operand"))))
"glm-int-0, nothing*2")
;; lea in any mode other than HI: any IEC port, latency 1.
(define_insn_reservation "glm_lea" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "lea")
(eq_attr "mode" "!HI")))
"glm-iec-any")
;; lea 16bit address is complex insn: every unit reserved for 2 cycles.
(define_insn_reservation "glm_lea_2" 2
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "lea")
(eq_attr "mode" "HI")))
"glm-all*2")
;; inc/dec on a register: IEC-0, latency 1.
(define_insn_reservation "glm_incdec" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "incdec")
(eq_attr "memory" "none")))
"glm-int-0")
;; inc/dec with a memory operand: IEC-0 plus load and store units in the
;; first cycle, latency 3.
(define_insn_reservation "glm_incdec_mem" 3
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "incdec")
(eq_attr "memory" "!none")))
"glm-int-0-both, nothing*2")
;; Simple shift instructions (no 0f prefix), no memory: IEC-0, latency 1.
(define_insn_reservation "glm_ishift" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "ishift")
(and (eq_attr "memory" "none") (eq_attr "prefix_0f" "0"))))
"glm-int-0")
;; Simple shift instructions (no 0f prefix), memory: IEC-0 plus load and
;; store units, latency 2.
(define_insn_reservation "glm_ishift_mem" 2
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "ishift")
(and (eq_attr "memory" "!none") (eq_attr "prefix_0f" "0"))))
"glm-int-0-both,nothing")
;; 0f-prefixed shift (presumably the double shifts shld/shrd -- TODO confirm)
;; is a complex insn with latency of 4 cycles; every unit is reserved for
;; all 4 cycles, with or without a memory operand.
(define_insn_reservation "glm_ishift_3" 4
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "ishift")
(eq_attr "prefix_0f" "1")))
"glm-all*4")
;; ishift1 insns follow the same IEC-0 pattern as simple shifts.
(define_insn_reservation "glm_ishift1" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "ishift1")
(eq_attr "memory" "none")))
"glm-int-0")
(define_insn_reservation "glm_ishift1_mem" 2
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "ishift1")
(eq_attr "memory" "!none")))
"glm-int-0-both,nothing")
;; Rotates follow the same IEC-0 pattern as simple shifts.
(define_insn_reservation "glm_rotate" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "rotate")
(eq_attr "memory" "none")))
"glm-int-0")
(define_insn_reservation "glm_rotate_mem" 2
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "rotate")
(eq_attr "memory" "!none")))
"glm-int-0-both,nothing")
;; SImode imul, register operands: latency 3 on IEC-1.
(define_insn_reservation "glm_imul" 3
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "imul")
(and (eq_attr "memory" "none") (eq_attr "mode" "SI"))))
"glm-imul-32")
;; SImode imul with a memory operand: additionally reserves the load unit.
(define_insn_reservation "glm_imul_load" 3
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "imul")
(and (eq_attr "memory" "!none") (eq_attr "mode" "SI"))))
"glm-imul-mem-32")
;; latency set to 5 as common 64x64 imul with 1/2 tput.
;; The condition is mode "!SI", so every non-SI imul uses this reservation.
(define_insn_reservation "glm_imul64" 5
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "imul")
(and (eq_attr "memory" "none") (eq_attr "mode" "!SI"))))
"glm-imul-64")
;; Non-SI imul with a memory operand.
;; NOTE(review): this name uses a hyphen ("glm_imul64-load") where every
;; other reservation in this file uses underscores.
(define_insn_reservation "glm_imul64-load" 5
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "imul")
(and (eq_attr "memory" "!none") (eq_attr "mode" "!SI"))))
"glm-imul-mem-64")
;; Integer divide: latency 25, every unit reserved for the first 16 cycles.
(define_insn_reservation "glm_idiv" 25
(and (eq_attr "cpu" "glm")
(eq_attr "type" "idiv"))
"glm-all*16, nothing*9")
;; Integer compare, register operands: IEC-0, latency 1.
(define_insn_reservation "glm_icmp" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "icmp")
(eq_attr "memory" "none")))
"glm-int-0")
;; Integer compare with a memory operand: IEC-0 plus the load unit, latency 2.
(define_insn_reservation "glm_icmp_mem" 2
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "icmp")
(eq_attr "memory" "!none")))
"glm-int-0-load,nothing")
;; test, register operands: IEC-0, latency 1.
(define_insn_reservation "glm_test" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "test")
(eq_attr "memory" "none")))
"glm-int-0")
;; test with a memory operand: IEC-0 plus the load unit, latency 2.
(define_insn_reservation "glm_test_mem" 2
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "test")
(eq_attr "memory" "!none")))
"glm-int-0-load,nothing")
;; Branch whose memory attribute is not "load": IEC-1, latency 1.
(define_insn_reservation "glm_ibr" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "ibr")
(eq_attr "memory" "!load")))
"glm-int-1")
;; complex if jump target is from address: every unit for 2 cycles.
(define_insn_reservation "glm_ibr_2" 2
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "ibr")
(eq_attr "memory" "load")))
"glm-all*2")
;; setcc whose memory attribute is not "store": any IEC port, latency 1.
(define_insn_reservation "glm_setcc" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "setcc")
(eq_attr "memory" "!store")))
"glm-iec-any")
;; 2 cycles complex if target is in memory: every unit for 2 cycles.
(define_insn_reservation "glm_setcc_2" 2
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "setcc")
(eq_attr "memory" "store")))
"glm-all*2")
;; Conditional move, register operands: any IEC port, latency 2.
(define_insn_reservation "glm_icmov" 2
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "icmov")
(eq_attr "memory" "none")))
"glm-iec-any, nothing")
;; Conditional move with a memory operand: restricted to IEC-0 plus the
;; load unit, latency 2.
(define_insn_reservation "glm_icmov_mem" 2
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "icmov")
(eq_attr "memory" "!none")))
"glm-int-0-load, nothing")
;; push: IEC-1 and IEC-2 together for 2 cycles.  UCODE if segreg, ignored.
;; Note that neither the load nor the store unit is reserved.
(define_insn_reservation "glm_push" 2
(and (eq_attr "cpu" "glm")
(eq_attr "type" "push"))
"(glm-int-1+glm-int-2)*2")
;; pop r64 is 1 cycle. UCODE if segreg, ignored
(define_insn_reservation "glm_pop" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "pop")
(eq_attr "mode" "DI")))
"glm-int-1+glm-int-2")
;; pop non-r64 is 2 cycles. UCODE if segreg, ignored
(define_insn_reservation "glm_pop_2" 2
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "pop")
(eq_attr "mode" "!DI")))
"(glm-int-1+glm-int-2)*2")
;; call/callv: IEC-0 and IEC-1 together for 1 cycle.
;; UCODE if segreg, ignored
(define_insn_reservation "glm_call" 1
(and (eq_attr "cpu" "glm")
(eq_attr "type" "call,callv"))
"(glm-int-0+glm-int-1)")
;; leave: every unit for 3 cycles.
(define_insn_reservation "glm_leave" 3
(and (eq_attr "cpu" "glm")
(eq_attr "type" "leave"))
"glm-all*3")
;; Type "str" insns: every unit for 3 cycles.
(define_insn_reservation "glm_str" 3
(and (eq_attr "cpu" "glm")
(eq_attr "type" "str"))
"glm-all*3")
;; sselog, register operands: both FEC ports, latency 1.
(define_insn_reservation "glm_sselog" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sselog")
(eq_attr "memory" "none")))
"glm-fec-all")
;; sselog with a memory operand: both FEC ports plus the load unit.
(define_insn_reservation "glm_sselog_mem" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sselog")
(eq_attr "memory" "!none")))
"glm-fec-all+glm-load")
;; sselog1, register operands: FEC-0 only, latency 1.
(define_insn_reservation "glm_sselog1" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sselog1")
(eq_attr "memory" "none")))
"glm-fp-0")
;; sselog1 with a memory operand: FEC-0 plus the load unit.
(define_insn_reservation "glm_sselog1_mem" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sselog1")
(eq_attr "memory" "!none")))
"glm-fp-0+glm-load")
;; not pmad, not psad: destination is not V2DI and atom_unit is neither
;; "simul" nor "complex".  Uses the 3-cycle fadd reservation.
(define_insn_reservation "glm_sseiadd" 3
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sseiadd")
(and (not (match_operand:V2DI 0 "register_operand"))
(and (eq_attr "atom_unit" "!simul")
(eq_attr "atom_unit" "!complex")))))
"glm-fadd-3c")
;; pmad, psad and 64: atom_unit "simul" in DI or TI mode, destination not
;; V2DI.  Uses the 4-cycle fmul reservation.
(define_insn_reservation "glm_sseiadd_2" 4
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sseiadd")
(and (not (match_operand:V2DI 0 "register_operand"))
(and (eq_attr "atom_unit" "simul")
(eq_attr "mode" "DI,TI")))))
"glm-fmul-4c")
;; if paddq(64 bit op), phadd/phsub: V2DI destination or atom_unit
;; "complex".  Uses the 5-cycle fmul reservation.
(define_insn_reservation "glm_sseiadd_3" 5
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sseiadd")
(ior (match_operand:V2DI 0 "register_operand")
(eq_attr "atom_unit" "complex"))))
"glm-fmul-5c")
;; SSE integer shift when operand 2 is an immediate: FEC-0, latency 2.
;; NOTE(review): no reservation for sseishft with a non-immediate operand 2
;; is visible in this file -- confirm that is intended.
(define_insn_reservation "glm_sseishft" 2
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sseishft")
(match_operand 2 "immediate_operand")))
"glm-fp-0, nothing")
;; SSE integer multiply, register operands: 4-cycle fmul reservation.
(define_insn_reservation "glm_sseimul" 4
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "none")))
"glm-fmul-4c")
;; SSE integer multiply with a memory operand: also reserves the load unit.
(define_insn_reservation "glm_sseimul_load" 4
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "!none")))
"glm-fmul-4c-mem")
;; rcpss or rsqrtss: atom_sse_attr "rcp" in SF mode, 4-cycle fmul reservation.
(define_insn_reservation "glm_sse" 4
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sse")
(and (eq_attr "atom_sse_attr" "rcp") (eq_attr "mode" "SF"))))
"glm-fmul-4c")
;; movshdup, movsldup. Suggest to type sseishft
(define_insn_reservation "glm_sse_2" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sse")
(eq_attr "atom_sse_attr" "movdup")))
"glm-fec-any")
;; lfence
(define_insn_reservation "glm_sse_3" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sse")
(eq_attr "atom_sse_attr" "lfence")))
"glm-fec-any")
;; sfence,clflush,mfence, prefetch: FEC-0 only.
(define_insn_reservation "glm_sse_4" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sse")
(ior (eq_attr "atom_sse_attr" "fence")
(eq_attr "atom_sse_attr" "prefetch"))))
"glm-fp-0")
;; atom_sse_attr "sqrt" or "mxcsr" (e.g. ldmxcsr), or "rcp" in V4SF mode
;; (the packed counterparts of glm_sse): latency 9, both FEC ports busy for
;; the first 6 cycles.
(define_insn_reservation "glm_sse_5" 9
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sse")
(ior (ior (eq_attr "atom_sse_attr" "sqrt")
(eq_attr "atom_sse_attr" "mxcsr"))
(and (eq_attr "atom_sse_attr" "rcp")
(eq_attr "mode" "V4SF")))))
"glm-fec-all*6, nothing*3")
;; xmm->xmm: either FEC port, latency 1.
(define_insn_reservation "glm_ssemov" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "ssemov")
(and (match_operand 0 "register_operand" "xy")
(match_operand 1 "register_operand" "xy"))))
"glm-fec-any")
;; reg->xmm: FEC-0, latency 1.
(define_insn_reservation "glm_ssemov_2" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "ssemov")
(and (match_operand 0 "register_operand" "xy")
(match_operand 1 "register_operand" "r"))))
"glm-fp-0")
;; xmm->reg: FEC-0, latency 3.
(define_insn_reservation "glm_ssemov_3" 3
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "ssemov")
(and (match_operand 0 "register_operand" "r")
(match_operand 1 "register_operand" "xy"))))
"glm-fp-0, nothing*2")
;; mov mem (load): either FEC port plus the load unit, latency 2.
(define_insn_reservation "glm_ssemov_load" 2
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "ssemov")
(eq_attr "memory" "load")))
"glm-fec-any+glm-load,nothing")
;; mov mem (store): either FEC port plus the store unit, latency 1.
(define_insn_reservation "glm_ssemov_store" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "ssemov")
(eq_attr "memory" "store")))
"glm-fec-any+glm-store")
;; SSE add, no memory: 3-cycle fadd reservation on FEC-1.
(define_insn_reservation "glm_sseadd" 3
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sseadd")
(eq_attr "memory" "none")))
"glm-fadd-3c")
;; SSE add with a memory operand: also reserves the load unit.
(define_insn_reservation "glm_sseadd_mem" 3
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sseadd")
(eq_attr "memory" "!none")))
"glm-fadd-3c-mem")
;; SSE multiply, no memory: 4-cycle fmul reservation on FEC-0.
;; Except dppd/dpps
(define_insn_reservation "glm_ssemul" 4
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "none")))
"glm-fmul-4c")
;; SSE multiply with a memory operand: also reserves the load unit.
(define_insn_reservation "glm_ssemul_mem" 4
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "!none")))
"glm-fmul-4c-mem")
;; SSE compare: either FEC port, latency 1; the memory attribute is not tested.
(define_insn_reservation "glm_ssecmp" 1
(and (eq_attr "cpu" "glm")
(eq_attr "type" "ssecmp"))
"glm-fec-any")
;; ssecomi: FEC-0, latency 1; the memory attribute is not tested.
(define_insn_reservation "glm_ssecomi" 1
(and (eq_attr "cpu" "glm")
(eq_attr "type" "ssecomi"))
"glm-fp-0")
;; no memory and cvtpi2ps, cvtps2pi, cvttps2pi: V2SI<->V4SF conversions with
;; a register source.  FEC-0, latency 4.
(define_insn_reservation "glm_ssecvt" 4
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "ssecvt")
(ior (and (match_operand:V2SI 0 "register_operand")
(match_operand:V4SF 1 "register_operand"))
(and (match_operand:V4SF 0 "register_operand")
(match_operand:V2SI 1 "register_operand")))))
"glm-fp-0, nothing*3")
;; memory and cvtpi2ps, cvtps2pi, cvttps2pi: same conversions with a memory
;; source.  FEC-0 plus the load unit, latency 4.
(define_insn_reservation "glm_ssecvt_mem" 4
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "ssecvt")
(ior (and (match_operand:V2SI 0 "register_operand")
(match_operand:V4SF 1 "memory_operand"))
(and (match_operand:V4SF 0 "register_operand")
(match_operand:V2SI 1 "memory_operand")))))
"glm-fp-0+glm-load, nothing*3")
;; cvtsi2sd: V2DF destination with an SI source that may be a register or
;; memory (nonimmediate_operand).  FEC-0, latency 1; the load unit is not
;; reserved even for the memory form.
(define_insn_reservation "glm_sseicvt" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sseicvt")
(and (match_operand:V2DF 0 "register_operand")
(match_operand:SI 1 "nonimmediate_operand"))))
"glm-fp-0")
;; otherwise. 8 cycles average for cvtsd2si
;; NOTE(review): the latency declared here is 4, not 8.  Also, the excluded
;; case is "V2DF destination with SI *memory* source", so register-source
;; cvtsi2sd forms match both this condition and glm_sseicvt above; confirm
;; which reservation is intended for them.
(define_insn_reservation "glm_sseicvt_2" 4
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "sseicvt")
(not (and (match_operand:V2DF 0 "register_operand")
(match_operand:SI 1 "memory_operand")))))
"glm-fp-0, nothing*3")
;; SSE divide: latency 13, both FEC ports busy for the first 12 cycles.
(define_insn_reservation "glm_ssediv" 13
(and (eq_attr "cpu" "glm")
(eq_attr "type" "ssediv"))
"glm-fec-all*12, nothing")
;; simple for fmov: register form, either FEC port, latency 1.
(define_insn_reservation "glm_fmov" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "none")))
"glm-fec-any")
;; fmov load: either FEC port plus the load unit, latency 3.
(define_insn_reservation "glm_fmov_load" 3
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "load")))
"glm-fec-any+glm-load, nothing*2")
;; fmov store: either FEC port plus the store unit, latency 1.
(define_insn_reservation "glm_fmov_store" 1
(and (eq_attr "cpu" "glm")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "store")))
"glm-fec-any+glm-store")
;; Define bypass here.  Each bypass overrides the producer's declared latency
;; with 2 for the listed producer/consumer pairs.
;; There will be 0 cycle stall from cmp/test to jcc
;; There will be 1 cycle stall from flag producer to cmov and adc/sbb
(define_bypass 2 "glm_icmp, glm_test, glm_alu, glm_alu_carry,
glm_alu1, glm_negnot, glm_incdec, glm_ishift,
glm_ishift1, glm_rotate"
"glm_icmov, glm_alu_carry")
;; lea to shift source stall is 1 cycle.
;; Guarded by "!ix86_dep_by_shift_count", i.e. presumably applied when the
;; dependency is not through the shift count (guard defined elsewhere).
(define_bypass 2 "glm_lea"
"glm_ishift, glm_ishift1, glm_rotate"
"!ix86_dep_by_shift_count")
;; non-lea to shift count stall is 1 cycle.
;; Guarded by ix86_dep_by_shift_count (defined elsewhere).
(define_bypass 2 "glm_alu_carry,
glm_alu,glm_alu1,glm_negnot,glm_imov,glm_imovx,
glm_incdec,glm_ishift,glm_ishift1,glm_rotate,
glm_setcc, glm_icmov, glm_pop, glm_imov_store,
glm_alu_mem, glm_alu_carry_mem, glm_alu1_mem,
glm_alu_load, glm_imovx_mem, glm_imovx_2_mem,
glm_imov_load, glm_icmov_mem, glm_fmov_load, glm_fmov_store"
"glm_ishift, glm_ishift1, glm_rotate,
glm_ishift_mem, glm_ishift1_mem,
glm_rotate_mem"
"ix86_dep_by_shift_count")

View File

@ -174,6 +174,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__silvermont"); def_or_undef (parse_in, "__silvermont");
def_or_undef (parse_in, "__silvermont__"); def_or_undef (parse_in, "__silvermont__");
break; break;
case PROCESSOR_GOLDMONT:
def_or_undef (parse_in, "__goldmont");
def_or_undef (parse_in, "__goldmont__");
break;
case PROCESSOR_KNL: case PROCESSOR_KNL:
def_or_undef (parse_in, "__knl"); def_or_undef (parse_in, "__knl");
def_or_undef (parse_in, "__knl__"); def_or_undef (parse_in, "__knl__");
@ -311,6 +315,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__tune_slm__"); def_or_undef (parse_in, "__tune_slm__");
def_or_undef (parse_in, "__tune_silvermont__"); def_or_undef (parse_in, "__tune_silvermont__");
break; break;
case PROCESSOR_GOLDMONT:
def_or_undef (parse_in, "__tune_goldmont__");
break;
case PROCESSOR_KNL: case PROCESSOR_KNL:
def_or_undef (parse_in, "__tune_knl__"); def_or_undef (parse_in, "__tune_knl__");
break; break;

View File

@ -149,6 +149,7 @@ const struct processor_costs *ix86_cost = NULL;
#define m_CANNONLAKE (HOST_WIDE_INT_1U<<PROCESSOR_CANNONLAKE) #define m_CANNONLAKE (HOST_WIDE_INT_1U<<PROCESSOR_CANNONLAKE)
#define m_ICELAKE_CLIENT (HOST_WIDE_INT_1U<<PROCESSOR_ICELAKE_CLIENT) #define m_ICELAKE_CLIENT (HOST_WIDE_INT_1U<<PROCESSOR_ICELAKE_CLIENT)
#define m_ICELAKE_SERVER (HOST_WIDE_INT_1U<<PROCESSOR_ICELAKE_SERVER) #define m_ICELAKE_SERVER (HOST_WIDE_INT_1U<<PROCESSOR_ICELAKE_SERVER)
#define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT)
#define m_INTEL (HOST_WIDE_INT_1U<<PROCESSOR_INTEL) #define m_INTEL (HOST_WIDE_INT_1U<<PROCESSOR_INTEL)
#define m_GEODE (HOST_WIDE_INT_1U<<PROCESSOR_GEODE) #define m_GEODE (HOST_WIDE_INT_1U<<PROCESSOR_GEODE)
@ -858,6 +859,7 @@ static const struct ptt processor_target_table[PROCESSOR_max] =
{"haswell", &core_cost, 16, 10, 16, 10, 16}, {"haswell", &core_cost, 16, 10, 16, 10, 16},
{"bonnell", &atom_cost, 16, 15, 16, 7, 16}, {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
{"silvermont", &slm_cost, 16, 15, 16, 7, 16}, {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
{"goldmont", &slm_cost, 16, 15, 16, 7, 16},
{"knl", &slm_cost, 16, 15, 16, 7, 16}, {"knl", &slm_cost, 16, 15, 16, 7, 16},
{"knm", &slm_cost, 16, 15, 16, 7, 16}, {"knm", &slm_cost, 16, 15, 16, 7, 16},
{"skylake", &skylake_cost, 16, 10, 16, 10, 16}, {"skylake", &skylake_cost, 16, 10, 16, 10, 16},
@ -3484,6 +3486,9 @@ ix86_option_override_internal (bool main_args_p,
| PTA_AVX512F | PTA_AVX512CD; | PTA_AVX512F | PTA_AVX512CD;
const wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE; const wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE;
const wide_int_bitmask PTA_SILVERMONT = PTA_WESTMERE | PTA_MOVBE | PTA_RDRND; const wide_int_bitmask PTA_SILVERMONT = PTA_WESTMERE | PTA_MOVBE | PTA_RDRND;
const wide_int_bitmask PTA_GOLDMONT = PTA_SILVERMONT | PTA_SHA | PTA_XSAVE
| PTA_RDSEED | PTA_XSAVEC | PTA_XSAVES | PTA_CLFLUSHOPT | PTA_XSAVEOPT
| PTA_FSGSBASE;
const wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW const wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
| PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ; | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
@ -3559,6 +3564,7 @@ ix86_option_override_internal (bool main_args_p,
{"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL}, {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
{"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT}, {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
{"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT}, {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
{"goldmont", PROCESSOR_GOLDMONT, CPU_GLM, PTA_GOLDMONT},
{"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL}, {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
{"knm", PROCESSOR_KNM, CPU_SLM, PTA_KNM}, {"knm", PROCESSOR_KNM, CPU_SLM, PTA_KNM},
{"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM}, {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
@ -21233,7 +21239,7 @@ ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
/* For Silvermont if using a 2-source or 3-source LEA for /* For Silvermont if using a 2-source or 3-source LEA for
non-destructive destination purposes, or due to wanting non-destructive destination purposes, or due to wanting
ability to use SCALE, the use of LEA is justified. */ ability to use SCALE, the use of LEA is justified. */
if (TARGET_SILVERMONT || TARGET_INTEL) if (TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_INTEL)
{ {
if (has_scale) if (has_scale)
return true; return true;
@ -32392,6 +32398,10 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
arg_str = "silvermont"; arg_str = "silvermont";
priority = P_PROC_SSE4_2; priority = P_PROC_SSE4_2;
break; break;
case PROCESSOR_GOLDMONT:
arg_str = "goldmont";
priority = P_PROC_SSE4_2;
break;
case PROCESSOR_AMDFAM10: case PROCESSOR_AMDFAM10:
arg_str = "amdfam10h"; arg_str = "amdfam10h";
priority = P_PROC_SSE4_A; priority = P_PROC_SSE4_A;
@ -33096,7 +33106,8 @@ fold_builtin_cpu (tree fndecl, tree *args)
M_INTEL_COREI7_SKYLAKE_AVX512, M_INTEL_COREI7_SKYLAKE_AVX512,
M_INTEL_COREI7_CANNONLAKE, M_INTEL_COREI7_CANNONLAKE,
M_INTEL_COREI7_ICELAKE_CLIENT, M_INTEL_COREI7_ICELAKE_CLIENT,
M_INTEL_COREI7_ICELAKE_SERVER M_INTEL_COREI7_ICELAKE_SERVER,
M_INTEL_GOLDMONT
}; };
static struct _arch_names_table static struct _arch_names_table
@ -33125,6 +33136,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
{"icelake-server", M_INTEL_COREI7_ICELAKE_SERVER}, {"icelake-server", M_INTEL_COREI7_ICELAKE_SERVER},
{"bonnell", M_INTEL_BONNELL}, {"bonnell", M_INTEL_BONNELL},
{"silvermont", M_INTEL_SILVERMONT}, {"silvermont", M_INTEL_SILVERMONT},
{"goldmont", M_INTEL_GOLDMONT},
{"knl", M_INTEL_KNL}, {"knl", M_INTEL_KNL},
{"knm", M_INTEL_KNM}, {"knm", M_INTEL_KNM},
{"amdfam10h", M_AMDFAM10H}, {"amdfam10h", M_AMDFAM10H},
@ -50647,7 +50659,7 @@ ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
/* We need to multiply all vector stmt cost by 1.7 (estimated cost) /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
for Silvermont as it has out of order integer pipeline and can execute for Silvermont as it has out of order integer pipeline and can execute
2 scalar instruction per tick, but has in order SIMD pipeline. */ 2 scalar instruction per tick, but has in order SIMD pipeline. */
if ((TARGET_SILVERMONT || TARGET_INTEL) if ((TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_INTEL)
&& stmt_info && stmt_info->stmt) && stmt_info && stmt_info->stmt)
{ {
tree lhs_op = gimple_get_lhs (stmt_info->stmt); tree lhs_op = gimple_get_lhs (stmt_info->stmt);

View File

@ -385,6 +385,7 @@ extern const struct processor_costs ix86_size_cost;
#define TARGET_HASWELL (ix86_tune == PROCESSOR_HASWELL) #define TARGET_HASWELL (ix86_tune == PROCESSOR_HASWELL)
#define TARGET_BONNELL (ix86_tune == PROCESSOR_BONNELL) #define TARGET_BONNELL (ix86_tune == PROCESSOR_BONNELL)
#define TARGET_SILVERMONT (ix86_tune == PROCESSOR_SILVERMONT) #define TARGET_SILVERMONT (ix86_tune == PROCESSOR_SILVERMONT)
#define TARGET_GOLDMONT (ix86_tune == PROCESSOR_GOLDMONT)
#define TARGET_KNL (ix86_tune == PROCESSOR_KNL) #define TARGET_KNL (ix86_tune == PROCESSOR_KNL)
#define TARGET_KNM (ix86_tune == PROCESSOR_KNM) #define TARGET_KNM (ix86_tune == PROCESSOR_KNM)
#define TARGET_SKYLAKE (ix86_tune == PROCESSOR_SKYLAKE) #define TARGET_SKYLAKE (ix86_tune == PROCESSOR_SKYLAKE)
@ -2279,6 +2280,7 @@ enum processor_type
PROCESSOR_HASWELL, PROCESSOR_HASWELL,
PROCESSOR_BONNELL, PROCESSOR_BONNELL,
PROCESSOR_SILVERMONT, PROCESSOR_SILVERMONT,
PROCESSOR_GOLDMONT,
PROCESSOR_KNL, PROCESSOR_KNL,
PROCESSOR_KNM, PROCESSOR_KNM,
PROCESSOR_SKYLAKE, PROCESSOR_SKYLAKE,

View File

@ -433,7 +433,7 @@
;; Processor type. ;; Processor type.
(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem, (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
atom,slm,haswell,generic,amdfam10,bdver1,bdver2,bdver3, atom,slm,glm,haswell,generic,amdfam10,bdver1,bdver2,bdver3,
bdver4,btver2,znver1" bdver4,btver2,znver1"
(const (symbol_ref "ix86_schedule"))) (const (symbol_ref "ix86_schedule")))
@ -1230,6 +1230,7 @@
(include "geode.md") (include "geode.md")
(include "atom.md") (include "atom.md")
(include "slm.md") (include "slm.md")
(include "glm.md")
(include "core2.md") (include "core2.md")
(include "haswell.md") (include "haswell.md")

View File

@ -41,7 +41,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
/* X86_TUNE_SCHEDULE: Enable scheduling. */ /* X86_TUNE_SCHEDULE: Enable scheduling. */
DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT
| m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC) | m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_GOLDMONT
| m_GENERIC)
/* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming /* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming
on modern chips. Prefer stores affecting whole integer register on modern chips. Prefer stores affecting whole integer register
@ -49,7 +50,7 @@ DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
value over movb. */ value over movb. */
DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency", DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",
m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE
| m_BONNELL | m_SILVERMONT | m_INTEL | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_INTEL
| m_KNL | m_KNM | m_AMD_MULTIPLE | m_SKYLAKE_AVX512 | m_GENERIC) | m_KNL | m_KNM | m_AMD_MULTIPLE | m_SKYLAKE_AVX512 | m_GENERIC)
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store
@ -84,14 +85,14 @@ DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall",
partial dependencies. */ partial dependencies. */
DEF_TUNE (X86_TUNE_MOVX, "movx", DEF_TUNE (X86_TUNE_MOVX, "movx",
m_PPRO | m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE m_PPRO | m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE
| m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_KNL | m_KNM | m_INTEL
| m_GEODE | m_AMD_MULTIPLE | m_SKYLAKE_AVX512 | m_GENERIC) | m_GEODE | m_AMD_MULTIPLE | m_SKYLAKE_AVX512 | m_GENERIC)
/* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by /* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by
full sized loads. */ full sized loads. */
DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall", DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall",
m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
| m_KNL | m_KNM | m_AMD_MULTIPLE | m_GENERIC) | m_KNL | m_KNM | m_GOLDMONT | m_AMD_MULTIPLE | m_GENERIC)
/* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent /* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent
conditional jump instruction for 32 bit TARGET. */ conditional jump instruction for 32 bit TARGET. */
@ -130,7 +131,7 @@ DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch",
DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args", DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args",
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
| m_ATHLON_K8) | m_GOLDMONT | m_ATHLON_K8)
/* X86_TUNE_PROLOGUE_USING_MOVE: Do not use push/pop in prologues that are /* X86_TUNE_PROLOGUE_USING_MOVE: Do not use push/pop in prologues that are
considered on critical path. */ considered on critical path. */
@ -192,7 +193,7 @@ DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns",
than 4 branch instructions in the 16 byte window. */ than 4 branch instructions in the 16 byte window. */
DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit", DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit",
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM
|m_INTEL | m_ATHLON_K8 | m_AMDFAM10) | m_GOLDMONT | m_INTEL | m_ATHLON_K8 | m_AMDFAM10)
/*****************************************************************************/ /*****************************************************************************/
/* Integer instruction selection tuning */ /* Integer instruction selection tuning */
@ -220,22 +221,24 @@ DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_LAKEMONT | m_PPRO))
done by GCC generated code. */ done by GCC generated code. */
DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec", DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec",
~(m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE ~(m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE
| m_BONNELL | m_SILVERMONT | m_INTEL | m_KNL | m_KNM | m_GENERIC)) | m_BONNELL | m_SILVERMONT | m_INTEL | m_KNL | m_KNM | m_GOLDMONT
| m_GENERIC))
/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
for DFmode copies */ for DFmode copies */
DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves", DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves",
~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
| m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC)) | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GOLDMONT
| m_GENERIC))
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
will impact LEA instruction selection. */ will impact LEA instruction selection. */
DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_KNL DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_KNL
| m_KNM | m_INTEL) | m_KNM | m_GOLDMONT | m_INTEL)
/* X86_TUNE_AVOID_LEA_FOR_ADDR: Avoid lea for address computation. */ /* X86_TUNE_AVOID_LEA_FOR_ADDR: Avoid lea for address computation. */
DEF_TUNE (X86_TUNE_AVOID_LEA_FOR_ADDR, "avoid_lea_for_addr", DEF_TUNE (X86_TUNE_AVOID_LEA_FOR_ADDR, "avoid_lea_for_addr",
m_BONNELL | m_SILVERMONT | m_KNL | m_KNM) m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_KNL | m_KNM)
/* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
vector path on AMD machines. vector path on AMD machines.
@ -252,7 +255,7 @@ DEF_TUNE (X86_TUNE_SLOW_IMUL_IMM8, "slow_imul_imm8",
/* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
a conditional move. */ a conditional move. */
DEF_TUNE (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, "avoid_mem_opnd_for_cmove", DEF_TUNE (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, "avoid_mem_opnd_for_cmove",
m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL) m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_KNL | m_KNM | m_INTEL)
/* X86_TUNE_SINGLE_STRINGOP: Enable use of single string operations, such /* X86_TUNE_SINGLE_STRINGOP: Enable use of single string operations, such
as MOVS and STOS (without a REP prefix) to move/set sequences of bytes. */ as MOVS and STOS (without a REP prefix) to move/set sequences of bytes. */
@ -271,17 +274,17 @@ DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES,
DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf", DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf",
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
| m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER
| m_BTVER | m_ZNVER1 | m_GENERIC) | m_BTVER | m_ZNVER1 | m_GOLDMONT | m_GENERIC)
/* X86_TUNE_USE_CLTD: Controls use of CLTD and CQTO instructions. */ /* X86_TUNE_USE_CLTD: Controls use of CLTD and CQTO instructions. */
DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd", DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd",
~(m_PENT | m_LAKEMONT | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL ~(m_PENT | m_LAKEMONT | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
| m_K6)) | m_K6 | m_GOLDMONT))
/* X86_TUNE_USE_BT: Enable use of BT (bit test) instructions. */ /* X86_TUNE_USE_BT: Enable use of BT (bit test) instructions. */
DEF_TUNE (X86_TUNE_USE_BT, "use_bt", DEF_TUNE (X86_TUNE_USE_BT, "use_bt",
m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
| m_LAKEMONT | m_AMD_MULTIPLE | m_GENERIC) | m_LAKEMONT | m_AMD_MULTIPLE | m_GOLDMONT | m_GENERIC)
/* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency /* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency
for bit-manipulation instructions. */ for bit-manipulation instructions. */
@ -297,7 +300,8 @@ DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4)
/* X86_TUNE_ONE_IF_CONV_INSN: Restrict a number of cmov insns in /* X86_TUNE_ONE_IF_CONV_INSN: Restrict a number of cmov insns in
if-converted sequence to one. */ if-converted sequence to one. */
DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn", DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GENERIC) m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GOLDMONT
| m_GENERIC)
/*****************************************************************************/ /*****************************************************************************/
/* 387 instruction selection tuning */ /* 387 instruction selection tuning */
@ -313,7 +317,8 @@ DEF_TUNE (X86_TUNE_USE_HIMODE_FIOP, "use_himode_fiop",
integer operand. */ integer operand. */
DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop", DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop",
~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL ~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL
| m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE | m_GENERIC)) | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE
| m_GOLDMONT | m_GENERIC))
/* X86_TUNE_USE_FFREEP: Use ffreep instruction instead of fstp. */ /* X86_TUNE_USE_FFREEP: Use ffreep instruction instead of fstp. */
DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE) DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)
@ -321,7 +326,8 @@ DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)
/* X86_TUNE_EXT_80387_CONSTANTS: Use fancy 80387 constants, such as PI. */ /* X86_TUNE_EXT_80387_CONSTANTS: Use fancy 80387 constants, such as PI. */
DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants", DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants",
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
| m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC) | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GOLDMONT
| m_GENERIC)
/*****************************************************************************/ /*****************************************************************************/
/* SSE instruction selection tuning */ /* SSE instruction selection tuning */
@ -336,14 +342,15 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill",
of a sequence loading registers by parts. */ of a sequence loading registers by parts. */
DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal", DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal",
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL | m_KNM m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL | m_KNM
| m_INTEL | m_SKYLAKE_AVX512 | m_AMDFAM10 | m_BDVER | m_BTVER | m_INTEL | m_SKYLAKE_AVX512 | m_GOLDMONT | m_AMDFAM10 | m_BDVER
| m_ZNVER1 | m_GENERIC) | m_BTVER | m_ZNVER1 | m_GENERIC)
/* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores instead /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores instead
of a sequence loading registers by parts. */ of a sequence loading registers by parts. */
DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal", DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal",
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL | m_KNM m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL | m_KNM
| m_INTEL | m_SKYLAKE_AVX512 | m_BDVER | m_ZNVER1 | m_GENERIC) | m_INTEL | m_SKYLAKE_AVX512 | m_GOLDMONT | m_BDVER | m_ZNVER1
| m_GENERIC)
/* Use packed single precision instructions where possible. I.e. movups instead /* Use packed single precision instructions where possible. I.e. movups instead
of movupd. */ of movupd. */
@ -380,7 +387,7 @@ DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions",
/* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for
fp converts to destination register. */ fp converts to destination register. */
DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts", DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts",
m_SILVERMONT | m_KNL | m_KNM | m_INTEL) m_SILVERMONT | m_KNL | m_KNM | m_GOLDMONT | m_INTEL)
/* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
from FP to FP. This form of instructions avoids partial write to the from FP to FP. This form of instructions avoids partial write to the
@ -394,11 +401,11 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10)
/* X86_TUNE_SLOW_PSHUFB: Indicates tunings with slow pshufb instruction. */ /* X86_TUNE_SLOW_PSHUFB: Indicates tunings with slow pshufb instruction. */
DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb", DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb",
m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL) m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_GOLDMONT | m_INTEL)
/* X86_TUNE_AVOID_4BYTE_PREFIXES: Avoid instructions requiring 4+ bytes of prefixes. */ /* X86_TUNE_AVOID_4BYTE_PREFIXES: Avoid instructions requiring 4+ bytes of prefixes. */
DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes", DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
m_SILVERMONT | m_INTEL) m_SILVERMONT | m_GOLDMONT | m_INTEL)
/* X86_TUNE_USE_GATHER: Use gather instructions. */ /* X86_TUNE_USE_GATHER: Use gather instructions. */
DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather", DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",

View File

@ -26498,6 +26498,11 @@ instruction set support.
Intel Silvermont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, Intel Silvermont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3,
SSE4.1, SSE4.2, POPCNT, AES, PCLMUL and RDRND instruction set support. SSE4.1, SSE4.2, POPCNT, AES, PCLMUL and RDRND instruction set support.
@item goldmont
Intel Goldmont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3,
SSE4.1, SSE4.2, POPCNT, AES, PCLMUL, RDRND, XSAVE, XSAVEOPT and FSGSBASE
instruction set support.
@item knl @item knl
Intel Knight's Landing CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, Intel Knight's Landing CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
SSSE3, SSE4.1, SSE4.2, POPCNT, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA, SSSE3, SSE4.1, SSE4.2, POPCNT, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA,

View File

@ -1,3 +1,9 @@
2018-05-08 Olga Makhotina <olga.makhotina@intel.com>
* gcc.target/i386/builtin_target.c: Test goldmont.
* gcc.target/i386/funcspec-56.inc: Tests for arch=goldmont and
arch=silvermont.
2018-05-08 Jakub Jelinek <jakub@redhat.com> 2018-05-08 Jakub Jelinek <jakub@redhat.com>
PR target/85572 PR target/85572

View File

@ -38,6 +38,11 @@ check_intel_cpu_model (unsigned int family, unsigned int model,
/* Silvermont. */ /* Silvermont. */
assert (__builtin_cpu_is ("silvermont")); assert (__builtin_cpu_is ("silvermont"));
break; break;
case 0x5c:
case 0x5f:
/* Goldmont. */
assert (__builtin_cpu_is ("goldmont"));
break;
case 0x57: case 0x57:
/* Knights Landing. */ /* Knights Landing. */
assert (__builtin_cpu_is ("knl")); assert (__builtin_cpu_is ("knl"));

View File

@ -141,6 +141,8 @@ extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
extern void test_arch_corei7 (void) __attribute__((__target__("arch=corei7"))); extern void test_arch_corei7 (void) __attribute__((__target__("arch=corei7")));
extern void test_arch_corei7_avx (void) __attribute__((__target__("arch=corei7-avx"))); extern void test_arch_corei7_avx (void) __attribute__((__target__("arch=corei7-avx")));
extern void test_arch_core_avx2 (void) __attribute__((__target__("arch=core-avx2"))); extern void test_arch_core_avx2 (void) __attribute__((__target__("arch=core-avx2")));
extern void test_arch_silvermont (void) __attribute__((__target__("arch=silvermont")));
extern void test_arch_goldmont (void) __attribute__((__target__("arch=goldmont")));
extern void test_arch_knl (void) __attribute__((__target__("arch=knl"))); extern void test_arch_knl (void) __attribute__((__target__("arch=knl")));
extern void test_arch_knm (void) __attribute__((__target__("arch=knm"))); extern void test_arch_knm (void) __attribute__((__target__("arch=knm")));
extern void test_arch_skylake (void) __attribute__((__target__("arch=skylake"))); extern void test_arch_skylake (void) __attribute__((__target__("arch=skylake")));

View File

@ -1,3 +1,8 @@
2018-05-08 Olga Makhotina <olga.makhotina@intel.com>
* config/i386/cpuinfo.h (processor_types): Add INTEL_GOLDMONT.
* config/i386/cpuinfo.c (get_intel_cpu): Detect Goldmont.
2018-05-07 Amaan Cheval <amaan.cheval@gmail.com> 2018-05-07 Amaan Cheval <amaan.cheval@gmail.com>
* config.host (x86_64-*-rtems*): Build crti.o and crtn.o. * config.host (x86_64-*-rtems*): Build crti.o and crtn.o.

View File

@ -140,6 +140,11 @@ get_intel_cpu (unsigned int family, unsigned int model, unsigned int brand_id)
/* Silvermont. */ /* Silvermont. */
__cpu_model.__cpu_type = INTEL_SILVERMONT; __cpu_model.__cpu_type = INTEL_SILVERMONT;
break; break;
case 0x5c:
case 0x5f:
/* Goldmont. */
__cpu_model.__cpu_type = INTEL_GOLDMONT;
break;
case 0x57: case 0x57:
/* Knights Landing. */ /* Knights Landing. */
__cpu_model.__cpu_type = INTEL_KNL; __cpu_model.__cpu_type = INTEL_KNL;

View File

@ -48,6 +48,7 @@ enum processor_types
AMD_BTVER2, AMD_BTVER2,
AMDFAM17H, AMDFAM17H,
INTEL_KNM, INTEL_KNM,
INTEL_GOLDMONT,
CPU_TYPE_MAX CPU_TYPE_MAX
}; };