diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 5454e2453bb2..fb960f7ba966 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -2402,22 +2402,6 @@ } [(set_attr "type" "vecperm")]) - -(define_insn "*altivec_vsplth_internal_v8hx" - [(set (match_operand:V8HF 0 "register_operand" "=v") - (vec_duplicate:V8HF - (vec_select:HF (match_operand:V8HF 1 "register_operand" "v") - (parallel - [(match_operand:QI 2 "const_0_to_7_operand" "")]))))] - "TARGET_ALTIVEC" -{ - if (!BYTES_BIG_ENDIAN) - operands[2] = GEN_INT (7 - INTVAL (operands[2])); - - return "vsplth %0,%1,%2"; -} - [(set_attr "type" "vecperm")]) - (define_insn "altivec_vsplth_direct" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") @@ -3146,15 +3130,6 @@ "lvewx %0,%y1" [(set_attr "type" "vecload")]) -(define_insn "*altivec_lvehf" - [(parallel - [(set (match_operand:V8HF 0 "register_operand" "=v") - (match_operand:V8HF 1 "memory_operand" "Z")) - (unspec [(const_int 0)] UNSPEC_LVE)])] - "TARGET_ALTIVEC" - "lvehx %0,%y1" - [(set_attr "type" "vecload")]) - (define_insn "altivec_lvxl_<mode>" [(parallel [(set (match_operand:VM2 0 "register_operand" "=v") diff --git a/gcc/config/rs6000/float16.cc b/gcc/config/rs6000/float16.cc index b887d400312a..3dc7273719c1 100644 --- a/gcc/config/rs6000/float16.cc +++ b/gcc/config/rs6000/float16.cc @@ -42,7 +42,7 @@ #include "common/common-target.h" #include "rs6000-internal.h" -/* Expand a bfloat16 scalar floating point operation: +/* Expand a bfloat16 floating point operation: ICODE: Operation to perform. RESULT: Result of the operation. 
@@ -64,7 +64,7 @@ bfloat16_operation_as_v4sf (enum rtx_code icode, rtx op1, rtx op2, rtx op3, - enum fp16_operation subtype) + enum bfloat16_operation subtype) { gcc_assert (can_create_pseudo_p ()); @@ -75,22 +75,19 @@ bfloat16_operation_as_v4sf (enum rtx_code icode, switch (subtype) { - case FP16_BINARY: + case BF16_BINARY: n_opts = 2; gcc_assert (op3 == NULL_RTX); break; - case FP16_FMA: - case FP16_FMS: - case FP16_NFMA: - case FP16_NFMS: + case BF16_FMA: + case BF16_FMS: + case BF16_NFMA: + case BF16_NFMS: gcc_assert (icode == FMA); n_opts = 3; break; - case FP16_UNARY: - case FP16_ABS_BINARY: - case FP16_NEG_BINARY: default: gcc_unreachable (); } @@ -147,41 +144,27 @@ bfloat16_operation_as_v4sf (enum rtx_code icode, } /* Do the operation in V4SFmode. */ - switch (subtype) + if (subtype == BF16_BINARY) + emit_insn (gen_rtx_SET (result_v4sf, + gen_rtx_fmt_ee (icode, V4SFmode, + ops_v4sf[0], + ops_v4sf[1]))); + + else /* FMA/FMS/NFMA/NFMS operation. */ { - case FP16_BINARY: - emit_insn (gen_rtx_SET (result_v4sf, - gen_rtx_fmt_ee (icode, V4SFmode, - ops_v4sf[0], - ops_v4sf[1]))); - break; + rtx op1 = ops_v4sf[0]; + rtx op2 = ops_v4sf[1]; + rtx op3 = ops_v4sf[2]; - case FP16_FMA: - case FP16_FMS: - case FP16_NFMA: - case FP16_NFMS: - { - rtx op1 = ops_v4sf[0]; - rtx op2 = ops_v4sf[1]; - rtx op3 = ops_v4sf[2]; + if (subtype == BF16_FMS || subtype == BF16_NFMS) + op3 = gen_rtx_NEG (V4SFmode, op3); - if (subtype == FP16_FMS || subtype == FP16_NFMS) - op3 = gen_rtx_NEG (V4SFmode, op3); + rtx op_fma = gen_rtx_FMA (V4SFmode, op1, op2, op3); - rtx op_fma = gen_rtx_FMA (V4SFmode, op1, op2, op3); + if (subtype == BF16_NFMA || subtype == BF16_NFMS) + op_fma = gen_rtx_NEG (V4SFmode, op_fma); - if (subtype == FP16_NFMA || subtype == FP16_NFMS) - op_fma = gen_rtx_NEG (V4SFmode, op_fma); - - emit_insn (gen_rtx_SET (result_v4sf, op_fma)); - } - break; - - case FP16_UNARY: - case FP16_ABS_BINARY: - case FP16_NEG_BINARY: - default: - gcc_unreachable (); + emit_insn (gen_rtx_SET 
(result_v4sf, op_fma)); } /* Convert V4SF result back to scalar mode. */ @@ -197,154 +180,3 @@ bfloat16_operation_as_v4sf (enum rtx_code icode, else gcc_unreachable (); } - - -/* Expand a _Float16 vector operation: - - ICODE: Operation to perform. - RESULT: Result of the operation. - OP1: Input operand1. - OP2: Input operand2. - OP3: Input operand3 or NULL_RTX. - SUBTYPE: Describe the operation. */ - -void -float16_vectorization (enum rtx_code icode, - rtx result, - rtx op1, - rtx op2, - rtx op3, - enum fp16_operation subtype) -{ - gcc_assert (can_create_pseudo_p ()); - - enum rtx_code unary_op = UNKNOWN; - rtx op_orig[3] = { op1, op2, op3 }; - rtx op_hi[3]; - rtx op_lo[3]; - rtx result_hi; - rtx result_lo; - size_t n_opts; - - switch (subtype) - { - case FP16_UNARY: - n_opts = 1; - break; - - case FP16_BINARY: - n_opts = 2; - break; - - case FP16_ABS_BINARY: - unary_op = ABS; - n_opts = 2; - break; - - case FP16_NEG_BINARY: - unary_op = NEG; - n_opts = 2; - break; - - case FP16_FMA: - case FP16_FMS: - case FP16_NFMA: - case FP16_NFMS: - n_opts = 3; - break; - - default: - gcc_unreachable (); - } - - /* Allocate 2 temporaries for the results and the input operands. */ - result_hi = gen_reg_rtx (V4SFmode); - result_lo = gen_reg_rtx (V4SFmode); - - for (size_t i = 0; i < n_opts; i++) - { - gcc_assert (op_orig[i] != NULL_RTX); - op_hi[i] = gen_reg_rtx (V4SFmode); /* high register. */ - op_lo[i] = gen_reg_rtx (V4SFmode); /* low register. */ - - emit_insn (gen_vec_unpacks_hi_v8hf (op_hi[i], op_orig[i])); - emit_insn (gen_vec_unpacks_lo_v8hf (op_lo[i], op_orig[i])); - } - - /* Do 2 sets of V4SFmode operations. 
*/ - switch (subtype) - { - case FP16_UNARY: - emit_insn (gen_rtx_SET (result_hi, - gen_rtx_fmt_e (icode, V4SFmode, op_hi[0]))); - - emit_insn (gen_rtx_SET (result_lo, - gen_rtx_fmt_e (icode, V4SFmode, op_lo[0]))); - break; - - case FP16_BINARY: - case FP16_ABS_BINARY: - case FP16_NEG_BINARY: - emit_insn (gen_rtx_SET (result_hi, - gen_rtx_fmt_ee (icode, V4SFmode, - op_hi[0], - op_hi[1]))); - - emit_insn (gen_rtx_SET (result_lo, - gen_rtx_fmt_ee (icode, V4SFmode, - op_lo[0], - op_lo[1]))); - break; - - case FP16_FMA: - case FP16_FMS: - case FP16_NFMA: - case FP16_NFMS: - { - rtx op1_hi = op_hi[0]; - rtx op2_hi = op_hi[1]; - rtx op3_hi = op_hi[2]; - - rtx op1_lo = op_lo[0]; - rtx op2_lo = op_lo[1]; - rtx op3_lo = op_lo[2]; - - if (subtype == FP16_FMS || subtype == FP16_NFMS) - { - op3_hi = gen_rtx_NEG (V4SFmode, op3_hi); - op3_lo = gen_rtx_NEG (V4SFmode, op3_lo); - } - - rtx op_fma_hi = gen_rtx_FMA (V4SFmode, op1_hi, op2_hi, op3_hi); - rtx op_fma_lo = gen_rtx_FMA (V4SFmode, op1_lo, op2_lo, op3_lo); - - if (subtype == FP16_NFMA || subtype == FP16_NFMS) - { - op_fma_hi = gen_rtx_NEG (V4SFmode, op_fma_hi); - op_fma_lo = gen_rtx_NEG (V4SFmode, op_fma_lo); - } - - emit_insn (gen_rtx_SET (result_hi, op_fma_hi)); - emit_insn (gen_rtx_SET (result_lo, op_fma_lo)); - } - break; - - default: - gcc_unreachable (); - } - - /* Add any unary operator modifications. */ - if (unary_op != UNKNOWN) - { - emit_insn (gen_rtx_SET (result_hi, - gen_rtx_fmt_e (unary_op, V4SFmode, result_hi))); - - emit_insn (gen_rtx_SET (result_lo, - gen_rtx_fmt_e (unary_op, V4SFmode, result_lo))); - } - - /* Combine the 2 V4SFmode operations into one V8HFmode vector. 
*/ - emit_insn (gen_vec_pack_trunc_v4sf_v8hf (result, result_hi, result_lo)); - return; -} - diff --git a/gcc/config/rs6000/float16.md b/gcc/config/rs6000/float16.md index 06cee341381d..28586654f96e 100644 --- a/gcc/config/rs6000/float16.md +++ b/gcc/config/rs6000/float16.md @@ -62,22 +62,6 @@ (V8BF "V4BF") (V8HF "V4HF")]) -;; Unary operators for float16 vectorization. -(define_code_iterator FLOAT16_UNARY_OP [abs neg]) - -;; Binary operators for float16 vectorization. -(define_code_iterator FLOAT16_BINARY_OP [plus minus mult smax smin]) - -;; Standard names for the unary/binary/ternary operators -(define_code_attr float16_names [(abs "abs") - (fma "fma") - (plus "add") - (minus "sub") - (mult "mul") - (neg "neg") - (smax "smax") - (smin "smin")]) - ;; UNSPEC constants (define_c_enum "unspec" [UNSPEC_FP16_SHIFT_LEFT_32BIT @@ -462,7 +446,7 @@ (define_insn_and_split "*bfloat16_binary_op_internal1" [(set (match_operand:SF 0 "vsx_register_operand") - (match_operator:SF 1 "fp16_binary_operator" + (match_operator:SF 1 "bfloat16_binary_operator" [(match_operand:SF 2 "bfloat16_v4sf_operand") (match_operand:SF 3 "bfloat16_v4sf_operand")]))] "TARGET_BFLOAT16_HW && can_create_pseudo_p () @@ -473,14 +457,14 @@ [(pc)] { bfloat16_operation_as_v4sf (GET_CODE (operands[1]), operands[0], operands[2], - operands[3], NULL_RTX, FP16_BINARY); + operands[3], NULL_RTX, BF16_BINARY); DONE; }) (define_insn_and_split "*bfloat16_binary_op_internal2" [(set (match_operand:BF 0 "vsx_register_operand") (float_truncate:BF - (match_operator:SF 1 "fp16_binary_operator" + (match_operator:SF 1 "bfloat16_binary_operator" [(match_operand:SF 2 "bfloat16_v4sf_operand") (match_operand:SF 3 "bfloat16_v4sf_operand")])))] "TARGET_BFLOAT16_HW && can_create_pseudo_p () @@ -491,7 +475,7 @@ [(pc)] { bfloat16_operation_as_v4sf (GET_CODE (operands[1]), operands[0], operands[2], - operands[3], NULL_RTX, FP16_BINARY); + operands[3], NULL_RTX, BF16_BINARY); DONE; }) @@ -510,7 +494,7 @@ [(pc)] { 
bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_FMA); + operands[3], BF16_FMA); DONE; }) @@ -530,7 +514,7 @@ [(pc)] { bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_FMA); + operands[3], BF16_FMA); DONE; }) @@ -550,7 +534,7 @@ [(pc)] { bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_FMS); + operands[3], BF16_FMS); DONE; }) @@ -571,7 +555,7 @@ [(pc)] { bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_FMS); + operands[3], BF16_FMS); DONE; }) @@ -591,7 +575,7 @@ [(pc)] { bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMA); + operands[3], BF16_NFMA); DONE; }) @@ -612,7 +596,7 @@ [(pc)] { bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMA); + operands[3], BF16_NFMA); DONE; }) @@ -633,7 +617,7 @@ [(pc)] { bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMA); + operands[3], BF16_NFMA); DONE; }) @@ -654,7 +638,7 @@ [(pc)] { bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMS); + operands[3], BF16_NFMS); DONE; }) @@ -676,7 +660,7 @@ [(pc)] { bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMS); + operands[3], BF16_NFMS); DONE; }) @@ -698,151 +682,10 @@ [(pc)] { bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMS); + operands[3], BF16_NFMS); DONE; }) - -;; Add vectorization support for _Float16. Unfortunately, since there -;; can only be one vec_pack_trunc_v4sf, we choose to support automatic -;; vectorization for BFmode. The following insns define vectorization -;; for HFmode. - -;; Unary operators being vectorized. 
-(define_insn_and_split "<float16_names>v8hf3" - [(set (match_operand:V8HF 0 "vsx_register_operand") - (FLOAT16_UNARY_OP:V8HF - (match_operand:V8HF 1 "vsx_register_operand")))] - "TARGET_FLOAT16_HW && can_create_pseudo_p ()" - "#" - "&& 1" - [(pc)] -{ - float16_vectorization (<CODE>, operands[0], operands[1], NULL_RTX, NULL_RTX, - FP16_UNARY); - DONE; -}) - -;; Binary operators being vectorized. -(define_insn_and_split "<float16_names>v8hf3" - [(set (match_operand:V8HF 0 "vsx_register_operand") - (FLOAT16_BINARY_OP:V8HF - (match_operand:V8HF 1 "vsx_register_operand") - (match_operand:V8HF 2 "vsx_register_operand")))] - "TARGET_FLOAT16_HW && can_create_pseudo_p ()" - "#" - "&& 1" - [(pc)] -{ - float16_vectorization (<CODE>, operands[0], operands[1], operands[2], - NULL_RTX, FP16_BINARY); - DONE; -}) - -;; Negative of binary operators being vectorized. -(define_insn_and_split "*neg_<float16_names>v8hf3" - [(set (match_operand:V8HF 0 "vsx_register_operand") - (neg:V8HF - (FLOAT16_BINARY_OP:V8HF - (match_operand:V8HF 1 "vsx_register_operand") - (match_operand:V8HF 2 "vsx_register_operand"))))] - "TARGET_FLOAT16_HW && can_create_pseudo_p ()" - "#" - "&& 1" - [(pc)] -{ - float16_vectorization (<CODE>, operands[0], operands[1], operands[2], - NULL_RTX, FP16_NEG_BINARY); - DONE; -}) - -;; Absolute value of binary operators being vectorized. -(define_insn_and_split "*abs_<float16_names>v8hf3" - [(set (match_operand:V8HF 0 "vsx_register_operand") - (abs:V8HF - (FLOAT16_BINARY_OP:V8HF - (match_operand:V8HF 1 "vsx_register_operand") - (match_operand:V8HF 2 "vsx_register_operand"))))] - "TARGET_FLOAT16_HW && can_create_pseudo_p ()" - "#" - "&& 1" - [(pc)] -{ - float16_vectorization (<CODE>, operands[0], operands[1], operands[2], - NULL_RTX, FP16_ABS_BINARY); - DONE; -}) - -;; FMA operations being vectorized. 
-(define_insn_and_split "fmav8hf3" - [(set (match_operand:V8HF 0 "vsx_register_operand") - (fma:V8HF - (match_operand:V8HF 1 "vsx_register_operand") - (match_operand:V8HF 2 "vsx_register_operand") - (match_operand:V8HF 3 "vsx_register_operand")))] - "TARGET_FLOAT16_HW && can_create_pseudo_p ()" - "#" - "&& 1" - [(pc)] -{ - float16_vectorization (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_FMA); - DONE; -}) - -(define_insn_and_split "*fmsv8hf3" - [(set (match_operand:V8HF 0 "vsx_register_operand") - (fma:V8HF - (match_operand:V8HF 1 "vsx_register_operand") - (match_operand:V8HF 2 "vsx_register_operand") - (neg:V8HF - (match_operand:V8HF 3 "vsx_register_operand"))))] - "TARGET_FLOAT16_HW && can_create_pseudo_p ()" - "#" - "&& 1" - [(pc)] -{ - float16_vectorization (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_FMS); - DONE; -}) - -(define_insn_and_split "*nfmav8hf3" - [(set (match_operand:V8HF 0 "vsx_register_operand") - (neg:V8HF - (fma:V8HF - (match_operand:V8HF 1 "vsx_register_operand") - (match_operand:V8HF 2 "vsx_register_operand") - (match_operand:V8HF 3 "vsx_register_operand"))))] - "TARGET_FLOAT16_HW && can_create_pseudo_p ()" - "#" - "&& 1" - [(pc)] -{ - float16_vectorization (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMA); - DONE; -}) - -(define_insn_and_split "*nfmsv8hf3" - [(set (match_operand:V8HF 0 "vsx_register_operand") - (neg:V8HF - (fma:V8HF - (match_operand:V8HF 1 "vsx_register_operand") - (match_operand:V8HF 2 "vsx_register_operand") - (neg:V8HF - (match_operand:V8HF 3 "vsx_register_operand")))))] - "TARGET_FLOAT16_HW && can_create_pseudo_p ()" - "#" - "&& 1" - [(pc)] -{ - float16_vectorization (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMS); - DONE; -}) - - ;; If we do multiple __bfloat16 operations, between the first and ;; second operation, GCC will want to convert the first operation from ;; V4SFmode to SFmode and then reconvert it back to V4SFmode. 
On the diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 4394b4a6daa1..d47d09cf73db 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -2208,7 +2208,7 @@ ;; the operation in vector mode rather than convverting the BFmode to a ;; V8BFmode vector, converting that V8BFmode vector to V4SFmode, and ;; then converting the V4SFmode element to SFmode scalar. -(define_predicate "fp16_binary_operator" +(define_predicate "bfloat16_binary_operator" (match_code "plus,minus,mult,smax,smin")) ;; Match bfloat16/float operands that can be optimized to do the diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index f677506b4aa0..db38468df816 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -260,22 +260,17 @@ extern unsigned constant_generates_xxspltiw (vec_const_128bit_type *); extern unsigned constant_generates_xxspltidp (vec_const_128bit_type *); /* From float16.cc. */ -/* Optimize bfloat16 and float16 operations. */ -enum fp16_operation { - FP16_UNARY, /* Bfloat16/float16 unary op. */ - FP16_BINARY, /* Bfloat16/float16 binary op. */ - FP16_ABS_BINARY, /* abs (binary op). */ - FP16_NEG_BINARY, /* - binary op. */ - FP16_FMA, /* (a * b) + c. */ - FP16_FMS, /* (a * b) - c. */ - FP16_NFMA, /* - ((a * b) + c). */ - FP16_NFMS /* - ((a * b) - c). */ +/* Optimize bfloat16 operations. */ +enum bfloat16_operation { + BF16_BINARY, /* Bfloat16 binary op. */ + BF16_FMA, /* (a * b) + c. */ + BF16_FMS, /* (a * b) - c. */ + BF16_NFMA, /* - ((a * b) + c). */ + BF16_NFMS /* - ((a * b) - c). */ }; extern void bfloat16_operation_as_v4sf (enum rtx_code, rtx, rtx, rtx, rtx, - enum fp16_operation); -extern void float16_vectorization (enum rtx_code, rtx, rtx, rtx, rtx, - enum fp16_operation); + enum bfloat16_operation); #endif /* RTX_CODE */ #ifdef TREE_CODE