mirror of git://gcc.gnu.org/git/gcc.git
Undo patches 330 and 329.
2025-10-13 Michael Meissner <meissner@linux.ibm.com> gcc/ * config/rs6000/altivec.md (altivec_vsplth_internal_v8h): New insn. (altivec_lvehf): Likewise. 2025-10-13 Michael Meissner <meissner@linux.ibm.com> gcc/ * config/rs6000/float16.cc (bfloat16_operation_as_v4sf): Upgrade float16 vector optimizations. (float16_vectorization): New function for _Float16 vectorization support. * config/rs6000/float16.md (FLOAT16_UNARY_OP): New code iterator. (FLOAT16_BINARY_OP): Likewise. (float16_names): New code attribute. (bfloat16_binary_op_internal1): Upgrade float16 optimizations and change bfloat16 optimization names. (bfloat16_binary_op_internal2): Likewise. (bfloat16_binary_op_internal2): Likewise. (bfloat16_fma_op_internal1): Likewise. (bfloat16_fma_op_internal2): Likewise. (bfloat16_fms_op_internal1): Likewise. (bfloat16_fms_op_internal2): Likewise. (bfloat16_nfma_op_internal1): Likewise. (bfloat16_nfma_op_internal2): Likewise. (bfloat16_nfma_op_internal3): Likewise. (bfloat16_nfms_op_internal1): Likewise. (bfloat16_nfms_op_internal2): Likewise. (bfloat16_nfms_op_internal3): Likewise. (<float16_names>v8hf, FLOAT16_UNARY_OP iterator): Likewise. (<float16_names>v8hf, FLOAT16_BINARY_OP iterator): Likewise. (neg_<float16_names>v8hf3): Likewise. (abs_<float16_names>v8hf3): Likewise. (fmav8hf3): Likewise. (fmsv8hf3): Likewise. (nfmav8hf3): Likewise. (nfmsv8hf3): Likewise. * config/rs6000/predicates.md (fp16_binary_operator): Rename from bfloat16_binary_operator. (enum fp16_operation): Rename from enum bfloat16_operation. * config/rs6000/rs6000-protos.h (float16_vectorization): New declaration.
This commit is contained in:
parent
8aac2a9845
commit
724f5eae29
|
@ -2402,22 +2402,6 @@
|
|||
}
|
||||
[(set_attr "type" "vecperm")])
|
||||
|
||||
|
||||
(define_insn "*altivec_vsplth_internal_v8hx"
|
||||
[(set (match_operand:V8HF 0 "register_operand" "=v")
|
||||
(vec_duplicate:V8HF
|
||||
(vec_select:HF (match_operand:V8HF 1 "register_operand" "v")
|
||||
(parallel
|
||||
[(match_operand:QI 2 "const_0_to_7_operand" "")]))))]
|
||||
"TARGET_ALTIVEC"
|
||||
{
|
||||
if (!BYTES_BIG_ENDIAN)
|
||||
operands[2] = GEN_INT (7 - INTVAL (operands[2]));
|
||||
|
||||
return "vsplth %0,%1,%2";
|
||||
}
|
||||
[(set_attr "type" "vecperm")])
|
||||
|
||||
(define_insn "altivec_vsplth_direct"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "=v")
|
||||
(unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
|
||||
|
@ -3146,15 +3130,6 @@
|
|||
"lvewx %0,%y1"
|
||||
[(set_attr "type" "vecload")])
|
||||
|
||||
(define_insn "*altivec_lvehf"
|
||||
[(parallel
|
||||
[(set (match_operand:V8HF 0 "register_operand" "=v")
|
||||
(match_operand:V8HF 1 "memory_operand" "Z"))
|
||||
(unspec [(const_int 0)] UNSPEC_LVE)])]
|
||||
"TARGET_ALTIVEC"
|
||||
"lvehx %0,%y1"
|
||||
[(set_attr "type" "vecload")])
|
||||
|
||||
(define_insn "altivec_lvxl_<mode>"
|
||||
[(parallel
|
||||
[(set (match_operand:VM2 0 "register_operand" "=v")
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
#include "common/common-target.h"
|
||||
#include "rs6000-internal.h"
|
||||
|
||||
/* Expand a bfloat16 scalar floating point operation:
|
||||
/* Expand a bfloat16 floating point operation:
|
||||
|
||||
ICODE: Operation to perform.
|
||||
RESULT: Result of the operation.
|
||||
|
@ -64,7 +64,7 @@ bfloat16_operation_as_v4sf (enum rtx_code icode,
|
|||
rtx op1,
|
||||
rtx op2,
|
||||
rtx op3,
|
||||
enum fp16_operation subtype)
|
||||
enum bfloat16_operation subtype)
|
||||
{
|
||||
gcc_assert (can_create_pseudo_p ());
|
||||
|
||||
|
@ -75,22 +75,19 @@ bfloat16_operation_as_v4sf (enum rtx_code icode,
|
|||
|
||||
switch (subtype)
|
||||
{
|
||||
case FP16_BINARY:
|
||||
case BF16_BINARY:
|
||||
n_opts = 2;
|
||||
gcc_assert (op3 == NULL_RTX);
|
||||
break;
|
||||
|
||||
case FP16_FMA:
|
||||
case FP16_FMS:
|
||||
case FP16_NFMA:
|
||||
case FP16_NFMS:
|
||||
case BF16_FMA:
|
||||
case BF16_FMS:
|
||||
case BF16_NFMA:
|
||||
case BF16_NFMS:
|
||||
gcc_assert (icode == FMA);
|
||||
n_opts = 3;
|
||||
break;
|
||||
|
||||
case FP16_UNARY:
|
||||
case FP16_ABS_BINARY:
|
||||
case FP16_NEG_BINARY:
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
@ -147,41 +144,27 @@ bfloat16_operation_as_v4sf (enum rtx_code icode,
|
|||
}
|
||||
|
||||
/* Do the operation in V4SFmode. */
|
||||
switch (subtype)
|
||||
if (subtype == BF16_BINARY)
|
||||
emit_insn (gen_rtx_SET (result_v4sf,
|
||||
gen_rtx_fmt_ee (icode, V4SFmode,
|
||||
ops_v4sf[0],
|
||||
ops_v4sf[1])));
|
||||
|
||||
else /* FMA/FMS/NFMA/NFMS operation. */
|
||||
{
|
||||
case FP16_BINARY:
|
||||
emit_insn (gen_rtx_SET (result_v4sf,
|
||||
gen_rtx_fmt_ee (icode, V4SFmode,
|
||||
ops_v4sf[0],
|
||||
ops_v4sf[1])));
|
||||
break;
|
||||
rtx op1 = ops_v4sf[0];
|
||||
rtx op2 = ops_v4sf[1];
|
||||
rtx op3 = ops_v4sf[2];
|
||||
|
||||
case FP16_FMA:
|
||||
case FP16_FMS:
|
||||
case FP16_NFMA:
|
||||
case FP16_NFMS:
|
||||
{
|
||||
rtx op1 = ops_v4sf[0];
|
||||
rtx op2 = ops_v4sf[1];
|
||||
rtx op3 = ops_v4sf[2];
|
||||
if (subtype == BF16_FMS || subtype == BF16_NFMS)
|
||||
op3 = gen_rtx_NEG (V4SFmode, op3);
|
||||
|
||||
if (subtype == FP16_FMS || subtype == FP16_NFMS)
|
||||
op3 = gen_rtx_NEG (V4SFmode, op3);
|
||||
rtx op_fma = gen_rtx_FMA (V4SFmode, op1, op2, op3);
|
||||
|
||||
rtx op_fma = gen_rtx_FMA (V4SFmode, op1, op2, op3);
|
||||
if (subtype == BF16_NFMA || subtype == BF16_NFMS)
|
||||
op_fma = gen_rtx_NEG (V4SFmode, op_fma);
|
||||
|
||||
if (subtype == FP16_NFMA || subtype == FP16_NFMS)
|
||||
op_fma = gen_rtx_NEG (V4SFmode, op_fma);
|
||||
|
||||
emit_insn (gen_rtx_SET (result_v4sf, op_fma));
|
||||
}
|
||||
break;
|
||||
|
||||
case FP16_UNARY:
|
||||
case FP16_ABS_BINARY:
|
||||
case FP16_NEG_BINARY:
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
emit_insn (gen_rtx_SET (result_v4sf, op_fma));
|
||||
}
|
||||
|
||||
/* Convert V4SF result back to scalar mode. */
|
||||
|
@ -197,154 +180,3 @@ bfloat16_operation_as_v4sf (enum rtx_code icode,
|
|||
else
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
|
||||
/* Expand a _Float16 vector operation:
|
||||
|
||||
ICODE: Operation to perform.
|
||||
RESULT: Result of the operation.
|
||||
OP1: Input operand1.
|
||||
OP2: Input operand2.
|
||||
OP3: Input operand3 or NULL_RTX.
|
||||
SUBTYPE: Describe the operation. */
|
||||
|
||||
void
|
||||
float16_vectorization (enum rtx_code icode,
|
||||
rtx result,
|
||||
rtx op1,
|
||||
rtx op2,
|
||||
rtx op3,
|
||||
enum fp16_operation subtype)
|
||||
{
|
||||
gcc_assert (can_create_pseudo_p ());
|
||||
|
||||
enum rtx_code unary_op = UNKNOWN;
|
||||
rtx op_orig[3] = { op1, op2, op3 };
|
||||
rtx op_hi[3];
|
||||
rtx op_lo[3];
|
||||
rtx result_hi;
|
||||
rtx result_lo;
|
||||
size_t n_opts;
|
||||
|
||||
switch (subtype)
|
||||
{
|
||||
case FP16_UNARY:
|
||||
n_opts = 1;
|
||||
break;
|
||||
|
||||
case FP16_BINARY:
|
||||
n_opts = 2;
|
||||
break;
|
||||
|
||||
case FP16_ABS_BINARY:
|
||||
unary_op = ABS;
|
||||
n_opts = 2;
|
||||
break;
|
||||
|
||||
case FP16_NEG_BINARY:
|
||||
unary_op = NEG;
|
||||
n_opts = 2;
|
||||
break;
|
||||
|
||||
case FP16_FMA:
|
||||
case FP16_FMS:
|
||||
case FP16_NFMA:
|
||||
case FP16_NFMS:
|
||||
n_opts = 3;
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Allocate 2 temporaries for the results and the input operands. */
|
||||
result_hi = gen_reg_rtx (V4SFmode);
|
||||
result_lo = gen_reg_rtx (V4SFmode);
|
||||
|
||||
for (size_t i = 0; i < n_opts; i++)
|
||||
{
|
||||
gcc_assert (op_orig[i] != NULL_RTX);
|
||||
op_hi[i] = gen_reg_rtx (V4SFmode); /* high register. */
|
||||
op_lo[i] = gen_reg_rtx (V4SFmode); /* low register. */
|
||||
|
||||
emit_insn (gen_vec_unpacks_hi_v8hf (op_hi[i], op_orig[i]));
|
||||
emit_insn (gen_vec_unpacks_lo_v8hf (op_lo[i], op_orig[i]));
|
||||
}
|
||||
|
||||
/* Do 2 sets of V4SFmode operations. */
|
||||
switch (subtype)
|
||||
{
|
||||
case FP16_UNARY:
|
||||
emit_insn (gen_rtx_SET (result_hi,
|
||||
gen_rtx_fmt_e (icode, V4SFmode, op_hi[0])));
|
||||
|
||||
emit_insn (gen_rtx_SET (result_lo,
|
||||
gen_rtx_fmt_e (icode, V4SFmode, op_lo[0])));
|
||||
break;
|
||||
|
||||
case FP16_BINARY:
|
||||
case FP16_ABS_BINARY:
|
||||
case FP16_NEG_BINARY:
|
||||
emit_insn (gen_rtx_SET (result_hi,
|
||||
gen_rtx_fmt_ee (icode, V4SFmode,
|
||||
op_hi[0],
|
||||
op_hi[1])));
|
||||
|
||||
emit_insn (gen_rtx_SET (result_lo,
|
||||
gen_rtx_fmt_ee (icode, V4SFmode,
|
||||
op_lo[0],
|
||||
op_lo[1])));
|
||||
break;
|
||||
|
||||
case FP16_FMA:
|
||||
case FP16_FMS:
|
||||
case FP16_NFMA:
|
||||
case FP16_NFMS:
|
||||
{
|
||||
rtx op1_hi = op_hi[0];
|
||||
rtx op2_hi = op_hi[1];
|
||||
rtx op3_hi = op_hi[2];
|
||||
|
||||
rtx op1_lo = op_lo[0];
|
||||
rtx op2_lo = op_lo[1];
|
||||
rtx op3_lo = op_lo[2];
|
||||
|
||||
if (subtype == FP16_FMS || subtype == FP16_NFMS)
|
||||
{
|
||||
op3_hi = gen_rtx_NEG (V4SFmode, op3_hi);
|
||||
op3_lo = gen_rtx_NEG (V4SFmode, op3_lo);
|
||||
}
|
||||
|
||||
rtx op_fma_hi = gen_rtx_FMA (V4SFmode, op1_hi, op2_hi, op3_hi);
|
||||
rtx op_fma_lo = gen_rtx_FMA (V4SFmode, op1_lo, op2_lo, op3_lo);
|
||||
|
||||
if (subtype == FP16_NFMA || subtype == FP16_NFMS)
|
||||
{
|
||||
op_fma_hi = gen_rtx_NEG (V4SFmode, op_fma_hi);
|
||||
op_fma_lo = gen_rtx_NEG (V4SFmode, op_fma_lo);
|
||||
}
|
||||
|
||||
emit_insn (gen_rtx_SET (result_hi, op_fma_hi));
|
||||
emit_insn (gen_rtx_SET (result_lo, op_fma_lo));
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Add any unary operator modifications. */
|
||||
if (unary_op != UNKNOWN)
|
||||
{
|
||||
emit_insn (gen_rtx_SET (result_hi,
|
||||
gen_rtx_fmt_e (unary_op, V4SFmode, result_hi)));
|
||||
|
||||
emit_insn (gen_rtx_SET (result_lo,
|
||||
gen_rtx_fmt_e (unary_op, V4SFmode, result_lo)));
|
||||
}
|
||||
|
||||
/* Combine the 2 V4SFmode operations into one V8HFmode vector. */
|
||||
emit_insn (gen_vec_pack_trunc_v4sf_v8hf (result, result_hi, result_lo));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -62,22 +62,6 @@
|
|||
(V8BF "V4BF")
|
||||
(V8HF "V4HF")])
|
||||
|
||||
;; Unary operators for float16 vectorization.
|
||||
(define_code_iterator FLOAT16_UNARY_OP [abs neg])
|
||||
|
||||
;; Binary operators for float16 vectorization.
|
||||
(define_code_iterator FLOAT16_BINARY_OP [plus minus mult smax smin])
|
||||
|
||||
;; Standard names for the unary/binary/ternary operators
|
||||
(define_code_attr float16_names [(abs "abs")
|
||||
(fma "fma")
|
||||
(plus "add")
|
||||
(minus "sub")
|
||||
(mult "mul")
|
||||
(neg "neg")
|
||||
(smax "smax")
|
||||
(smin "smin")])
|
||||
|
||||
;; UNSPEC constants
|
||||
(define_c_enum "unspec"
|
||||
[UNSPEC_FP16_SHIFT_LEFT_32BIT
|
||||
|
@ -462,7 +446,7 @@
|
|||
|
||||
(define_insn_and_split "*bfloat16_binary_op_internal1"
|
||||
[(set (match_operand:SF 0 "vsx_register_operand")
|
||||
(match_operator:SF 1 "fp16_binary_operator"
|
||||
(match_operator:SF 1 "bfloat16_binary_operator"
|
||||
[(match_operand:SF 2 "bfloat16_v4sf_operand")
|
||||
(match_operand:SF 3 "bfloat16_v4sf_operand")]))]
|
||||
"TARGET_BFLOAT16_HW && can_create_pseudo_p ()
|
||||
|
@ -473,14 +457,14 @@
|
|||
[(pc)]
|
||||
{
|
||||
bfloat16_operation_as_v4sf (GET_CODE (operands[1]), operands[0], operands[2],
|
||||
operands[3], NULL_RTX, FP16_BINARY);
|
||||
operands[3], NULL_RTX, BF16_BINARY);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn_and_split "*bfloat16_binary_op_internal2"
|
||||
[(set (match_operand:BF 0 "vsx_register_operand")
|
||||
(float_truncate:BF
|
||||
(match_operator:SF 1 "fp16_binary_operator"
|
||||
(match_operator:SF 1 "bfloat16_binary_operator"
|
||||
[(match_operand:SF 2 "bfloat16_v4sf_operand")
|
||||
(match_operand:SF 3 "bfloat16_v4sf_operand")])))]
|
||||
"TARGET_BFLOAT16_HW && can_create_pseudo_p ()
|
||||
|
@ -491,7 +475,7 @@
|
|||
[(pc)]
|
||||
{
|
||||
bfloat16_operation_as_v4sf (GET_CODE (operands[1]), operands[0], operands[2],
|
||||
operands[3], NULL_RTX, FP16_BINARY);
|
||||
operands[3], NULL_RTX, BF16_BINARY);
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -510,7 +494,7 @@
|
|||
[(pc)]
|
||||
{
|
||||
bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
|
||||
operands[3], FP16_FMA);
|
||||
operands[3], BF16_FMA);
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -530,7 +514,7 @@
|
|||
[(pc)]
|
||||
{
|
||||
bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
|
||||
operands[3], FP16_FMA);
|
||||
operands[3], BF16_FMA);
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -550,7 +534,7 @@
|
|||
[(pc)]
|
||||
{
|
||||
bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
|
||||
operands[3], FP16_FMS);
|
||||
operands[3], BF16_FMS);
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -571,7 +555,7 @@
|
|||
[(pc)]
|
||||
{
|
||||
bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
|
||||
operands[3], FP16_FMS);
|
||||
operands[3], BF16_FMS);
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -591,7 +575,7 @@
|
|||
[(pc)]
|
||||
{
|
||||
bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
|
||||
operands[3], FP16_NFMA);
|
||||
operands[3], BF16_NFMA);
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -612,7 +596,7 @@
|
|||
[(pc)]
|
||||
{
|
||||
bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
|
||||
operands[3], FP16_NFMA);
|
||||
operands[3], BF16_NFMA);
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -633,7 +617,7 @@
|
|||
[(pc)]
|
||||
{
|
||||
bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
|
||||
operands[3], FP16_NFMA);
|
||||
operands[3], BF16_NFMA);
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -654,7 +638,7 @@
|
|||
[(pc)]
|
||||
{
|
||||
bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
|
||||
operands[3], FP16_NFMS);
|
||||
operands[3], BF16_NFMS);
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -676,7 +660,7 @@
|
|||
[(pc)]
|
||||
{
|
||||
bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
|
||||
operands[3], FP16_NFMS);
|
||||
operands[3], BF16_NFMS);
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -698,151 +682,10 @@
|
|||
[(pc)]
|
||||
{
|
||||
bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
|
||||
operands[3], FP16_NFMS);
|
||||
operands[3], BF16_NFMS);
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
||||
;; Add vectorization support for _Float16. Unfortunately, since there
|
||||
;; can only be one vec_pack_trunc_v4sf, we choose to support automatic
|
||||
;; vectorization for BFmode. The following insns define vectorization
|
||||
;; for HFmode.
|
||||
|
||||
;; Unary operators being vectorized.
|
||||
(define_insn_and_split "<float16_names>v8hf3"
|
||||
[(set (match_operand:V8HF 0 "vsx_register_operand")
|
||||
(FLOAT16_UNARY_OP:V8HF
|
||||
(match_operand:V8HF 1 "vsx_register_operand")))]
|
||||
"TARGET_FLOAT16_HW && can_create_pseudo_p ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(pc)]
|
||||
{
|
||||
float16_vectorization (<CODE>, operands[0], operands[1], NULL_RTX, NULL_RTX,
|
||||
FP16_UNARY);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Binary operators being vectorized.
|
||||
(define_insn_and_split "<float16_names>v8hf3"
|
||||
[(set (match_operand:V8HF 0 "vsx_register_operand")
|
||||
(FLOAT16_BINARY_OP:V8HF
|
||||
(match_operand:V8HF 1 "vsx_register_operand")
|
||||
(match_operand:V8HF 2 "vsx_register_operand")))]
|
||||
"TARGET_FLOAT16_HW && can_create_pseudo_p ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(pc)]
|
||||
{
|
||||
float16_vectorization (<CODE>, operands[0], operands[1], operands[2],
|
||||
NULL_RTX, FP16_BINARY);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Negative of binary operators being vectorized.
|
||||
(define_insn_and_split "*neg_<float16_names>v8hf3"
|
||||
[(set (match_operand:V8HF 0 "vsx_register_operand")
|
||||
(neg:V8HF
|
||||
(FLOAT16_BINARY_OP:V8HF
|
||||
(match_operand:V8HF 1 "vsx_register_operand")
|
||||
(match_operand:V8HF 2 "vsx_register_operand"))))]
|
||||
"TARGET_FLOAT16_HW && can_create_pseudo_p ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(pc)]
|
||||
{
|
||||
float16_vectorization (<CODE>, operands[0], operands[1], operands[2],
|
||||
NULL_RTX, FP16_NEG_BINARY);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Absolute value of binary operators being vectorized.
|
||||
(define_insn_and_split "*abs_<float16_names>v8hf3"
|
||||
[(set (match_operand:V8HF 0 "vsx_register_operand")
|
||||
(abs:V8HF
|
||||
(FLOAT16_BINARY_OP:V8HF
|
||||
(match_operand:V8HF 1 "vsx_register_operand")
|
||||
(match_operand:V8HF 2 "vsx_register_operand"))))]
|
||||
"TARGET_FLOAT16_HW && can_create_pseudo_p ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(pc)]
|
||||
{
|
||||
float16_vectorization (<CODE>, operands[0], operands[1], operands[2],
|
||||
NULL_RTX, FP16_ABS_BINARY);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; FMA operations being vectorized.
|
||||
(define_insn_and_split "fmav8hf3"
|
||||
[(set (match_operand:V8HF 0 "vsx_register_operand")
|
||||
(fma:V8HF
|
||||
(match_operand:V8HF 1 "vsx_register_operand")
|
||||
(match_operand:V8HF 2 "vsx_register_operand")
|
||||
(match_operand:V8HF 3 "vsx_register_operand")))]
|
||||
"TARGET_FLOAT16_HW && can_create_pseudo_p ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(pc)]
|
||||
{
|
||||
float16_vectorization (FMA, operands[0], operands[1], operands[2],
|
||||
operands[3], FP16_FMA);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn_and_split "*fmsv8hf3"
|
||||
[(set (match_operand:V8HF 0 "vsx_register_operand")
|
||||
(fma:V8HF
|
||||
(match_operand:V8HF 1 "vsx_register_operand")
|
||||
(match_operand:V8HF 2 "vsx_register_operand")
|
||||
(neg:V8HF
|
||||
(match_operand:V8HF 3 "vsx_register_operand"))))]
|
||||
"TARGET_FLOAT16_HW && can_create_pseudo_p ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(pc)]
|
||||
{
|
||||
float16_vectorization (FMA, operands[0], operands[1], operands[2],
|
||||
operands[3], FP16_FMS);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn_and_split "*nfmav8hf3"
|
||||
[(set (match_operand:V8HF 0 "vsx_register_operand")
|
||||
(neg:V8HF
|
||||
(fma:V8HF
|
||||
(match_operand:V8HF 1 "vsx_register_operand")
|
||||
(match_operand:V8HF 2 "vsx_register_operand")
|
||||
(match_operand:V8HF 3 "vsx_register_operand"))))]
|
||||
"TARGET_FLOAT16_HW && can_create_pseudo_p ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(pc)]
|
||||
{
|
||||
float16_vectorization (FMA, operands[0], operands[1], operands[2],
|
||||
operands[3], FP16_NFMA);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn_and_split "*nfmsv8hf3"
|
||||
[(set (match_operand:V8HF 0 "vsx_register_operand")
|
||||
(neg:V8HF
|
||||
(fma:V8HF
|
||||
(match_operand:V8HF 1 "vsx_register_operand")
|
||||
(match_operand:V8HF 2 "vsx_register_operand")
|
||||
(neg:V8HF
|
||||
(match_operand:V8HF 3 "vsx_register_operand")))))]
|
||||
"TARGET_FLOAT16_HW && can_create_pseudo_p ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(pc)]
|
||||
{
|
||||
float16_vectorization (FMA, operands[0], operands[1], operands[2],
|
||||
operands[3], FP16_NFMS);
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
||||
;; If we do multiple __bfloat16 operations, between the first and
|
||||
;; second operation, GCC will want to convert the first operation from
|
||||
;; V4SFmode to SFmode and then reconvert it back to V4SFmode. On the
|
||||
|
|
|
@ -2208,7 +2208,7 @@
|
|||
;; the operation in vector mode rather than converting the BFmode to a
|
||||
;; V8BFmode vector, converting that V8BFmode vector to V4SFmode, and
|
||||
;; then converting the V4SFmode element to SFmode scalar.
|
||||
(define_predicate "fp16_binary_operator"
|
||||
(define_predicate "bfloat16_binary_operator"
|
||||
(match_code "plus,minus,mult,smax,smin"))
|
||||
|
||||
;; Match bfloat16/float operands that can be optimized to do the
|
||||
|
|
|
@ -260,22 +260,17 @@ extern unsigned constant_generates_xxspltiw (vec_const_128bit_type *);
|
|||
extern unsigned constant_generates_xxspltidp (vec_const_128bit_type *);
|
||||
|
||||
/* From float16.cc. */
|
||||
/* Optimize bfloat16 and float16 operations. */
|
||||
enum fp16_operation {
|
||||
FP16_UNARY, /* Bfloat16/float16 unary op. */
|
||||
FP16_BINARY, /* Bfloat16/float16 binary op. */
|
||||
FP16_ABS_BINARY, /* abs (binary op). */
|
||||
FP16_NEG_BINARY, /* - binary op. */
|
||||
FP16_FMA, /* (a * b) + c. */
|
||||
FP16_FMS, /* (a * b) - c. */
|
||||
FP16_NFMA, /* - ((a * b) + c). */
|
||||
FP16_NFMS /* - ((a * b) - c). */
|
||||
/* Optimize bfloat16 operations. */
|
||||
enum bfloat16_operation {
|
||||
BF16_BINARY, /* Bfloat16 binary op. */
|
||||
BF16_FMA, /* (a * b) + c. */
|
||||
BF16_FMS, /* (a * b) - c. */
|
||||
BF16_NFMA, /* - ((a * b) + c). */
|
||||
BF16_NFMS /* - ((a * b) - c). */
|
||||
};
|
||||
|
||||
extern void bfloat16_operation_as_v4sf (enum rtx_code, rtx, rtx, rtx, rtx,
|
||||
enum fp16_operation);
|
||||
extern void float16_vectorization (enum rtx_code, rtx, rtx, rtx, rtx,
|
||||
enum fp16_operation);
|
||||
enum bfloat16_operation);
|
||||
#endif /* RTX_CODE */
|
||||
|
||||
#ifdef TREE_CODE
|
||||
|
|
Loading…
Reference in New Issue