Delete _Float16 -Ofast vectorization support.

2025-10-15  Michael Meissner  <meissner@linux.ibm.com>

gcc/

	* config/rs6000/float16.cc (bfloat16_operation_as_v4sf): Delete fp16
	operation cases only used by _Float16 vectorization.
	(float16_vectorization): Delete function.
	* config/rs6000/float16.md (VFP16): Delete.
	(FLOAT16_UNARY_OP): Likewise.
	(FLOAT16_BINARY_OP): Likewise.
	(float16_names): Likewise.
	(<float16_names>v8hf3, FLOAT16_UNARY_OP iterator): Likewise.
	(<float16_names>v8hf3, FLOAT16_BINARY_OP iterator): Likewise.
	(neg_<float16_names>v8hf3): Likewise.
	(abs_<float16_names>v8hf3): Likewise.
	(fmav8hf3): Likewise.
	(fmsv8hf3): Likewise.
	(nfmav8hf3): Likewise.
	(nfmsv8hf3): Likewise.
	* config/rs6000/rs6000-protos.h (FP16_UNARY): Likewise.
	(FP16_ABS_BINARY): Likewise.
	(FP16_NEG_BINARY): Likewise.
	(float16_vectorization): Likewise.
This commit is contained in:
Michael Meissner 2025-10-15 11:25:38 -04:00
parent 4209980a8c
commit 2e20b2ecd9
3 changed files with 0 additions and 321 deletions

View File

@ -88,9 +88,6 @@ bfloat16_operation_as_v4sf (enum rtx_code icode,
n_opts = 3;
break;
case FP16_UNARY:
case FP16_ABS_BINARY:
case FP16_NEG_BINARY:
default:
gcc_unreachable ();
}
@ -177,9 +174,6 @@ bfloat16_operation_as_v4sf (enum rtx_code icode,
}
break;
case FP16_UNARY:
case FP16_ABS_BINARY:
case FP16_NEG_BINARY:
default:
gcc_unreachable ();
}
@ -197,154 +191,3 @@ bfloat16_operation_as_v4sf (enum rtx_code icode,
else
gcc_unreachable ();
}
/* Expand a _Float16 vector operation:
ICODE: Operation to perform.
RESULT: Result of the operation.
OP1: Input operand1.
OP2: Input operand2.
OP3: Input operand3 or NULL_RTX.
SUBTYPE: Describe the operation. */
void
float16_vectorization (enum rtx_code icode,
rtx result,
rtx op1,
rtx op2,
rtx op3,
enum fp16_operation subtype)
{
gcc_assert (can_create_pseudo_p ());
enum rtx_code unary_op = UNKNOWN;
rtx op_orig[3] = { op1, op2, op3 };
rtx op_hi[3];
rtx op_lo[3];
rtx result_hi;
rtx result_lo;
size_t n_opts;
switch (subtype)
{
case FP16_UNARY:
n_opts = 1;
break;
case FP16_BINARY:
n_opts = 2;
break;
case FP16_ABS_BINARY:
unary_op = ABS;
n_opts = 2;
break;
case FP16_NEG_BINARY:
unary_op = NEG;
n_opts = 2;
break;
case FP16_FMA:
case FP16_FMS:
case FP16_NFMA:
case FP16_NFMS:
n_opts = 3;
break;
default:
gcc_unreachable ();
}
/* Allocate 2 temporaries for the results and the input operands. */
result_hi = gen_reg_rtx (V4SFmode);
result_lo = gen_reg_rtx (V4SFmode);
for (size_t i = 0; i < n_opts; i++)
{
gcc_assert (op_orig[i] != NULL_RTX);
op_hi[i] = gen_reg_rtx (V4SFmode); /* high register. */
op_lo[i] = gen_reg_rtx (V4SFmode); /* low register. */
emit_insn (gen_vec_unpacks_hi_v8hf (op_hi[i], op_orig[i]));
emit_insn (gen_vec_unpacks_lo_v8hf (op_lo[i], op_orig[i]));
}
/* Do 2 sets of V4SFmode operations. */
switch (subtype)
{
case FP16_UNARY:
emit_insn (gen_rtx_SET (result_hi,
gen_rtx_fmt_e (icode, V4SFmode, op_hi[0])));
emit_insn (gen_rtx_SET (result_lo,
gen_rtx_fmt_e (icode, V4SFmode, op_lo[0])));
break;
case FP16_BINARY:
case FP16_ABS_BINARY:
case FP16_NEG_BINARY:
emit_insn (gen_rtx_SET (result_hi,
gen_rtx_fmt_ee (icode, V4SFmode,
op_hi[0],
op_hi[1])));
emit_insn (gen_rtx_SET (result_lo,
gen_rtx_fmt_ee (icode, V4SFmode,
op_lo[0],
op_lo[1])));
break;
case FP16_FMA:
case FP16_FMS:
case FP16_NFMA:
case FP16_NFMS:
{
rtx op1_hi = op_hi[0];
rtx op2_hi = op_hi[1];
rtx op3_hi = op_hi[2];
rtx op1_lo = op_lo[0];
rtx op2_lo = op_lo[1];
rtx op3_lo = op_lo[2];
if (subtype == FP16_FMS || subtype == FP16_NFMS)
{
op3_hi = gen_rtx_NEG (V4SFmode, op3_hi);
op3_lo = gen_rtx_NEG (V4SFmode, op3_lo);
}
rtx op_fma_hi = gen_rtx_FMA (V4SFmode, op1_hi, op2_hi, op3_hi);
rtx op_fma_lo = gen_rtx_FMA (V4SFmode, op1_lo, op2_lo, op3_lo);
if (subtype == FP16_NFMA || subtype == FP16_NFMS)
{
op_fma_hi = gen_rtx_NEG (V4SFmode, op_fma_hi);
op_fma_lo = gen_rtx_NEG (V4SFmode, op_fma_lo);
}
emit_insn (gen_rtx_SET (result_hi, op_fma_hi));
emit_insn (gen_rtx_SET (result_lo, op_fma_lo));
}
break;
default:
gcc_unreachable ();
}
/* Add any unary operator modifications. */
if (unary_op != UNKNOWN)
{
emit_insn (gen_rtx_SET (result_hi,
gen_rtx_fmt_e (unary_op, V4SFmode, result_hi)));
emit_insn (gen_rtx_SET (result_lo,
gen_rtx_fmt_e (unary_op, V4SFmode, result_lo)));
}
/* Combine the 2 V4SFmode operations into one V8HFmode vector. */
emit_insn (gen_vec_pack_trunc_v4sf_v8hf (result, result_hi, result_lo));
return;
}

View File

@ -25,9 +25,6 @@
(define_mode_iterator FP16 [(BF "TARGET_BFLOAT16")
(HF "TARGET_FLOAT16")])
;; Mode iterator for the vector 16-bit floating point modes.  Each vector
;; mode is enabled by the same target test as the matching scalar mode in
;; the FP16 iterator above.
(define_mode_iterator VFP16 [(V8BF "TARGET_BFLOAT16")
(V8HF "TARGET_FLOAT16")])
;; Mode iterator for 16-bit floating point modes on machines with
;; hardware support both as a scalar and as a vector.
(define_mode_iterator FP16_HW [(BF "TARGET_BFLOAT16_HW")
@ -62,22 +59,6 @@
(V8BF "V4BF")
(V8HF "V4HF")])
;; Unary operators for float16 vectorization.
(define_code_iterator FLOAT16_UNARY_OP [abs neg])
;; Binary operators for float16 vectorization.
(define_code_iterator FLOAT16_BINARY_OP [plus minus mult smax smin])
;; Standard names for the unary/binary/ternary operators
;;
;; Maps an RTL code to the name stem that is substituted for
;; <float16_names> when building pattern names such as
;; "<float16_names>v8hf3".
;; NOTE(review): fma is listed here but appears in neither
;; FLOAT16_UNARY_OP nor FLOAT16_BINARY_OP as defined just above, so this
;; entry looks unused by those iterators -- confirm.
(define_code_attr float16_names [(abs "abs")
(fma "fma")
(plus "add")
(minus "sub")
(mult "mul")
(neg "neg")
(smax "smax")
(smin "smin")])
;; UNSPEC constants
(define_c_enum "unspec"
[UNSPEC_FP16_SHIFT_LEFT_32BIT
@ -711,146 +692,6 @@
DONE;
})
;; Add vectorization support for _Float16. Unfortunately, since there
;; can only be one vec_pack_trunc_v4sf, we choose to support automatic
;; vectorization for BFmode. The following insns define vectorization
;; for HFmode.
;; Unary operators being vectorized.
;;
;; Matches a FLOAT16_UNARY_OP code (abs or neg) applied to a V8HF register
;; while TARGET_FLOAT16_HW is set and new pseudo registers can still be
;; created.  The output template is "#", so the insn is always split; the
;; split (condition "&& 1") calls float16_vectorization with only
;; operand 1 as input and subtype FP16_UNARY to emit the replacement
;; insns.
;; NOTE(review): the pattern name ends in "3" although there is a single
;; input operand; GCC's standard names for these operations are
;; abs<mode>2 and neg<mode>2 -- confirm the spelling is intentional.
(define_insn_and_split "<float16_names>v8hf3"
[(set (match_operand:V8HF 0 "vsx_register_operand")
(FLOAT16_UNARY_OP:V8HF
(match_operand:V8HF 1 "vsx_register_operand")))]
"TARGET_FLOAT16_HW && can_create_pseudo_p ()"
"#"
"&& 1"
[(pc)]
{
float16_vectorization (<CODE>, operands[0], operands[1], NULL_RTX, NULL_RTX,
FP16_UNARY);
DONE;
})
;; Binary operators being vectorized.
;;
;; Matches a FLOAT16_BINARY_OP code (plus, minus, mult, smax or smin)
;; applied to two V8HF registers while TARGET_FLOAT16_HW is set and new
;; pseudo registers can still be created.  The output template is "#", so
;; the insn is always split; the split (condition "&& 1") calls
;; float16_vectorization with operands 1 and 2 as inputs and subtype
;; FP16_BINARY to emit the replacement insns.
(define_insn_and_split "<float16_names>v8hf3"
[(set (match_operand:V8HF 0 "vsx_register_operand")
(FLOAT16_BINARY_OP:V8HF
(match_operand:V8HF 1 "vsx_register_operand")
(match_operand:V8HF 2 "vsx_register_operand")))]
"TARGET_FLOAT16_HW && can_create_pseudo_p ()"
"#"
"&& 1"
[(pc)]
{
float16_vectorization (<CODE>, operands[0], operands[1], operands[2],
NULL_RTX, FP16_BINARY);
DONE;
})
;; Negative of binary operators being vectorized.
;;
;; Matches (neg (binary-op a b)) on V8HF registers so that the negation
;; and the binary operation are expanded together.  The inner operation's
;; code is passed as <CODE>; the outer negation is conveyed by the
;; FP16_NEG_BINARY subtype given to float16_vectorization.  Always split
;; ("#" template, split condition "&& 1").
(define_insn_and_split "*neg_<float16_names>v8hf3"
[(set (match_operand:V8HF 0 "vsx_register_operand")
(neg:V8HF
(FLOAT16_BINARY_OP:V8HF
(match_operand:V8HF 1 "vsx_register_operand")
(match_operand:V8HF 2 "vsx_register_operand"))))]
"TARGET_FLOAT16_HW && can_create_pseudo_p ()"
"#"
"&& 1"
[(pc)]
{
float16_vectorization (<CODE>, operands[0], operands[1], operands[2],
NULL_RTX, FP16_NEG_BINARY);
DONE;
})
;; Absolute value of binary operators being vectorized.
;;
;; Matches (abs (binary-op a b)) on V8HF registers so that the absolute
;; value and the binary operation are expanded together.  The inner
;; operation's code is passed as <CODE>; the outer abs is conveyed by the
;; FP16_ABS_BINARY subtype given to float16_vectorization.  Always split
;; ("#" template, split condition "&& 1").
(define_insn_and_split "*abs_<float16_names>v8hf3"
[(set (match_operand:V8HF 0 "vsx_register_operand")
(abs:V8HF
(FLOAT16_BINARY_OP:V8HF
(match_operand:V8HF 1 "vsx_register_operand")
(match_operand:V8HF 2 "vsx_register_operand"))))]
"TARGET_FLOAT16_HW && can_create_pseudo_p ()"
"#"
"&& 1"
[(pc)]
{
float16_vectorization (<CODE>, operands[0], operands[1], operands[2],
NULL_RTX, FP16_ABS_BINARY);
DONE;
})
;; FMA operations being vectorized.
;;
;; Matches (fma a b c) on V8HF registers, i.e. (a * b) + c.  Always split
;; ("#" template, split condition "&& 1"); the split calls
;; float16_vectorization with all three inputs and subtype FP16_FMA.
;; NOTE(review): GCC's standard name for fused multiply-add is
;; fma<mode>4; this pattern is spelled "fmav8hf3" -- confirm the spelling
;; is intentional.
(define_insn_and_split "fmav8hf3"
[(set (match_operand:V8HF 0 "vsx_register_operand")
(fma:V8HF
(match_operand:V8HF 1 "vsx_register_operand")
(match_operand:V8HF 2 "vsx_register_operand")
(match_operand:V8HF 3 "vsx_register_operand")))]
"TARGET_FLOAT16_HW && can_create_pseudo_p ()"
"#"
"&& 1"
[(pc)]
{
float16_vectorization (FMA, operands[0], operands[1], operands[2],
operands[3], FP16_FMA);
DONE;
})
;; Fused multiply-subtract being vectorized.
;;
;; Matches (fma a b (neg c)) on V8HF registers, i.e. (a * b) - c.
;; Operand 3 is the un-negated c; the negation is conveyed by the
;; FP16_FMS subtype given to float16_vectorization.  Always split ("#"
;; template, split condition "&& 1").
(define_insn_and_split "*fmsv8hf3"
[(set (match_operand:V8HF 0 "vsx_register_operand")
(fma:V8HF
(match_operand:V8HF 1 "vsx_register_operand")
(match_operand:V8HF 2 "vsx_register_operand")
(neg:V8HF
(match_operand:V8HF 3 "vsx_register_operand"))))]
"TARGET_FLOAT16_HW && can_create_pseudo_p ()"
"#"
"&& 1"
[(pc)]
{
float16_vectorization (FMA, operands[0], operands[1], operands[2],
operands[3], FP16_FMS);
DONE;
})
;; Negated fused multiply-add being vectorized.
;;
;; Matches (neg (fma a b c)) on V8HF registers, i.e. - ((a * b) + c).
;; The outer negation is conveyed by the FP16_NFMA subtype given to
;; float16_vectorization.  Always split ("#" template, split condition
;; "&& 1").
(define_insn_and_split "*nfmav8hf3"
[(set (match_operand:V8HF 0 "vsx_register_operand")
(neg:V8HF
(fma:V8HF
(match_operand:V8HF 1 "vsx_register_operand")
(match_operand:V8HF 2 "vsx_register_operand")
(match_operand:V8HF 3 "vsx_register_operand"))))]
"TARGET_FLOAT16_HW && can_create_pseudo_p ()"
"#"
"&& 1"
[(pc)]
{
float16_vectorization (FMA, operands[0], operands[1], operands[2],
operands[3], FP16_NFMA);
DONE;
})
;; Negated fused multiply-subtract being vectorized.
;;
;; Matches (neg (fma a b (neg c))) on V8HF registers, i.e.
;; - ((a * b) - c).  Operand 3 is the un-negated c; both negations are
;; conveyed by the FP16_NFMS subtype given to float16_vectorization.
;; Always split ("#" template, split condition "&& 1").
(define_insn_and_split "*nfmsv8hf3"
[(set (match_operand:V8HF 0 "vsx_register_operand")
(neg:V8HF
(fma:V8HF
(match_operand:V8HF 1 "vsx_register_operand")
(match_operand:V8HF 2 "vsx_register_operand")
(neg:V8HF
(match_operand:V8HF 3 "vsx_register_operand")))))]
"TARGET_FLOAT16_HW && can_create_pseudo_p ()"
"#"
"&& 1"
[(pc)]
{
float16_vectorization (FMA, operands[0], operands[1], operands[2],
operands[3], FP16_NFMS);
DONE;
})
;; If we do multiple __bfloat16 operations, between the first and
;; second operation, GCC will want to convert the first operation from

View File

@ -262,10 +262,7 @@ extern unsigned constant_generates_xxspltidp (vec_const_128bit_type *);
/* From float16.cc. */
/* Optimize bfloat16 and float16 operations. */
enum fp16_operation {
FP16_UNARY, /* Bfloat16/float16 unary op. */
FP16_BINARY, /* Bfloat16/float16 binary op. */
FP16_ABS_BINARY, /* abs (binary op). */
FP16_NEG_BINARY, /* - binary op. */
FP16_FMA, /* (a * b) + c. */
FP16_FMS, /* (a * b) - c. */
FP16_NFMA, /* - ((a * b) + c). */
@ -274,8 +271,6 @@ enum fp16_operation {
extern void bfloat16_operation_as_v4sf (enum rtx_code, rtx, rtx, rtx, rtx,
enum fp16_operation);
/* Expand a _Float16 vector operation.  The arguments are, in order: the
   operation code, the result, input operands 1, 2 and 3, and the
   fp16_operation subtype describing the operation.  */
extern void float16_vectorization (enum rtx_code, rtx, rtx, rtx, rtx,
enum fp16_operation);
#endif /* RTX_CODE */
#ifdef TREE_CODE