mirror of git://gcc.gnu.org/git/gcc.git
Fix FMA4 and XOP insns.
2009-12-02  Sebastian Pop  <sebastian.pop@amd.com>
            Richard Henderson  <rth@redhat.com>

    * config/i386/i386-protos.h (ix86_fma4_valid_op_p): Removed.
    * config/i386/i386.c (ix86_fma4_valid_op_p): Removed.
    * config/i386/i386.md: Do not use ix86_fma4_valid_op_p.
    * config/i386/sse.md (fma4_*): Remove alternative with operand 1
    matching a memory access.  Do not use ix86_fma4_valid_op_p.
    (xop_*): Same.
    Do not use ix86_fma4_valid_op_p in FMA4 and XOP splitters.

Co-Authored-By: Richard Henderson <rth@redhat.com>
From-SVN: r154970
This commit is contained in:
parent
aa356b75ed
commit
4926bb1d60
|
@ -1,3 +1,14 @@
|
||||||
|
2009-12-02 Sebastian Pop <sebastian.pop@amd.com>
|
||||||
|
Richard Henderson <rth@redhat.com>
|
||||||
|
|
||||||
|
* config/i386/i386-protos.h (ix86_fma4_valid_op_p): Removed.
|
||||||
|
* config/i386/i386.c (ix86_fma4_valid_op_p): Removed.
|
||||||
|
* config/i386/i386.md: Do not use ix86_fma4_valid_op_p.
|
||||||
|
* config/i386/sse.md (fma4_*): Remove alternative with operand 1
|
||||||
|
matching a memory access. Do not use ix86_fma4_valid_op_p.
|
||||||
|
(xop_*): Same.
|
||||||
|
Do not use ix86_fma4_valid_op_p in FMA4 and XOP splitters.
|
||||||
|
|
||||||
2009-12-02 Richard Henderson <rth@redhat.com>
|
2009-12-02 Richard Henderson <rth@redhat.com>
|
||||||
|
|
||||||
* config/i386/i386.c (ix86_fixup_binary_operands): For FMA4, force
|
* config/i386/i386.c (ix86_fixup_binary_operands): For FMA4, force
|
||||||
|
|
|
@ -218,8 +218,7 @@ extern void ix86_expand_vector_set (bool, rtx, rtx, int);
|
||||||
extern void ix86_expand_vector_extract (bool, rtx, rtx, int);
|
extern void ix86_expand_vector_extract (bool, rtx, rtx, int);
|
||||||
extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx);
|
extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx);
|
||||||
|
|
||||||
extern bool ix86_fma4_valid_op_p (rtx [], rtx, int, bool, int, bool);
|
extern bool ix86_expand_fma4_multiple_memory (rtx [], enum machine_mode);
|
||||||
extern void ix86_expand_fma4_multiple_memory (rtx [], enum machine_mode);
|
|
||||||
|
|
||||||
extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned);
|
extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned);
|
||||||
|
|
||||||
|
|
|
@ -28807,197 +28807,35 @@ ix86_expand_round (rtx operand0, rtx operand1)
|
||||||
emit_move_insn (operand0, res);
|
emit_move_insn (operand0, res);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Validate whether a FMA4 instruction is valid or not.
|
|
||||||
OPERANDS is the array of operands.
|
/* Fixup an FMA4 or XOP instruction that has 2 memory input references
|
||||||
NUM is the number of operands.
|
into a form the hardware will allow by using the destination
|
||||||
USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
|
register to load one of the memory operations. Presently this is
|
||||||
NUM_MEMORY is the maximum number of memory operands to accept.
|
used by the multiply/add routines to allow 2 memory references. */
|
||||||
NUM_MEMORY less than zero is a special case to allow an operand
|
|
||||||
of an instruction to be memory operation.
|
|
||||||
when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
ix86_fma4_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
|
|
||||||
bool uses_oc0, int num_memory, bool commutative)
|
|
||||||
{
|
|
||||||
int mem_mask;
|
|
||||||
int mem_count;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/* Count the number of memory arguments */
|
|
||||||
mem_mask = 0;
|
|
||||||
mem_count = 0;
|
|
||||||
for (i = 0; i < num; i++)
|
|
||||||
{
|
|
||||||
enum machine_mode mode = GET_MODE (operands[i]);
|
|
||||||
if (register_operand (operands[i], mode))
|
|
||||||
;
|
|
||||||
|
|
||||||
else if (memory_operand (operands[i], mode))
|
|
||||||
{
|
|
||||||
mem_mask |= (1 << i);
|
|
||||||
mem_count++;
|
|
||||||
}
|
|
||||||
|
|
||||||
else
|
|
||||||
{
|
|
||||||
rtx pattern = PATTERN (insn);
|
|
||||||
|
|
||||||
/* allow 0 for pcmov */
|
|
||||||
if (GET_CODE (pattern) != SET
|
|
||||||
|| GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
|
|
||||||
|| i < 2
|
|
||||||
|| operands[i] != CONST0_RTX (mode))
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Special case pmacsdq{l,h} where we allow the 3rd argument to be
|
|
||||||
a memory operation. */
|
|
||||||
if (num_memory < 0)
|
|
||||||
{
|
|
||||||
num_memory = -num_memory;
|
|
||||||
if ((mem_mask & (1 << (num-1))) != 0)
|
|
||||||
{
|
|
||||||
mem_mask &= ~(1 << (num-1));
|
|
||||||
mem_count--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If there were no memory operations, allow the insn */
|
|
||||||
if (mem_mask == 0)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
/* Do not allow the destination register to be a memory operand. */
|
|
||||||
else if (mem_mask & (1 << 0))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* If there are too many memory operations, disallow the instruction. While
|
|
||||||
the hardware only allows 1 memory reference, before register allocation
|
|
||||||
for some insns, we allow two memory operations sometimes in order to allow
|
|
||||||
code like the following to be optimized:
|
|
||||||
|
|
||||||
float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
|
|
||||||
|
|
||||||
or similar cases that are vectorized into using the vfmaddss
|
|
||||||
instruction. */
|
|
||||||
else if (mem_count > num_memory)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* Don't allow more than one memory operation if not optimizing. */
|
|
||||||
else if (mem_count > 1 && !optimize)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
else if (num == 4 && mem_count == 1)
|
|
||||||
{
|
|
||||||
/* formats (destination is the first argument), example vfmaddss:
|
|
||||||
xmm1, xmm1, xmm2, xmm3/mem
|
|
||||||
xmm1, xmm1, xmm2/mem, xmm3
|
|
||||||
xmm1, xmm2, xmm3/mem, xmm1
|
|
||||||
xmm1, xmm2/mem, xmm3, xmm1 */
|
|
||||||
if (uses_oc0)
|
|
||||||
return ((mem_mask == (1 << 1))
|
|
||||||
|| (mem_mask == (1 << 2))
|
|
||||||
|| (mem_mask == (1 << 3)));
|
|
||||||
|
|
||||||
/* format, example vpmacsdd:
|
|
||||||
xmm1, xmm2, xmm3/mem, xmm1 */
|
|
||||||
if (commutative)
|
|
||||||
return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
|
|
||||||
else
|
|
||||||
return (mem_mask == (1 << 2));
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (num == 4 && num_memory == 2)
|
|
||||||
{
|
|
||||||
/* If there are two memory operations, we can load one of the memory ops
|
|
||||||
into the destination register. This is for optimizing the
|
|
||||||
multiply/add ops, which the combiner has optimized both the multiply
|
|
||||||
and the add insns to have a memory operation. We have to be careful
|
|
||||||
that the destination doesn't overlap with the inputs. */
|
|
||||||
rtx op0 = operands[0];
|
|
||||||
|
|
||||||
if (reg_mentioned_p (op0, operands[1])
|
|
||||||
|| reg_mentioned_p (op0, operands[2])
|
|
||||||
|| reg_mentioned_p (op0, operands[3]))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* formats (destination is the first argument), example vfmaddss:
|
|
||||||
xmm1, xmm1, xmm2, xmm3/mem
|
|
||||||
xmm1, xmm1, xmm2/mem, xmm3
|
|
||||||
xmm1, xmm2, xmm3/mem, xmm1
|
|
||||||
xmm1, xmm2/mem, xmm3, xmm1
|
|
||||||
|
|
||||||
For the oc0 case, we will load either operands[1] or operands[3] into
|
|
||||||
operands[0], so any combination of 2 memory operands is ok. */
|
|
||||||
if (uses_oc0)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
/* format, example vpmacsdd:
|
|
||||||
xmm1, xmm2, xmm3/mem, xmm1
|
|
||||||
|
|
||||||
For the integer multiply/add instructions be more restrictive and
|
|
||||||
require operands[2] and operands[3] to be the memory operands. */
|
|
||||||
if (commutative)
|
|
||||||
return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
|
|
||||||
else
|
|
||||||
return (mem_mask == ((1 << 2) | (1 << 3)));
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (num == 3 && num_memory == 1)
|
|
||||||
{
|
|
||||||
/* formats, example vprotb:
|
|
||||||
xmm1, xmm2, xmm3/mem
|
|
||||||
xmm1, xmm2/mem, xmm3 */
|
|
||||||
if (uses_oc0)
|
|
||||||
return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
|
|
||||||
|
|
||||||
/* format, example vpcomeq:
|
|
||||||
xmm1, xmm2, xmm3/mem */
|
|
||||||
else
|
|
||||||
return (mem_mask == (1 << 2));
|
|
||||||
}
|
|
||||||
|
|
||||||
else
|
|
||||||
gcc_unreachable ();
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* Fixup an FMA4 instruction that has 2 memory input references into a form the
|
|
||||||
hardware will allow by using the destination register to load one of the
|
|
||||||
memory operations. Presently this is used by the multiply/add routines to
|
|
||||||
allow 2 memory references. */
|
|
||||||
|
|
||||||
void
|
|
||||||
ix86_expand_fma4_multiple_memory (rtx operands[],
|
ix86_expand_fma4_multiple_memory (rtx operands[],
|
||||||
enum machine_mode mode)
|
enum machine_mode mode)
|
||||||
{
|
{
|
||||||
rtx op0 = operands[0];
|
rtx scratch = operands[0];
|
||||||
|
|
||||||
if (memory_operand (op0, mode)
|
gcc_assert (register_operand (operands[0], mode));
|
||||||
|| reg_mentioned_p (op0, operands[1])
|
gcc_assert (register_operand (operands[1], mode));
|
||||||
|| reg_mentioned_p (op0, operands[2])
|
gcc_assert (MEM_P (operands[2]) && MEM_P (operands[3]));
|
||||||
|| reg_mentioned_p (op0, operands[3]))
|
|
||||||
gcc_unreachable ();
|
|
||||||
|
|
||||||
/* For 2 memory operands, pick either operands[1] or operands[3] to move into
|
if (reg_mentioned_p (scratch, operands[1]))
|
||||||
the destination register. */
|
|
||||||
if (memory_operand (operands[1], mode))
|
|
||||||
{
|
{
|
||||||
emit_move_insn (op0, operands[1]);
|
if (!can_create_pseudo_p ())
|
||||||
operands[1] = op0;
|
return false;
|
||||||
}
|
scratch = gen_reg_rtx (mode);
|
||||||
else if (memory_operand (operands[3], mode))
|
|
||||||
{
|
|
||||||
emit_move_insn (op0, operands[3]);
|
|
||||||
operands[3] = op0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
emit_move_insn (scratch, operands[3]);
|
||||||
|
if (rtx_equal_p (operands[2], operands[3]))
|
||||||
|
operands[2] = operands[3] = scratch;
|
||||||
else
|
else
|
||||||
gcc_unreachable ();
|
operands[3] = scratch;
|
||||||
|
return true;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Table of valid machine attributes. */
|
/* Table of valid machine attributes. */
|
||||||
|
|
|
@ -19248,7 +19248,7 @@
|
||||||
(match_operand:MODEF 1 "register_operand" "x")
|
(match_operand:MODEF 1 "register_operand" "x")
|
||||||
(match_operand:MODEF 2 "register_operand" "x")
|
(match_operand:MODEF 2 "register_operand" "x")
|
||||||
(match_operand:MODEF 3 "register_operand" "x")))]
|
(match_operand:MODEF 3 "register_operand" "x")))]
|
||||||
"TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
|
"TARGET_XOP"
|
||||||
"vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
|
"vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
|
||||||
[(set_attr "type" "sse4arg")])
|
[(set_attr "type" "sse4arg")])
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue