mirror of git://gcc.gnu.org/git/gcc.git
Add sparc vec_perm patterns when VIS2.
* config/sparc/sparc.md (vec_perm_constv8qi, vec_perm<mode>): New patterns.
* config/sparc/sparc.c (sparc_expand_vec_perm_bmask): New function.
* config/sparc/sparc-protos.h (sparc_expand_vec_perm_bmask): Declare.

From-SVN: r180119
This commit is contained in:
parent
d62fc6ed20
commit
9d4dedaa7b
|
@ -53,6 +53,11 @@
|
||||||
* config/sparc/sparc.md: Use register_or_zero_operand where rJ
|
* config/sparc/sparc.md: Use register_or_zero_operand where rJ
|
||||||
is the constraint.
|
is the constraint.
|
||||||
|
|
||||||
|
* config/sparc/sparc.md (vec_perm_constv8qi, vec_perm<mode>): New
|
||||||
|
patterns.
|
||||||
|
* config/sparc/sparc.c (sparc_expand_vec_perm_bmask): New function.
|
||||||
|
* config/sparc/sparc-protos.h (sparc_expand_vec_perm_bmask): Declare.
|
||||||
|
|
||||||
2011-10-17 David S. Miller <davem@davemloft.net>
|
2011-10-17 David S. Miller <davem@davemloft.net>
|
||||||
|
|
||||||
* config/sparc/sparc-modes.def: Add single entry vector modes for
|
* config/sparc/sparc-modes.def: Add single entry vector modes for
|
||||||
|
|
|
@ -107,6 +107,7 @@ extern rtx gen_df_reg (rtx, int);
|
||||||
extern void sparc_expand_compare_and_swap_12 (rtx, rtx, rtx, rtx);
|
extern void sparc_expand_compare_and_swap_12 (rtx, rtx, rtx, rtx);
|
||||||
extern const char *output_v8plus_mult (rtx, rtx *, const char *);
|
extern const char *output_v8plus_mult (rtx, rtx *, const char *);
|
||||||
extern void sparc_expand_vector_init (rtx, rtx);
|
extern void sparc_expand_vector_init (rtx, rtx);
|
||||||
|
extern void sparc_expand_vec_perm_bmask (enum machine_mode, rtx);
|
||||||
#endif /* RTX_CODE */
|
#endif /* RTX_CODE */
|
||||||
|
|
||||||
#endif /* __SPARC_PROTOS_H__ */
|
#endif /* __SPARC_PROTOS_H__ */
|
||||||
|
|
|
@ -10863,6 +10863,113 @@ sparc_expand_compare_and_swap_12 (rtx result, rtx mem, rtx oldval, rtx newval)
|
||||||
emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
|
emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Emit RTL that compresses the variable permutation selector SEL, which
   belongs to a 64-bit vector of mode VMODE, into 32-bit inputs for the
   VIS bmask insn, and then emit that bmask insn.

   VMODE must be V2SImode, V4HImode or V8QImode; any other mode reaches
   gcc_unreachable.  SEL is reinterpreted as a DImode integer and the
   per-element indices are extracted with shifts and masks.  In the
   nibble diagrams below each character is one hex digit: 'x' is a
   don't-care digit of the input and '.' is a digit known to be zero.

   Each case leaves two SImode values: SEL, holding the scaled
   per-byte indices, and T_1, holding what has to be added to it.  The
   addition itself is left to the bmask insn.  The bmask destination is
   a fresh pseudo that is not used afterwards.
   NOTE(review): presumably the insn is wanted for its side effect on
   %gsr, which the bshuffle emitted by the caller reads -- confirm
   against the bmasksi_vis pattern in sparc.md.  */
void
|
||||||
|
sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
|
||||||
|
{
|
||||||
|
rtx t_1, t_2, t_3;
|
||||||
|

||||||
|
/* Treat the whole selector vector as one 64-bit integer.  */
sel = gen_lowpart (DImode, sel);
|
||||||
|
switch (vmode)
|
||||||
|
{
|
||||||
|
case V2SImode:
|
||||||
|
/* inp = xxxxxxxAxxxxxxxB */
|
||||||
|
t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
|
||||||
|
NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
/* t_1 = ....xxxxxxxAxxx. */
|
||||||
|
/* Only the low two bits of each index are kept (mask 3 / 0x30000).  */
sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
|
||||||
|
GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
|
||||||
|
GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
/* sel = .......B */
|
||||||
|
/* t_1 = ...A.... */
|
||||||
|
sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
|
||||||
|
/* sel = ...A...B */
|
||||||
|
/* 0x4444 == 4 * 0x1111: copy each index into four nibbles and scale
   it by 4, the size in bytes of an SImode element.  */
sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
|
||||||
|
/* sel = AAAABBBB * 4 */
|
||||||
|
/* Byte offsets 0, 1, 2, 3 within each element; bmask adds them.  */
t_1 = force_reg (SImode, GEN_INT (0x01230123));
|
||||||
|
/* sel = { A*4, A*4+1, A*4+2, ... } */
|
||||||
|
break;
|
||||||
|

||||||
|
case V4HImode:
|
||||||
|
/* inp = xxxAxxxBxxxCxxxD */
|
||||||
|
t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
|
||||||
|
NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
|
||||||
|
NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
|
||||||
|
NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
/* t_1 = ..xxxAxxxBxxxCxx */
|
||||||
|
/* t_2 = ....xxxAxxxBxxxC */
|
||||||
|
/* t_3 = ......xxxAxxxBxx */
|
||||||
|
/* Only the low three bits of each index are kept (mask 7).  */
sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
|
||||||
|
GEN_INT (0x07),
|
||||||
|
NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
|
||||||
|
GEN_INT (0x0700),
|
||||||
|
NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
|
||||||
|
GEN_INT (0x070000),
|
||||||
|
NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
|
||||||
|
GEN_INT (0x07000000),
|
||||||
|
NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
/* sel = .......D */
|
||||||
|
/* t_1 = .....C.. */
|
||||||
|
/* t_2 = ...B.... */
|
||||||
|
/* t_3 = .A...... */
|
||||||
|
sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
|
||||||
|
t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
|
||||||
|
sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
|
||||||
|
/* sel = .A.B.C.D */
|
||||||
|
/* 0x22 == 2 * 0x11: copy each index into two nibbles and scale it
   by 2, the size in bytes of an HImode element.  */
sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
|
||||||
|
/* sel = AABBCCDD * 2 */
|
||||||
|
/* Constant operand that bmask adds to SEL.  */
t_1 = force_reg (SImode, GEN_INT (0x01010101));
|
||||||
|
/* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
|
||||||
|
break;
|
||||||
|

||||||
|
case V8QImode:
|
||||||
|
/* input = xAxBxCxDxExFxGxH */
|
||||||
|
/* Keep the low nibble of every byte, then repeatedly fold the
   nibbles together (shift, IOR, mask) until the eight indices sit
   in two groups of four adjacent nibbles.  */
sel = expand_simple_binop (DImode, AND, sel,
|
||||||
|
GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
|
||||||
|
| 0x0f0f0f0f),
|
||||||
|
NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
/* sel = .A.B.C.D.E.F.G.H */
|
||||||
|
t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
|
||||||
|
NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
/* t_1 = ..A.B.C.D.E.F.G. */
|
||||||
|
sel = expand_simple_binop (DImode, IOR, sel, t_1,
|
||||||
|
NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
/* sel = .AABBCCDDEEFFGGH */
|
||||||
|
sel = expand_simple_binop (DImode, AND, sel,
|
||||||
|
GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
|
||||||
|
| 0xff00ff),
|
||||||
|
NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
/* sel = ..AB..CD..EF..GH */
|
||||||
|
t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
|
||||||
|
NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
/* t_1 = ....AB..CD..EF.. */
|
||||||
|
sel = expand_simple_binop (DImode, IOR, sel, t_1,
|
||||||
|
NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
/* sel = ..ABABCDCDEFEFGH */
|
||||||
|
sel = expand_simple_binop (DImode, AND, sel,
|
||||||
|
GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
|
||||||
|
NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
/* sel = ....ABCD....EFGH */
|
||||||
|
t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
|
||||||
|
NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
/* t_1 = ........ABCD.... */
|
||||||
|
/* The low 32 bits of SEL are ....EFGH and those of T_1 are ABCD....;
   the non-zero nibbles do not overlap, so the addition done by bmask
   below merges them into ABCDEFGH.  */
sel = gen_lowpart (SImode, sel);
|
||||||
|
t_1 = gen_lowpart (SImode, t_1);
|
||||||
|
break;
|
||||||
|

||||||
|
default:
|
||||||
|
gcc_unreachable ();
|
||||||
|
}
|
||||||
|

||||||
|
/* Always perform the final addition/merge within the bmask insn. */
|
||||||
|
emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
|
||||||
|
}
|
||||||
|
|
||||||
/* Implement TARGET_FRAME_POINTER_REQUIRED. */
|
/* Implement TARGET_FRAME_POINTER_REQUIRED. */
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
|
|
|
@ -8350,6 +8350,43 @@
|
||||||
[(set_attr "type" "fga")
|
[(set_attr "type" "fga")
|
||||||
(set_attr "fptype" "double")])
|
(set_attr "fptype" "double")])
|
||||||
|
|
||||||
|
;; The rtl expanders will happily convert constant permutations on other
|
||||||
|
;; modes down to V8QI. Rely on this to avoid the complexity of the byte
|
||||||
|
;; order of the permutation.
|
||||||
|
(define_expand "vec_perm_constv8qi"
|
||||||
|
[(match_operand:V8QI 0 "register_operand" "")
|
||||||
|
(match_operand:V8QI 1 "register_operand" "")
|
||||||
|
(match_operand:V8QI 2 "register_operand" "")
|
||||||
|
(match_operand:V8QI 3 "" "")]
|
||||||
|
"TARGET_VIS2"
|
||||||
|
{
|
||||||
|
/* Expand a V8QI permutation whose selector (operand 3) is constant:
   pack the eight element indices into one 32-bit mask at compile time,
   feed it to bmask, then shuffle operands 1 and 2 into operand 0.  */
unsigned int i, mask;
|
||||||
|
rtx sel = operands[3];
|
||||||
|

||||||
|
/* Keep the low four bits of each index and place element 0 in the
   most significant nibble, element 7 in the least significant one.  */
for (i = mask = 0; i < 8; ++i)
|
||||||
|
mask |= (INTVAL (XVECEXP (sel, 0, i)) & 0xf) << (28 - i*4);
|
||||||
|
sel = force_reg (SImode, gen_int_mode (mask, SImode));
|
||||||
|

||||||
|
/* The mask is already complete, so the second bmask addend is zero.
   The bmask destination is a fresh pseudo that is not used afterwards.
   NOTE(review): presumably bmask is emitted for its effect on %gsr,
   which bshuffle reads -- confirm against the bmasksi_vis pattern.  */
emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
|
||||||
|
emit_insn (gen_bshufflev8qi_vis (operands[0], operands[1], operands[2]));
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
;; Unlike constant permutation, we can vastly simplify the compression of
|
||||||
|
;; the 64-bit selector input to the 32-bit %gsr value by knowing what the
|
||||||
|
;; width of the input is.
|
||||||
|
(define_expand "vec_perm<mode>"
|
||||||
|
[(match_operand:VM64 0 "register_operand" "")
|
||||||
|
(match_operand:VM64 1 "register_operand" "")
|
||||||
|
(match_operand:VM64 2 "register_operand" "")
|
||||||
|
(match_operand:VM64 3 "register_operand" "")]
|
||||||
|
"TARGET_VIS2"
|
||||||
|
{
|
||||||
|
/* Expand a permutation whose selector (operand 3) is in a register:
   sparc_expand_vec_perm_bmask emits the code that compresses the
   selector and the bmask insn; the bshuffle that follows combines
   operands 1 and 2 into operand 0.  The ordering matters: the bmask
   sequence must be emitted before the bshuffle.  */
sparc_expand_vec_perm_bmask (<MODE>mode, operands[3]);
|
||||||
|
emit_insn (gen_bshuffle<mode>_vis (operands[0], operands[1], operands[2]));
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
;; VIS 2.0 adds edge variants which do not set the condition codes
|
;; VIS 2.0 adds edge variants which do not set the condition codes
|
||||||
(define_insn "edge8n<P:mode>_vis"
|
(define_insn "edge8n<P:mode>_vis"
|
||||||
[(set (match_operand:P 0 "register_operand" "=r")
|
[(set (match_operand:P 0 "register_operand" "=r")
|
||||||
|
|
Loading…
Reference in New Issue