Revert sparc vec_init improvements as they cause 64-bit regressions.

gcc/

	Revert
	2011-11-05  David S. Miller  <davem@davemloft.net>

From-SVN: r181283
This commit is contained in:
David S. Miller 2011-11-11 08:23:34 +00:00 committed by David S. Miller
parent ca3e7c9ffd
commit 2b38137d3d
3 changed files with 109 additions and 398 deletions

View File

@ -1,3 +1,8 @@
2011-11-11 David S. Miller <davem@davemloft.net>
Revert
2011-11-05 David S. Miller <davem@davemloft.net>
2011-11-11 Jakub Jelinek <jakub@redhat.com>
* opts-common.c (generate_canonical_option): Free opt_text

View File

@ -11285,357 +11285,88 @@ output_v8plus_mult (rtx insn, rtx *operands, const char *opcode)
}
}
/* Subroutine of sparc_expand_vector_init. Emit code to initialize TARGET to
the N_ELTS values for individual fields contained in LOCS by means of VIS2
BSHUFFLE insn. MODE and INNER_MODE are the modes describing TARGET. */
/* Subroutine of sparc_expand_vector_init. Emit code to initialize
all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
and INNER_MODE are the modes describing TARGET. */
static void
vector_init_bshuffle (rtx target, rtx *locs, int n_elts,
enum machine_mode mode,
vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
enum machine_mode inner_mode)
{
rtx mid_target, r0_high, r0_low, r1_high, r1_low;
enum machine_mode partial_mode;
int bmask, i, idxs[8];
rtx t1, final_insn;
int bmask;
partial_mode = (mode == V4HImode
? V2HImode
: (mode == V8QImode
? V4QImode : mode));
t1 = gen_reg_rtx (mode);
r0_high = r0_low = NULL_RTX;
r1_high = r1_low = NULL_RTX;
elt = convert_modes (SImode, inner_mode, elt, true);
emit_move_insn (gen_lowpart(SImode, t1), elt);
/* Move the pieces into place, as needed, and calculate the nibble
indexes for the bmask calculation. After we execute this loop the
locs[] array is no longer needed. Therefore, to simplify things,
we set entries that have been processed already to NULL_RTX. */
for (i = 0; i < n_elts; i++)
switch (mode)
{
int j;
if (locs[i] == NULL_RTX)
continue;
if (!r0_low)
{
r0_low = locs[i];
idxs[i] = 0x7;
}
else if (!r1_low)
{
r1_low = locs[i];
idxs[i] = 0xf;
}
else if (!r0_high)
{
r0_high = gen_highpart (partial_mode, r0_low);
emit_move_insn (r0_high, gen_lowpart (partial_mode, locs[i]));
idxs[i] = 0x3;
}
else if (!r1_high)
{
r1_high = gen_highpart (partial_mode, r1_low);
emit_move_insn (r1_high, gen_lowpart (partial_mode, locs[i]));
idxs[i] = 0xb;
}
else
gcc_unreachable ();
for (j = i + 1; j < n_elts; j++)
{
if (locs[j] == locs[i])
{
locs[j] = NULL_RTX;
idxs[j] = idxs[i];
}
}
locs[i] = NULL_RTX;
}
bmask = 0;
for (i = 0; i < n_elts; i++)
{
int v = idxs[i];
switch (GET_MODE_SIZE (inner_mode))
{
case 2:
bmask <<= 8;
bmask |= (((v - 1) << 4) | v);
break;
case 1:
bmask <<= 4;
bmask |= v;
break;
default:
gcc_unreachable ();
}
case V2SImode:
final_insn = gen_bshufflev2si_vis (target, t1, t1);
bmask = 0x45674567;
break;
case V4HImode:
final_insn = gen_bshufflev4hi_vis (target, t1, t1);
bmask = 0x67676767;
break;
case V8QImode:
final_insn = gen_bshufflev8qi_vis (target, t1, t1);
bmask = 0x77777777;
break;
default:
gcc_unreachable ();
}
emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), CONST0_RTX (SImode),
force_reg (SImode, GEN_INT (bmask))));
mid_target = target;
if (GET_MODE_SIZE (mode) == 4)
{
mid_target = gen_reg_rtx (mode == V2HImode
? V4HImode : V8QImode);
}
if (!r1_low)
r1_low = r0_low;
switch (GET_MODE (mid_target))
{
case V4HImode:
emit_insn (gen_bshufflev4hi_vis (mid_target, r0_low, r1_low));
break;
case V8QImode:
emit_insn (gen_bshufflev8qi_vis (mid_target, r0_low, r1_low));
break;
default:
gcc_unreachable ();
}
if (mid_target != target)
emit_move_insn (target, gen_lowpart (partial_mode, mid_target));
emit_insn (final_insn);
}
/* Subroutine of sparc_expand_vector_init. Emit code to initialize TARGET to
values for individual fields VALS by means of simple word moves if this is
possible. MODE and INNER_MODE are the modes describing TARGET. Return true
on success. */
static bool
vector_init_move_words (rtx target, rtx vals, enum machine_mode mode,
enum machine_mode inner_mode)
{
switch (mode)
{
case V1SImode:
case V1DImode:
emit_move_insn (gen_lowpart (inner_mode, target),
gen_lowpart (inner_mode, XVECEXP (vals, 0, 0)));
return true;
case V2SImode:
emit_move_insn (gen_highpart (SImode, target), XVECEXP (vals, 0, 0));
emit_move_insn (gen_lowpart (SImode, target), XVECEXP (vals, 0, 1));
return true;
default:
break;
}
return false;
}
/* Subroutine of sparc_expand_vector_init. Move the N_ELTS elements in VALS
into registers compatible with MODE and INNER_MODE. Store the RTX for
these regs into the corresponding array entry of LOCS. */
static void
vector_init_prepare_elts (rtx *locs, rtx vals, int n_elts,
enum machine_mode mode,
enum machine_mode inner_mode)
vector_init_fpmerge (rtx target, rtx elt, enum machine_mode inner_mode)
{
enum machine_mode loc_mode;
int i;
rtx t1, t2, t3, t3_low;
switch (mode)
{
case V2HImode:
loc_mode = V4HImode;
break;
t1 = gen_reg_rtx (V4QImode);
elt = convert_modes (SImode, inner_mode, elt, true);
emit_move_insn (gen_lowpart (SImode, t1), elt);
case V4QImode:
loc_mode = V8QImode;
break;
t2 = gen_reg_rtx (V4QImode);
emit_move_insn (t2, t1);
case V4HImode:
case V8QImode:
loc_mode = mode;
break;
t3 = gen_reg_rtx (V8QImode);
t3_low = gen_lowpart (V4QImode, t3);
default:
gcc_unreachable ();
}
emit_insn (gen_fpmerge_vis (t3, t1, t2));
emit_move_insn (t1, t3_low);
emit_move_insn (t2, t3_low);
gcc_assert (GET_MODE_SIZE (inner_mode) <= 4);
for (i = 0; i < n_elts; i++)
{
rtx dst, elt = XVECEXP (vals, 0, i);
int j;
emit_insn (gen_fpmerge_vis (t3, t1, t2));
emit_move_insn (t1, t3_low);
emit_move_insn (t2, t3_low);
/* Did we see this already? If so just record it's location. */
dst = NULL_RTX;
for (j = 0; j < i; j++)
{
if (XVECEXP (vals, 0, j) == elt)
{
dst = locs[j];
break;
}
}
if (! dst)
{
enum rtx_code code = GET_CODE (elt);
dst = gen_reg_rtx (loc_mode);
/* We use different strategies based upon whether the element
is in memory or in a register. When we start in a register
and we're VIS3 capable, it's always cheaper to use the VIS3
int-->fp register moves since we avoid having to use stack
memory. */
if ((TARGET_VIS3 && (code == REG || code == SUBREG))
|| (CONSTANT_P (elt)
&& (const_zero_operand (elt, inner_mode)
|| const_all_ones_operand (elt, inner_mode))))
{
elt = convert_modes (SImode, inner_mode, elt, true);
emit_clobber (dst);
emit_move_insn (gen_lowpart (SImode, dst), elt);
}
else
{
rtx m = elt;
if (CONSTANT_P (elt))
{
m = force_const_mem (inner_mode, elt);
}
else if (code != MEM)
{
rtx stk
= assign_stack_temp (inner_mode, GET_MODE_SIZE(inner_mode),
0);
emit_move_insn (stk, elt);
m = stk;
}
switch (loc_mode)
{
case V4HImode:
emit_insn (gen_zero_extend_v4hi_vis (dst, m));
break;
case V8QImode:
emit_insn (gen_zero_extend_v8qi_vis (dst, m));
break;
default:
gcc_unreachable ();
}
}
}
locs[i] = dst;
}
emit_insn (gen_fpmerge_vis (gen_lowpart (V8QImode, target), t1, t2));
}
/* Subroutine of sparc_expand_vector_init. Emit code to initialize TARGET to
the N_ELTS values for individual fields contained in LOCS by means of VIS2
instructions, among which N_UNIQUE are unique. MODE and INNER_MODE are the
modes describing TARGET. */
static void
sparc_expand_vector_init_vis2 (rtx target, rtx *locs, int n_elts, int n_unique,
enum machine_mode mode,
enum machine_mode inner_mode)
vector_init_faligndata (rtx target, rtx elt, enum machine_mode inner_mode)
{
if (n_unique <= 4)
{
vector_init_bshuffle (target, locs, n_elts, mode, inner_mode);
}
else
{
int i;
rtx t1 = gen_reg_rtx (V4HImode);
gcc_assert (mode == V8QImode);
elt = convert_modes (SImode, inner_mode, elt, true);
emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
force_reg (SImode, GEN_INT (7)),
CONST0_RTX (SImode)));
i = n_elts - 1;
emit_insn (gen_faligndatav8qi_vis (target, locs[i], locs[i]));
while (--i >= 0)
emit_insn (gen_faligndatav8qi_vis (target, locs[i], target));
}
}
/* Subroutine of sparc_expand_vector_init. Emit code to initialize TARGET to
the N_ELTS values for individual fields contained in LOCS by means of VIS1
instructions, among which N_UNIQUE are unique. MODE is TARGET's mode. */
static void
sparc_expand_vector_init_vis1 (rtx target, rtx *locs, int n_elts, int n_unique,
enum machine_mode mode)
{
enum machine_mode full_mode = mode;
rtx (*emitter)(rtx, rtx, rtx);
int alignaddr_val, i;
rtx tmp = target;
if (n_unique == 1 && mode == V8QImode)
{
rtx t2, t2_low, t1;
t1 = gen_reg_rtx (V4QImode);
emit_move_insn (t1, gen_lowpart (V4QImode, locs[0]));
t2 = gen_reg_rtx (V8QImode);
t2_low = gen_lowpart (V4QImode, t2);
/* xxxxxxAA --> xxxxxxxxxxxxAAAA
xxxxAAAA --> xxxxxxxxAAAAAAAA
AAAAAAAA --> AAAAAAAAAAAAAAAA */
emit_insn (gen_fpmerge_vis (t2, t1, t1));
emit_move_insn (t1, t2_low);
emit_insn (gen_fpmerge_vis (t2, t1, t1));
emit_move_insn (t1, t2_low);
emit_insn (gen_fpmerge_vis (target, t1, t1));
return;
}
switch (mode)
{
case V2HImode:
full_mode = V4HImode;
/* FALLTHRU */
case V4HImode:
emitter = gen_faligndatav4hi_vis;
alignaddr_val = 6;
break;
case V4QImode:
full_mode = V8QImode;
/* FALLTHRU */
case V8QImode:
emitter = gen_faligndatav8qi_vis;
alignaddr_val = 7;
break;
default:
gcc_unreachable ();
}
if (full_mode != mode)
tmp = gen_reg_rtx (full_mode);
emit_move_insn (gen_lowpart (SImode, t1), elt);
emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
force_reg (SImode, GEN_INT (alignaddr_val)),
force_reg (SImode, GEN_INT (6)),
CONST0_RTX (SImode)));
i = n_elts - 1;
emit_insn (emitter (tmp, locs[i], locs[i]));
while (--i >= 0)
emit_insn (emitter (tmp, locs[i], tmp));
if (tmp != target)
emit_move_insn (target, gen_highpart (mode, tmp));
emit_insn (gen_faligndatav4hi_vis (target, t1, target));
emit_insn (gen_faligndatav4hi_vis (target, t1, target));
emit_insn (gen_faligndatav4hi_vis (target, t1, target));
emit_insn (gen_faligndatav4hi_vis (target, t1, target));
}
/* Emit code to initialize TARGET to values for individual fields VALS. */
@ -11646,30 +11377,19 @@ sparc_expand_vector_init (rtx target, rtx vals)
enum machine_mode mode = GET_MODE (target);
enum machine_mode inner_mode = GET_MODE_INNER (mode);
int n_elts = GET_MODE_NUNITS (mode);
int i, n_var = 0, n_unique = 0;
rtx locs[8];
gcc_assert (n_elts <= 8);
int i, n_var = 0;
bool all_same;
rtx mem;
all_same = true;
for (i = 0; i < n_elts; i++)
{
rtx x = XVECEXP (vals, 0, i);
bool found = false;
int j;
if (!CONSTANT_P (x))
n_var++;
for (j = 0; j < i; j++)
{
if (rtx_equal_p (x, XVECEXP (vals, 0, j)))
{
found = true;
break;
}
}
if (!found)
n_unique++;
if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
all_same = false;
}
if (n_var == 0)
@ -11678,16 +11398,56 @@ sparc_expand_vector_init (rtx target, rtx vals)
return;
}
if (vector_init_move_words (target, vals, mode, inner_mode))
return;
if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
{
if (GET_MODE_SIZE (inner_mode) == 4)
{
emit_move_insn (gen_lowpart (SImode, target),
gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
return;
}
else if (GET_MODE_SIZE (inner_mode) == 8)
{
emit_move_insn (gen_lowpart (DImode, target),
gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
return;
}
}
else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
&& GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
{
emit_move_insn (gen_highpart (word_mode, target),
gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
emit_move_insn (gen_lowpart (word_mode, target),
gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
return;
}
vector_init_prepare_elts (locs, vals, n_elts, mode, inner_mode);
if (all_same && GET_MODE_SIZE (mode) == 8)
{
if (TARGET_VIS2)
{
vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
return;
}
if (mode == V8QImode)
{
vector_init_fpmerge (target, XVECEXP (vals, 0, 0), inner_mode);
return;
}
if (mode == V4HImode)
{
vector_init_faligndata (target, XVECEXP (vals, 0, 0), inner_mode);
return;
}
}
if (TARGET_VIS2)
sparc_expand_vector_init_vis2 (target, locs, n_elts, n_unique,
mode, inner_mode);
else
sparc_expand_vector_init_vis1 (target, locs, n_elts, n_unique, mode);
mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
for (i = 0; i < n_elts; i++)
emit_move_insn (adjust_address_nv (mem, inner_mode,
i * GET_MODE_SIZE (inner_mode)),
XVECEXP (vals, 0, i));
emit_move_insn (target, mem);
}
/* Implement TARGET_SECONDARY_RELOAD. */

View File

@ -7830,60 +7830,6 @@
DONE;
})
(define_expand "zero_extend_v8qi_vis"
[(set (match_operand:V8QI 0 "register_operand" "")
(vec_merge:V8QI
(vec_duplicate:V8QI
(match_operand:QI 1 "memory_operand" ""))
(match_dup 2)
(const_int 254)))]
"TARGET_VIS"
{
if (! REG_P (XEXP (operands[1], 0)))
{
rtx addr = force_reg (Pmode, XEXP (operands[1], 0));
operands[1] = replace_equiv_address (operands[1], addr);
}
operands[2] = CONST0_RTX (V8QImode);
})
(define_expand "zero_extend_v4hi_vis"
[(set (match_operand:V4HI 0 "register_operand" "")
(vec_merge:V4HI
(vec_duplicate:V4HI
(match_operand:HI 1 "memory_operand" ""))
(match_dup 2)
(const_int 14)))]
"TARGET_VIS"
{
if (! REG_P (XEXP (operands[1], 0)))
{
rtx addr = force_reg (Pmode, XEXP (operands[1], 0));
operands[1] = replace_equiv_address (operands[1], addr);
}
operands[2] = CONST0_RTX (V4HImode);
})
(define_insn "*zero_extend_v8qi_<P:mode>_insn"
[(set (match_operand:V8QI 0 "register_operand" "=e")
(vec_merge:V8QI
(vec_duplicate:V8QI
(mem:QI (match_operand:P 1 "register_operand" "r")))
(match_operand:V8QI 2 "const_zero_operand" "Y")
(const_int 254)))]
"TARGET_VIS"
"ldda\t[%1] 0xd0, %0")
(define_insn "*zero_extend_v4hi_<P:mode>_insn"
[(set (match_operand:V4HI 0 "register_operand" "=e")
(vec_merge:V4HI
(vec_duplicate:V4HI
(mem:HI (match_operand:P 1 "register_operand" "r")))
(match_operand:V4HI 2 "const_zero_operand" "Y")
(const_int 14)))]
"TARGET_VIS"
"ldda\t[%1] 0xd2, %0")
(define_expand "vec_init<mode>"
[(match_operand:VMALL 0 "register_operand" "")
(match_operand:VMALL 1 "" "")]