mirror of git://gcc.gnu.org/git/gcc.git
Many more comments. Use a vec<bool> when we only care about 0/1.
This commit is contained in:
parent
42a2a810cd
commit
4953a04ee6
|
|
@ -702,7 +702,19 @@
|
||||||
rtx t1 = force_reg (word_mode, operands[3]);
|
rtx t1 = force_reg (word_mode, operands[3]);
|
||||||
a0 = force_reg (word_mode, gen_rtx_XOR (word_mode, a0, a1));
|
a0 = force_reg (word_mode, gen_rtx_XOR (word_mode, a0, a1));
|
||||||
a0 = force_reg (word_mode, gen_rtx_CLMUL (word_mode, a0, q_reg));
|
a0 = force_reg (word_mode, gen_rtx_CLMUL (word_mode, a0, q_reg));
|
||||||
|
|
||||||
|
/* XXX By adjusting Q we may be able to eliminate this shift. The size
|
||||||
|
of this shift seems to be dependent on the size of the CRC
|
||||||
|
output (aka N in N-bit CRC). */
|
||||||
a0 = force_reg (word_mode, gen_rtx_ASHIFT (word_mode, a0, GEN_INT (16)));
|
a0 = force_reg (word_mode, gen_rtx_ASHIFT (word_mode, a0, GEN_INT (16)));
|
||||||
|
|
||||||
|
/* CCC By adjusting operands[3] (which should be a constant) we may
|
||||||
|
be able to utilize CLMULH to get the bits in the right place and
|
||||||
|
avoid the shifts to extract the bitfield. If that is not possible
|
||||||
|
the shifts will still be needed and are dependent on input/output
|
||||||
|
sizes as well. Does adjusting the constant and shift counts
|
||||||
|
result in a constant that is more likely to bt synthesized in a
|
||||||
|
single instruction? */
|
||||||
a0 = force_reg (word_mode, gen_rtx_CLMUL (word_mode, a0, t1));
|
a0 = force_reg (word_mode, gen_rtx_CLMUL (word_mode, a0, t1));
|
||||||
a0 = force_reg (word_mode, gen_rtx_LSHIFTRT (word_mode, a0, GEN_INT (24)));
|
a0 = force_reg (word_mode, gen_rtx_LSHIFTRT (word_mode, a0, GEN_INT (24)));
|
||||||
a0 = force_reg (word_mode, gen_rtx_ASHIFT (word_mode, a0, GEN_INT (24)));
|
a0 = force_reg (word_mode, gen_rtx_ASHIFT (word_mode, a0, GEN_INT (24)));
|
||||||
|
|
@ -718,7 +730,13 @@
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* If we do not have the ZBC extension (ie, no clmul), then
|
/* If we do not have the ZBC extension (ie, no clmul), then
|
||||||
use a table based algorithm to implement the CRC. */
|
use a table based algorithm to implement the CRC.
|
||||||
|
|
||||||
|
XXX What is the size of each element in this table and
|
||||||
|
how many entries are in the table? Does the element
|
||||||
|
size or number of elements vary based on the input or
|
||||||
|
output types for the CRC function? If so, do we need
|
||||||
|
to restrict it to only be used for certain modes? */
|
||||||
expand_crc_table_based (operands, QImode);
|
expand_crc_table_based (operands, QImode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6902,19 +6902,21 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
|
||||||
return shamt == ctz_hwi (mask);
|
return shamt == ctz_hwi (mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Calculate the quotient of polynomial long division of x^2n by the polynomial
|
/* Return the quotient of polynomial long division of x^2n by POLYNOMIAL
|
||||||
in GF (2^n). */
|
in GF (2^n). */
|
||||||
|
|
||||||
unsigned HOST_WIDE_INT
|
unsigned HOST_WIDE_INT
|
||||||
gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT polynomial)
|
gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT polynomial)
|
||||||
{
|
{
|
||||||
vec<short> x2n, pol, q;
|
vec<bool>pol;
|
||||||
|
vec<short> x2n, q;
|
||||||
|
|
||||||
// Create vector of bits, for the polynomial.
|
// Create vector of bits, for the polynomial.
|
||||||
pol.create (sizeof (polynomial) * BITS_PER_UNIT + 1);
|
pol.create (sizeof (polynomial) * BITS_PER_UNIT + 1);
|
||||||
size_t n = 1;
|
size_t n = 1;
|
||||||
for ( ; polynomial; n++)
|
for ( ; polynomial; n++)
|
||||||
{
|
{
|
||||||
pol.quick_push (polynomial&1);
|
pol.quick_push (polynomial & 1);
|
||||||
polynomial >>= 1;
|
polynomial >>= 1;
|
||||||
}
|
}
|
||||||
pol.quick_push (1);
|
pol.quick_push (1);
|
||||||
|
|
@ -6923,6 +6925,8 @@ gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT polynomial)
|
||||||
x2n.create (2 * n - 1);
|
x2n.create (2 * n - 1);
|
||||||
for (size_t i = 0; i < 2 * (n - 1); i++)
|
for (size_t i = 0; i < 2 * (n - 1); i++)
|
||||||
x2n.safe_push (0);
|
x2n.safe_push (0);
|
||||||
|
|
||||||
|
/* Is this the implicit bit on at the top of the poly? */
|
||||||
x2n.safe_push (1);
|
x2n.safe_push (1);
|
||||||
|
|
||||||
q.create (n);
|
q.create (n);
|
||||||
|
|
@ -6952,6 +6956,9 @@ gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT polynomial)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Calculates reciprocal CRC for initial CRC and given polynomial. */
|
/* Calculates reciprocal CRC for initial CRC and given polynomial. */
|
||||||
|
/* XXX Is this needed? It's not referenced anywhere.
|
||||||
|
If it is needed, it needs to be generalized rather than only
|
||||||
|
working on uint16_t. */
|
||||||
static uint16_t
|
static uint16_t
|
||||||
generate_crc_reciprocal (uint16_t crc,
|
generate_crc_reciprocal (uint16_t crc,
|
||||||
uint16_t polynomial)
|
uint16_t polynomial)
|
||||||
|
|
@ -6967,6 +6974,8 @@ generate_crc_reciprocal (uint16_t crc,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Calculates CRC for initial CRC and given polynomial. */
|
/* Calculates CRC for initial CRC and given polynomial. */
|
||||||
|
/* XXX This needs to be generalized rather than only working
|
||||||
|
on uint16_t. */
|
||||||
static uint16_t
|
static uint16_t
|
||||||
generate_crc (uint16_t crc,
|
generate_crc (uint16_t crc,
|
||||||
uint16_t polynomial)
|
uint16_t polynomial)
|
||||||
|
|
@ -6983,6 +6992,19 @@ generate_crc (uint16_t crc,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Generates 16-bit CRC table. */
|
/* Generates 16-bit CRC table. */
|
||||||
|
/* XXX This needs to be generalized rather than only working
|
||||||
|
on uint16_t.
|
||||||
|
|
||||||
|
This looks like it tries to share tables which is good, but
|
||||||
|
don't we have to verify that the polynomial and sizes are the
|
||||||
|
same for sharing to be safe? Doesn't that in turn argue that
|
||||||
|
the polynomial and size should be encoded into the table
|
||||||
|
name?
|
||||||
|
|
||||||
|
Presumably the table should be going into a readonly data
|
||||||
|
section. It doesn't look like we make any attempt to switch
|
||||||
|
sections. Mixing code and data is exceedingly bad on
|
||||||
|
modern processors. */
|
||||||
rtx
|
rtx
|
||||||
generate_crc16_table (uint16_t polynom)
|
generate_crc16_table (uint16_t polynom)
|
||||||
{
|
{
|
||||||
|
|
@ -7016,27 +7038,43 @@ generate_crc16_table (uint16_t polynom)
|
||||||
return lab;
|
return lab;
|
||||||
}
|
}
|
||||||
|
|
||||||
void reflect (rtx op1, machine_mode mode)
|
/* XXX Is this needed? It's not referenced anywhere. */
|
||||||
|
void
|
||||||
|
reflect (rtx op1, machine_mode mode)
|
||||||
{
|
{
|
||||||
// Reflect the bits
|
// Reflect the bits
|
||||||
op1 = gen_rtx_BSWAP (mode, op1);
|
op1 = gen_rtx_BSWAP (mode, op1);
|
||||||
|
|
||||||
// Adjust the position of the reflected bits
|
// Adjust the position of the reflected bits
|
||||||
|
/* XXX I don't understand the comment. Under what
|
||||||
|
conditions does mode != Pmode? */
|
||||||
if (mode != Pmode)
|
if (mode != Pmode)
|
||||||
op1 = gen_rtx_SUBREG (Pmode, op1, 0);
|
op1 = gen_rtx_SUBREG (Pmode, op1, 0);
|
||||||
|
|
||||||
// Shift the reflected bits to the least significant end
|
// Shift the reflected bits to the least significant end
|
||||||
|
/* XXX This seems to assume we're always dealing with
|
||||||
|
a 16bit quantity. */
|
||||||
rtx shift_amt = gen_rtx_CONST_INT (Pmode, 8);
|
rtx shift_amt = gen_rtx_CONST_INT (Pmode, 8);
|
||||||
op1 = gen_rtx_LSHIFTRT (Pmode, op1, shift_amt);
|
op1 = gen_rtx_LSHIFTRT (Pmode, op1, shift_amt);
|
||||||
|
|
||||||
|
/* XXX This routine is going to have no impact if it was
|
||||||
|
ever used. Changing OP1 above isn't reflected into
|
||||||
|
the caller. */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Generate table based CRC code. */
|
/* Generate table based CRC code. */
|
||||||
|
/* XXX This doesn't seem to be used. Is it the case that we're
|
||||||
|
eventually going to need to distinguish between a bit-reflected
|
||||||
|
CRC and a normal CRC for table based variants? If so, doesn't
|
||||||
|
that need to be in the operands for the .CRC IFN? */
|
||||||
void
|
void
|
||||||
expand_crc_table_based_reflected (rtx *operands, machine_mode data_mode)
|
expand_crc_table_based_reflected (rtx *operands, machine_mode data_mode)
|
||||||
{
|
{
|
||||||
machine_mode mode = GET_MODE (operands[0]);
|
machine_mode mode = GET_MODE (operands[0]);
|
||||||
rtx in = force_reg (mode, gen_rtx_XOR (mode, operands[1], operands[2]));
|
rtx in = force_reg (mode, gen_rtx_XOR (mode, operands[1], operands[2]));
|
||||||
rtx ix = gen_rtx_AND (mode, in, GEN_INT (GET_MODE_MASK (data_mode)));
|
rtx ix = gen_rtx_AND (mode, in, GEN_INT (GET_MODE_MASK (data_mode)));
|
||||||
|
/* XXX Under what conditions will mode != Pmode be true? Is this an
|
||||||
|
artifact of having the modes wrong for the crc expander? */
|
||||||
if (mode != Pmode)
|
if (mode != Pmode)
|
||||||
ix = gen_rtx_SUBREG (Pmode, ix, 0);
|
ix = gen_rtx_SUBREG (Pmode, ix, 0);
|
||||||
ix = gen_rtx_ASHIFT (Pmode, ix, GEN_INT (exact_log2 (GET_MODE_SIZE (mode)
|
ix = gen_rtx_ASHIFT (Pmode, ix, GEN_INT (exact_log2 (GET_MODE_SIZE (mode)
|
||||||
|
|
@ -7048,6 +7086,15 @@ expand_crc_table_based_reflected (rtx *operands, machine_mode data_mode)
|
||||||
rtx high = gen_rtx_LSHIFTRT (mode, operands[1],
|
rtx high = gen_rtx_LSHIFTRT (mode, operands[1],
|
||||||
GEN_INT (data_mode));
|
GEN_INT (data_mode));
|
||||||
rtx crc = force_reg (mode, gen_rtx_XOR (mode, tab, high));
|
rtx crc = force_reg (mode, gen_rtx_XOR (mode, tab, high));
|
||||||
|
|
||||||
|
/* XXX In general we prefer to avoid SUBREGs, especially
|
||||||
|
paradoxical subregs (outer mode is wider than inner mode).
|
||||||
|
|
||||||
|
It should be possible to replace a paradoxical subreg with
|
||||||
|
a sign or zero extension.
|
||||||
|
|
||||||
|
If this is a narrowing subreg, then gen_lowpart might be
|
||||||
|
better. */
|
||||||
riscv_emit_move (operands[0], gen_rtx_SUBREG (mode, crc, 0));
|
riscv_emit_move (operands[0], gen_rtx_SUBREG (mode, crc, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -7060,6 +7107,8 @@ expand_crc_table_based (rtx *operands, machine_mode data_mode)
|
||||||
GEN_INT (8));
|
GEN_INT (8));
|
||||||
rtx in = force_reg (mode, gen_rtx_XOR (mode, op1, operands[2]));
|
rtx in = force_reg (mode, gen_rtx_XOR (mode, op1, operands[2]));
|
||||||
rtx ix = gen_rtx_AND (mode, in, GEN_INT (GET_MODE_MASK (data_mode)));
|
rtx ix = gen_rtx_AND (mode, in, GEN_INT (GET_MODE_MASK (data_mode)));
|
||||||
|
/* XXX Under what conditions will mode != Pmode be true? Is this an
|
||||||
|
artifact of having the modes wrong for the crc expander? */
|
||||||
if (mode != Pmode)
|
if (mode != Pmode)
|
||||||
ix = gen_rtx_SUBREG (Pmode, ix, 0);
|
ix = gen_rtx_SUBREG (Pmode, ix, 0);
|
||||||
ix = gen_rtx_ASHIFT (Pmode, ix, GEN_INT (exact_log2 (GET_MODE_SIZE (mode)
|
ix = gen_rtx_ASHIFT (Pmode, ix, GEN_INT (exact_log2 (GET_MODE_SIZE (mode)
|
||||||
|
|
@ -7072,6 +7121,10 @@ expand_crc_table_based (rtx *operands, machine_mode data_mode)
|
||||||
GEN_INT (8));
|
GEN_INT (8));
|
||||||
high = force_reg (mode, gen_rtx_AND (mode, high, GEN_INT (65535)));
|
high = force_reg (mode, gen_rtx_AND (mode, high, GEN_INT (65535)));
|
||||||
rtx crc = force_reg (mode, gen_rtx_XOR (mode, tab, high));
|
rtx crc = force_reg (mode, gen_rtx_XOR (mode, tab, high));
|
||||||
|
|
||||||
|
/* Why is this different than the reflected version above? Doesn't
|
||||||
|
it have the same potential concers WRT mismatched modes between
|
||||||
|
these two objects? */
|
||||||
riscv_emit_move (operands[0], crc);
|
riscv_emit_move (operands[0], crc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue