Many more comments. Use a vec<bool> when we only care about 0/1.

2023-03-21 22:45:51 -06:00 · 2023-03-21 22:45:51 -06:00 · 4953a04ee6
parent 42a2a810cd
commit 4953a04ee6
2 changed files with 79 additions and 8 deletions
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@ -702,7 +702,19 @@
      rtx t1 = force_reg (word_mode, operands[3]);
      a0 = force_reg (word_mode, gen_rtx_XOR (word_mode, a0, a1));
      a0 = force_reg (word_mode, gen_rtx_CLMUL (word_mode, a0, q_reg));
+
+      /* XXX By adjusting Q we may be able to eliminate this shift.  The size
+         of this shift seems to be dependent on the size of the CRC
+         output (aka N in N-bit CRC).  */
      a0 = force_reg (word_mode, gen_rtx_ASHIFT (word_mode, a0, GEN_INT (16)));
+
+      /* CCC By adjusting operands[3] (which should be a constant) we may
+         be able to utilize CLMULH to get the bits in the right place and
+         avoid the shifts to extract the bitfield.   If that is not possible
+         the shifts will still be needed and are dependent on input/output
+         sizes as well.   Does adjusting the constant and shift counts
+         result in a constant that is more likely to bt synthesized in a
+         single instruction?  */
      a0 = force_reg (word_mode, gen_rtx_CLMUL (word_mode, a0, t1));
      a0 = force_reg (word_mode, gen_rtx_LSHIFTRT (word_mode, a0, GEN_INT (24)));
      a0 = force_reg (word_mode, gen_rtx_ASHIFT (word_mode, a0, GEN_INT (24)));
@ -718,7 +730,13 @@
  else
    {
      /* If we do not have the ZBC extension (ie, no clmul), then
-         use a table based algorithm to implement the CRC.  */
+         use a table based algorithm to implement the CRC. 
+
+         XXX What is the size of each element in this table and
+         how many entries are in the table?  Does the element
+         size or number of elements vary based on the input or
+         output types for the CRC function?   If so, do we need
+         to restrict it to only be used for certain modes?  */
      expand_crc_table_based (operands, QImode);
    }

--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@ -6902,19 +6902,21 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
  return shamt == ctz_hwi (mask);
 }

-/* Calculate the quotient of polynomial long division of x^2n by the polynomial
+/* Return the quotient of polynomial long division of x^2n by POLYNOMIAL
   in GF (2^n).  */

 unsigned HOST_WIDE_INT
 gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT polynomial)
 {
-  vec<short> x2n, pol, q;
+  vec<bool>pol;
+  vec<short> x2n, q;
+
  // Create vector of bits, for the polynomial.
  pol.create (sizeof (polynomial) * BITS_PER_UNIT + 1);
  size_t n = 1;
  for ( ; polynomial; n++)
    {
-      pol.quick_push (polynomial&1);
+      pol.quick_push (polynomial & 1);
      polynomial >>= 1;
    }
  pol.quick_push (1);
@ -6923,6 +6925,8 @@ gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT polynomial)
  x2n.create (2 * n - 1);
  for (size_t i = 0; i < 2 * (n - 1); i++)
    x2n.safe_push (0);
+
+  /* Is this the implicit bit on at the top of the poly?  */
  x2n.safe_push (1);

  q.create (n);
@ -6952,6 +6956,9 @@ gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT polynomial)
 }

 /* Calculates reciprocal CRC for initial CRC and given polynomial.  */
+/* XXX Is this needed?  It's not referenced anywhere. 
+   If it is needed, it needs to be generalized rather than only
+   working on uint16_t.  */
 static uint16_t
 generate_crc_reciprocal (uint16_t crc,
 			 uint16_t polynomial)
@ -6967,6 +6974,8 @@ generate_crc_reciprocal (uint16_t crc,
 }

 /* Calculates CRC for initial CRC and given polynomial.  */
+/* XXX This needs to be generalized rather than only working
+   on uint16_t.  */
 static uint16_t
 generate_crc (uint16_t crc,
 	      uint16_t polynomial)
@ -6983,6 +6992,19 @@ generate_crc (uint16_t crc,
 }

 /* Generates 16-bit CRC table.  */
+/* XXX This needs to be generalized rather than only working
+   on uint16_t.
+
+   This looks like it tries to share tables which is good, but
+   don't we have to verify that the polynomial and sizes are the
+   same for sharing to be safe?  Doesn't that in turn argue that
+   the polynomial and size should be encoded into the table
+   name? 
+
+   Presumably the table should be going into a readonly data
+   section.  It doesn't look like we make any attempt to switch
+   sections.  Mixing code and data is exceedingly bad on
+   modern processors.  */
 rtx
 generate_crc16_table (uint16_t polynom)
 {
@ -7016,27 +7038,43 @@ generate_crc16_table (uint16_t polynom)
  return lab;
 }

-void reflect (rtx op1, machine_mode mode)
+/* XXX Is this needed?  It's not referenced anywhere.   */
+void
+reflect (rtx op1, machine_mode mode)
 {
  // Reflect the bits
  op1 = gen_rtx_BSWAP (mode, op1);

-// Adjust the position of the reflected bits
+  // Adjust the position of the reflected bits
+  /* XXX I don't understand the comment.  Under what
+     conditions does mode != Pmode?  */
  if (mode != Pmode)
    op1 = gen_rtx_SUBREG (Pmode, op1, 0);

-// Shift the reflected bits to the least significant end
+  // Shift the reflected bits to the least significant end
+  /* XXX This seems to assume we're always dealing with
+     a 16bit quantity.  */
  rtx shift_amt = gen_rtx_CONST_INT (Pmode, 8);
  op1 = gen_rtx_LSHIFTRT (Pmode, op1, shift_amt);
+
+  /* XXX This routine is going to have no impact if it was
+     ever used.  Changing OP1 above isn't reflected into
+     the caller.  */
 }

 /* Generate table based CRC code.  */
+/* XXX This doesn't seem to be used.  Is it the case that we're
+   eventually going to need to distinguish between a bit-reflected
+   CRC and a normal CRC for table based variants?  If so, doesn't
+   that need to be in the operands for the .CRC IFN?  */
 void
-expand_crc_table_based_reflected (rtx *operands,  machine_mode data_mode)
+expand_crc_table_based_reflected (rtx *operands, machine_mode data_mode)
 {
  machine_mode mode = GET_MODE (operands[0]);
  rtx in = force_reg (mode, gen_rtx_XOR (mode, operands[1], operands[2]));
  rtx ix = gen_rtx_AND (mode, in, GEN_INT (GET_MODE_MASK (data_mode)));
+  /* XXX Under what conditions will mode != Pmode be true?  Is this an
+     artifact of having the modes wrong for the crc expander?  */
  if (mode != Pmode)
    ix = gen_rtx_SUBREG (Pmode, ix, 0);
  ix = gen_rtx_ASHIFT (Pmode, ix, GEN_INT (exact_log2 (GET_MODE_SIZE (mode)
@ -7048,6 +7086,15 @@ expand_crc_table_based_reflected (rtx *operands,  machine_mode data_mode)
  rtx high = gen_rtx_LSHIFTRT (mode, operands[1],
 			       GEN_INT (data_mode));
  rtx crc = force_reg (mode, gen_rtx_XOR (mode, tab, high));
+
+  /* XXX In general we prefer to avoid SUBREGs, especially
+     paradoxical subregs (outer mode is wider than inner mode).
+
+     It should be possible to replace a paradoxical subreg with
+     a sign or zero extension.
+
+     If this is a narrowing subreg, then gen_lowpart might be
+     better.  */
  riscv_emit_move (operands[0], gen_rtx_SUBREG (mode, crc, 0));
 }

@ -7060,6 +7107,8 @@ expand_crc_table_based (rtx *operands,  machine_mode data_mode)
 			       GEN_INT (8));
  rtx in = force_reg (mode, gen_rtx_XOR (mode, op1, operands[2]));
  rtx ix = gen_rtx_AND (mode, in, GEN_INT (GET_MODE_MASK (data_mode)));
+  /* XXX Under what conditions will mode != Pmode be true?  Is this an
+     artifact of having the modes wrong for the crc expander?  */
  if (mode != Pmode)
    ix = gen_rtx_SUBREG (Pmode, ix, 0);
  ix = gen_rtx_ASHIFT (Pmode, ix, GEN_INT (exact_log2 (GET_MODE_SIZE (mode)
@ -7072,6 +7121,10 @@ expand_crc_table_based (rtx *operands,  machine_mode data_mode)
 			       GEN_INT (8));
  high = force_reg (mode, gen_rtx_AND (mode, high, GEN_INT (65535)));
  rtx crc = force_reg (mode, gen_rtx_XOR (mode, tab, high));
+
+  /* Why is this different than the reflected version above?  Doesn't
+     it have the same potential concers WRT mismatched modes between
+     these two objects?  */
  riscv_emit_move (operands[0], crc);
 }