lex.c (search_line_fast): Add new version to be used for Power8 and later targets when Altivec is enabled.

2014-10-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com> * lex.c (search_line_fast): Add new version to be used for Power8 and later targets when Altivec is enabled. Restrict the existing Altivec version to big-endian systems so that lvsr is not used on little endian, where it is deprecated. Remove LE-specific code from the now-BE-only version. From-SVN: r215873
2014-10-03 20:06:38 +00:00 · 2014-10-03 20:06:38 +00:00 · 0ccaaab0e3
parent 3c9aabbde5
commit 0ccaaab0e3
2 changed files with 112 additions and 11 deletions
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,11 @@
 2014-10-03  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
 	* lex.c (search_line_fast): Add new version to be used for Power8
 	and later targets when Altivec is enabled.  Restrict the existing
 	Altivec version to big-endian systems so that lvsr is not used on
 	little endian, where it is deprecated.  Remove LE-specific code
 	from the now-BE-only version.
 2014-10-02  Bernd Edlinger  <bernd.edlinger@hotmail.de>
 	    Jeff Law  <law@redhat.com>
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -513,9 +513,111 @@ init_vectorized_lexer (void)
  search_line_fast = impl;
 }
-#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__)
+#elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
-/* A vection of the fast scanner using AltiVec vectorized byte compares.  */
+/* A version of the fast scanner using AltiVec vectorized byte compares
   and VSX unaligned loads (when VSX is available).  This is otherwise
   the same as the pre-GCC 5 version.  */
 /* Scan forward from S in 16-byte chunks and return a pointer to the
    first byte that is one of '\n', '\r', '\\' or '?'.

    NOTE(review): END is never consulted, so the loop stops only on a
    match.  Presumably the caller guarantees that one of these bytes
    appears at or after S and that reading whole 16-byte chunks up to
    it is safe -- confirm against the callers.  */
 static const uchar *
 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 {
  typedef __attribute__((altivec(vector))) unsigned char vc;
  /* Each interesting character replicated into all 16 byte lanes, so a
     single vector compare tests every byte of a chunk at once.  */
  const vc repl_nl = {
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', 
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
  };
  const vc repl_cr = {
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r', 
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
  };
  const vc repl_bs = {
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', 
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
  };
  const vc repl_qm = {
    '?', '?', '?', '?', '?', '?', '?', '?', 
    '?', '?', '?', '?', '?', '?', '?', '?', 
  };
  const vc zero = { 0 };
  vc data, t;
  /* Main loop processing 16 bytes at a time.  */
  do
    {
      vc m_nl, m_cr, m_bs, m_qm;
      /* S is not aligned by this function; the load below is a plain
	 vector dereference of whatever address S currently holds.  */
      data = *((const vc *)s);
      s += 16;
      m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
      m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
      m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
      m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
      t = (m_nl | m_cr) | (m_bs | m_qm);
      /* T now contains 0xff in bytes for which we matched one of the relevant
 	 characters.  We want to exit the loop if any byte in T is non-zero.
 	 Below is the expansion of vec_any_ne(t, zero).  */
    }
  while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
  /* Restore s to point to the 16 bytes we just processed.  */
  s -= 16;
  {
    /* Number of unsigned longs that make up one vector.  */
 #define N  (sizeof(vc) / sizeof(long))
    /* View the match mask T as N unsigned longs so the first matching
       byte can be located with scalar bit-counting operations.  */
    union {
      vc v;
      /* Statically assert that N is 2 or 4.  */
      unsigned long l[(N == 2 || N == 4) ? N : -1];
    } u;
    unsigned long l, i = 0;
    u.v = t;
    /* Find the first word of T that is non-zero.  S is advanced past
       every word found to be zero.  The final word is not tested: the
       loop above exits only when some byte of T is non-zero, so if all
       earlier words are zero the last one must hold the match.  */
    switch (N)
      {
      case 4:
 	l = u.l[i++];
 	if (l != 0)
 	  break;
 	s += sizeof(unsigned long);
 	l = u.l[i++];
 	if (l != 0)
 	  break;
 	s += sizeof(unsigned long);
 	/* FALLTHRU -- the remaining two words are handled exactly like
 	   the two-word case.  */
      case 2:
 	l = u.l[i++];
 	if (l != 0)
 	  break;
 	s += sizeof(unsigned long);
 	l = u.l[i];
      }
    /* L now contains 0xff in bytes for which we matched one of the
       relevant characters.  We can find the byte index by finding
       its bit index and dividing by 8.  The lowest-addressed byte is
       the most significant byte of L on big endian (count leading
       zeros) and the least significant on little endian (count
       trailing zeros).  */
 #ifdef __BIG_ENDIAN__
    l = __builtin_clzl(l) >> 3;
 #else
    l = __builtin_ctzl(l) >> 3;
 #endif
    return s + l;
 #undef N
  }
 }
 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
 /* A version of the fast scanner using AltiVec vectorized byte compares.
   This cannot be used for little endian because vec_lvsl/lvsr are
   deprecated for little endian and the code won't work properly.  */
 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
   so we can't compile this function without -maltivec on the command line
   (or implied by some other switch).  */
@@ -557,13 +659,8 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
     beginning with all ones and shifting in zeros according to the
     mis-alignment.  The LVSR instruction pulls the exact shift we
     want from the address.  */
 #ifdef __BIG_ENDIAN__
  mask = __builtin_vec_lvsr(0, s);
  mask = __builtin_vec_perm(zero, ones, mask);
 #else
  mask = __builtin_vec_lvsl(0, s);
  mask = __builtin_vec_perm(ones, zero, mask);
 #endif
  data &= mask;
  /* While altivec loads mask addresses, we still need to align S so
@@ -627,11 +724,7 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
    /* L now contains 0xff in bytes for which we matched one of the
       relevant characters.  We can find the byte index by finding
       its bit index and dividing by 8.  */
 #ifdef __BIG_ENDIAN__
    l = __builtin_clzl(l) >> 3;
 #else
    l = __builtin_ctzl(l) >> 3;
 #endif
    return s + l;
 #undef N