mirror of git://gcc.gnu.org/git/gcc.git
lex.c (search_line_fast): Add new version to be used for Power8 and later targets when Altivec is enabled.
2014-10-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com> * lex.c (search_line_fast): Add new version to be used for Power8 and later targets when Altivec is enabled. Restrict the existing Altivec version to big-endian systems so that lvsr is not used on little endian, where it is deprecated. Remove LE-specific code from the now-BE-only version. From-SVN: r215873
This commit is contained in:
parent
3c9aabbde5
commit
0ccaaab0e3
|
|
@ -1,3 +1,11 @@
|
||||||
|
2014-10-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
||||||
|
|
||||||
|
* lex.c (search_line_fast): Add new version to be used for Power8
|
||||||
|
and later targets when Altivec is enabled. Restrict the existing
|
||||||
|
Altivec version to big-endian systems so that lvsr is not used on
|
||||||
|
little endian, where it is deprecated. Remove LE-specific code
|
||||||
|
from the now-BE-only version.
|
||||||
|
|
||||||
2014-10-02 Bernd Edlinger <bernd.edlinger@hotmail.de>
|
2014-10-02 Bernd Edlinger <bernd.edlinger@hotmail.de>
|
||||||
Jeff Law <law@redhat.com>
|
Jeff Law <law@redhat.com>
|
||||||
|
|
||||||
|
|
|
||||||
115
libcpp/lex.c
115
libcpp/lex.c
|
|
@ -513,9 +513,111 @@ init_vectorized_lexer (void)
|
||||||
search_line_fast = impl;
|
search_line_fast = impl;
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__)
|
#elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
|
||||||
|
|
||||||
/* A version of the fast scanner using AltiVec vectorized byte compares. */
|
/* A version of the fast scanner using AltiVec vectorized byte compares
|
||||||
|
and VSX unaligned loads (when VSX is available). This is otherwise
|
||||||
|
the same as the pre-GCC 5 version. */
|
||||||
|
|
||||||
|
static const uchar *
|
||||||
|
search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
|
||||||
|
{
|
||||||
|
typedef __attribute__((altivec(vector))) unsigned char vc;
|
||||||
|
|
||||||
|
const vc repl_nl = {
|
||||||
|
'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
|
||||||
|
'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
|
||||||
|
};
|
||||||
|
const vc repl_cr = {
|
||||||
|
'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
|
||||||
|
'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
|
||||||
|
};
|
||||||
|
const vc repl_bs = {
|
||||||
|
'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
|
||||||
|
'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
|
||||||
|
};
|
||||||
|
const vc repl_qm = {
|
||||||
|
'?', '?', '?', '?', '?', '?', '?', '?',
|
||||||
|
'?', '?', '?', '?', '?', '?', '?', '?',
|
||||||
|
};
|
||||||
|
const vc zero = { 0 };
|
||||||
|
|
||||||
|
vc data, t;
|
||||||
|
|
||||||
|
/* Main loop processing 16 bytes at a time. */
|
||||||
|
do
|
||||||
|
{
|
||||||
|
vc m_nl, m_cr, m_bs, m_qm;
|
||||||
|
|
||||||
|
data = *((const vc *)s);
|
||||||
|
s += 16;
|
||||||
|
|
||||||
|
m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
|
||||||
|
m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
|
||||||
|
m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
|
||||||
|
m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
|
||||||
|
t = (m_nl | m_cr) | (m_bs | m_qm);
|
||||||
|
|
||||||
|
/* T now contains 0xff in bytes for which we matched one of the relevant
|
||||||
|
characters. We want to exit the loop if any byte in T is non-zero.
|
||||||
|
Below is the expansion of vec_any_ne(t, zero). */
|
||||||
|
}
|
||||||
|
while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
|
||||||
|
|
||||||
|
/* Restore s to point to the 16 bytes we just processed. */
|
||||||
|
s -= 16;
|
||||||
|
|
||||||
|
{
|
||||||
|
#define N (sizeof(vc) / sizeof(long))
|
||||||
|
|
||||||
|
union {
|
||||||
|
vc v;
|
||||||
|
/* Statically assert that N is 2 or 4. */
|
||||||
|
unsigned long l[(N == 2 || N == 4) ? N : -1];
|
||||||
|
} u;
|
||||||
|
unsigned long l, i = 0;
|
||||||
|
|
||||||
|
u.v = t;
|
||||||
|
|
||||||
|
/* Find the first word of T that is non-zero. */
|
||||||
|
switch (N)
|
||||||
|
{
|
||||||
|
case 4:
|
||||||
|
l = u.l[i++];
|
||||||
|
if (l != 0)
|
||||||
|
break;
|
||||||
|
s += sizeof(unsigned long);
|
||||||
|
l = u.l[i++];
|
||||||
|
if (l != 0)
|
||||||
|
break;
|
||||||
|
s += sizeof(unsigned long);
|
||||||
|
case 2:
|
||||||
|
l = u.l[i++];
|
||||||
|
if (l != 0)
|
||||||
|
break;
|
||||||
|
s += sizeof(unsigned long);
|
||||||
|
l = u.l[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* L now contains 0xff in bytes for which we matched one of the
|
||||||
|
relevant characters. We can find the byte index by finding
|
||||||
|
its bit index and dividing by 8. */
|
||||||
|
#ifdef __BIG_ENDIAN__
|
||||||
|
l = __builtin_clzl(l) >> 3;
|
||||||
|
#else
|
||||||
|
l = __builtin_ctzl(l) >> 3;
|
||||||
|
#endif
|
||||||
|
return s + l;
|
||||||
|
|
||||||
|
#undef N
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
|
||||||
|
|
||||||
|
/* A version of the fast scanner using AltiVec vectorized byte compares.
|
||||||
|
This cannot be used for little endian because vec_lvsl/lvsr are
|
||||||
|
deprecated for little endian and the code won't work properly. */
|
||||||
/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
|
/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
|
||||||
so we can't compile this function without -maltivec on the command line
|
so we can't compile this function without -maltivec on the command line
|
||||||
(or implied by some other switch). */
|
(or implied by some other switch). */
|
||||||
|
|
@ -557,13 +659,8 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
|
||||||
beginning with all ones and shifting in zeros according to the
|
beginning with all ones and shifting in zeros according to the
|
||||||
mis-alignment. The LVSR instruction pulls the exact shift we
|
mis-alignment. The LVSR instruction pulls the exact shift we
|
||||||
want from the address. */
|
want from the address. */
|
||||||
#ifdef __BIG_ENDIAN__
|
|
||||||
mask = __builtin_vec_lvsr(0, s);
|
mask = __builtin_vec_lvsr(0, s);
|
||||||
mask = __builtin_vec_perm(zero, ones, mask);
|
mask = __builtin_vec_perm(zero, ones, mask);
|
||||||
#else
|
|
||||||
mask = __builtin_vec_lvsl(0, s);
|
|
||||||
mask = __builtin_vec_perm(ones, zero, mask);
|
|
||||||
#endif
|
|
||||||
data &= mask;
|
data &= mask;
|
||||||
|
|
||||||
/* While altivec loads mask addresses, we still need to align S so
|
/* While altivec loads mask addresses, we still need to align S so
|
||||||
|
|
@ -627,11 +724,7 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
|
||||||
/* L now contains 0xff in bytes for which we matched one of the
|
/* L now contains 0xff in bytes for which we matched one of the
|
||||||
relevant characters. We can find the byte index by finding
|
relevant characters. We can find the byte index by finding
|
||||||
its bit index and dividing by 8. */
|
its bit index and dividing by 8. */
|
||||||
#ifdef __BIG_ENDIAN__
|
|
||||||
l = __builtin_clzl(l) >> 3;
|
l = __builtin_clzl(l) >> 3;
|
||||||
#else
|
|
||||||
l = __builtin_ctzl(l) >> 3;
|
|
||||||
#endif
|
|
||||||
return s + l;
|
return s + l;
|
||||||
|
|
||||||
#undef N
|
#undef N
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue