Vectorize fast path of _cpp_clean_line.

* configure.ac (AC_C_BIGENDIAN, AC_TYPE_UINTPTR_T): New tests.
	(ssize_t): Check via AC_TYPE_SSIZE_T instead of AC_CHECK_TYPE.
	(ptrdiff_t): Check via AC_CHECK_TYPE.
	* config.in, configure: Rebuild.
	* system.h: Include stdint.h, if available.
	* lex.c (WORDS_BIGENDIAN): Provide default.
	(acc_char_mask_misalign, acc_char_replicate, acc_char_cmp,
	acc_char_index, search_line_acc_char, repl_chars, search_line_mmx,
	search_line_sse2, search_line_sse42, init_vectorized_lexer,
	search_line_fast): New.
	(_cpp_clean_line): Use search_line_fast.  Restructure the fast
	loop to make it clear when we're leaving the loop.  Stay in the
	fast loop for non-trigraph '?'.

Co-Authored-By: Andi Kleen <ak@linux.intel.com>
Co-Authored-By: David S. Miller <davem@davemloft.net>

From-SVN: r163446
This commit is contained in:
Richard Henderson 2010-08-21 12:05:40 -07:00 committed by Richard Henderson
parent 1d0134b3cc
commit 246a2fcb5e
6 changed files with 953 additions and 99 deletions

View File

@ -1,3 +1,21 @@
2010-08-21 Richard Henderson <rth@redhat.com>
Andi Kleen <ak@linux.intel.com>
David S. Miller <davem@davemloft.net>
* configure.ac (AC_C_BIGENDIAN, AC_TYPE_UINTPTR_T): New tests.
(ssize_t): Check via AC_TYPE_SSIZE_T instead of AC_CHECK_TYPE.
(ptrdiff_t): Check via AC_CHECK_TYPE.
* config.in, configure: Rebuild.
* system.h: Include stdint.h, if available.
* lex.c (WORDS_BIGENDIAN): Provide default.
(acc_char_mask_misalign, acc_char_replicate, acc_char_cmp,
acc_char_index, search_line_acc_char, repl_chars, search_line_mmx,
search_line_sse2, search_line_sse42, init_vectorized_lexer,
search_line_fast): New.
(_cpp_clean_line): Use search_line_fast. Restructure the fast
loop to make it clear when we're leaving the loop. Stay in the
fast loop for non-trigraph '?'.
2010-06-11 Jakub Jelinek <jakub@redhat.com> 2010-06-11 Jakub Jelinek <jakub@redhat.com>
* include/cpplib.h (struct cpp_callbacks): Add user_builtin_macro * include/cpplib.h (struct cpp_callbacks): Add user_builtin_macro

View File

@ -1,5 +1,8 @@
/* config.in. Generated from configure.ac by autoheader. */ /* config.in. Generated from configure.ac by autoheader. */
/* Define if building universal (internal helper macro) */
#undef AC_APPLE_UNIVERSAL_BUILD
/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP /* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
systems. This function is required for `alloca.c' support on those systems. systems. This function is required for `alloca.c' support on those systems.
*/ */
@ -209,6 +212,9 @@
/* Define if <sys/types.h> defines \`uchar'. */ /* Define if <sys/types.h> defines \`uchar'. */
#undef HAVE_UCHAR #undef HAVE_UCHAR
/* Define to 1 if the system has the type `uintptr_t'. */
#undef HAVE_UINTPTR_T
/* Define to 1 if you have the <unistd.h> header file. */ /* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H #undef HAVE_UNISTD_H
@ -266,6 +272,18 @@
/* Define to 1 if your <sys/time.h> declares `struct tm'. */ /* Define to 1 if your <sys/time.h> declares `struct tm'. */
#undef TM_IN_SYS_TIME #undef TM_IN_SYS_TIME
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
significant byte first (like Motorola and SPARC, unlike Intel). */
#if defined AC_APPLE_UNIVERSAL_BUILD
# if defined __BIG_ENDIAN__
# define WORDS_BIGENDIAN 1
# endif
#else
# ifndef WORDS_BIGENDIAN
# undef WORDS_BIGENDIAN
# endif
#endif
/* Define to empty if `const' does not conform to ANSI C. */ /* Define to empty if `const' does not conform to ANSI C. */
#undef const #undef const
@ -278,8 +296,15 @@
/* Define to `long int' if <sys/types.h> does not define. */ /* Define to `long int' if <sys/types.h> does not define. */
#undef off_t #undef off_t
/* Define to `int' if <sys/types.h> does not define. */
#undef ptrdiff_t
/* Define to `unsigned int' if <sys/types.h> does not define. */ /* Define to `unsigned int' if <sys/types.h> does not define. */
#undef size_t #undef size_t
/* Define to `int' if <sys/types.h> does not define. */ /* Define to `int' if <sys/types.h> does not define. */
#undef ssize_t #undef ssize_t
/* Define to the type of an unsigned integer type wide enough to hold a
pointer, if such a type exists, and if the system does not define it. */
#undef uintptr_t

356
libcpp/configure vendored
View File

@ -1846,6 +1846,48 @@ fi
} # ac_fn_cxx_check_header_mongrel } # ac_fn_cxx_check_header_mongrel
# ac_fn_cxx_try_run LINENO
# ------------------------
# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
# that executables *can* be run.
ac_fn_cxx_try_run ()
{
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
if { { ac_try="$ac_link"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
$as_echo "$ac_try_echo"; } >&5
(eval "$ac_link") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
{ { case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
$as_echo "$ac_try_echo"; } >&5
(eval "$ac_try") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
ac_retval=0
else
$as_echo "$as_me: program exited with status $ac_status" >&5
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
ac_retval=$ac_status
fi
rm -rf conftest.dSYM conftest_ipa8_conftest.oo
eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
return $ac_retval
} # ac_fn_cxx_try_run
# ac_fn_cxx_try_link LINENO # ac_fn_cxx_try_link LINENO
# ------------------------- # -------------------------
# Try to link conftest.$ac_ext, and return whether this succeeded. # Try to link conftest.$ac_ext, and return whether this succeeded.
@ -1946,48 +1988,6 @@ $as_echo "$ac_res" >&6; }
} # ac_fn_cxx_check_type } # ac_fn_cxx_check_type
# ac_fn_cxx_try_run LINENO
# ------------------------
# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
# that executables *can* be run.
ac_fn_cxx_try_run ()
{
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
if { { ac_try="$ac_link"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
$as_echo "$ac_try_echo"; } >&5
(eval "$ac_link") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
{ { case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
$as_echo "$ac_try_echo"; } >&5
(eval "$ac_try") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
ac_retval=0
else
$as_echo "$as_me: program exited with status $ac_status" >&5
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
ac_retval=$ac_status
fi
rm -rf conftest.dSYM conftest_ipa8_conftest.oo
eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
return $ac_retval
} # ac_fn_cxx_try_run
# ac_fn_cxx_compute_int LINENO EXPR VAR INCLUDES # ac_fn_cxx_compute_int LINENO EXPR VAR INCLUDES
# ---------------------------------------------- # ----------------------------------------------
# Tries to find the compile-time value of EXPR in a program that includes # Tries to find the compile-time value of EXPR in a program that includes
@ -5172,6 +5172,230 @@ done
fi fi
# Checks for typedefs, structures, and compiler characteristics. # Checks for typedefs, structures, and compiler characteristics.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
$as_echo_n "checking whether byte ordering is bigendian... " >&6; }
if test "${ac_cv_c_bigendian+set}" = set; then :
$as_echo_n "(cached) " >&6
else
ac_cv_c_bigendian=unknown
# See if we're dealing with a universal compiler.
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#ifndef __APPLE_CC__
not a universal capable compiler
#endif
typedef int dummy;
_ACEOF
if ac_fn_cxx_try_compile "$LINENO"; then :
# Check for potential -arch flags. It is not universal unless
# there are at least two -arch flags with different values.
ac_arch=
ac_prev=
for ac_word in $CC $CFLAGS $CPPFLAGS $LDFLAGS; do
if test -n "$ac_prev"; then
case $ac_word in
i?86 | x86_64 | ppc | ppc64)
if test -z "$ac_arch" || test "$ac_arch" = "$ac_word"; then
ac_arch=$ac_word
else
ac_cv_c_bigendian=universal
break
fi
;;
esac
ac_prev=
elif test "x$ac_word" = "x-arch"; then
ac_prev=arch
fi
done
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
if test $ac_cv_c_bigendian = unknown; then
# See if sys/param.h defines the BYTE_ORDER macro.
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <sys/types.h>
#include <sys/param.h>
int
main ()
{
#if ! (defined BYTE_ORDER && defined BIG_ENDIAN \
&& defined LITTLE_ENDIAN && BYTE_ORDER && BIG_ENDIAN \
&& LITTLE_ENDIAN)
bogus endian macros
#endif
;
return 0;
}
_ACEOF
if ac_fn_cxx_try_compile "$LINENO"; then :
# It does; now see whether it defined to BIG_ENDIAN or not.
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <sys/types.h>
#include <sys/param.h>
int
main ()
{
#if BYTE_ORDER != BIG_ENDIAN
not big endian
#endif
;
return 0;
}
_ACEOF
if ac_fn_cxx_try_compile "$LINENO"; then :
ac_cv_c_bigendian=yes
else
ac_cv_c_bigendian=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
if test $ac_cv_c_bigendian = unknown; then
# See if <limits.h> defines _LITTLE_ENDIAN or _BIG_ENDIAN (e.g., Solaris).
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <limits.h>
int
main ()
{
#if ! (defined _LITTLE_ENDIAN || defined _BIG_ENDIAN)
bogus endian macros
#endif
;
return 0;
}
_ACEOF
if ac_fn_cxx_try_compile "$LINENO"; then :
# It does; now see whether it defined to _BIG_ENDIAN or not.
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <limits.h>
int
main ()
{
#ifndef _BIG_ENDIAN
not big endian
#endif
;
return 0;
}
_ACEOF
if ac_fn_cxx_try_compile "$LINENO"; then :
ac_cv_c_bigendian=yes
else
ac_cv_c_bigendian=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
if test $ac_cv_c_bigendian = unknown; then
# Compile a test program.
if test "$cross_compiling" = yes; then :
# Try to guess by grepping values from an object file.
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
short int ascii_mm[] =
{ 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 };
short int ascii_ii[] =
{ 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 };
int use_ascii (int i) {
return ascii_mm[i] + ascii_ii[i];
}
short int ebcdic_ii[] =
{ 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 };
short int ebcdic_mm[] =
{ 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 };
int use_ebcdic (int i) {
return ebcdic_mm[i] + ebcdic_ii[i];
}
extern int foo;
int
main ()
{
return use_ascii (foo) == use_ebcdic (foo);
;
return 0;
}
_ACEOF
if ac_fn_cxx_try_compile "$LINENO"; then :
if grep BIGenDianSyS conftest.$ac_objext >/dev/null; then
ac_cv_c_bigendian=yes
fi
if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then
if test "$ac_cv_c_bigendian" = unknown; then
ac_cv_c_bigendian=no
else
# finding both strings is unlikely to happen, but who knows?
ac_cv_c_bigendian=unknown
fi
fi
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
$ac_includes_default
int
main ()
{
/* Are we little or big endian? From Harbison&Steele. */
union
{
long int l;
char c[sizeof (long int)];
} u;
u.l = 1;
return u.c[sizeof (long int) - 1] == 1;
;
return 0;
}
_ACEOF
if ac_fn_cxx_try_run "$LINENO"; then :
ac_cv_c_bigendian=no
else
ac_cv_c_bigendian=yes
fi
rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
conftest.$ac_objext conftest.beam conftest.$ac_ext
fi
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_bigendian" >&5
$as_echo "$ac_cv_c_bigendian" >&6; }
case $ac_cv_c_bigendian in #(
yes)
$as_echo "#define WORDS_BIGENDIAN 1" >>confdefs.h
;; #(
no)
;; #(
universal)
$as_echo "#define AC_APPLE_UNIVERSAL_BUILD 1" >>confdefs.h
;; #(
*)
as_fn_error "unknown endianness
presetting ac_cv_c_bigendian=no (or yes) will help" "$LINENO" 5 ;;
esac
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for an ANSI C-conforming const" >&5 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for an ANSI C-conforming const" >&5
$as_echo_n "checking for an ANSI C-conforming const... " >&6; } $as_echo_n "checking for an ANSI C-conforming const... " >&6; }
if test "${ac_cv_c_const+set}" = set; then : if test "${ac_cv_c_const+set}" = set; then :
@ -5369,6 +5593,53 @@ cat >>confdefs.h <<_ACEOF
#define ssize_t int #define ssize_t int
_ACEOF _ACEOF
fi
ac_fn_cxx_check_type "$LINENO" "uintptr_t" "ac_cv_type_uintptr_t" "$ac_includes_default"
if test "x$ac_cv_type_uintptr_t" = x""yes; then :
$as_echo "#define HAVE_UINTPTR_T 1" >>confdefs.h
else
for ac_type in 'unsigned int' 'unsigned long int' \
'unsigned long long int'; do
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
$ac_includes_default
int
main ()
{
static int test_array [1 - 2 * !(sizeof (void *) <= sizeof ($ac_type))];
test_array [0] = 0
;
return 0;
}
_ACEOF
if ac_fn_cxx_try_compile "$LINENO"; then :
cat >>confdefs.h <<_ACEOF
#define uintptr_t $ac_type
_ACEOF
ac_type=
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
test -z "$ac_type" && break
done
fi
ac_fn_cxx_check_type "$LINENO" "ptrdiff_t" "ac_cv_type_ptrdiff_t" "$ac_includes_default"
if test "x$ac_cv_type_ptrdiff_t" = x""yes; then :
else
cat >>confdefs.h <<_ACEOF
#define ptrdiff_t int
_ACEOF
fi fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether struct tm is in sys/time.h or time.h" >&5 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether struct tm is in sys/time.h or time.h" >&5
@ -7042,6 +7313,7 @@ LTLIBOBJS=$ac_ltlibobjs
: ${CONFIG_STATUS=./config.status} : ${CONFIG_STATUS=./config.status}
ac_write_fail=0 ac_write_fail=0
ac_clean_files_save=$ac_clean_files ac_clean_files_save=$ac_clean_files

View File

@ -70,12 +70,15 @@ else
fi fi
# Checks for typedefs, structures, and compiler characteristics. # Checks for typedefs, structures, and compiler characteristics.
AC_C_BIGENDIAN
AC_C_CONST AC_C_CONST
AC_C_INLINE AC_C_INLINE
AC_FUNC_OBSTACK AC_FUNC_OBSTACK
AC_TYPE_OFF_T AC_TYPE_OFF_T
AC_TYPE_SIZE_T AC_TYPE_SIZE_T
AC_CHECK_TYPE(ssize_t, int) AC_TYPE_SSIZE_T
AC_TYPE_UINTPTR_T
AC_CHECK_TYPE(ptrdiff_t, int)
AC_STRUCT_TM AC_STRUCT_TM
AC_CHECK_SIZEOF(int) AC_CHECK_SIZEOF(int)
AC_CHECK_SIZEOF(long) AC_CHECK_SIZEOF(long)

View File

@ -1,5 +1,5 @@
/* CPP Library - lexical analysis. /* CPP Library - lexical analysis.
Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010
Free Software Foundation, Inc. Free Software Foundation, Inc.
Contributed by Per Bothner, 1994-95. Contributed by Per Bothner, 1994-95.
Based on CCCP program by Paul Rubin, June 1986 Based on CCCP program by Paul Rubin, June 1986
@ -96,6 +96,531 @@ add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
buffer->notes_used++; buffer->notes_used++;
} }
/* Fast path to find line special characters using optimized character
scanning algorithms. Anything complicated falls back to the slow
path below. Since this loop is very hot it's worth doing these kinds
of optimizations.
One of the paths through the ifdefs should provide
const uchar *search_line_fast (const uchar *s, const uchar *end);
Between S and END, search for \n, \r, \\, ?. Return a pointer to
the found character.
Note that the last character of the buffer is *always* a newline,
as forced by _cpp_convert_input. This fact can be used to avoid
explicitly looking for the end of the buffer. */
/* config.h defines WORDS_BIGENDIAN only on big-endian hosts.  Supply a
   zero default so the macro can be tested with an ordinary C `if'
   rather than an #ifdef.  */
#if !defined (WORDS_BIGENDIAN)
# define WORDS_BIGENDIAN 0
#endif

/* word_type is meant to be the widest integer that fits in a single
   register.  <stdint.h> offers no such type.  unsigned long is right
   for most hosts, but not under the LLP64 model chosen by MS; when
   building with GCC the __word__ machine mode gives the real word
   size.  */
#if defined (__GNUC__)
typedef unsigned int word_type __attribute__((__mode__(__word__)));
#else
typedef unsigned long word_type;
#endif

/* Compile-time check: the code below copes only with 4- and 8-byte
   words.  Any other size gives this array type a negative length and
   the build fails.  */
typedef char check_word_type_size
  [(sizeof(word_type) == 4 || sizeof(word_type) == 8) ? 1 : -1];

/* Return VAL with its first N bytes, in memory order, cleared to NUL.
   NUL is not one of the characters being searched for, so the cleared
   bytes can never produce a match.  "First" means the low-order bytes
   on a little-endian host and the high-order bytes on a big-endian
   one.  N must be smaller than sizeof (word_type).  */

static inline word_type
acc_char_mask_misalign (word_type val, unsigned int n)
{
  const unsigned int shift = n * 8;
  const word_type all_ones = ~(word_type) 0;

  return val & (WORDS_BIGENDIAN ? all_ones >> shift : all_ones << shift);
}
/* Return X replicated to all byte positions within WORD_TYPE.

   The byte is widened to word_type before any shifting.  Shifting the
   int-promoted uchar left by 24 would move a set top bit (X >= 0x80)
   into the sign bit of an int, and the negative result would then be
   sign-extended when stored into an 8-byte word_type, leaving the
   upper half all-ones instead of four more copies of X.  */

static inline word_type
acc_char_replicate (uchar x)
{
  word_type ret = x;

  ret |= ret << 8;
  ret |= ret << 16;
  /* The shift by 32 is written as two shifts by 16 so the expression
     stays valid when word_type is only 32 bits wide and this branch
     is dead code.  */
  if (sizeof(word_type) == 8)
    ret |= ret << 16 << 16;
  return ret;
}
/* Return non-zero if some byte of VAL is (probably) equal to the byte
   replicated throughout C.  On the generic path a non-zero result may
   be a false positive, so the caller must confirm it by inspecting
   the bytes of VAL.  */

static inline word_type
acc_char_cmp (word_type val, word_type c)
{
#if defined(__GNUC__) && defined(__alpha__)
  /* The compare-bytes instruction gives exact results.
     (val == c) is obtained as (0 >= (val ^ c)).  */
  return __builtin_alpha_cmpbge (0, val ^ c);
#else
  /* A byte of DIFF is zero exactly where VAL and C agree.  */
  const word_type diff = val ^ c;

  /* MAGIC is 0x7efefeff for a 4-byte word and 0x7efefefefefefeff for
     an 8-byte one.  The bits left clear are the lowest bit of every
     byte but the first, plus the top bit of the word.  The result
     below keeps only those bit positions of (DIFF + MAGIC) ^ ~DIFF.  */
  word_type magic = 0x7efefefeU;

  if (sizeof(word_type) == 8)
    magic = (magic << 16 << 16) | 0xfefefefeU;
  magic |= 1;

  return ((diff + magic) ^ ~diff) & ~magic;
#endif
}
/* CMP is a non-zero result of acc_char_cmp for the word VAL.  Return
   the index, in memory order, of the first byte of VAL that is one of
   the four interesting characters, or -1 if CMP was a false positive
   and VAL contains none of them.  */

static inline int
acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
		word_type val ATTRIBUTE_UNUSED)
{
#if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
  /* cmpbge sets one *bit* of its result per matching byte and never
     reports a false positive, so the lowest set bit is the index.  */
  return __builtin_ctzl (cmp);
#else
  unsigned int byte_no = 0;

  /* ??? It would be nice to force unrolling here,
     and have all of these constants folded.  */
  while (byte_no < sizeof(word_type))
    {
      unsigned int shift;
      uchar c;

      /* Byte 0 in memory is the most significant byte of the word on
	 a big-endian host and the least significant one otherwise.  */
      if (WORDS_BIGENDIAN)
	shift = (sizeof(word_type) - byte_no - 1) * 8;
      else
	shift = byte_no * 8;
      c = (val >> shift) & 0xff;

      switch (c)
	{
	case '\n':
	case '\r':
	case '\\':
	case '?':
	  return byte_no;
	default:
	  break;
	}
      ++byte_no;
    }

  return -1;
#endif
}
/* A version of the fast scanner using bit fiddling techniques.
For 32-bit words, one would normally perform 16 comparisons and
16 branches. With this algorithm one performs 24 arithmetic
operations and one branch. Whether this is faster with a 32-bit
word size is going to be somewhat system dependent.
For 64-bit words, we eliminate twice the number of comparisons
and branches without increasing the number of arithmetic operations.
It's almost certainly going to be a win with 64-bit word size.
Returns a pointer to the first \n, \r, \\ or ? at or after S.
END is not consulted: the loop has no bounds check and terminates
only because the buffer is guaranteed to end in a newline. */
static const uchar * search_line_acc_char (const uchar *, const uchar *)
ATTRIBUTE_UNUSED;
static const uchar *
search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
/* One word per interesting character, with that character in every byte. */
const word_type repl_nl = acc_char_replicate ('\n');
const word_type repl_cr = acc_char_replicate ('\r');
const word_type repl_bs = acc_char_replicate ('\\');
const word_type repl_qm = acc_char_replicate ('?');
unsigned int misalign;
const word_type *p;
word_type val, t;
/* Align the buffer. Mask out any bytes from before the beginning.
The first load starts at the word boundary at or below S, so it may
include bytes that precede S; those are cleared so they cannot match. */
p = (word_type *)((uintptr_t)s & -sizeof(word_type));
val = *p;
misalign = (uintptr_t)s & (sizeof(word_type) - 1);
if (misalign)
val = acc_char_mask_misalign (val, misalign);
/* Main loop. */
while (1)
{
/* T is non-zero if any of the four characters is (probably) in VAL. */
t = acc_char_cmp (val, repl_nl);
t |= acc_char_cmp (val, repl_cr);
t |= acc_char_cmp (val, repl_bs);
t |= acc_char_cmp (val, repl_qm);
if (__builtin_expect (t != 0, 0))
{
/* A negative index means the comparison was a false positive;
keep scanning with the next word. */
int i = acc_char_index (t, val);
if (i >= 0)
return (const uchar *)p + i;
}
val = *++p;
}
}
#if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__))
/* Replicated character data to be shared between implementations.
Recall that outside of a context with vector support we can't
define compatible vector types, therefore these are all defined
in terms of raw characters.
Row 0 holds '\n', row 1 '\r', row 2 '\\' and row 3 '?'; the MMX and
SSE2 scanners load their comparison vectors from these rows by index.
The 16-byte alignment is presumably what lets a whole row be loaded
as a single 16-byte vector. */
static const char repl_chars[4][16] __attribute__((aligned(16))) = {
{ '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
{ '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
{ '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
{ '?', '?', '?', '?', '?', '?', '?', '?',
'?', '?', '?', '?', '?', '?', '?', '?' },
};
/* A version of the fast scanner using MMX vectorized byte compare insns.
This uses the PMOVMSKB instruction which was introduced with "MMX2",
which was packaged into SSE1; it is also present in the AMD 3dNOW-A
extension. Mark the function as using "sse" so that we emit a real
"emms" instruction, rather than the 3dNOW "femms" instruction.
Returns a pointer to the first \n, \r, \\ or ? at or after S. END is
not consulted; the scan relies on the buffer ending in a newline. */
static const uchar *
#ifndef __SSE__
__attribute__((__target__("sse")))
#endif
search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
typedef char v8qi __attribute__ ((__vector_size__ (8)));
typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
/* Only the first 8 bytes of each 16-byte row of repl_chars are used. */
const v8qi repl_nl = *(const v8qi *)repl_chars[0];
const v8qi repl_cr = *(const v8qi *)repl_chars[1];
const v8qi repl_bs = *(const v8qi *)repl_chars[2];
const v8qi repl_qm = *(const v8qi *)repl_chars[3];
unsigned int misalign, found, mask;
const v8qi *p;
v8qi data, t, c;
/* Align the source pointer. While MMX doesn't generate unaligned data
faults, this allows us to safely scan to the end of the buffer without
reading beyond the end of the last page. */
misalign = (uintptr_t)s & 7;
p = (const v8qi *)((uintptr_t)s & -8);
data = *p;
/* Create a mask for the bytes that are valid within the first
8-byte block. The idea here is that the AND with the mask
within the loop is "free", since we need some AND or TEST
insn in order to set the flags for the branch anyway.
The low MISALIGN bits, which stand for the bytes before S, are
cleared. */
mask = -1u << misalign;
/* Main loop processing 8 bytes at a time. The first iteration is
entered at START so that the block already loaded above is examined
with the partial mask; later iterations load the next block and
accept every byte. */
goto start;
do
{
data = *++p;
mask = -1;
start:
/* OR together the byte-wise equality results for all four characters. */
t = __builtin_ia32_pcmpeqb(data, repl_nl);
c = __builtin_ia32_pcmpeqb(data, repl_cr);
t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
c = __builtin_ia32_pcmpeqb(data, repl_bs);
t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
c = __builtin_ia32_pcmpeqb(data, repl_qm);
t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
found = __builtin_ia32_pmovmskb (t);
found &= mask;
}
while (!found);
__builtin_ia32_emms ();
/* FOUND contains 1 in bits for which we matched a relevant
character. Conversion to the byte index is trivial. */
found = __builtin_ctz(found);
return (const uchar *)p + found;
}
/* A version of the fast scanner using SSE2 vectorized byte compare insns.
Returns a pointer to the first \n, \r, \\ or ? at or after S. END is
not consulted; the scan relies on the buffer ending in a newline. */
static const uchar *
#ifndef __SSE2__
__attribute__((__target__("sse2")))
#endif
search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
typedef char v16qi __attribute__ ((__vector_size__ (16)));
/* Comparison vectors: each row of repl_chars is one character
repeated 16 times. */
const v16qi repl_nl = *(const v16qi *)repl_chars[0];
const v16qi repl_cr = *(const v16qi *)repl_chars[1];
const v16qi repl_bs = *(const v16qi *)repl_chars[2];
const v16qi repl_qm = *(const v16qi *)repl_chars[3];
unsigned int misalign, found, mask;
const v16qi *p;
v16qi data, t;
/* Align the source pointer. The first load therefore starts at the
16-byte boundary at or below S and may include bytes before S. */
misalign = (uintptr_t)s & 15;
p = (const v16qi *)((uintptr_t)s & -16);
data = *p;
/* Create a mask for the bytes that are valid within the first
16-byte block. The idea here is that the AND with the mask
within the loop is "free", since we need some AND or TEST
insn in order to set the flags for the branch anyway.
The low MISALIGN bits, which stand for the bytes before S, are
cleared. */
mask = -1u << misalign;
/* Main loop processing 16 bytes at a time. The first iteration is
entered at START so that the block already loaded above is examined
with the partial mask; later iterations load the next block and
accept every byte. */
goto start;
do
{
data = *++p;
mask = -1;
start:
/* OR together the byte-wise equality results for all four characters. */
t = __builtin_ia32_pcmpeqb128(data, repl_nl);
t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
found = __builtin_ia32_pmovmskb128 (t);
found &= mask;
}
while (!found);
/* FOUND contains 1 in bits for which we matched a relevant
character. Conversion to the byte index is trivial. */
found = __builtin_ctz(found);
return (const uchar *)p + found;
}
/* A version of the fast scanner using SSE 4.2 vectorized string insns.
Returns a pointer to the first \n, \r, \\ or ? at or after S. END is
used only to decide whether an unaligned 16-byte read at S is safe. */
static const uchar *
#ifndef __SSE4_2__
__attribute__((__target__("sse4.2")))
#endif
search_line_sse42 (const uchar *s, const uchar *end)
{
typedef char v16qi __attribute__ ((__vector_size__ (16)));
/* The set of characters to search for; only the first four elements
are meaningful, matching the length of 4 passed in EAX below. */
static const v16qi search = { '\n', '\r', '?', '\\' };
uintptr_t si = (uintptr_t)s;
uintptr_t index;
/* Check for unaligned input. */
if (si & 15)
{
if (__builtin_expect (end - s < 16, 0)
&& __builtin_expect ((si & 0xfff) > 0xff0, 0))
{
/* There are fewer than 16 bytes left in the buffer, and fewer
than 16 bytes left on the page. Reading 16 bytes at this
point might generate a spurious page fault. Defer to the
SSE2 implementation, which already handles alignment.
(The 0xfff mask assumes 4 KiB pages.) */
return search_line_sse2 (s, end);
}
/* ??? The builtin doesn't understand that the PCMPESTRI read from
memory need not be aligned.
Operands: ECX receives the result index, EAX = 4 is the length of
SEARCH and EDX = 16 is the length of the data read at S. */
__asm ("%vpcmpestri $0, (%1), %2"
: "=c"(index) : "r"(s), "x"(search), "a"(4), "d"(16));
/* An index below 16 is treated as a match within this block. */
if (__builtin_expect (index < 16, 0))
goto found;
/* Advance the pointer to an aligned address. We will re-scan a
few bytes, but we no longer need care for reading past the
end of a page, since we're guaranteed a match. */
s = (const uchar *)((si + 16) & -16);
}
/* Main loop, processing 16 bytes at a time. By doing the whole loop
in inline assembly, we can make proper use of the flags set.
S is pre-decremented by 16 so that the add at the top of the loop
brings it back to the first block; the loop repeats while the
carry flag is clear, which presumably means "no match in this
block" (see the PCMPESTRI documentation). */
__asm ( "sub $16, %1\n"
" .balign 16\n"
"0: add $16, %1\n"
" %vpcmpestri $0, (%1), %2\n"
" jnc 0b"
: "=&c"(index), "+r"(s)
: "x"(search), "a"(4), "d"(16));
found:
return s + index;
}
/* Check the CPU capabilities. */
#include "../gcc/config/i386/cpuid.h"
/* Signature shared by every line scanner, and the pointer through which
_cpp_clean_line reaches the one selected at start-up. */
typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
static search_line_fast_type search_line_fast;
/* Select the scanner for search_line_fast. Declared as a constructor,
so it is expected to run before main. MINIMUM records the
instruction-set level this file is itself being compiled for
(3 = SSE4.2, 2 = SSE2, 1 = SSE or 3dNOW-A, 0 = none), and a scanner
at that level is chosen without consulting the CPUID result bits.
Beyond that, better scanners are chosen only if the CPUID feature
bits report them. The fallback is search_line_acc_char. */
static void __attribute__((constructor))
init_vectorized_lexer (void)
{
unsigned dummy, ecx = 0, edx = 0;
search_line_fast_type impl = search_line_acc_char;
int minimum = 0;
#if defined(__SSE4_2__)
minimum = 3;
#elif defined(__SSE2__)
minimum = 2;
#elif defined(__SSE__) || defined(__3dNOW_A__)
minimum = 1;
#endif
if (minimum == 3)
impl = search_line_sse42;
else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
{
/* ECX and EDX stay zero if the query failed, so only the MINIMUM
tests can succeed in that case. */
if (minimum == 3 || (ecx & bit_SSE4_2))
impl = search_line_sse42;
else if (minimum == 2 || (edx & bit_SSE2))
impl = search_line_sse2;
else if (minimum == 1 || (edx & bit_SSE))
impl = search_line_mmx;
}
/* NOTE(review): this extended-leaf query is reached only when the
leaf-1 query above fails (and MINIMUM is not 2). A CPU that answers
leaf 1 but lacks SSE never has its 3dNOW-A bit examined -- confirm
that this is intended. */
else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
{
if (minimum == 1 || edx & bit_3DNOWP)
impl = search_line_mmx;
}
search_line_fast = impl;
}
#elif defined(__GNUC__) && defined(__ALTIVEC__)
/* A version of the fast scanner using AltiVec vectorized byte compares.
Returns a pointer to the first \n, \r, \\ or ? at or after S. END is
not consulted; the scan relies on the buffer ending in a newline. */
/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
so we can't compile this function without -maltivec on the command line
(or implied by some other switch). */
static const uchar *
search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
typedef __attribute__((altivec(vector))) unsigned char vc;
const vc repl_nl = {
'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
};
const vc repl_cr = {
'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
};
const vc repl_bs = {
'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
};
const vc repl_qm = {
'?', '?', '?', '?', '?', '?', '?', '?',
'?', '?', '?', '?', '?', '?', '?', '?',
};
const vc ones = {
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
};
const vc zero = { 0 };
vc data, mask, t;
/* Altivec loads automatically mask addresses with -16. This lets us
issue the first load as early as possible. */
data = __builtin_vec_ld(0, (const vc *)s);
/* Discard bytes before the beginning of the buffer. Do this by
beginning with all ones and shifting in zeros according to the
mis-alignment. The LVSR instruction pulls the exact shift we
want from the address. */
mask = __builtin_vec_lvsr(0, s);
mask = __builtin_vec_perm(zero, ones, mask);
data &= mask;
/* While altivec loads mask addresses, we still need to align S so
that the offset we compute at the end is correct. */
s = (const uchar *)((uintptr_t)s & -16);
/* Main loop processing 16 bytes at a time. The first iteration is
entered at START so that the block already loaded and masked above
is examined before any further load. */
goto start;
do
{
vc m_nl, m_cr, m_bs, m_qm;
s += 16;
data = __builtin_vec_ld(0, (const vc *)s);
start:
m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
t = (m_nl | m_cr) | (m_bs | m_qm);
/* T now contains 0xff in bytes for which we matched one of the relevant
characters. We want to exit the loop if any byte in T is non-zero.
Below is the expansion of vec_any_ne(t, zero). */
}
while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
{
/* N is the number of longs in one vector: 4 when long is 4 bytes and
2 when it is 8 bytes. Any other value fails to compile. */
#define N (sizeof(vc) / sizeof(long))
typedef char check_count[(N == 2 || N == 4) * 2 - 1];
union {
vc v;
unsigned long l[N];
} u;
unsigned long l, i = 0;
u.v = t;
/* Find the first word of T that is non-zero. S is advanced past each
all-zero word. Case 4 deliberately falls through into case 2 to
examine the remaining two words. */
switch (N)
{
case 4:
l = u.l[i++];
if (l != 0)
break;
s += sizeof(unsigned long);
l = u.l[i++];
if (l != 0)
break;
s += sizeof(unsigned long);
case 2:
l = u.l[i++];
if (l != 0)
break;
s += sizeof(unsigned long);
l = u.l[i];
}
/* L now contains 0xff in bytes for which we matched one of the
relevant characters. We can find the byte index by finding
its bit index and dividing by 8. Counting leading zeros treats the
most significant byte as the first one in memory, i.e. it assumes
a big-endian host. */
l = __builtin_clzl(l) >> 3;
return s + l;
#undef N
}
}
#else
/* We only have one accelerated alternative. Use a direct call so that
we encourage inlining. */
#define search_line_fast search_line_acc_char
#endif
/* Returns with a logical line that contains no escaped newlines or /* Returns with a logical line that contains no escaped newlines or
trigraphs. This is a time-critical inner loop. */ trigraphs. This is a time-critical inner loop. */
void void
@ -109,82 +634,91 @@ _cpp_clean_line (cpp_reader *pfile)
buffer->cur_note = buffer->notes_used = 0; buffer->cur_note = buffer->notes_used = 0;
buffer->cur = buffer->line_base = buffer->next_line; buffer->cur = buffer->line_base = buffer->next_line;
buffer->need_line = false; buffer->need_line = false;
s = buffer->next_line - 1; s = buffer->next_line;
if (!buffer->from_stage3) if (!buffer->from_stage3)
{ {
const uchar *pbackslash = NULL; const uchar *pbackslash = NULL;
/* Short circuit for the common case of an un-escaped line with /* Fast path. This is the common case of an un-escaped line with
no trigraphs. The primary win here is by not writing any no trigraphs. The primary win here is by not writing any
data back to memory until we have to. */ data back to memory until we have to. */
for (;;) while (1)
{ {
c = *++s; /* Perform an optimized search for \n, \r, \\, ?. */
if (__builtin_expect (c == '\n', false) s = search_line_fast (s, buffer->rlimit);
|| __builtin_expect (c == '\r', false))
c = *s;
if (c == '\\')
{ {
d = (uchar *) s; /* Record the location of the backslash and continue. */
pbackslash = s++;
if (__builtin_expect (s == buffer->rlimit, false))
goto done;
/* DOS line ending? */
if (__builtin_expect (c == '\r', false)
&& s[1] == '\n')
{
s++;
if (s == buffer->rlimit)
goto done;
}
if (__builtin_expect (pbackslash == NULL, true))
goto done;
/* Check for escaped newline. */
p = d;
while (is_nvspace (p[-1]))
p--;
if (p - 1 != pbackslash)
goto done;
/* Have an escaped newline; process it and proceed to
the slow path. */
add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
d = p - 2;
buffer->next_line = p - 1;
break;
} }
if (__builtin_expect (c == '\\', false)) else if (__builtin_expect (c == '?', 0))
pbackslash = s; {
else if (__builtin_expect (c == '?', false) if (__builtin_expect (s[1] == '?', false)
&& __builtin_expect (s[1] == '?', false)
&& _cpp_trigraph_map[s[2]]) && _cpp_trigraph_map[s[2]])
{
/* Have a trigraph. We may or may not have to convert
it. Add a line note regardless, for -Wtrigraphs. */
add_line_note (buffer, s, s[2]);
if (CPP_OPTION (pfile, trigraphs))
{ {
/* We do, and that means we have to switch to the /* Have a trigraph. We may or may not have to convert
slow path. */ it. Add a line note regardless, for -Wtrigraphs. */
d = (uchar *) s; add_line_note (buffer, s, s[2]);
*d = _cpp_trigraph_map[s[2]]; if (CPP_OPTION (pfile, trigraphs))
s += 2; {
break; /* We do, and that means we have to switch to the
slow path. */
d = (uchar *) s;
*d = _cpp_trigraph_map[s[2]];
s += 2;
goto slow_path;
}
} }
/* Not a trigraph. Continue on fast-path. */
s++;
} }
else
break;
} }
/* This must be \r or \n. We're either done, or we'll be forced
to write back to the buffer and continue on the slow path. */
d = (uchar *) s;
for (;;) if (__builtin_expect (s == buffer->rlimit, false))
goto done;
/* DOS line ending? */
if (__builtin_expect (c == '\r', false) && s[1] == '\n')
{
s++;
if (s == buffer->rlimit)
goto done;
}
if (__builtin_expect (pbackslash == NULL, true))
goto done;
/* Check for escaped newline. */
p = d;
while (is_nvspace (p[-1]))
p--;
if (p - 1 != pbackslash)
goto done;
/* Have an escaped newline; process it and proceed to
the slow path. */
add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
d = p - 2;
buffer->next_line = p - 1;
slow_path:
while (1)
{ {
c = *++s; c = *++s;
*++d = c; *++d = c;
if (c == '\n' || c == '\r') if (c == '\n' || c == '\r')
{ {
/* Handle DOS line endings. */ /* Handle DOS line endings. */
if (c == '\r' && s != buffer->rlimit && s[1] == '\n') if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
s++; s++;
if (s == buffer->rlimit) if (s == buffer->rlimit)
@ -215,9 +749,8 @@ _cpp_clean_line (cpp_reader *pfile)
} }
else else
{ {
do while (*s != '\n' && *s != '\r')
s++; s++;
while (*s != '\n' && *s != '\r');
d = (uchar *) s; d = (uchar *) s;
/* Handle DOS line endings. */ /* Handle DOS line endings. */

View File

@ -29,6 +29,9 @@ along with GCC; see the file COPYING3. If not see
#ifdef HAVE_STDDEF_H #ifdef HAVE_STDDEF_H
# include <stddef.h> # include <stddef.h>
#endif #endif
#ifdef HAVE_STDINT_H
# include <stdint.h>
#endif
#include <stdio.h> #include <stdio.h>