re PR middle-end/18785 (isdigit builtin function fails with EBCDIC character sets)

PR 18785 libcpp: * charset.c (LAST_POSSIBLY_BASIC_SOURCE_CHAR): New helper macro. (cpp_host_to_exec_charset): New function. * include/cpplib.h: Declare cpp_host_to_exec_charset. gcc: * langhooks.h (struct lang_hooks): Add to_target_charset. * langhooks.c (lhd_to_target_charset): New function. * langhooks-def.h: Declare lhd_to_target_charset. (LANG_HOOKS_TO_TARGET_CHARSET): New macro. (LANG_HOOKS_INITIALIZER): Update. * c-common.c (c_common_to_target_charset): New function. * c-common.h: Declare it. * c-objc-common.h (LANG_HOOKS_TO_TARGET_CHARSET): Set to c_common_to_target_charset. * defaults.c (TARGET_BELL, TARGET_BS, TARGET_CR, TARGET_DIGIT0) (TARGET_ESC, TARGET_FF, TARGET_NEWLINE, TARGET_TAB, TARGET_VT): Delete definitions. * system.h: Poison them. * doc/tm.texi: Don't discuss them. * builtins.c (fold_builtin_isdigit): Use lang_hooks.to_target_charset. * c-pretty-print.c (pp_c_integer_constant): Don't use pp_c_char. (pp_c_char): Do not attempt to generate letter escapes for newline, tab, etc. * config/arm/arm.c (output_ascii_pseudo_op): Likewise. * config/mips/mips.c (mips_output_ascii): Likewise. gcc/cp: * cp-objcp-common.h (LANG_HOOKS_TO_TARGET_CHARSET): Set to c_common_to_target_charset. Delete bogus comment. gcc/testsuite: * gcc.dg/charset/builtin1.c: New test. From-SVN: r95304
2005-02-20 17:01:32 +00:00 · 2005-02-20 17:01:32 +00:00 · c5ff069dc4
parent 5920b5d2e8
commit c5ff069dc4
21 changed files with 370 additions and 325 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,28 @@
+2005-02-20  Zack Weinberg  <zack@codesourcery.com>
+
+	PR 18785
+	* langhooks.h (struct lang_hooks): Add to_target_charset.
+	* langhooks.c (lhd_to_target_charset): New function.
+	* langhooks-def.h: Declare lhd_to_target_charset.
+	(LANG_HOOKS_TO_TARGET_CHARSET): New macro.
+	(LANG_HOOKS_INITIALIZER): Update.
+	* c-common.c (c_common_to_target_charset): New function.
+	* c-common.h: Declare it.
+	* c-objc-common.h (LANG_HOOKS_TO_TARGET_CHARSET): Set to
+	c_common_to_target_charset.
+
+	* defaults.h (TARGET_BELL, TARGET_BS, TARGET_CR, TARGET_DIGIT0)
+	(TARGET_ESC, TARGET_FF, TARGET_NEWLINE, TARGET_TAB, TARGET_VT):
+	Delete definitions.
+	* system.h: Poison them.
+	* doc/tm.texi: Don't discuss them.
+	* builtins.c (fold_builtin_isdigit): Use lang_hooks.to_target_charset.
+	* c-pretty-print.c (pp_c_integer_constant): Don't use pp_c_char.
+	(pp_c_char): Do not attempt to generate letter escapes for
+	newline, tab, etc.
+	* config/arm/arm.c (output_ascii_pseudo_op): Likewise.
+	* config/mips/mips.c (mips_output_ascii): Likewise.
+
 2005-02-20  Dorit Naishlos  <dorit@il.ibm.com>

 	PR tree-optimization/19951
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@ -7623,11 +7623,18 @@ fold_builtin_isdigit (tree arglist)
  else
    {
      /* Transform isdigit(c) -> (unsigned)(c) - '0' <= 9.  */
-      /* According to the C standard, isdigit is unaffected by locale.  */
-      tree arg = TREE_VALUE (arglist);
-      arg = fold_convert (unsigned_type_node, arg);
+      /* According to the C standard, isdigit is unaffected by locale.
+	 However, it definitely is affected by the target character set.  */
+      tree arg;
+      unsigned HOST_WIDE_INT target_digit0
+	= lang_hooks.to_target_charset ('0');
+
+      if (target_digit0 == 0)
+	return NULL_TREE;
+
+      arg = fold_convert (unsigned_type_node, TREE_VALUE (arglist));
      arg = build2 (MINUS_EXPR, unsigned_type_node, arg,
-		    build_int_cst (unsigned_type_node, TARGET_DIGIT0));
+		    build_int_cst (unsigned_type_node, target_digit0));
      arg = build2 (LE_EXPR, integer_type_node, arg,
 		    build_int_cst (unsigned_type_node, 9));
      arg = fold (arg);
--- a/gcc/c-common.c
+++ b/gcc/c-common.c
@ -5620,6 +5620,27 @@ c_warn_unused_result (tree *top_p)
    }
 }

+/* Convert the character C from the host to the target execution
+   character set.  cpplib handles this, mostly.
+
+   C is reduced to its low CHAR_BIT bits before being handed to cpplib.
+   The result is sign-extended from bit CHAR_TYPE_SIZE - 1 when
+   flag_signed_char is set, and returned zero-extended otherwise.  */
+
+HOST_WIDE_INT
+c_common_to_target_charset (HOST_WIDE_INT c)
+{
+  /* Character constants in GCC proper are sign-extended under -fsigned-char,
+     zero-extended under -fno-signed-char.  cpplib insists that characters
+     and character constants are always unsigned.  Hence we must convert
+     back and forth.  */
+  cppchar_t uc = ((cppchar_t)c) & ((((cppchar_t)1) << CHAR_BIT)-1);
+
+  uc = cpp_host_to_exec_charset (parse_in, uc);
+
+  if (flag_signed_char)
+    {
+      /* Sign-extend by hand rather than with a left shift followed by
+	 a right shift: shifting a set bit into the sign position of a
+	 signed type is undefined in ISO C, and that is exactly what
+	 would happen for any converted character whose top bit is set
+	 (e.g. the EBCDIC digits, 0xF0-0xF9).  This form assumes
+	 CHAR_TYPE_SIZE is smaller than HOST_BITS_PER_WIDE_INT.  */
+      HOST_WIDE_INT sign_bit = ((HOST_WIDE_INT) 1) << (CHAR_TYPE_SIZE - 1);
+      HOST_WIDE_INT val = ((HOST_WIDE_INT) uc) & (sign_bit | (sign_bit - 1));
+
+      return (val ^ sign_bit) - sign_bit;
+    }
+  else
+    return uc;
+}
+
 /* Build the result of __builtin_offsetof.  EXPR is a nested sequence of
   component references, with an INDIRECT_REF at the bottom; much like
   the traditional rendering of offsetof as a macro.  Returns the folded
--- a/gcc/c-common.h
+++ b/gcc/c-common.h
@ -688,12 +688,14 @@ extern bool c_promoting_integer_type_p (tree);
 extern int self_promoting_args_p (tree);
 extern tree strip_array_types (tree);
 extern tree strip_pointer_operator (tree);
+extern HOST_WIDE_INT c_common_to_target_charset (HOST_WIDE_INT);

 /* This is the basic parsing function.  */
 extern void c_parse_file (void);
 /* This is misnamed, it actually performs end-of-compilation processing.  */
 extern void finish_file	(void);

+
 /* These macros provide convenient access to the various _STMT nodes.  */

 /* Nonzero if this statement should be considered a full-expression,
--- a/gcc/c-objc-common.h
+++ b/gcc/c-objc-common.h
@ -117,6 +117,8 @@ extern void c_initialize_diagnostics (diagnostic_context *);
 #define LANG_HOOKS_TYPE_PROMOTES_TO c_type_promotes_to
 #undef LANG_HOOKS_REGISTER_BUILTIN_TYPE
 #define LANG_HOOKS_REGISTER_BUILTIN_TYPE c_register_builtin_type
+#undef LANG_HOOKS_TO_TARGET_CHARSET
+#define LANG_HOOKS_TO_TARGET_CHARSET c_common_to_target_charset

 /* The C front end's scoping structure is very different from
   that expected by the language-independent code; it is best
--- a/gcc/c-pretty-print.c
+++ b/gcc/c-pretty-print.c
@ -712,50 +712,37 @@ pp_c_function_definition (c_pretty_printer *pp, tree t)

 /* Expressions.  */

-/* Print out a c-char.  */
+/* Print out a c-char.  This is called solely for characters which are
+   in the *target* execution character set.  We ought to convert them
+   back to the *host* execution character set before printing, but we
+   have no way to do this at present.  A decent compromise is to print
+   all characters as if they were in the host execution character set,
+   and not attempt to recover any named escape characters, but render
+   all unprintables as octal escapes.  If the host and target character
+   sets are the same, this produces relatively readable output.  If they
+   are not the same, strings may appear as gibberish, but that's okay
+   (in fact, it may well be what the reader wants, e.g. if they are looking
+   to see if conversion to the target character set happened correctly).
+
+   A special case: we need to prefix \, ", and ' with backslashes.  It is
+   correct to do so for the *host*'s \, ", and ', because the rest of the
+   file appears in the host character set.  */

 static void
 pp_c_char (c_pretty_printer *pp, int c)
 {
+  if (ISPRINT (c))
+    {
      switch (c)
 	{
-    case TARGET_NEWLINE:
-      pp_string (pp, "\\n");
-      break;
-    case TARGET_TAB:
-      pp_string (pp, "\\t");
-      break;
-    case TARGET_VT:
-      pp_string (pp, "\\v");
-      break;
-    case TARGET_BS:
-      pp_string (pp, "\\b");
-      break;
-    case TARGET_CR:
-      pp_string (pp, "\\r");
-      break;
-    case TARGET_FF:
-      pp_string (pp, "\\f");
-      break;
-    case TARGET_BELL:
-      pp_string (pp, "\\a");
-      break;
-    case '\\':
-      pp_string (pp, "\\\\");
-      break;
-    case '\'':
-      pp_string (pp, "\\'");
-      break;
-    case '\"':
-      pp_string (pp, "\\\"");
-      break;
-    default:
-      if (ISPRINT (c))
-	pp_character (pp, c);
+	case '\\': pp_string (pp, "\\\\"); break;
+	case '\'': pp_string (pp, "\\\'"); break;
+	case '\"': pp_string (pp, "\\\""); break;
+	default:   pp_character (pp, c);
+	}
+    }
  else
    pp_scalar (pp, "\\%03o", (unsigned) c);
-      break;
-    }
 }

 /* Print out a STRING literal.  */
@ -785,7 +772,7 @@ pp_c_integer_constant (c_pretty_printer *pp, tree i)
    {
      if (tree_int_cst_sgn (i) < 0)
        {
-          pp_c_char (pp, '-');
+          pp_character (pp, '-');
          i = build_int_cst_wide (NULL_TREE,
 				  -TREE_INT_CST_LOW (i),
 				  ~TREE_INT_CST_HIGH (i)
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@ -8657,8 +8657,14 @@ int_log2 (HOST_WIDE_INT power)
  return shift;
 }

-/* Output a .ascii pseudo-op, keeping track of lengths.  This is because
-   /bin/as is horribly restrictive.  */
+/* Output a .ascii pseudo-op, keeping track of lengths.  This is
+   because /bin/as is horribly restrictive.  The judgement about
+   whether or not each character is 'printable' (and can be output as
+   is) or not (and must be printed with an octal escape) must be made
+   with reference to the *host* character set -- the situation is
+   similar to that discussed in the comments above pp_c_char in
+   c-pretty-print.c.  */
+
 #define MAX_ASCII_LEN 51

 void
@ -8679,48 +8685,13 @@ output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
 	  len_so_far = 0;
 	}

-      switch (c)
+      if (ISPRINT (c))
+	{
+	  if (c == '\\' || c == '\"')
 	    {
-	case TARGET_TAB:
-	  fputs ("\\t", stream);
-	  len_so_far += 2;
-	  break;
-
-	case TARGET_FF:
-	  fputs ("\\f", stream);
-	  len_so_far += 2;
-	  break;
-
-	case TARGET_BS:
-	  fputs ("\\b", stream);
-	  len_so_far += 2;
-	  break;
-
-	case TARGET_CR:
-	  fputs ("\\r", stream);
-	  len_so_far += 2;
-	  break;
-
-	case TARGET_NEWLINE:
-	  fputs ("\\n", stream);
-	  c = p [i + 1];
-	  if ((c >= ' ' && c <= '~')
-	      || c == TARGET_TAB)
-	    /* This is a good place for a line break.  */
-	    len_so_far = MAX_ASCII_LEN;
-	  else
-	    len_so_far += 2;
-	  break;
-
-	case '\"':
-	case '\\':
 	      putc ('\\', stream);
 	      len_so_far++;
-	  /* Drop through.  */
-
-	default:
-	  if (c >= ' ' && c <= '~')
-	    {
+	    }
 	  putc (c, stream);
 	  len_so_far++;
 	}
@ -8729,8 +8700,6 @@ output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
 	  fprintf (stream, "\\%03o", c);
 	  len_so_far += 4;
 	}
-	  break;
-	}
    }

  fputs ("\"\n", stream);
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@ -5135,48 +5135,13 @@ mips_output_ascii (FILE *stream, const char *string_param, size_t len,
    {
      register int c = string[i];

-      switch (c)
+      if (ISPRINT (c))
+	{
+	  if (c == '\\' || c == '\"')
 	    {
-	case '\"':
-	case '\\':
 	      putc ('\\', stream);
-	  putc (c, stream);
-	  cur_pos += 2;
-	  break;
-
-	case TARGET_NEWLINE:
-	  fputs ("\\n", stream);
-	  if (i+1 < len
-	      && (((c = string[i+1]) >= '\040' && c <= '~')
-		  || c == TARGET_TAB))
-	    cur_pos = 32767;		/* break right here */
-	  else
-	    cur_pos += 2;
-	  break;
-
-	case TARGET_TAB:
-	  fputs ("\\t", stream);
-	  cur_pos += 2;
-	  break;
-
-	case TARGET_FF:
-	  fputs ("\\f", stream);
-	  cur_pos += 2;
-	  break;
-
-	case TARGET_BS:
-	  fputs ("\\b", stream);
-	  cur_pos += 2;
-	  break;
-
-	case TARGET_CR:
-	  fputs ("\\r", stream);
-	  cur_pos += 2;
-	  break;
-
-	default:
-	  if (c >= ' ' && c < 0177)
-	    {
+	      cur_pos++;
+	    }
 	  putc (c, stream);
 	  cur_pos++;
 	}
@ -5185,7 +5150,6 @@ mips_output_ascii (FILE *stream, const char *string_param, size_t len,
 	  fprintf (stream, "\\%03o", c);
 	  cur_pos += 4;
 	}
-	}

      if (cur_pos > 72 && i+1 < len)
 	{
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@ -1,3 +1,9 @@
+2005-02-20  Zack Weinberg  <zack@codesourcery.com>
+
+	PR 18785
+	* cp-objcp-common.h (LANG_HOOKS_TO_TARGET_CHARSET): Set to
+	c_common_to_target_charset.  Delete bogus comment.
+
 2005-02-18  Richard Henderson  <rth@redhat.com>

 	PR libstdc++/10606
--- a/gcc/cp/cp-objcp-common.h
+++ b/gcc/cp/cp-objcp-common.h
@ -159,6 +159,8 @@ extern tree objcp_tsubst_copy_and_build (tree, tree, tsubst_flags_t,
 #define LANG_HOOKS_TYPE_PROMOTES_TO cxx_type_promotes_to
 #undef LANG_HOOKS_REGISTER_BUILTIN_TYPE
 #define LANG_HOOKS_REGISTER_BUILTIN_TYPE c_register_builtin_type
+#undef LANG_HOOKS_TO_TARGET_CHARSET
+#define LANG_HOOKS_TO_TARGET_CHARSET c_common_to_target_charset
 #undef LANG_HOOKS_GIMPLIFY_EXPR
 #define LANG_HOOKS_GIMPLIFY_EXPR cp_gimplify_expr

--- a/gcc/defaults.h
+++ b/gcc/defaults.h
@ -36,19 +36,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 		  obstack_chunk_alloc,			\
 		  obstack_chunk_free)

-/* Define default standard character escape sequences.  */
-#ifndef TARGET_BELL
-#  define TARGET_BELL 007
-#  define TARGET_BS 010
-#  define TARGET_CR 015
-#  define TARGET_DIGIT0 060
-#  define TARGET_ESC 033
-#  define TARGET_FF 014
-#  define TARGET_NEWLINE 012
-#  define TARGET_TAB 011
-#  define TARGET_VT 013
-#endif
-
 /* Store in OUTPUT a string (made with alloca) containing an
   assembler-name for a local static variable or function named NAME.
   LABELNO is an integer which is different for each call.  */
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@ -31,7 +31,6 @@ through the macros defined in the @file{.h} file.
 * Per-Function Data::   Defining data structures for per-function information.
 * Storage Layout::      Defining sizes and alignments of data.
 * Type Layout::         Defining sizes and properties of basic user data types.
-* Escape Sequences::    Defining the value of target character escape sequences
 * Registers::           Naming and describing the hardware registers.
 * Register Classes::    Defining the classes of hardware registers.
 * Stack and Calling::   Defining which way the stack grows and by how much.
@ -1816,42 +1815,6 @@ specified by @code{TARGET_VTABLE_ENTRY_ALIGN}), set this to the number
 of words in each data entry.
@end defmac

-@node Escape Sequences
-@section Target Character Escape Sequences
-@cindex escape sequences
-
-By default, GCC assumes that the C character escape sequences and other
-characters take on their ASCII values for the target.  If this is not
-correct, you must explicitly define all of the macros below.  All of
-them must evaluate to constants; they are used in @code{case}
-statements.
-
-@findex TARGET_BELL
-@findex TARGET_BS
-@findex TARGET_CR
-@findex TARGET_DIGIT0
-@findex TARGET_ESC
-@findex TARGET_FF
-@findex TARGET_NEWLINE
-@findex TARGET_TAB
-@findex TARGET_VT
-@multitable {@code{TARGET_NEWLINE}} {Escape} {ASCII character}
-@item Macro                 @tab Escape             @tab ASCII character
-@item @code{TARGET_BELL}    @tab @kbd{\a}           @tab @code{07}, @code{BEL}
-@item @code{TARGET_BS}      @tab @kbd{\b}           @tab @code{08}, @code{BS}
-@item @code{TARGET_CR}      @tab @kbd{\r}           @tab @code{0D}, @code{CR}
-@item @code{TARGET_DIGIT0}  @tab @kbd{0}            @tab @code{30}, @code{ZERO}
-@item @code{TARGET_ESC}     @tab @kbd{\e}, @kbd{\E} @tab @code{1B}, @code{ESC}
-@item @code{TARGET_FF}      @tab @kbd{\f}           @tab @code{0C}, @code{FF}
-@item @code{TARGET_NEWLINE} @tab @kbd{\n}           @tab @code{0A}, @code{LF}
-@item @code{TARGET_TAB}     @tab @kbd{\t}           @tab @code{09}, @code{HT}
-@item @code{TARGET_VT}      @tab @kbd{\v}           @tab @code{0B}, @code{VT}
-@end multitable
-
-@noindent
-Note that the @kbd{\e} and @kbd{\E} escapes are GNU extensions, not
-part of the C standard.
-
@node Registers
@section Register Usage
@cindex register usage
--- a/gcc/langhooks-def.h
+++ b/gcc/langhooks-def.h
@ -68,6 +68,7 @@ extern bool lhd_decl_ok_for_sibcall (tree);
 extern const char *lhd_comdat_group (tree);
 extern tree lhd_expr_size (tree);
 extern size_t lhd_tree_size (enum tree_code);
+extern HOST_WIDE_INT lhd_to_target_charset (HOST_WIDE_INT);

 /* Declarations of default tree inlining hooks.  */
 extern tree lhd_tree_inlining_walk_subtrees (tree *, int *, walk_tree_fn,
@ -122,6 +123,7 @@ extern int lhd_gimplify_expr (tree *, tree *, tree *);
 #define LANG_HOOKS_TREE_SIZE		lhd_tree_size
 #define LANG_HOOKS_TYPES_COMPATIBLE_P	lhd_types_compatible_p
 #define LANG_HOOKS_BUILTIN_FUNCTION	builtin_function
+#define LANG_HOOKS_TO_TARGET_CHARSET	lhd_to_target_charset

 #define LANG_HOOKS_FUNCTION_INIT	lhd_do_nothing_f
 #define LANG_HOOKS_FUNCTION_FINAL	lhd_do_nothing_f
@ -285,6 +287,7 @@ extern tree lhd_make_node (enum tree_code);
  LANG_HOOKS_GET_CALLEE_FNDECL, \
  LANG_HOOKS_PRINT_ERROR_FUNCTION, \
  LANG_HOOKS_EXPR_SIZE, \
+  LANG_HOOKS_TO_TARGET_CHARSET, \
  LANG_HOOKS_ATTRIBUTE_TABLE, \
  LANG_HOOKS_COMMON_ATTRIBUTE_TABLE, \
  LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE, \
--- a/gcc/langhooks.c
+++ b/gcc/langhooks.c
@ -546,3 +546,9 @@ lhd_make_node (enum tree_code code)
 {
  return make_node (code);
 }
+
+/* Default implementation of the to_target_charset language hook
+   (see LANG_HOOKS_TO_TARGET_CHARSET): return the character C
+   unchanged, i.e. perform no host-to-target conversion.  */
+HOST_WIDE_INT
+lhd_to_target_charset (HOST_WIDE_INT c)
+{
+  return c;
+}
--- a/gcc/langhooks.h
+++ b/gcc/langhooks.h
@ -374,6 +374,15 @@ struct lang_hooks
     semantics in cases that it doesn't want to handle specially.  */
  tree (*expr_size) (tree);

+  /* Convert a character from the host's to the target's character
+     set.  The character should be in what C calls the "basic source
+     character set" (roughly, the set of characters defined by plain
+     old ASCII).  The default is to return the character unchanged,
+     which is correct in most circumstances.  Note that both argument
+     and result should be sign-extended under -fsigned-char,
+     zero-extended under -fno-signed-char.  */
+  HOST_WIDE_INT (*to_target_charset) (HOST_WIDE_INT);
+
  /* Pointers to machine-independent attribute tables, for front ends
     using attribs.c.  If one is NULL, it is ignored.  Respectively, a
     table of attributes specific to the language, a table of
--- a/gcc/system.h
+++ b/gcc/system.h
@ -660,7 +660,8 @@ extern void fancy_abort (const char *, int, const char *) ATTRIBUTE_NORETURN;
 	PUT_SDB_SRC_FILE STABS_GCC_MARKER DBX_OUTPUT_FUNCTION_END	   \
 	DBX_OUTPUT_GCC_MARKER DBX_FINISH_SYMBOL SDB_GENERATE_FAKE	   \
 	NON_SAVING_SETJMP TARGET_LATE_RTL_PROLOGUE_EPILOGUE		   \
-	CASE_DROPS_THROUGH
+	CASE_DROPS_THROUGH TARGET_BELL TARGET_BS TARGET_CR TARGET_DIGIT0   \
+        TARGET_ESC TARGET_FF TARGET_NEWLINE TARGET_TAB TARGET_VT

 /* Hooks that are no longer used.  */
 #pragma GCC poison LANG_HOOKS_FUNCTION_MARK LANG_HOOKS_FUNCTION_FREE	\
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@ -1,3 +1,8 @@
+2005-02-20  Zack Weinberg  <zack@codesourcery.com>
+
+	PR 18785
+	* gcc.dg/charset/builtin1.c: New test.
+
 2005-02-19  Volker Reichelt  <reichelt@igpm.rwth-aachen.de>

 	PR c++/19299
--- a/gcc/testsuite/gcc.dg/charset/builtin1.c
+++ b/gcc/testsuite/gcc.dg/charset/builtin1.c
@ -0,0 +1,25 @@
+/* isdigit(c) can be optimized to ((unsigned)c) - '0' <= 9, but only if
+   we know the correct value of '0'.  PR 18785.  */
+
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline -fexec-charset=IBM-1047" } */
+
+extern int isdigit(int);
+extern void abort(void);
+
+/* Helpers returning the character constants '1' and 'A'; main feeds
+   their results to isdigit.  */
+static int str1(void) { return '1'; }
+static int strA(void) { return 'A'; }
+
+/* Abort if isdigit misclassifies '1' (a digit) or 'A' (not a digit);
+   return 0 when every check passes.  */
+int
+main(void)
+{
+  /* isdigit applied directly to character constants.  */
+  if (!isdigit('1'))
+    abort();
+  if (isdigit('A'))
+    abort();
+  /* The same two checks with the character obtained from a function
+     call.  The test is built with -fno-inline; presumably this keeps
+     the argument from being a compile-time constant at the call site
+     -- confirm against the optimizer's behavior.  */
+  if (!isdigit(str1()))
+    abort();
+  if (isdigit(strA()))
+    abort();
+  return 0;
+}
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@ -1,3 +1,10 @@
+2005-02-20  Zack Weinberg  <zack@codesourcery.com>
+
+	PR 18785
+	* charset.c (LAST_POSSIBLY_BASIC_SOURCE_CHAR): New helper macro.
+	(cpp_host_to_exec_charset): New function.
+	* include/cpplib.h: Declare cpp_host_to_exec_charset.
+
 2005-02-19  Devang Patel  <dpatel@apple.com>

 	* charset.c (_cpp_convert_input): Check '\r' before inserting
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@ -81,8 +81,10 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

 #if HOST_CHARSET == HOST_CHARSET_ASCII
 #define SOURCE_CHARSET "UTF-8"
+#define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0x7e
 #elif HOST_CHARSET == HOST_CHARSET_EBCDIC
 #define SOURCE_CHARSET "UTF-EBCDIC"
+#define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0xFF
 #else
 #error "Unrecognized basic host character set"
 #endif
@ -714,6 +716,63 @@ _cpp_destroy_iconv (cpp_reader *pfile)
    }
 }

+/* Utility routine for use by a full compiler.  C is a character taken
+   from the *basic* source character set, encoded in the host's
+   execution encoding.  Convert it to (the target's) execution
+   encoding, and return that value.
+
+   Issues an internal error if C's representation in the narrow
+   execution character set fails to be a single-byte value (C99
+   5.2.1p3: "The representation of each member of the source and
+   execution character sets shall fit in a byte.")  May also issue an
+   internal error if C fails to be a member of the basic source
+   character set (testing this exactly is too hard, especially when
+   the host character set is EBCDIC).
+
+   Returns 0 whenever an error has been reported.  The scratch buffer
+   used for the conversion is released on every path that allocated
+   it, including the error paths.  */
+cppchar_t
+cpp_host_to_exec_charset (cpp_reader *pfile, cppchar_t c)
+{
+  uchar sbuf[1];
+  struct _cpp_strbuf tbuf;
+  cppchar_t result = 0;
+
+  /* This test is merely an approximation, but it suffices to catch
+     the most important thing, which is that we don't get handed a
+     character outside the unibyte range of the host character set.
+     Nothing has been allocated yet, so returning directly is fine.  */
+  if (c > LAST_POSSIBLY_BASIC_SOURCE_CHAR)
+    {
+      cpp_error (pfile, CPP_DL_ICE,
+		 "character 0x%lx is not in the basic source character set\n",
+		 (unsigned long)c);
+      return 0;
+    }
+
+  /* Being a character in the unibyte range of the host character set,
+     we can safely splat it into a one-byte buffer and trust that that
+     is a well-formed string.  */
+  sbuf[0] = c;
+
+  /* This should never need to reallocate, but just in case... */
+  tbuf.asize = 1;
+  tbuf.text = xmalloc (tbuf.asize);
+  tbuf.len = 0;
+
+  /* Report any failure first (cpp_errno must see errno as the
+     conversion left it), then fall through to the single point where
+     the buffer is freed.  */
+  if (!APPLY_CONVERSION (pfile->narrow_cset_desc, sbuf, 1, &tbuf))
+    cpp_errno (pfile, CPP_DL_ICE, "converting to execution character set");
+  else if (tbuf.len != 1)
+    cpp_error (pfile, CPP_DL_ICE,
+	       "character 0x%lx is not unibyte in execution character set",
+	       (unsigned long)c);
+  else
+    result = tbuf.text[0];
+
+  free (tbuf.text);
+  return result;
+}
+
+

 /* Utility routine that computes a mask of the form 0000...111... with
   WIDTH 1-bits.  */
@ -727,8 +786,6 @@ width_to_mask (size_t width)
    return ((size_t) 1 << width) - 1;
 }

-
-
 /* Returns 1 if C is valid in an identifier, 2 if C is valid except at
   the start of an identifier, and 0 if C is not valid in an
   identifier.  We assume C has already gone through the checks of
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@ -659,6 +659,9 @@ extern bool cpp_interpret_string_notranslate (cpp_reader *,
 					      const cpp_string *, size_t,
 					      cpp_string *, bool);

+/* Convert a host character constant to the execution character set.  */
+extern cppchar_t cpp_host_to_exec_charset (cpp_reader *, cppchar_t);
+
 /* Used to register macros and assertions, perhaps from the command line.
   The text is the same as the command line argument.  */
 extern void cpp_define (cpp_reader *, const char *);
@ -743,12 +746,6 @@ cpp_num cpp_num_sign_extend (cpp_num, size_t);
 #define CPP_DL_WARNING_P(l)	(CPP_DL_EXTRACT (l) >= CPP_DL_WARNING \
 				 && CPP_DL_EXTRACT (l) <= CPP_DL_PEDWARN)

-/* N.B. The error-message-printer prototypes have not been nicely
-   formatted because exgettext needs to see 'msgid' on the same line
-   as the name of the function in order to work properly.  Only the
-   string argument gets a name in an effort to keep the lines from
-   getting ridiculously oversized.  */
-
 /* Output a diagnostic of some kind.  */
 extern void cpp_error (cpp_reader *, int, const char *msgid, ...)
  ATTRIBUTE_PRINTF_3;