cobol: Mainly extends compilation and execution in finternal-ebcdic.

We expanded our extended testing regime to execute many testcases in
EBCDIC mode as well as in ASCII. This exposed hundreds of problems in
both compilation (where conversions must be made between the ASCII
source code and the EBCDIC execution environment) and in run-time
functionality, where results from calls to system routines and internal
calculations that must be done in ASCII have to be converted to EBCDIC.

These changes also switch to using FIXED_WIDE_INT(128) instead of
REAL_VALUE_TYPE when initializing fixed-point COBOL variable types.
This provides for accurate initialization up to 37 digits, instead of
losing accuracy after 33 digits.

These changes also support the implementation of the COBOL DELETE FILE
(Format 2) statement.

These changes also introduce expanded support for specifying character
encodings, including support for locales.

co-authored-by: Robert Dubner <rdubner@symas.com>
co-authored-by: James K. Lowden <jklowden@cobolworx.com>

gcc/cobol/ChangeLog:

	* Make-lang.in: Repair documentation generation.
	* cdf.y: Changes to tokens.
	* cobol1.cc (cobol_langhook_handle_option): Add comment.
	* genapi.cc (function_pointer_from_name): Use data.original() for
	function name.
	(parser_initialize_programs): Likewise.
	(cobol_compare): Make sure encodings of comparands are the same.
	(move_tree): Change name of DEFAULT_SOURCE_ENCODING macro.
	(parser_enter_program): Typo.
	(psa_FldLiteralN): Break out dirty_to_binary() support routine.
	(dirty_to_binary): Likewise.
	(parser_alphabet): Rename 'alphabet' to 'collation_sequence'.
	(parser_allocate): Change wsclear() to be uint32_t instead of char.
	(parser_label_label): Formatting.
	(parser_label_goto): Likewise.
	(get_the_filename): Breakout get_the_filename(), which handles
	encoding.
	(parser_file_open): Likewise.
	(set_up_delete_file_label): Implement DELETE FILE (Format 2).
	(parser_file_delete_file): Likewise.
	(parser_file_delete_on_exception): Likewise.
	(parser_file_delete_not_exception): Likewise.
	(parser_file_delete_end): Likewise.
	(parser_call): Use data.original().
	(parser_entry): Use data.original().
	(mh_source_is_literalN): Convert from
	sourceref.field->codeset.encoding.
	(binary_initial_from_float128): Change to "binary_initial".
	(binary_initial): Calculate in FIXED_WIDE_INT(128) instead of
	REAL_VALUE_TYPE.
	(digits_from_int128): New routine uses binary_initial.
	(digits_from_float128): Removed.  Kept as comment for reference.
	(initial_from_initial): Use binary_initial.
	(actually_create_the_static_field): Use correct encoding.
	(parser_symbol_add): Likewise.
	* genapi.h (parser_file_delete_file): Implement FILE DELETE.
	(parser_file_delete_on_exception): Implement FILE DELETE.
	(parser_file_delete_not_exception): Implement FILE DELETE.
	(parser_file_delete_end): Implement FILE DELETE.
	* genmath.cc: Include charmaps.h.
	* genutil.cc (get_literal_string):  Change name of
	DEFAULT_SOURCE_ENCODING macro.
	* parse.y: Token changes; numerous changes in support of encoding;
	support for DELETE FILE.
	* parse_ante.h (name_of): Use data.original().
	(class prog_descr_t): Support of locales.
	(current_options): Formatting.
	(current_encoding):  Formatting.
	(current_program_index): Formatting.
	(current_section): Formatting.
	(current_paragraph): Formatting.
	(is_integer_literal): Use correct encoding.
	(value_encoding_check): Handle encoding changes.
	(alphabet_add): Likewise.
	(data_division_ready): Likewise.
	* scan.l: Use data.original().
	* show_parse.h: Use correct encoding.
	* symbols.cc (elementize): Likewise.
	(symbol_elem_cmp): Handle locale.
	(struct symbol_elem_t): Likewise.
	(symbol_locale): Likewise.
	(field_str): Change DEFAULT_SOURCE_ENCODING macro name.
	(symbols_alphabet_set): Formatting.
	(symbols_update): Modify consistency checks.
	(symbol_locale_add): Locale support.
	(cbl_locale_t::cbl_locale_t): Locale support.
	(cbl_alphabet_t::cbl_alphabet_t): New structure.
	(cbl_alphabet_t::reencode): Formatting.
	(cbl_alphabet_t::assign): Change name of collation_sequence.
	(cbl_alphabet_t::also): Likewise.
	(new_literal_add): Anticipate the need for four-byte characters.
	(guess_encoding): Eliminate.
	(cbl_field_t::internalize): Refine conversion of data.initial to
	specified encoding.
	* symbols.h (enum symbol_type_t): Add SymLocale.
	(struct cbl_field_data_t): Incorporate data.orig.
	(struct cbl_field_t): Likewise.
	(struct cbl_delete_file_t): New structure.
	(struct cbl_label_t): Incorporate cbl_delete_file_t.
	(struct cbl_locale_t): Support for locale.
	(hex_decode): Comment.
	(struct cbl_alphabet_t): Incorporate locale; change variable name
	to collation_sequence.
	(struct symbol_elem_t): Incorporate locale.
	(cbl_locale_of): Likewise.
	(cbl_alphabet_of): Likewise.
	(symbol_locale_add): Likewise.
	(wsclear): Type is now uint32_t instead of char.
	* util.cc (symbol_type_str):  Incorporate locale.
	(cbl_field_t::report_invalid_initial_value): Change test so that
	pure PIC A() variables are limited to [a-zA-Z] and space.
	(valid_move): Use DEFAULT_SOURCE_ENCODING macro.
	(cobol_filename): Formatting.

libgcobol/ChangeLog:

	* charmaps.cc (__gg__encoding_iconv_type): Eliminate trailing
	'/' characters from encoding names.
	(__gg__get_charmap): Switch to DEFAULT_SOURCE_ENCODING macro name.
	* charmaps.h (DEFAULT_CHARMAP_SOURCE): Likewise.
	(DEFAULT_SOURCE_ENCODING): Likewise.
	(class charmap_t): Enhance constructor.
	* encodings.h (valid_encoding): New routine.
	* gcobolio.h (enum cblc_file_prior_op_t): Support DELETE FILE.
	* gfileio.cc (get_filename): Likewise.
	(__io__file_remove): Likewise.
	(__gg__file_reopen): Likewise.
	(__io__file_open): Likewise.
	(gcobol_fileops): Likewise.
	(__gg__file_delete): Likewise.
	(__gg__file_remove): Likewise.
	* intrinsic.cc (get_all_time):  Switch to DEFAULT_SOURCE_ENCODING
	macro name.
	(ftime_replace): Support ASCII/EBCDIC encoding.
	(__gg__current_date): Likewise.
	(__gg__max): Likewise.
	(__gg__lower_case): Likewise.
	(numval): Likewise.
	(numval_c): Likewise.
	(__gg__upper_case): Likewise.
	(__gg__when_compiled): Likewise.
	(gets_int): Likewise.
	(gets_nanoseconds): Likewise.
	(fill_cobol_tm): Likewise.
	(floating_format_tester): Likewise.
	(__gg__numval_f): Likewise.
	(__gg__test_numval_f): Likewise.
	(iscasematch): Likewise.
	(strcasestr): Likewise.
	(strcaselaststr): Likewise.
	(__gg__substitute): Likewise.
	(__gg__locale_compare): Support for locale.
	(__gg__locale_date): Likewise.
	(__gg__locale_time): Likewise.
	(__gg__locale_time_from_seconds): Likewise.
	* libgcobol.cc (class ec_status_t): Support for encoding.
	(int128_to_field): Likewise.
	(__gg__dirty_to_float): Likewise.
	(format_for_display_internal): Likewise.
	(get_float128): Likewise.
	(compare_field_class): Likewise.
	(__gg__compare_2): Likewise.
	(init_var_both): Likewise.
	(__gg__move): Likewise.
	(display_both): Likewise.
	(is_numeric_display_numeric): Likewise.
	(accept_envar): Likewise.
	(__gg__get_argv): Likewise.
	(__gg__unstring): Likewise.
	(__gg__check_fatal_exception): Likewise.
	(__gg__adjust_encoding): Likewise.
	(__gg__func_exception_location): Likewise.
	(__gg__func_exception_statement): Likewise.
	(__gg__func_exception_status): Likewise.
	(__gg__func_exception_file): Likewise.
	(__gg__just_mangle_name): Likewise.
	(__gg__function_handle_from_name): Likewise.
	(get_the_byte): Likewise.
	(__gg__module_name): Likewise.
	(__gg__accept_arg_value): Likewise.
	* xmlparse.cc (fatalError): Formatting.
	(setDocumentLocator): Formatting.
	(xmlchar_of): Formatting.
	(xmlParserErrors_str): Formatting.
This commit is contained in:
Robert Dubner 2025-11-06 07:26:18 -05:00
parent e82a29a9a7
commit 1139d69306
22 changed files with 1764 additions and 838 deletions

View File

@ -330,7 +330,7 @@ cobol.srcpdf: gcobol.pdf gcobol-io.pdf
ln $^ $(srcdir)/cobol/
gcobol.pdf: $(srcdir)/cobol/gcobol.1
groff -mdoc -T pdf $^ > $@~
groff -mdoc -t -T pdf $^ > $@~
@mv $@~ $@
gcobol-io.pdf: $(srcdir)/cobol/gcobol.3
groff -mdoc -T pdf $^ > $@~

View File

@ -244,21 +244,21 @@ apply_cdf_turn( const exception_turn_t& turn ) {
%type <boolean> DEFINED
%token OTHER 699 PARAMETER_kw 369 "PARAMETER"
%token OFF 688 OVERRIDE 370
%token THRU 949
%token TRUE_kw 814 "True"
%token THRU 950
%token TRUE_kw 815 "True"
%token CALL_COBOL 393 "CALL"
%token CALL_VERBATIM 394 "CALL (as C)"
%token TURN 816 CHECKING 497 LOCATION 650 ON 690 WITH 843
%token TURN 817 CHECKING 497 LOCATION 650 ON 690 WITH 844
%left OR 950
%left AND 951
%right NOT 952
%left '<' '>' '=' NE 953 LE 954 GE 955
%left OR 951
%left AND 952
%right NOT 953
%left '<' '>' '=' NE 954 LE 955 GE 956
%left '-' '+'
%left '*' '/'
%right NEG 957
%right NEG 958
%define api.prefix {ydf}
%define api.token.prefix{YDF_}

View File

@ -365,6 +365,7 @@ cobol_langhook_handle_option (size_t scode,
return true;
case OPT_fdefaultbyte:
// cobol_default_byte is an unsigned ing
wsclear(cobol_default_byte);
return true;

File diff suppressed because it is too large Load Diff

View File

@ -400,6 +400,12 @@ parser_file_rewrite( cbl_file_t *file, cbl_field_t *field,
void
parser_file_delete( cbl_file_t *file, bool sequentially );
void parser_file_delete_file( cbl_label_t *name,
std::vector<cbl_file_t*> filenames );
void parser_file_delete_on_exception( cbl_label_t *name );
void parser_file_delete_not_exception( cbl_label_t *name );
void parser_file_delete_end( cbl_label_t *name );
#if condition_lists
struct cbl_conditional_t {
cbl_field_t *tgt;

View File

@ -42,6 +42,7 @@
#include "gengen.h"
#include "structs.h"
#include "../../libgcobol/gcobolio.h"
#include "../../libgcobol/charmaps.h"
#include "show_parse.h"
void

View File

@ -1744,7 +1744,7 @@ get_literal_string(cbl_field_t *field)
char *buffer = static_cast<char *>(xcalloc(1, buffer_length));
size_t charsout;
const char *converted = __gg__iconverter(DEFAULT_CHARMAP_SOURCE,
const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
field->codeset.encoding,
field->data.initial,
field->data.capacity,

View File

@ -51,7 +51,7 @@
accept_envar_e,
};
struct collating_an_t {
struct coll_alphanat_t {
const char *alpha, *national;
};
@ -575,7 +575,7 @@ class locale_tgt_t {
RD RECORD RECORDING RECORDS RECURSIVE
REDEFINES REEL REFERENCE RELATIVE REM REMAINDER REMARKS
REMOVAL RENAMES REPLACE REPLACING REPORT REPORTING REPORTS
REPOSITORY RERUN RESERVE RESTRICTED RESUME
REPOSITORY RERUN RESERVE RESTRICTED RESUME RETRY
REVERSE REVERSED REWIND RF RH RIGHT ROUNDED RUN
SAME SCREEN SD
@ -702,8 +702,8 @@ class locale_tgt_t {
%type <number> open_io alphabet_etc
%type <special_type> device_name
%type <string> numed context_word ctx_name locale_spec
%type <collating_sequences> collating_sequences collating_ans
%type <collating_name> collating_an
%type <char_class_locales> char_class_locales coll_alphanats
%type <collating_name> coll_alphanat
%type <literal> namestr alphabet_lit program_as repo_as
%type <field> perform_cond kind_of_name
%type <refer> alloc_ret
@ -738,6 +738,9 @@ class locale_tgt_t {
relative_key_clause reserve_clause sharing_clause
%type <file> filename read_body write_body delete_body
%type <label> delete_file_body
%type <error> delete_error delete_except delete_excepts
%type <file> start_impl start_cond start_body
%type <rewrite_t> rewrite_body
%type <min_max> record_vary rec_contains from_to record_desc
@ -833,6 +836,7 @@ class locale_tgt_t {
global is_global anycase backward
end_display
exh_changed exh_named
override
%type <number> mistake globally first_last
%type <io_mode> io_mode
@ -874,6 +878,7 @@ class locale_tgt_t {
%type <opt_init_sect> opt_init_sect
%type <number> opt_init_value
%type <number> locale_current loc_category user_default
%type <string> locale_name
%type <token_list> loc_categories locale_tgt
%type <opt_round> rounded round_between rounded_type rounded_mode
%type <opt_arith> opt_arith_type
@ -901,7 +906,7 @@ class locale_tgt_t {
struct { YYLTYPE loc; int token; literal_t name; } prog_end;
struct { int token; special_name_t id; } special_type;
struct { char locale_type; const char * name; } locale_phrase;
collating_an_t collating_sequences;
coll_alphanat_t char_class_locales;
struct collating_name_t { int token; const char *name; } collating_name;
struct { size_t isym; cbl_encoding_t encoding; } codeset;
struct { cbl_field_type_t type;
@ -2371,6 +2376,23 @@ config_paragraphs: config_paragraph
config_paragraph:
SPECIAL_NAMES '.'
| SPECIAL_NAMES '.' special_names '.'
{
std::reverse_iterator<symbol_elem_t *>
p(symbols_end()),
pend(symbols_begin(PROGRAM));
for( ++p; p != pend; p++ ) {
if( p->type == SymAlphabet ) {
const auto& alphabet = *cbl_alphabet_of(&*p);
if( alphabet.encoding == no_encoding_e ) {
assert(alphabet.locale != 0 );
const auto& missing = *cbl_locale_of(symbol_at(alphabet.locale));
error_msg(alphabet.loc,
"ALPHABET %qs references LOCALE %qs, which is not defined",
alphabet.name, missing.name);
}
}
}
}
| SOURCE_COMPUTER '.'
| SOURCE_COMPUTER '.' NAME '.'
| SOURCE_COMPUTER '.' NAME with_debug '.'
@ -2507,19 +2529,36 @@ with_debug: with DEBUGGING MODE {
;
collations: %empty
| collation_classification
| collation_sequence
| collation_classification collation_sequence
| collation_sequence collation_classification
| char_classification
| collating_sequence
| char_classification collating_sequence
| collating_sequence char_classification
;
collation_classification:
character CLASSIFICATION collating_sequences[seq]
char_classification:
character CLASSIFICATION char_class_locales[seq]
{
warn_msg(@seq, "CHARACTER CLASSIFICATION ignored");
if( $seq.alpha ) {
auto e = symbol_locale(PROGRAM, $seq.alpha);
if( !e ) {
error_msg(@seq, "no LOCALE defined as %qs", $seq.alpha);
} else {
auto& encoding = cbl_locale_of(e)->encoding;
current.alpha_encoding(symbol_index(e), encoding);
}
}
if( $seq.national ) {
auto e = symbol_locale(PROGRAM, $seq.national);
if( !e ) {
error_msg(@seq, "no LOCALE defined as %qs", $seq.national);
} else {
auto& encoding = cbl_locale_of(e)->encoding;
current.national_encoding(symbol_index(e), encoding);
}
}
}
;
collation_sequence:
program_kw collating SEQUENCE collating_sequences[seq]
collating_sequence:
program_kw collating SEQUENCE char_class_locales[seq]
{
if( !current.collating_sequence($seq.alpha) ) {
error_msg(@seq, "collating sequence already defined as '%s'",
@ -2529,20 +2568,20 @@ collation_sequence:
}
;
collating_sequences:
char_class_locales:
is NAME[name] {
$$.alpha = $name;
$$.national = nullptr;
}
| collating_ans { $$ = $1; }
| coll_alphanats { $$ = $1; }
;
collating_ans: collating_an[encoding] {
$$ = collating_an_t();
coll_alphanats: coll_alphanat[encoding] {
$$ = coll_alphanat_t();
const char **pname =
$encoding.token == ALPHANUMERIC? &$$.alpha : &$$.national;
*pname = $encoding.name;
}
| collating_ans collating_an[encoding]
| coll_alphanats coll_alphanat[encoding]
{
const char **pname =
$encoding.token == ALPHANUMERIC? &$$.alpha : &$$.national;
@ -2553,7 +2592,7 @@ collating_ans: collating_an[encoding] {
*pname = $encoding.name;
}
;
collating_an: for alphanational is locale_phrase[locale] {
coll_alphanat: for alphanational is locale_phrase[locale] {
$$.token = $alphanational;
$$.name = $locale.name;
if( ! $locale.name ) {
@ -2568,7 +2607,6 @@ collating_an: for alphanational is locale_phrase[locale] {
keyword_str($$.token),
locale_name);
}
warn_msg(@locale, "LOCALE phrase ignored");
}
;
@ -2643,9 +2681,20 @@ special_name: dev_mnemonic
{
symbol_decimal_point_set(',');
}
| LOCALE NAME is locale_spec[spec] {
current.locale($NAME, $spec);
cbl_unimplementedw("sorry, unimplemented: LOCALE %qs", $spec);
| LOCALE NAME is locale_spec[spec]
{
cbl_locale_t locale($NAME, $spec);
if( locale.encoding == no_encoding_e ) {
error_msg(@NAME, "invalid iconv LOCALE name %qs", $spec);
YYERROR;
}
if( locale.encoding == UTF8_e ) {
cbl_unimplemented("UTF-8");
YYERROR;
}
if( ! current.locale_add(locale) ) {
error_msg(@NAME, "%qs already defined as LOCALE name", $NAME);
}
}
;
| upsi
@ -2655,6 +2704,8 @@ special_name: dev_mnemonic
}
;
locale_spec: NAME { $$ = $1; }
| UTF_8 { static char s[] ="UTF-8"; $$ = s; }
| UTF_16 { static char s[] ="UTF-16"; $$ = s; }
| LITERAL { $$ = string_of($1); }
;
@ -2746,14 +2797,16 @@ device_name: SYSIN { $$.token = SYSIN; $$.id = SYSIN_e; }
alphabet_name: STANDARD_ALPHABET { $$ = alphabet_add(@1, CP1252_e); }
| NATIVE { $$ = alphabet_add(@1, EBCDIC_e); }
| EBCDIC { $$ = alphabet_add(@1, EBCDIC_e); }
| LOCALE ctx_name
| LOCALE locale_name[name]
{
auto e = symbol_alphabet(PROGRAM, $ctx_name);
auto e = symbol_locale(PROGRAM, $name);
if( !e ) {
error_msg(@ctx_name, "no such ALPHABET %qs", $ctx_name);
YYERROR;
}
$$ = cbl_alphabet_of(e);
dbgmsg("no such LOCALE yet %s", $name);
cbl_locale_t locale($name); // locale is named but not defined
e = symbol_locale_add(PROGRAM, &locale);
}
cbl_alphabet_t alphabet( @name, symbol_index(e), $name);
$$ = alphabet_add(alphabet);
}
| alphabet_seqs
{
@ -3592,7 +3645,7 @@ const_value: cce_expr
value78: literalism
{
cbl_field_data_t data = {};
cbl_field_data_t data;
data.capacity = capacity_cast(strlen($1.data));
data.initial = $1.data;
$$.encoding = $1.encoding;
@ -3600,13 +3653,15 @@ value78: literalism
}
| const_value
{
cbl_field_data_t data = {};
cbl_field_data_t data;
data = build_real (float128_type_node, $1);
$$.encoding = current_encoding('A');
$$.data = new cbl_field_data_t(data);
}
| reserved_value[value]
{
const auto field = constant_of(constant_index($value));
$$.encoding = current_encoding('A');
$$.data = new cbl_field_data_t(field->data);
}
@ -3638,6 +3693,7 @@ data_descr1: level_name
field.type = FldLiteralN;
field.data = build_real (float128_type_node, $const_value);
field.data.initial = string_of($const_value);
field.codeset.set();
if( !cdf_value(field.name, cdfval_t($const_value)) ) {
error_msg(@1, "%s was defined by CDF", field.name);
@ -3674,13 +3730,12 @@ data_descr1: level_name
if( !cdf_value(field.name, $lit.data) ) {
error_msg(@1, "%s was defined by CDF", field.name);
}
if( ! field.codeset.valid() ) {
if( ! field.codeset.set(field.codeset.standard_internal.type) ) {
error_msg(@lit, "CONSTANT inconsistent with encoding %s",
cbl_alphabet_t::encoding_str(field.codeset.encoding));
}
if( ! field.codeset.set() ) {
error_msg(@lit, "CONSTANT inconsistent with encoding %s",
cbl_alphabet_t::encoding_str(field.codeset.encoding));
}
value_encoding_check(@lit, $1, $lit.encoding);
value_encoding_check(@lit, $1);
}
| level_name CONSTANT is_global FROM NAME
{
@ -3718,6 +3773,7 @@ data_descr1: level_name
} else {
field.type = FldLiteralN;
field.data.initial = string_of(field.data.value_of());
field.codeset.set($data.encoding);
if( !cdf_value(field.name, field.as_integer()) ) {
yywarn("%s was defined by CDF", field.name);
}
@ -3975,6 +4031,15 @@ data_descr1: level_name
// Verify VALUE
$field->report_invalid_initial_value(@data_clauses);
bool numerical =
$field->type == FldNumericDisplay || is_numeric($field);
if( $field->data.initial && ! numerical ) {
if( normal_value_e == cbl_figconst_of($field->data.initial) ) {
value_encoding_check(@data_clauses, $field);
}
}
// verify REDEFINES
const auto parent = parent_of($field);
if( parent && $field->level == parent->level ) {
@ -4287,14 +4352,16 @@ picture_clause: PIC signed nps[fore] nines nps[aft]
if( field->data.initial != NULL ) {
if( 0 < field->data.capacity &&
field->data.capacity < uint32_t($size) ) {
auto p = blank_pad_initial( field->data.initial,
field->data.capacity, $size );
auto p = blank_pad_initial(field->data.initial,
field->data.capacity, $size );
if( !p ) YYERROR;
field->data.initial = p;
}
}
field->data.capacity = $size;
charmap_t *charmap =
__gg__get_charmap(field->codeset.encoding);
field->data.capacity = $size * charmap->stride();
field->data.picture = NULL;
if( false ) dbgmsg("PIC alphanum_pic[size]:%d: %s",
@ -4708,14 +4775,23 @@ usage_clause1: usage BIT
value_clause: VALUE all LITERAL[lit] {
cbl_field_t *field = current_field();
if( ! field->codeset.set($lit.encoding) ) {
error_msg(@lit, "VALUE inconsistent with encoding %s",
cbl_alphabet_t::encoding_str(field->codeset.encoding));
if( $lit.prefix[0] ) { // not the default encoding
if( ! field->codeset.set($lit.encoding) ) {
error_msg(@lit, "VALUE inconsistent with encoding %s",
cbl_alphabet_t::encoding_str(field->codeset.encoding));
}
} else {
field->codeset.set();
}
if( field->codeset.encoding != $lit.encoding ) {
error_msg(@lit, "PICTURE inconsistent with VALUE %s'%s'",
$lit.prefix, $lit.data);
}
field->data.initial = $lit.data;
field->attr |= literal_attr($lit.prefix);
// The __gg__initialize_data routine needs to know that VALUE is a
// quoted literal. This is critical for NumericEdited variables
field->attr |= quoted_e;
if( field->data.capacity == 0 ) {
@ -4732,7 +4808,6 @@ value_clause: VALUE all LITERAL[lit] {
}
}
}
value_encoding_check(@lit, field, $lit.encoding);
}
| VALUE all cce_expr[value] {
cbl_field_t *field = current_field();
@ -4761,11 +4836,9 @@ value_clause: VALUE all LITERAL[lit] {
| VALUE all reserved_value[value]
{
cbl_field_t *field = current_field();
if( ! field->codeset.valid() ) {
if( ! field->codeset.set(field->codeset.standard_internal.type) ) {
error_msg(@value, "VALUE inconsistent with encoding %s",
cbl_alphabet_t::encoding_str(field->codeset.encoding));
}
if( ! field->codeset.set() ) {
error_msg(@value, "VALUE inconsistent with encoding %s",
cbl_alphabet_t::encoding_str(field->codeset.encoding));
}
if( $value != NULLS ) {
auto fig = constant_of(constant_index($value));
@ -5017,6 +5090,7 @@ typedef_clause: is TYPEDEF strong
error_msg(@2, "%s %s IS TYPEDEF must be level 01",
field->level_str(), field->name);
}
field->codeset.set();
field->attr |= typedef_e;
if( $strong ) field->attr |= strongdef_e;
if( ! current.typedef_add(field) ) {
@ -7007,6 +7081,8 @@ context_word: APPLY { static char s[] ="APPLY";
$$ = s; } // LOCK MODE clause
| MULTIPLE { static char s[] ="MULTIPLE";
$$ = s; } // LOCK ON phrase
| NAT { static char s[] ="NAT";
$$ = s; } // CONVERT function
| NEAREST_AWAY_FROM_ZERO { static char s[] ="NEAREST-AWAY-FROM-ZERO";
$$ = s; } // INTERMEDIATE ROUNDING clause and ROUNDED phrase
| NEAREST_EVEN { static char s[] ="NEAREST-EVEN";
@ -8544,7 +8620,7 @@ advance_by: scalar lines { $$ = $1; } /* BUG: should accept reference */
* number of lines is negative. So, we use the
* negative Number Of The Beast as a PAGE flag.
*/
$$ = new_reference( new_literal("-666") );
$$ = new_reference( new_literal(xstrdup("-666")) );
}
| device_name { $$ = new_reference(literally_one); }
;
@ -8601,7 +8677,33 @@ io_invalid: INVALID key {
delete: delete_impl end_delete
| delete_cond end_delete
| delete_file end_delete
;
delete_file: DELETE delete_file_body[stmt] delete_error[err] {
if( ! $err.on_error ) parser_file_delete_on_exception($stmt);
if( ! $err.not_error ) parser_file_delete_not_exception($stmt);
parser_file_delete_end($stmt);
current.declaratives_evaluate();
}
delete_file_body:
FILE_KW override filenames retry_phrase {
$$ = label_add(@$, LblXml, uniq_label("xfile"));
xml_statements.push($$);
statement_begin(@$, DELETE);
std::vector<cbl_file_t*>
filenames($filenames->files.begin(),
$filenames->files.end() );
parser_file_delete_file( $$, filenames);
}
;
retry_phrase: %empty
| RETRY expr TIMES
| FOR expr SECONDS
| FOREVER {
cbl_unimplemented("DELETE FILE RETRY");
}
;
delete_impl: DELETE delete_body[file]
{
file_delete_args.call_parser_file_delete(true);
@ -8634,6 +8736,63 @@ delete_body: filename[file] record
$$ = $file;
}
;
delete_error: %empty %prec DELETE {
$$.on_error = $$.not_error = nullptr;
}
| delete_excepts %prec DELETE
;
delete_excepts: delete_except[a] statements %prec DELETE
{
assert( $a.on_error || $a.not_error );
assert( ! ($a.on_error && $a.not_error) );
$$ = $a;
}
| delete_excepts[a] delete_except[b] statements %prec DELETE
{
if( $a.on_error && $a.not_error ) {
error_msg(@1, "too many ON ERROR clauses");
YYERROR;
}
// "ON" and "NOT ON" could be reversed, but not duplicated.
if( $a.on_error && $b.on_error ) {
error_msg(@1, "duplicate ON ERROR clauses");
YYERROR;
}
if( $a.not_error && $b.not_error ) {
error_msg(@1, "duplicate NOT ON ERROR clauses");
YYERROR;
}
$$ = $a;
if( $$.on_error ) {
assert($b.not_error);
$$.not_error = $b.not_error;
} else {
assert($b.on_error);
$$.on_error = $b.on_error;
}
}
;
delete_except: EXCEPTION
{
auto xml_stmt = xml_statements.top();
// The value of the pointer no longer matters, only NULL or not.
$$.on_error = $$.not_error = nullptr;
switch($1) {
case EXCEPTION:
$$.on_error = xml_stmt;
parser_file_delete_on_exception(xml_stmt);
break;
case NOT:
$$.not_error = xml_stmt;
parser_file_delete_not_exception(xml_stmt);
break;
default:
gcc_unreachable();
}
}
;
end_delete: %empty %prec DELETE
| END_DELETE
;
@ -10536,7 +10695,9 @@ function_udf: FUNCTION_UDF '(' arg_list[args] ')' {
cbl_ffi_arg_t actual(param.crv, ar);
return actual;
} );
auto name = new_literal(strlen(L->name), L->name, quoted_e);
// Pretend hex-encoded because that means use verbatim.
auto attr = cbl_field_attr_t(quoted_e | hex_encoded_e);
auto name = new_literal(strlen(L->name), L->name, attr);
ast_call( @1, name, $$, args.size(), args.data(), NULL, NULL, true );
}
| FUNCTION_UDF_0 {
@ -10547,8 +10708,11 @@ function_udf: FUNCTION_UDF '(' arg_list[args] ')' {
const auto returning = cbl_field_of(symbol_at(L->returning));
$$ = new_temporary_clone(returning);
$$->data.initial = returning->name; // user's name for the field
auto name = new_literal(strlen(L->name), L->name, quoted_e);
cbl_field_attr_t call_attr
= (cbl_field_attr_t)(quoted_e|hex_encoded_e);
cbl_field_t *name = new_literal(strlen(L->name),
L->name,
call_attr);
ast_call( @1, name, $$, narg, args, NULL, NULL, true );
}
;
@ -11135,6 +11299,18 @@ subst_input: anycase first_last varg[v1] varg[v2] {
}
;
locale_name: NAME
{
auto e = symbol_locale(PROGRAM, $NAME);
if( !e ) {
error_msg(@NAME, "no such SPECIAL-NAMES LOCALE: %qs", $NAME);
YYERROR;
}
$$ = const_cast<char*>(
__gg__encoding_iconv_name(cbl_locale_of(e)->encoding) );
}
;
intrinsic_locale:
LOCALE_COMPARE '(' varg[r1] varg[r2] ')'
{
@ -11143,11 +11319,12 @@ intrinsic_locale:
cbl_refer_t dummy = {};
if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2, &dummy) ) YYERROR;
}
| LOCALE_COMPARE '(' varg[r1] varg[r2] varg[r3] ')'
| LOCALE_COMPARE '(' varg[r1] varg[r2] locale_name ')'
{
location_set(@1);
$$ = new_alphanumeric();
if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2, $r3) ) YYERROR;
cbl_refer_t locale(new_literal($locale_name));
if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2, &locale) ) YYERROR;
}
| LOCALE_DATE '(' varg[r1] ')'
@ -11453,6 +11630,10 @@ optional: %empty { $$ = false; }
| OPTIONAL { $$ = true; }
;
override: %empty { $$ = false; }
| OVERRIDE { $$ = true; }
;
program_kw: %empty
| PROGRAM_kw
;
@ -11900,6 +12081,7 @@ void ast_call( const YYLTYPE& loc, cbl_refer_t name, const cbl_refer_t& returnin
if( is_literal(name.field) ) {
cbl_field_t called = { FldLiteralA, quoted_e | constant_e,
name.field->data, 77 };
called.attr |= name.field->attr;
snprintf(called.name, sizeof(called.name), "_%s", name.field->data.initial);
name.field = cbl_field_of(symbol_field_add(PROGRAM, &called));
symbol_field_location(field_index(name.field), loc);
@ -13030,13 +13212,13 @@ struct expand_group : public std::list<cbl_refer_t> {
};
static const char * initial_default_value;
const char * wsclear() { return initial_default_value; }
static const uint32_t * initial_default_value;
const uint32_t * wsclear() { return initial_default_value; }
void
wsclear( char ch ) {
static char byte = ch;
initial_default_value = &byte;
wsclear( uint32_t i ) {
static uint32_t init_val = i;
initial_default_value = &init_val;
current.program_needs_initial();
}
@ -13558,16 +13740,16 @@ literal_t::set( const cbl_field_t * field ) {
literal_t&
literal_t::set_prefix( const char *input, size_t len ) {
encoding = current_encoding('A');
encoding = current_encoding(display_encoding_e);
assert(len < sizeof(prefix));
std::fill(prefix, prefix + sizeof(prefix), '\0');
std::transform(input, input + len, prefix, toupper);
switch(prefix[0]) {
case '\0': case 'Z':
encoding = current_encoding('A');
encoding = current_encoding(display_encoding_e);
break;
case 'N':
encoding = current_encoding('N');
encoding = current_encoding(national_encoding_e);
if( 'X' == prefix[1] ) {
cbl_unimplemented("NX literals");
}
@ -13583,7 +13765,7 @@ literal_t::set_prefix( const char *input, size_t len ) {
default:
gcc_unreachable();
}
assert(encoding <= iconv_YU_e);
assert(valid_encoding(encoding));
return *this;
}
@ -13608,8 +13790,8 @@ literal_attr( const char prefix[] ) {
case 'X':
switch(prefix[0]) {
case 'B': return cbl_field_attr_t(hex_encoded_e | bool_encoded_e);
case 'N':
case 'U': cbl_unimplemented("National"); return none_e;
case 'N': cbl_unimplemented("Hexadecimal National"); return none_e;
case 'U': cbl_unimplemented("Hexadecimal Unicode"); return none_e;
}
break;
}

View File

@ -273,38 +273,11 @@ static inline char * dequote( char input[] ) {
static const char *
name_of( cbl_field_t *field ) {
assert(field);
// Because this can be called after .initial has been converted to the
// field->codeset.encoding, we have to undo that. There may be some danger
// associated with returning a static. I don't actually know. -- RJD.
static size_t static_length = 0;
static char * static_buffer = nullptr;
if( field->data.initial == nullptr ) return field->name;
if( field->name[0] == '_' )
{
// Make a copy of .initial
if( static_length < field->data.capacity+1 )
{
static_length = field->data.capacity+1;
static_buffer = static_cast<char *>(xrealloc(static_buffer,
static_length));
memcpy(static_buffer, field->data.initial, field->data.capacity);
static_buffer[field->data.capacity] = '\0';
}
// Convert it from ->encoding to DEFAULT_CHARMAP_SOURCE
size_t charsout;
char *converted = __gg__iconverter(field->codeset.encoding,
DEFAULT_CHARMAP_SOURCE,
field->data.initial,
field->data.capacity,
&charsout );
memcpy(static_buffer, converted, charsout);
static_buffer[charsout] = '\0';
}
if( field->data.initial == nullptr ) {
return field->name;
}
return field->name[0] == '_' && field->data.initial?
static_buffer : field->name;
field->data.original() : field->name;
}
static const char *
@ -1337,6 +1310,7 @@ std::map<std::string, std::list<std::string>>
class prog_descr_t {
std::set<std::string> call_targets, subprograms;
std::set<cbl_locale_t> locales;
public:
std::set<function_descr_t> function_repository;
size_t program_index;
@ -1361,17 +1335,14 @@ public:
} alpha, national;
encoding_t() : national(EBCDIC_e) {}
} alphabet;
struct locale_t {
cbl_name_t name; const char *os_name;
locale_t() : name(""), os_name(nullptr) {}
locale_t(const cbl_name_t name, const char *os_name)
: name(""), os_name(os_name) {
if( name ) {
bool ok = namcpy(YYLTYPE(), this->name, name);
gcc_assert(ok);
}
}
} locale;
bool locale_add( const cbl_locale_t& locale ) {
auto e = symbol_locale_add(program_index, &locale);
assert(e);
auto p = locales.insert(locale);
return p.second;
}
cbl_options_t options;
explicit prog_descr_t( size_t isymbol )
@ -1904,7 +1875,14 @@ static class current_t {
return program.alphabet.alpha.encoding;
}
cbl_encoding_t national_encoding() const {
if( programs.empty() ) return EBCDIC_e;
cbl_encoding_t when_empty = EBCDIC_e;
char *alternate = getenv("NATIONAL");
if( alternate )
{
when_empty = __gg__encoding_iconv_type(alternate);
gcc_assert(when_empty);
}
if( programs.empty() ) return when_empty;
const prog_descr_t& program = programs.top();
return program.alphabet.national.encoding;
}
@ -1929,23 +1907,8 @@ static class current_t {
return programs.top().options.default_round = mode;
}
const char *
locale() {
return programs.empty()? NULL : programs.top().locale.os_name;
}
const char *
locale( const cbl_name_t name ) {
if( programs.empty() ) return NULL;
const prog_descr_t::locale_t& locale = programs.top().locale;
return 0 == strcmp(name, locale.name)? locale.name : NULL;
}
const prog_descr_t::locale_t&
locale( const cbl_name_t name, const char os_name[] ) {
if( programs.empty() ) {
static prog_descr_t::locale_t empty;
return empty;
}
return programs.top().locale = prog_descr_t::locale_t(name, os_name);
bool locale_add( const cbl_locale_t& locale ) {
return programs.top().locale_add(locale);
}
bool new_program ( const YYLTYPE& loc, cbl_label_type_t type,
@ -2296,11 +2259,13 @@ add_debugging_declarative( const cbl_label_t * label ) {
}
}
cbl_options_t current_options() {
cbl_options_t
current_options() {
return current.options_paragraph;
}
cbl_encoding_t current_encoding( char a_or_n ) {
cbl_encoding_t
current_encoding( char a_or_n ) {
cbl_encoding_t retval;
switch(a_or_n) {
case 'A':
@ -2316,14 +2281,17 @@ cbl_encoding_t current_encoding( char a_or_n ) {
return retval;
}
size_t current_program_index() {
size_t
current_program_index() {
return current.program()? current.program_index() : 0;
}
cbl_label_t * current_section() {
cbl_label_t *
current_section() {
return current.section();
}
cbl_label_t * current_paragraph() {
cbl_label_t *
current_paragraph() {
return current.paragraph();
}
@ -2402,8 +2370,13 @@ literal_refmod_valid( YYLTYPE loc, const cbl_refer_t& r );
static bool
is_integer_literal( const cbl_field_t *field ) {
if( field->type == FldLiteralN ) {
const char *initial = field->data.initial;
size_t nchar;
const char *initial = __gg__iconverter(field->codeset.encoding,
DEFAULT_SOURCE_ENCODING,
field->data.initial,
strlen(field->data.initial),
&nchar);
assert(strlen(initial) == nchar);
switch( *initial ) {
case '-': case '+': ++initial;
}
@ -2982,16 +2955,28 @@ blank_pad_initial( const char initial[], size_t capacity, size_t new_size ) {
return p;
}
/*
* When cbl_field_t::internalize is called, its data.initial value has been
* set, but nothing has been done to it. It is encoded according to the source
* code. internalize() converts data.initial to the field's encoding.
*
* If syntax used was was PIC VALUE, in that order, then PIC set the field's
* encoding, and the VALUE clause can verify that its encoding matches. If the
* order was VALUE PIC, the value leaves the encoding uninitialized unless the
* value string bore an encoding prefix. When PIC is processed, codeset_t::set
* allows it to set the encoding only if it's either uninitialized, or the PIC
* encoding matches the existing one set by VALUE. In no event does one
* override the other; they must agree.
*
* internalize() fails if data.initial cannot be converted to the field's
* encoding.
*/
static void
value_encoding_check( const YYLTYPE& loc, cbl_field_t *field, cbl_encoding_t encoding ) {
value_encoding_check( const YYLTYPE& loc, cbl_field_t *field ) {
if( ! field->internalize() ) {
error_msg(loc, "inconsistent string literal encoding for '%s'",
field->data.initial);
}
if( encoding != field->codeset.encoding ) {
warn_msg(loc, "VALUE encoded as %qs for data item encoded as %qs",
__gg__encoding_iconv_name(encoding), field->codeset.name());
}
}
#pragma GCC diagnostic push
@ -3046,12 +3031,16 @@ file_add( YYLTYPE loc, cbl_file_t *file ) {
static cbl_alphabet_t *
alphabet_add( const YYLTYPE& loc, cbl_encoding_t encoding ) {
cbl_alphabet_t alphabet(loc, encoding);
alphabet_add( const cbl_alphabet_t& alphabet ) {
symbol_elem_t *e = symbol_alphabet_add(PROGRAM, &alphabet);
assert(e);
return cbl_alphabet_of(e);
}
static cbl_alphabet_t *
alphabet_add( const YYLTYPE& loc, cbl_encoding_t encoding ) {
cbl_alphabet_t alphabet(loc, encoding);
return alphabet_add(alphabet);
}
// The current field always exists in the symbol table, even if it's incomplete.
static cbl_field_t *
@ -3302,8 +3291,9 @@ data_division_ready() {
static size_t nsymbol = 0;
if( (nsymbol = symbols_update(nsymbol, nparse_error == 0)) > 0 ) {
if( ! literally_one ) {
literally_one = new_literal("1");
literally_zero = new_literal("0");
// Use strdup so cbl_field_t::internalize can free them if need be.
literally_one = new_literal(xstrdup("1"));
literally_zero = new_literal(xstrdup("0"));
}
}

View File

@ -1801,8 +1801,8 @@ B-SHIFT-RC
if( elem->type == SymField ) {
auto f = cbl_field_of(elem);
if( f->type == FldLiteralA && f->has_attr(constant_e) ) {
type = date_time_fmt(f->data.initial);
yylval.string = xstrdup(f->data.initial);
type = date_time_fmt(f->data.original());
yylval.string = xstrdup(f->data.original());
}
} else {
yylval.string = xstrdup(yytext);

View File

@ -140,7 +140,13 @@ extern bool cursor_at_sol;
fprintf(stderr, "%s", (b).field->name); \
if( (b).field->type == FldLiteralA || (b).field->type == FldLiteralN ) \
{ \
fprintf(stderr, " \"%s\"", (b).field->data.initial); \
size_t nbytes; \
const char *literal = __gg__iconverter((b).field->codeset.encoding, \
DEFAULT_SOURCE_ENCODING, \
(b).field->data.initial, \
strlen((b).field->data.initial), \
&nbytes); \
fprintf(stderr, " \"%s\"", literal); \
} \
else \
{ \

View File

@ -293,7 +293,7 @@ elementize( const cbl_field_t& field ) {
// Dubner did the following because he didn't feel like creating yet another
// cbl_field_t constructor that included the hardcoded encoding for the
// global special registers.
sym.elem.field.codeset.encoding = iconv_CP1252_e;
sym.elem.field.codeset.set();
return sym;
}
@ -511,6 +511,9 @@ symbol_elem_cmp( const void *K, const void *E )
case SymSpecial:
return special_pair_cmp(k->elem.special, e->elem.special)? 0 : 1;
break;
case SymLocale:
return strcasecmp(k->elem.locale.name, e->elem.locale.name);
break;
case SymAlphabet:
return strcasecmp(k->elem.alphabet.name, e->elem.alphabet.name);
break;
@ -676,6 +679,22 @@ symbol_special( size_t program, const char name[] )
return symbol_at(p->second);
}
struct symbol_elem_t *
symbol_locale( size_t program, const char name[] )
{
cbl_locale_t locale(name);
assert(strlen(name) < sizeof locale.name);
strcpy(locale.name, name);
struct symbol_elem_t key(SymLocale, program), *e;
key.elem.locale = locale;
e = static_cast<struct symbol_elem_t *>(lfind( &key, symbols.elems,
&symbols.nelem, sizeof(key),
symbol_elem_cmp ) );
return e;
}
struct symbol_elem_t *
symbol_alphabet( size_t program, const char name[] )
{
@ -1510,11 +1529,11 @@ field_str( const cbl_field_t *field ) {
{
// Apparently we need to trace back the meaning of data.literal for
// field::type == FldNumericDisplay
enc_from = DEFAULT_CHARMAP_SOURCE;
enc_from = DEFAULT_SOURCE_ENCODING;
}
init = __gg__iconverter(enc_from,
DEFAULT_CHARMAP_SOURCE,
DEFAULT_SOURCE_ENCODING,
false_data,
field->data.capacity,
&charsout);
@ -1522,12 +1541,12 @@ field_str( const cbl_field_t *field ) {
auto eoinit = init + strlen(init);
char *s = xasprintf("'%s'", init);
// No NUL within the initial data.
// No NUL within the initial data.
auto ok = std::none_of( init, eoinit,
[]( char ch ) { return ch == '\0'; } );
assert(ok);
// If any of the init are unprintable, provide a hex version.
// If any of the init are unprintable, provide a hex version.
if( ! std::all_of(init, eoinit, fisprint) ) {
if( is_elementary(field->type) && field->type != FldPointer ) {
const size_t len = strlen(s) + 8 + 2 * field->data.capacity;
@ -1663,7 +1682,7 @@ symbols_alphabet_set( size_t program, const char name[]) {
//// // Define alphabets for codegen.
//// const cbl_alphabet_t *alphabet = nullptr;
//// bool supported = true;
////
////
//// std::for_each( symbols_begin(program), symbols_end(),
//// [&alphabet, &supported]( const auto& sym ) {
//// if( sym.type == SymAlphabet ) {
@ -1679,7 +1698,7 @@ symbols_alphabet_set( size_t program, const char name[]) {
//// cbl_unimplemented("alphabet %qs (as %qs)", alphabet->name, encoding);
//// return false;
//// }
////
////
//// // Set collation sequence before parser_symbol_add.`
//// if( name ) {
//// symbol_elem_t *e = symbol_alphabet(program, name);
@ -1906,38 +1925,46 @@ symbols_update( size_t first, bool parsed_ok ) {
}
}
if( ! field->codeset.valid() ) {
switch(field->type) {
case FldForward:
case FldInvalid:
gcc_unreachable();
case FldAlphaEdited:
case FldAlphanumeric:
case FldClass:
case FldDisplay:
case FldGroup:
case FldLiteralA:
case FldNumericDisplay:
case FldNumericEdited:
if( ! field->codeset.consistent() ) {
if( ! field->codeset.valid() ) {
switch(field->type) {
case FldForward:
case FldInvalid:
gcc_unreachable();
case FldAlphaEdited:
case FldAlphanumeric:
case FldClass:
case FldDisplay:
case FldGroup:
case FldLiteralA:
case FldLiteralN:
case FldNumericDisplay:
case FldNumericEdited:
if( ! (field->has_attr(register_e) || field->has_attr(hex_encoded_e)) ) {
error_msg(symbol_field_location(field_index(field)),
"internal: %qs encoding not defined", field->name);
}
break;
case FldConditional:
case FldFloat:
case FldIndex:
case FldNumericBin5:
case FldNumericBinary:
case FldPacked:
case FldPointer:
case FldSwitch:
break;
}
} else {
if( ! (field->has_attr(register_e) || field->has_attr(hex_encoded_e)) ) {
error_msg(symbol_field_location(field_index(field)),
"internal: %qs encoding not defined", field->name);
"internal: %qs encoding %qs inconsistent",
field->name,
cbl_alphabet_t::encoding_str(field->codeset.encoding) );
}
break;
case FldConditional:
case FldFloat:
case FldIndex:
case FldLiteralN:
case FldNumericBin5:
case FldNumericBinary:
case FldPacked:
case FldPointer:
case FldSwitch:
break;
}
}
assert( ! field->is_typedef() );
if( parsed_ok ) parser_symbol_add(field);
@ -2541,6 +2568,13 @@ symbol_file_add( size_t program, cbl_file_t *file ) {
return e;
}
symbol_elem_t *
symbol_locale_add( size_t program, const cbl_locale_t *locale ) {
symbol_elem_t sym{ SymLocale, program };
sym.elem.locale = *locale;
return symbol_add(&sym);
}
symbol_elem_t *
symbol_alphabet_add( size_t program, const cbl_alphabet_t *alphabet ) {
symbol_elem_t sym{ SymAlphabet, program };
@ -3202,19 +3236,56 @@ constant_of( size_t isym )
return field;
}
cbl_locale_t::cbl_locale_t( const cbl_name_t name, const char iconv_name[] ) {
gcc_assert(strlen(name) < sizeof this->name);
strcpy(this->name, name);
if( iconv_name ) {
encoding = __gg__encoding_iconv_type(iconv_name);
strcpy(collation, "C");
// If the iconv_name is prefixed by langauge_COUNTRY (e.g. en_US), capture that.
auto pend = iconv_name + strlen(iconv_name);
auto p = std::find(iconv_name, pend, '.');
if( p < pend ) {
auto pend2 = std::copy(iconv_name, p, collation);
std::fill(pend2, collation + sizeof(collation), '\0');
iconv_name = ++p;
}
encoding = __gg__encoding_iconv_type(iconv_name);
}
}
cbl_alphabet_t::cbl_alphabet_t(const YYLTYPE& loc, size_t locale, cbl_name_t name )
: loc(loc)
, locale(locale)
, low_index(0)
, high_index(255)
, last_index(0)
{
if( locale > 0 ) {
encoding = cbl_locale_of(symbol_at(locale))->encoding;
}
memset(collation_sequence, 0xFF, sizeof(collation_sequence));
if( name ) { // from Special-Names collation_sequence
assert(strlen(name) < sizeof(cbl_name_t));
strcpy(this->name, name);
}
}
/*
* As parsed, the alphabet reflects the encoding of the source code. If the
* program uses a different encoding for alphanumeric, convert the alphabet to
* that.
*
* that.
*
* Because a custom alphabet is rare and occurs at most only once per program,
* we don't attempt to avoid re-encoding. "Conversion" of ASCII to ASCII is at
* most 256 calls to iconv(3).
* most 256 calls to iconv(3).
*/
void
cbl_alphabet_t::reencode() {
const unsigned char * const pend = alphabet + sizeof(alphabet);
const unsigned char * const pend = collation_sequence + sizeof(collation_sequence);
std::vector<char> tgt(256, (char)0xFF);
/* Keep copies of low_index and last_index for use in run-time as LOW-VALUE
@ -3230,13 +3301,14 @@ cbl_alphabet_t::reencode() {
* a custom alphabet are from NIST, which of course are ASCII.
*/
const char *fromcode = __gg__encoding_iconv_name(CP1252_e);
const char *tocode = __gg__encoding_iconv_name(current_encoding('A'));
const char *tocode =
__gg__encoding_iconv_name(current_encoding(display_encoding_e));
iconv_t cd = iconv_open(tocode, fromcode);
#if optimal_reencode
if( fromcode == tocode ) { // semantically
tgt.resize(0);
return tgt; // Return empty vector; caller copies zero bytes.
return tgt; // Return empty vector; caller copies zero bytes.
}
#endif
@ -3247,14 +3319,14 @@ cbl_alphabet_t::reencode() {
* that letter in the alphanumeric encoding, and set its collation position
* in that alphabet.
*/
for( const unsigned char *p = alphabet; p < pend; p++ ) {
for( const unsigned char *p = collation_sequence; p < pend; p++ ) {
if( *p == 0xFF ) continue;
unsigned char ch = p - alphabet;
unsigned char ch = p - collation_sequence;
unsigned char pos[8] = {};
size_t inbytesleft = 1, outbytesleft = sizeof(pos);
char *inbuf = reinterpret_cast<char*>(&ch),
*outbuf = reinterpret_cast<char*>(pos);
size_t n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
if( n == size_t(-1) ) {
@ -3273,7 +3345,7 @@ cbl_alphabet_t::reencode() {
fromcode, ch, ch, n, tocode);
continue;
}
if( ch == low_index ) {
low_index = pos[0];
}
@ -3283,21 +3355,21 @@ cbl_alphabet_t::reencode() {
if( ch == high_index ) {
high_index = pos[0];
}
tgt.at(pos[0]) = *p;
}
std::copy(tgt.begin(), tgt.end(), alphabet);
std::copy(tgt.begin(), tgt.end(), collation_sequence);
}
bool
cbl_alphabet_t::assign( const YYLTYPE& loc, unsigned char ch, unsigned char high_value ) {
if( alphabet[ch] == 0xFF || alphabet[ch] == high_value) {
alphabet[ch] = high_value;
if( collation_sequence[ch] == 0xFF || collation_sequence[ch] == high_value) {
collation_sequence[ch] = high_value;
last_index = ch;
return true;
}
auto taken = alphabet[ch];
auto taken = collation_sequence[ch];
error_msg(loc, "ALPHABET %s, character %<%c%> (X%'%x%') "
"in position %d already defined at position %d",
name,
@ -3310,7 +3382,7 @@ cbl_alphabet_t::assign( const YYLTYPE& loc, unsigned char ch, unsigned char high
void
cbl_alphabet_t::also( const YYLTYPE& loc, size_t ch ) {
if( ch < 256 ) {
alphabet[ch] = alphabet[last_index];
collation_sequence[ch] = collation_sequence[last_index];
if( ch == high_index ) high_index--;
return;
} // else it's a figurative constant ...
@ -3323,20 +3395,20 @@ cbl_alphabet_t::also( const YYLTYPE& loc, size_t ch ) {
// last_index is already set; use it as the "last value before ALSO"
if( attr & low_value_e ) {
alphabet[0] = alphabet[last_index];
collation_sequence[0] = collation_sequence[last_index];
return;
}
if( attr & high_value_e ) {
alphabet[high_index--] = alphabet[last_index];
collation_sequence[high_index--] = collation_sequence[last_index];
return;
}
if( attr & (space_value_e|quote_value_e) ) {
ch = field->data.initial[0];
alphabet[ch] = alphabet[last_index];
collation_sequence[ch] = collation_sequence[last_index];
return;
}
if( attr & (zero_value_e) ) {
alphabet[0] = alphabet[last_index];
collation_sequence[0] = collation_sequence[last_index];
error_msg(loc, "ALSO value '%s' is unknown", field->name);
return;
}
@ -3448,18 +3520,33 @@ new_literal_add( const char initial[], uint32_t len,
}
else
{
static char empty[2] = "\0";
field = new_temporary_impl(FldLiteralA);
field->attr |= attr;
field->data.initial = len > 0? initial : empty;
if(len == 0)
{
// This will cover UTF-32, should that arise.
size_t nbytes = 4;
char *init = static_cast<char *>(xmalloc(nbytes));
memset(init, 0, nbytes);
field->data.initial = init;
}
if(len)
{
char *init = static_cast<char *>(xmalloc(len+4));
memcpy(init, initial, len);
memset(init+len, 0, 4);
field->data.initial = init;
}
field->data.capacity = len;
}
if( ! field->has_attr(hex_encoded_e) ) {
field->codeset.set(encoding);
if( ! field->internalize() ) {
ERROR_FIELD(field, "inconsistent string literal encoding for '%s'", initial);
// If the literal bore a prefix, set the encoding,
if( encoding != cbl_field_t::codeset_t::source_encoding->type ) {
field->codeset.set(encoding);
}
field->internalize();
}
static size_t literal_count = 1;
@ -3595,6 +3682,14 @@ new_alphanumeric( size_t capacity, const cbl_name_t name = nullptr ) {
extern os_locale_t os_locale;
const encodings_t cbl_field_t::codeset_t::source_encodings[2] = {
{ false, iconv_UTF_8_e, "UTF-8" },
{ true, iconv_CP1252_e, "CP1252" },
};
const encodings_t * cbl_field_t::codeset_t::source_encoding = {
cbl_field_t::codeset_t::source_encodings
};
const encodings_t cbl_field_t::codeset_t::standard_internal = {
true, iconv_CP1252_e, "CP1252"
};
@ -3603,7 +3698,7 @@ const encodings_t cbl_field_t::codeset_t::standard_internal = {
cbl_field_t *
new_temporary( enum cbl_field_type_t type, const char *initial, bool is_signed ) {
const bool force_unsigned = type == FldNumericBin5 && ! is_signed;
if( ! initial && ! force_unsigned ) {
assert( ! is_literal(type) ); // Literal type must have literal value.
return temporaries.acquire(type, initial);
@ -3719,29 +3814,26 @@ cbl_field_t::is_ascii() const {
* never reverts.
*/
static const char *
guess_encoding() {
static const char *fromcode;
if( ! fromcode ) {
return fromcode = os_locale.assumed;
}
if( fromcode == os_locale.assumed ) {
fromcode = os_locale.codeset;
if( 0 != strcmp(fromcode, "C") ) { // anything but that
return fromcode;
}
}
return standard_internal.name;
}
const char *
cbl_field_t::internalize() {
static const char *fromcode = guess_encoding();
/* The purpose of this routine is to return a nul-terminated string which
is data.initial converted from the source-code characters to the
codeset.encoding characters.
The contract between this routine and the routines that call it is that
for alphanumeric types, data.initial shall have the same number of
characters as will be needed to fill data.capacity.
Be aware that for PIC X(32) Z"foo", there are the characters "foo",
followed by a NUL, and then 28 spaces to fill it out. It turns out that
iconv, given a character count of 32, converts all 32, including the
embedded NUL. So, that case works even through strlen(initial) is
smaller than the length of initial, which is the same as capacity.
*/
static const char *fromcode = codeset.source_encodings[0].name;
static const size_t noconv = size_t(-1);
static std::map<std::string, iconv_t> tocodes;
static std::unordered_map<std::string, iconv_t> tocodes;
if( ! codeset.valid() ) {
dbgmsg("%s:%d: not converting %s", __func__, __LINE__, data.initial);
@ -3769,20 +3861,33 @@ cbl_field_t::internalize() {
assert(0 == strlen(data.initial));
return data.initial;
}
if( holds_ascii() && is_ascii() ) return data.initial;
if( holds_ascii() && is_ascii() ) {
if( type != FldNumericEdited ) {
if( ! data.initial_within_capacity() ) {
ERROR_FIELD(this, "%s %s VALUE %qs of %zu exceeds size %u",
cbl_field_t::level_str(level), name, data.initial,
strlen(data.initial), data.capacity );
}
}
return data.initial;
}
assert(data.capacity > 0);
// The final 2 bytes of the output are "!\0". It's a debugging sentinel.
size_t n;
size_t inbytesleft = data.capacity;
size_t outbytesleft = inbytesleft;
char *in = const_cast<char*>(data.initial);
char *out = static_cast<char*>( xcalloc(1, outbytesleft + 2) ), *output = out;
if( !is_literal(this) && inbytesleft < strlen(data.initial) ) {
inbytesleft = strlen(data.initial);
}
if( type == FldNumericEdited ) {
outbytesleft = inbytesleft;
}
const unsigned int in_len = inbytesleft;
char *in = const_cast<char*>(data.initial);
char *out = static_cast<char*>( xcalloc(1, outbytesleft + 2) ), *output = out;
assert(fromcode != tocode);
/*
@ -3799,8 +3904,9 @@ cbl_field_t::internalize() {
do {
if( (n = iconv( cd, &in, &inbytesleft, &out, &outbytesleft)) == noconv ) {
if( fromcode == os_locale.assumed ) {
fromcode = standard_internal.name;
if( fromcode == codeset.source_encodings[0].name ) {
codeset.source_encoding = &codeset.source_encodings[1];
fromcode = codeset.source_encoding->name;
tocodes.clear();
cd = tocodes[toname] = iconv_open(tocode, fromcode);
dbgmsg("%s: trying input encoding %s", __func__, fromcode);
@ -3813,7 +3919,7 @@ cbl_field_t::internalize() {
if( n == noconv ) {
size_t i = in_len - inbytesleft;
yywarn("failed to encode %s %qs as %s (%zu of %u bytes left)",
yyerror("failed to encode %s %qs as %s (%zu of %u bytes left)",
fromcode, data.initial + i, tocode, inbytesleft, in_len);
if( false ) return NULL;
return data.initial;
@ -3821,7 +3927,7 @@ cbl_field_t::internalize() {
if( 0 < inbytesleft ) {
// data.capacity + inbytesleft is not correct if the remaining portion has
// multibyte characters. But the fact reamins that the VALUE is too big.
// multibyte characters. But the fact remains that the VALUE is too big.
ERROR_FIELD(this, "%s %s VALUE '%s' requires %zu bytes for size %u",
cbl_field_t::level_str(level), name, data.initial,
data.capacity + inbytesleft, data.capacity );
@ -3829,7 +3935,7 @@ cbl_field_t::internalize() {
// Replace data.initial only if iconv output differs.
if( 0 != memcmp(data.initial, output, out - output) ) {
assert(out <= output + data.capacity);
assert(out <= output + data.capacity || type == FldNumericEdited);
dbgmsg("%s: converted '%.*s' to %s",
__func__, data.capacity, data.initial, tocode);
struct localspace_t {
@ -3858,14 +3964,16 @@ cbl_field_t::internalize() {
data.capacity = out - output; // trailing '!' will be overwritten
}
// Pad with trailing blanks, tacking a '!' on the end.
for( const char *eout = output + data.capacity;
for( const char *eout = output + data.capacity;
out < eout;
out += spc.len ) {
memcpy(out, spc.space, spc.len);
}
out[0] = '!';
// Numeric literal strings may have leading zeros, making their length
// longer than their capacity.
out[0] = type == FldLiteralN? '\0' : '!';
assert(out[1] == '\0');
free(const_cast<char*>(data.initial));
data.orig = data.initial;
data.initial = output;
} else {
free(output);

View File

@ -224,6 +224,7 @@ enum symbol_type_t {
SymAlphabet,
SymFile,
SymDataSection,
SymLocale,
};
// The ISO specification says alphanumeric literals have a maximum length of
@ -237,7 +238,7 @@ struct cbl_field_data_t {
uint32_t capacity, // allocated space
digits; // magnitude: total digits (or characters)
int32_t rdigits; // digits to the right
const char *initial, *picture;
const char *orig, *initial, *picture;
enum etc_type_t { val88_e, upsi_e, value_e } etc_type;
const char *
@ -268,6 +269,7 @@ struct cbl_field_data_t {
, capacity(0)
, digits(0)
, rdigits(0)
, orig(0)
, initial(0)
, picture(0)
, etc_type(value_e)
@ -279,6 +281,7 @@ struct cbl_field_data_t {
, capacity(capacity)
, digits(0)
, rdigits(0)
, orig(0)
, initial(0)
, picture(0)
, etc_type(value_e)
@ -293,6 +296,7 @@ struct cbl_field_data_t {
, capacity(capacity)
, digits(digits)
, rdigits(rdigits)
, orig(0)
, initial(initial)
, picture(picture)
, etc_type(value_e)
@ -387,6 +391,12 @@ struct cbl_field_data_t {
return valify();
}
bool initial_within_capacity() const {
return initial[capacity] == '\0'
|| initial[capacity] == '!';
}
const char *original() const { return orig? orig : initial; }
protected:
cbl_field_data_t& copy_self( const cbl_field_data_t& that ) {
memsize = that.memsize;
@ -531,7 +541,7 @@ struct cbl_field_t {
uint32_t level;
cbl_occurs_t occurs;
struct codeset_t {
static const encodings_t standard_internal;
static const encodings_t standard_internal, source_encodings[2], *source_encoding;
cbl_encoding_t encoding;
size_t alphabet; // unlikely
explicit codeset_t(cbl_encoding_t encoding = custom_encoding_e,
@ -544,22 +554,26 @@ struct cbl_field_t {
||
(alphabet != 0 && encoding == custom_encoding_e);
}
bool consistent() const {
return valid() && ( encoding == current_encoding('A')
||
encoding == current_encoding('N')
||
encoding == UTF8_e );
}
bool set( cbl_encoding_t encoding, size_t alphabet = 0 ) {
assert(encoding <= iconv_YU_e);
assert(valid_encoding(encoding));
if( ! valid() ) { // setting first time
this->encoding = encoding;
this->alphabet = alphabet;
return valid();
}
// DUBNER override. Encoding has to change when
// 01 FOO VALUE ZERO. Just 0 is okay; ZERO is not.
this->encoding = encoding;
return this->encoding == encoding && this->alphabet == alphabet;
}
bool set( const char picture_fragment[] = nullptr) {
if( ! picture_fragment ) {
cbl_encoding_t currenc = current_encoding('A');
bool retval = set(currenc);
cbl_encoding_t enc = current_encoding('A');
bool retval = set(enc);
return retval;
}
size_t len = strlen(picture_fragment);
@ -568,14 +582,15 @@ struct cbl_field_t {
frag.begin(), ftoupper);
switch(frag[0]) {
case 'A': case 'X': case '9':
return set(current_encoding('A'));
return set(current_encoding(display_encoding_e));
case 'N': case 'U':
if( std::all_of(frag.begin(), frag.end(),
[first = frag[0]]( char ch ) {
return first == ch;
} ) ) {
// All N's indicates National; all U's indicates UTF-8.
auto enc = frag[0] == 'N'? current_encoding('N') : UTF8_e;
auto enc = frag[0] == 'N' ? current_encoding(national_encoding_e)
: UTF8_e;
return set(enc);
}
return false; // They all must be the same.
@ -739,7 +754,7 @@ struct cbl_field_t {
uint32_t size() const; // table capacity or capacity
const char * pretty_name() const {
if( name[0] == '_' && data.initial ) return data.initial;
if( name[0] == '_' && data.original() ) return data.original();
return name;
}
static const char * level_str(uint32_t level );
@ -1185,6 +1200,13 @@ struct cbl_arith_error_t {
cbl_label_addresses_t bottom;
};
struct cbl_delete_file_t {
cbl_label_addresses_t over;
cbl_label_addresses_t exception;
cbl_label_addresses_t no_exception;
cbl_label_addresses_t bottom;
};
struct cbl_compute_error_t {
// This is an int. The value is a cbl_compute_error_code_t
tree compute_error_code;
@ -1232,7 +1254,10 @@ struct cbl_label_t {
// for parse_xml processing:
struct cbl_xml_parse_t *xml_parse;
// For parser_file_delete_file
struct cbl_delete_file_t *delete_file;
} structs;
bool is_function() const { return type == LblFunction; }
@ -1525,6 +1550,19 @@ struct cbl_section_t {
}
};
struct cbl_locale_t {
cbl_name_t name;
cbl_encoding_t encoding;
cbl_name_t collation;
explicit cbl_locale_t(const cbl_name_t name,
const char iconv_name[] = nullptr );
bool operator<( const cbl_locale_t& that ) const {
return strcmp(name, that.name) < 0;
}
};
struct cbl_special_name_t {
int token;
enum special_name_t id;
@ -1536,22 +1574,35 @@ struct cbl_special_name_t {
char * hex_decode( const char text[] );
/*
* For a custom alphabet of single-byte encoding, cbl_alphabet_t::alphabet
* An alphabet may just name an encoding, which implies binary collation.
*
* An alphabet may reference a Special-Names LOCALE, which defines an encoding
* and a collation (perhaps by default).
*
* During Special-Names parsing, an Alphabet may reference an as-yet undefined
* LOCALE with an as-yet unknown encoding. As a placeholder it inserts a named,
* undefined cbl_locale_t symbol, which the Alphabet references. If that
* locale is never defined, the encoding remains unknown, resulting in an error
* diagnostic at the end of Special-Names.
*
* For a custom alphabet of single-byte encoding, cbl_alphabet_t::collation_sequence
* holds the collation position of each encoded value.
* If 'A' sorts first (after LOW-VALUE), then alphabet['A'] == 1.
* If the encoding is ASCII, then 'A' is 65 and alphabet[ 65] == 1.
* If the encoding is EBCDIC CP1140, then 'A' is 193 and alphabet[193] == 1.
* If 'A' sorts first (after LOW-VALUE), then collation_sequence['A'] == 1.
* If the encoding is ASCII, then 'A' is 65 and collation_sequence[ 65] == 1.
* If the encoding is EBCDIC CP1140, then 'A' is 193 and collation_sequence[193] == 1.
*/
struct cbl_alphabet_t {
YYLTYPE loc;
cbl_name_t name;
cbl_encoding_t encoding;
unsigned char low_index, high_index, last_index, alphabet[256];
size_t locale; // index to cbl_locale_t symbol
unsigned char low_index, high_index, last_index, collation_sequence[256];
unsigned char low_char, high_char;
cbl_alphabet_t()
: loc { 1,1, 1,1 }
, encoding(ASCII_e)
, locale(0)
, low_index(0)
, high_index(255)
, last_index(0)
@ -1559,12 +1610,13 @@ struct cbl_alphabet_t {
, high_char(0)
{
memset(name, '\0', sizeof(name));
memset(alphabet, 0xFF, sizeof(alphabet));
memset(collation_sequence, 0xFF, sizeof(collation_sequence));
}
cbl_alphabet_t(const YYLTYPE& loc, cbl_encoding_t enc)
: loc(loc)
, encoding(enc)
, locale(0)
, low_index(0)
, high_index(255)
, last_index(0)
@ -1572,14 +1624,17 @@ struct cbl_alphabet_t {
, high_char(0)
{
memset(name, '\0', sizeof(name));
memset(alphabet, 0xFF, sizeof(alphabet));
memset(collation_sequence, 0xFF, sizeof(collation_sequence));
}
cbl_alphabet_t(const YYLTYPE& loc, size_t locale, cbl_name_t name );
cbl_alphabet_t( const YYLTYPE& loc, const cbl_name_t name,
unsigned char low_index, unsigned char high_index,
unsigned char alphabet[] )
unsigned char collation_sequence[] )
: loc(loc)
, encoding(custom_encoding_e)
, locale(0)
, low_index(low_index), high_index(high_index)
, last_index(high_index)
, low_char(low_index)
@ -1587,21 +1642,23 @@ struct cbl_alphabet_t {
{
assert(strlen(name) < sizeof(this->name));
strcpy(this->name, name);
std::copy(alphabet, alphabet + sizeof(this->alphabet), this->alphabet);
std::copy(collation_sequence,
collation_sequence + sizeof(this->collation_sequence),
this->collation_sequence);
}
unsigned char low_value() const {
return alphabet[low_index];
return collation_sequence[low_index];
}
unsigned char high_value() const {
return alphabet[high_index];
return collation_sequence[high_index];
}
void
add_sequence( const YYLTYPE& loc, const unsigned char seq[] ) {
if( low_index == 0 ) low_index = seq[0];
unsigned char last = last_index > 0? alphabet[last_index] + 1 : 0;
unsigned char last = last_index > 0? collation_sequence[last_index] + 1 : 0;
for( const unsigned char *p = seq; !end_of_string(p); p++ ) {
assign(loc, *p, last++);
@ -1612,7 +1669,7 @@ struct cbl_alphabet_t {
add_interval( const YYLTYPE& loc, unsigned char low, unsigned char high ) {
if( low_index == 0 ) low_index = low;
unsigned char last = alphabet[last_index];
unsigned char last = collation_sequence[last_index];
for( unsigned char ch = low; ch < high; ch++ ) {
assign(loc, ch, last++);
@ -1649,8 +1706,11 @@ struct cbl_alphabet_t {
" 0 1 2 3 4 5 6 7"
" 8 9 A B C C E F");
unsigned int row = 0;
for( auto p = alphabet; p < alphabet + sizeof(alphabet); p++ ) {
if( (p - alphabet) % 16 == 0 ) fprintf(stderr, "\n%4X\t", row++);
for( auto p = collation_sequence;
p < collation_sequence + sizeof(collation_sequence); p++ ) {
if( (p - collation_sequence) % 16 == 0 ) {
fprintf(stderr, "\n%4X\t", row++);
}
fprintf(stderr, "%3u ", *p);
}
fprintf(stderr, "\n");
@ -1870,6 +1930,7 @@ struct symbol_elem_t {
cbl_field_t field;
cbl_label_t label;
cbl_special_name_t special;
cbl_locale_t locale;
cbl_alphabet_t alphabet;
cbl_file_t file;
cbl_section_t section;
@ -1927,6 +1988,9 @@ struct symbol_elem_t {
case SymSpecial:
elem.special = that.elem.special;
break;
case SymLocale:
elem.locale = that.elem.locale;
break;
case SymAlphabet:
elem.alphabet = that.elem.alphabet;
break;
@ -2092,6 +2156,18 @@ cbl_special_name_of( symbol_elem_t *e ) {
return &e->elem.special;
}
static inline cbl_locale_t *
cbl_locale_of( symbol_elem_t *e ) {
assert(e && e->type == SymLocale);
return &e->elem.locale;
}
static inline const cbl_locale_t *
cbl_locale_of( const symbol_elem_t *e ) {
assert(e && e->type == SymLocale);
return &e->elem.locale;
}
static inline cbl_alphabet_t *
cbl_alphabet_of( symbol_elem_t *e ) {
assert(e && e->type == SymAlphabet);
@ -2104,6 +2180,7 @@ cbl_alphabet_of( const symbol_elem_t *e ) {
return &e->elem.alphabet;
}
static inline cbl_file_t *
cbl_file_of( symbol_elem_t *e ) {
assert(e && e->type == SymFile);
@ -2477,6 +2554,7 @@ struct symbol_elem_t * symbol_literalA( size_t program, const char name[] );
struct cbl_special_name_t * symbol_special( special_name_t id );
struct symbol_elem_t * symbol_special( size_t program, const char name[] );
struct symbol_elem_t * symbol_locale( size_t program, const char name[] );
struct symbol_elem_t * symbol_alphabet( size_t program, const char name[] );
struct symbol_elem_t * symbol_file( size_t program, const char name[] );
@ -2524,6 +2602,7 @@ cbl_label_t * symbol_label_add( size_t program,
cbl_label_t * symbol_program_add( size_t program, cbl_label_t *input );
symbol_elem_t * symbol_special_add( size_t program,
cbl_special_name_t *special );
symbol_elem_t * symbol_locale_add( size_t program, const cbl_locale_t *locale );
symbol_elem_t * symbol_alphabet_add( size_t program,
const cbl_alphabet_t *alphabet );
symbol_elem_t * symbol_file_add( size_t program,
@ -2548,8 +2627,8 @@ static inline size_t upsi_register() {
return symbol_index(symbol_field(0,0,"UPSI-0"));
}
void wsclear( char ch);
const char *wsclear();
void wsclear( uint32_t ch);
const uint32_t *wsclear();
enum cbl_call_convention_t {
cbl_call_verbatim_e = 'V',

View File

@ -271,6 +271,8 @@ symbol_type_str( enum symbol_type_t type )
return "SymLabel";
case SymSpecial:
return "SymSpecial";
case SymLocale:
return "SymLocale";
case SymAlphabet:
return "SymAlphabet";
case SymFile:
@ -1094,28 +1096,18 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const {
if( has_attr(all_alpha_e) ) {
bool alpha_value = fig != zero_value_e;
// In order to check for all alphabetic characters, we have to convert
// data.initial back to ASCII:
size_t outchars;
char *initial = __gg__iconverter(codeset.encoding,
DEFAULT_CHARMAP_SOURCE,
data.initial,
data.capacity,
&outchars);
if( fig == normal_value_e ) {
alpha_value = std::all_of( initial,
initial +
data.capacity,
[]( char ch ) {
return ISSPACE(ch) ||
ISPUNCT(ch) ||
ISALPHA(ch); } );
alpha_value = std::none_of( data.initial,
data.initial +
data.capacity,
[]( char ch ) {
return
ISPUNCT(ch) ||
ISDIGIT(ch); } );
}
if( ! alpha_value ) {
error_msg(loc, "alpha-only %s VALUE '%s' contains non-alphabetic data",
name, fig == zero_value_e? cbl_figconst_str(fig) : initial);
name, fig == zero_value_e? cbl_figconst_str(fig) : data.initial);
}
}
@ -1315,7 +1307,7 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src )
size_t outcount;
char *in_ascii = static_cast<char *>(xmalloc(4 * src->data.capacity));
const char *in_asciip = __gg__iconverter( src->codeset.encoding,
DEFAULT_CHARMAP_SOURCE,
DEFAULT_SOURCE_ENCODING,
src->data.initial,
src->data.capacity,
&outcount );
@ -2078,7 +2070,8 @@ cobol_lineno() {
const char *
cobol_filename() {
return input_filenames.empty()? input_filename_vestige : input_filenames.top().name;
return input_filenames.empty()?
input_filename_vestige : input_filenames.top().name;
}
void

View File

@ -1376,7 +1376,7 @@ static encodings_t encodings[] = {
{ false, iconv_UTF_7_e, "UTF-7" },
// Is UTF-8 supported?? "supported" means "recognized by parser_alphabet",
// but UTF-8 is not a valid runtime encoding.
{ false, iconv_UTF_8_e, "UTF-8" },
{ false, iconv_UTF_8_e, "UTF-8" },
{ false, iconv_UTF_16_e, "UTF-16" },
{ false, iconv_UTF_16BE_e, "UTF-16BE" },
{ false, iconv_UTF_16LE_e, "UTF-16LE" },
@ -1439,10 +1439,20 @@ cbl_encoding_t
__gg__encoding_iconv_type( const char *name ) {
static encodings_t *eoencodings = encodings + COUNT_OF(encodings);
char *slashless = strdup(name);
assert(slashless);
char *pslash = strchr(slashless, '/');
if( pslash )
{
*pslash = '\0';
}
auto p = std::find_if( encodings, eoencodings,
[name]( const encodings_t& elem ) {
return strcmp(name, elem.name) == 0;
[slashless]( const encodings_t& elem ) {
return strcasecmp(slashless, elem.name) == 0;
} );
free(slashless);
return p < eoencodings? p->type : no_encoding_e;
}
@ -1557,7 +1567,7 @@ __gg__get_charmap(cbl_encoding_t encoding)
if( encoding == custom_encoding_e)
{
encoding = DEFAULT_CHARMAP_SOURCE;
encoding = DEFAULT_SOURCE_ENCODING;
}
charmap_t *retval;

View File

@ -228,13 +228,17 @@ char * __gg__iconverter(cbl_encoding_t from,
size_t length,
size_t *outlength);
#define DEFAULT_CHARMAP_SOURCE (iconv_CP1252_e)
#define DEFAULT_SOURCE_ENCODING (iconv_CP1252_e)
class charmap_t
{
private:
// This is the encoding of this character map
cbl_encoding_t m_encoding;
bool m_is_valid;
bool m_is_big_endian;
bool m_has_bom = false;
int m_stride; // Number of bytes between one character and the next
enum
{
@ -246,32 +250,114 @@ class charmap_t
// need be called but once for each ASCII value.
std::unordered_map<int, int>m_map_of_encodings;
void determine_sign_type()
{
if( mapped_character(ascii_0) & 0x80 )
{
m_numeric_sign_type = sign_type_ebcdic;
}
else
{
m_numeric_sign_type = sign_type_ascii;
}
}
public:
explicit charmap_t(cbl_encoding_t e) : m_encoding(e)
{
determine_sign_type();
}
explicit charmap_t(uint16_t e) : m_encoding(static_cast<cbl_encoding_t>(e))
{
determine_sign_type();
// We are constructing a new charmap_t from an arbitrary encoding. We
// need to figure out how wide it is, its endianness, whether or not
// it is EBCDIC-based, and so on.
// We do that by converting "0" to the target encoding, and we analyze
// what we get back.
size_t outlength = 0;
const char challenge[] = "0";
const unsigned char *response = PTRCAST(unsigned char,
__gg__iconverter(DEFAULT_SOURCE_ENCODING,
m_encoding,
challenge,
1,
&outlength));
unsigned char char_0 = 0x00;
m_is_valid = false;
m_has_bom = false;
m_is_big_endian = false;
if( outlength == 1 )
{
m_stride = 1;
// This is our happy place: A single-byte encoded character set.
char_0 = response[0];
}
else if( outlength == 2 )
{
m_stride = 2;
if( response[0] )
{
char_0 = response[0];
}
else if( response[1] )
{
m_is_big_endian = true;
char_0 = response[1];
}
}
else if( outlength == 4 )
{
// Check for the Byte Order Mark (BOM)
if( response[0] == 0xFF && response[1] == 0xFE )
{
m_stride = 2;
m_has_bom = true;
char_0 = response[2];
}
else if( response[0] == 0xFE && response[1] == 0xFF )
{
m_stride = 2;
m_has_bom = true;
m_is_big_endian = true;
char_0 = response[3];
}
else if( response[0] )
{
m_stride = 4;
char_0 = response[0];
}
else
{
m_stride = 4;
m_is_big_endian = true;
char_0 = response[3];
}
}
else if( outlength == 8 )
{
m_stride = 4;
if( response[0] == 0xFF && response[1] == 0xFE )
{
char_0 = response[4];
}
else if( response[0] == 0xFE && response[1] == 0xFF )
{
m_is_big_endian = true;
char_0 = response[7];
}
}
// With everything else established, we now check the zero character.
// We know about only 0x30 for ASCII and 0xF0 for EBCDIC.
if( char_0 == 0x30 )
{
m_is_valid = true;
m_numeric_sign_type = sign_type_ascii;
}
else if( char_0 == 0xF0 )
{
m_is_valid = true;
m_numeric_sign_type = sign_type_ebcdic;
}
}
bool is_valid() const{return m_is_valid ;}
bool is_big_endian() const{return m_is_big_endian;}
bool has_bom() const{return m_has_bom ;}
int stride() const{return m_stride ;}
int mapped_character(int ch)
{
// The assumption is that anybody calling this routine is providing
// a single-byte character in the DEFAULT_CHARMAP_SOURCE encoding. We
// a single-byte character in the DEFAULT_SOURCE_ENCODING encoding. We
// return the equivalent character in the m_encoding
int retval;
std::unordered_map<int, int>::const_iterator it =
@ -284,7 +370,7 @@ class charmap_t
{
retval = 0;
size_t outlength = 0;
const char *mapped = __gg__iconverter(DEFAULT_CHARMAP_SOURCE,
const char *mapped = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
m_encoding,
PTRCAST(char, &ch),
1,

View File

@ -1195,6 +1195,11 @@ enum cbl_encoding_t {
iconv_YU_e,
};
static inline bool
valid_encoding( cbl_encoding_t enc ) {
return enc <= iconv_YU_e;
}
#define ASCII_e iconv_ASCII_e
#define CP1252_e iconv_CP1252_e
#define EBCDIC_e iconv_CP1140_e

View File

@ -81,6 +81,7 @@ enum cblc_file_prior_op_t
file_op_rewrite,
file_op_delete,
file_op_close,
file_op_remove,
};
/* end implementation details */

View File

@ -191,9 +191,10 @@ handle_errno(cblc_file_t *file, const char *function, const char *msg)
static
char *
get_filename( const cblc_file_t *file,
int is_quoted)
get_filename( const cblc_file_t *file)
{
bool is_quoted = !!(file->flags & file_name_quoted_e);
static size_t fname_size = MINIMUM_ALLOCATION_SIZE;
static char *fname = static_cast<char *>(malloc(MINIMUM_ALLOCATION_SIZE));
massert(fname);
@ -1151,6 +1152,80 @@ __io__file_delete(cblc_file_t *file, bool is_random)
}
}
static void
__io__file_remove(cblc_file_t *file, char *filename, int is_quoted)
{
// filename is the result of a strdup or malloc. Because both FILE OPEN
// and FILE DELETE can establish or change a name, we free it here and
// replace it. The same is true in FILE DELETE Format 2
free(file->filename);
file->filename = filename;
file->flags &= ~file_name_quoted_e;
file->flags |= is_quoted ? file_name_quoted_e : file_flag_none_e;
int erc;
// This code copied from reopen
const char *trimmed_name = get_filename(file);
if( !trimmed_name[0] )
{
bool all_spaces = true;
for(size_t i=0; i<strlen(file->filename); i++)
{
if( file->filename[i] != ascii_space )
{
all_spaces = false;
}
break;
}
if( all_spaces )
{
warnx("Warning: %s specified with a filename that is all spaces",
file->name);
file->io_status = FsNameError; // "31"
goto done;
}
warnx( "%s(): There is no environment variable named \"%s\"\n",
__func__,
file->filename);
file->io_status = FsNoFile; // "35"
goto done;
}
// trimmed_name is now the file system name of the file to be removed.
// If the file is open, we flag that with "41"
if( file->file_pointer )
{
file->io_status = FsIsOpen; // "41"
goto done;
}
// There's been a lot of buildup. We can now try to remove the file:
errno = 0;
erc = remove(trimmed_name);
if( erc == 0 )
{
// All is copacetic. There was a file, and now it's gone.
file->io_status = FsSuccess; // "00"
}
else if( errno == ENOENT )
{
// The file didn't exist.
file->io_status = FsUnavail; // "05"
}
else
{
// We have some other kind of error. Lack of credentials, or whatever.
file->io_status = FsErrno; //
goto done;
}
file->prior_op = file_op_remove;
done:
file->errnum = errno;
establish_status(file, -1);
}
static void
indexed_file_start( cblc_file_t *file,
int relop,
@ -4115,7 +4190,7 @@ __gg__file_reopen(cblc_file_t *file, int mode_char)
// Stash the mode_char for later analysis during READ and WRITE operations
file->mode_char = mode_char;
char *trimmed_name;
trimmed_name = get_filename(file, !!(file->flags & file_name_quoted_e));
trimmed_name = get_filename(file);
if( !trimmed_name[0] )
{
bool all_spaces = true;
@ -4353,8 +4428,10 @@ __io__file_open(cblc_file_t *file,
}
else
{
// filename is the result of a strdup or malloc. We will free() it at
// file close time.
// filename is the result of a strdup or malloc. Because both FILE OPEN
// and FILE DELETE can establish or change a name, we free it here and
// replace it. The same is true in FILE DELETE Format 2
free(file->filename);
file->filename = filename;
file->flags &= ~file_name_quoted_e;
file->flags |= is_quoted ? file_name_quoted_e : file_flag_none_e;
@ -4492,6 +4569,9 @@ public:
size_t length, bool is_random );
typedef void (delete_t)( cblc_file_t *file,
bool is_random );
typedef void (remove_t)( cblc_file_t *file,
char *filename,
int is_quoted);
open_t *Open;
close_t *Close;
@ -4500,6 +4580,7 @@ public:
write_t *Write;
rewrite_t *Rewrite;
delete_t *Delete;
remove_t *Remove;
gcobol_io_t()
: Open(NULL)
@ -4509,15 +4590,17 @@ public:
, Write(NULL)
, Rewrite(NULL)
, Delete(NULL)
, Remove(NULL)
{}
gcobol_io_t( open_t *Open,
gcobol_io_t( open_t *Open,
close_t *Close,
start_t *Start,
read_t *Read,
write_t *Write,
rewrite_t *Rewrite,
delete_t *Delete )
delete_t *Delete,
remove_t *Remove)
: Open(Open)
, Close(Close)
, Start(Start)
@ -4525,6 +4608,7 @@ public:
, Write(Write)
, Rewrite(Rewrite)
, Delete(Delete)
, Remove(Remove)
{}
#if FILE_IO_IMPLEMENTED
@ -4552,7 +4636,8 @@ gcobol_fileops() {
__io__file_read,
__io__file_write,
__io__file_rewrite,
__io__file_delete );
__io__file_delete,
__io__file_remove);
}
/*
@ -4657,9 +4742,19 @@ extern "C"
void
__gg__file_delete(cblc_file_t *file, bool is_random)
{
// DELETE FILE Format 1 - deletes a record.
gcobol_io_t *functions = gcobol_io_funcs();
functions->Delete(file, is_random);
}
extern "C"
void
__gg__file_remove(cblc_file_t *file, char *name, int is_quoted)
{
// DELETE FILE Format 2 - removes a file.
gcobol_io_t *functions = gcobol_io_funcs();
functions->Remove(file, name, is_quoted);
}
/* end interface functions */

File diff suppressed because it is too large Load Diff

View File

@ -289,6 +289,7 @@ class ec_status_t {
case file_op_write: return "write";
case file_op_rewrite: return "rewrite";
case file_op_delete: return "delete";
case file_op_remove: return "remove";
}
return "???";
}
@ -1627,7 +1628,7 @@ int128_to_field(cblc_field_t *var,
var->picture);
size_t outlength;
const char *converted = __gg__iconverter(
DEFAULT_CHARMAP_SOURCE,
DEFAULT_SOURCE_ENCODING,
var->encoding,
PTRCAST(char, location),
var->capacity,
@ -2755,7 +2756,7 @@ __gg__dirty_to_float( const char *dirty,
int delta_r = 0;
// We now loop over the remaining input characters:
char ch = '\0';
unsigned char ch = '\0';
charmap_t *charmap = __gg__get_charmap(field->encoding);
@ -3055,7 +3056,7 @@ format_for_display_internal(char **dest,
// This buffer is larger than can validly be needed
unsigned char converted[128];
size_t outlength;
retval = DEFAULT_CHARMAP_SOURCE;
retval = DEFAULT_SOURCE_ENCODING;
const char *mapped = __gg__iconverter(
var->encoding,
retval,
@ -3285,7 +3286,7 @@ format_for_display_internal(char **dest,
}
char ach[128];
retval = DEFAULT_CHARMAP_SOURCE;
retval = DEFAULT_SOURCE_ENCODING;
charmap_t *charmap = __gg__get_charmap(retval);
__gg__binary_to_string_ascii(ach, digits, value);
@ -3724,7 +3725,13 @@ get_float128( const cblc_field_t *field,
{
if( __gg__decimal_point == '.' )
{
retval = strtofp128(field->initial, NULL);
size_t charsout;
char *converted = __gg__iconverter(field->encoding,
DEFAULT_SOURCE_ENCODING,
field->initial,
strlen(field->initial),
&charsout);
retval = strtofp128(converted, NULL);
}
else
{
@ -3954,7 +3961,7 @@ compare_field_class(const cblc_field_t *conditional,
walker = right + right_len;
GCOB_FP128 left_value;
if( left_flag == 'F' && left[0] == 'Z' )
if( left_flag == ascii_F && left[0] == ascii_Z )
{
left_value = 0;
}
@ -4375,9 +4382,11 @@ __gg__compare_2(cblc_field_t *left_side,
}
massert(buffer);
strcpy(buffer, right_side->initial);
if( __gg__decimal_point == ',' )
{
// We need to replace any commas with periods
// We are operating in DECIMAL IS COMMA mode, so we need to
// replace any commas with periods.
char *p = strchr(buffer, ',');
if(p)
{
@ -4385,8 +4394,9 @@ __gg__compare_2(cblc_field_t *left_side,
}
}
// buffer[] now contains the string we want to convert
// buffer[] now contains the right-side string we want to convert
// to one of the floating-point types. We want them to be the
// same size:
switch(left_side->capacity)
{
case 4:
@ -4970,7 +4980,7 @@ init_var_both(cblc_field_t *var,
{
strcpy(first, walker);
__gg__convert_encoding( first,
DEFAULT_CHARMAP_SOURCE,
DEFAULT_SOURCE_ENCODING,
var->encoding);
}
walker += strlen(first) + 1;
@ -4987,7 +4997,7 @@ init_var_both(cblc_field_t *var,
else
{
__gg__convert_encoding( last,
DEFAULT_CHARMAP_SOURCE,
DEFAULT_SOURCE_ENCODING,
var->encoding);
}
walker += strlen(last) + 1;
@ -6234,7 +6244,7 @@ __gg__move( cblc_field_t *fdest,
// ascii:
size_t charsout;
const char *converted = __gg__iconverter(fsource->encoding,
DEFAULT_CHARMAP_SOURCE,
DEFAULT_SOURCE_ENCODING,
PTRCAST(char, fsource->data+source_offset),
source_size,
&charsout);
@ -9142,11 +9152,17 @@ display_both(cblc_field_t *field,
}
}
size_t conversion_length = strlen(display_string);
if( charmap->stride() != 1 )
{
conversion_length = qual_size;
}
size_t outlength;
const char *converted = __gg__iconverter( encoding,
encout,
display_string,
strlen(display_string),
conversion_length,
&outlength);
write(file_descriptor,
converted,
@ -10059,7 +10075,7 @@ is_numeric_display_numeric(cblc_field_t *field, size_t offset, size_t size)
digits_e -= 1;
unsigned char final_char = (unsigned char)*digits_e;
final_char = charmap->set_digit_negative(final_char, false);
if( final_char<charmap->mapped_character(ascii_0)
if( final_char<charmap->mapped_character(ascii_0)
|| final_char>charmap->mapped_character(ascii_9) )
{
retval = 0;
@ -10420,14 +10436,14 @@ accept_envar( cblc_field_t *tgt,
// Convert the name to the console codeset:
__gg__convert_encoding( trimmed_env,
encoding,
DEFAULT_CHARMAP_SOURCE);
DEFAULT_SOURCE_ENCODING);
// Pick up the environment variable, and convert it to the internal codeset
const char *p = getenv(trimmed_env);
if(p)
{
retval = 0; // Okay
move_string(tgt, tgt_offset, tgt_length, p, DEFAULT_CHARMAP_SOURCE);
move_string(tgt, tgt_offset, tgt_length, p, DEFAULT_SOURCE_ENCODING);
}
free(env);
}
@ -10638,7 +10654,7 @@ __gg__get_argv( cblc_field_t *dest,
dest_offset,
dest_length,
stashed_argv[N],
DEFAULT_CHARMAP_SOURCE);
DEFAULT_SOURCE_ENCODING);
retcode = 0; // Okay
}
return retcode;
@ -11381,7 +11397,7 @@ __gg__unstring( const cblc_field_t *id1, // The string being unstring
id5_o[nreceiver],
id5_s[nreceiver],
"",
DEFAULT_CHARMAP_SOURCE);
DEFAULT_SOURCE_ENCODING);
}
}
@ -11768,6 +11784,7 @@ __gg__check_fatal_exception()
case file_op_write:
case file_op_rewrite:
case file_op_delete:
case file_op_remove:
break;
}
} else {
@ -12039,6 +12056,23 @@ __gg__adjust_dest_size(cblc_field_t *dest, size_t ncount)
}
}
extern "C"
void
__gg__adjust_encoding(cblc_field_t *field)
{
// Assume that field->data is in ASCII; We need to convert it to the target
size_t nbytes;
const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
field->encoding,
PTRCAST(char, field->data),
field->capacity,
&nbytes);
size_t tocopy = std::min(nbytes, field->allocated);
field->capacity = tocopy;
memcpy(field->data, converted, tocopy);
}
extern "C"
void
__gg__func_exception_location(cblc_field_t *dest)
@ -12088,6 +12122,7 @@ __gg__func_exception_location(cblc_field_t *dest)
}
__gg__adjust_dest_size(dest, strlen(ach));
memcpy(dest->data, ach, strlen(ach));
__gg__adjust_encoding(dest);
}
extern "C"
@ -12102,6 +12137,7 @@ __gg__func_exception_statement(cblc_field_t *dest)
}
__gg__adjust_dest_size(dest, strlen(ach));
memcpy(dest->data, ach, strlen(ach));
__gg__adjust_encoding(dest);
}
extern "C"
@ -12128,6 +12164,7 @@ __gg__func_exception_status(cblc_field_t *dest)
}
__gg__adjust_dest_size(dest, strlen(ach));
memcpy(dest->data, ach, strlen(ach));
__gg__adjust_encoding(dest);
}
extern "C"
@ -12195,6 +12232,7 @@ __gg__func_exception_file(cblc_field_t *dest,
__gg__adjust_dest_size(dest, strlen(ach));
memcpy(dest->data, ach, strlen(ach));
__gg__adjust_encoding(dest);
}
extern "C"
@ -12693,7 +12731,7 @@ __gg__just_mangle_name( const cblc_field_t *field,
// We need ach_name to be in ASCII:
size_t charsout;
const char *converted = __gg__iconverter(field->encoding,
DEFAULT_CHARMAP_SOURCE,
DEFAULT_SOURCE_ENCODING,
PTRCAST(char, field->data),
length,
&charsout);
@ -12784,7 +12822,7 @@ __gg__function_handle_from_name(int program_id,
size_t charsout;
const char *converted = __gg__iconverter(field->encoding,
DEFAULT_CHARMAP_SOURCE,
DEFAULT_SOURCE_ENCODING,
PTRCAST(char, field->data + offset),
length,
&charsout);
@ -13093,6 +13131,7 @@ __gg__deallocate( cblc_field_t *target,
static int
get_the_byte(cblc_field_t *field)
{
// This is a helper routine for ALLOCATE
int retval = -1;
if( field )
{
@ -13100,7 +13139,14 @@ get_the_byte(cblc_field_t *field)
retval = __gg__fc_char(field);
if(retval == -1)
{
retval = (int)__gg__get_integer_binary_value(field);
retval = (int)(unsigned char)__gg__get_integer_binary_value(field);
}
else
{
// This is a bit of a hack. It turns out the figurative constant is
// encoded in ASCII. We need it to be in the current DISPLAY encoding.
charmap_t *charmap = __gg__get_charmap(__gg__display_encoding);
retval = charmap->mapped_character(retval);
}
}
return retval;
@ -13373,6 +13419,7 @@ __gg__module_name(cblc_field_t *dest, module_type_t type)
__gg__adjust_dest_size(dest, strlen(result));
memcpy(dest->data, result, strlen(result)+1);
__gg__adjust_encoding(dest);
}
/*
@ -13652,7 +13699,7 @@ __gg__accept_arg_value( cblc_field_t *dest,
dest_offset,
dest_length,
stashed_argv[sv_argument_number],
DEFAULT_CHARMAP_SOURCE);
DEFAULT_SOURCE_ENCODING);
retcode = 0; // Okay
// The Fujitsu spec says bump this value by one.

View File

@ -408,6 +408,7 @@ static void fatalError(void * CTX, const char * msg, ...)
}
#if 0
static xmlEntityPtr getEntity(void * CTX,
const xmlChar * name)
{ SAYSO_DATAZ(name); }
@ -484,6 +485,7 @@ static void setDocumentLocator(void * CTX,
* xmlCtxtGetStandalone() to get data from the XML declaration.
*/
static void startDocument(void * CTX)
{
SAYSO();
}
@ -616,7 +618,6 @@ xmlchar_of( const char input[] ) {
static const char *
xmlParserErrors_str( xmlParserErrors erc, const char name[] ) {
const char *msg = "???";
switch( erc ) {
case XML_ERR_OK:
msg = "Success";
@ -630,6 +631,7 @@ xmlParserErrors_str( xmlParserErrors erc, const char name[] ) {
case XML_ERR_UNSUPPORTED_ENCODING:
msg = "Unsupported character encoding";
break;
#if LIBXML_VERSION >= 21400
case XML_ERR_RESOURCE_LIMIT:
msg = "Internal resource limit like maximum amplification factor exceeded";
@ -710,6 +712,7 @@ static class context_t {
}
}
protected:
void init() {
const char *external_entities = nullptr;