cobol: Convert to individual variable character-set encoding.

Prior to this "patch", the GCOBOL compiler was capable of producing binaries
that operated internally in either ASCII or EBCDIC.  The COBOL specification,
however, allows for the concurrent presence of two encodings, known as
"alphanumeric" and "national".

In order to support this capability, we have chosen to establish an "encoding"
characteristic that gets carried along with every variable,

This change affected many parts of the COBOL front end compiler.  If anybody
looks at any of the changes listed below, they will find they fall into two
classes:

1) Removing the dependence on a global ASCII vs EBCDIC determination.

2) Adding a dependence on a new ::encoding characteristic of the compile time
and run time cbl_field_t and cblc_field_t variable structures.  Those
structures now contain the cbl_encoding_t ::encoding members, which drive the
use of the iconv(3) function in moving back and forth between variable
encodings.

Although the effort is not complete, these changes represent the bulk of what
needs to be done.  With these changes in place, all of our current ASCII and
EBCDIC tests run properly.

gcc/cobol/ChangeLog:

	* cdf.y: In support of the described changes.
	* gcobol.1: Likewise.
	* genapi.cc (level_88_helper): Likewise.
	(get_level_88_domain): Likewise.
	(get_class_condition_string): Likewise.
	(initialize_variable_internal): Likewise.
	(gg_default_qualification): Likewise.
	(cobol_compare): Likewise.
	(move_tree): Likewise.
	(move_tree_to_field): Likewise.
	(psa_FldBlob): Likewise.
	(parser_accept_date_yymmdd): Likewise.
	(parser_accept_date_yyyymmdd): Likewise.
	(parser_accept_date_yyddd): Likewise.
	(parser_accept_date_yyyyddd): Likewise.
	(parser_accept_date_dow): Likewise.
	(parser_accept_date_hhmmssff): Likewise.
	(parser_alphabet): Likewise.
	(parser_alphabet_use): Likewise.
	(parser_display_internal): Likewise.
	(parser_display): Likewise.
	(is_valuable): Likewise.
	(parser_division): Likewise.
	(parser_relop_long): Likewise.
	(parser_setop): Likewise.
	(parser_set_conditional88): Likewise.
	(parser_file_add): Likewise.
	(parser_file_open): Likewise.
	(create_and_call): Likewise.
	(parser_call): Likewise.
	(mh_identical): Likewise.
	(mh_source_is_literalN): Likewise.
	(picky_memcpy): Likewise.
	(mh_numeric_display): Likewise.
	(mh_source_is_group): Likewise.
	(mh_source_is_literalA): Likewise.
	(move_helper): Likewise.
	(initial_from_initial): Likewise.
	(actually_create_the_static_field): Likewise.
	(psa_FldLiteralA): Likewise.
	(parser_symbol_add): Likewise.
	* genmath.cc (arithmetic_operation): Likewise.
	* genutil.cc (get_binary_value): Likewise.
	(get_literal_string): Likewise.
	* genutil.h (EBCDIC_MINUS): Likewise.
	(EBCDIC_PLUS): Likewise.
	(EBCDIC_ZERO): Likewise.
	(EBCDIC_NINE): Likewise.
	* parse.y: Likewise.
	* parse_ante.h (name_of): Likewise.
	(class prog_descr_t): Likewise.
	(current_encoding): Likewise.
	(needs_picture): Likewise.
	(is_callable): Likewise.
	(field_attr_str): Likewise.
	(value_encoding_check): Likewise.
	(field_alloc): Likewise.
	(file_add): Likewise.
	* scan.l: Likewise.
	* structs.cc (create_cblc_field_t): Likewise.
	* symbols.cc (elementize): Likewise.
	(cbl_field_attr_str): Likewise.
	(is_variable_length): Likewise.
	(field_str): Likewise.
	(extend_66_capacity): Likewise.
	(assert): Likewise.
	(symbols_update): Likewise.
	(symbol_field_parent_set): Likewise.
	(add_token): Likewise.
	(symbol_table_init): Likewise.
	(symbol_field_add): Likewise.
	(symbol_field_forward_add): Likewise.
	(symbol_field_same_as): Likewise.
	(cbl_alphabet_t::reencode): Likewise.
	(new_temporary_impl): Likewise.
	(parser_symbol_add2): Likewise.
	(new_literal_add): Likewise.
	(temporaries_t::literal): Likewise.
	(new_literal): Likewise.
	(standard_internal): Likewise.
	(new_temporary): Likewise.
	(cbl_field_t::holds_ascii): Likewise.
	(cbl_field_t::is_ascii): Likewise.
	(cbl_field_t::internalize): Likewise.
	(symbol_label_add): Likewise.
	(symbol_label_section_exists): Likewise.
	(cbl_occurs_t::subscript_ok): Likewise.
	(cbl_file_t::deforward): Likewise.
	(has_value): Likewise.
	* symbols.h (is_numeric): Likewise.
	(__gg__encoding_iconv_name): Likewise.
	(current_encoding): Likewise.
	(struct cbl_field_t): Likewise.
	(new_literal): Likewise.
	(class temporaries_t): Likewise.
	(struct function_descr_t): Likewise.
	(hex_decode): Likewise.
	(struct cbl_alphabet_t): Likewise.
	(struct cbl_file_t): Likewise.
	* symfind.cc (field_structure): Likewise.
	(erase_symbol_map_fwds): Likewise.
	(symbol_find): Likewise.
	* token_names.h: Likewise.
	* util.cc (cbl_field_type_str): Likewise.
	(is_elementary): Likewise.
	(symbol_field_type_update): Likewise.
	(cbl_field_t::report_invalid_initial_value): Likewise.
	(valid_move): Likewise.
	(valid_picture): Likewise.
	(type_capacity): Likewise.
	(gcc_location_set_impl): Likewise.
	(cbl_unimplementedw): Likewise.

libgcobol/ChangeLog:

	* charmaps.cc (raw_is_SBC): Likewise.
	(extract_next_code_point): Likewise.
	(flipper): Likewise.
	(__gg__ascii_to_ascii_chr): Likewise.
	(__gg__ascii_to_ebcdic_chr): Likewise.
	(__gg__raw_to_ascii): Likewise.
	(__gg__raw_to_ebcdic): Likewise.
	(convert_cp1252_to_utf8): Likewise.
	(__gg__text_conversion_override): Likewise.
	(__gg__ascii_to_ascii): Likewise.
	(__gg__encoding_iconv_name): Likewise.
	(__gg__encoding_iconv_type): Likewise.
	(__gg__ascii_to_ebcdic): Likewise.
	(__gg__iconverter): Likewise.
	(__gg__ebcdic_to_ascii): Likewise.
	(__gg__ascii_to_console): Likewise.
	(__gg__ebcdic_to_console): Likewise.
	(__gg__console_to_ascii): Likewise.
	(__gg__console_to_ebcdic): Likewise.
	(_to_ctype): Likewise.
	(_from_ctype): Likewise.
	(__gg__get_charmap): Likewise.
	* charmaps.h (internal_is_ebcdic): Likewise.
	(internal_space): Likewise.
	(internal_zero): Likewise.
	(internal_period): Likewise.
	(internal_comma): Likewise.
	(internal_dquote): Likewise.
	(internal_asterisk): Likewise.
	(internal_plus): Likewise.
	(internal_minus): Likewise.
	(internal_cr): Likewise.
	(internal_ff): Likewise.
	(internal_newline): Likewise.
	(internal_return): Likewise.
	(internal_0): Likewise.
	(internal_1): Likewise.
	(internal_2): Likewise.
	(internal_3): Likewise.
	(internal_4): Likewise.
	(internal_5): Likewise.
	(internal_6): Likewise.
	(internal_7): Likewise.
	(internal_8): Likewise.
	(internal_9): Likewise.
	(internal_colon): Likewise.
	(internal_query): Likewise.
	(internal_A): Likewise.
	(internal_B): Likewise.
	(internal_C): Likewise.
	(internal_D): Likewise.
	(internal_E): Likewise.
	(internal_F): Likewise.
	(internal_G): Likewise.
	(internal_H): Likewise.
	(internal_I): Likewise.
	(internal_J): Likewise.
	(internal_K): Likewise.
	(internal_L): Likewise.
	(internal_M): Likewise.
	(internal_N): Likewise.
	(internal_O): Likewise.
	(internal_P): Likewise.
	(internal_Q): Likewise.
	(internal_R): Likewise.
	(internal_S): Likewise.
	(internal_T): Likewise.
	(internal_U): Likewise.
	(internal_V): Likewise.
	(internal_W): Likewise.
	(internal_X): Likewise.
	(internal_Y): Likewise.
	(internal_Z): Likewise.
	(internal_a): Likewise.
	(internal_b): Likewise.
	(internal_c): Likewise.
	(internal_d): Likewise.
	(internal_e): Likewise.
	(internal_f): Likewise.
	(internal_g): Likewise.
	(internal_h): Likewise.
	(internal_i): Likewise.
	(internal_j): Likewise.
	(internal_k): Likewise.
	(internal_l): Likewise.
	(internal_m): Likewise.
	(internal_n): Likewise.
	(internal_o): Likewise.
	(internal_p): Likewise.
	(internal_q): Likewise.
	(internal_r): Likewise.
	(internal_s): Likewise.
	(internal_t): Likewise.
	(internal_u): Likewise.
	(internal_v): Likewise.
	(internal_w): Likewise.
	(internal_x): Likewise.
	(internal_y): Likewise.
	(internal_z): Likewise.
	(enum text_codeset_t): Likewise.
	(__gg__ascii_to_ascii_chr): Likewise.
	(__gg__ascii_to_ebcdic_chr): Likewise.
	(ascii_to_internal): Likewise.
	(__gg__ascii_to_ascii): Likewise.
	(__gg__ascii_to_ebcdic): Likewise.
	(ascii_to_internal_str): Likewise.
	(__gg__raw_to_ascii): Likewise.
	(__gg__raw_to_ebcdic): Likewise.
	(raw_to_internal): Likewise.
	(__gg__ascii_to_console): Likewise.
	(__gg__ebcdic_to_console): Likewise.
	(internal_to_console): Likewise.
	(__gg__console_to_ascii): Likewise.
	(__gg__console_to_ebcdic): Likewise.
	(console_to_internal): Likewise.
	(__gg__ebcdic_to_ascii): Likewise.
	(internal_to_ascii): Likewise.
	(__gg__encoding_iconv_name): Likewise.
	(__gg__encoding_iconv_type): Likewise.
	(__gg__iconverter): Likewise.
	(DEFAULT_CHARMAP_SOURCE): Likewise.
	(class charmap_t): Likewise.
	(__gg__get_charmap): Likewise.
	* common-defs.h (EBCDIC_MINUS): Likewise.
	(EBCDIC_PLUS): Likewise.
	(EBCDIC_ZERO): Likewise.
	(EBCDIC_NINE): Likewise.
	(PACKED_NYBBLE_PLUS): Likewise.
	(PACKED_NYBBLE_MINUS): Likewise.
	(PACKED_NYBBLE_UNSIGNED): Likewise.
	(NUMERIC_DISPLAY_SIGN_BIT_EBCDIC): Likewise.
	(NUMERIC_DISPLAY_SIGN_BIT): Likewise.
	(SEPARATE_PLUS): Likewise.
	(SEPARATE_MINUS): Likewise.
	(ZONED_ZERO): Likewise.
	(ZONE_SIGNED_EBCDIC): Likewise.
	(enum cbl_field_type_t): Likewise.
	(enum cbl_field_attr_t): Likewise.
	(enum cbl_figconst_t): Likewise.
	(enum cbl_encoding_t): Likewise.
	* constants.cc (struct cblc_field_t): Likewise.
	(X): Likewise.
	(S9): Likewise.
	* gcobolio.h: Likewise.
	* gfileio.cc (get_filename): Likewise.
	(relative_file_delete): Likewise.
	(relative_file_start): Likewise.
	(relative_file_rewrite): Likewise.
	(relative_file_write_varying): Likewise.
	(relative_file_write): Likewise.
	(sequential_file_write): Likewise.
	(line_sequential_file_read): Likewise.
	(sequential_file_read): Likewise.
	(relative_file_read): Likewise.
	(file_indexed_open): Likewise.
	(__gg__file_reopen): Likewise.
	(__io__file_open): Likewise.
	(__io__file_close): Likewise.
	(__gg__file_open): Likewise.
	* intrinsic.cc (trim_trailing_spaces): Likewise.
	(is_zulu_format): Likewise.
	(string_to_dest): Likewise.
	(get_all_time): Likewise.
	(ftime_replace): Likewise.
	(__gg__char): Likewise.
	(__gg__current_date): Likewise.
	(__gg__seconds_past_midnight): Likewise.
	(__gg__formatted_current_date): Likewise.
	(__gg__formatted_date): Likewise.
	(__gg__formatted_datetime): Likewise.
	(__gg__formatted_time): Likewise.
	(__gg__lower_case): Likewise.
	(numval): Likewise.
	(numval_c): Likewise.
	(__gg__ord): Likewise.
	(__gg__trim): Likewise.
	(__gg__random): Likewise.
	(__gg__random_next): Likewise.
	(__gg__reverse): Likewise.
	(__gg__upper_case): Likewise.
	(__gg__when_compiled): Likewise.
	(gets_int): Likewise.
	(gets_year): Likewise.
	(gets_month): Likewise.
	(gets_day): Likewise.
	(gets_day_of_week): Likewise.
	(gets_day_of_year): Likewise.
	(gets_week): Likewise.
	(gets_hours): Likewise.
	(gets_minutes): Likewise.
	(gets_seconds): Likewise.
	(gets_nanoseconds): Likewise.
	(fill_cobol_tm): Likewise.
	(__gg__hex_of): Likewise.
	(floating_format_tester): Likewise.
	(__gg__numval_f): Likewise.
	(__gg__test_numval_f): Likewise.
	(strcasestr): Likewise.
	(strlaststr): Likewise.
	(__gg__locale_compare): Likewise.
	(__gg__locale_date): Likewise.
	(__gg__locale_time): Likewise.
	(__gg__locale_time_from_seconds): Likewise.
	* libgcobol.cc (struct program_state): Likewise.
	(turn_sign_bit_on): Likewise.
	(turn_sign_bit_off): Likewise.
	(is_sign_bit_on): Likewise.
	(__gg__string_to_alpha_edited_ascii): Likewise.
	(int128_to_field): Likewise.
	(edited_to_binary): Likewise.
	(get_binary_value_local): Likewise.
	(__gg__get_date_yymmdd): Likewise.
	(__gg__get_date_yyyymmdd): Likewise.
	(__gg__get_date_yyddd): Likewise.
	(__gg__get_yyyyddd): Likewise.
	(__gg__get_date_dow): Likewise.
	(__gg__get_date_hhmmssff): Likewise.
	(__gg__dirty_to_binary_internal): Likewise.
	(__gg__dirty_to_binary): Likewise.
	(__gg__dirty_to_float): Likewise.
	(psz_to_internal): Likewise.
	(get_scaled_rdigits): Likewise.
	(format_for_display_internal): Likewise.
	(format_for_display_local): Likewise.
	(compare_88): Likewise.
	(compare_field_class): Likewise.
	(compare_strings): Likewise.
	(__gg__compare_2): Likewise.
	(init_var_both): Likewise.
	(alpha_to_alpha_move_from_location): Likewise.
	(alpha_to_alpha_move): Likewise.
	(__gg__move): Likewise.
	(__gg__move_literala): Likewise.
	(normalize_id): Likewise.
	(inspect_backward_format_1): Likewise.
	(__gg__inspect_format_1): Likewise.
	(inspect_backward_format_2): Likewise.
	(__gg__inspect_format_2): Likewise.
	(normalize_for_inspect_format_4): Likewise.
	(__gg__inspect_format_4): Likewise.
	(move_string): Likewise.
	(brute_force_trim): Likewise.
	(__gg__string): Likewise.
	(display_both): Likewise.
	(__gg__display_string): Likewise.
	(not_mangled_core): Likewise.
	(__gg__accept): Likewise.
	(__gg__set_initial_switch_value): Likewise.
	(__gg__onetime_initialization): Likewise.
	(is_numeric_display_numeric): Likewise.
	(is_alpha_a_number): Likewise.
	(__gg__classify): Likewise.
	(__gg__convert_encoding): Likewise.
	(__gg__convert_encoding_length): Likewise.
	(accept_envar): Likewise.
	(__gg__accept_envar): Likewise.
	(__gg__set_envar): Likewise.
	(__gg__get_argc): Likewise.
	(__gg__get_argv): Likewise.
	(__gg__get_command_line): Likewise.
	(__gg__alphabet_use): Likewise.
	(__gg__ascii_to_internal_field): Likewise.
	(__gg__ascii_to_internal): Likewise.
	(__gg__console_to_internal): Likewise.
	(__gg__parser_set_conditional): Likewise.
	(__gg__internal_to_console_in_place): Likewise.
	(__gg__literaln_alpha_compare): Likewise.
	(__gg__unstring): Likewise.
	(struct cbl_exception_t): Likewise.
	(__gg__codeset_figurative_constants): Likewise.
	(__gg__function_handle_from_cobpath): Likewise.
	(__gg__just_mangle_name): Likewise.
	(__gg__function_handle_from_name): Likewise.
	(get_the_byte): Likewise.
	(__gg__set_env_name): Likewise.
	(__gg__get_env_name): Likewise.
	(__gg__get_env_value): Likewise.
	(__gg__set_env_value): Likewise.
	(__gg__fprintf_stderr): Likewise.
	(__gg__accept_arg_value): Likewise.
	(__gg__fc_char): Likewise.
	* libgcobol.h (__gg__dirty_to_binary_internal): Likewise.
	(__gg__dirty_to_binary): Likewise.
	(__gg__internal_to_console_in_place): Likewise.
	(__gg__fc_char): Likewise.
	(__gg__convert_encoding): Likewise.
	(__gg__convert_encoding_length): Likewise.
	* stringbin.cc (string_from_combined): Likewise.
	(__gg__binary_to_string_internal): Likewise.
	(__gg__binary_to_string_encoded): Likewise.
	(__gg__numeric_display_to_binary): Likewise.
	(__gg__packed_to_binary): Likewise.
	* stringbin.h (__gg__binary_to_string_internal): Likewise.
	(__gg__binary_to_string_encoded): Likewise.
	(__gg__numeric_display_to_binary): Likewise.
	* valconv.cc (__gg__alphabet_create): Likewise.
	(__gg__string_to_numeric_edited): Likewise.
	(__gg__string_to_alpha_edited): Likewise.
	(__gg__remove_trailing_zeroes): Likewise.
	* valconv.h (__VALCONV_H): Likewise.
	* encodings.h: New file.

gcc/testsuite/ChangeLog:

	* cobol.dg/group1/check_88.cob: Likewise.
This commit is contained in:
Robert Dubner 2025-10-10 11:35:44 -04:00
parent 7fe86bb107
commit 0e95ebf465
30 changed files with 7513 additions and 4124 deletions

View File

@ -198,7 +198,7 @@ apply_cdf_turn( const exception_turn_t& turn ) {
%type <cdfarg> namelit name_any name_one
%type <string> name subscript subscripts inof
%token <boolean> BOOL
%token <number> FEATURE 365 NUMBER 303 EXCEPTION_NAME 280 "EXCEPTION NAME"
%token <number> FEATURE 366 NUMBER 303 EXCEPTION_NAME 280 "EXCEPTION NAME"
%type <cdfval> cdf_expr
%type <cdfval> cdf_relexpr cdf_reloper cdf_and cdf_bool_expr
@ -210,55 +210,55 @@ apply_cdf_turn( const exception_turn_t& turn ) {
%type <number> cdf_stackable
%token BY 486
%token COPY 362
%token CDF_DISPLAY 384 ">>DISPLAY"
%token IN 605
%token BY 487
%token COPY 363
%token CDF_DISPLAY 385 ">>DISPLAY"
%token IN 606
%token NAME 286
%token NUMSTR 305 "numeric literal"
%token OF 686
%token PSEUDOTEXT 721
%token REPLACING 743
%token OF 687
%token PSEUDOTEXT 723
%token REPLACING 745
%token LITERAL 298
%token SUPPRESS 376
%token SUPPRESS 377
%token LSUB 367 "("
%token SUBSCRIPT 375 RSUB 372 ")"
%token LSUB 368 "("
%token SUBSCRIPT 376 RSUB 373 ")"
%token CDF_DEFINE 383 ">>DEFINE"
%token CDF_IF 385 ">>IF"
%token CDF_ELSE 386 ">>ELSE"
%token CDF_END_IF 387 ">>END-IF"
%token CDF_EVALUATE 388 ">>EVALUATE"
%token CDF_WHEN 389 ">>WHEN"
%token CDF_END_EVALUATE 390 ">>END-EVALUATE"
%token CDF_DEFINE 384 ">>DEFINE"
%token CDF_IF 386 ">>IF"
%token CDF_ELSE 387 ">>ELSE"
%token CDF_END_IF 388 ">>END-IF"
%token CDF_EVALUATE 389 ">>EVALUATE"
%token CDF_WHEN 390 ">>WHEN"
%token CDF_END_EVALUATE 391 ">>END-EVALUATE"
%token ALL 450
%token CALL_CONVENTION 391 ">>CALL-CONVENTION"
%token COBOL_WORDS 380 ">>COBOL-WORDS"
%token CDF_PUSH 394 ">>PUSH"
%token CDF_POP 395 ">>POP"
%token SOURCE_FORMAT 396 ">>SOURCE FORMAT"
%token ALL 451
%token CALL_CONVENTION 392 ">>CALL-CONVENTION"
%token COBOL_WORDS 381 ">>COBOL-WORDS"
%token CDF_PUSH 395 ">>PUSH"
%token CDF_POP 396 ">>POP"
%token SOURCE_FORMAT 397 ">>SOURCE FORMAT"
%token AS 468 CONSTANT 361 DEFINED 363
%token AS 469 CONSTANT 362 DEFINED 364
%type <boolean> DEFINED
%token OTHER 698 PARAMETER_kw 368 "PARAMETER"
%token OFF 687 OVERRIDE 369
%token THRU 939
%token TRUE_kw 813 "True"
%token OTHER 699 PARAMETER_kw 369 "PARAMETER"
%token OFF 688 OVERRIDE 370
%token THRU 952
%token TRUE_kw 815 "True"
%token CALL_COBOL 392 "CALL"
%token CALL_VERBATIM 393 "CALL (as C)"
%token CALL_COBOL 393 "CALL"
%token CALL_VERBATIM 394 "CALL (as C)"
%token TURN 815 CHECKING 496 LOCATION 649 ON 689 WITH 841
%token TURN 817 CHECKING 497 LOCATION 650 ON 690 WITH 844
%left OR 940
%left AND 941
%right NOT 942
%left '<' '>' '=' NE 943 LE 944 GE 945
%left OR 953
%left AND 954
%right NOT 955
%left '<' '>' '=' NE 956 LE 957 GE 958
%left '-' '+'
%left '*' '/'
%right NEG 947
%right NEG 960
%define api.prefix {ydf}
%define api.token.prefix{YDF_}

View File

@ -1417,6 +1417,15 @@ it may contain several directory names separated by a colon
.Ev COBPATH
is searched first, followed by
.Ev LD_LIBRARY_PATH .
Note that
.Ev COBPATH does not change where the runtime linker looks for
.Pa libgcobol.so
itself.
How the runtime linker searches for
.Pa libgcobol.so
when the executable loads is controlled by
.Xr ld.so 8 ,
not libgcobol.
.Pp
Each directory is searched for files whose name ends in
.Ql ".so" .

File diff suppressed because it is too large Load Diff

View File

@ -178,6 +178,7 @@ arithmetic_operation(size_t nC, cbl_num_result_t *C,
temp_field.data.rdigits = remainder->field->data.rdigits ;
temp_field.data.initial = remainder->field->data.initial ;
temp_field.data.picture = remainder->field->data.picture ;
temp_field.codeset = remainder->field->codeset ;
parser_symbol_add(&temp_field);
temp_remainder.field = &temp_field;

View File

@ -890,6 +890,7 @@ get_binary_value( tree value,
signp,
pointer,
build_int_cst_type(INT, field->data.digits),
build_int_cst_type(INT, field->codeset.encoding),
NULL_TREE));
// Assign the value we got from the string to our "return" value:
gg_assign(value, gg_cast(TREE_TYPE(value), val128));
@ -1739,11 +1740,13 @@ get_literal_string(cbl_field_t *field)
size_t buffer_length = field->data.capacity+1;
char *buffer = static_cast<char *>(xcalloc(1, buffer_length));
for(size_t i=0; i<field->data.capacity; i++)
{
buffer[i] = ascii_to_internal(field->data.initial[i]);
}
size_t charsout;
const char *converted = __gg__iconverter(DEFAULT_CHARMAP_SOURCE,
field->codeset.encoding,
field->data.initial,
field->data.capacity,
&charsout);
memcpy(buffer, converted, field->data.capacity+1);
return buffer;
}

View File

@ -30,11 +30,6 @@
#ifndef _GENUTIL_H_
#define _GENUTIL_H_
#define EBCDIC_MINUS (0x60)
#define EBCDIC_PLUS (0x4E)
#define EBCDIC_ZERO (0xF0)
#define EBCDIC_NINE (0xF9)
bool internal_codeset_is_ebcdic();
extern bool exception_location_active;

View File

@ -51,9 +51,14 @@
accept_envar_e,
};
struct collating_an_t {
const char *alpha, *national;
};
class literal_t {
size_t isym;
public:
cbl_encoding_t encoding;
char prefix[3];
size_t len;
char *data;
@ -96,9 +101,32 @@
}
literal_t&
set_prefix( const char *input, size_t len ) {
encoding = current_encoding('A');
assert(len < sizeof(prefix));
std::fill(prefix, prefix + sizeof(prefix), '\0');
std::transform(input, input + len, prefix, toupper);
switch(prefix[0]) {
case '\0': case 'Z':
encoding = current_encoding('A');
break;
case 'N':
encoding = current_encoding('N');
if( 'X' == prefix[1] ) {
cbl_unimplemented("NX literals");
}
break;
case 'G':
cbl_unimplemented("DBCS encoding not supported");
break;
case 'U':
encoding = UTF8_e;
break;
case 'X':
break;
default:
gcc_unreachable();
}
assert(encoding <= iconv_YU_e);
return *this;
}
bool
@ -300,6 +328,7 @@
#include "genapi.h"
#include "../../libgcobol/exceptl.h"
#include "exceptg.h"
#include "../../libgcobol/charmaps.h"
#include "parse_ante.h"
%}
@ -364,6 +393,7 @@
%token <number> MIGHT_BE "IS or IS NOT"
FUNCTION_UDF "UDF name"
FUNCTION_UDF_0 "UDF"
DEFAULT
%token <string> DATE_FMT "date format"
TIME_FMT "time format"
@ -445,13 +475,13 @@
DAY_OF_WEEK "DAY-OF-WEEK"
DAY_TO_YYYYDDD "DAY-TO-YYYYDDD"
DBCS DE DEBUGGING DECIMAL_POINT
DECLARATIVES DEFAULT DELIMITED DELIMITER DEPENDING
DECLARATIVES DELIMITED DELIMITER DEPENDING
DESCENDING DETAIL DIRECT
DIRECT_ACCESS "DIRECT-ACCESS"
DOWN DUPLICATES
DYNAMIC
E EBCDIC EC EGCS ENTRY ENVIRONMENT EQUAL EVERY
E EBCDIC EC EGCS ENCODING ENTRY ENVIRONMENT EQUAL EVERY
EXAMINE EXHIBIT EXP EXP10 EXTEND EXTERNAL
EXCEPTION_FILE "EXCEPTION-FILE"
@ -539,7 +569,7 @@
PAGE_COUNTER "PAGE-COUNTER"
PF PH PI PIC PICTURE
PLUS PRESENT_VALUE PRINT_SWITCH
PROCEDURE PROCEDURES PROCEED PROCESS
PROCEDURE PROCEDURES PROCEED PROCESS PROCESSING
PROGRAM_ID "PROGRAM-ID"
PROGRAM_kw "Program" PROPERTY PROTOTYPE PSEUDOTEXT
@ -587,10 +617,9 @@
UP UPON UPOS UPPER_CASE USAGE USING
USUBSTR USUPPLEMENTARY UTILITY UUID4 UVALID UWIDTH
VALUE VARIANCE VARYING VOLATILE
VALIDATING VALUE VARIANCE VARYING VOLATILE
WHEN_COMPILED WITH WORKING_STORAGE
XML XMLGENERATE XMLPARSE
YEAR_TO_YYYY YYYYDDD YYYYMMDD
/* unused Context Words */
@ -655,6 +684,7 @@
END_SUBTRACT "END-SUBTRACT"
END_UNSTRING "END-UNSTRING"
END_WRITE "END-WRITE"
END_XML "END-XML"
END_IF "END-IF"
/* end tokens without semantic value */
@ -665,7 +695,7 @@
%type <number> sentence statements statement
%type <number> star_cbl_opt close_how
%type <number> test_before usage_clause1 might_be
%type <number> test_before usage_clause1 might_be alphanational
%type <boolean> all optional sign_leading on_off initialized strong is_signed
%type <number> count data_clauses data_clause
%type <number> nine nines nps relop spaces_etc reserved_value signed
@ -673,7 +703,9 @@
%type <number> true_false posneg eval_posneg
%type <number> open_io alphabet_etc
%type <special_type> device_name
%type <string> numed collating_sequence context_word ctx_name locale_spec
%type <string> numed context_word ctx_name locale_spec
%type <collating_sequences> collating_sequences collating_ans
%type <collating_name> collating_an
%type <literal> namestr alphabet_lit program_as repo_as
%type <field> perform_cond kind_of_name
%type <refer> alloc_ret
@ -842,6 +874,8 @@
%type <nameloc> repo_func_name
%type <namelocs> repo_func_names
%type <codeset> codeset_name
%type <locale_phrase> locale_phrase
%union {
bool boolean;
@ -859,6 +893,10 @@
struct { radix_t radix; char *string; } numstr;
struct { YYLTYPE loc; int token; literal_t name; } prog_end;
struct { int token; special_name_t id; } special_type;
struct { char locale_type; const char * name; } locale_phrase;
collating_an_t collating_sequences;
struct collating_name_t { int token; const char *name; } collating_name;
struct { size_t isym; cbl_encoding_t encoding; } codeset;
struct { cbl_field_type_t type;
uint32_t capacity; bool signable; } computational;
struct cbl_special_name_t *special;
@ -870,7 +908,7 @@
struct { cbl_file_t *file; file_status_t handled; } file_op;
struct cbl_label_t *label;
struct { cbl_label_t *label; int token; } exception;
struct cbl_field_data_t *field_data;
struct { cbl_encoding_t encoding; cbl_field_data_t *data; } field_data;
struct cbl_field_t *field;
struct { bool tf; cbl_field_t *field; } bool_field;
struct { int token; cbl_field_t *cond; } cond_field;
@ -948,7 +986,10 @@
}
%printer { fprintf(yyo, "clauses: 0x%04x", $$); } data_clauses
%printer { fprintf(yyo, "%s %s", refer_type_str($$), $$? $$->name() : "<none>"); } <refer>
%printer { fprintf(yyo, "%s %s %s",
refer_type_str($$),
$$? $$->name() : "<none>",
$$ && $$->field? $$->field->codeset.name() : "<none>"); } <refer>
%printer { fprintf(yyo, "%s", $$->field? name_of($$->field) : "[omitted]"); } alloc_ret
%printer { fprintf(yyo, "%s %s '%s' (%s)",
$$? cbl_field_type_str($$->type) : "<%empty>",
@ -1023,6 +1064,8 @@
SEARCH SET SELECT SORT SORT_MERGE
STRING_kw STOP SUBTRACT START
UNSTRING WRITE WHEN INVALID
XMLGENERATE "XML GENERATE"
XMLPARSE "XML PARSE"
%left ABS ACCESS ACOS ACTUAL ADVANCING AFP_5A AFTER ALL
ALLOCATE
@ -1031,7 +1074,7 @@
ALPHANUMERIC
ALPHANUMERIC_EDITED
ALPHED ALSO ALTERNATE ANNUITY ANUM ANY ANYCASE APPLY ARE
AREA AREAS AS
AREA AREAS AS ATTRIBUTES
ASCENDING ACTIVATING ASIN ASSIGN AT ATAN
BACKWARD BASED BASECONVERT
@ -1072,7 +1115,8 @@
DOWN DUPLICATES
DYNAMIC
E EBCDIC EC EGCS ENTRY ENVIRONMENT EQUAL ERROR EVERY
E EBCDIC EC EGCS ELEMENT
ENTRY ENVIRONMENT EQUAL ERROR EVERY
EXAMINE EXCEPTION EXHIBIT EXP EXP10 EXTEND EXTERNAL
EXCEPTION_FILE
@ -1143,12 +1187,13 @@
MIGHT_BE MINN MULTIPLE MOD MODE
MODULE_NAME
NAMED NAT NATIONAL
NAMED NAMESPACE NAMESPACE_PREFIX "NAMESPACE-PREFIX"
NAT NATIONAL
NATIONAL_EDITED
NATIONAL_OF
NATIVE NEGATIVE NESTED NEXT
NINEDOT NINES NINEV NO NOTE NO_CONDITION
NULLS NULLPTR NUMBER
NONNUMERIC NULLS NULLPTR NUMBER
NUME NUMED NUMED_CR NUMED_DB NUMERIC
NUMERIC_EDITED NUMSTR NUMVAL
NUMVAL_C
@ -1216,7 +1261,7 @@
VALUE VARIANCE VARYING VOLATILE
WHEN_COMPILED WITH WORKING_STORAGE
XML XMLGENERATE XMLPARSE
XML_DECLARATION "XML-DECLARATION"
YEAR_TO_YYYY YYYYDDD YYYYMMDD
ZERO
@ -1269,7 +1314,7 @@
END_EVALUATE END_MULTIPLY END_PERFORM
END_READ END_RETURN END_REWRITE
END_SEARCH END_START END_STRING END_SUBTRACT
END_UNSTRING END_WRITE
END_UNSTRING END_WRITE END_XML
error
END_IF
@ -1937,11 +1982,12 @@ selected_name: external scalar { $$ = $2; }
YYERROR;
}
uint32_t len = $name.len;
cbl_field_t field {
0, FldLiteralA, FldInvalid, quoted_e | constant_e,
0, 0, 0, nonarray, 0, "", 0, cbl_field_t::linkage_t(),
{len,len,0,0, $name.data}, NULL };
// Pretend hex-encoded because that means use verbatim.
cbl_field_t field { FldLiteralA,
hex_encoded_e | quoted_e | constant_e,
{len,len,0,0, $name.data} };
field.attr |= literal_attr($name.prefix);
field.codeset.set();
$$ = new cbl_refer_t( field_add(@name, &field) );
}
;
@ -2315,20 +2361,12 @@ config_paragraphs: config_paragraph
config_paragraph:
SPECIAL_NAMES '.'
| SPECIAL_NAMES '.' specials '.'
| SPECIAL_NAMES '.' special_names '.'
| SOURCE_COMPUTER '.'
| SOURCE_COMPUTER '.' NAME '.'
| SOURCE_COMPUTER '.' NAME with_debug '.'
| OBJECT_COMPUTER '.'
| OBJECT_COMPUTER '.' NAME collating_sequence[name] '.'
{
if( $name ) {
if( !current.collating_sequence($name) ) {
error_msg(@name, "collating sequence already defined as '%s'",
current.collating_sequence());
YYERROR;
}
}
}
| OBJECT_COMPUTER '.' NAME[computer] collations '.'
| REPOSITORY dot
| REPOSITORY dot repo_members '.'
;
@ -2452,23 +2490,84 @@ repo_program: PROGRAM_kw NAME repo_as
repo_property: PROPERTY NAME repo_as
;
with_debug: %empty
| with DEBUGGING MODE {
with_debug: with DEBUGGING MODE {
if( ! set_debug(true) ) {
error_msg(@2, "DEBUGGING MODE valid only in fixed format");
}
}
;
collating_sequence: %empty { $$ = NULL; }
| PROGRAM_kw COLLATING SEQUENCE is NAME[name] { $$ = $name; }
| PROGRAM_kw SEQUENCE is NAME[name] { $$ = $name; }
| COLLATING SEQUENCE is NAME[name] { $$ = $name; }
| SEQUENCE is NAME[name] { $$ = $name; }
collations: %empty
| collation_classification
| collation_sequence
| collation_classification collation_sequence
| collation_sequence collation_classification
;
collation_classification:
character CLASSIFICATION collating_sequences[seq]
{
warn_msg(@seq, "CHARACTER CLASSIFICATION ignored");
}
;
collation_sequence:
program_kw collating SEQUENCE collating_sequences[seq]
{
if( !current.collating_sequence($seq.alpha) ) {
error_msg(@seq, "collating sequence already defined as '%s'",
current.collating_sequence());
YYERROR;
}
}
;
specials: special_names
collating_sequences:
is NAME[name] {
$$.alpha = $name;
$$.national = nullptr;
}
| collating_ans { $$ = $1; }
;
collating_ans: collating_an[encoding] {
$$ = collating_an_t();
const char **pname =
$encoding.token == ALPHANUMERIC? &$$.alpha : &$$.national;
*pname = $encoding.name;
}
| collating_ans collating_an[encoding]
{
const char **pname =
$encoding.token == ALPHANUMERIC? &$$.alpha : &$$.national;
if( *pname ) {
error_msg(@encoding,
"%qs is repeated", keyword_str($encoding.token));
}
*pname = $encoding.name;
}
;
collating_an: for alphanational is locale_phrase[locale] {
$$.token = $alphanational;
$$.name = $locale.name;
if( ! $locale.name ) {
const char *locale_name = "???";
switch($locale.locale_type) {
case 'L': locale_name = "LOCALE"; break;
case 'D': locale_name = "DEFAULT"; break;
case 'S': locale_name = "SYSTEM-DEFAULT"; break;
case 'U': locale_name = "USER-DEFAULT"; break;
}
cbl_unimplemented("FOR %s IS %s",
keyword_str($$.token),
locale_name);
}
warn_msg(@locale, "LOCALE phrase ignored");
}
;
locale_phrase: NAME { $$.name = $1; $$.locale_type = '\0'; }
| LOCALE { $$.name = nullptr; $$.locale_type = 'L'; }
| DEFAULT { $$.name = nullptr; $$.locale_type = $1; }
;
special_names: special_name
| special_names special_name
;
@ -2481,12 +2580,26 @@ special_name: dev_mnemonic
if( !namcpy(@name, $abc->name, $name) ) YYERROR;
if( yydebug ) $abc->dump();
}
| ALPHABET NAME[name] for alphanational is alphabet_name[abc]
{
if( !$abc ) YYERROR;
assert($abc); // already in symbol table
if( !namcpy(@name, $abc->name, $name) ) YYERROR;
if( yydebug ) $abc->dump();
const size_t isym = symbol_index(symbol_elem_of($abc));
switch($alphanational) {
case ALPHANUMERIC:
current.alpha_encoding(isym, $abc->encoding);
break;
case NATIONAL:
current.national_encoding(isym, $abc->encoding);
break;
default: gcc_unreachable();
}
}
| CLASS NAME is domains
{
struct cbl_field_t field = { 0,
FldClass, FldInvalid, 0, 0, 0, 0, nonarray, @NAME.first_line, "",
0, cbl_field_t::linkage_t(),
{}, NULL };
struct cbl_field_t field = { FldClass, 0, {}, 0, $NAME };
if( !namcpy(@NAME, field.name, $2) ) YYERROR;
struct cbl_domain_t *domain =
@ -2496,6 +2609,7 @@ special_name: dev_mnemonic
field.data.false_value_as($domains);
field.data.domain_as(domain);
field.codeset.set();
domains.clear();
if( field_add(@2, &field) == NULL ) {
@ -2520,10 +2634,9 @@ special_name: dev_mnemonic
{
symbol_decimal_point_set(',');
}
| LOCALE NAME is locale_spec
{
current.locale($NAME, $locale_spec);
cbl_unimplemented("LOCALE syntax");
| LOCALE NAME is locale_spec[spec] {
current.locale($NAME, $spec);
cbl_unimplementedw("sorry, unimplemented: LOCALE %qs", $spec);
}
;
| upsi
@ -2626,6 +2739,7 @@ alphabet_name: STANDARD_ALPHABET { $$ = alphabet_add(@1, ASCII_e); }
| EBCDIC { $$ = alphabet_add(@1, EBCDIC_e); }
| alphabet_seqs
{
$1->reencode();
$$ = cbl_alphabet_of(symbol_alphabet_add(PROGRAM, $1));
}
| error
@ -2825,22 +2939,12 @@ domains: domain
domain: all LITERAL[a]
{
if( ! string_of($a) ) {
gcc_location_set(@a);
yywarn("'%s' has embedded NUL", $a.data);
}
$$ = NULL;
cbl_domain_t domain(@a, $all, $a.len, $a.data);
domains.push_back(domain);
}
| all[a_all] LITERAL[a] THRU all[z_all] LITERAL[z]
{
if( ! string_of($a) ) {
yywarn("'%s' has embedded NUL", $a.data);
}
if( ! string_of($z) ) {
yywarn("'%s' has embedded NUL", $z.data);
}
$$ = NULL;
cbl_domain_elem_t first(@a, $a_all, $a.len, $a.data),
last(@z, $z_all, $z.len, $z.data);
@ -2867,9 +2971,6 @@ domain: all LITERAL[a]
domains.push_back(domain);
}
| all[a_all] reserved_value[a] THRU all[z_all] LITERAL[z] {
if( ! string_of($z) ) {
yywarn("'%s' has embedded NUL", $z.data);
}
$$ = NULL;
if( $a == NULLS ) YYERROR;
auto value = constant_of(constant_index($a))->data.initial;
@ -2887,9 +2988,6 @@ domain: all LITERAL[a]
}
| when_set_to FALSE_kw is LITERAL[value]
{
if( ! string_of($value) ) {
yywarn("'%s' has embedded NUL", $value.data);
}
const char *dom = $value.data;
$$ = new cbl_domain_t(@value, false, $value.len, dom);
}
@ -2994,7 +3092,37 @@ fd_clause: record_desc
cbl_unimplementedw("RECORDING MODE was ignored, not defined by ISO 2023");
}
| VALUE OF fd_values
| CODESET is NAME
| CODESET is codeset_name[codeset] {
auto f = cbl_file_of(symbol_at(file_section_fd));
f->codeset = cbl_file_t::codeset_t($codeset.encoding,
$codeset.isym);
cbl_unimplementedw("sorry, unimplemented CODE-SET");
}
| CODESET for alphanational is codeset_name[codeset]
{
auto f = cbl_file_of(symbol_at(file_section_fd));
f->codeset = cbl_file_t::codeset_t($codeset.encoding,
$codeset.isym);
if( $codeset.isym == 0 ) {
switch( $alphanational) {
case ALPHANUMERIC:
if( $codeset.encoding != ASCII_e ) {
error_msg(@alphanational,
"FOR ALPHANUMERIC: invalid codeset");
}
break;
case NATIONAL:
if( $codeset.encoding != EBCDIC_e ) {
error_msg(@alphanational,
"FOR ALPHANUMERIC: invalid codeset");
}
break;
default:
gcc_unreachable();
}
}
cbl_unimplemented("CODE-SET");
}
| is GLOBAL
{
auto f = cbl_file_of(symbol_at(file_section_fd));
@ -3018,6 +3146,24 @@ fd_clause: record_desc
}
;
alphanational: ALPHANUMERIC { $$ = ALPHANUMERIC; }
| NATIONAL { $$ = NATIONAL; }
;
codeset_name: STANDARD_ALPHABET { $$.isym = 0; $$.encoding = ASCII_e; }
| NATIVE { $$.isym = 0; $$.encoding = EBCDIC_e; }
| EBCDIC { $$.isym = 0; $$.encoding = EBCDIC_e; }
| NAME
{
auto e = symbol_alphabet(PROGRAM, $NAME);
if( !e ) {
error_msg(@NAME, "invalid CODE-SET: %qs", $NAME);
YYERROR;
}
$$.isym = symbol_index(e);
$$.encoding = custom_encoding_e;
}
;
block_desc: BLOCK_kw contains rec_contains chars_recs
;
rec_contains: NUMSTR[min] {
@ -3377,11 +3523,8 @@ level_name: LEVEL ctx_name
error_msg(@LEVEL, "LEVEL %d not supported", $LEVEL);
YYERROR;
}
struct cbl_field_t field = { 0,
FldInvalid, FldInvalid, 0, 0, 0, capacity_cast($1),
nonarray, @ctx_name.first_line, "",
0, cbl_field_t::linkage_t(),
{}, NULL };
cbl_field_t field = { FldInvalid, capacity_cast($LEVEL),
@ctx_name.first_line };
if( !namcpy(@ctx_name, field.name, $2) ) YYERROR;
$$ = field_add(@$, &field);
@ -3402,10 +3545,9 @@ level_name: LEVEL ctx_name
error_msg(@LEVEL, "LEVEL %d not supported", $LEVEL);
YYERROR;
}
struct cbl_field_t field = { 0,
FldInvalid, FldInvalid, 0, 0, 0, capacity_cast($1),
nonarray, @LEVEL.first_line, "",
0, {}, {}, NULL };
struct cbl_field_t field = { FldInvalid,
capacity_cast($LEVEL),
@LEVEL.first_line };
$$ = field_add(@1, &field);
if( !$$ ) {
@ -3435,18 +3577,19 @@ value78: literalism
cbl_field_data_t data = {};
data.capacity = capacity_cast(strlen($1.data));
data.initial = $1.data;
$$ = new cbl_field_data_t(data);
$$.encoding = $1.encoding;
$$.data = new cbl_field_data_t(data);
}
| const_value
{
cbl_field_data_t data = {};
data = build_real (float128_type_node, $1);
$$ = new cbl_field_data_t(data);
$$.data = new cbl_field_data_t(data);
}
| reserved_value[value]
{
const auto field = constant_of(constant_index($value));
$$ = new cbl_field_data_t(field->data);
$$.data = new cbl_field_data_t(field->data);
}
| true_false
@ -3513,7 +3656,13 @@ data_descr1: level_name
if( !cdf_value(field.name, $lit.data) ) {
error_msg(@1, "%s was defined by CDF", field.name);
}
value_encoding_check(@lit, $1);
if( ! field.codeset.valid() ) {
if( ! field.codeset.set(field.codeset.standard_internal.type) ) {
error_msg(@lit, "CONSTANT inconsistent with encoding %s",
cbl_alphabet_t::encoding_str(field.codeset.encoding));
}
}
value_encoding_check(@lit, $1, $lit.encoding);
}
| level_name CONSTANT is_global FROM NAME
{
@ -3540,12 +3689,11 @@ data_descr1: level_name
dialect_error(@1, "level 78", "mf or gnu");
YYERROR;
}
struct cbl_field_t field = { 0, FldLiteralA, FldInvalid,
constant_e, 0, 0, 78, nonarray,
@name.first_line, "", 0, {}, *$data, NULL };
if( !namcpy(@name, field.name, $name) ) YYERROR;
cbl_field_t field = { FldLiteralA, constant_e, *$data.data,
78, $name, @name.first_line };
if( field.data.initial ) {
field.attr |= quoted_e;
field.codeset.set($data.encoding);
if( !cdf_value(field.name, field.data.initial) ) {
yywarn("%s was defined by CDF", field.name);
}
@ -3564,10 +3712,8 @@ data_descr1: level_name
| LEVEL88 NAME /* VALUE */ NULLPTR
{
struct cbl_field_t field = { 0,
FldClass, FldInvalid, 0, 0, 0, 88, nonarray, @NAME.first_line, "",
0, cbl_field_t::linkage_t(),
{}, NULL };
struct cbl_field_t field = {FldClass, 0, {},
88, $NAME, @NAME.first_line};
if( !namcpy(@NAME, field.name, $2) ) YYERROR;
auto fig = constant_of(constant_index(NULLS))->data.initial;
@ -3590,19 +3736,16 @@ data_descr1: level_name
}
| LEVEL88 NAME VALUE domains
{
struct cbl_field_t field = { 0,
FldClass, FldInvalid, 0, 0, 0, 88, nonarray, @NAME.first_line, "",
0, cbl_field_t::linkage_t(),
{}, NULL };
if( !namcpy(@NAME, field.name, $2) ) YYERROR;
struct cbl_domain_t *domain =
cbl_field_t field = {
FldClass, 0, {}, 88, $NAME, @NAME.first_line};
cbl_domain_t *domain =
new cbl_domain_t[ domains.size() + 1];
std::copy(domains.begin(), domains.end(), domain);
field.data.domain_as(domain);
field.data.false_value_as($domains);
field.codeset.set();
domains.clear();
if( ($$ = field_add(@2, &field)) == NULL ) {
@ -3799,10 +3942,8 @@ data_descr1: level_name
}
// Ensure signed initial VALUE is for signed numeric type
if( is_numeric($field) &&
$field->data.initial &&
$field->type != FldFloat )
{
if( is_numeric($field) ) {
if( $field->data.initial && $field->type != FldFloat ) {
switch( $field->data.initial[0] ) {
case '-':
if( !$field->has_attr(signable_e) ) {
@ -3811,6 +3952,7 @@ data_descr1: level_name
}
}
}
}
// Verify VALUE
$field->report_invalid_initial_value(@data_clauses);
@ -4219,6 +4361,11 @@ alphanum_pic: alphanum_part {
;
alphanum_part: ALNUM[picture] count
{
auto field = current_field();
if( ! field->codeset.set($picture) ) {
error_msg(@picture, "PICTURE inconsistent with encoding %s",
cbl_alphabet_t::encoding_str(field->codeset.encoding));
}
$$.attr = uniform_picture($picture);
$$.nbyte = strlen($picture);
auto count($count);
@ -4498,6 +4645,13 @@ usage_clause1: usage BIT
| usage INDEX {
$$ = symbol_field_index_set( current_field() )->type;
}
| usage NATIONAL {
auto field = current_field();
if( ! field->codeset.set(EBCDIC_e) ) {
error_msg(@2, "usage NATIONAL conflicts with PICTURE");
}
$$ = FldInvalid;
}
// We should enforce data/code pointers with a different type.
| usage POINTER
{
@ -4535,6 +4689,10 @@ usage_clause1: usage BIT
value_clause: VALUE all LITERAL[lit] {
cbl_field_t *field = current_field();
if( ! field->codeset.set($lit.encoding) ) {
error_msg(@lit, "VALUE inconsistent with encoding %s",
cbl_alphabet_t::encoding_str(field->codeset.encoding));
}
field->data.initial = $lit.data;
field->attr |= literal_attr($lit.prefix);
// The __gg__initialize_data routine needs to know that VALUE is a
@ -4555,7 +4713,7 @@ value_clause: VALUE all LITERAL[lit] {
}
}
}
value_encoding_check(@lit, field);
value_encoding_check(@lit, field, $lit.encoding);
}
| VALUE all cce_expr[value] {
cbl_field_t *field = current_field();
@ -4583,6 +4741,13 @@ value_clause: VALUE all LITERAL[lit] {
}
| VALUE all reserved_value[value]
{
cbl_field_t *field = current_field();
if( ! field->codeset.valid() ) {
if( ! field->codeset.set(field->codeset.standard_internal.type) ) {
error_msg(@value, "VALUE inconsistent with encoding %s",
cbl_alphabet_t::encoding_str(field->codeset.encoding));
}
}
if( $value != NULLS ) {
auto fig = constant_of(constant_index($value));
current_field()->data.initial = fig->data.initial;
@ -5082,6 +5247,8 @@ statement: error {
| subtract { $$ = SUBTRACT; }
| unstring { $$ = UNSTRING; }
| write { $$ = WRITE; }
| xmlgenerate { $$ = XMLGENERATE; }
| xmlparse { $$ = XMLPARSE; }
;
/*
@ -6731,6 +6898,8 @@ context_word: APPLY { static char s[] ="APPLY";
$$ = s; } // OPTIONS paragraph
| ATTRIBUTE { static char s[] ="ATTRIBUTE";
$$ = s; } // SET statement
| ATTRIBUTES { static char s[] ="ATTRIBUTES";
$$ = s; } // XML GENERATE
| AUTO { static char s[] ="AUTO";
$$ = s; } // screen description entry
| AUTOMATIC { static char s[] ="AUTOMATIC";
@ -9260,9 +9429,12 @@ inspect: INSPECT backward inspected TALLYING tallies
if( is_literal(match) && is_literal(replace) ) {
if( !$match->all && !$replace_oper->all) {
if( match->data.capacity != replace->data.capacity ) {
// Make a copy of replace, because nice_name returns a static
char *replace_name = xstrdup(nice_name_of(replace));
error_msg(@match, "%qs, size %u NOT EQUAL %qs, size %u",
nice_name_of(match), match->data.capacity,
nice_name_of(replace), replace->data.capacity);
replace_name, replace->data.capacity);
free(replace_name);
YYERROR;
}
}
@ -9728,7 +9900,12 @@ ffi_name: scalar
$$->field = new_literal(strlen(L.name), L.name, quoted_e);
}
}
| LITERAL { $$ = new_reference(new_literal($1, quoted_e)); }
| LITERAL
{
// Pretend hex-encoded because that means use verbatim.
auto attr = cbl_field_attr_t(quoted_e | hex_encoded_e);
$$ = new_reference(new_literal($1, attr));
}
;
parameters: parameter { $$ = new ffi_args_t($1); }
@ -11158,6 +11335,10 @@ first_last: %empty { $$ = 0; }
| LAST { $$ = 'L'; }
;
for: %empty
| FOR
;
is_global: %empty %prec GLOBAL { $$ = false; }
| is GLOBAL { $$ = true; }
;
@ -11419,7 +11600,134 @@ cdf_none: ENTER
| SERVICE_RELOAD
;
xmlgenerate: xmlgen_impl end_xml {
cbl_unimplemented("XML GENERATE");
}
| xmlgen_cond end_xml {
cbl_unimplemented("XML GENERATE");
}
;
xmlgen_impl:
XMLGENERATE xmlgen_body
;
xmlgen_cond: XMLGENERATE xmlgen_body[body] xmlexcepts[err]
;
xmlgen_body: XMLGENERATE name[id1] FROM name[id2]
xmlgen_count xmlencoding xmlgen_decl xmlgen_namespace
xmlgen_nameof xmlgen_typeof xmlgen_suppress
;
xmlgen_count: %empty
| COUNT in name[id3]
;
xmlgen_decl: %empty
| with XML_DECLARATION with ATTRIBUTES
;
xmlgen_namespace:
%empty
| NAMESPACE is name[id4] namespace_prefix
;
namespace_prefix:
%empty
| NAMESPACE_PREFIX is namestr[id5]
;
xmlgen_nameof: %empty
| NAME of xmlgen_ids
;
xmlgen_ids: xmlgen_id
| xmlgen_ids xmlgen_id
;
xmlgen_id: name[id6] is LITERAL[lit]
;
xmlgen_typeof: %empty
| TYPE of xmlgen_types
;
xmlgen_types: xmlgen_type
| xmlgen_types xmlgen_type
;
xmlgen_type: name[id6] is xmlgen_eltype
;
xmlgen_eltype: ATTRIBUTE
| ELEMENT
| CONTENT
;
xmlgen_suppress:
%empty
| SUPPRESS xml_suppressions
;
xml_suppressions:
xml_suppression
| xml_suppressions xml_suppression
;
xml_suppression:
name[id8] xml_when_phrase
| xml_generic_suppression xml_when_figs
;
xml_when_phrase:
%empty %prec ZERO
| xml_when_figs
;
xml_when_figs: xml_when_fig
| xml_when_figs OR xml_when_fig
;
xml_when_fig: ZERO
| SPACES
| LOW_VALUES
| HIGH_VALUES
;
xml_generic_suppression:
%empty
| EVERY xml_generic_numeric xmlgen_eltype
;
xml_generic_numeric:
%empty
| NUMERIC
| NONNUMERIC
;
xmlparse: xmlparse_impl end_xml {
cbl_unimplemented("XML PARSE");
}
| xmlparse_cond end_xml {
cbl_unimplemented("XML PARSE");
}
;
xmlparse_impl: XMLPARSE xmlparse_body
;
xmlparse_cond: XMLPARSE xmlparse_body[body] xmlexcepts[err]
;
xmlparse_body: XMLPARSE name xmlencoding xmlreturning xmlvalidating
PROCESSING PROCEDURE is xmlprocs
;
xmlencoding: %empty %prec NAME
| with ENCODING name [codepage]
;
xmlreturning: %empty
| RETURNING NATIONAL
;
xmlvalidating: %empty
| VALIDATING with name
| VALIDATING with FILE_KW name
;
xmlprocs: label_1[proc]
| label_1[proc1] THRU label_1[proc2]
;
xmlexcepts: xmlexcept[a] statements %prec XMLPARSE
| xmlexcepts[a] xmlexcept[b] statements %prec XMLPARSE
;
xmlexcept: EXCEPTION
;
end_xml: %empty %prec XMLPARSE
| END_XML %prec XMLPARSE
;
%%
static YYLTYPE
@ -11436,11 +11744,9 @@ void ast_call( const YYLTYPE& loc, cbl_refer_t name, const cbl_refer_t& returnin
bool is_function)
{
if( is_literal(name.field) ) {
cbl_field_t called = { 0, FldLiteralA, FldInvalid, quoted_e | constant_e,
0, 0, 77, nonarray, 0, "",
0, cbl_field_t::linkage_t(), {}, NULL };
cbl_field_t called = { FldLiteralA, quoted_e | constant_e,
name.field->data, 77 };
snprintf(called.name, sizeof(called.name), "_%s", name.field->data.initial);
called.data = name.field->data;
name.field = cbl_field_of(symbol_field_add(PROGRAM, &called));
symbol_field_location(field_index(name.field), loc);
parser_symbol_add(name.field);
@ -12410,7 +12716,6 @@ data_category_of( const cbl_refer_t& refer ) {
case FldIndex:
case FldSwitch:
case FldDisplay:
case FldBlob:
return data_category_none;
}
gcc_unreachable();
@ -12443,7 +12748,6 @@ valid_target( const cbl_refer_t& refer ) {
case FldIndex:
case FldSwitch:
case FldDisplay:
case FldBlob:
return false;
}
gcc_unreachable();
@ -12988,7 +13292,7 @@ new_literal( const literal_t& lit, enum cbl_field_attr_t attr ) {
attrs |= constant_e;
attrs |= literal_attr(lit.prefix);
return new_literal(lit.len, lit.data, cbl_field_attr_t(attrs));
return new_literal(lit.len, lit.data, cbl_field_attr_t(attrs), lit.encoding);
}
bool
@ -13096,7 +13400,8 @@ literal_attr( const char prefix[] ) {
case 1:
switch(prefix[0]) {
case 'B': return bool_encoded_e;
case 'N': cbl_unimplemented("National"); return none_e;
case 'N':
case 'U': return none_e; // nothing to say yet
case 'X': return hex_encoded_e;
case 'Z': return quoted_e;
}
@ -13107,7 +13412,8 @@ literal_attr( const char prefix[] ) {
case 'X':
switch(prefix[0]) {
case 'B': return cbl_field_attr_t(hex_encoded_e | bool_encoded_e);
case 'N': cbl_unimplemented("National"); return none_e;
case 'N':
case 'U': cbl_unimplemented("National"); return none_e;
}
break;
}
@ -13181,6 +13487,8 @@ bool
cobol_gcobol_feature_set( cbl_gcobol_feature_t gcobol_feature, bool on ) {
if( gcobol_feature == feature_internal_ebcdic_e ) {
if( internal_ebcdic_locked ) return false;
if( ! on ) gcc_unreachable();
current.default_encoding.set(EBCDIC_e);
}
if( on ) {
cbl_gcobol_features |= gcobol_feature;

View File

@ -273,8 +273,35 @@ static inline char * dequote( char input[] ) {
static const char *
name_of( cbl_field_t *field ) {
assert(field);
// Because this can be called after .initial has been converted to the
// field->codeset.encoding, we have to undo that. There may be some danger
// associated with returning a static. I don't actually know. -- RJD.
static size_t static_length = 0;
static char * static_buffer = nullptr;
if( field->name[0] == '_' )
{
// Make a copy of .initial
if( static_length < field->data.capacity+1 )
{
static_length = field->data.capacity+1;
static_buffer = static_cast<char *>(xrealloc(static_buffer,
static_length));
memcpy(static_buffer, field->data.initial, field->data.capacity);
static_buffer[field->data.capacity] = '\0';
}
// Convert it from ->encoding to DEFAULT_CHARMAP_SOURCE
size_t charsout;
char *converted = __gg__iconverter(field->codeset.encoding,
DEFAULT_CHARMAP_SOURCE,
field->data.initial,
field->data.capacity,
&charsout );
memcpy(static_buffer, converted, charsout);
static_buffer[charsout] = '\0';
}
return field->name[0] == '_' && field->data.initial?
field->data.initial : field->name;
static_buffer : field->name;
}
static const char *
@ -1310,6 +1337,25 @@ class prog_descr_t {
size_t program_index;
cbl_label_t *declaratives_eval, *paragraph, *section;
const char *collating_sequence;
struct encoding_t {
struct encoding_base_t {
size_t isym;
cbl_encoding_t encoding;
encoding_base_t() : isym(0), encoding(CP1252_e) {}
encoding_base_t(cbl_encoding_t encoding) : isym(0), encoding(encoding) {}
void set( size_t isym, cbl_encoding_t encoding ) {
this->isym = isym;
this->encoding = encoding;
}
void set( cbl_encoding_t encoding ) {
assert(encoding != custom_encoding_e);
this->isym = 0;
this->encoding = encoding;
}
} alpha, national;
encoding_t() : national(EBCDIC_e) {}
} alphabet;
struct locale_t {
cbl_name_t name; const char *os_name;
locale_t() : name(""), os_name(nullptr) {}
@ -1599,6 +1645,8 @@ static class current_t {
rel_part_t antecedent_cache;
public:
static prog_descr_t::encoding_t::encoding_base_t default_encoding;
current_t()
: first_statement(0)
, in_declaratives(false)
@ -1836,6 +1884,26 @@ static class current_t {
return client->second;
}
void alpha_encoding( size_t isym, cbl_encoding_t encoding ) {
prog_descr_t& program = programs.top();
program.alphabet.alpha.set(isym, encoding);
}
void national_encoding( size_t isym, cbl_encoding_t encoding ) {
prog_descr_t& program = programs.top();
program.alphabet.national.set(isym, encoding);
}
cbl_encoding_t alpha_encoding() const {
if( programs.empty() ) return CP1252_e;
const prog_descr_t& program = programs.top();
return program.alphabet.alpha.encoding;
}
cbl_encoding_t national_encoding() const {
if( programs.empty() ) return EBCDIC_e;
const prog_descr_t& program = programs.top();
return program.alphabet.national.encoding;
}
bool
collating_sequence( const cbl_name_t name ) {
assert(name);
@ -1891,7 +1959,16 @@ static class current_t {
const cbl_label_t *L;
if( (L = symbol_program_add(parent, &label)) == NULL ) return false;
programs.push( prog_descr_t(symbol_index(symbol_elem_of(L))) );
prog_descr_t program(symbol_index(symbol_elem_of(L)));
#if 1 //EBCDIC // enable when ready
auto alpha_encoding =
programs.empty()? default_encoding : programs.top().alphabet.alpha;
if( alpha_encoding.encoding == EBCDIC_e ) {
dbgmsg("%s:%d: We're in EBCDIC", __func__, __LINE__);
}
program.alphabet.alpha = alpha_encoding;
#endif
programs.push( program );
programs.apply_pending();
bool fOK = symbol_at(programs.top().program_index) + 1 == symbols_end();
@ -2009,6 +2086,14 @@ static class current_t {
parser_leave_section( programs.top().section );
programs.pop();
#if 0
if( programs.empty() ) {
// The default encoding can be changed only with -finternal-ebcdic, and
// remains in effect for all programs while the compiler runs.
// This comment here to remind us.
default_encoding = prog_descr_t::encoding_t::encoding_base_t();
}
#endif
debugging_clients.clear();
error_clients.clear();
exception_clients.clear();
@ -2189,6 +2274,8 @@ static class current_t {
cbl_label_t * compute_label() { return error_labels.compute_error; }
} current;
prog_descr_t::encoding_t::encoding_base_t current_t::default_encoding;
void current_enabled_ecs( tree ena ) {
current.declaratives.runtime.ena = ena;
}
@ -2208,6 +2295,22 @@ cbl_options_t current_options() {
return current.options_paragraph;
}
cbl_encoding_t current_encoding( char a_or_n ) {
cbl_encoding_t retval;
switch(a_or_n) {
case 'A':
retval = current.alpha_encoding();
break;
case 'N':
retval = current.national_encoding();
break;
default:
gcc_unreachable();
break;
}
return retval;
}
size_t current_program_index() {
return current.program()? current.program_index() : 0;
}
@ -2338,7 +2441,6 @@ needs_picture( cbl_field_type_t type ) {
case FldNumericBin5:
return false;
case FldBlob:
case FldClass:
case FldConditional:
case FldForward:
@ -2367,7 +2469,6 @@ is_callable( const cbl_field_t *field ) {
case FldForward:
case FldSwitch:
case FldDisplay:
case FldBlob:
case FldNumericDisplay:
case FldNumericBinary:
case FldFloat:
@ -2763,7 +2864,7 @@ field_attr_str( const cbl_field_t *field ) {
intermediate_e, embiggened_e, all_alpha_e, all_x_e,
all_ax_e, prog_ptr_e, scaled_e, refmod_e, based_e, any_length_e,
global_e, external_e, blank_zero_e, linkage_e, local_e, leading_e,
separate_e, envar_e, dnu_1_e, bool_encoded_e, hex_encoded_e,
separate_e, envar_e, encoded_e, bool_encoded_e, hex_encoded_e,
depends_on_e, initialized_e, has_value_e, ieeedec_e, big_endian_e,
same_as_e, record_key_e, typedef_e, strongdef_e,
};
@ -2871,29 +2972,27 @@ blank_pad_initial( const char initial[], size_t capacity, size_t new_size ) {
return p;
}
static bool
value_encoding_check( const YYLTYPE& loc, cbl_field_t *field ) {
static void
value_encoding_check( const YYLTYPE& loc, cbl_field_t *field, cbl_encoding_t encoding ) {
if( ! field->internalize() ) {
error_msg(loc, "inconsistent string literal encoding for '%s'",
field->data.initial);
return false;
}
return true;
if( encoding != field->codeset.encoding ) {
warn_msg(loc, "VALUE encoded as %qs for data item encoded as %qs",
__gg__encoding_iconv_name(encoding), field->codeset.name());
}
}
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
static struct cbl_field_t *
field_alloc( const YYLTYPE& loc, cbl_field_type_t type, size_t parent, const char name[] ) {
cbl_field_t *f, field = {};
field.type = type;
field.usage = FldInvalid;
static const uint32_t level = 0;
cbl_field_t *f, field = { type, 0, cbl_field_data_t(), level, name, yylineno };
field.parent = parent;
field.line = yylineno;
if( !namcpy(loc, field.name, name) ) return NULL;
f = field_add(loc, &field);
assert(f);
return f;
@ -2909,7 +3008,7 @@ static cbl_file_t *
file_add( YYLTYPE loc, cbl_file_t *file ) {
gcc_assert(file);
enum { level = 1 };
struct cbl_field_t area = { 0, FldAlphanumeric, FldInvalid, 0, 0,0, level, {}, yylineno },
struct cbl_field_t area{ FldAlphanumeric, level, yylineno },
*field = field_add(loc, &area);
file->default_record = field_index(field);
@ -2928,6 +3027,7 @@ file_add( YYLTYPE loc, cbl_file_t *file ) {
"%s%s", record_area_name_stem, file->name);
}
field->file = field->parent = symbol_index(e);
field->codeset.set();
return file;
}

View File

@ -104,7 +104,9 @@ NP P+|(P{COUNT})
UNSIGNED [[:space:]]+UNSIGNED
SIGNED [[:space:]]+SIGNED
ALNUM [AX9]+
PREFIX G|N|U|Z
ALNUM [AX9]+|N+|U+
AX [AX]{COUNT}?
B0 [B0/]{COUNT}?
@ -452,16 +454,26 @@ COPY {
myless(0);
}
ATTRIBUTES { return ATTRIBUTES; }
ELEMENT { return ELEMENT; }
ENCODING { return ENCODING; }
EXTEND { return EXTEND;}
INITIALIZE { return INITIALIZE; }
INSPECT { return INSPECT; }
INVOKE { return INVOKE; }
INTRINSIC { return INTRINSIC; }
INVOKE { return INVOKE; }
MERGE { return MERGE; }
NAMESPACE { return NAMESPACE; }
NAMESPACE-PREFIX { return NAMESPACE_PREFIX; }
NONNUMERIC { return NONNUMERIC; }
PROCESSING { return PROCESSING; }
UNSTRING { return UNSTRING; }
XML { return XML; }
XMLGENERATE { return XMLGENERATE; }
XMLPARSE { return XMLPARSE; }
VALIDATING { return VALIDATING; }
XML{SPC}GENERATE { return XMLGENERATE; }
XML{SPC}PARSE { return XMLPARSE; }
XML-DECLARATION { return XML_DECLARATION; }
END-XML { return END_XML; }
ZEROE?S? { return ZERO; }
@ -802,7 +814,11 @@ DEPENDING { return DEPENDING; }
DELIMITER { return DELIMITER; }
DELETE { return DELETE; }
DEFAULT { return DEFAULT; }
DEFAULT { yylval.number = 'D'; return DEFAULT; }
SYSTEM-DEFAULT { yylval.number = 'S'; return DEFAULT; }
USER-DEFAULT { yylval.number = 'U'; return DEFAULT; }
DECLARATIVES { return DECLARATIVES; }
DECIMAL-POINT { return DECIMAL_POINT; }
DEBUGGING { return DEBUGGING; }
@ -1142,9 +1158,9 @@ USE({SPC}FOR)? { return USE; }
return token == NAME88? NAME : token;
}
Z?[''] { yylval.literal.set_prefix(yytext, yyleng-1);
{PREFIX}?[''] { yylval.literal.set_prefix(yytext, yyleng-1);
yy_push_state(quoted1); }
Z?[""] { yylval.literal.set_prefix(yytext, yyleng-1);
{PREFIX}?[""] { yylval.literal.set_prefix(yytext, yyleng-1);
yy_push_state(quoted2); }
N?X/{hexseq} { yylval.literal.set_prefix(yytext, yyleng);
yy_push_state(hex_state); }
@ -1289,12 +1305,12 @@ USE({SPC}FOR)? { return USE; }
}
/* CDF REPLACING needs quotes to distinquish strings from identifiers. */
Z?['']{STRING1}[''] { auto *s = xstrdup(yytext);
{PREFIX}?['']{STRING1}[''] { auto *s = xstrdup(yytext);
std::replace(s, s + strlen(s), '\'', '"');
ydflval.string = s;
update_location_col(s);
return LITERAL; }
Z?[""]{STRING}[""] { ydflval.string = xstrdup(yytext);
{PREFIX}?[""]{STRING}[""] { ydflval.string = xstrdup(yytext);
update_location_col(yytext);
return LITERAL; }
[=]{4} { static char nullstring[] = "";
@ -1403,9 +1419,9 @@ USE({SPC}FOR)? { return USE; }
yylval.string = xstrdup(yytext);
return NAME;
}
Z?[''] { yylval.literal.set_prefix(yytext, yyleng-1);
{PREFIX}?[''] { yylval.literal.set_prefix(yytext, yyleng-1);
BEGIN(quoted1); }
Z?[""] { yylval.literal.set_prefix(yytext, yyleng-1);
{PREFIX}?[""] { yylval.literal.set_prefix(yytext, yyleng-1);
BEGIN(quoted2); }
. { myless(0); yy_pop_state();
@ -1445,11 +1461,11 @@ USE({SPC}FOR)? { return USE; }
BX/{hexseq} { yylval.numstr.radix = hexadecimal_e;
yy_push_state(numstr_state); }
Z?[''] { yylval.literal.set_prefix(yytext, yyleng-1);
{PREFIX}?[''] { yylval.literal.set_prefix(yytext, yyleng-1);
yy_push_state(quoted1); }
Z?[""] { yylval.literal.set_prefix(yytext, yyleng-1);
{PREFIX}?[""] { yylval.literal.set_prefix(yytext, yyleng-1);
yy_push_state(quoted2); }
Z?[""]/{STRING}[""] { yylval.literal.set_prefix(yytext, yyleng-1);
{PREFIX}?[""]/{STRING}[""] { yylval.literal.set_prefix(yytext, yyleng-1);
yy_push_state(quoted2); }
{INTEGERZ}/[[:punct:]][[:space:]]{BLANK_OEOL} { return numstr_of(yytext); }

View File

@ -181,11 +181,13 @@ create_cblc_field_t()
signed char level; // This variable's level in the naming heirarchy
signed char digits; // Digits specified in PIC string; e.g. 5 for 99v999
signed char rdigits; // Digits to the right of the decimal point. 3 for 99v999
cbl_encoding_t encoding; // Same as cbl_field_t::codeset::encoding
int alphabet; // Same as cbl_field_t::codeset::language
} cblc_field_t;
*/
tree retval = NULL_TREE;
retval = gg_get_filelevel_struct_type_decl( "cblc_field_t",
16,
17,
UCHAR_P, "data",
SIZE_T, "capacity",
SIZE_T, "allocated",
@ -201,7 +203,8 @@ create_cblc_field_t()
SCHAR, "level",
SCHAR, "digits",
SCHAR, "rdigits",
INT, "dummy"); // Needed to make it an even number of 32-bit ints
INT, "encoding",
INT, "alphabet");
retval = TREE_TYPE(retval);
return retval;
@ -245,13 +248,15 @@ typedef struct cblc_file_t
int recent_char; // This is the most recent char sent to the file
int recent_key;
cblc_file_prior_op_t prior_op;
int encoding; // Actually cbl_encoding_t
int alphabet; // Actually cbl_encoding_t
int dummy // We need an even number of INT
} cblc_file_t;
*/
tree retval = NULL_TREE;
retval = gg_get_filelevel_struct_type_decl( "cblc_file_t",
31,
33,
CHAR_P, "name",
SIZE_T, "symbol_table_index",
CHAR_P, "filename",
@ -282,6 +287,8 @@ typedef struct cblc_file_t
INT, "recent_char",
INT, "recent_key",
INT, "prior_op",
INT, "encoding", // Actually cbl_encoding_t
INT, "alphabet",
INT, "dummy");
retval = TREE_TYPE(retval);
return retval;

View File

@ -47,6 +47,7 @@
#include "inspect.h"
#include "../../libgcobol/io.h"
#include "genapi.h"
#include "../../libgcobol/charmaps.h"
#pragma GCC diagnostic ignored "-Wunused-result"
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
@ -289,6 +290,10 @@ static symbol_elem_t
elementize( const cbl_field_t& field ) {
symbol_elem_t sym (SymField);
sym.elem.field = field;
// Dubner did the following because he didn't feel like creating yet another
// cbl_field_t constructor that included the hardcoded encoding for the
// global special registers.
sym.elem.field.codeset.encoding = iconv_CP1252_e;
return sym;
}
@ -760,7 +765,7 @@ cbl_field_attr_str( cbl_field_attr_t attr ) {
case leading_e: return "leading";
case separate_e: return "separate";
case envar_e: return "envar";
case dnu_1_e: return "dnu_1";
case encoded_e: return "encoded";
case bool_encoded_e: return "bool";
case hex_encoded_e: return "hex";
case depends_on_e: return "depends_on";
@ -1451,7 +1456,7 @@ cbl_field_t::attr_str( const std::vector<cbl_field_attr_t>& attrs ) const
char *
field_str( const cbl_field_t *field ) {
static char string[3*sizeof(cbl_name_t)];
static char string[4*sizeof(cbl_name_t)];
char *pend = string;
char name[2*sizeof(cbl_name_t)] = "";
@ -1468,6 +1473,12 @@ field_str( const cbl_field_t *field ) {
}
}
if( field->codeset.valid() ) {
strcat(name, " (");
strcat(name, field->codeset.name());
strcat(name, ") ");
}
pend += snprintf(pend, string + sizeof(string) - pend,
"%02u %-20s ", field->level, name);
@ -1481,58 +1492,66 @@ field_str( const cbl_field_t *field ) {
if( 'r' == parredef && field->level == 0 ) parredef = 'p';
if( field->has_attr(typedef_e) ) parredef = 'T';
const char *data = field->data.initial? field->data.initial : NULL;
if( data ) {
auto fig = cbl_figconst_of(data);
const char *init = field->data.initial? field->data.initial : NULL;
if( init ) {
auto fig = cbl_figconst_of(init);
if( normal_value_e != fig ) {
data = cbl_figconst_str(fig);
init = cbl_figconst_str(fig);
} else {
char *s;
auto n = asprintf(&s, "'%s'", data);
gcc_assert(n);
auto eodata = data + field->data.capacity;
// It is possible for data.initial to be shorter than capacity.
#if 0
// At this point, we might have to convert 'init' back to ASCII
char *false_init = static_cast<char *>(xmalloc(field->init.capacity+1));
memcpy(false_init, field->init.initial, field->data.capacity);
false_data[field->data.capacity] = '\0';
size_t charsout;
// This whole thing needs to be reexamined. There is an assumption for
// FldAlphanumeric values that the valid data in data.initial be the same
// length as data.capacity. But that does not hold true for other types.
// For example, a PIC 9V9 has a capacity of two, but the initial
// string provided by the COBOL programmer might be "1.2". Likewise, a
// PIC 999999 (capacity 5) might have a value of "1".
cbl_encoding_t enc_from = field->codeset.encoding;
if( field->type == FldNumericDisplay )
{
// Apparently we need to trace back the meaning of data.literal for
// field::type == FldNumericDisplay
enc_from = DEFAULT_CHARMAP_SOURCE;
}
for(size_t i = 0; i<field->data.capacity; i++)
{
if( data[i] == '\0' )
{
eodata = data + i;
break;
init = __gg__iconverter(enc_from,
DEFAULT_CHARMAP_SOURCE,
false_data,
field->data.capacity,
&charsout);
#endif
auto eoinit = init + strlen(init);
char *s = xasprintf("'%s'", init);
// No NUL within the initial data.
auto ok = std::none_of( init, eoinit,
[]( char ch ) { return ch == '\0'; } );
assert(ok);
// If any of the init are unprintable, provide a hex version.
if( ! std::all_of(init, eoinit, fisprint) ) {
if( is_elementary(field->type) && field->type != FldPointer ) {
const size_t len = strlen(s) + 8 + 2 * field->data.capacity;
s = reinterpret_cast<char*>(xrealloc(s, len));
strcat( s, " (0x" );
char *p = s + strlen(s);
for( auto d=init; d < eoinit; d++ ) {
p += sprintf(p, "%02x", static_cast<unsigned char>(*d));
}
strcat( s, ")" );
assert(strlen(s) < len);
}
}
if( eodata != std::find_if_not(data, eodata, fisprint) ) {
char *p = reinterpret_cast<char*>(xrealloc(s, n + 8 + 2 * field->data.capacity));
if( is_elementary(field->type) &&
field->type != FldPointer && p != NULL ) {
s = p;
p += n;
strcat( p, "(0x" );
p += 3;
for( auto d=data; d < eodata; d++ ) {
p += sprintf(p, "%02x", *d);
}
strcat( p++, ")" );
}
}
data = s;
init = s;
}
} else {
data = "NULL";
init = "NULL";
if( field->type == FldSwitch ) {
data = xasprintf("0x%02x", field->data.upsi_mask_of()->value);
init = xasprintf("0x%02x", field->data.upsi_mask_of()->value);
}
}
if( field->level == 88 ) {
const auto& dom = *field->data.domain_of();
data = xasprintf("%s%s %s - %s%s",
init = xasprintf("%s%s %s - %s%s",
dom.first.all? "A" : "",
value_or_figconst_name(dom.first.name()) ,
dom.first.is_numeric? "(num)" : "",
@ -1551,7 +1570,7 @@ field_str( const cbl_field_t *field ) {
intermediate_e, embiggened_e, all_alpha_e, all_x_e,
all_ax_e, prog_ptr_e, scaled_e, refmod_e, based_e, any_length_e,
/* global_e, external_e, */ blank_zero_e, /* linkage_e, local_e, */ leading_e,
separate_e, envar_e, dnu_1_e, bool_encoded_e, hex_encoded_e,
separate_e, envar_e, encoded_e, bool_encoded_e, hex_encoded_e,
depends_on_e, /* initialized_e, */ has_value_e, ieeedec_e, big_endian_e,
same_as_e, record_key_e, typedef_e, strongdef_e,
};
@ -1564,7 +1583,7 @@ field_str( const cbl_field_t *field ) {
storage_type,
field->data.memsize,
field->data.capacity, field->data.digits, field->data.rdigits,
data, field->attr_str(attrs), field->line );
init, field->attr_str(attrs), field->line );
return string;
}
@ -1593,22 +1612,14 @@ static void
extend_66_capacity( cbl_field_t *alias ) {
static_assert(sizeof(symbol_elem_t*) == sizeof(const char *),
"all pointers must be same size");
assert(alias->data.picture);
assert(alias->level == 66);
assert(alias->type == FldGroup);
assert(alias->data.picture);
// If data.picture is not NULL, it is the THRU symbol, see symbol_field_alias2.
symbol_elem_t *e = symbol_at(alias->parent);
symbol_elem_t *e2 =
reinterpret_cast<symbol_elem_t*>(const_cast<char*>(alias->data.picture));
#ifndef __OPTIMIZE__
#pragma message "The assert(e < e2) needs fixing"
// The following assert fails when valgrind is involved. This is the known
// problem of expecting mmap() to put new memory maps after older memory
// maps; that assumption fails when valgrind is involved.
// For now I am defeating the assert when using -O0 so that I can run the
// NIST "make valgrind" tests. But this should be fixed so that the
// symbol table index is used, not the entry locations.
assert(e < e2);
#endif
assert(symbol_index(e) < symbol_index(e2));
alias->data.picture = NULL;
capacity_of cap;
@ -1853,6 +1864,44 @@ symbols_update( size_t first, bool parsed_ok ) {
field->line, field->level_str(), field->name);
continue;
}
if( is_numeric(field) && ! field->has_attr(constant_e) ) {
if( field->data.capacity == 0 ) {
ERROR_FIELD(field, "numeric %qs has USAGE that requires PICTURE %s",
field->name, field->data.initial);
}
}
if( ! field->codeset.valid() ) {
switch(field->type) {
case FldForward:
case FldInvalid:
gcc_unreachable();
case FldAlphaEdited:
case FldAlphanumeric:
case FldClass:
case FldDisplay:
case FldGroup:
case FldLiteralA:
case FldNumericDisplay:
case FldNumericEdited:
if( ! (field->has_attr(register_e) || field->has_attr(hex_encoded_e)) ) {
error_msg(symbol_field_location(field_index(field)),
"internal: %qs encoding not defined", field->name);
}
break;
case FldConditional:
case FldFloat:
case FldIndex:
case FldLiteralN:
case FldNumericBin5:
case FldNumericBinary:
case FldPacked:
case FldPointer:
case FldSwitch:
break;
}
}
assert( ! field->is_typedef() );
@ -2076,6 +2125,12 @@ symbol_field_parent_set( cbl_field_t *field )
return NULL;
}
prior->type = FldGroup;
prior->codeset.set();
//// if( ! prior->codeset.set() ) { // maybe just ignore?
//// Dubner sez: Ignore. This was triggering with -finternal-ebcdic
//// ERROR_FIELD(prior, "%qs is already National", prior->name);
//// return NULL;
//// }
field->attr |= numeric_group_attrs(prior);
}
// verify level 88 domain value
@ -2134,7 +2189,7 @@ add_token( symbol_elem_t sym ) {
}
/*
* When adding registers, be sure to add a complementary cblc_field_t
* When adding special registers, be sure to create the actual cblc_field_t
* in libgcobol/constants.cc.
*/
void
@ -2153,41 +2208,43 @@ symbol_table_init(void) {
// These should match the definitions in libgcobol/constants.cc
static cbl_field_t constants[] = {
{ 0, FldAlphanumeric, FldInvalid, space_value_e | constq | register_e, 0, 0, 0, nonarray, 0,
"SPACE", 0, {}, {1,1,0,0, " \0\xFF"}, NULL },
{ 0, FldAlphanumeric, FldInvalid, space_value_e | constq | register_e, 0, 0, 0, nonarray, 0,
"SPACES", 0, {}, {1,1,0,0, " \0\xFF"}, NULL },
{ 0, FldAlphanumeric, FldInvalid, low_value_e | constq | register_e, 0, 0, 0, nonarray, 0,
"LOW_VALUES", 0, {}, {1,1,0,0, "L\0\xFF"}, NULL },
{ 0, FldAlphanumeric, FldInvalid, zero_value_e | constq | register_e, 0, 0, 0, nonarray, 0,
"ZEROS", 0, {}, {1,1,0,0, "0"}, NULL },
{ 0, FldAlphanumeric, FldInvalid, high_value_e | constq | register_e, 0, 0, 0, nonarray, 0,
"HIGH_VALUES", 0, {}, {1,1,0,0, "H\0\xFF"}, NULL },
{ FldAlphanumeric, space_value_e | constq | register_e,
{1,1,0,0, " \0\xFF"}, 0, "SPACE" },
{ FldAlphanumeric, space_value_e | constq | register_e,
{1,1,0,0, " \0\xFF"}, 0, "SPACES" },
{ FldAlphanumeric, low_value_e | constq | register_e,
{1,1,0,0, "L\0\xFF"}, 0, "LOW_VALUES" },
{ FldAlphanumeric, zero_value_e | constq | register_e,
{1,1,0,0, "0"}, 0, "ZEROS" },
{ FldAlphanumeric, high_value_e | constq | register_e,
{1,1,0,0, "H\0\xFF"}, 0, "HIGH_VALUES" },
// IBM standard: QUOTE is a double-quote unless APOST compiler option
{ 0, FldAlphanumeric, FldInvalid, quote_value_e | constq | register_e , 0, 0, 0, nonarray, 0,
"QUOTES", 0, {}, {1,1,0,0, "\"\0\xFF"}, NULL },
{ 0, FldPointer, FldPointer, constq | register_e , 0, 0, 0, nonarray, 0,
"NULLS", 0, {}, {8,8,0,0, zeroes_for_null_pointer}, NULL },
{ FldAlphanumeric, quote_value_e | constq | register_e ,
{1,1,0,0, "\"\0\xFF"}, 0, "QUOTES" },
{ FldPointer, constq | register_e ,
{8,8,0,0, zeroes_for_null_pointer}, 0, "NULLS" },
// IBM defines TALLY
// 01 TALLY GLOBAL PICTURE 9(5) USAGE BINARY VALUE ZERO.
{ 0, FldNumericBin5, FldInvalid, signable_e | register_e, 0, 0, 0, nonarray, 0,
"_TALLY", 0, {}, {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, NULL },
{ FldNumericBin5, signable_e | register_e,
{16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, 0, "_TALLY" },
// 01 ARGI is the current index into the argv array
{ 0, FldNumericBin5, FldInvalid, signable_e | register_e, 0, 0, 0, nonarray, 0,
"_ARGI", 0, {}, {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, NULL },
{ FldNumericBin5, signable_e | register_e,
{16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, 0, "_ARGI" },
// These last two don't require actual storage; they get BOOL var_decl_node
// in parser_symbol_add()
{ 0, FldConditional, FldInvalid, constant_e | register_e , 0, 0, 0, nonarray, 0,
"_VERY_TRUE", 0, {}, {1,1,0,0, ""}, NULL },
{ 0, FldConditional, FldInvalid, constant_e | register_e , 0, 0, 0, nonarray, 0,
"_VERY_FALSE", 0, {}, {1,1,0,0, ""}, NULL },
{ FldConditional, constant_e | register_e , {1,1,0,0, ""}, 0, "_VERY_TRUE" },
{ FldConditional, constant_e | register_e , {1,1,0,0, ""}, 0, "_VERY_FALSE" },
};
for( struct cbl_field_t *f = constants;
f < constants + COUNT_OF(constants); f++ ) {
f->our_index = table.nelem;
struct symbol_elem_t sym(SymField, 0);
sym.elem.field = *f;
// The following makes these constants match the definitions in
// constants.cc. Consider expanding the constructor fo cbl_field_t to
// handle encoding.
sym.elem.field.codeset.encoding = iconv_CP1252_e;
table.elems[table.nelem++] = sym;
}
@ -2253,30 +2310,30 @@ symbol_table_init(void) {
**/
static cbl_field_t debug_registers[] = {
{ 0, FldGroup, FldInvalid, register_e, 0,0,1, nonarray, 0,
"DEBUG-ITEM", 0, {}, {132,132,0,0, NULL}, NULL },
{ 0, FldAlphanumeric, FldInvalid, register_e, 0,0,2, nonarray, 0,
"DEBUG-LINE", 0, {}, {6,6,0,0, " "}, NULL },
{ 0, FldAlphanumeric, FldInvalid, register_e|filler_e, 0,0,2, nonarray, 0,
"FILLER", 0, {}, {1,1,0,0, " "}, NULL },
{ 0, FldAlphanumeric, FldInvalid, register_e, 0,0,2, nonarray, 0,
"DEBUG-NAME", 0, {}, {30,30,0,0, NULL}, NULL },
{ 0, FldAlphanumeric, FldInvalid, register_e|filler_e, 0,0,2, nonarray, 0,
"FILLER", 0, {}, {1,1,0,0, " "}, NULL },
{ 0, FldNumericDisplay, FldInvalid, signable_e | register_e | leading_e | separate_e, 0,0,2, nonarray, 0,
"DEBUG-SUB-1", 0, {}, {5,5,4,0, NULL}, NULL },
{ 0, FldAlphanumeric, FldInvalid, register_e|filler_e, 0,0,2, nonarray, 0,
"FILLER", 0, {}, {1,1,0,0, " "}, NULL },
{ 0, FldNumericDisplay, FldInvalid, signable_e | register_e | leading_e | separate_e, 0,0,2, nonarray, 0,
"DEBUG-SUB-2", 0, {}, {5,5,4,0, NULL}, NULL },
{ 0, FldAlphanumeric, FldInvalid, register_e|filler_e, 0,0,2, nonarray, 0,
"FILLER", 0, {}, {1,1,0,0, " "}, NULL },
{ 0, FldNumericDisplay, FldInvalid, signable_e | register_e | leading_e | separate_e, 0,0,2, nonarray, 0,
"DEBUG-SUB-3", 0, {}, {5,5,4,0, NULL}, NULL },
{ 0, FldAlphanumeric, FldInvalid, register_e|filler_e, 0,0,2, nonarray, 0,
"FILLER", 0, {}, {1,1,0,0, " "}, NULL },
{ 0, FldAlphanumeric, FldInvalid, signable_e | register_e, 0,0,2, nonarray, 0,
"DEBUG-CONTENTS", 0, {}, {76,76,0,0, NULL}, NULL },
{ FldGroup, register_e,
{132,132,0,0, NULL}, 1, "DEBUG-ITEM" },
{ FldAlphanumeric, register_e,
{6,6,0,0, " "}, 2, "DEBUG-LINE" },
{ FldAlphanumeric, register_e|filler_e,
{1,1,0,0, " "}, 2, "FILLER" },
{ FldAlphanumeric, register_e,
{30,30,0,0, NULL}, 2, "DEBUG-NAME" },
{ FldAlphanumeric, register_e|filler_e,
{1,1,0,0, " "}, 2, "FILLER" },
{ FldNumericDisplay, signable_e | register_e | leading_e | separate_e,
{5,5,4,0, NULL}, 2, "DEBUG-SUB-1" },
{ FldAlphanumeric, register_e|filler_e,
{1,1,0,0, " "}, 2, "FILLER" },
{ FldNumericDisplay, signable_e | register_e | leading_e | separate_e,
{5,5,4,0, NULL}, 2, "DEBUG-SUB-2" },
{ FldAlphanumeric, register_e|filler_e,
{1,1,0,0, " "}, 2, "FILLER" },
{ FldNumericDisplay, signable_e | register_e | leading_e | separate_e,
{5,5,4,0, NULL}, 2, "DEBUG-SUB-3" },
{ FldAlphanumeric, register_e | filler_e,
{1,1,0,0, " "}, 2, "FILLER" },
{ FldAlphanumeric, signable_e | register_e,
{76,76,0,0, NULL}, 2, "DEBUG-CONTENTS" },
};
// debug registers
@ -2296,22 +2353,14 @@ symbol_table_init(void) {
std::for_each(debug_start+1, p, parent_elem_set(debug_start - table.elems));
static cbl_field_t special_registers[] = {
{ 0, FldNumericDisplay, FldInvalid, register_e, 0, 0, 0, nonarray, 0, "_FILE_STATUS",
0, {}, {2,2,2,0, NULL}, NULL },
{ 0, FldNumericBin5, FldInvalid, register_e, 0, 0, 0, nonarray, 0, "UPSI-0",
0, {}, {2,2,4,0, NULL}, NULL },
{ 0, FldNumericBin5, FldInvalid, signable_e|register_e, 0, 0, 0, nonarray, 0, "RETURN-CODE",
0, {}, {2,2,4,0, NULL}, NULL },
{ 0, FldNumericBin5, FldInvalid, register_e, 0, 0, 0, nonarray, 0, "LINAGE-COUNTER",
0, {}, {2,2,4,0, NULL}, NULL },
{ 0, FldLiteralA, FldInvalid, register_e, 0, 0, 0, nonarray, 0, "_dev_stdin",
0, {}, {0,0,0,0, "/dev/stdin"}, NULL },
{ 0, FldLiteralA, FldInvalid, constq|register_e, 0, 0, 0, nonarray, 0, "_dev_stdout",
0, {}, {0,0,0,0, "/dev/stdout"}, NULL },
{ 0, FldLiteralA, FldInvalid, constq|register_e, 0, 0, 0, nonarray, 0, "_dev_stderr",
0, {}, {0,0,0,0, "/dev/stderr"}, NULL },
{ 0, FldLiteralA, FldInvalid, constq|register_e, 0, 0, 0, nonarray, 0, "_dev_null",
0, {}, {0,0,0,0, "/dev/null"}, NULL },
{ FldNumericDisplay, register_e, {2,2,2,0, NULL}, 0, "_FILE_STATUS" },
{ FldNumericBin5, register_e, {2,2,4,0, NULL}, 0, "UPSI-0" },
{ FldNumericBin5, signable_e|register_e, {2,2,4,0, NULL}, 0, "RETURN-CODE" },
{ FldNumericBin5, register_e, {2,2,4,0, NULL}, 0, "LINAGE-COUNTER" },
{ FldLiteralA, register_e, {0,0,0,0, "/dev/stdin"}, 0, "_dev_stdin" },
{ FldLiteralA, constq|register_e, {0,0,0,0, "/dev/stdout"}, 0, "_dev_stdout" },
{ FldLiteralA, constq|register_e, {0,0,0,0, "/dev/stderr"}, 0, "_dev_stderr" },
{ FldLiteralA, constq|register_e, {0,0,0,0, "/dev/null"}, 0, "_dev_null" },
};
// special registers
@ -2528,6 +2577,9 @@ symbol_field_add( size_t program, struct cbl_field_t *field )
field->attr = inherit & parent->attr;
field->attr |= numeric_group_attrs(parent);
field->usage = parent->usage;
if( field->level == 66 || field->level == 88 ) {
field->codeset = parent->codeset;
}
// BINARY-LONG, for example, sets capacity.
if( is_numeric(parent->usage) && parent->data.capacity > 0 ) {
field->type = parent->usage;
@ -2696,11 +2748,8 @@ symbol_field_forward_add( size_t program, size_t parent,
auto e = symbol_field(program, parent, name);
if( e ) return e;
struct cbl_field_t field = { 0,
FldForward, FldInvalid, 0, parent, 0, 0,
nonarray, line, "",
0, cbl_field_t::linkage_t(),
{0,0,0,0, " "}, NULL };
cbl_field_t field = { FldForward, 0, line };
field.parent = parent;
if( sizeof(field.name) < strlen(name) ) {
dbgmsg("%s:%d: logic error: name %s too long", __func__, __LINE__, name);
return NULL;
@ -2886,6 +2935,7 @@ symbol_field_same_as( cbl_field_t *tgt, const cbl_field_t *src ) {
cbl_field_t dup = {};
dup.parent = field_index(tgt);
dup.line = tgt->line;
dup.codeset = tgt->codeset;
elem_group_t group(++bog, eog);
@ -3097,6 +3147,87 @@ constant_of( size_t isym )
return field;
}
/*
* As parsed, the alphabet reflects the encoding of the source code. If the
* program uses a different encoding for alphanumeric, convert the alphabet to
* that.
*
* Because a custom alphabet is rare and occurs at most only once per program,
* we don't attempt to avoid re-encoding. "Conversion" of ASCII to ASCII is at
* most 256 calls to iconv(3).
*/
void
cbl_alphabet_t::reencode() {
const unsigned char * const pend = alphabet + sizeof(alphabet);
std::vector<char> tgt(256, (char)0xFF);
/*
* For now, assume CP1252 source-code encoding because we're not capturing it
* anywhere except in cbl_field_t::internalize(). The only known examples of
* a custom alphabet are from NIST, which of course are ASCII.
*/
const char *fromcode = __gg__encoding_iconv_name(CP1252_e);
const char *tocode = __gg__encoding_iconv_name(current_encoding('A'));
iconv_t cd = iconv_open(tocode, fromcode);
#if optimal_reencode
if( fromcode == tocode ) { // semantically
tgt.resize(0);
return tgt; // Return empty vector; caller copies zero bytes.
}
#endif
/*
* Each position in the alphabet array represents a letter in the source-code
* encoding. The value at that position represents the letter's collation
* position, its sort order. For each letter in alphabet, determine value of
* that letter in the alphanumeric encoding, and set its collation position
* in that alphabet.
*/
for( const unsigned char *p = alphabet; p < pend; p++ ) {
if( *p == 0xFF ) continue;
unsigned char ch = p - alphabet;
unsigned char pos[8] = {};
size_t inbytesleft = 1, outbytesleft = sizeof(pos);
char *inbuf = reinterpret_cast<char*>(&ch),
*outbuf = reinterpret_cast<char*>(pos);
size_t n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
if( n == size_t(-1) ) {
error_msg(loc, "%s character '%c' (%x hex) cannot be converted to %s",
fromcode, ch, ch, tocode);
continue;
}
if( n != 0 ) {
dbgmsg("%s character '%c' (%x hex) irreversibly converted to %s",
fromcode, ch, ch, tocode);
}
assert(outbytesleft < sizeof(pos));
n = sizeof(pos) - outbytesleft;
if( 1 < n ) {
error_msg(loc, "%s character '%c' (%x hex) requires %zu bytes as %s",
fromcode, ch, ch, n, tocode);
continue;
}
if( ch == low_index ) {
low_index = pos[0];
}
if( ch == last_index ) {
last_index = pos[0];
}
if( ch == high_index ) {
high_index = pos[0];
}
tgt.at(pos[0]) = *p;
}
std::copy(tgt.begin(), tgt.end(), alphabet);
}
bool
cbl_alphabet_t::assign( const YYLTYPE& loc, unsigned char ch, unsigned char high_value ) {
if( alphabet[ch] == 0xFF || alphabet[ch] == high_value) {
@ -3158,33 +3289,21 @@ new_temporary_impl( enum cbl_field_type_t type, const cbl_name_t name = nullptr
{
extern int yylineno;
static const struct cbl_field_t empty_alpha = {
0, FldAlphanumeric, FldInvalid,
intermediate_e, 0, 0, 0, nonarray, 0, "",
0, cbl_field_t::linkage_t(),
{MAXIMUM_ALPHA_LENGTH, MAXIMUM_ALPHA_LENGTH,
0, 0, NULL}, NULL };
FldAlphanumeric, intermediate_e,
{MAXIMUM_ALPHA_LENGTH,
MAXIMUM_ALPHA_LENGTH, 0, 0, NULL} };
static const struct cbl_field_t empty_float = {
0, FldFloat, FldInvalid,
intermediate_e,
0, 0, 0, nonarray, 0, "",
0, cbl_field_t::linkage_t(),
{16, 16, 32, 0, NULL}, NULL };
FldFloat, intermediate_e,
{16, 16, 32, 0, NULL} };
static const struct cbl_field_t empty_comp5 = {
0, FldNumericBin5, FldInvalid,
FldNumericBin5,
signable_e | intermediate_e,
0, 0, 0, nonarray, 0, "",
0, cbl_field_t::linkage_t(),
{16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, NULL };
{16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL} };
static const struct cbl_field_t empty_conditional = {
0, FldConditional, FldInvalid, intermediate_e,
0, 0, 0, nonarray, 0, "",
0, cbl_field_t::linkage_t(),
{}, NULL };
FldConditional, intermediate_e, cbl_field_data_t{} };
static struct cbl_field_t empty_literal = {
0, FldInvalid, FldInvalid, CONSTANT_E,
0, 0, 0, nonarray, 0, "",
0, cbl_field_t::linkage_t(),
{}, NULL };
FldInvalid, CONSTANT_E, cbl_field_data_t{} };
struct cbl_field_t *f = new cbl_field_t;
f->type = type;
@ -3200,7 +3319,6 @@ new_temporary_impl( enum cbl_field_type_t type, const cbl_name_t name = nullptr
case FldSwitch:
case FldDisplay:
case FldPointer:
case FldBlob:
break;
case FldConditional:
*f = empty_conditional;
@ -3234,6 +3352,8 @@ new_temporary_impl( enum cbl_field_type_t type, const cbl_name_t name = nullptr
f->data.initial = name; // capture e.g. the function name
f->codeset.set();
return f;
}
@ -3246,12 +3366,17 @@ new_temporary_decl() {
static inline cbl_field_t *
parser_symbol_add2( cbl_field_t *field ) {
if( ! field->codeset.valid() ) {
dbgmsg( "%s:%d: %s (%s) has no encoding", __func__, __LINE__,
field->name, cbl_field_type_str(field->type) );
}
parser_symbol_add(field);
return field;
}
static cbl_field_t *
new_literal_add( const char initial[], uint32_t len, enum cbl_field_attr_t attr ) {
new_literal_add( const char initial[], uint32_t len,
cbl_field_attr_t attr, cbl_encoding_t encoding ) {
cbl_field_t *field = NULL;
if( !(attr & quoted_e) )
{
@ -3266,9 +3391,11 @@ new_literal_add( const char initial[], uint32_t len, enum cbl_field_attr_t attr
field->attr |= attr;
field->data.initial = len > 0? initial : empty;
field->data.capacity = len;
}
if( ! field->internalize() )
{
if( ! field->has_attr(hex_encoded_e) ) {
field->codeset.set(encoding);
if( ! field->internalize() ) {
ERROR_FIELD(field, "inconsistent string literal encoding for '%s'", initial);
}
}
@ -3286,22 +3413,26 @@ new_literal_add( const char initial[], uint32_t len, enum cbl_field_attr_t attr
static temporaries_t temporaries;
cbl_field_t *
temporaries_t::literal( const char value[], uint32_t len, cbl_field_attr_t attr ) {
auto key = literal_an(value, quoted_e == (attr & quoted_e));
temporaries_t::literal( uint32_t len, const char value[],
cbl_field_attr_t attr, cbl_encoding_t encoding ) {
bool is_quoted2 = quoted_e == (attr & quoted_e);
bool is_verbatim = hex_encoded_e == (attr & hex_encoded_e);
auto key = literal_an(value, is_quoted2, is_verbatim);
if( 0 == (attr & hex_encoded_e) ) {
if( ! is_verbatim ) { // TODO: try without this test once National is ready
auto p = literals.find(key);
if( p != literals.end() ) {
cbl_field_t *field = p->second;
return field;
}
}
return literals[key] = new_literal_add(value, len, attr);
return literals[key] = new_literal_add(value, len, attr, encoding);
}
cbl_field_t *
new_literal( uint32_t len, const char initial[], enum cbl_field_attr_t attr ) {
return temporaries.literal(initial, len, attr);
new_literal( uint32_t len, const char initial[],
cbl_field_attr_t attr, cbl_encoding_t encoding ) {
return temporaries.literal(len, initial, attr, encoding);
}
void
@ -3400,6 +3531,11 @@ new_alphanumeric( size_t capacity, const cbl_name_t name = nullptr ) {
return parser_symbol_add2(field);
}
extern os_locale_t os_locale;
const encodings_t cbl_field_t::codeset_t::standard_internal = { iconv_CP1252_e, "CP1252" };
#define standard_internal cbl_field_t::codeset_t::standard_internal
cbl_field_t *
new_temporary( enum cbl_field_type_t type, const char *initial ) {
if( ! initial ) {
@ -3407,8 +3543,9 @@ new_temporary( enum cbl_field_type_t type, const char *initial ) {
return temporaries.acquire(type, initial);
}
if( is_literal(type) ) {
auto field = temporaries.literal(initial,
type == FldLiteralA? quoted_e : none_e);
auto field = temporaries.literal(strlen(initial), initial,
type == FldLiteralA? quoted_e : none_e,
standard_internal.type);
return field;
}
cbl_field_t *field = new_temporary_impl(type, initial);
@ -3451,12 +3588,38 @@ new_temporary_clone( const cbl_field_t *orig) {
temporaries.add(field);
}
field->data = orig->data;
if( field->type == FldNumericBin5 ) field->type = orig->type;
if( field->type == FldNumericBin5 ) {
field->type = orig->type;
field->codeset = orig->codeset;
}
field->attr = intermediate_e;
return parser_symbol_add2(field);
}
/*
* This set of ASCII-like encodings is incomplete and possibly wrong. A
* complete definition would better supported with a Boolean in enccodings_t.
* If it returns false pessimistically, the only consequence is inefficiency:
* the string is processed by iconv(3).
*/
bool
cbl_field_t::holds_ascii() const {
// True if the encoding is a superset of ASCII.
switch(codeset.encoding) {
case ASCII_e:
case CP1252_e:
case iso646_e:
return true;
default:
if( iconv_1026_e <= codeset.encoding &&
codeset.encoding <= iconv_ANSI_X3_4_e ) {
return true;
}
}
return false;
}
bool
cbl_field_t::is_ascii() const {
return std::all_of( data.initial,
@ -3482,8 +3645,6 @@ cbl_field_t::is_ascii() const {
* compilation, if it moves off the default, it adjusts only once, and
* never reverts.
*/
static const char standard_internal[] = "CP1252";
extern os_locale_t os_locale;
static const char *
guess_encoding() {
@ -3500,52 +3661,88 @@ guess_encoding() {
}
}
return standard_internal;
return standard_internal.name;
}
const char *
cbl_field_t::internalize() {
static const char *tocode = standard_internal;
static const char *fromcode = guess_encoding();
static iconv_t cd = iconv_open(tocode, fromcode);
static const size_t noconv = size_t(-1);
static std::map<std::string, iconv_t> tocodes;
if (cd == (iconv_t)-1) {
yywarn("failed %<iconv_open%> tocode = %<%s%> fromcode = %s", tocode, fromcode);
if( ! codeset.valid() ) {
dbgmsg("%s:%d: not converting %s", __func__, __LINE__, data.initial);
return data.initial;
}
bool using_assumed = fromcode == os_locale.assumed;
const char *tocode = __gg__encoding_iconv_name(codeset.encoding);
std::string toname(tocode);
auto p = tocodes.find(toname);
if( p == tocodes.end() ) {
tocodes[toname] = iconv_open(tocode, fromcode);
}
iconv_t cd = tocodes[toname];
if (cd == (iconv_t)-1) {
yywarn("failed %<iconv_open%> tocode = %qs fromcode = %qs", tocode, fromcode);
}
if( fromcode == tocode || has_attr(hex_encoded_e) ) {
return data.initial;
}
if( is_ascii() ) return data.initial;
if( data.capacity == 0 ) {
assert(0 == strlen(data.initial));
return data.initial;
}
if( holds_ascii() && is_ascii() ) return data.initial;
assert(data.capacity > 0);
std::vector<char> output(data.capacity + 2, '\0');
char *out = output.data();
// The final 2 bytes of the output are "!\0". It's a debugging sentinel.
size_t n;
size_t inbytesleft = data.capacity;
size_t outbytesleft = inbytesleft;
char *in = const_cast<char*>(data.initial);
size_t n, inbytesleft = data.capacity, outbytesleft = output.size();
char *out = static_cast<char*>( xcalloc(1, outbytesleft + 2) ), *output = out;
if( !is_literal(this) && inbytesleft < strlen(data.initial) ) {
inbytesleft = strlen(data.initial);
}
const unsigned int in_len = inbytesleft;
assert(fromcode != tocode);
while( (n = iconv( cd, &in, &inbytesleft, &out, &outbytesleft)) == noconv ) {
if( !using_assumed ) break; // change only once
fromcode = guess_encoding();
cd = iconv_open(tocode, fromcode);
/*
* If we're currently assuming the source code is encoded according to the
* locale (the default), and there's an iconv failure, try once more using a
* different assumption, that the source code is encoded as CP1252.
*
* This heuristic means that some UTF-8 literals could be converted until a
* CP1252 byte is encountered. We could be stricter about that.
*
* Also possible is a failure to avoid iconv with fromcode and tocode denote
* the same encoding but with different spellings, e.g. CP1252 and CP1252//.
*/
do {
if( (n = iconv( cd, &in, &inbytesleft, &out, &outbytesleft)) == noconv ) {
if( fromcode == os_locale.assumed ) {
fromcode = standard_internal.name;
tocodes.clear();
cd = tocodes[toname] = iconv_open(tocode, fromcode);
dbgmsg("%s: trying input encoding %s", __func__, fromcode);
if( fromcode == tocode ) break;
if( fromcode == tocode ) return data.initial; // no conversion required.
n = noconv - 1; // try again
}
}
if( n == 0 ) break;
} while( n != noconv );
if( n == noconv ) {
if( !using_assumed ) {
yywarn("failed to decode '%s' as %s", data.initial, fromcode);
return NULL;
}
size_t i = in_len - inbytesleft;
yywarn("failed to encode %s %qs as %s (%zu of %u bytes left)",
fromcode, data.initial + i, tocode, inbytesleft, in_len);
if( false ) return NULL;
return data.initial;
}
@ -3558,27 +3755,47 @@ cbl_field_t::internalize() {
}
// Replace data.initial only if iconv output differs.
if( 0 != memcmp(data.initial, output.data(), out - output.data()) ) {
assert(out <= output.data() + data.capacity);
if( 0 != memcmp(data.initial, output, out - output) ) {
assert(out <= output + data.capacity);
dbgmsg("%s: converted '%.*s' to %s",
__func__, data.capacity, data.initial, tocode);
struct localspace_t {
char space[4];
size_t len, erc;
explicit localspace_t( iconv_t cd ) {
static char input[1] = { 0x20 };
size_t inbytesleft2 = sizeof(input), outbytesleft2 = sizeof(space);
char *in2 = input, *out2 = space;
int len = int(out - output.data());
char *mem = static_cast<char*>( xcalloc(1, output.size()) );
erc = iconv(cd, &in2, &inbytesleft2, &out2, &outbytesleft2);
len = out2 - space;
}
bool valid() const { return 0 < len && erc != size_t(-1); }
} spc(cd);
// Set the new memory to all blanks, tacking a '!' on the end.
memset(mem, 0x20, output.size() - 1);
mem[ output.size() - 2] = '!';
if( ! spc.valid() ) {
dbgmsg("%s:%d: iconv failed for %s: %s", __func__, __LINE__,
tocode, xstrerror(errno));
ERROR_FIELD(this, "iconv failed: %s", xstrerror(errno));
return data.initial;
}
assert( 0 < spc.len && spc.valid() );
if( is_literal(this) ) {
data.capacity = len; // trailing '!' will be overwritten
data.capacity = out - output; // trailing '!' will be overwritten
}
memcpy(mem, output.data(), len); // copy only as much as iconv converted
// Pad with trailing blanks, tacking a '!' on the end.
for( const char *eout = output + data.capacity;
out < eout;
out += spc.len ) {
memcpy(out, spc.space, spc.len);
}
out[0] = '!';
assert(out[1] == '\0');
free(const_cast<char*>(data.initial));
data.initial = mem;
data.initial = output;
} else {
free(output);
}
return data.initial;
@ -4597,6 +4814,8 @@ cbl_file_key_t::str() const {
*/
void
cbl_file_t::deforward() {
const size_t ifile( symbol_index(symbol_elem_of(this)) );
if( user_status ) {
user_status = symbol_forward_to(user_status);
@ -4608,7 +4827,7 @@ cbl_file_t::deforward() {
}
for( auto p = keys; p < keys + nkey; p++ ) {
p->deforward( symbol_index(symbol_elem_of(this)) );
p->deforward(ifile);
}
}
@ -4728,7 +4947,6 @@ has_value( cbl_field_type_t type ) {
case FldForward:
case FldSwitch:
case FldDisplay:
case FldBlob:
return false;
case FldIndex:
case FldPointer:

View File

@ -118,7 +118,6 @@ is_numeric( cbl_field_type_t type ) {
case FldSwitch:
case FldDisplay:
case FldPointer: // not numeric because not computable, only settable
case FldBlob:
return false;
// These types are computable or, in the case of FldIndex, may be
// arbitrarily set and incremented.
@ -500,8 +499,12 @@ struct cbl_subtable_t {
size_t offset, isym;
};
const char * __gg__encoding_iconv_name( cbl_encoding_t encoding );
bool is_elementary( enum cbl_field_type_t type );
cbl_encoding_t current_encoding( char a_or_n );
/* In cbl_field_t:
* 'offset' is overloaded for FldAlphanumeric/temporary/intermediate variables
* For such variables, offset is a copy of the initial capacity. This is in
@ -512,13 +515,72 @@ bool is_elementary( enum cbl_field_type_t type );
struct cbl_field_t {
size_t offset;
enum cbl_field_type_t type, usage;
cbl_field_type_t type, usage;
uint64_t attr;
static_assert(sizeof(attr) == sizeof(cbl_field_attr_t), "wrong attr size");
size_t parent; // symbols[] index of our parent
size_t our_index; // symbols[] index of this field, set in symbol_add()
uint32_t level;
struct cbl_occurs_t occurs;
cbl_occurs_t occurs;
struct codeset_t {
static const encodings_t standard_internal;
cbl_encoding_t encoding;
size_t alphabet; // unlikely
explicit codeset_t(cbl_encoding_t encoding = custom_encoding_e,
size_t alphabet = 0) // combination means "not set"
: encoding(encoding), alphabet(alphabet)
{}
bool valid() const {
return
(alphabet == 0 && encoding != custom_encoding_e)
||
(alphabet != 0 && encoding == custom_encoding_e);
}
bool set( cbl_encoding_t encoding, size_t alphabet = 0 ) {
assert(encoding <= iconv_YU_e);
if( ! valid() ) { // setting first time
this->encoding = encoding;
this->alphabet = alphabet;
return valid();
}
// DUBNER override. Encoding has to change when
// 01 FOO VALUE ZERO. Just 0 is okay; ZERO is not.
this->encoding = encoding;
return this->encoding == encoding && this->alphabet == alphabet;
}
bool set( const char picture_fragment[] = nullptr) {
if( ! picture_fragment ) {
cbl_encoding_t currenc = current_encoding('A');
bool retval = set(currenc);
return retval;
}
size_t len = strlen(picture_fragment);
std::vector<char> frag(len);
std::transform(picture_fragment, picture_fragment + len,
frag.begin(), ftoupper);
switch(frag[0]) {
case 'A': case 'X': case '9':
return set(current_encoding('A'));
case 'N': case 'U':
if( std::all_of(frag.begin(), frag.end(),
[first = frag[0]]( char ch ) {
return first == ch;
} ) ) {
// All N's indicates National; all U's indicates UTF-8.
auto enc = frag[0] == 'N'? current_encoding('N') : UTF8_e;
return set(enc);
}
return false; // They all must be the same.
}
gcc_unreachable();
}
cbl_encoding_t set() const {
return valid()? encoding : cbl_encoding_t(-1);
}
const char *name() const {
return valid()? __gg__encoding_iconv_name(encoding) : "nocoding";
}
} codeset;
int line; // Where it appears in the file.
cbl_name_t name; // Appears in the GIMPLE dump.
size_t file; // nonzero if field is 01 record for a file
@ -527,18 +589,45 @@ struct cbl_field_t {
cbl_ffi_crv_t crv; // Using by C/R/V in Linkage
linkage_t() : optional(false), crv(by_default_e) {}
} linkage;
struct cbl_field_data_t data;
cbl_field_data_t data;
tree var_decl_node; // Reference to the pointer to the cblc_field_t structure
tree data_decl_node; // Reference to the run-time data of the COBOL variable
// // For linkage_e variables, data_decl_node is a pointer
// // to the data, rather than the actual data
cbl_field_t()
: offset(0), type(FldInvalid), usage(FldInvalid), attr(0)
, parent(0), our_index(0), level(0)
, line(0), name(""), file(0)
, var_decl_node(nullptr), data_decl_node(nullptr)
{}
cbl_field_t( cbl_field_type_t type, uint64_t attr,
const cbl_field_data_t& data,
uint32_t level = 0, const cbl_name_t name = "", int line = 0 )
: offset(0), type(type), usage(FldInvalid), attr(attr)
, parent(0), our_index(0), level(level)
, line(line), file(0), data(data)
, var_decl_node(nullptr), data_decl_node(nullptr)
{
gcc_assert(strlen(name) < sizeof this->name);
strcpy(this->name, name);
}
cbl_field_t( cbl_field_type_t type, uint32_t level, int line, uint64_t attr = 0 )
: offset(0), type(type), usage(FldInvalid), attr(attr)
, parent(0), our_index(0), level(level)
, line(line), name(""), file(0)
, var_decl_node(nullptr), data_decl_node(nullptr)
{}
void set_linkage( cbl_ffi_crv_t crv, bool optional ) {
linkage.optional = optional;
linkage.crv = crv;
assert(crv != by_content_e);
}
bool holds_ascii() const;
inline bool is_typedef() const {
return has_attr(typedef_e);
}
@ -582,6 +671,7 @@ struct cbl_field_t {
attr |= same_as_e;
data = that.data;
codeset = that.codeset;
if( ! (is_typedef || that.type == FldClass) ) {
data.initial = NULL;
@ -1202,27 +1292,40 @@ cbl_field_t * new_temporary_clone( const cbl_field_t *orig);
cbl_field_t * keep_temporary( cbl_field_type_t type );
cbl_field_t * new_literal( uint32_t len, const char initial[],
enum cbl_field_attr_t attr = none_e );
cbl_field_attr_t attr,
cbl_encoding_t encoding = ASCII_e );
static inline cbl_field_t *
new_literal( uint32_t len, const char initial[] ) {
return new_literal(len, initial, none_e);
}
void symbol_temporaries_free();
class temporaries_t {
friend void symbol_temporaries_free();
struct literal_an {
bool is_quoted;
bool is_quoted, is_verbatim; // verbatim: don't use codeset
std::string value;
literal_an() : is_quoted(false), value("???") {}
literal_an( const char value[], bool is_quoted )
: is_quoted(is_quoted), value(value) {}
literal_an() : is_quoted(false), is_verbatim(false), value("???") {}
literal_an( const char value[], bool is_quoted, bool is_verbatim = false )
: is_quoted(is_quoted), is_verbatim(is_verbatim), value(value) {}
literal_an( const literal_an& that )
: is_quoted(that.is_quoted), value(that.value) {}
: is_quoted(that.is_quoted),
is_verbatim(that.is_verbatim),
value(that.value)
{}
literal_an& operator=( const literal_an& that ) {
is_quoted = that.is_quoted;
is_verbatim = that.is_verbatim;
value = that.value;
return *this;
}
bool operator<( const literal_an& that ) const {
if( value == that.value ) { // alpha before numeric
if( is_quoted == that.is_quoted ) { // verbatim before not
return (is_verbatim? 0 : 1) < (that.is_verbatim? 0 : 1);
}
return (is_quoted? 0 : 1) < (that.is_quoted? 0 : 1);
}
return value < that.value;
@ -1235,7 +1338,8 @@ class temporaries_t {
fieldmap_t used, freed;
public:
cbl_field_t * literal( const char value[], uint32_t len, cbl_field_attr_t attr = none_e );
cbl_field_t * literal( uint32_t len, const char value[],
cbl_field_attr_t attr, cbl_encoding_t encoding );
cbl_field_t * reuse( cbl_field_type_t type );
cbl_field_t * acquire( cbl_field_type_t type, const cbl_name_t name = nullptr );
cbl_field_t * add( cbl_field_t *field );
@ -1338,7 +1442,6 @@ struct function_descr_t {
case FldForward:
case FldIndex:
case FldSwitch:
case FldBlob:
return '?';
case FldPointer:
return 'O';
@ -1410,6 +1513,13 @@ struct cbl_special_name_t {
char * hex_decode( const char text[] );
/*
* For a custom alphabet of single-byte encoding, cbl_alphabet_t::alphabet
* holds the collation position of each encoded value.
* If 'A' sorts first (after LOW-VALUE), then alphabet['A'] == 1.
* If the encoding is ASCII, then 'A' is 65 and alphabet[ 65] == 1.
* If the encoding is EBCDIC CP1140, then 'A' is 193 and alphabet[193] == 1.
*/
struct cbl_alphabet_t {
YYLTYPE loc;
cbl_name_t name;
@ -1482,6 +1592,7 @@ struct cbl_alphabet_t {
void also( const YYLTYPE& loc, size_t ch );
bool assign( const YYLTYPE& loc, unsigned char ch, unsigned char value );
void reencode();
static const char *
encoding_str( cbl_encoding_t encoding ) {
@ -1489,7 +1600,13 @@ struct cbl_alphabet_t {
case ASCII_e: return "ascii";
case iso646_e: return "iso646";
case EBCDIC_e: return "ebcdic";
case UTF8_e: return "utf8";
case custom_encoding_e: return "custom";
default:
{
auto p = __gg__encoding_iconv_name( encoding );
if( p ) return p;
}
}
return "???";
}
@ -1644,6 +1761,13 @@ struct cbl_file_t {
size_t user_status; // index into symbol table for file status
size_t vsam_status; // index into symbol table for vsam status PIC X(6)
size_t record_length; // DEPENDS ON
struct codeset_t {
cbl_encoding_t encoding;
size_t alphabet; // unlikely
explicit codeset_t(cbl_encoding_t encoding = CP1252_e, size_t alphabet = 0)
: encoding(encoding), alphabet(alphabet)
{}
} codeset;
int line;
cbl_name_t name;
cbl_sortreturn_t *addresses; // Used during parser_return_start, et al.

View File

@ -205,6 +205,7 @@ field_structure( symbol_elem_t& sym ) {
if( !is_data_field(sym) ) return none;
cbl_field_t *field = cbl_field_of(&sym);
assert(field->type != FldForward); // eliminated by is_data_field
symbol_map_t::key_type key( sym.program, field->name, field->parent );
symbol_map_t::value_type elem( key, std::vector<size_t>() );
@ -232,16 +233,6 @@ field_structure( symbol_elem_t& sym ) {
return elem;
}
void erase_symbol_map_fwds( size_t beg ) {
for( auto p = symbols_begin(beg); p < symbols_end(); p++ ) {
if( p->type != SymField ) continue;
const auto& field(*cbl_field_of(p));
if( field.type == FldForward ) {
symbol_map.erase( sym_name_t(p->program, field.name, field.parent) );
}
}
}
void
build_symbol_map() {
static size_t beg = 0;

File diff suppressed because it is too large Load Diff

View File

@ -61,6 +61,7 @@
#include "../../libgcobol/io.h"
#include "genapi.h"
#include "genutil.h"
#include "../../libgcobol/charmaps.h"
#pragma GCC diagnostic ignored "-Wunused-result"
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
@ -323,8 +324,6 @@ cbl_field_type_str( enum cbl_field_type_t type )
return "FldSwitch";
case FldPointer:
return "FldPointer";
case FldBlob:
return "FldBlob";
}
cbl_internal_error("%s:%d: invalid %<symbol_type_t%> %d", __func__, __LINE__, type);
return "???";
@ -613,7 +612,6 @@ is_elementary( enum cbl_field_type_t type )
case FldForward:
case FldIndex:
case FldSwitch:
case FldBlob:
return false;
case FldPointer:
case FldAlphanumeric:
@ -805,6 +803,7 @@ symbol_field_type_update( cbl_field_t *field,
// type matches itself
if( field->type == candidate ) {
if( is_usage ) field->usage = candidate;
field->codeset.set();
return true;
}
if( is_usage && field->usage == candidate ) return true;
@ -831,7 +830,6 @@ symbol_field_type_update( cbl_field_t *field,
*/
if( is_usage ) {
switch(field->type) {
case FldBlob:
case FldDisplay:
gcc_unreachable(); // type is never just "display"
break;
@ -882,11 +880,24 @@ symbol_field_type_update( cbl_field_t *field,
case FldInvalid:
field->type = candidate;
field->attr |= numeric_group_attrs(field);
// update encoding
switch( field->type ) {
case FldNumericDisplay:
case FldAlphaEdited:
case FldNumericEdited:
{
bool retval = field->codeset.set();
return retval;
}
default:
break;
}
return true;
case FldDisplay:
if( is_displayable(candidate) ) {
field->type = candidate;
field->attr |= numeric_group_attrs(field);
if( ! field->codeset.valid() ) return field->codeset.set();
return true;
}
break;
@ -897,6 +908,7 @@ symbol_field_type_update( cbl_field_t *field,
field->clear_attr(all_x_e);
field->type = field->usage;
field->attr |= numeric_group_attrs(field);
if( ! field->codeset.valid() ) return field->codeset.set();
return true;
case FldNumericDisplay:
case FldNumericEdited:
@ -908,7 +920,6 @@ symbol_field_type_update( cbl_field_t *field,
case FldForward:
case FldSwitch:
case FldPointer:
case FldBlob:
// invalid usage value
gcc_unreachable();
break;
@ -1083,10 +1094,20 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const {
if( has_attr(all_alpha_e) ) {
bool alpha_value = fig != zero_value_e;
// In order to check for all alphabetic characters, we have to convert
// data.initial back to ASCII:
size_t outchars;
char *initial = __gg__iconverter(codeset.encoding,
DEFAULT_CHARMAP_SOURCE,
data.initial,
data.capacity,
&outchars);
if( fig == normal_value_e ) {
alpha_value = std::all_of( data.initial,
data.initial +
strlen(data.initial),
alpha_value = std::all_of( initial,
initial +
data.capacity,
[]( char ch ) {
return ISSPACE(ch) ||
ISPUNCT(ch) ||
@ -1094,7 +1115,7 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const {
}
if( ! alpha_value ) {
error_msg(loc, "alpha-only %s VALUE '%s' contains non-alphabetic data",
name, fig == zero_value_e? cbl_figconst_str(fig) : data.initial);
name, fig == zero_value_e? cbl_figconst_str(fig) : initial);
}
}
@ -1262,7 +1283,6 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src )
return false;
// parser should not allow the following types here
case FldForward:
case FldBlob:
default:
if( sizeof(matrix[0]) < field->type ) {
cbl_internal_error("logic error: MOVE %s %s invalid type:",
@ -1292,8 +1312,16 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src )
case 0:
if( src->type == FldLiteralA && is_numericish(tgt) && !is_literal(tgt) ) {
// Allow if input string is an integer.
const char *p = src->data.initial, *pend = p + src->data.capacity;
if( p[0] == '+' || p[0] == '-' ) p++;
size_t outcount;
char *in_ascii = static_cast<char *>(xmalloc(4 * src->data.capacity));
const char *in_asciip = __gg__iconverter( src->codeset.encoding,
DEFAULT_CHARMAP_SOURCE,
src->data.initial,
src->data.capacity,
&outcount );
memcpy(in_ascii, in_asciip, outcount);
const char *p = in_ascii, *pend = p + src->data.capacity;
if( (p[0] == ascii_plus) || (p[0] == ascii_minus) ) p++;
retval = std::all_of( p, pend, isdigit );
if( yydebug && ! retval ) {
auto bad = std::find_if( p, pend,
@ -1302,6 +1330,7 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src )
HOST_SIZE_T_PRINT_UNSIGNED,
__func__, __LINE__, *bad, (fmt_size_t)(bad - p));
}
free(in_ascii);
}
break;
case 1:
@ -1340,8 +1369,6 @@ bool
valid_picture( enum cbl_field_type_t type, const char picture[] )
{
switch(type) {
case FldBlob:
gcc_unreachable(); // can't get here via the parser
case FldInvalid:
case FldGroup:
case FldLiteralA:
@ -1386,7 +1413,6 @@ uint32_t
type_capacity( enum cbl_field_type_t type, uint32_t digits )
{
switch(type) {
case FldBlob: gcc_unreachable();
case FldInvalid:
case FldGroup:
case FldAlphanumeric:
@ -2085,11 +2111,6 @@ template <typename LOC>
static void
gcc_location_set_impl( const LOC& loc ) {
// Set the position to the first line & column in the location.
if( getenv("KILROY") )
{
fprintf(stderr, "********** KILROY %d\n", loc.first_line);
}
static location_t loc_m_1 = 0;
token_location = linemap_line_start( line_table, loc.first_line, 80 );
@ -2503,7 +2524,7 @@ cbl_unimplementedw(const char *gmsgid, ...) {
auto_diagnostic_group d;
va_list ap;
va_start(ap, gmsgid);
emit_diagnostic_valist( diagnostics::kind::sorry,
emit_diagnostic_valist( diagnostics::kind::warning,
token_location, option_zero, gmsgid, &ap );
va_end(ap);
}

View File

@ -13,11 +13,11 @@
*> { dg-output { (\n|\r\n|\r)} }
*> { dg-output {There should be no garbage after character 32(\n|\r\n|\r)} }
*> { dg-output {\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\*\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-(\n|\r\n|\r)} }
*> { dg-output {.* Bundesstra.e (\n|\r\n|\r)} }
*> { dg-output {.* Bundesstra.e (\n|\r\n|\r)} }
*> { dg-output {.*(\n|\r\n|\r)} }
*> { dg-output {.*(\n|\r\n|\r)} }
*> { dg-output { (\n|\r\n|\r)} }
*> { dg-output {There should be no spaces before the final quote(\n|\r\n|\r)} }
*> { dg-output {".* Bundesstra.e"(\n|\r\n|\r)} }
*> { dg-output {".*"(\n|\r\n|\r)} }
*> { dg-output { (\n|\r\n|\r)} }
*> { dg-output { IsLow ""(\n|\r\n|\r)} }
*> { dg-output { IsZero "000"(\n|\r\n|\r)} }
@ -39,7 +39,7 @@
88 CheckZero VALUE ZERO.
88 CheckQuotes VALUE QUOTE.
88 CheckBob VALUE "bob".
88 CheckBinary VALUE X"000102". *> { dg-warning embedded }
88 CheckBinary VALUE X"000102". *>
01 000VARL PIC XXX VALUE LOW-VALUE.
01 000VARS PIC XXX VALUE SPACE.
01 000VARQ PIC XXX VALUE QUOTE.

File diff suppressed because it is too large Load Diff

View File

@ -103,11 +103,16 @@
Stay alert! */
extern int __gg__decimal_point ;
extern int __gg__decimal_separator ;
extern int __gg__quote_character ;
extern int __gg__low_value_character ;
extern int __gg__high_value_character ;
extern char **__gg__currency_signs ;
extern int __gg__default_currency_sign;
extern char *__gg__ct_currency_signs[256]; // Compile-time currency signs
extern bool __gg__ebcdic_codeset_in_use;
#define internal_is_ebcdic (__gg__ebcdic_codeset_in_use)
extern unsigned short const *__gg__internal_codeset_map;
#define NULLCH ('\0')
#define DEGENERATE_HIGH_VALUE 0xFF
@ -197,84 +202,6 @@ extern unsigned short const *__gg__internal_codeset_map;
#define ascii_newline ((uint8_t)('\n'))
#define ascii_return ((uint8_t)('\r'))
#define internal_space ((uint8_t)__gg__internal_codeset_map[ascii_space])
#define internal_zero ((uint8_t)__gg__internal_codeset_map[ascii_zero])
#define internal_period ((uint8_t)__gg__internal_codeset_map[ascii_period])
#define internal_comma ((uint8_t)__gg__internal_codeset_map[ascii_comma])
#define internal_dquote ((uint8_t)__gg__internal_codeset_map[ascii_dquote])
#define internal_asterisk ((uint8_t)__gg__internal_codeset_map[ascii_asterisk])
#define internal_plus ((uint8_t)__gg__internal_codeset_map[ascii_plus])
#define internal_minus ((uint8_t)__gg__internal_codeset_map[ascii_minus])
#define internal_cr ((uint8_t)__gg__internal_codeset_map[ascii_cr])
#define internal_ff ((uint8_t)__gg__internal_codeset_map[ascii_ff])
#define internal_newline ((uint8_t)__gg__internal_codeset_map[ascii_newline])
#define internal_return ((uint8_t)__gg__internal_codeset_map[ascii_return])
#define internal_0 ((uint8_t)__gg__internal_codeset_map[ascii_0])
#define internal_1 ((uint8_t)__gg__internal_codeset_map[ascii_1])
#define internal_2 ((uint8_t)__gg__internal_codeset_map[ascii_2])
#define internal_3 ((uint8_t)__gg__internal_codeset_map[ascii_3])
#define internal_4 ((uint8_t)__gg__internal_codeset_map[ascii_4])
#define internal_5 ((uint8_t)__gg__internal_codeset_map[ascii_5])
#define internal_6 ((uint8_t)__gg__internal_codeset_map[ascii_6])
#define internal_7 ((uint8_t)__gg__internal_codeset_map[ascii_7])
#define internal_8 ((uint8_t)__gg__internal_codeset_map[ascii_8])
#define internal_9 ((uint8_t)__gg__internal_codeset_map[ascii_9])
#define internal_colon ((uint8_t)__gg__internal_codeset_map[ascii_colon])
#define internal_query ((uint8_t)__gg__internal_codeset_map[ascii_query])
#define internal_A ((uint8_t)__gg__internal_codeset_map[ascii_A])
#define internal_B ((uint8_t)__gg__internal_codeset_map[ascii_B])
#define internal_C ((uint8_t)__gg__internal_codeset_map[ascii_C])
#define internal_D ((uint8_t)__gg__internal_codeset_map[ascii_D])
#define internal_E ((uint8_t)__gg__internal_codeset_map[ascii_E])
#define internal_F ((uint8_t)__gg__internal_codeset_map[ascii_F])
#define internal_G ((uint8_t)__gg__internal_codeset_map[ascii_G])
#define internal_H ((uint8_t)__gg__internal_codeset_map[ascii_H])
#define internal_I ((uint8_t)__gg__internal_codeset_map[ascii_I])
#define internal_J ((uint8_t)__gg__internal_codeset_map[ascii_J])
#define internal_K ((uint8_t)__gg__internal_codeset_map[ascii_K])
#define internal_L ((uint8_t)__gg__internal_codeset_map[ascii_L])
#define internal_M ((uint8_t)__gg__internal_codeset_map[ascii_M])
#define internal_N ((uint8_t)__gg__internal_codeset_map[ascii_N])
#define internal_O ((uint8_t)__gg__internal_codeset_map[ascii_O])
#define internal_P ((uint8_t)__gg__internal_codeset_map[ascii_P])
#define internal_Q ((uint8_t)__gg__internal_codeset_map[ascii_Q])
#define internal_R ((uint8_t)__gg__internal_codeset_map[ascii_R])
#define internal_S ((uint8_t)__gg__internal_codeset_map[ascii_S])
#define internal_T ((uint8_t)__gg__internal_codeset_map[ascii_T])
#define internal_U ((uint8_t)__gg__internal_codeset_map[ascii_U])
#define internal_V ((uint8_t)__gg__internal_codeset_map[ascii_V])
#define internal_W ((uint8_t)__gg__internal_codeset_map[ascii_W])
#define internal_X ((uint8_t)__gg__internal_codeset_map[ascii_X])
#define internal_Y ((uint8_t)__gg__internal_codeset_map[ascii_Y])
#define internal_Z ((uint8_t)__gg__internal_codeset_map[ascii_Z])
#define internal_a ((uint8_t)__gg__internal_codeset_map[ascii_a])
#define internal_b ((uint8_t)__gg__internal_codeset_map[ascii_b])
#define internal_c ((uint8_t)__gg__internal_codeset_map[ascii_c])
#define internal_d ((uint8_t)__gg__internal_codeset_map[ascii_d])
#define internal_e ((uint8_t)__gg__internal_codeset_map[ascii_e])
#define internal_f ((uint8_t)__gg__internal_codeset_map[ascii_f])
#define internal_g ((uint8_t)__gg__internal_codeset_map[ascii_g])
#define internal_h ((uint8_t)__gg__internal_codeset_map[ascii_h])
#define internal_i ((uint8_t)__gg__internal_codeset_map[ascii_i])
#define internal_j ((uint8_t)__gg__internal_codeset_map[ascii_j])
#define internal_k ((uint8_t)__gg__internal_codeset_map[ascii_k])
#define internal_l ((uint8_t)__gg__internal_codeset_map[ascii_l])
#define internal_m ((uint8_t)__gg__internal_codeset_map[ascii_m])
#define internal_n ((uint8_t)__gg__internal_codeset_map[ascii_n])
#define internal_o ((uint8_t)__gg__internal_codeset_map[ascii_o])
#define internal_p ((uint8_t)__gg__internal_codeset_map[ascii_p])
#define internal_q ((uint8_t)__gg__internal_codeset_map[ascii_q])
#define internal_r ((uint8_t)__gg__internal_codeset_map[ascii_r])
#define internal_s ((uint8_t)__gg__internal_codeset_map[ascii_s])
#define internal_t ((uint8_t)__gg__internal_codeset_map[ascii_t])
#define internal_u ((uint8_t)__gg__internal_codeset_map[ascii_u])
#define internal_v ((uint8_t)__gg__internal_codeset_map[ascii_v])
#define internal_w ((uint8_t)__gg__internal_codeset_map[ascii_w])
#define internal_x ((uint8_t)__gg__internal_codeset_map[ascii_x])
#define internal_y ((uint8_t)__gg__internal_codeset_map[ascii_y])
#define internal_z ((uint8_t)__gg__internal_codeset_map[ascii_z])
enum text_device_t
{
td_default_e,
@ -290,7 +217,6 @@ enum text_codeset_t
cs_cp1140_e
};
extern unsigned char __gg__data_space[1] ;
extern unsigned char __gg__data_low_values[1] ;
extern unsigned char __gg__data_zeros[1] ;
@ -315,56 +241,197 @@ extern const unsigned short __gg__ebcdic_to_cp1252_collation[256];
// These routines convert a single ASCII character to either ASCII or EBCDIC
extern "C"
char __gg__ascii_to_ascii_chr(char ch);
extern "C"
char __gg__ascii_to_ebcdic_chr(char ch);
extern "C"
char (*__gg__ascii_to_internal_chr)(char);
#define ascii_to_internal(a) ((*__gg__ascii_to_internal_chr)(a))
extern "C"
void __gg__ascii_to_ascii(char *str, size_t length);
extern "C"
void __gg__ascii_to_ebcdic(char *str, size_t length);
extern "C"
void (*__gg__ascii_to_internal_str)(char *str, size_t length);
#define ascii_to_internal_str(a, b) ((*__gg__ascii_to_internal_str)((a), (b)))
extern "C"
char *__gg__raw_to_ascii(char **dest, size_t *dest_size, const char *str, size_t length);
extern "C"
char *__gg__raw_to_ebcdic(char **dest, size_t *dest_size, const char *in, size_t length);
extern "C"
char *(*__gg__raw_to_internal)(char **dest, size_t *dest_length, const char *in, size_t length);
#define raw_to_internal(a, b, c, d) ((*__gg__raw_to_internal)((a), (b), (c), (d)))
extern "C"
char *__gg__ascii_to_console(char **dest, size_t *dest_size, char const * const str, const size_t length);
extern "C"
char *__gg__ebcdic_to_console(char **dest, size_t *dest_size, char const * const str, const size_t length);
extern "C"
char *(*__gg__internal_to_console_cm)(char **dest, size_t *dest_size, const char *in, size_t length);
#define internal_to_console(a, b, c, d) ((*__gg__internal_to_console_cm)((a), (b), (c), (d)))
extern "C"
void __gg__console_to_ascii(char * const str, size_t length);
extern "C"
void __gg__console_to_ebcdic(char * const str, size_t length);
extern "C"
void (*__gg__console_to_internal_cm)(char * const str, size_t length);
#define console_to_internal(a, b) ((*__gg__console_to_internal_cm)((a), (b)))
extern "C"
void __gg__ebcdic_to_ascii(char *str, const size_t length);
extern "C"
void (*__gg__internal_to_ascii)(char *str, size_t length);
#define internal_to_ascii(a, b) ((*__gg__internal_to_ascii)((a), (b)))
extern "C" void __gg__set_internal_codeset(int use_ebcdic);
extern "C"
void __gg__text_conversion_override(text_device_t device,
text_codeset_t codeset);
const char * __gg__encoding_iconv_name( cbl_encoding_t encoding );
cbl_encoding_t __gg__encoding_iconv_type( const char *name );
char * __gg__iconverter(cbl_encoding_t from,
cbl_encoding_t to,
const char *str,
size_t length,
size_t *outlength);
#define DEFAULT_CHARMAP_SOURCE (iconv_CP1252_e)
class charmap_t
{
private:
// This is the encoding of this character map
cbl_encoding_t m_encoding;
enum
{
sign_type_ascii,
sign_type_ebcdic,
} m_numeric_sign_type;
// This map retains the ASCII-to-encoded value in m_encoding, so that iconv
// need be called but once for each ASCII value.
std::unordered_map<int, int>m_map_of_encodings;
void determine_sign_type()
{
if( mapped_character(ascii_0) & 0x80 )
{
m_numeric_sign_type = sign_type_ebcdic;
}
else
{
m_numeric_sign_type = sign_type_ascii;
}
}
public:
explicit charmap_t(cbl_encoding_t e) : m_encoding(e)
{
determine_sign_type();
}
explicit charmap_t(uint16_t e) : m_encoding(static_cast<cbl_encoding_t>(e))
{
determine_sign_type();
}
int mapped_character(int ch)
{
// The assumption is that anybody calling this routine is providing
// a single-byte character in the DEFAULT_CHARMAP_SOURCE encoding. We
// return the equivalent character in the m_encoding
int retval;
std::unordered_map<int, int>::const_iterator it =
m_map_of_encodings.find(ch);
if( it != m_map_of_encodings.end() )
{
retval = it->second;
}
else
{
retval = 0;
size_t outlength = 0;
const char *mapped = __gg__iconverter(DEFAULT_CHARMAP_SOURCE,
m_encoding,
PTRCAST(char, &ch),
1,
&outlength);
memcpy(&retval, mapped, outlength);
m_map_of_encodings[ch] = retval;
}
return retval;
}
int decimal_point()
{
return mapped_character(__gg__decimal_point);
}
int decimal_separator()
{
return mapped_character(__gg__decimal_separator);
}
int quote_character()
{
return mapped_character(__gg__quote_character);
}
int low_value_character()
{
return __gg__low_value_character;
}
int high_value_character()
{
return __gg__high_value_character;
}
int figconst_character(cbl_figconst_t figconst)
{
int const_char = 0; // Head off a compiler warning
switch(figconst)
{
case normal_value_e :
const_char = -1;
break;
case low_value_e :
const_char = low_value_character();
break;
case zero_value_e :
const_char = mapped_character(ascii_0);
break;
case space_value_e :
const_char = mapped_character(ascii_space);
break;
case quote_value_e :
const_char = quote_character();
break;
case high_value_e :
const_char = high_value_character();
break;
case null_value_e:
const_char = '\0';
break;
default:
abort();
break;
}
return const_char;
}
bool
is_digit_negative(int digit)
{
bool retval;
switch(m_numeric_sign_type)
{
case sign_type_ascii:
retval = !!(digit & NUMERIC_DISPLAY_SIGN_BIT_ASCII);
break;
case sign_type_ebcdic:
retval = !!((~digit) & NUMERIC_DISPLAY_SIGN_BIT_EBCDIC);
break;
}
return retval;
}
int
set_digit_negative(int digit, bool is_negative)
{
switch(m_numeric_sign_type)
{
case sign_type_ascii:
if( is_negative )
{
digit |= NUMERIC_DISPLAY_SIGN_BIT_ASCII;
}
else
{
digit &= ~NUMERIC_DISPLAY_SIGN_BIT_ASCII;
}
break;
case sign_type_ebcdic:
if( is_negative )
{
digit &= ~NUMERIC_DISPLAY_SIGN_BIT_EBCDIC;
}
else
{
digit |= NUMERIC_DISPLAY_SIGN_BIT_EBCDIC;
}
break;
}
return digit;
}
bool
is_like_ebcdic() const
{
return m_numeric_sign_type == sign_type_ebcdic;
}
};
charmap_t *__gg__get_charmap(cbl_encoding_t encoding);
#endif

View File

@ -35,6 +35,8 @@
#include <cstdint>
#include <list>
#include "encodings.h"
#define COUNT_OF(X) (sizeof(X) / sizeof(X[0]))
// This constant establishes the maximum number of digits in a fixed point
@ -79,26 +81,8 @@
value is flagged negative by turning on the 0x10 bit, turning the 0xC0 to
0xD0. */
#define EBCDIC_MINUS (0x60)
#define EBCDIC_PLUS (0x4E)
#define EBCDIC_ZERO (0xF0)
#define EBCDIC_NINE (0xF9)
#define PACKED_NYBBLE_PLUS 0x0C
#define PACKED_NYBBLE_MINUS 0x0D
#define PACKED_NYBBLE_UNSIGNED 0x0F
#define NUMERIC_DISPLAY_SIGN_BIT_ASCII 0x40
#define NUMERIC_DISPLAY_SIGN_BIT_EBCDIC 0x10
#define NUMERIC_DISPLAY_SIGN_BIT (__gg__ebcdic_codeset_in_use ? \
NUMERIC_DISPLAY_SIGN_BIT_EBCDIC : \
NUMERIC_DISPLAY_SIGN_BIT_ASCII)
#define SEPARATE_PLUS (__gg__ebcdic_codeset_in_use ? EBCDIC_PLUS : '+')
#define SEPARATE_MINUS (__gg__ebcdic_codeset_in_use ? EBCDIC_MINUS : '-')
#define ZONED_ZERO (__gg__ebcdic_codeset_in_use ? EBCDIC_ZERO : '0')
#define ZONE_SIGNED_EBCDIC (0xC0)
#define NUMERIC_DISPLAY_SIGN_BIT_EBCDIC 0x20
#define LEVEL01 (1)
#define LEVEL49 (49)
@ -106,7 +90,6 @@
// In the __gg__move_literala() call, we piggyback this bit onto the
// cbl_round_t parameter, just to cut down on the number of parameters passed
#define REFER_ALL_BIT 0x80
// Other bits for handling MOVE ALL and so on.
@ -169,7 +152,6 @@ enum cbl_field_type_t {
FldSwitch,
FldDisplay,
FldPointer,
FldBlob,
};
@ -231,7 +213,7 @@ enum cbl_field_attr_t : uint64_t {
leading_e = 0x0004000000, // leading sign (signable_e alone means trailing)
separate_e = 0x0008000000, // separate sign
envar_e = 0x0010000000, // names an environment variable
dnu_1_e = 0x0020000000, // unused: this attribute bit is available
encoded_e = 0x0020000000, // data.initial matches codeset.encoding
bool_encoded_e = 0x0040000000, // data.initial is a boolean string
hex_encoded_e = 0x0080000000, // data.initial is a hex-encoded string
depends_on_e = 0x0100000000, // A group hierachy contains a DEPENDING_ON
@ -264,7 +246,6 @@ enum cbl_figconst_t
#define FIGCONST_MASK (figconst_1_e|figconst_2_e|figconst_4_e)
#define DATASECT_MASK (linkage_e | local_e)
enum cbl_file_org_t {
file_disorganized_e,
file_sequential_e,
@ -370,13 +351,6 @@ enum cbl_arith_format_t {
no_giving_e, giving_e,
corresponding_e };
enum cbl_encoding_t {
ASCII_e, // STANDARD-1 (in caps to avoid conflict with ascii_e in libgcobol.cc)
iso646_e, // STANDARD-2
EBCDIC_e, // NATIVE or EBCDIC
custom_encoding_e,
};
enum cbl_truncation_mode {
trunc_std_e,
trunc_opt_e,

View File

@ -107,7 +107,8 @@ struct cblc_field_t __ggsr___2_##a = { \
.level = 0 , \
.digits = 0 , \
.rdigits = 0 , \
.dummy = 0 , \
.encoding = iconv_CP1252_e \
.alphabet = 0 \
};
unsigned char __gg__data_space[1] = {' '};
@ -122,12 +123,13 @@ struct cblc_field_t __ggsr__space = {
.parent = NULL,
.occurs_lower = 0 ,
.occurs_upper = 0 ,
.attr = 0x284 ,
.attr = quoted_e | constant_e | space_value_e ,
.type = FldAlphanumeric ,
.level = 0 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
struct cblc_field_t __ggsr__spaces = {
@ -141,12 +143,13 @@ struct cblc_field_t __ggsr__spaces = {
.parent = NULL,
.occurs_lower = 0 ,
.occurs_upper = 0 ,
.attr = 0x284 ,
.attr = quoted_e | constant_e | space_value_e ,
.type = FldAlphanumeric ,
.level = 0 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
unsigned char __gg__data_low_values[1] = {'\0'};
@ -166,7 +169,8 @@ struct cblc_field_t __ggsr__low_values = {
.level = 0 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
unsigned char __gg__data_zeros[1] = {'0'};
@ -186,7 +190,8 @@ struct cblc_field_t __ggsr__zeros = {
.level = 0 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
unsigned char __gg__data_high_values[1] = {0xFF};
@ -206,7 +211,8 @@ struct cblc_field_t __ggsr__high_values = {
.level = 0 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
unsigned char __gg__data_quotes[1] = {0xFF};
@ -226,7 +232,8 @@ struct cblc_field_t __ggsr__quotes = {
.level = 0 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
unsigned char __gg__data_nulls[8] = {0,0,0,0,0,0,0,0};
@ -246,7 +253,8 @@ struct cblc_field_t __ggsr__nulls = {
.level = 0 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
unsigned char __gg__data__file_status[2] = {0,0};
@ -266,7 +274,8 @@ struct cblc_field_t __ggsr___file_status = {
.level = 0 ,
.digits = 2 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
@ -287,7 +296,8 @@ struct cblc_field_t __ggsr___14_linage_counter6 = {
.level = 0 ,
.digits = 4 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
@ -308,7 +318,8 @@ struct cblc_field_t __ggsr__upsi_0 = {
.level = 0 ,
.digits = 4 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
short __gg__data_return_code = 0;
@ -328,7 +339,8 @@ struct cblc_field_t __ggsr__return_code = {
.level = 0 ,
.digits = 4 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
unsigned char __gg___data_dev_stdin[] = "/dev/stdin";
@ -348,7 +360,8 @@ struct cblc_field_t __ggsr___dev_stdin = {
.level = 0 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
unsigned char __gg___data_dev_stdout[] = "/dev/stdout";
@ -368,7 +381,8 @@ struct cblc_field_t __ggsr___dev_stdout = {
.level = 0 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
unsigned char __gg___data_dev_stderr[] = "/dev/stderr";
@ -388,7 +402,8 @@ struct cblc_field_t __ggsr___dev_stderr = {
.level = 0 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
unsigned char __gg___data_dev_null[] = "/dev/null";
@ -408,7 +423,8 @@ struct cblc_field_t __ggsr___dev_null = {
.level = 0 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
unsigned char __gg__data_tally[] = {0,0};
@ -428,7 +444,8 @@ struct cblc_field_t __ggsr__tally = {
.level = 0 ,
.digits = 5 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
unsigned char __gg__data_argi[] = {0,0};
@ -448,9 +465,26 @@ struct cblc_field_t __ggsr__argi = {
.level = 0 ,
.digits = 5 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
/**
Special registers used by the XML parser
Special register Implicit definition and usage Content
XML-EVENT PICTURE X(30) USAGE DISPLAY VALUE SPACE *> name of XML event
XML-CODE PICTURE S9(9) USAGE BINARY VALUE ZERO *> status of XML event
XML-TEXT Variable-length alphanumeric item
XML-NTEXT Variable-length national item
**/
/* The following defines storage for the global DEBUG-ITEM:
01 DEBUG-ITEM.
@ -491,7 +525,8 @@ struct cblc_field_t __ggsr__debug_item = {
.level = 01 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
struct cblc_field_t __ggsr__debug_line = {
@ -510,7 +545,8 @@ struct cblc_field_t __ggsr__debug_line = {
.level = 05 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
struct cblc_field_t __ggsr__debug_filler_1 = {
@ -529,7 +565,8 @@ struct cblc_field_t __ggsr__debug_filler_1 = {
.level = 05 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
struct cblc_field_t __ggsr__debug_name = {
@ -548,7 +585,8 @@ struct cblc_field_t __ggsr__debug_name = {
.level = 05 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
struct cblc_field_t __ggsr__debug_filler_2 = {
@ -567,7 +605,8 @@ struct cblc_field_t __ggsr__debug_filler_2 = {
.level = 05 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
struct cblc_field_t __ggsr__debug_sub_1 = {
@ -586,7 +625,8 @@ struct cblc_field_t __ggsr__debug_sub_1 = {
.level = 05 ,
.digits = 4 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
struct cblc_field_t __ggsr__debug_filler_3 = {
@ -605,7 +645,8 @@ struct cblc_field_t __ggsr__debug_filler_3 = {
.level = 05 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
struct cblc_field_t __ggsr__debug_sub_2 = {
@ -624,7 +665,8 @@ struct cblc_field_t __ggsr__debug_sub_2 = {
.level = 05 ,
.digits = 4 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
struct cblc_field_t __ggsr__debug_filler_4 = {
@ -643,7 +685,8 @@ struct cblc_field_t __ggsr__debug_filler_4 = {
.level = 05 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
struct cblc_field_t __ggsr__debug_sub_3 = {
@ -662,7 +705,8 @@ struct cblc_field_t __ggsr__debug_sub_3 = {
.level = 05 ,
.digits = 4 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
struct cblc_field_t __ggsr__debug_filler_5 = {
@ -681,7 +725,8 @@ struct cblc_field_t __ggsr__debug_filler_5 = {
.level = 05 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
struct cblc_field_t __ggsr__debug_contents = {
@ -700,7 +745,8 @@ struct cblc_field_t __ggsr__debug_contents = {
.level = 05 ,
.digits = 0 ,
.rdigits = 0 ,
.dummy = 0 ,
.encoding = iconv_CP1252_e ,
.alphabet = 0 ,
};
#pragma GCC diagnostic pop

1209
libgcobol/encodings.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -61,7 +61,8 @@ typedef struct cblc_field_t
signed char level; // This variable's level in the naming heirarchy
signed char digits; // Digits specified in PIC string; e.g. 5 for 99v999
signed char rdigits; // Digits to the right of the decimal point. 3 for 99v999
int dummy; // GCC seems to want an even number of 32-bit values
cbl_encoding_t encoding; //
int alphabet; // Same as cbl_field_t::codeset::language
} cblc_field_t;
/*
@ -126,6 +127,8 @@ typedef struct cblc_file_t
int recent_char; // This is the most recent char sent to the file
int recent_key;
cblc_file_prior_op_t prior_op; // run-time type is INT
cbl_encoding_t encoding; // We assume size int
int alphabet; // Actually cbl_encoding_t
int dummy;
} cblc_file_t;

View File

@ -197,10 +197,13 @@ get_filename( const cblc_file_t *file,
static size_t fname_size = MINIMUM_ALLOCATION_SIZE;
static char *fname = static_cast<char *>(malloc(MINIMUM_ALLOCATION_SIZE));
massert(fname);
fname = internal_to_console(&fname,
&fname_size,
file->filename,
strlen(file->filename));
if( strlen(file->filename)+1 > fname_size)
{
fname_size = strlen(file->filename)+1 ;
fname = static_cast<char *>(realloc(fname, fname_size));
}
strcpy(fname, file->filename);
if( !is_quoted )
{
@ -320,10 +323,21 @@ __gg__file_init(
int access,
int optional,
size_t record_area_min,
size_t record_area_max)
size_t record_area_max,
cbl_encoding_t encoding,
int alphabet
)
{
if( !(file->flags & file_flag_initialized_e) )
{
if( encoding != iconv_CP1140_e && __gg__ebcdic_codeset_in_use )
{
// This code is to be eliminated when 'encoding' is valid.
encoding = iconv_CP1140_e;
}
charmap_t *charmap = __gg__get_charmap(encoding);
file->name = strdup(name);
file->symbol_table_index = symbol_table_index;
file->filename = NULL ;
@ -343,7 +357,7 @@ __gg__file_init(
file->access = (cbl_file_access_t)access ;
file->errnum = 0 ;
file->io_status = FsSuccess ;
file->delimiter = internal_newline ;
file->delimiter = charmap->mapped_character(ascii_newline) ;
file->flags = file_flag_none_e;
file->flags |= (optional ? file_flag_optional_e : file_flag_none_e)
+ file_flag_initialized_e;
@ -351,6 +365,8 @@ __gg__file_init(
file->record_area_max = record_area_max;
file->prior_read_location = 0;
file->prior_op = file_op_none;
file->encoding = encoding;
file->alphabet = alphabet;
if( file->access == file_inaccessible_e )
{
@ -727,7 +743,10 @@ relative_file_delete(cblc_file_t *file, bool is_random)
goto done;
}
if( presult == 0 || record_marker != internal_newline )
charmap_t *charmap = __gg__get_charmap(file->encoding);
if( presult == 0
|| record_marker != charmap->mapped_character(ascii_newline) )
{
// There isn't a record there for us to delete, which is an error
file->io_status = FsNotFound; // "23"
@ -1428,7 +1447,8 @@ relative_file_start(cblc_file_t *file,
// end of file
goto done;
}
if( record_marker == internal_newline )
charmap_t *charmap = __gg__get_charmap(file->encoding);
if( record_marker == charmap->mapped_character(ascii_newline) )
{
// The record is a valid one
fpos = rfp.record_position;
@ -1881,7 +1901,8 @@ relative_file_rewrite( cblc_file_t *file, size_t length, bool is_random )
goto done;
}
if( presult == 0 || record_marker != internal_newline )
charmap_t *charmap = __gg__get_charmap(file->encoding);
if( presult == 0 || record_marker != charmap->mapped_character(ascii_newline) )
{
// The record is not specified:
file->io_status = FsNotFound; // "23"
@ -2336,7 +2357,8 @@ relative_file_write_varying(cblc_file_t *file,
while( payload_length < file->record_area_max )
{
fputc(internal_space, file->file_pointer);
charmap_t *charmap = __gg__get_charmap(file->encoding);
fputc(charmap->mapped_character(ascii_space), file->file_pointer);
if( handle_ferror(file, __func__, "fputc() error") )
{
goto done;
@ -2377,7 +2399,12 @@ relative_file_write(cblc_file_t *file,
file->io_status = FsErrno;
long necessary_file_size;
const unsigned char achPostamble[] = {internal_cr, internal_newline};
charmap_t *charmap = __gg__get_charmap(file->encoding);
const unsigned char achPostamble[] =
{
(unsigned char)charmap->mapped_character(ascii_cr),
(unsigned char)charmap->mapped_character(ascii_newline)
};
relative_file_parameters rfp;
@ -2425,7 +2452,7 @@ relative_file_write(cblc_file_t *file,
goto done;
}
if( presult == 1 && record_marker == internal_newline )
if( presult == 1 && record_marker == charmap->mapped_character(ascii_newline) )
{
// The slot has something in it already:
file->io_status = FsDupWrite; // "22"
@ -2467,7 +2494,7 @@ relative_file_write(cblc_file_t *file,
size_t padding = file->record_area_max - length;
while(padding--)
{
fputc(internal_space, file->file_pointer);
fputc(charmap->mapped_character(ascii_space), file->file_pointer);
}
}
@ -2502,6 +2529,8 @@ sequential_file_write(cblc_file_t *file,
int lines)
{
// This code handles SEQUENTIAL and LINE SEQUENTIAl
charmap_t *charmap = __gg__get_charmap(file->encoding);
char ch = '\0';
size_t characters_to_write;
@ -2510,7 +2539,7 @@ sequential_file_write(cblc_file_t *file,
if( lines < -1 )
{
// We are using -666 for a form feed
ch = internal_ff; // Form feed
ch = charmap->mapped_character(ascii_ff); // Form feed
lcount = 1;
}
else if( lines == -1 )
@ -2521,12 +2550,12 @@ sequential_file_write(cblc_file_t *file,
else if( lines == 0 )
{
lcount = 1;
ch = internal_return;
ch = charmap->mapped_character(ascii_return);
}
else /* if( lines > 0 ) */
{
lcount = lines;
ch = internal_newline;
ch = charmap->mapped_character(ascii_newline);
}
// By default, we write out the number of characters in the record area
@ -2545,19 +2574,19 @@ sequential_file_write(cblc_file_t *file,
{
// If file-sequential, then trailing spaces are removed:
while( characters_to_write > 0
&& location[characters_to_write-1] == internal_space )
&& location[characters_to_write-1] == charmap->mapped_character(ascii_space) )
{
characters_to_write -= 1;
}
}
if( after && file->org == file_line_sequential_e && ch == internal_newline )
if( after && file->org == file_line_sequential_e && ch == charmap->mapped_character(ascii_newline) )
{
// In general, we terminate every line with a newline. Because this
// line is supposed to start with a newline, we decrement the line
// counter by one if we had already sent one.
if( lcount && ( file->recent_char == internal_newline
|| file->recent_char == internal_ff) )
if( lcount && ( file->recent_char == charmap->mapped_character(ascii_newline)
|| file->recent_char == charmap->mapped_character(ascii_ff)) )
{
lcount -= 1;
}
@ -2575,7 +2604,7 @@ sequential_file_write(cblc_file_t *file,
file->recent_char = ch;
}
// That might have been a formfeed; switch back to newline:
ch = internal_newline;
ch = charmap->mapped_character(ascii_newline);
}
switch(file->org)
@ -2660,12 +2689,12 @@ sequential_file_write(cblc_file_t *file,
{
goto done;
}
file->recent_char = internal_newline;
file->recent_char = charmap->mapped_character(ascii_newline);
}
if( !after )
{
// We did the output BEFORE, so now it's time to send some internal_newlines
// We did the output BEFORE, so now it's time to send some newlines
while(lcount--)
{
fputc(ch, file->file_pointer);
@ -3004,7 +3033,9 @@ line_sequential_file_read( cblc_file_t *file)
while(remaining < file->record_area_max )
{
// Space fill shorty records
file->default_record->data[remaining++] = internal_space;
charmap_t *charmap = __gg__get_charmap(file->encoding);
file->default_record->data[remaining++] =
charmap->mapped_character(ascii_space);
}
if( hit_eof && !characters_read)
@ -3028,7 +3059,7 @@ line_sequential_file_read( cblc_file_t *file)
else // We filled the whole record area. Look ahead one character
{
#ifdef POSSIBLY_IBM
// In this code, unread characters before the internal_newline
// In this code, unread characters before the newline
// are read next time. See page 133 of the IBM Language Reference
// Manual: "If the first unread character is the record delimiter, it
// is discarded. Otherwise, the first unread character becomes the first
@ -3046,7 +3077,7 @@ line_sequential_file_read( cblc_file_t *file)
goto done;
}
#else
// In this code, extra characters before the internal_newline
// In this code, extra characters before the newline
// are read next time are discarded. GnuCOBOL works this way, and
// the Michael Coughlin "Beginning COBOL" examples require this mode.
// The ISO/IEC 2014 standard is silent on the question of LINE
@ -3165,7 +3196,10 @@ sequential_file_read( cblc_file_t *file)
}
if( characters_read < bytes_in_record )
{
memset(file->default_record->data, internal_space, bytes_to_read);
charmap_t *charmap = __gg__get_charmap(file->encoding);
memset( file->default_record->data,
charmap->mapped_character(ascii_space),
bytes_to_read);
file->io_status = FsEofSeq; // "10"
fpos = -1;
goto done;
@ -3472,7 +3506,8 @@ relative_file_read( cblc_file_t *file,
{
goto done;
}
if(record_marker == internal_newline)
charmap_t *charmap = __gg__get_charmap(file->encoding);
if(record_marker == charmap->mapped_character(ascii_newline) )
{
// We have a good record to read:
@ -3953,16 +3988,6 @@ file_indexed_open(cblc_file_t *file)
case '+':
if( file->flags & file_flag_existed_e )
{
// We need to open the file for reading, and build the
// maps for each index:
static size_t fname_size = MINIMUM_ALLOCATION_SIZE;
static char *fname = static_cast<char *>(malloc(fname_size));
massert(fname);
internal_to_console(&fname,
&fname_size,
file->filename, strlen(file->filename));
// We are going to scan through the entire file, building index
// entries for each record.
@ -4102,7 +4127,7 @@ __gg__file_reopen(cblc_file_t *file, int mode_char)
bool all_spaces = true;
for(size_t i=0; i<strlen(file->filename); i++)
{
if( file->filename[i] != internal_space )
if( file->filename[i] != ascii_space )
{
all_spaces = false;
}
@ -4116,16 +4141,9 @@ __gg__file_reopen(cblc_file_t *file, int mode_char)
goto done;
}
static size_t fname_size = MINIMUM_ALLOCATION_SIZE;
static char *fname = static_cast<char *>(malloc(fname_size));
massert(fname)
internal_to_console(&fname,
&fname_size,
file->filename,
strlen(file->filename));
warnx( "%s(): There is no environment variable named \"%s\"\n",
__func__,
fname);
file->filename);
file->io_status = FsNoFile; // "35"
goto done;
}
@ -4323,7 +4341,8 @@ __io__file_open(cblc_file_t *file,
int mode_char,
int is_quoted)
{
// Filename is a pointer to a malloc() buffer.
// 'filename' is a pointer to a malloc() buffer.
// The 'filename' has to be in the system encoding, typically ASCII
// The complication: A filename can be literal text, it can be from a COBOL
// alphanumeric variable, or it can be the name of an environment variable
@ -4401,9 +4420,7 @@ __io__file_close( cblc_file_t *file, int how )
file_indexed_close(file);
}
// The filename can be from a COBOL alphanumeric variable, which means it can
// between a file_close and a subsequent file_open. So, we get rid of it
// here
// The filename was malloced. So, we get rid of it here.
free(file->filename);
file->filename = NULL;
@ -4588,6 +4605,7 @@ __gg__file_open(cblc_file_t *file,
int mode_char,
int is_quoted)
{
// The 'filename' has to be in the system encoding, typically ASCII
gcobol_io_t *functions = gcobol_io_funcs();
functions->Open(file, filename, mode_char, is_quoted);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -52,7 +52,8 @@ extern "C" __int128 __gg__power_of_ten(int n);
extern "C" __int128 __gg__dirty_to_binary_source( const char *dirty,
int length,
int *rdigits);
extern "C" __int128 __gg__dirty_to_binary_internal( const char *dirty,
extern "C" __int128 __gg__dirty_to_binary(const char *dirty,
cbl_encoding_t encoding,
int length,
int *rdigits);
extern "C" __int128 __gg__binary_value_from_field( int *rdigits,
@ -116,7 +117,6 @@ extern "C" void __gg__realloc_if_necessary( char **dest,
size_t *dest_size,
size_t new_size);
extern "C" void __gg__set_exception_file(const cblc_file_t *file);
extern "C" void __gg__internal_to_console_in_place(char *loc, size_t length);
extern "C" __int128 __gg__binary_value_from_qualified_field(int *rdigits,
const cblc_field_t *var,
size_t offset,
@ -129,4 +129,17 @@ extern "C" __int128 __gg__integer_from_qualified_field(cblc_field_t *var,
size_t var_size);
void __gg__abort(const char *msg);
int __gg__fc_char(const cblc_field_t *field);
extern "C"
void __gg__convert_encoding(char *psz,
cbl_encoding_t from,
cbl_encoding_t to );
extern "C"
void __gg__convert_encoding_length(char *pch,
size_t length,
cbl_encoding_t from,
cbl_encoding_t to );
#endif

View File

@ -153,7 +153,7 @@ string_from_combined(const COMBINED &combined)
{
case 1:
// We know that val8 is a single digit
combined_string[combined.start] = combined.val8 + zero_char;;
combined_string[combined.start] = combined.val8 + zero_char;
break;
case 2:
@ -298,9 +298,13 @@ __gg__binary_to_string_ascii(char *result, int digits, __int128 value)
}
bool
__gg__binary_to_string_internal(char *result, int digits, __int128 value)
__gg__binary_to_string_encoded( char *result,
int digits,
__int128 value,
cbl_encoding_t encoding)
{
zero_char = internal_zero;
charmap_t *charmap = __gg__get_charmap(encoding);
zero_char = charmap->mapped_character(ascii_0);
// Note that this routine does not terminate the generated string with a
// NUL. This routine is sometimes used to generate a NumericDisplay string
@ -328,7 +332,6 @@ __gg__binary_to_string_internal(char *result, int digits, __int128 value)
return retval;
}
static
void
packed_from_combined(const COMBINED &combined)
@ -480,7 +483,8 @@ extern "C"
__int128
__gg__numeric_display_to_binary(unsigned char *signp,
const unsigned char *psz,
int n )
int n,
cbl_encoding_t encoding)
{
/* This is specific to numeric display values.
@ -504,6 +508,11 @@ __gg__numeric_display_to_binary(unsigned char *signp,
and so we build up a 128-bit result in three 64-bit pieces, and assemble
them at the end. */
charmap_t *charmap = __gg__get_charmap(encoding);
unsigned char zero = charmap->mapped_character(ascii_0);
unsigned char minus = charmap->mapped_character(ascii_minus);
bool is_ebcdic = (zero == 0xF0);
static const uint8_t lookup[] =
{
@ -575,10 +584,10 @@ __gg__numeric_display_to_binary(unsigned char *signp,
unsigned char sign_byte = *signp;
const unsigned char *mapper;
if( internal_is_ebcdic )
if( is_ebcdic )
{
mapper = from_ebcdic;
if( sign_byte == EBCDIC_MINUS )
if( sign_byte == minus )
{
is_negative = true;
}
@ -595,7 +604,7 @@ __gg__numeric_display_to_binary(unsigned char *signp,
else
{
mapper = from_ascii;
if( sign_byte == '-' )
if( sign_byte == minus )
{
is_negative = true;
}
@ -692,7 +701,6 @@ __gg__numeric_display_to_binary(unsigned char *signp,
// Replace the original sign byte:
*signp = sign_byte; // cppcheck-suppress redundantAssignment
return retval;
}
@ -788,6 +796,7 @@ __gg__packed_to_binary(const unsigned char *psz,
// back up one byte to fetch the sign nybble.
uint8_t sign_nybble = *(psz-1) & 0x0F;
enum{ PACKED_NYBBLE_MINUS= 0x0D};
if( sign_nybble > 9 )
{

View File

@ -35,9 +35,10 @@ bool __gg__binary_to_string_ascii(char *result,
int digits,
__int128 value);
extern "C"
bool __gg__binary_to_string_internal( char *result,
bool __gg__binary_to_string_encoded(char *result,
int digits,
__int128 value);
__int128 value,
cbl_encoding_t encoding);
extern "C"
void __gg__binary_to_packed( unsigned char *result,
@ -47,7 +48,8 @@ void __gg__binary_to_packed( unsigned char *result,
extern "C"
__int128 __gg__numeric_display_to_binary( unsigned char *sign_byte,
const unsigned char *digits,
int ndigits );
int ndigits,
cbl_encoding_t encoding);
extern "C"
__int128

View File

@ -39,22 +39,11 @@
#include "ec.h"
#include "common-defs.h"
#include "valconv.h"
#include "charmaps.h"
#include "valconv.h"
#include "exceptl.h"
int __gg__decimal_point = '.' ;
int __gg__decimal_separator = ',' ;
int __gg__quote_character = '"' ;
int __gg__low_value_character = 0x00 ;
int __gg__high_value_character = 0xFF ;
char **__gg__currency_signs ;
int __gg__default_currency_sign;
char *__gg__ct_currency_signs[256]; // Compile-time currency signs
std::unordered_map<size_t, alphabet_state> __gg__alphabet_states;
extern "C"
@ -113,7 +102,6 @@ __gg__alphabet_create( cbl_encoding_t encoding,
return;
}
static int
expand_picture(char *dest, const char *picture)
{
@ -227,6 +215,10 @@ __gg__string_to_numeric_edited( char * const dest,
int is_negative,
const char *picture)
{
// This routine operates in ASCII space. Life is hard enough without trying
// to do this in EBCDIC, too. So, 'source' and 'picture' are assumed to be
// CP1252
// We need to expand the picture string. We assume that the caller left
// enough room in dest to take the expanded picture string.
@ -449,7 +441,6 @@ __gg__string_to_numeric_edited( char * const dest,
}
}
if( index_s >= decimal_point_index )
{
// We are to the right of the decimal point, and so we
@ -1214,24 +1205,29 @@ got_float:
}
}
}
bool retval = false;
bool retval = false;
return retval;
}
extern "C"
void
__gg__string_to_alpha_edited( char *dest,
cbl_encoding_t dest_encoding,
const char *source,
int slength,
const char *picture)
{
// 'source' is in 'dest' encoding
// Put the PICTURE into the data area. If the caller didn't leave enough
// room, well, poo on them. Said another way; if they specify disaster,
// disaster is what they will get.
// This routine expands picture into dest using ascii characters, but
// replaces them with internal characters
// replaces them with encoded characters
charmap_t *charmap_dest = __gg__get_charmap(dest_encoding);
int destlength = expand_picture(dest, picture);
@ -1246,15 +1242,15 @@ __gg__string_to_alpha_edited( char *dest,
{
case ascii_b: // Replaced with space
case ascii_B:
dest[dindex] = internal_space;
dest[dindex] = charmap_dest->mapped_character(ascii_space);
break;
case ascii_zero: // These are left alone:
dest[dindex] = ascii_to_internal(ascii_zero);
dest[dindex] = charmap_dest->mapped_character(ascii_0);
break;
case ascii_slash:
dest[dindex] = ascii_to_internal(ascii_slash);
dest[dindex] = charmap_dest->mapped_character(ascii_slash);
break;
default:
@ -1267,7 +1263,7 @@ __gg__string_to_alpha_edited( char *dest,
}
else
{
sch = internal_space;
sch = charmap_dest->mapped_character(ascii_space);;
}
dest[dindex] = sch;
}
@ -1323,7 +1319,7 @@ __gg__remove_trailing_zeroes(char *p)
if( strchr(left, '.') )
{
while(*right == '0' || *right == internal_space)
while( *right == '0' )
{
right -= 1;
}

View File

@ -31,16 +31,6 @@
#ifndef __VALCONV_H
#define __VALCONV_H
extern int __gg__decimal_point ;
extern int __gg__decimal_separator ;
extern int __gg__quote_character ;
extern int __gg__low_value_character ;
extern int __gg__high_value_character ;
extern char **__gg__currency_signs ;
extern int __gg__default_currency_sign;
extern char *__gg__ct_currency_signs[256]; // Compile-time currency signs
// All "ordinals" are zero-based ordinals. The COBOL spec's ordinal values
// for ordinary ASCII/EBCDIC ranger from 1 to 256, so we call them zero through
// 255. We use unsigned ints so that when an custom alphabet is described, we
@ -69,6 +59,7 @@ extern "C"
int is_negative,
const char *picture);
void __gg__string_to_alpha_edited(char *dest,
cbl_encoding_t dest_encoding,
const char *source,
int slength,
const char *picture);