mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			
		
			
				
	
	
		
			147 lines
		
	
	
		
			3.9 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
			
		
		
	
	
			147 lines
		
	
	
		
			3.9 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
| #!/usr/bin/perl -w
 | |
| # unicode-decomp.pl - script to generate database for java.text.Collator
 | |
| # Copyright (C) 1998, 1999, 2002 Free Software Foundation, Inc.
 | |
| #
 | |
| # This file is part of libjava.
 | |
| # 
 | |
| # This software is copyrighted work licensed under the terms of the
 | |
| # Libjava License.  Please consult the file "LIBJAVA_LICENSE" for
 | |
| # details.
 | |
| 
 | |
| # Code for reading UnicodeData.txt and generating the decomposition
 | |
| # tables used by java.text.Collator.  For now, the relevant Unicode
 | |
| # definition files are found in libjava/gnu/gcj/convert/.
 | |
| #
 | |
| # Usage: ./unicode-decomp.pl [-n] <UnicodeData.txt> <java-chardecomp.h>
 | |
| #   where <UnicodeData.txt> is obtained from www.unicode.org (named
 | |
| #   UnicodeData-3.0.0.txt for Unicode version 3.0.0), and
 | |
| #   <java-chardecomp.h> is the final location of include/java-chardecomp.h.
 | |
| #   As of JDK 1.4, use Unicode version 3.0.0 for best results.
 | |
| #
 | |
| # If this exits with nonzero status, then you must investigate the
 | |
| # cause of the problem.
 | |
| # Diagnostics and other information to stderr.
 | |
| # With -n, the files are not created, but all processing still occurs.
 | |
| 
 | |
use strict;
use warnings;

# These map characters to their decompositions.  Keys are numeric code
# points; values are the decomposition packed as a string of
# big-endian 16-bit units (pack "n*").  Entries whose decomposition
# field carries a <tag> go into %full_decomposition, all others into
# %canonical_decomposition.
my %canonical_decomposition = ();
my %full_decomposition = ();

# Handle `-n': all processing still occurs, but the output is sent to
# /dev/null instead of the named file.
if ($ARGV[0] && $ARGV[0] eq '-n')
{
    shift @ARGV;
    $ARGV[1] = '/dev/null';
}
die "Usage: $0 <UnicodeData.txt> <java-chardecomp.h>" unless @ARGV == 2;
my ($unicode_file, $output_file) = @ARGV;

# Three-argument open: the file name is taken literally, so mode
# characters or surrounding whitespace in a command-line argument can
# never be interpreted by open.
open (UNICODE, '<', $unicode_file)
    or die "Can't open Unicode attribute file: $!\n";

# Process the Unicode file.
$| = 1;
my $count = 0;
print STDERR "Parsing attributes file";
while (my $line = <UNICODE>)
{
    # One progress dot per 1000 input lines.
    print STDERR "." unless $count++ % 1000;
    chomp $line;
    $line =~ s/\r//g;

    # Field 0 is the code point, field 5 the decomposition mapping.
    my ($ch, undef, undef, undef, undef, $decomp) = split (/;/, $line);

    # Blank or short lines have no decomposition field at all; skip
    # them here rather than feeding undef to hex() and `ne'.
    next unless defined $decomp && $decomp ne '';
    $ch = hex ($ch);

    my $is_full = 0;
    my @units = ();
    foreach my $field (split (' ', $decomp))
    {
        # A <tag> marks the mapping as belonging to the full table;
        # the tag itself is not part of the expansion.
        if ($field =~ /^\<.*\>$/)
        {
            $is_full = 1;
            next;
        }
        push (@units, hex ($field));
    }

    my $packed = pack "n*", @units;
    if ($is_full)
    {
        $full_decomposition{$ch} = $packed;
    }
    else
    {
        $canonical_decomposition{$ch} = $packed;
    }
}
close (UNICODE);

# Now generate decomposition tables.  DECOMP stays a bareword handle
# because the table-writing subroutines print to it by name.
open (DECOMP, '>', $output_file) or die "Can't open output file: $!\n";
print STDERR "\nGenerating tables\n";
print DECOMP <<EOF;
// java-chardecomp.h - Decomposition character tables -*- c++ -*-

#ifndef __JAVA_CHARDECOMP_H__
#define __JAVA_CHARDECOMP_H__


// These tables are automatically generated by the $0
// script.  DO NOT EDIT the tables.  Instead, fix the script
// and run it again.

// This file should only be included by natCollator.cc

struct decomp_entry
{
  jchar key;
  const char *value;
};

EOF

# The sub is defined further down with a prototype; the `&' form with
# explicit parentheses calls it with an empty argument list and avoids
# a "called too early to check prototype" warning.
&write_decompositions ();

print DECOMP "#endif /* __JAVA_CHARDECOMP_H__ */\n";

# Buffered write errors only surface at close time, so a failure here
# must produce a nonzero exit status.
close (DECOMP) or die "Can't close output file: $!\n";
print STDERR "Done\n";
exit;
 | |
| 
 | |
| 
 | |
# Write a single decomposition table to the DECOMP filehandle as a C
# array named `<name>_decomposition'.
#
#   $name     - prefix used for the generated array name.
#   $is_canon - accepted from the caller but not consulted here.
#   %table    - maps a code point to its expansion, stored as a string
#               of big-endian 16-bit units (pack "n*").
#
# Only code points 0 .. 0xffff with a defined table value are written,
# in ascending order.
sub write_single_decomposition($$%)
{
    my ($name, $is_canon, %table) = @_;

    my @entries = ();
    foreach my $code_point (grep { defined $table{$_} } 0 .. 0xffff)
    {
        # Each 16-bit unit of the expansion becomes two hex-escaped
        # bytes, high byte first.  A byte string is used instead of a
        # fixed-width array because most expansions are short while a
        # few (e.g. \uFDFA) are very long.  No explicit terminator is
        # written here; the literal ends with whatever the C compiler
        # appends to a string literal.
        my $bytes = join ('',
                          map { sprintf ("\\x%02x\\x%02x", $_ >> 8, $_ & 0xff) }
                          unpack ("n*", $table{$code_point}));
        push (@entries, sprintf ("  { 0x%04x, \"%s\" }", $code_point, $bytes));
    }

    # Entries are separated by ",\n"; the last one has no trailing comma.
    my $text = "static const decomp_entry ${name}_decomposition[] =\n{\n"
        . join (",\n", @entries)
        . "\n};\n\n";
    print DECOMP $text;
}
 | |
| 
 | |
# Write both decomposition tables to DECOMP: first the canonical table,
# then the full one.  Takes no arguments; the result of the last table
# write is passed back to the caller.
sub write_decompositions()
{
    my %table_for = (
        canonical => \%canonical_decomposition,
        full      => \%full_decomposition,
    );

    my $status;
    foreach my $kind ('canonical', 'full')
    {
        # The second argument is 1 for the canonical table, 0 otherwise.
        my $is_canon = ($kind eq 'canonical') ? 1 : 0;
        $status = write_single_decomposition ($kind, $is_canon,
                                              %{ $table_for{$kind} });
    }
    return $status;
}
 |