ISO_8859_1.java, [...]: Fixed canonical names and aliases according to...

2005-02-07  Robert Schuster  <thebohemian@gmx.net>

        * gnu/java/nio/charset/ISO_8859_1.java,
        gnu/java/nio/charset/US_ASCII.java,
        gnu/java/nio/charset/UTF_16.java,
        gnu/java/nio/charset/UTF_16_LE.java,
        gnu/java/nio/charset/UTF_16_BE.java,
        gnu/java/nio/charset/UTF_8.java: Fixed canonical names
         and aliases according to
         "http://www.iana.org/assignments/character-sets",
         "http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html"
         and "http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL".
        * gnu/java/nio/charset/Provider.java: Made charset lookup
         case-insensitive which fixes bug #11740.

From-SVN: r94711
This commit is contained in:
Robert Schuster 2005-02-07 20:34:17 +00:00 committed by Anthony Green
parent f0d87cdad8
commit c686e630c3
8 changed files with 139 additions and 26 deletions

View File

@ -1,3 +1,18 @@
2005-02-07 Robert Schuster <thebohemian@gmx.net>
* gnu/java/nio/charset/ISO_8859_1.java,
gnu/java/nio/charset/US_ASCII.java,
gnu/java/nio/charset/UTF_16.java,
gnu/java/nio/charset/UTF_16_LE.java,
gnu/java/nio/charset/UTF_16_BE.java,
gnu/java/nio/charset/UTF_8.java: Fixed canonical names
and aliases according to
"http://www.iana.org/assignments/character-sets",
"http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html"
and "http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL".
* gnu/java/nio/charset/Provider.java: Made charset lookup
case-insensitive which fixes bug #11740.
2005-02-07 Tom Tromey <tromey@redhat.com> 2005-02-07 Tom Tromey <tromey@redhat.com>
PR libgcj/19611: PR libgcj/19611:

View File

@ -1,5 +1,5 @@
/* ISO_8859_1.java -- /* ISO_8859_1.java --
Copyright (C) 2002, 2004 Free Software Foundation, Inc. Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath. This file is part of GNU Classpath.
@ -53,7 +53,28 @@ final class ISO_8859_1 extends Charset
{ {
ISO_8859_1 () ISO_8859_1 ()
{ {
super ("ISO-8859-1", new String[]{"ISO-LATIN-1"}); /* Canonical charset name chosen according to:
* http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
*/
super ("ISO-8859-1", new String[] {
/* These names are provided by
* http://www.iana.org/assignments/character-sets
*/
"iso-ir-100",
"ISO_8859-1",
"latin1",
"l1",
"IBM819",
"CP819",
"csISOLatin1",
"8859_1",
/* These names are provided by
* http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
*/
"ISO8859_1", "ISO_8859_1", "ibm-819", "ISO_8859-1:1987",
"819"
});
} }
public boolean contains (Charset cs) public boolean contains (Charset cs)

View File

@ -1,5 +1,5 @@
/* Provider.java -- /* Provider.java --
Copyright (C) 2002 Free Software Foundation, Inc. Copyright (C) 2002, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath. This file is part of GNU Classpath.
@ -48,6 +48,7 @@ import java.util.Iterator;
* {@link Charset#charsetForName} and * {@link Charset#availableCharsets}. * {@link Charset#charsetForName} and * {@link Charset#availableCharsets}.
* *
* @author Jesse Rosenstock * @author Jesse Rosenstock
* @author Robert Schuster (thebohemian@gmx.net)
* @see Charset * @see Charset
*/ */
public final class Provider extends CharsetProvider public final class Provider extends CharsetProvider
@ -63,12 +64,14 @@ public final class Provider extends CharsetProvider
} }
/** /**
* Map from charset name to charset canonical name. * Map from charset name to charset canonical name. The strings
* are all lower-case to allow case-insensitive retrieval of
* Charset instances.
*/ */
private final HashMap canonicalNames; private final HashMap canonicalNames;
/** /**
* Map from canonical name to Charset. * Map from lower-case canonical name to Charset.
* TODO: We may want to use soft references. We would then need to keep * TODO: We may want to use soft references. We would then need to keep
* track of the class name to regenerate the object. * track of the class name to regenerate the object.
*/ */
@ -76,8 +79,6 @@ public final class Provider extends CharsetProvider
private Provider () private Provider ()
{ {
// FIXME: We might need to make the name comparison case insensitive.
// Verify this with the Sun JDK.
canonicalNames = new HashMap (); canonicalNames = new HashMap ();
charsets = new HashMap (); charsets = new HashMap ();
@ -106,24 +107,42 @@ public final class Provider extends CharsetProvider
.iterator (); .iterator ();
} }
/**
* Returns a Charset instance by converting the given
* name to lower-case, looking up the canonical charset
* name and finally looking up the Charset with that name.
*
* <p>The lookup is therefore case-insensitive.</p>
*
* @return The Charset having <code>charsetName</code>
* as its alias or null if no such Charset exists.
*/
public Charset charsetForName (String charsetName) public Charset charsetForName (String charsetName)
{ {
return (Charset) charsets.get (canonicalize (charsetName)); return (Charset) charsets.get(canonicalNames.get(charsetName.toLowerCase()));
}
private Object canonicalize (String charsetName)
{
Object o = canonicalNames.get (charsetName);
return o == null ? charsetName : o;
} }
/**
* Puts a Charset under its canonical name into the 'charsets' map.
* Then puts a mapping from all its alias names to the canonical name.
*
* <p>All names are converted to lower-case</p>.
*
* @param cs the Charset to register under its canonical name and aliases
*/
private void addCharset (Charset cs) private void addCharset (Charset cs)
{ {
String canonicalName = cs.name (); String canonicalName = cs.name().toLowerCase();
charsets.put (canonicalName, cs); charsets.put (canonicalName, cs);
/* Maps the canonical name to itself so that
* a lookup using the canonical name needs
* no special casing.
*/
canonicalNames.put(canonicalName, canonicalName);
for (Iterator i = cs.aliases ().iterator (); i.hasNext (); ) for (Iterator i = cs.aliases ().iterator (); i.hasNext (); )
canonicalNames.put (i.next (), canonicalName); canonicalNames.put (((String) i.next()).toLowerCase(), canonicalName);
} }
public static synchronized Provider provider () public static synchronized Provider provider ()

View File

@ -1,5 +1,5 @@
/* US_ASCII.java -- /* US_ASCII.java --
Copyright (C) 2002, 2004 Free Software Foundation, Inc. Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath. This file is part of GNU Classpath.
@ -53,7 +53,29 @@ final class US_ASCII extends Charset
{ {
US_ASCII () US_ASCII ()
{ {
super ("US-ASCII", new String[]{"ISO646-US"}); /* Canonical charset name chosen according to:
* http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
*/
super ("US-ASCII", new String[] {
/* These names are provided by
* http://www.iana.org/assignments/character-sets
*/
"iso-ir-6",
"ANSI_X3.4-1986",
"ISO_646.irv:1991",
"ASCII",
"ISO646-US",
"ASCII",
"us",
"IBM367",
"cp367",
"csASCII",
/* These names are provided by
* http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
*/
"ANSI_X3.4-1968", "iso_646.irv:1983", "ascii7", "646",
"windows-20127"
});
} }
public boolean contains (Charset cs) public boolean contains (Charset cs)

View File

@ -1,5 +1,5 @@
/* UTF_16.java -- /* UTF_16.java --
Copyright (C) 2002, 2004 Free Software Foundation, Inc. Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath. This file is part of GNU Classpath.
@ -51,7 +51,14 @@ final class UTF_16 extends Charset
{ {
UTF_16 () UTF_16 ()
{ {
super ("UTF-16", null); super ("UTF-16", new String[] {
// witnessed by the internet
"UTF16",
/* These names are provided by
* http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
*/
"ISO-10646-UCS-2", "unicode", "csUnicode", "ucs-2"
});
} }
public boolean contains (Charset cs) public boolean contains (Charset cs)

View File

@ -1,5 +1,5 @@
/* UTF_16BE.java -- /* UTF_16BE.java --
Copyright (C) 2002, 2004 Free Software Foundation, Inc. Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath. This file is part of GNU Classpath.
@ -51,7 +51,18 @@ final class UTF_16BE extends Charset
{ {
UTF_16BE () UTF_16BE ()
{ {
super ("UTF-16BE", null); super ("UTF-16BE", new String[] {
// witnessed by the internet
"UTF16BE",
/* These names are provided by
* http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
*/
"x-utf-16be", "ibm-1200", "ibm-1201", "ibm-5297",
"ibm-13488", "ibm-17584", "windows-1201", "cp1200", "cp1201",
"UTF16_BigEndian",
// see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
"UnicodeBigUnmarked"
});
} }
public boolean contains (Charset cs) public boolean contains (Charset cs)

View File

@ -1,5 +1,5 @@
/* UTF_16LE.java -- /* UTF_16LE.java --
Copyright (C) 2002, 2004 Free Software Foundation, Inc. Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath. This file is part of GNU Classpath.
@ -51,7 +51,17 @@ final class UTF_16LE extends Charset
{ {
UTF_16LE () UTF_16LE ()
{ {
super ("UTF-16LE", null); super ("UTF-16LE", new String[] {
// witnessed by the internet
"UTF16LE",
/* These names are provided by
* http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
*/
"x-utf-16le", "ibm-1202", "ibm-13490", "ibm-17586",
"UTF16_LittleEndian",
// see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
"UnicodeLittleUnmarked"
});
} }
public boolean contains (Charset cs) public boolean contains (Charset cs)

View File

@ -1,5 +1,5 @@
/* UTF_8.java -- /* UTF_8.java --
Copyright (C) 2002, 2004 Free Software Foundation, Inc. Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath. This file is part of GNU Classpath.
@ -62,7 +62,15 @@ final class UTF_8 extends Charset
{ {
UTF_8 () UTF_8 ()
{ {
super ("UTF-8", null); super ("UTF-8", new String[] {
/* These names are provided by
* http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
*/
"ibm-1208", "ibm-1209", "ibm-5304", "ibm-5305",
"windows-65001", "cp1208",
// see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
"UTF8"
});
} }
public boolean contains (Charset cs) public boolean contains (Charset cs)