mirror of git://gcc.gnu.org/git/gcc.git
ISO_8859_1.java, [...]: Fixed canonical names and aliases according to...
2005-02-07 Robert Schuster <thebohemian@gmx.net>
* gnu/java/nio/charset/ISO_8859_1.java,
gnu/java/nio/charset/US_ASCII.java,
gnu/java/nio/charset/UTF_16.java,
gnu/java/nio/charset/UTF_16LE.java,
gnu/java/nio/charset/UTF_16BE.java,
gnu/java/nio/charset/UTF_8.java: Fixed canonical names
and aliases according to
"http://www.iana.org/assignments/character-sets",
"http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html"
and "http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL".
* gnu/java/nio/charset/Provider.java: Made charset lookup
case-insensitive which fixes bug #11740.
From-SVN: r94711
This commit is contained in:
parent
f0d87cdad8
commit
c686e630c3
|
|
@ -1,3 +1,18 @@
|
||||||
|
2005-02-07 Robert Schuster <thebohemian@gmx.net>
|
||||||
|
|
||||||
|
* gnu/java/nio/charset/ISO_8859_1.java,
|
||||||
|
gnu/java/nio/charset/US_ASCII.java,
|
||||||
|
gnu/java/nio/charset/UTF_16.java,
|
||||||
|
gnu/java/nio/charset/UTF_16LE.java,
|
||||||
|
gnu/java/nio/charset/UTF_16BE.java,
|
||||||
|
gnu/java/nio/charset/UTF_8.java: Fixed canonical names
|
||||||
|
and aliases according to
|
||||||
|
"http://www.iana.org/assignments/character-sets",
|
||||||
|
"http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html"
|
||||||
|
and "http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL".
|
||||||
|
* gnu/java/nio/charset/Provider.java: Made charset lookup
|
||||||
|
case-insensitive which fixes bug #11740.
|
||||||
|
|
||||||
2005-02-07 Tom Tromey <tromey@redhat.com>
|
2005-02-07 Tom Tromey <tromey@redhat.com>
|
||||||
|
|
||||||
PR libgcj/19611:
|
PR libgcj/19611:
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
/* ISO_8859_1.java --
|
/* ISO_8859_1.java --
|
||||||
Copyright (C) 2002, 2004 Free Software Foundation, Inc.
|
Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
|
||||||
|
|
||||||
This file is part of GNU Classpath.
|
This file is part of GNU Classpath.
|
||||||
|
|
||||||
|
|
@ -53,7 +53,28 @@ final class ISO_8859_1 extends Charset
|
||||||
{
|
{
|
||||||
ISO_8859_1 ()
|
ISO_8859_1 ()
|
||||||
{
|
{
|
||||||
super ("ISO-8859-1", new String[]{"ISO-LATIN-1"});
|
/* Canonical charset name chosen according to:
|
||||||
|
* http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
|
||||||
|
*/
|
||||||
|
super ("ISO-8859-1", new String[] {
|
||||||
|
/* These names are provided by
|
||||||
|
* http://www.iana.org/assignments/character-sets
|
||||||
|
*/
|
||||||
|
"iso-ir-100",
|
||||||
|
"ISO_8859-1",
|
||||||
|
"latin1",
|
||||||
|
"l1",
|
||||||
|
"IBM819",
|
||||||
|
"CP819",
|
||||||
|
"csISOLatin1",
|
||||||
|
"8859_1",
|
||||||
|
/* These names are provided by
|
||||||
|
* http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
|
||||||
|
*/
|
||||||
|
"ISO8859_1", "ISO_8859_1", "ibm-819", "ISO_8859-1:1987",
|
||||||
|
"819"
|
||||||
|
});
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean contains (Charset cs)
|
public boolean contains (Charset cs)
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
/* Provider.java --
|
/* Provider.java --
|
||||||
Copyright (C) 2002 Free Software Foundation, Inc.
|
Copyright (C) 2002, 2005 Free Software Foundation, Inc.
|
||||||
|
|
||||||
This file is part of GNU Classpath.
|
This file is part of GNU Classpath.
|
||||||
|
|
||||||
|
|
@ -48,6 +48,7 @@ import java.util.Iterator;
|
||||||
* {@link Charset#charsetForName} and {@link Charset#availableCharsets}.
|
* {@link Charset#charsetForName} and {@link Charset#availableCharsets}.
|
||||||
*
|
*
|
||||||
* @author Jesse Rosenstock
|
* @author Jesse Rosenstock
|
||||||
|
* @author Robert Schuster (thebohemian@gmx.net)
|
||||||
* @see Charset
|
* @see Charset
|
||||||
*/
|
*/
|
||||||
public final class Provider extends CharsetProvider
|
public final class Provider extends CharsetProvider
|
||||||
|
|
@ -63,12 +64,14 @@ public final class Provider extends CharsetProvider
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Map from charset name to charset canonical name.
|
* Map from charset name to charset canonical name. The strings
|
||||||
|
* are all lower-case to allow case-insensitive retrieval of
|
||||||
|
* Charset instances.
|
||||||
*/
|
*/
|
||||||
private final HashMap canonicalNames;
|
private final HashMap canonicalNames;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Map from canonical name to Charset.
|
* Map from lower-case canonical name to Charset.
|
||||||
* TODO: We may want to use soft references. We would then need to keep
|
* TODO: We may want to use soft references. We would then need to keep
|
||||||
* track of the class name to regenerate the object.
|
* track of the class name to regenerate the object.
|
||||||
*/
|
*/
|
||||||
|
|
@ -76,8 +79,6 @@ public final class Provider extends CharsetProvider
|
||||||
|
|
||||||
private Provider ()
|
private Provider ()
|
||||||
{
|
{
|
||||||
// FIXME: We might need to make the name comparison case insensitive.
|
|
||||||
// Verify this with the Sun JDK.
|
|
||||||
canonicalNames = new HashMap ();
|
canonicalNames = new HashMap ();
|
||||||
charsets = new HashMap ();
|
charsets = new HashMap ();
|
||||||
|
|
||||||
|
|
@ -106,24 +107,42 @@ public final class Provider extends CharsetProvider
|
||||||
.iterator ();
|
.iterator ();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a Charset instance by converting the given
|
||||||
|
* name to lower-case, looking up the canonical charset
|
||||||
|
* name and finally looking up the Charset with that name.
|
||||||
|
*
|
||||||
|
* <p>The lookup is therefore case-insensitive.</p>
|
||||||
|
*
|
||||||
|
* @return The Charset having <code>charsetName</code>
|
||||||
|
* as its name or alias, or null if no such Charset exists.
|
||||||
|
*/
|
||||||
public Charset charsetForName (String charsetName)
|
public Charset charsetForName (String charsetName)
|
||||||
{
|
{
|
||||||
return (Charset) charsets.get (canonicalize (charsetName));
|
return (Charset) charsets.get(canonicalNames.get(charsetName.toLowerCase()));
|
||||||
}
|
|
||||||
|
|
||||||
private Object canonicalize (String charsetName)
|
|
||||||
{
|
|
||||||
Object o = canonicalNames.get (charsetName);
|
|
||||||
return o == null ? charsetName : o;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Puts a Charset under its canonical name into the 'charsets' map.
|
||||||
|
* Then puts a mapping from all its alias names to the canonical name.
|
||||||
|
*
|
||||||
|
* <p>All names are converted to lower-case.</p>
|
||||||
|
*
|
||||||
|
* @param cs the Charset to register under its canonical name and aliases
|
||||||
|
*/
|
||||||
private void addCharset (Charset cs)
|
private void addCharset (Charset cs)
|
||||||
{
|
{
|
||||||
String canonicalName = cs.name ();
|
String canonicalName = cs.name().toLowerCase();
|
||||||
charsets.put (canonicalName, cs);
|
charsets.put (canonicalName, cs);
|
||||||
|
|
||||||
|
/* Maps the canonical name to itself
|
||||||
|
* so that a lookup by the canonical name
|
||||||
|
* needs no special case.
|
||||||
|
*/
|
||||||
|
canonicalNames.put(canonicalName, canonicalName);
|
||||||
|
|
||||||
for (Iterator i = cs.aliases ().iterator (); i.hasNext (); )
|
for (Iterator i = cs.aliases ().iterator (); i.hasNext (); )
|
||||||
canonicalNames.put (i.next (), canonicalName);
|
canonicalNames.put (((String) i.next()).toLowerCase(), canonicalName);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static synchronized Provider provider ()
|
public static synchronized Provider provider ()
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
/* US_ASCII.java --
|
/* US_ASCII.java --
|
||||||
Copyright (C) 2002, 2004 Free Software Foundation, Inc.
|
Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
|
||||||
|
|
||||||
This file is part of GNU Classpath.
|
This file is part of GNU Classpath.
|
||||||
|
|
||||||
|
|
@ -53,7 +53,29 @@ final class US_ASCII extends Charset
|
||||||
{
|
{
|
||||||
US_ASCII ()
|
US_ASCII ()
|
||||||
{
|
{
|
||||||
super ("US-ASCII", new String[]{"ISO646-US"});
|
/* Canonical charset name chosen according to:
|
||||||
|
* http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
|
||||||
|
*/
|
||||||
|
super ("US-ASCII", new String[] {
|
||||||
|
/* These names are provided by
|
||||||
|
* http://www.iana.org/assignments/character-sets
|
||||||
|
*/
|
||||||
|
"iso-ir-6",
|
||||||
|
"ANSI_X3.4-1986",
|
||||||
|
"ISO_646.irv:1991",
|
||||||
|
"ASCII",
|
||||||
|
"ISO646-US",
|
||||||
|
"ASCII",
|
||||||
|
"us",
|
||||||
|
"IBM367",
|
||||||
|
"cp367",
|
||||||
|
"csASCII",
|
||||||
|
/* These names are provided by
|
||||||
|
* http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
|
||||||
|
*/
|
||||||
|
"ANSI_X3.4-1968", "iso_646.irv:1983", "ascii7", "646",
|
||||||
|
"windows-20127"
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean contains (Charset cs)
|
public boolean contains (Charset cs)
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
/* UTF_16.java --
|
/* UTF_16.java --
|
||||||
Copyright (C) 2002, 2004 Free Software Foundation, Inc.
|
Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
|
||||||
|
|
||||||
This file is part of GNU Classpath.
|
This file is part of GNU Classpath.
|
||||||
|
|
||||||
|
|
@ -51,7 +51,14 @@ final class UTF_16 extends Charset
|
||||||
{
|
{
|
||||||
UTF_16 ()
|
UTF_16 ()
|
||||||
{
|
{
|
||||||
super ("UTF-16", null);
|
super ("UTF-16", new String[] {
|
||||||
|
// witnessed by the internet
|
||||||
|
"UTF16",
|
||||||
|
/* These names are provided by
|
||||||
|
* http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
|
||||||
|
*/
|
||||||
|
"ISO-10646-UCS-2", "unicode", "csUnicode", "ucs-2"
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean contains (Charset cs)
|
public boolean contains (Charset cs)
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
/* UTF_16BE.java --
|
/* UTF_16BE.java --
|
||||||
Copyright (C) 2002, 2004 Free Software Foundation, Inc.
|
Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
|
||||||
|
|
||||||
This file is part of GNU Classpath.
|
This file is part of GNU Classpath.
|
||||||
|
|
||||||
|
|
@ -51,7 +51,18 @@ final class UTF_16BE extends Charset
|
||||||
{
|
{
|
||||||
UTF_16BE ()
|
UTF_16BE ()
|
||||||
{
|
{
|
||||||
super ("UTF-16BE", null);
|
super ("UTF-16BE", new String[] {
|
||||||
|
// witnessed by the internet
|
||||||
|
"UTF16BE",
|
||||||
|
/* These names are provided by
|
||||||
|
* http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
|
||||||
|
*/
|
||||||
|
"x-utf-16be", "ibm-1200", "ibm-1201", "ibm-5297",
|
||||||
|
"ibm-13488", "ibm-17584", "windows-1201", "cp1200", "cp1201",
|
||||||
|
"UTF16_BigEndian",
|
||||||
|
// see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
|
||||||
|
"UnicodeBigUnmarked"
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean contains (Charset cs)
|
public boolean contains (Charset cs)
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
/* UTF_16LE.java --
|
/* UTF_16LE.java --
|
||||||
Copyright (C) 2002, 2004 Free Software Foundation, Inc.
|
Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
|
||||||
|
|
||||||
This file is part of GNU Classpath.
|
This file is part of GNU Classpath.
|
||||||
|
|
||||||
|
|
@ -51,7 +51,17 @@ final class UTF_16LE extends Charset
|
||||||
{
|
{
|
||||||
UTF_16LE ()
|
UTF_16LE ()
|
||||||
{
|
{
|
||||||
super ("UTF-16LE", null);
|
super ("UTF-16LE", new String[] {
|
||||||
|
// witnessed by the internet
|
||||||
|
"UTF16LE",
|
||||||
|
/* These names are provided by
|
||||||
|
* http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
|
||||||
|
*/
|
||||||
|
"x-utf-16le", "ibm-1202", "ibm-13490", "ibm-17586",
|
||||||
|
"UTF16_LittleEndian",
|
||||||
|
// see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
|
||||||
|
"UnicodeLittleUnmarked"
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean contains (Charset cs)
|
public boolean contains (Charset cs)
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
/* UTF_8.java --
|
/* UTF_8.java --
|
||||||
Copyright (C) 2002, 2004 Free Software Foundation, Inc.
|
Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
|
||||||
|
|
||||||
This file is part of GNU Classpath.
|
This file is part of GNU Classpath.
|
||||||
|
|
||||||
|
|
@ -62,7 +62,15 @@ final class UTF_8 extends Charset
|
||||||
{
|
{
|
||||||
UTF_8 ()
|
UTF_8 ()
|
||||||
{
|
{
|
||||||
super ("UTF-8", null);
|
super ("UTF-8", new String[] {
|
||||||
|
/* These names are provided by
|
||||||
|
* http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
|
||||||
|
*/
|
||||||
|
"ibm-1208", "ibm-1209", "ibm-5304", "ibm-5305",
|
||||||
|
"windows-65001", "cp1208",
|
||||||
|
// see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
|
||||||
|
"UTF8"
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean contains (Charset cs)
|
public boolean contains (Charset cs)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue