mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			
		
			
				
	
	
		
			206 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Java
		
	
	
	
			
		
		
	
	
			206 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Java
		
	
	
	
/* UnicodeReader.java --
   Copyright (C) 2005  Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version. */
 | 
						|
 | 
						|
package gnu.xml.stream;
 | 
						|
 | 
						|
import java.io.IOException;
 | 
						|
import java.io.Reader;
 | 
						|
 | 
						|
/**
 * A reader that converts UTF-16 characters to Unicode code points.
 * <p>
 * Characters are pulled from an underlying {@link Reader}. A high
 * surrogate immediately followed by a low surrogate is combined into a
 * single supplementary code point; any surrogate that is not part of such
 * a pair causes an {@link IOException}.
 *
 * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
 */
public class UnicodeReader
{

  /** The underlying source of UTF-16 characters. */
  final Reader in;

  /**
   * Creates a code point reader on top of the given character reader.
   *
   * @param in the reader supplying UTF-16 characters
   */
  UnicodeReader(Reader in)
  {
    this.in = in;
  }

  /**
   * Marks the current position in the underlying reader.
   *
   * @param limit the read-ahead limit; it is doubled before being handed
   * to the underlying reader because one code point may occupy two chars
   * @throws IOException if the underlying reader fails to mark
   */
  public void mark(int limit)
    throws IOException
  {
    // Saturate instead of letting limit * 2 wrap around to a negative value.
    int charLimit = (limit > Integer.MAX_VALUE / 2)
      ? Integer.MAX_VALUE
      : limit * 2;
    in.mark(charLimit);
  }

  /**
   * Resets the underlying reader to the last marked position.
   *
   * @throws IOException if the underlying reader fails to reset
   */
  public void reset()
    throws IOException
  {
    in.reset();
  }

  /**
   * Reads a single code point.
   *
   * @return the code point read, or -1 if the underlying reader is at the
   * end of its input
   * @throws IOException if an unpaired surrogate is encountered or the
   * underlying reader fails
   */
  public int read()
    throws IOException
  {
    int ret = in.read();
    if (ret == -1)
      return ret;
    if (isHighSurrogate(ret))
      {
        // The next char must complete the pair; end of input (-1) does not.
        int low = in.read();
        if (!isLowSurrogate(low))
          throw unpairedSurrogate(ret);
        return Character.toCodePoint((char) ret, (char) low);
      }
    if (isLowSurrogate(ret))
      throw unpairedSurrogate(ret);
    return ret;
  }

  /**
   * Reads code points into part of an array.
   * <p>
   * At most <code>len</code> chars are requested from the underlying
   * reader in one bulk read. If the last char obtained is a high
   * surrogate, one additional char is read to complete the pair.
   *
   * @param buf the destination array
   * @param off the index in <code>buf</code> of the first code point stored
   * @param len the maximum number of chars to request
   * @return the number of code points stored in <code>buf</code>, or the
   * non-positive value returned by the underlying reader if it delivered
   * no characters
   * @throws IOException if an unpaired surrogate is encountered or the
   * underlying reader fails
   */
  public int read(int[] buf, int off, int len)
    throws IOException
  {
    if (len == 0)
      return 0;
    char[] chars = new char[len];
    int count = in.read(chars, 0, len);
    if (count <= 0)
      return count;
    int j = off;
    for (int i = 0; i < count; i++)
      {
        char c = chars[i];
        if (isHighSurrogate(c))
          {
            int low;
            if (i + 1 < count)
              low = chars[++i];
            else
              // The pair straddles the end of this chunk: fetch its
              // second half directly from the underlying reader.
              low = in.read();
            if (!isLowSurrogate(low))
              throw unpairedSurrogate(c);
            buf[j++] = Character.toCodePoint(c, (char) low);
          }
        else if (isLowSurrogate(c))
          throw unpairedSurrogate(c);
        else
          buf[j++] = c;
      }
    // Report how many code points were stored, not the end index.
    return j - off;
  }

  /**
   * Closes the underlying reader.
   *
   * @throws IOException if the underlying reader fails to close
   */
  public void close()
    throws IOException
  {
    in.close();
  }

  /**
   * Returns the specified UTF-16 string as an array of Unicode code
   * points. The returned array contains exactly one element per code
   * point.
   *
   * @param text the UTF-16 text to convert
   * @return the code points of <code>text</code>, in order
   * @throws IOException if <code>text</code> contains an unpaired
   * surrogate
   */
  public static int[] toCodePointArray(String text)
    throws IOException
  {
    char[] chars = text.toCharArray();
    int[] buf = new int[chars.length];
    int j = 0;
    for (int i = 0; i < chars.length; i++)
      {
        char c = chars[i];
        if (isHighSurrogate(c))
          {
            // -1 stands for "no following char" and is never a low surrogate.
            int low = -1;
            if (i + 1 < chars.length)
              low = chars[++i];
            if (!isLowSurrogate(low))
              throw unpairedSurrogate(c);
            buf[j++] = Character.toCodePoint(c, (char) low);
          }
        else if (isLowSurrogate(c))
          throw unpairedSurrogate(c);
        else
          buf[j++] = c;
      }
    if (j < buf.length)
      {
        // Surrogate pairs collapsed into single entries: trim the excess.
        int[] trimmed = new int[j];
        System.arraycopy(buf, 0, trimmed, 0, j);
        buf = trimmed;
      }
    return buf;
  }

  /**
   * Tests whether the value is a UTF-16 high (leading) surrogate.
   */
  private static boolean isHighSurrogate(int c)
  {
    return c >= 0xd800 && c < 0xdc00;
  }

  /**
   * Tests whether the value is a UTF-16 low (trailing) surrogate.
   */
  private static boolean isLowSurrogate(int c)
  {
    return c >= 0xdc00 && c < 0xe000;
  }

  /**
   * Builds the exception reported for a surrogate that is not part of a
   * valid pair.
   */
  private static IOException unpairedSurrogate(int c)
  {
    return new IOException("unpaired surrogate: U+" +
                           Integer.toHexString(c));
  }

}
 |