mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			
		
			
				
	
	
		
			206 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Java
		
	
	
	
			
		
		
	
	
			206 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Java
		
	
	
	
| /* UnicodeReader.java --
 | |
|    Copyright (C) 2005  Free Software Foundation, Inc.
 | |
| 
 | |
| This file is part of GNU Classpath.
 | |
| 
 | |
| GNU Classpath is free software; you can redistribute it and/or modify
 | |
| it under the terms of the GNU General Public License as published by
 | |
| the Free Software Foundation; either version 2, or (at your option)
 | |
| any later version.
 | |
| 
 | |
| GNU Classpath is distributed in the hope that it will be useful, but
 | |
| WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
| General Public License for more details.
 | |
| 
 | |
| You should have received a copy of the GNU General Public License
 | |
| along with GNU Classpath; see the file COPYING.  If not, write to the
 | |
| Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 | |
| 02110-1301 USA.
 | |
| 
 | |
| Linking this library statically or dynamically with other modules is
 | |
| making a combined work based on this library.  Thus, the terms and
 | |
| conditions of the GNU General Public License cover the whole
 | |
| combination.
 | |
| 
 | |
| As a special exception, the copyright holders of this library give you
 | |
| permission to link this library with independent modules to produce an
 | |
| executable, regardless of the license terms of these independent
 | |
| modules, and to copy and distribute the resulting executable under
 | |
| terms of your choice, provided that you also meet, for each linked
 | |
| independent module, the terms and conditions of the license of that
 | |
| module.  An independent module is a module which is not derived from
 | |
| or based on this library.  If you modify this library, you may extend
 | |
| this exception to your version of the library, but you are not
 | |
| obligated to do so.  If you do not wish to do so, delete this
 | |
| exception statement from your version. */
 | |
| 
 | |
| package gnu.xml.stream;
 | |
| 
 | |
| import java.io.IOException;
 | |
| import java.io.Reader;
 | |
| 
 | |
/**
 * A reader that converts UTF-16 characters to Unicode code points.
 * Surrogate pairs delivered by the underlying reader are combined into a
 * single supplementary code point; a surrogate that is not part of a
 * well-formed pair is reported as an {@link IOException}.
 *
 * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
 */
public class UnicodeReader
{

  /** The underlying source of UTF-16 code units. */
  final Reader in;

  /**
   * Creates a code point reader on top of the given character reader.
   *
   * @param in the reader supplying UTF-16 code units
   */
  UnicodeReader(Reader in)
  {
    this.in = in;
  }

  /**
   * Marks the current position in the underlying reader.
   *
   * @param limit the read-ahead limit; it is doubled before being passed
   * to the underlying reader, since one code point may occupy two chars
   * @throws IOException if the underlying reader fails to mark
   */
  public void mark(int limit)
    throws IOException
  {
    in.mark(limit * 2);
  }

  /**
   * Resets the underlying reader to its last mark.
   *
   * @throws IOException if the underlying reader fails to reset
   */
  public void reset()
    throws IOException
  {
    in.reset();
  }

  /**
   * Reads a single Unicode code point.
   *
   * @return the code point, or -1 if the underlying reader is exhausted
   * @throws IOException if an unpaired surrogate is encountered or the
   * underlying reader fails
   */
  public int read()
    throws IOException
  {
    int ret = in.read();
    if (ret == -1)
      return ret;
    if (isHighSurrogate(ret))
      {
        // A high surrogate must be followed immediately by a low one.
        int low = in.read();
        if (!isLowSurrogate(low))
          throw unpairedSurrogate(ret);
        return Character.toCodePoint((char) ret, (char) low);
      }
    if (isLowSurrogate(ret))
      throw unpairedSurrogate(ret);
    return ret;
  }

  /**
   * Reads up to <code>len</code> UTF-16 chars from the underlying reader
   * and stores the resulting code points in <code>buf</code>, starting at
   * index <code>off</code>. If the last char obtained is a high surrogate,
   * one further char is read so that the pair can be completed.
   *
   * @param buf the destination buffer
   * @param off the index in <code>buf</code> of the first code point stored
   * @param len the maximum number of chars to request from the underlying
   * reader
   * @return the number of code points stored in <code>buf</code>, 0 if
   * <code>len</code> is 0, or the underlying reader's own result when it
   * supplies no chars (-1 at end of stream)
   * @throws IOException if an unpaired surrogate is encountered or the
   * underlying reader fails
   */
  public int read(int[] buf, int off, int len)
    throws IOException
  {
    if (len == 0)
      return 0;
    char[] chars = new char[len];
    int count = in.read(chars, 0, len);
    if (count <= 0)
      return count;
    int last = count - 1;
    int i = 0;
    int j = off;
    // Every char except the last can be paired by looking ahead in chars.
    while (i < last)
      {
        char c = chars[i];
        if (isHighSurrogate(c))
          {
            char d = chars[i + 1];
            if (!isLowSurrogate(d))
              throw unpairedSurrogate(c);
            buf[j++] = Character.toCodePoint(c, d);
            i += 2;
          }
        else if (isLowSurrogate(c))
          throw unpairedSurrogate(c);
        else
          {
            buf[j++] = c;
            i++;
          }
      }
    if (i == last)
      {
        // The last char was not consumed as the low half of a pair.
        char c = chars[last];
        if (isHighSurrogate(c))
          {
            // Its partner, if any, is still in the underlying reader.
            int low = in.read();
            if (!isLowSurrogate(low))
              throw unpairedSurrogate(c);
            buf[j++] = Character.toCodePoint(c, (char) low);
          }
        else if (isLowSurrogate(c))
          throw unpairedSurrogate(c);
        else
          buf[j++] = c;
      }
    // j started at off, so the number of code points stored is j - off.
    return j - off;
  }

  /**
   * Closes the underlying reader.
   *
   * @throws IOException if the underlying reader fails to close
   */
  public void close()
    throws IOException
  {
    in.close();
  }

  /**
   * Returns the UTF-16 chars of the specified string as an array of
   * Unicode code points. The returned array holds exactly one element per
   * code point.
   *
   * NOTE: the final char of the text is copied without surrogate
   * validation, so a lone surrogate in the last position is passed
   * through unchanged rather than rejected.
   *
   * @param text the text to convert
   * @return the code points of <code>text</code>, in order
   * @throws IOException if an unpaired surrogate occurs before the final
   * char
   */
  public static int[] toCodePointArray(String text)
    throws IOException
  {
    char[] chars = text.toCharArray();
    int[] buf = new int[chars.length];
    int last = chars.length - 1;
    int i = 0;
    int j = 0;
    while (i < last)
      {
        char c = chars[i];
        if (isHighSurrogate(c))
          {
            char d = chars[i + 1];
            if (!isLowSurrogate(d))
              throw unpairedSurrogate(c);
            buf[j++] = Character.toCodePoint(c, d);
            i += 2;
          }
        else if (isLowSurrogate(c))
          throw unpairedSurrogate(c);
        else
          {
            buf[j++] = c;
            i++;
          }
      }
    if (i == last)
      {
        // The last char was not consumed as the low half of a pair.
        buf[j++] = chars[last];
      }
    // Trim whenever pairs were merged, including a pair at the very end
    // of the text (in which case the branch above is not taken).
    if (j < buf.length)
      {
        int[] trimmed = new int[j];
        System.arraycopy(buf, 0, trimmed, 0, j);
        buf = trimmed;
      }
    return buf;
  }

  /** Tests whether c lies in the high surrogate range U+D800..U+DBFF. */
  private static boolean isHighSurrogate(int c)
  {
    return c >= 0xd800 && c < 0xdc00;
  }

  /** Tests whether c lies in the low surrogate range U+DC00..U+DFFF. */
  private static boolean isLowSurrogate(int c)
  {
    return c >= 0xdc00 && c < 0xe000;
  }

  /** Builds the exception reporting c as an unpaired surrogate. */
  private static IOException unpairedSurrogate(int c)
  {
    return new IOException("unpaired surrogate: U+" +
                           Integer.toHexString(c));
  }

}
 |