mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			
		
			
				
	
	
		
			426 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Java
		
	
	
	
			
		
		
	
	
			426 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Java
		
	
	
	
/* NumericShaper.java
   Copyright (C) 2003 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version. */


package java.awt.font;
 | 
						|
 | 
						|
import java.io.Serializable;
 | 
						|
import java.lang.Character.UnicodeBlock;
 | 
						|
 | 
						|
/**
 * This class handles numeric shaping.  A shaper can either be contextual
 * or not.  A non-contextual shaper will always translate ASCII digits
 * in its input into the target Unicode range.  A contextual shaper will
 * change the target Unicode range depending on the characters it has
 * previously processed.
 *
 * @author Michael Koch
 * @author Tom Tromey
 *
 * @since 1.4
 * @specnote This class does not handle LIMBU or OSMANYA.
 * @specnote The JDK does not seem to properly handle ranges without a
 * digit zero, such as TAMIL.  This implementation does.
 */
public final class NumericShaper implements Serializable
{
  private static final long serialVersionUID = -8022764705923730308L;

  /** Convenience constant representing all the valid Unicode ranges.  */
  public static final int ALL_RANGES  = 524287;

  /**
   * Constant representing the Unicode ARABIC range.  Shaping done
   * using this range will translate to the arabic decimal characters.
   * Use EASTERN_ARABIC if you want to shape to the eastern arabic
   * (also known as the extended arabic) decimal characters.
   */
  public static final int ARABIC  = 2;

  /** Constant representing the Unicode BENGALI range.  */
  public static final int BENGALI  = 16;

  /** Constant representing the Unicode DEVANAGARI range.  */
  public static final int DEVANAGARI  = 8;

  /**
   * Constant representing the Unicode extended arabic range.
   * In Unicode there are two different sets of arabic digits;
   * this selects the extended or eastern set.
   */
  public static final int EASTERN_ARABIC  = 4;

  /**
   * Constant representing the Unicode ETHIOPIC range.  Note that
   * there is no digit zero in this range; an ASCII digit zero
   * is left unchanged when shaping to this range.
   */
  public static final int ETHIOPIC  = 65536;

  /**
   * Constant representing the Unicode EUROPEAN range.  For
   * contextual shaping purposes, characters in the various
   * extended Latin character blocks are recognized as EUROPEAN.
   */
  public static final int EUROPEAN  = 1;

  /** Constant representing the Unicode GUJARATI range.  */
  public static final int GUJARATI  = 64;

  /** Constant representing the Unicode GURMUKHI range.  */
  public static final int GURMUKHI  = 32;

  /** Constant representing the Unicode KANNADA range.  */
  public static final int KANNADA  = 1024;

  /** Constant representing the Unicode KHMER range.  */
  public static final int KHMER  = 131072;

  /** Constant representing the Unicode LAO range.  */
  public static final int LAO  = 8192;

  /** Constant representing the Unicode MALAYALAM range.  */
  public static final int MALAYALAM  = 2048;

  /** Constant representing the Unicode MONGOLIAN range.  */
  public static final int MONGOLIAN  = 262144;

  /** Constant representing the Unicode MYANMAR range.  */
  public static final int MYANMAR  = 32768;

  /** Constant representing the Unicode ORIYA range.  */
  public static final int ORIYA  = 128;

  /**
   * Constant representing the Unicode TAMIL range.  Note that
   * there is no digit zero in this range; an ASCII digit zero
   * is left unchanged when shaping to this range.
   */
  public static final int TAMIL  = 256;

  /** Constant representing the Unicode TELUGU range.  */
  public static final int TELUGU  = 512;

  /** Constant representing the Unicode THAI range.  */
  public static final int THAI  = 4096;

  /** Constant representing the Unicode TIBETAN range.  */
  public static final int TIBETAN  = 16384;

  /**
   * This table holds the zero digits for each language, indexed by the
   * bit position of the corresponding range constant.  This is hard-coded
   * because the values will not change and the table layout is tied to the
   * other constants in this class in any case.  In the two places where a
   * language does not have a zero digit, the character immediately preceding
   * the one digit is used instead.  These languages are special-cased in
   * the shaping code.
   */
  private static final char[] zeroDigits =
  {
    '0',      // EUROPEAN
    '\u0660', // ARABIC
    '\u06f0', // EASTERN_ARABIC
    '\u0966', // DEVANAGARI
    '\u09e6', // BENGALI
    '\u0a66', // GURMUKHI
    '\u0ae6', // GUJARATI
    '\u0b66', // ORIYA
    '\u0be6', // TAMIL - special case as there is no digit zero
    '\u0c66', // TELUGU
    '\u0ce6', // KANNADA
    '\u0d66', // MALAYALAM
    '\u0e50', // THAI
    '\u0ed0', // LAO
    '\u0f20', // TIBETAN
    '\u1040', // MYANMAR
    '\u1368', // ETHIOPIC - special case as there is no digit zero
    '\u17e0', // KHMER
    '\u1810'  // MONGOLIAN
  };

  /**
   * The default initial context for this shaper, specified as
   * an integer from 0 to 18.
   */
  private int key;

  /**
   * The target ranges handled by this shaper.  If the shaper
   * is not contextual, the high bit of this field will be set.
   * @specnote This was discovered by reading the serialization spec
   */
  private int mask;

  /**
   * Create a new numeric shaper.  The key given is a constant from
   * this class, the constructor turns it into its internal form.
   * @param key the key to use, as one of the manifest constants
   * @param mask a mask of languages to shape for
   */
  private NumericShaper (int key, int mask)
  {
    // This internal form is a bit goofy, but it is specified by
    // the serialization spec.
    this.key = Integer.numberOfTrailingZeros(key);
    this.mask = mask;
  }

  /**
   * Return an integer representing all the languages for which this
   * shaper will shape.  The result is taken by "or"ing together
   * the constants representing the various languages.
   * @return the range bits of this shaper, without the internal
   * non-contextual marker bit
   */
  public int getRanges ()
  {
    return mask & ALL_RANGES;
  }

  /**
   * Return true if this shaper is contextual, false if it is not.
   * @return true if this shaper is contextual
   */
  public boolean isContextual ()
  {
    // Non-contextual shapers carry the high (sign) bit in the mask, so a
    // contextual shaper is any non-negative mask.  That includes zero: a
    // contextual shaper built with an empty set of ranges.
    return mask >= 0;
  }

  /**
   * Shape the text in the given array.  The starting context will
   * be the context passed to the shaper at creation time.
   * @param text the text to shape
   * @param start the index of the starting character of the array
   * @param count the number of characters in the array
   * @throws ArrayIndexOutOfBoundsException if count is positive and the
   * requested range does not lie within the array; the array is left
   * unmodified in that case
   */
  public void shape (char[] text, int start, int count)
  {
    shape (text, start, count, 1 << key);
  }

  /**
   * Given a unicode block object, return corresponding language constant.
   * If the block is not recognized, returns zero.  Note that as there
   * is no separate ARABIC block in Character, this case must
   * be specially handled by the caller; EASTERN_ARABIC is preferred when
   * both are specified.
   * @param b the unicode block to classify
   * @return the language constant, or zero if not recognized
   */
  private static int classify(UnicodeBlock b)
  {
    if (b == null)
      return 0;
    // ARABIC is handled by the caller; from testing we know
    // that EASTERN_ARABIC takes precedence.
    if (b == UnicodeBlock.ARABIC)
      return EASTERN_ARABIC;
    if (b == UnicodeBlock.BENGALI)
      return BENGALI;
    if (b == UnicodeBlock.DEVANAGARI)
      return DEVANAGARI;
    if (b == UnicodeBlock.ETHIOPIC)
      return ETHIOPIC;
    if (b == UnicodeBlock.BASIC_LATIN
        || b == UnicodeBlock.LATIN_1_SUPPLEMENT
        || b == UnicodeBlock.LATIN_EXTENDED_A
        || b == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL
        || b == UnicodeBlock.LATIN_EXTENDED_B)
      return EUROPEAN;
    if (b == UnicodeBlock.GUJARATI)
      return GUJARATI;
    if (b == UnicodeBlock.GURMUKHI)
      return GURMUKHI;
    if (b == UnicodeBlock.KANNADA)
      return KANNADA;
    if (b == UnicodeBlock.KHMER)
      return KHMER;
    if (b == UnicodeBlock.LAO)
      return LAO;
    if (b == UnicodeBlock.MALAYALAM)
      return MALAYALAM;
    if (b == UnicodeBlock.MONGOLIAN)
      return MONGOLIAN;
    if (b == UnicodeBlock.MYANMAR)
      return MYANMAR;
    if (b == UnicodeBlock.ORIYA)
      return ORIYA;
    if (b == UnicodeBlock.TAMIL)
      return TAMIL;
    if (b == UnicodeBlock.TELUGU)
      return TELUGU;
    if (b == UnicodeBlock.THAI)
      return THAI;
    if (b == UnicodeBlock.TIBETAN)
      return TIBETAN;
    return 0;
  }

  /**
   * Tell whether the range with the given internal key has no digit zero
   * in the table, so that an ASCII zero must be left unchanged.
   * @param contextKey the bit position of a range constant
   * @return true for TAMIL and ETHIOPIC, false otherwise
   */
  private static boolean lacksDigitZero(int contextKey)
  {
    int range = 1 << contextKey;
    return range == TAMIL || range == ETHIOPIC;
  }

  /**
   * Shape the given text, using the indicated initial context.
   * If this shaper is not a contextual shaper, then the given context
   * will be ignored.
   * @param text the text to shape
   * @param start the index of the first character of the text to shape
   * @param count the number of characters to shape in the text
   * @param context the initial context
   * @throws IllegalArgumentException if the initial context is invalid
   * @throws ArrayIndexOutOfBoundsException if count is positive and the
   * requested range does not lie within the array; the array is left
   * unmodified in that case
   */
  public void shape (char[] text, int start, int count, int context)
  {
    final boolean contextual = isContextual();
    int currentContext;
    if (contextual)
      {
        if (Integer.bitCount(context) != 1 || (context & ~ALL_RANGES) != 0)
          throw new IllegalArgumentException("invalid context argument");
        // If the indicated context is not one we are handling, start
        // without any context; digits are left alone until one is seen.
        if ((context & mask) == 0)
          currentContext = -1;
        else
          currentContext = Integer.numberOfTrailingZeros(context);
      }
    else
      currentContext = key;

    // Nothing to do for an empty (or negative) count.
    if (count <= 0)
      return;

    // Validate the whole range before touching the array, so that a bad
    // request never leaves the text partially shaped.  Written as a
    // subtraction so that start + count cannot overflow.
    if (start < 0 || start > text.length - count)
      throw new ArrayIndexOutOfBoundsException("bad start (" + start
                                               + ") or count (" + count
                                               + ") for text of length "
                                               + text.length);

    final int end = start + count;
    for (int i = start; i < end; ++i)
      {
        char c = text[i];
        if (c >= '0' && c <= '9')
          {
            // Shape into the current context, if there is one.  Ranges
            // without a digit zero keep the ASCII '0' as it is.
            if (currentContext >= 0
                && ! (c == '0' && lacksDigitZero(currentContext)))
              text[i] = (char) (zeroDigits[currentContext] + c - '0');
          }
        else if (contextual)
          {
            // If c belongs to a range selected by this shaper, switch to
            // it; any other character leaves the context unchanged.
            int group = classify(UnicodeBlock.of(c));
            // Specially handle ARABIC: classify only ever reports
            // EASTERN_ARABIC, so fall back to ARABIC when that is the
            // one the shaper was asked for.
            if (group == EASTERN_ARABIC && (mask & EASTERN_ARABIC) == 0
                && (mask & ARABIC) != 0)
              group = ARABIC;
            if ((mask & group) != 0)
              currentContext = Integer.numberOfTrailingZeros(group);
          }
      }
  }

  /**
   * Compare this shaper to another object.
   * @param obj the object to compare against
   * @return true if obj is a NumericShaper with the same default
   * context and the same mask
   */
  @Override
  public boolean equals (Object obj)
  {
    if (! (obj instanceof NumericShaper))
      return false;
    NumericShaper tmp = (NumericShaper) obj;
    return key == tmp.key && mask == tmp.mask;
  }

  /**
   * Return a hash code consistent with {@link #equals(Object)}.
   * @return the hash code, derived from the key and the mask
   */
  @Override
  public int hashCode ()
  {
    return key ^ mask;
  }

  /**
   * Return a string showing the internal key and mask.
   * @return a description of this shaper, for debugging only
   */
  @Override
  public String toString ()
  {
    // For debugging only.
    return "key=" + key + "; mask=" + mask;
  }

  /**
   * Return a non-contextual shaper which can shape to a single range.
   * All ASCII digits in the input text are translated to this language.
   * @param singleRange the target language
   * @return a non-contextual shaper for this language
   * @throws IllegalArgumentException if the argument does not name a
   * single language, as specified by the constants declared in this class
   */
  public static NumericShaper getShaper (int singleRange)
  {
    if (Integer.bitCount(singleRange) != 1)
      throw new IllegalArgumentException("more than one bit set in argument");
    if ((singleRange & ~ALL_RANGES) != 0)
      throw new IllegalArgumentException("argument out of range");
    // The high bit marks the shaper as non-contextual.
    return new NumericShaper(singleRange, Integer.MIN_VALUE | singleRange);
  }

  /**
   * Return a contextual shaper which can shape to any of the indicated
   * languages.  The default initial context for this shaper is EUROPEAN.
   * @param ranges the ranges to shape to
   * @return a contextual shaper which will target any of these ranges
   * @throws IllegalArgumentException if the argument specifies an
   * unrecognized range
   */
  public static NumericShaper getContextualShaper (int ranges)
  {
    if ((ranges & ~ALL_RANGES) != 0)
      throw new IllegalArgumentException("argument out of range");
    return new NumericShaper(EUROPEAN, ranges);
  }

  /**
   * Return a contextual shaper which can shape to any of the indicated
   * languages.  The default initial context for this shaper is given as
   * an argument.
   * @param ranges the ranges to shape to
   * @param defaultContext the default initial context
   * @return a contextual shaper which will target any of these ranges
   * @throws IllegalArgumentException if the ranges argument specifies an
   * unrecognized range, or if the defaultContext argument does not specify
   * a single valid range
   */
  public static NumericShaper getContextualShaper (int ranges,
                                                   int defaultContext)
  {
    if (Integer.bitCount(defaultContext) != 1)
      throw new IllegalArgumentException("more than one bit set in context");
    if ((ranges & ~ALL_RANGES) != 0 || (defaultContext & ~ALL_RANGES) != 0)
      throw new IllegalArgumentException("argument out of range");
    return new NumericShaper(defaultContext, ranges);
  }
}
 |