mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			
		
			
				
	
	
		
			1012 lines
		
	
	
		
			33 KiB
		
	
	
	
		
			Java
		
	
	
	
			
		
		
	
	
			1012 lines
		
	
	
		
			33 KiB
		
	
	
	
		
			Java
		
	
	
	
/* RuleBasedCollator.java -- Concrete Collator Class
 | 
						|
   Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005  Free Software Foundation, Inc.
 | 
						|
 | 
						|
This file is part of GNU Classpath.
 | 
						|
 | 
						|
GNU Classpath is free software; you can redistribute it and/or modify
 | 
						|
it under the terms of the GNU General Public License as published by
 | 
						|
the Free Software Foundation; either version 2, or (at your option)
 | 
						|
any later version.
 | 
						|
 | 
						|
GNU Classpath is distributed in the hope that it will be useful, but
 | 
						|
WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
						|
General Public License for more details.
 | 
						|
 | 
						|
You should have received a copy of the GNU General Public License
 | 
						|
along with GNU Classpath; see the file COPYING.  If not, write to the
 | 
						|
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 | 
						|
02110-1301 USA.
 | 
						|
 | 
						|
Linking this library statically or dynamically with other modules is
 | 
						|
making a combined work based on this library.  Thus, the terms and
 | 
						|
conditions of the GNU General Public License cover the whole
 | 
						|
combination.
 | 
						|
 | 
						|
As a special exception, the copyright holders of this library give you
 | 
						|
permission to link this library with independent modules to produce an
 | 
						|
executable, regardless of the license terms of these independent
 | 
						|
modules, and to copy and distribute the resulting executable under
 | 
						|
terms of your choice, provided that you also meet, for each linked
 | 
						|
independent module, the terms and conditions of the license of that
 | 
						|
module.  An independent module is a module which is not derived from
 | 
						|
or based on this library.  If you modify this library, you may extend
 | 
						|
this exception to your version of the library, but you are not
 | 
						|
obligated to do so.  If you do not wish to do so, delete this
 | 
						|
exception statement from your version. */
 | 
						|
 | 
						|
 | 
						|
package java.text;
 | 
						|
 | 
						|
import gnu.classpath.NotImplementedException;
 | 
						|
 | 
						|
import java.util.ArrayList;
 | 
						|
import java.util.HashMap;
 | 
						|
 | 
						|
/* Written using "Java Class Libraries", 2nd edition, plus online
 | 
						|
 * API docs for JDK 1.2 from http://www.javasoft.com.
 | 
						|
 * Status: Believed complete and correct
 | 
						|
 */
 | 
						|
 | 
						|
/**
 | 
						|
 * This class is a concrete subclass of <code>Collator</code> suitable
 | 
						|
 * for string collation in a wide variety of languages.  An instance of
 | 
						|
 * this class is normally returned by the <code>getInstance</code> method
 | 
						|
 * of <code>Collator</code> with rules predefined for the requested
 | 
						|
 * locale.  However, an instance of this class can be created manually
 | 
						|
 * with any desired rules.
 | 
						|
 * <p>
 | 
						|
 * Rules take the form of a <code>String</code> with the following syntax
 | 
						|
 * <ul>
 | 
						|
 * <li> Modifier: '@'</li>
 | 
						|
 * <li> Relation: '<' | ';' | ',' | '=' : &lt;text&gt;</li>
 | 
						|
 * <li> Reset: '&' : &lt;text&gt;</li>
 | 
						|
 * </ul>
 | 
						|
 * The modifier character indicates that accents sort backward as is the
 | 
						|
 * case with French.  The modifier applies to all rules <b>after</b>
 | 
						|
 * the modifier but before the next primary sequence. If placed at the end
 | 
						|
 * of the sequence it applies to all unknown accented characters.
 | 
						|
 * The relational operators specify how the text
 | 
						|
 * argument relates to the previous term.  The relation characters have
 | 
						|
 * the following meanings:
 | 
						|
 * <ul>
 | 
						|
 * <li>'<' - The text argument is greater than the prior term at the primary
 | 
						|
 * difference level.</li>
 | 
						|
 * <li>';' - The text argument is greater than the prior term at the secondary
 | 
						|
 * difference level.</li>
 | 
						|
 * <li>',' - The text argument is greater than the prior term at the tertiary
 | 
						|
 * difference level.</li>
 | 
						|
 * <li>'=' - The text argument is equal to the prior term</li>
 | 
						|
 * </ul>
 | 
						|
 * <p>
 | 
						|
 * As for the text argument itself, this is any sequence of Unicode
 | 
						|
 * characters not in the following ranges: 0x0009-0x000D, 0x0020-0x002F,
 | 
						|
 * 0x003A-0x0040, 0x005B-0x0060, and 0x007B-0x007E. If these characters are
 | 
						|
 * desired, they must be enclosed in single quotes.  If any whitespace is
 | 
						|
 * encountered, it is ignored.  (For example, "a b" is equal to "ab").
 | 
						|
 * <p>
 | 
						|
 * The reset operation inserts the following rule at the point where the
 | 
						|
 * text argument to it exists in the previously declared rule string.  This
 | 
						|
 * makes it easy to add new rules to an existing string by simply including
 | 
						|
 * them in a reset sequence at the end.  Note that the text argument, or
 | 
						|
 * at least the first character of it, must be present somewhere in the
 | 
						|
 * previously declared rules in order to be inserted properly.  If this
 | 
						|
 * is not satisfied, a <code>ParseException</code> will be thrown.
 | 
						|
 * <p>
 | 
						|
 * This system of configuring <code>RuleBasedCollator</code> is needlessly
 | 
						|
 * complex and the people at Taligent who developed it (along with the folks
 | 
						|
 * at Sun who accepted it into the Java standard library) deserve a slow
 | 
						|
 * and agonizing death.
 | 
						|
 * <p>
 | 
						|
 * Here are a couple of examples of rule strings:
 | 
						|
 * <p>
 | 
						|
 * "< a < b < c" - This string says that a is less than b which is
 | 
						|
 * less than c, with all differences being primary differences.
 | 
						|
 * <p>
 | 
						|
 * "< a,A < b,B < c,C" - This string says that 'A' is greater than 'a' with
 | 
						|
 * a tertiary strength comparison.  Both 'b' and 'B' are greater than 'a' and
 | 
						|
 * 'A' during a primary strength comparison.  But 'B' is greater than 'b'
 | 
						|
 * under a tertiary strength comparison.
 | 
						|
 * <p>
 | 
						|
 * "< a < c & a < b " - This sequence is identical in function to the
 | 
						|
 * "< a < b < c" rule string above.  The '&' reset symbol indicates that
 | 
						|
 * the rule "< b" is to be inserted after the text argument "a" in the
 | 
						|
 * previous rule string segment.
 | 
						|
 * <p>
 | 
						|
 * "< a < b & y < z" - This is an error.  The character 'y' does not appear
 | 
						|
 * anywhere in the previous rule string segment so the rule following the
 | 
						|
 * reset rule cannot be inserted.
 | 
						|
 * <p>
 | 
						|
 * "< a & A @ < e & E < f& F" - This sequence is equivalent to the following
 | 
						|
 * "< a & A < E & e < f & F".
 | 
						|
 * <p>
 | 
						|
 * For a description of the various comparison strength types, see the
 | 
						|
 * documentation for the <code>Collator</code> class.
 | 
						|
 * <p>
 | 
						|
 * As an additional complication to this already overly complex rule scheme,
 | 
						|
 * if any characters precede the first rule, these characters are considered
 | 
						|
 * ignorable.  They will be treated as if they did not exist during
 | 
						|
 * comparisons.  For example, "- < a < b ..." would make '-' an ignorable
 | 
						|
 * character such that the strings "high-tech" and "hightech" would
 | 
						|
 * be considered identical.
 | 
						|
 * <p>
 | 
						|
 * A <code>ParseException</code> will be thrown for any of the following
 | 
						|
 * conditions:
 | 
						|
 * <ul>
 | 
						|
 * <li>Unquoted punctuation characters in a text argument.</li>
 | 
						|
 * <li>A relational or reset operator not followed by a text argument</li>
 | 
						|
 * <li>A reset operator where the text argument is not present in
 | 
						|
 * the previous rule string section.</li>
 | 
						|
 * </ul>
 | 
						|
 *
 | 
						|
 * @author Aaron M. Renn (arenn@urbanophile.com)
 | 
						|
 * @author Tom Tromey (tromey@cygnus.com)
 | 
						|
 * @author Guilhem Lavaux (guilhem@kaffe.org)
 | 
						|
 */
 | 
						|
public class RuleBasedCollator extends Collator
 | 
						|
{
 | 
						|
  /**
 | 
						|
   * This class describes what rank has a character (or a sequence of characters)
 | 
						|
   * in the lexicographic order. Each element in a rule has a collation element.
 | 
						|
   */
 | 
						|
  static final class CollationElement
 | 
						|
  {
 | 
						|
    final String key;
 | 
						|
    final int primary;
 | 
						|
    final short secondary;
 | 
						|
    final short tertiary;
 | 
						|
    final short equality;
 | 
						|
    final boolean ignore;
 | 
						|
    final String expansion;
 | 
						|
 | 
						|
    CollationElement(String key, int primary, short secondary, short tertiary,
 | 
						|
                     short equality, String expansion, boolean ignore)
 | 
						|
    {
 | 
						|
      this.key = key;
 | 
						|
      this.primary = primary;
 | 
						|
      this.secondary = secondary;
 | 
						|
      this.tertiary = tertiary;
 | 
						|
      this.equality = equality;
 | 
						|
      this.ignore = ignore;
 | 
						|
      this.expansion = expansion;
 | 
						|
    }
 | 
						|
 | 
						|
    int getValue()
 | 
						|
    {
 | 
						|
      return (primary << 16) + (secondary << 8) + tertiary;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Basic collation instruction (internal format) to build the series of
 | 
						|
   * collation elements. It contains an instruction which specifies the new
 | 
						|
   * state of the generator. The sequence of instruction should not contain
 | 
						|
   * RESET (it is used by
 | 
						|
   * {@link #mergeRules(int,java.lang.String,java.util.ArrayList,java.util.ArrayList)})
 | 
						|
   * as a temporary state while merging two sets of instructions.
 | 
						|
   */
 | 
						|
  private static final class CollationSorter
 | 
						|
  {
 | 
						|
    static final int GREATERP = 0;
 | 
						|
    static final int GREATERS = 1;
 | 
						|
    static final int GREATERT = 2;
 | 
						|
    static final int EQUAL = 3;
 | 
						|
    static final int RESET = 4;
 | 
						|
    static final int INVERSE_SECONDARY = 5;
 | 
						|
 | 
						|
    final int comparisonType;
 | 
						|
    final String textElement;
 | 
						|
    final int hashText;
 | 
						|
    final int offset;
 | 
						|
    final boolean ignore;
 | 
						|
 | 
						|
    String expansionOrdering;
 | 
						|
 | 
						|
    private CollationSorter(final int comparisonType, final String textElement,
 | 
						|
                            final int offset, final boolean ignore)
 | 
						|
    {
 | 
						|
      this.comparisonType = comparisonType;
 | 
						|
      this.textElement = textElement;
 | 
						|
      this.offset = offset;
 | 
						|
      this.ignore = ignore;
 | 
						|
      hashText = textElement.hashCode();
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * This is the original rule string.
 | 
						|
   */
 | 
						|
  private String rules;
 | 
						|
 | 
						|
  /**
 | 
						|
   * This is the table of collation element values
 | 
						|
   */
 | 
						|
  private CollationElement[] ce_table;
 | 
						|
 | 
						|
  /**
 | 
						|
   * Quick-prefix finder.
 | 
						|
   */
 | 
						|
  HashMap<String,CollationElement> prefix_tree;
 | 
						|
 | 
						|
  /**
 | 
						|
   * This is the value of the last sequence entered into
 | 
						|
   * <code>ce_table</code>. It is used to compute the
 | 
						|
   * ordering value of unspecified character.
 | 
						|
   */
 | 
						|
  private int last_primary_value;
 | 
						|
 | 
						|
  /**
 | 
						|
   * This is the value of the last tertiary sequence of the
 | 
						|
   * primary 0, entered into
 | 
						|
   * <code>ce_table</code>. It is used to compute the
 | 
						|
   * ordering value of an unspecified accented character.
 | 
						|
   */
 | 
						|
  private int last_tertiary_value;
 | 
						|
 | 
						|
  /**
 | 
						|
   * This variable is true if accents need to be sorted
 | 
						|
   * in the other direction.
 | 
						|
   */
 | 
						|
  private boolean inverseAccentComparison;
 | 
						|
 | 
						|
  /**
 | 
						|
   * This collation element is special to unknown sequence.
 | 
						|
   * The JDK uses it to mark and sort the characters which have
 | 
						|
   * no collation rules.
 | 
						|
   */
 | 
						|
  static final CollationElement SPECIAL_UNKNOWN_SEQ =
 | 
						|
    new CollationElement("", (short) 32767, (short) 0, (short) 0,
 | 
						|
                         (short) 0, null, false);
 | 
						|
 | 
						|
  /**
 | 
						|
   * This method initializes a new instance of <code>RuleBasedCollator</code>
 | 
						|
   * with the specified collation rules.  Note that an application normally
 | 
						|
   * obtains an instance of <code>RuleBasedCollator</code> by calling the
 | 
						|
   * <code>getInstance</code> method of <code>Collator</code>.  That method
 | 
						|
   * automatically loads the proper set of rules for the desired locale.
 | 
						|
   *
 | 
						|
   * @param rules The collation rule string.
 | 
						|
   *
 | 
						|
   * @exception ParseException If the rule string contains syntax errors.
 | 
						|
   */
 | 
						|
  public RuleBasedCollator(String rules) throws ParseException
 | 
						|
  {
 | 
						|
    if (rules.equals(""))
 | 
						|
      throw new ParseException("empty rule set", 0);
 | 
						|
 | 
						|
    this.rules = rules;
 | 
						|
 | 
						|
    buildCollationVector(parseString(rules));
 | 
						|
    buildPrefixAccess();
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * This method returns the number of common characters at the beginning
 | 
						|
   * of the string of the two parameters.
 | 
						|
   *
 | 
						|
   * @param prefix A string considered as a prefix to test against
 | 
						|
   * the other string.
 | 
						|
   * @param s A string to test the prefix against.
 | 
						|
   * @return The number of common characters.
 | 
						|
   */
 | 
						|
  static int findPrefixLength(String prefix, String s)
 | 
						|
  {
 | 
						|
    int index;
 | 
						|
    int len = prefix.length();
 | 
						|
 | 
						|
    for (index = 0; index < len && index < s.length(); ++index)
 | 
						|
      {
 | 
						|
        if (prefix.charAt(index) != s.charAt(index))
 | 
						|
          return index;
 | 
						|
      }
 | 
						|
 | 
						|
 | 
						|
    return index;
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Here we are merging two sets of sorting instructions: 'patch' into 'main'. This methods
 | 
						|
   * checks whether it is possible to find an anchor point for the rules to be merged and
 | 
						|
   * then insert them at that precise point.
 | 
						|
   *
 | 
						|
   * @param offset Offset in the string containing rules of the beginning of the rules
 | 
						|
   * being merged in.
 | 
						|
   * @param starter Text of the rules being merged.
 | 
						|
   * @param main Repository of all already parsed rules.
 | 
						|
   * @param patch Rules to be merged into the repository.
 | 
						|
   * @throws ParseException if it is impossible to find an anchor point for the new rules.
 | 
						|
   */
 | 
						|
  private void mergeRules(int offset, String starter, ArrayList<CollationSorter> main,
 | 
						|
                          ArrayList<CollationSorter> patch)
 | 
						|
    throws ParseException
 | 
						|
  {
 | 
						|
    int insertion_point = -1;
 | 
						|
    int max_length = 0;
 | 
						|
 | 
						|
    /* We must check that no rules conflict with another already present. If it
 | 
						|
     * is the case delete the old rule.
 | 
						|
     */
 | 
						|
 | 
						|
    /* For the moment good old O(N^2) algorithm.
 | 
						|
     */
 | 
						|
    for (int i = 0; i < patch.size(); i++)
 | 
						|
      {
 | 
						|
        int j = 0;
 | 
						|
 | 
						|
        while (j < main.size())
 | 
						|
          {
 | 
						|
            CollationSorter rule1 = patch.get(i);
 | 
						|
            CollationSorter rule2 = main.get(j);
 | 
						|
 | 
						|
            if (rule1.textElement.equals(rule2.textElement))
 | 
						|
              main.remove(j);
 | 
						|
            else
 | 
						|
              j++;
 | 
						|
          }
 | 
						|
      }
 | 
						|
 | 
						|
    // Find the insertion point... O(N)
 | 
						|
    for (int i = 0; i < main.size(); i++)
 | 
						|
      {
 | 
						|
        CollationSorter sorter = main.get(i);
 | 
						|
        int length = findPrefixLength(starter, sorter.textElement);
 | 
						|
 | 
						|
        if (length > max_length)
 | 
						|
          {
 | 
						|
            max_length = length;
 | 
						|
            insertion_point = i+1;
 | 
						|
          }
 | 
						|
      }
 | 
						|
 | 
						|
    if (insertion_point < 0)
 | 
						|
      throw new ParseException("no insertion point found for " + starter, offset);
 | 
						|
 | 
						|
    if (max_length < starter.length())
 | 
						|
      {
 | 
						|
        /*
 | 
						|
         * We need to expand the first entry. It must be sorted
 | 
						|
         * like if it was the reference key itself (like the spec
 | 
						|
         * said. So the first entry is special: the element is
 | 
						|
         * replaced by the specified text element for the sorting.
 | 
						|
         * This text replace the old one for comparisons. However
 | 
						|
         * to preserve the behaviour we replace the first key (corresponding
 | 
						|
         * to the found prefix) by a new code rightly ordered in the
 | 
						|
         * sequence. The rest of the subsequence must be appended
 | 
						|
         * to the end of the sequence.
 | 
						|
         */
 | 
						|
        CollationSorter sorter = patch.get(0);
 | 
						|
 | 
						|
        sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element
 | 
						|
 | 
						|
        main.add(insertion_point, sorter);
 | 
						|
 | 
						|
        /*
 | 
						|
         * This is a new set of rules. Append to the list.
 | 
						|
         */
 | 
						|
        patch.remove(0);
 | 
						|
        insertion_point++;
 | 
						|
      }
 | 
						|
 | 
						|
    // Now insert all elements of patch at the insertion point.
 | 
						|
    for (int i = 0; i < patch.size(); i++)
 | 
						|
      main.add(i+insertion_point, patch.get(i));
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * This method parses a string and build a set of sorting instructions. The parsing
 | 
						|
   * may only be partial on the case the rules are to be merged sometime later.
 | 
						|
   *
 | 
						|
   * @param stop_on_reset If this parameter is true then the parser stops when it
 | 
						|
   * encounters a reset instruction. In the other case, it tries to parse the subrules
 | 
						|
   * and merged it in the same repository.
 | 
						|
   * @param v Output vector for the set of instructions.
 | 
						|
   * @param base_offset Offset in the string to begin parsing.
 | 
						|
   * @param rules Rules to be parsed.
 | 
						|
   * @return -1 if the parser reached the end of the string, an integer representing the
 | 
						|
   * offset in the string at which it stopped parsing.
 | 
						|
   * @throws ParseException if something turned wrong during the parsing. To get details
 | 
						|
   * decode the message.
 | 
						|
   */
 | 
						|
  private int subParseString(boolean stop_on_reset, ArrayList<CollationSorter> v,
 | 
						|
                             int base_offset, String rules)
 | 
						|
    throws ParseException
 | 
						|
  {
 | 
						|
    boolean ignoreChars = (base_offset == 0);
 | 
						|
    int operator = -1;
 | 
						|
    StringBuilder sb = new StringBuilder();
 | 
						|
    boolean doubleQuote = false;
 | 
						|
    boolean eatingChars = false;
 | 
						|
    boolean nextIsModifier = false;
 | 
						|
    boolean isModifier = false;
 | 
						|
    int i;
 | 
						|
 | 
						|
main_parse_loop:
 | 
						|
    for (i = 0; i < rules.length(); i++)
 | 
						|
      {
 | 
						|
        char c = rules.charAt(i);
 | 
						|
        int type = -1;
 | 
						|
 | 
						|
        if (!eatingChars &&
 | 
						|
            ((c >= 0x09 && c <= 0x0D) || (c == 0x20)))
 | 
						|
              continue;
 | 
						|
 | 
						|
        isModifier = nextIsModifier;
 | 
						|
        nextIsModifier = false;
 | 
						|
 | 
						|
        if (eatingChars && c != '\'')
 | 
						|
          {
 | 
						|
            doubleQuote = false;
 | 
						|
            sb.append(c);
 | 
						|
            continue;
 | 
						|
          }
 | 
						|
        if (doubleQuote && eatingChars)
 | 
						|
          {
 | 
						|
            sb.append(c);
 | 
						|
            doubleQuote = false;
 | 
						|
            continue;
 | 
						|
          }
 | 
						|
 | 
						|
        switch (c)
 | 
						|
          {
 | 
						|
          case '!':
 | 
						|
            throw new ParseException
 | 
						|
              ("Modifier '!' is not yet supported by Classpath", i + base_offset);
 | 
						|
          case '<':
 | 
						|
            type = CollationSorter.GREATERP;
 | 
						|
            break;
 | 
						|
          case ';':
 | 
						|
            type = CollationSorter.GREATERS;
 | 
						|
            break;
 | 
						|
          case ',':
 | 
						|
            type = CollationSorter.GREATERT;
 | 
						|
            break;
 | 
						|
          case '=':
 | 
						|
            type = CollationSorter.EQUAL;
 | 
						|
            break;
 | 
						|
          case '\'':
 | 
						|
            eatingChars = !eatingChars;
 | 
						|
            doubleQuote = true;
 | 
						|
            break;
 | 
						|
          case '@':
 | 
						|
            if (ignoreChars)
 | 
						|
              throw new ParseException
 | 
						|
                ("comparison list has not yet been started. You may only use"
 | 
						|
                 + "(<,;=&)", i + base_offset);
 | 
						|
            // Inverse the order of secondaries from now on.
 | 
						|
            nextIsModifier = true;
 | 
						|
            type = CollationSorter.INVERSE_SECONDARY;
 | 
						|
            break;
 | 
						|
          case '&':
 | 
						|
            type = CollationSorter.RESET;
 | 
						|
            if (stop_on_reset)
 | 
						|
              break main_parse_loop;
 | 
						|
            break;
 | 
						|
          default:
 | 
						|
            if (operator < 0)
 | 
						|
              throw new ParseException
 | 
						|
                ("operator missing at " + (i + base_offset), i + base_offset);
 | 
						|
            if (! eatingChars
 | 
						|
                && ((c >= 0x21 && c <= 0x2F)
 | 
						|
                    || (c >= 0x3A && c <= 0x40)
 | 
						|
                    || (c >= 0x5B && c <= 0x60)
 | 
						|
                    || (c >= 0x7B && c <= 0x7E)))
 | 
						|
              throw new ParseException
 | 
						|
                ("unquoted punctuation character '" + c + "'", i + base_offset);
 | 
						|
 | 
						|
            //type = ignoreChars ? CollationSorter.IGNORE : -1;
 | 
						|
            sb.append(c);
 | 
						|
            break;
 | 
						|
          }
 | 
						|
 | 
						|
        if (type  < 0)
 | 
						|
          continue;
 | 
						|
 | 
						|
        if (operator < 0)
 | 
						|
          {
 | 
						|
            operator = type;
 | 
						|
            continue;
 | 
						|
          }
 | 
						|
 | 
						|
        if (sb.length() == 0 && !isModifier)
 | 
						|
          throw new ParseException
 | 
						|
            ("text element empty at " + (i+base_offset), i+base_offset);
 | 
						|
 | 
						|
        if (operator == CollationSorter.RESET)
 | 
						|
          {
 | 
						|
            /* Reposition in the sorting list at the position
 | 
						|
             * indicated by the text element.
 | 
						|
             */
 | 
						|
            String subrules = rules.substring(i);
 | 
						|
            ArrayList<CollationSorter> sorted_rules = new ArrayList<CollationSorter>();
 | 
						|
            int idx;
 | 
						|
 | 
						|
            // Parse the subrules but do not iterate through all
 | 
						|
            // sublist. This is the privilege of the first call.
 | 
						|
            idx = subParseString(true, sorted_rules, base_offset+i, subrules);
 | 
						|
 | 
						|
            // Merge new parsed rules into the list.
 | 
						|
            mergeRules(base_offset+i, sb.toString(), v, sorted_rules);
 | 
						|
            sb.setLength(0);
 | 
						|
 | 
						|
            // Reset state to none.
 | 
						|
            operator = -1;
 | 
						|
            type = -1;
 | 
						|
            // We have found a new subrule at 'idx' but it has not been parsed.
 | 
						|
            if (idx >= 0)
 | 
						|
              {
 | 
						|
                i += idx-1;
 | 
						|
                continue main_parse_loop;
 | 
						|
              }
 | 
						|
            else
 | 
						|
                // No more rules.
 | 
						|
                break main_parse_loop;
 | 
						|
          }
 | 
						|
 | 
						|
        String textElement = sb.toString();
 | 
						|
        if (operator == CollationSorter.GREATERP)
 | 
						|
          ignoreChars = false;
 | 
						|
        CollationSorter sorter = new CollationSorter(operator, textElement,
 | 
						|
                                                     base_offset + rules.length(),
 | 
						|
                                                     ignoreChars);
 | 
						|
        sb.setLength(0);
 | 
						|
 | 
						|
        v.add(sorter);
 | 
						|
        operator = type;
 | 
						|
      }
 | 
						|
 | 
						|
    if (operator >= 0)
 | 
						|
      {
 | 
						|
        int pos = rules.length() + base_offset;
 | 
						|
 | 
						|
        if ((sb.length() != 0 && nextIsModifier)
 | 
						|
            || (sb.length() == 0 && !nextIsModifier && !eatingChars))
 | 
						|
          throw new ParseException("text element empty at " + pos, pos);
 | 
						|
 | 
						|
        if (operator == CollationSorter.GREATERP)
 | 
						|
          ignoreChars = false;
 | 
						|
 | 
						|
        CollationSorter sorter = new CollationSorter(operator, sb.toString(),
 | 
						|
                                                     base_offset+pos, ignoreChars);
 | 
						|
        v.add(sorter);
 | 
						|
      }
 | 
						|
 | 
						|
    if (i == rules.length())
 | 
						|
      return -1;
 | 
						|
    else
 | 
						|
      return i;
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * This method creates a copy of this object.
 | 
						|
   *
 | 
						|
   * @return A copy of this object.
 | 
						|
   */
 | 
						|
  public Object clone()
 | 
						|
  {
 | 
						|
    return super.clone();
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * This method completely parses a string 'rules' containing sorting rules.
 | 
						|
   *
 | 
						|
   * @param rules String containing the rules to be parsed.
 | 
						|
   * @return A set of sorting instructions stored in a Vector.
 | 
						|
   * @throws ParseException if something turned wrong during the parsing. To get details
 | 
						|
   * decode the message.
 | 
						|
   */
 | 
						|
  private ArrayList<CollationSorter> parseString(String rules)
 | 
						|
    throws ParseException
 | 
						|
  {
 | 
						|
    ArrayList<CollationSorter> v = new ArrayList<CollationSorter>();
 | 
						|
 | 
						|
    // result of the first subParseString is not absolute (may be -1 or a
 | 
						|
    // positive integer). But we do not care.
 | 
						|
    subParseString(false, v, 0, rules);
 | 
						|
 | 
						|
    return v;
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * This method uses the sorting instructions built by {@link #parseString}
 | 
						|
   * to build collation elements which can be directly used to sort strings.
 | 
						|
   *
 | 
						|
   * @param parsedElements Parsed instructions stored in a ArrayList.
 | 
						|
   * @throws ParseException if the order of the instructions are not valid.
 | 
						|
   */
 | 
						|
  private void buildCollationVector(ArrayList<CollationSorter> parsedElements)
 | 
						|
    throws ParseException
 | 
						|
  {
 | 
						|
    int primary_seq = 0;
 | 
						|
    int last_tertiary_seq = 0;
 | 
						|
    short secondary_seq = 0;
 | 
						|
    short tertiary_seq = 0;
 | 
						|
    short equality_seq = 0;
 | 
						|
    boolean inverseComparisons = false;
 | 
						|
    final boolean DECREASING = false;
 | 
						|
    final boolean INCREASING = true;
 | 
						|
    boolean secondaryType = INCREASING;
 | 
						|
    ArrayList<CollationElement> v = new ArrayList<CollationElement>();
 | 
						|
 | 
						|
    // elts is completely sorted.
 | 
						|
element_loop:
 | 
						|
    for (int i = 0; i < parsedElements.size(); i++)
 | 
						|
      {
 | 
						|
        CollationSorter elt = parsedElements.get(i);
 | 
						|
 | 
						|
        switch (elt.comparisonType)
 | 
						|
          {
 | 
						|
          case CollationSorter.GREATERP:
 | 
						|
            primary_seq++;
 | 
						|
            if (inverseComparisons)
 | 
						|
              {
 | 
						|
                secondary_seq = Short.MAX_VALUE;
 | 
						|
                secondaryType = DECREASING;
 | 
						|
              }
 | 
						|
            else
 | 
						|
              {
 | 
						|
                secondary_seq = 0;
 | 
						|
                secondaryType = INCREASING;
 | 
						|
              }
 | 
						|
            tertiary_seq = 0;
 | 
						|
            equality_seq = 0;
 | 
						|
            inverseComparisons = false;
 | 
						|
            break;
 | 
						|
          case CollationSorter.GREATERS:
 | 
						|
            if (secondaryType == DECREASING)
 | 
						|
              secondary_seq--;
 | 
						|
            else
 | 
						|
              secondary_seq++;
 | 
						|
            tertiary_seq = 0;
 | 
						|
            equality_seq = 0;
 | 
						|
            break;
 | 
						|
          case CollationSorter.INVERSE_SECONDARY:
 | 
						|
            inverseComparisons = true;
 | 
						|
            continue element_loop;
 | 
						|
          case CollationSorter.GREATERT:
 | 
						|
            tertiary_seq++;
 | 
						|
            if (primary_seq == 0)
 | 
						|
              last_tertiary_seq = tertiary_seq;
 | 
						|
            equality_seq = 0;
 | 
						|
            break;
 | 
						|
          case CollationSorter.EQUAL:
 | 
						|
            equality_seq++;
 | 
						|
            break;
 | 
						|
          case CollationSorter.RESET:
 | 
						|
            throw new ParseException
 | 
						|
              ("Invalid reached state 'RESET'. Internal error", elt.offset);
 | 
						|
          default:
 | 
						|
            throw new ParseException
 | 
						|
              ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset);
 | 
						|
          }
 | 
						|
 | 
						|
        v.add(new CollationElement(elt.textElement, primary_seq,
 | 
						|
                                   secondary_seq, tertiary_seq,
 | 
						|
                                   equality_seq, elt.expansionOrdering, elt.ignore));
 | 
						|
      }
 | 
						|
 | 
						|
    this.inverseAccentComparison = inverseComparisons;
 | 
						|
 | 
						|
    ce_table = v.toArray(new CollationElement[v.size()]);
 | 
						|
 | 
						|
    last_primary_value = primary_seq+1;
 | 
						|
    last_tertiary_value = last_tertiary_seq+1;
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Build a tree where all keys are the texts of collation elements and data is
 | 
						|
   * the collation element itself. The tree is used when extracting all prefix
 | 
						|
   * for a given text.
 | 
						|
   */
 | 
						|
  private void buildPrefixAccess()
 | 
						|
  {
 | 
						|
    prefix_tree = new HashMap<String,CollationElement>();
 | 
						|
 | 
						|
    for (int i = 0; i < ce_table.length; i++)
 | 
						|
      {
 | 
						|
        CollationElement e = ce_table[i];
 | 
						|
 | 
						|
        prefix_tree.put(e.key, e);
 | 
						|
      }
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * This method returns an integer which indicates whether the first
 | 
						|
   * specified <code>String</code> is less than, greater than, or equal to
 | 
						|
   * the second.  The value depends not only on the collation rules in
 | 
						|
   * effect, but also the strength and decomposition settings of this object.
 | 
						|
   *
 | 
						|
   * @param source The first <code>String</code> to compare.
 | 
						|
   * @param target A second <code>String</code> to compare to the first.
 | 
						|
   *
 | 
						|
   * @return A negative integer if source < target, a positive integer
 | 
						|
   * if source > target, or 0 if source == target.
 | 
						|
   */
 | 
						|
  public int compare(String source, String target)
 | 
						|
  {
 | 
						|
    CollationElementIterator cs, ct;
 | 
						|
    CollationElement ord1block = null;
 | 
						|
    CollationElement ord2block = null;
 | 
						|
    boolean advance_block_1 = true;
 | 
						|
    boolean advance_block_2 = true;
 | 
						|
 | 
						|
    cs = getCollationElementIterator(source);
 | 
						|
    ct = getCollationElementIterator(target);
 | 
						|
 | 
						|
    for(;;)
 | 
						|
      {
 | 
						|
        int ord1;
 | 
						|
        int ord2;
 | 
						|
 | 
						|
        /*
 | 
						|
         * We have to check whether the characters are ignorable.
 | 
						|
         * If it is the case then forget them.
 | 
						|
         */
 | 
						|
        if (advance_block_1)
 | 
						|
          {
 | 
						|
            ord1block = cs.nextBlock();
 | 
						|
            if (ord1block != null && ord1block.ignore)
 | 
						|
              continue;
 | 
						|
          }
 | 
						|
 | 
						|
        if (advance_block_2)
 | 
						|
          {
 | 
						|
            ord2block = ct.nextBlock();
 | 
						|
            if (ord2block != null && ord2block.ignore)
 | 
						|
              {
 | 
						|
                advance_block_1 = false;
 | 
						|
                continue;
 | 
						|
              }
 | 
						|
         }
 | 
						|
        else
 | 
						|
          advance_block_2 = true;
 | 
						|
 | 
						|
        if (!advance_block_1)
 | 
						|
          advance_block_1 = true;
 | 
						|
 | 
						|
        if (ord1block != null)
 | 
						|
          ord1 = ord1block.getValue();
 | 
						|
        else
 | 
						|
          {
 | 
						|
            if (ord2block == null)
 | 
						|
              return 0;
 | 
						|
            return -1;
 | 
						|
          }
 | 
						|
 | 
						|
        if (ord2block == null)
 | 
						|
          return 1;
 | 
						|
 | 
						|
        ord2 = ord2block.getValue();
 | 
						|
 | 
						|
        // We know chars are totally equal, so skip
 | 
						|
        if (ord1 == ord2)
 | 
						|
          {
 | 
						|
            if (getStrength() == IDENTICAL)
 | 
						|
              if (!ord1block.key.equals(ord2block.key))
 | 
						|
                return ord1block.key.compareTo(ord2block.key);
 | 
						|
            continue;
 | 
						|
          }
 | 
						|
 | 
						|
        // Check for primary strength differences
 | 
						|
        int prim1 = CollationElementIterator.primaryOrder(ord1);
 | 
						|
        int prim2 = CollationElementIterator.primaryOrder(ord2);
 | 
						|
 | 
						|
        if (prim1 == 0 && getStrength() < TERTIARY)
 | 
						|
          {
 | 
						|
            advance_block_2 = false;
 | 
						|
            continue;
 | 
						|
          }
 | 
						|
        else if (prim2 == 0 && getStrength() < TERTIARY)
 | 
						|
          {
 | 
						|
            advance_block_1 = false;
 | 
						|
            continue;
 | 
						|
          }
 | 
						|
 | 
						|
        if (prim1 < prim2)
 | 
						|
          return -1;
 | 
						|
        else if (prim1 > prim2)
 | 
						|
          return 1;
 | 
						|
        else if (getStrength() == PRIMARY)
 | 
						|
          continue;
 | 
						|
 | 
						|
        // Check for secondary strength differences
 | 
						|
        int sec1 = CollationElementIterator.secondaryOrder(ord1);
 | 
						|
        int sec2 = CollationElementIterator.secondaryOrder(ord2);
 | 
						|
 | 
						|
        if (sec1 < sec2)
 | 
						|
          return -1;
 | 
						|
        else if (sec1 > sec2)
 | 
						|
          return 1;
 | 
						|
        else if (getStrength() == SECONDARY)
 | 
						|
          continue;
 | 
						|
 | 
						|
        // Check for tertiary differences
 | 
						|
        int tert1 = CollationElementIterator.tertiaryOrder(ord1);
 | 
						|
        int tert2 = CollationElementIterator.tertiaryOrder(ord2);
 | 
						|
 | 
						|
        if (tert1 < tert2)
 | 
						|
          return -1;
 | 
						|
        else if (tert1 > tert2)
 | 
						|
          return 1;
 | 
						|
        else if (getStrength() == TERTIARY)
 | 
						|
          continue;
 | 
						|
 | 
						|
        // Apparently JDK does this (at least for my test case).
 | 
						|
        return ord1block.key.compareTo(ord2block.key);
 | 
						|
      }
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * This method tests this object for equality against the specified
 | 
						|
   * object.  This will be true if and only if the specified object is
 | 
						|
   * another reference to this object.
 | 
						|
   *
 | 
						|
   * @param obj The <code>Object</code> to compare against this object.
 | 
						|
   *
 | 
						|
   * @return <code>true</code> if the specified object is equal to this object,
 | 
						|
   * <code>false</code> otherwise.
 | 
						|
   */
 | 
						|
  public boolean equals(Object obj)
 | 
						|
  {
 | 
						|
    if (obj == this)
 | 
						|
      return true;
 | 
						|
    else
 | 
						|
      return false;
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * This method builds a default collation element without invoking
 | 
						|
   * the database created from the rules passed to the constructor.
 | 
						|
   *
 | 
						|
   * @param c Character which needs a collation element.
 | 
						|
   * @return A valid brand new CollationElement instance.
 | 
						|
   */
 | 
						|
  CollationElement getDefaultElement(char c)
 | 
						|
  {
 | 
						|
    int v;
 | 
						|
 | 
						|
    // Preliminary support for generic accent sorting inversion (I don't know if all
 | 
						|
    // characters in the range should be sorted backward). This is the place
 | 
						|
    // to fix this if needed.
 | 
						|
    if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
 | 
						|
      v = 0x0361 - ((int) c - 0x02B9);
 | 
						|
    else
 | 
						|
      v = (short) c;
 | 
						|
    return new CollationElement("" + c, last_primary_value + v,
 | 
						|
                                (short) 0, (short) 0, (short) 0, null, false);
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * This method builds a default collation element for an accented character
 | 
						|
   * without invoking the database created from the rules passed to the constructor.
 | 
						|
   *
 | 
						|
   * @param c Character which needs a collation element.
 | 
						|
   * @return A valid brand new CollationElement instance.
 | 
						|
   */
 | 
						|
  CollationElement getDefaultAccentedElement(char c)
 | 
						|
  {
 | 
						|
    int v;
 | 
						|
 | 
						|
    // Preliminary support for generic accent sorting inversion (I don't know if all
 | 
						|
    // characters in the range should be sorted backward). This is the place
 | 
						|
    // to fix this if needed.
 | 
						|
    if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
 | 
						|
      v = 0x0361 - ((int) c - 0x02B9);
 | 
						|
    else
 | 
						|
      v = (short) c;
 | 
						|
    return new CollationElement("" + c, (short) 0,
 | 
						|
                                (short) 0, (short) (last_tertiary_value + v), (short) 0, null, false);
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * This method returns an instance for <code>CollationElementIterator</code>
 | 
						|
   * for the specified <code>String</code> under the collation rules for this
 | 
						|
   * object.
 | 
						|
   *
 | 
						|
   * @param source The <code>String</code> to return the
 | 
						|
   * <code>CollationElementIterator</code> instance for.
 | 
						|
   *
 | 
						|
   * @return A <code>CollationElementIterator</code> for the specified
 | 
						|
   * <code>String</code>.
 | 
						|
   */
 | 
						|
  public CollationElementIterator getCollationElementIterator(String source)
 | 
						|
  {
 | 
						|
    return new CollationElementIterator(this, source);
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * This method returns an instance of <code>CollationElementIterator</code>
 | 
						|
   * for the <code>String</code> represented by the specified
 | 
						|
   * <code>CharacterIterator</code>.
 | 
						|
   *
 | 
						|
   * @param source The <code>CharacterIterator</code> with the desired <code>String</code>.
 | 
						|
   *
 | 
						|
   * @return A <code>CollationElementIterator</code> for the specified <code>String</code>.
 | 
						|
   */
 | 
						|
  public CollationElementIterator getCollationElementIterator(CharacterIterator source)
 | 
						|
  {
 | 
						|
    return new CollationElementIterator(this, source);
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * This method returns an instance of <code>CollationKey</code> for the
 | 
						|
   * specified <code>String</code>.  The object returned will have a
 | 
						|
   * more efficient mechanism for its comparison function that could
 | 
						|
   * provide speed benefits if multiple comparisons are performed, such
 | 
						|
   * as during a sort.
 | 
						|
   *
 | 
						|
   * @param source The <code>String</code> to create a <code>CollationKey</code> for.
 | 
						|
   *
 | 
						|
   * @return A <code>CollationKey</code> for the specified <code>String</code>.
 | 
						|
   */
 | 
						|
  public CollationKey getCollationKey(String source)
 | 
						|
  {
 | 
						|
    CollationElementIterator cei = getCollationElementIterator(source);
 | 
						|
    ArrayList<Integer> vect = new ArrayList<Integer>();
 | 
						|
 | 
						|
    int ord = cei.next();
 | 
						|
    cei.reset(); //set to start of string
 | 
						|
 | 
						|
    while (ord != CollationElementIterator.NULLORDER)
 | 
						|
      {
 | 
						|
        // If the primary order is null, it means this is an ignorable
 | 
						|
        // character.
 | 
						|
        if (CollationElementIterator.primaryOrder(ord) == 0)
 | 
						|
          {
 | 
						|
            ord = cei.next();
 | 
						|
            continue;
 | 
						|
          }
 | 
						|
        switch (getStrength())
 | 
						|
          {
 | 
						|
            case PRIMARY:
 | 
						|
              ord = CollationElementIterator.primaryOrder(ord);
 | 
						|
              break;
 | 
						|
 | 
						|
            case SECONDARY:
 | 
						|
              ord = CollationElementIterator.primaryOrder(ord) << 8;
 | 
						|
              ord |= CollationElementIterator.secondaryOrder(ord);
 | 
						|
 | 
						|
            default:
 | 
						|
               break;
 | 
						|
          }
 | 
						|
 | 
						|
        vect.add(Integer.valueOf(ord));
 | 
						|
        ord = cei.next(); //increment to next key
 | 
						|
      }
 | 
						|
 | 
						|
    Integer[] objarr = vect.toArray(new Integer[vect.size()]);
 | 
						|
    byte[] key = new byte[objarr.length * 4];
 | 
						|
 | 
						|
    for (int i = 0; i < objarr.length; i++)
 | 
						|
      {
 | 
						|
        int j = objarr[i].intValue();
 | 
						|
        key [i * 4] = (byte) ((j & 0xFF000000) >> 24);
 | 
						|
        key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16);
 | 
						|
        key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8);
 | 
						|
        key [i * 4 + 3] = (byte) (j & 0x000000FF);
 | 
						|
      }
 | 
						|
 | 
						|
    return new CollationKey(this, source, key);
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * This method returns a <code>String</code> containing the collation rules
 | 
						|
   * for this object.
 | 
						|
   *
 | 
						|
   * @return The collation rules for this object.
 | 
						|
   */
 | 
						|
  public String getRules()
 | 
						|
  {
 | 
						|
    return rules;
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * This method returns a hash value for this object.
 | 
						|
   *
 | 
						|
   * @return A hash value for this object.
 | 
						|
   */
 | 
						|
  public int hashCode()
 | 
						|
  {
 | 
						|
    return System.identityHashCode(this);
 | 
						|
  }
 | 
						|
}
 |