mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			
		
			
				
	
	
		
			490 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Java
		
	
	
	
			
		
		
	
	
			490 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Java
		
	
	
	
| /* CollationElementIterator.java -- Walks through collation elements
 | |
|    Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004, 2012  Free Software Foundation
 | |
| 
 | |
| This file is part of GNU Classpath.
 | |
| 
 | |
| GNU Classpath is free software; you can redistribute it and/or modify
 | |
| it under the terms of the GNU General Public License as published by
 | |
| the Free Software Foundation; either version 2, or (at your option)
 | |
| any later version.
 | |
| 
 | |
| GNU Classpath is distributed in the hope that it will be useful, but
 | |
| WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
| General Public License for more details.
 | |
| 
 | |
| You should have received a copy of the GNU General Public License
 | |
| along with GNU Classpath; see the file COPYING.  If not, write to the
 | |
| Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 | |
| 02110-1301 USA.
 | |
| 
 | |
| Linking this library statically or dynamically with other modules is
 | |
| making a combined work based on this library.  Thus, the terms and
 | |
| conditions of the GNU General Public License cover the whole
 | |
| combination.
 | |
| 
 | |
| As a special exception, the copyright holders of this library give you
 | |
| permission to link this library with independent modules to produce an
 | |
| executable, regardless of the license terms of these independent
 | |
| modules, and to copy and distribute the resulting executable under
 | |
| terms of your choice, provided that you also meet, for each linked
 | |
| independent module, the terms and conditions of the license of that
 | |
| module.  An independent module is a module which is not derived from
 | |
| or based on this library.  If you modify this library, you may extend
 | |
| this exception to your version of the library, but you are not
 | |
| obligated to do so.  If you do not wish to do so, delete this
 | |
| exception statement from your version. */
 | |
| 
 | |
| 
 | |
| package java.text;
 | |
| 
 | |
| import gnu.java.lang.CPStringBuilder;
 | |
| 
 | |
| import java.util.ArrayList;
 | |
| 
 | |
| /* Written using "Java Class Libraries", 2nd edition, plus online
 | |
|  * API docs for JDK 1.2 from http://www.javasoft.com.
 | |
|  * Status: Believed complete and correct to JDK 1.1.
 | |
|  */
 | |
| 
 | |
| /**
 | |
|  * This class walks through the character collation elements of a
 | |
|  * <code>String</code> as defined by the collation rules in an instance of
 | |
|  * <code>RuleBasedCollator</code>.  There is no public constructor for
 | |
|  * this class.  An instance is created by calling the
 | |
|  * <code>getCollationElementIterator</code> method on
 | |
|  * <code>RuleBasedCollator</code>.
 | |
|  *
 | |
|  * @author Aaron M. Renn (arenn@urbanophile.com)
 | |
|  * @author Tom Tromey (tromey@cygnus.com)
 | |
|  * @author Guilhem Lavaux (guilhem.lavaux@free.fr)
 | |
|  */
 | |
| public final class CollationElementIterator
 | |
| {
 | |
|   /**
 | |
|    * This is a constant value that is returned to indicate that the end of
 | |
|    * the string was encountered.
 | |
|    */
 | |
|   public static final int NULLORDER = -1;
 | |
| 
 | |
|   /**
 | |
|    * This is the RuleBasedCollator this object was created from.
 | |
|    */
 | |
|   RuleBasedCollator collator;
 | |
| 
 | |
|   /**
 | |
|    * This is the text that is being iterated over, wrapped in a <code>CharacterIterator</code>.
 | |
|    */
 | |
|   CharacterIterator text;
 | |
| 
 | |
|   /**
 | |
|    * This is the index into the collation decomposition where we are currently scanning.
 | |
|    */
 | |
|   int index;
 | |
| 
 | |
|   /**
 | |
|    * This is the index into the String where we are currently scanning.
 | |
|    */
 | |
|   int textIndex;
 | |
| 
 | |
|   /**
 | |
|    * Array containing the collation decomposition of the
 | |
|    * text given to the constructor.
 | |
|    */
 | |
|   private RuleBasedCollator.CollationElement[] textDecomposition;
 | |
| 
 | |
|   /**
 | |
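|    * Array containing, for each entry of the collation decomposition, the
|    * offset in the source text it belongs to; the final extra entry holds
|    * the length of the text.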
 | |
|    */
 | |
|   private int[] textIndexes;
 | |
| 
 | |
/**
 * Creates a <code>CollationElementIterator</code> that walks over the given
 * <code>String</code> according to the rules of the given
 * <code>RuleBasedCollator</code>.  The collator is stored first, then the
 * text is decomposed by <code>setText</code>.
 *
 * @param collator The <code>RuleBasedCollator</code> used for calculating collation values
 * @param text The <code>String</code> to iterate over.
 */
CollationElementIterator(RuleBasedCollator collator, String text)
{
  // The collator must be in place before setText(), which consults it.
  this.collator = collator;
  setText(text);
}
 | |
| 
 | |
/**
 * Creates a <code>CollationElementIterator</code> that walks over the
 * characters supplied by the given <code>CharacterIterator</code> according
 * to the rules of the given <code>RuleBasedCollator</code>.  The collator is
 * stored first, then the text is decomposed by <code>setText</code>.
 *
 * @param collator The <code>RuleBasedCollator</code> used for calculating collation values
 * @param text The character iterator to iterate over.
 */
CollationElementIterator(RuleBasedCollator collator, CharacterIterator text)
{
  // The collator must be in place before setText(), which consults it.
  this.collator = collator;
  setText(text);
}
 | |
| 
 | |
|   RuleBasedCollator.CollationElement nextBlock()
 | |
|   {
 | |
|     if (index >= textDecomposition.length)
 | |
|       return null;
 | |
| 
 | |
|     RuleBasedCollator.CollationElement e = textDecomposition[index];
 | |
| 
 | |
|     textIndex = textIndexes[index+1];
 | |
| 
 | |
|     index++;
 | |
| 
 | |
|     return e;
 | |
|   }
 | |
| 
 | |
|   RuleBasedCollator.CollationElement previousBlock()
 | |
|   {
 | |
|     if (index == 0)
 | |
|       return null;
 | |
| 
 | |
|     index--;
 | |
|     RuleBasedCollator.CollationElement e = textDecomposition[index];
 | |
| 
 | |
|     textIndex = textIndexes[index+1];
 | |
| 
 | |
|     return e;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * This method returns the collation ordering value of the next character sequence
 | |
|    * in the string (it may be an extended character following collation rules).
 | |
|    * This method will return <code>NULLORDER</code> if the
 | |
|    * end of the string was reached.
 | |
|    *
 | |
|    * @return The collation ordering value.
 | |
|    */
 | |
|   public int next()
 | |
|   {
 | |
|     RuleBasedCollator.CollationElement e = nextBlock();
 | |
| 
 | |
|     if (e == null)
 | |
|       return NULLORDER;
 | |
| 
 | |
|     return e.getValue();
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * This method returns the collation ordering value of the previous character
 | |
|    * in the string.  This method will return <code>NULLORDER</code> if the
 | |
|    * beginning of the string was reached.
 | |
|    *
 | |
|    * @return The collation ordering value.
 | |
|    */
 | |
|   public int previous()
 | |
|   {
 | |
|     RuleBasedCollator.CollationElement e = previousBlock();
 | |
| 
 | |
|     if (e == null)
 | |
|       return NULLORDER;
 | |
| 
 | |
|     return e.getValue();
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * This method returns the primary order value for the given collation
 | |
|    * value.
 | |
|    *
 | |
|    * @param order The collation value returned from <code>next()</code> or
 | |
|    *              <code>previous()</code>.
 | |
|    *
 | |
|    * @return The primary order value of the specified collation value.  This is
 | |
|    *         the high 16 bits.
 | |
|    */
 | |
|   public static int primaryOrder(int order)
 | |
|   {
 | |
|     // From the JDK 1.2 spec.
 | |
|     return order >>> 16;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * This method resets the internal position pointer to read from the
 | |
|    * beginning of the <code>String</code> again.
 | |
|    */
 | |
|   public void reset()
 | |
|   {
 | |
|     index = 0;
 | |
|     textIndex = 0;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * This method returns the secondary order value for the given collation
 | |
|    * value.
 | |
|    *
 | |
|    * @param order The collation value returned from <code>next()</code> or
 | |
|    *              <code>previous()</code>.
 | |
|    *
 | |
|    * @return The secondary order value of the specified collation value.  This
 | |
|    *         is the bits 8-15.
 | |
|    */
 | |
|   public static short secondaryOrder(int order)
 | |
|   {
 | |
|     // From the JDK 1.2 spec.
 | |
|     return (short) ((order >>> 8) & 255);
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * This method returns the tertiary order value for the given collation
 | |
|    * value.
 | |
|    *
 | |
|    * @param order The collation value returned from <code>next()</code> or
 | |
|    *              <code>previous()</code>.
 | |
|    *
 | |
|    * @return The tertiary order value of the specified collation value.  This
 | |
|    *         is the low eight bits.
 | |
|    */
 | |
|   public static short tertiaryOrder(int order)
 | |
|   {
 | |
|     // From the JDK 1.2 spec.
 | |
|     return (short) (order & 255);
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * This method sets the <code>String</code> that it is iterating over
 | |
|    * to the specified <code>String</code>.
 | |
|    *
 | |
|    * @param text The new <code>String</code> to iterate over.
 | |
|    *
 | |
|    * @since 1.2
 | |
|    */
 | |
|   public void setText(String text)
 | |
|   {
 | |
|     int idx = 0;
 | |
|     int idx_idx = 0;
 | |
|     int alreadyExpanded = 0;
 | |
|     int idxToMove = 0;
 | |
| 
 | |
|     this.text = new StringCharacterIterator(text);
 | |
|     this.index = 0;
 | |
| 
 | |
|     String work_text = text.intern();
 | |
| 
 | |
|     ArrayList<RuleBasedCollator.CollationElement> aElement = new ArrayList<RuleBasedCollator.CollationElement>();
 | |
|     ArrayList<Integer> aIdx = new ArrayList<Integer>();
 | |
| 
 | |
|     // Build element collection ordered as they come in "text".
 | |
|     while (idx < work_text.length())
 | |
|       {
 | |
|         String key, keyOld;
 | |
| 
 | |
|         Object object = null;
 | |
|         int p = 1;
 | |
| 
 | |
|         // IMPROVE: use a TreeMap with a prefix-ordering rule.
 | |
|         keyOld = key = null;
 | |
|         do
 | |
|           {
 | |
|             if (object != null)
 | |
|               keyOld = key;
 | |
|             key = work_text.substring (idx, idx+p);
 | |
|             object = collator.prefix_tree.get (key);
 | |
|             if (object != null && idx < alreadyExpanded)
 | |
|               {
 | |
|                 RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
 | |
|                 if (prefix.expansion != null &&
 | |
|                     prefix.expansion.startsWith(work_text.substring(0, idx)))
 | |
|                 {
 | |
|                   object = null;
 | |
|                   key = keyOld;
 | |
|                 }
 | |
|               }
 | |
|             p++;
 | |
|           }
 | |
|         while (idx+p <= work_text.length());
 | |
| 
 | |
|         if (object == null)
 | |
|           key = keyOld;
 | |
| 
 | |
|         RuleBasedCollator.CollationElement prefix =
 | |
|           (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
 | |
| 
 | |
|         /*
 | |
|          * First case: There is no such sequence in the database.
 | |
|          * We will have to build one from the context.
 | |
|          */
 | |
|         if (prefix == null)
 | |
|           {
 | |
|             /*
 | |
|              * We are dealing with sequences in an expansion. They
 | |
|              * are treated as accented characters (tertiary order).
 | |
|              */
 | |
|             if (alreadyExpanded > 0)
 | |
|               {
 | |
|                 RuleBasedCollator.CollationElement e =
 | |
|                   collator.getDefaultAccentedElement (work_text.charAt (idx));
 | |
| 
 | |
|                 aElement.add (e);
 | |
|                 aIdx.add (Integer.valueOf(idx_idx));
 | |
|                 idx++;
 | |
|                 alreadyExpanded--;
 | |
|                 if (alreadyExpanded == 0)
 | |
|                   {
 | |
|                     /* There is not any characters left in the expansion set.
 | |
|                      * We can increase the pointer in the source string.
 | |
|                      */
 | |
|                     idx_idx += idxToMove;
 | |
|                     idxToMove = 0;
 | |
|                   }
 | |
|                 else
 | |
|                   idx_idx++;
 | |
|               }
 | |
|             else
 | |
|               {
 | |
|                 /* This is a normal character. */
 | |
|                 RuleBasedCollator.CollationElement e =
 | |
|                   collator.getDefaultElement (work_text.charAt (idx));
 | |
|                 Integer iRef = Integer.valueOf(idx_idx);
 | |
| 
 | |
|                 /* Don't forget to mark it as a special sequence so the
 | |
|                  * string can be ordered.
 | |
|                  */
 | |
|                 aElement.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
 | |
|                 aIdx.add (iRef);
 | |
|                 aElement.add (e);
 | |
|                 aIdx.add (iRef);
 | |
|                 idx_idx++;
 | |
|                 idx++;
 | |
|               }
 | |
|             continue;
 | |
|           }
 | |
| 
 | |
|         /*
 | |
|          * Second case: Here we have found a matching sequence.
 | |
|          * Here we have an expansion string prepend it to the "work text" and
 | |
|          * add the corresponding sorting element. We must also mark
 | |
|          */
 | |
|         if (prefix.expansion != null)
 | |
|           {
 | |
|             work_text = prefix.expansion
 | |
|               + work_text.substring (idx+prefix.key.length());
 | |
|             idx = 0;
 | |
|             aElement.add (prefix);
 | |
|             aIdx.add (Integer.valueOf(idx_idx));
 | |
|             if (alreadyExpanded == 0)
 | |
|               idxToMove = prefix.key.length();
 | |
|             alreadyExpanded += prefix.expansion.length()-prefix.key.length();
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             /* Third case: the simplest. We have got the prefix and it
 | |
|              * has not to be expanded.
 | |
|              */
 | |
|             aElement.add (prefix);
 | |
|             aIdx.add (Integer.valueOf(idx_idx));
 | |
|             idx += prefix.key.length();
 | |
|             /* If the sequence is in an expansion, we must decrease the
 | |
|              * counter.
 | |
|              */
 | |
|             if (alreadyExpanded > 0)
 | |
|               {
 | |
|                 alreadyExpanded -= prefix.key.length();
 | |
|                 if (alreadyExpanded == 0)
 | |
|                   {
 | |
|                     idx_idx += idxToMove;
 | |
|                     idxToMove = 0;
 | |
|                   }
 | |
|               }
 | |
|             else
 | |
|               idx_idx += prefix.key.length();
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     textDecomposition = aElement.toArray(new RuleBasedCollator.CollationElement[aElement.size()]);
 | |
|     textIndexes = new int[aIdx.size()+1];
 | |
|     for (int i = 0; i < aIdx.size(); i++)
 | |
|       {
 | |
|         textIndexes[i] = aIdx.get(i).intValue();
 | |
|       }
 | |
|     textIndexes[aIdx.size()] = text.length();
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * This method sets the <code>String</code> that it is iterating over
 | |
|    * to the <code>String</code> represented by the specified
 | |
|    * <code>CharacterIterator</code>.
 | |
|    *
 | |
|    * @param source The <code>CharacterIterator</code> containing the new
 | |
|    * <code>String</code> to iterate over.
 | |
|    */
 | |
|   public void setText(CharacterIterator source)
 | |
|   {
 | |
|     CPStringBuilder expand = new CPStringBuilder();
 | |
| 
 | |
|     // For now assume we read from the beginning of the string.
 | |
|     for (char c = source.first();
 | |
|          c != CharacterIterator.DONE;
 | |
|          c = source.next())
 | |
|       expand.append(c);
 | |
| 
 | |
|     setText(expand.toString());
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * This method returns the current offset into the <code>String</code>
 | |
|    * that is being iterated over.
 | |
|    *
 | |
|    * @return The iteration index position.
 | |
|    *
 | |
|    * @since 1.2
 | |
|    */
 | |
|   public int getOffset()
 | |
|   {
 | |
|     return textIndex;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * This method sets the iteration index position into the current
 | |
|    * <code>String</code> to the specified value.  This value must not
 | |
|    * be negative and must not be greater than the last index position
 | |
|    * in the <code>String</code>.
 | |
|    *
 | |
|    * @param offset The new iteration index position.
 | |
|    *
 | |
|    * @exception IllegalArgumentException If the new offset is not valid.
 | |
|    */
 | |
|   public void setOffset(int offset)
 | |
|   {
 | |
|     if (offset < 0)
 | |
|       throw new IllegalArgumentException("Negative offset: " + offset);
 | |
| 
 | |
|     if (offset > (text.getEndIndex() - 1))
 | |
|       throw new IllegalArgumentException("Offset too large: " + offset);
 | |
| 
 | |
|     for (index = 0; index < textDecomposition.length; index++)
 | |
|       {
 | |
|         if (offset <= textIndexes[index])
 | |
|           break;
 | |
|       }
 | |
|     /*
 | |
|      * As textIndexes[0] == 0, we should not have to take care whether index is
 | |
|      * greater than 0. It is always.
 | |
|      */
 | |
|     if (textIndexes[index] == offset)
 | |
|       textIndex = offset;
 | |
|     else
 | |
|       textIndex = textIndexes[index-1];
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * This method returns the maximum length of any expansion sequence that
 | |
|    * ends with the specified collation order value.  (Whatever that means).
 | |
|    *
 | |
|    * @param value The collation order value
 | |
|    *
 | |
|    * @return The maximum length of an expansion sequence.
 | |
|    */
 | |
|   public int getMaxExpansion(int value)
 | |
|   {
 | |
|     return 1;
 | |
|   }
 | |
| }
 |