mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			
		
			
				
	
	
		
			5832 lines
		
	
	
		
			160 KiB
		
	
	
	
		
			Java
		
	
	
	
			
		
		
	
	
			5832 lines
		
	
	
		
			160 KiB
		
	
	
	
		
			Java
		
	
	
	
| /* XmlParser.java --
 | |
|    Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
 | |
| 
 | |
| This file is part of GNU Classpath.
 | |
| 
 | |
| GNU Classpath is free software; you can redistribute it and/or modify
 | |
| it under the terms of the GNU General Public License as published by
 | |
| the Free Software Foundation; either version 2, or (at your option)
 | |
| any later version.
 | |
| 
 | |
| GNU Classpath is distributed in the hope that it will be useful, but
 | |
| WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
| General Public License for more details.
 | |
| 
 | |
| You should have received a copy of the GNU General Public License
 | |
| along with GNU Classpath; see the file COPYING.  If not, write to the
 | |
| Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 | |
| 02110-1301 USA.
 | |
| 
 | |
| Linking this library statically or dynamically with other modules is
 | |
| making a combined work based on this library.  Thus, the terms and
 | |
| conditions of the GNU General Public License cover the whole
 | |
| combination.
 | |
| 
 | |
| As a special exception, the copyright holders of this library give you
 | |
| permission to link this library with independent modules to produce an
 | |
| executable, regardless of the license terms of these independent
 | |
| modules, and to copy and distribute the resulting executable under
 | |
| terms of your choice, provided that you also meet, for each linked
 | |
| independent module, the terms and conditions of the license of that
 | |
| module.  An independent module is a module which is not derived from
 | |
| or based on this library.  If you modify this library, you may extend
 | |
| this exception to your version of the library, but you are not
 | |
| obligated to do so.  If you do not wish to do so, delete this
 | |
| exception statement from your version.
 | |
| 
 | |
| Partly derived from code which carried the following notice:
 | |
| 
 | |
|   Copyright (c) 1997, 1998 by Microstar Software Ltd.
 | |
| 
 | |
|   AElfred is free for both commercial and non-commercial use and
 | |
|   redistribution, provided that Microstar's copyright and disclaimer are
 | |
|   retained intact.  You are free to modify AElfred for your own use and
 | |
|   to redistribute AElfred with your modifications, provided that the
 | |
|   modifications are clearly documented.
 | |
| 
 | |
|   This program is distributed in the hope that it will be useful, but
 | |
|   WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|   merchantability or fitness for a particular purpose.  Please use it AT
 | |
|   YOUR OWN RISK.
 | |
| */
 | |
| 
 | |
| package gnu.xml.aelfred2;
 | |
| 
 | |
| import gnu.java.security.action.GetPropertyAction;
 | |
| 
 | |
| import java.io.BufferedInputStream;
 | |
| import java.io.CharConversionException;
 | |
| import java.io.EOFException;
 | |
| import java.io.InputStream;
 | |
| import java.io.InputStreamReader;
 | |
| import java.io.IOException;
 | |
| import java.io.Reader;
 | |
| import java.io.UnsupportedEncodingException;
 | |
| import java.net.URL;
 | |
| import java.net.URLConnection;
 | |
| import java.security.AccessController;
 | |
| 
 | |
| import java.util.Iterator;
 | |
| import java.util.HashMap;
 | |
| import java.util.LinkedList;
 | |
| 
 | |
| import org.xml.sax.InputSource;
 | |
| import org.xml.sax.SAXException;
 | |
| 
 | |
| 
 | |
| /**
 | |
|  * Parse XML documents and return parse events through call-backs.
 | |
|  * Use the <code>SAXDriver</code> class as your entry point, as all
 | |
|  * internal parser interfaces are subject to change.
 | |
|  *
 | |
|  * @author Written by David Megginson <dmeggins@microstar.com>
 | |
|  *      (version 1.2a with bugfixes)
 | |
|  * @author Updated by David Brownell <dbrownell@users.sourceforge.net>
 | |
|  * @see SAXDriver
 | |
|  */
 | |
| final class XmlParser
 | |
| {
 | |
| 
 | |
|   // avoid slow per-character readCh()
 | |
|   private final static boolean USE_CHEATS = true;
 | |
| 
 | |
|   ////////////////////////////////////////////////////////////////////////
 | |
|   // Constants.
 | |
|   ////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
|   //
 | |
|   // Constants for element content type.
 | |
|   //
 | |
| 
 | |
|   /**
 | |
|    * Constant: an element has not been declared.
 | |
|    * @see #getElementContentType
 | |
|    */
 | |
|   public final static int CONTENT_UNDECLARED = 0;
 | |
| 
 | |
|   /**
 | |
|    * Constant: the element has a content model of ANY.
 | |
|    * @see #getElementContentType
 | |
|    */
 | |
|   public final static int CONTENT_ANY = 1;
 | |
| 
 | |
|   /**
 | |
|    * Constant: the element has declared content of EMPTY.
 | |
|    * @see #getElementContentType
 | |
|    */
 | |
|   public final static int CONTENT_EMPTY = 2;
 | |
| 
 | |
|   /**
 | |
|    * Constant: the element has mixed content.
 | |
|    * @see #getElementContentType
 | |
|    */
 | |
|   public final static int CONTENT_MIXED = 3;
 | |
| 
 | |
|   /**
 | |
|    * Constant: the element has element content.
 | |
|    * @see #getElementContentType
 | |
|    */
 | |
|   public final static int CONTENT_ELEMENTS = 4;
 | |
| 
 | |
| 
 | |
|   //
 | |
|   // Constants for the entity type.
 | |
|   //
 | |
| 
 | |
|   /**
 | |
|    * Constant: the entity has not been declared.
 | |
|    * @see #getEntityType
 | |
|    */
 | |
|   public final static int ENTITY_UNDECLARED = 0;
 | |
| 
 | |
|   /**
 | |
|    * Constant: the entity is internal.
 | |
|    * @see #getEntityType
 | |
|    */
 | |
|   public final static int ENTITY_INTERNAL = 1;
 | |
| 
 | |
|   /**
 | |
|    * Constant: the entity is external, non-parsable data.
 | |
|    * @see #getEntityType
 | |
|    */
 | |
|   public final static int ENTITY_NDATA = 2;
 | |
| 
 | |
|   /**
 | |
|    * Constant: the entity is external XML data.
 | |
|    * @see #getEntityType
 | |
|    */
 | |
|   public final static int ENTITY_TEXT = 3;
 | |
| 
 | |
|   //
 | |
|   // Attribute type constants are interned literal strings.
 | |
|   //
 | |
| 
 | |
|   //
 | |
|   // Constants for supported encodings.  "external" is just a flag.
 | |
|   //
 | |
|   private final static int ENCODING_EXTERNAL = 0;
 | |
|   private final static int ENCODING_UTF_8 = 1;
 | |
|   private final static int ENCODING_ISO_8859_1 = 2;
 | |
|   private final static int ENCODING_UCS_2_12 = 3;
 | |
|   private final static int ENCODING_UCS_2_21 = 4;
 | |
|   private final static int ENCODING_UCS_4_1234 = 5;
 | |
|   private final static int ENCODING_UCS_4_4321 = 6;
 | |
|   private final static int ENCODING_UCS_4_2143 = 7;
 | |
|   private final static int ENCODING_UCS_4_3412 = 8;
 | |
|   private final static int ENCODING_ASCII = 9;
 | |
| 
 | |
|   //
 | |
|   // Constants for attribute default value.
 | |
|   //
 | |
| 
 | |
|   /**
 | |
|    * Constant: the attribute is not declared.
 | |
|    * @see #getAttributeDefaultValueType
 | |
|    */
 | |
|   public final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30;
 | |
| 
 | |
|   /**
 | |
|    * Constant: the attribute has a literal default value specified.
 | |
|    * @see #getAttributeDefaultValueType
 | |
|    * @see #getAttributeDefaultValue
 | |
|    */
 | |
|   public final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31;
 | |
| 
 | |
|   /**
 | |
|    * Constant: the attribute was declared #IMPLIED.
 | |
|    * @see #getAttributeDefaultValueType
 | |
|    */
 | |
|   public final static int ATTRIBUTE_DEFAULT_IMPLIED = 32;
 | |
| 
 | |
|   /**
 | |
|    * Constant: the attribute was declared #REQUIRED.
 | |
|    * @see #getAttributeDefaultValueType
 | |
|    */
 | |
|   public final static int ATTRIBUTE_DEFAULT_REQUIRED = 33;
 | |
| 
 | |
|   /**
 | |
|    * Constant: the attribute was declared #FIXED.
 | |
|    * @see #getAttributeDefaultValueType
 | |
|    * @see #getAttributeDefaultValue
 | |
|    */
 | |
|   public final static int ATTRIBUTE_DEFAULT_FIXED = 34;
 | |
| 
 | |
|   //
 | |
|   // Constants for input.
 | |
|   //
 | |
|   private final static int INPUT_NONE = 0;
 | |
|   private final static int INPUT_INTERNAL = 1;
 | |
|   private final static int INPUT_STREAM = 3;
 | |
|   private final static int INPUT_READER = 5;
 | |
| 
 | |
|   //
 | |
|   // Flags for reading literals.
 | |
|   //
 | |
|   // expand general entity refs (attribute values in dtd and content)
 | |
|   private final static int LIT_ENTITY_REF = 2;
 | |
|   // normalize this value (space chars) (attributes, public ids)
 | |
|   private final static int LIT_NORMALIZE = 4;
 | |
|   // literal is an attribute value
 | |
|   private final static int LIT_ATTRIBUTE = 8;
 | |
|   // don't expand parameter entities
 | |
|   private final static int LIT_DISABLE_PE = 16;
 | |
|   // don't expand [or parse] character refs
 | |
|   private final static int LIT_DISABLE_CREF = 32;
 | |
|   // don't parse general entity refs
 | |
|   private final static int LIT_DISABLE_EREF = 64;
 | |
|   // literal is a public ID value
 | |
|   private final static int LIT_PUBID = 256;
 | |
| 
 | |
|   //
 | |
|   // Flags affecting PE handling in DTDs (if expandPE is true).
 | |
|   // PEs expand with space padding, except inside literals.
 | |
|   //
 | |
|   private final static int CONTEXT_NORMAL = 0;
 | |
|   private final static int CONTEXT_LITERAL = 1;
 | |
| 
 | |
|   // Emit warnings for relative URIs with no base URI.
 | |
|   static boolean uriWarnings;
 | |
|   static
 | |
|   {
 | |
|     String key = "gnu.xml.aelfred2.XmlParser.uriWarnings";
 | |
|     GetPropertyAction a = new GetPropertyAction(key);
 | |
|     uriWarnings = "true".equals(AccessController.doPrivileged(a));
 | |
|   }
 | |
| 
 | |
|   //
 | |
|   // The current XML handler interface.
 | |
|   //
 | |
|   private SAXDriver handler;
 | |
| 
 | |
|   //
 | |
|   // I/O information.
 | |
|   //
 | |
|   private Reader reader;   // current reader
 | |
|   private InputStream is;     // current input stream
 | |
|   private int line;     // current line number
 | |
|   private int column;   // current column number
 | |
|   private int sourceType;   // type of input source
 | |
|   private LinkedList inputStack;   // stack of input soruces
 | |
|   private URLConnection externalEntity; // current external entity
 | |
|   private int encoding;   // current character encoding
 | |
|   private int currentByteCount; // bytes read from current source
 | |
|   private InputSource scratch;  // temporary
 | |
| 
 | |
|   //
 | |
|   // Buffers for decoded but unparsed character input.
 | |
|   //
 | |
|   private char[] readBuffer;
 | |
|   private int readBufferPos;
 | |
|   private int readBufferLength;
 | |
|   private int readBufferOverflow;  // overflow from last data chunk.
 | |
| 
 | |
|   //
 | |
|   // Buffer for undecoded raw byte input.
 | |
|   //
 | |
|   private final static int READ_BUFFER_MAX = 16384;
 | |
|   private byte[] rawReadBuffer;
 | |
| 
 | |
| 
 | |
|   //
 | |
|   // Buffer for attribute values, char refs, DTD stuff.
 | |
|   //
 | |
|   private static int DATA_BUFFER_INITIAL = 4096;
 | |
|   private char[] dataBuffer;
 | |
|   private int dataBufferPos;
 | |
| 
 | |
|   //
 | |
|   // Buffer for parsed names.
 | |
|   //
 | |
|   private static int NAME_BUFFER_INITIAL = 1024;
 | |
|   private char[] nameBuffer;
 | |
|   private int nameBufferPos;
 | |
| 
 | |
|   //
 | |
|   // Save any standalone flag
 | |
|   //
 | |
|   private boolean docIsStandalone;
 | |
| 
 | |
|   //
 | |
|   // Hashtables for DTD information on elements, entities, and notations.
 | |
|   // Populated until we start ignoring decls (because of skipping a PE)
 | |
|   //
 | |
|   private HashMap elementInfo;
 | |
|   private HashMap entityInfo;
 | |
|   private HashMap notationInfo;
 | |
|   private boolean skippedPE;
 | |
| 
 | |
|   //
 | |
|   // Element type currently in force.
 | |
|   //
 | |
|   private String currentElement;
 | |
|   private int currentElementContent;
 | |
| 
 | |
|   //
 | |
|   // Stack of entity names, to detect recursion.
 | |
|   //
 | |
|   private LinkedList entityStack;
 | |
| 
 | |
|   //
 | |
|   // PE expansion is enabled in most chunks of the DTD, not all.
 | |
|   // When it's enabled, literals are treated differently.
 | |
|   //
 | |
|   private boolean inLiteral;
 | |
|   private boolean expandPE;
 | |
|   private boolean peIsError;
 | |
| 
 | |
|   //
 | |
|   // can't report entity expansion inside two constructs:
 | |
|   // - attribute expansions (internal entities only)
 | |
|   // - markup declarations (parameter entities only)
 | |
|   //
 | |
|   private boolean doReport;
 | |
| 
 | |
|   //
 | |
|   // Symbol table, for caching interned names.
 | |
|   //
 | |
|   // These show up wherever XML names or nmtokens are used:  naming elements,
 | |
|   // attributes, PIs, notations, entities, and enumerated attribute values.
 | |
|   //
 | |
|   // NOTE:  This hashtable doesn't grow.  The default size is intended to be
 | |
|   // rather large for most documents.  Example:  one snapshot of the DocBook
 | |
|   // XML 4.1 DTD used only about 350 such names.  As a rule, only pathological
 | |
|   // documents (ones that don't reuse names) should ever see much collision.
 | |
|   //
 | |
|   // Be sure that SYMBOL_TABLE_LENGTH always stays prime, for best hashing.
 | |
|   // "2039" keeps the hash table size at about two memory pages on typical
 | |
|   // 32 bit hardware.
 | |
|   //
 | |
|   private final static int SYMBOL_TABLE_LENGTH = 2039;
 | |
| 
 | |
|   private Object[][] symbolTable;
 | |
| 
 | |
|   //
 | |
|   // Hash table of attributes found in current start tag.
 | |
|   //
 | |
|   private String[] tagAttributes;
 | |
|   private int tagAttributePos;
 | |
| 
 | |
|   //
 | |
|   // Utility flag: have we noticed a CR while reading the last
 | |
|   // data chunk?  If so, we will have to go back and normalise
 | |
|   // CR or CR/LF line ends.
 | |
|   //
 | |
|   private boolean sawCR;
 | |
| 
 | |
|   //
 | |
|   // Utility flag: are we in CDATA?  If so, whitespace isn't ignorable.
 | |
|   //
 | |
|   private boolean inCDATA;
 | |
| 
 | |
|   //
 | |
|   // Xml version.
 | |
|   //
 | |
|   private static final int XML_10 = 0;
 | |
|   private static final int XML_11 = 1;
 | |
|   private int xmlVersion = XML_10;
 | |
| 
 | |
|   //////////////////////////////////////////////////////////////////////
 | |
|   // Constructors.
 | |
|   ////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
|   /**
 | |
|    * Construct a new parser with no associated handler.
 | |
|    * @see #setHandler
 | |
|    * @see #parse
 | |
|    */
 | |
|   // package private
 | |
|   XmlParser()
 | |
|   {
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Set the handler that will receive parsing events.
 | |
|    * @param handler The handler to receive callback events.
 | |
|    * @see #parse
 | |
|    */
 | |
|   // package private
 | |
|   void setHandler(SAXDriver handler)
 | |
|   {
 | |
|     this.handler = handler;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse an XML document from the character stream, byte stream, or URI
 | |
|    * that you provide (in that order of preference).  Any URI that you
 | |
|    * supply will become the base URI for resolving relative URI, and may
 | |
|    * be used to acquire a reader or byte stream.
 | |
|    *
 | |
|    * <p> Only one thread at a time may use this parser; since it is
 | |
|    * private to this package, post-parse cleanup is done by the caller,
 | |
|    * which MUST NOT REUSE the parser (just null it).
 | |
|    *
 | |
|    * @param systemId Absolute URI of the document; should never be null,
 | |
|    *    but may be so iff a reader <em>or</em> a stream is provided.
 | |
|    * @param publicId The public identifier of the document, or null.
 | |
|    * @param reader A character stream; must be null if stream isn't.
 | |
|    * @param stream A byte input stream; must be null if reader isn't.
 | |
|    * @param encoding The suggested encoding, or null if unknown.
 | |
|    * @exception java.lang.Exception Basically SAXException or IOException
 | |
|    */
 | |
|   // package private
 | |
|   void doParse(String systemId, String publicId, Reader reader,
 | |
|                InputStream stream, String encoding)
 | |
|     throws Exception
 | |
|   {
 | |
|     if (handler == null)
 | |
|       {
 | |
|         throw new IllegalStateException("no callback handler");
 | |
|       }
 | |
| 
 | |
|     initializeVariables();
 | |
| 
 | |
|     // predeclare the built-in entities here (replacement texts)
 | |
|     // we don't need to intern(), since we're guaranteed literals
 | |
|     // are always (globally) interned.
 | |
|     setInternalEntity("amp", "&");
 | |
|     setInternalEntity("lt", "<");
 | |
|     setInternalEntity("gt", ">");
 | |
|     setInternalEntity("apos", "'");
 | |
|     setInternalEntity("quot", """);
 | |
| 
 | |
|     try
 | |
|       {
 | |
|         // pushURL first to ensure locator is correct in startDocument
 | |
|         // ... it might report an IO or encoding exception.
 | |
|         handler.startDocument();
 | |
|         pushURL(false, "[document]",
 | |
|                 // default baseURI: null
 | |
|                 new ExternalIdentifiers(publicId, systemId, null),
 | |
|                 reader, stream, encoding, false);
 | |
| 
 | |
|         parseDocument();
 | |
|       }
 | |
|     catch (EOFException e)
 | |
|       {
 | |
|         //empty input
 | |
|         error("empty document, with no root element.");
 | |
|       }
 | |
|     finally
 | |
|       {
 | |
|         if (reader != null)
 | |
|           {
 | |
|             try
 | |
|               {
 | |
|                 reader.close();
 | |
|               }
 | |
|             catch (IOException e)
 | |
|               {
 | |
|                 /* ignore */
 | |
|               }
 | |
|           }
 | |
|         if (stream != null)
 | |
|           {
 | |
|             try
 | |
|               {
 | |
|                 stream.close();
 | |
|               }
 | |
|             catch (IOException e)
 | |
|               {
 | |
|                 /* ignore */
 | |
|               }
 | |
|           }
 | |
|         if (is != null)
 | |
|           {
 | |
|             try
 | |
|               {
 | |
|                 is.close();
 | |
|               }
 | |
|             catch (IOException e)
 | |
|               {
 | |
|                 /* ignore */
 | |
|               }
 | |
|           }
 | |
|         scratch = null;
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   //////////////////////////////////////////////////////////////////////
 | |
|   // Error reporting.
 | |
|   //////////////////////////////////////////////////////////////////////
 | |
| 
 | |
|   /**
 | |
|    * Report an error.
 | |
|    * @param message The error message.
 | |
|    * @param textFound The text that caused the error (or null).
 | |
|    * @see SAXDriver#error
 | |
|    * @see #line
 | |
|    */
 | |
|   private void error(String message, String textFound, String textExpected)
 | |
|     throws SAXException
 | |
|   {
 | |
|     if (textFound != null)
 | |
|       {
 | |
|         message = message + " (found \"" + textFound + "\")";
 | |
|       }
 | |
|     if (textExpected != null)
 | |
|       {
 | |
|         message = message + " (expected \"" + textExpected + "\")";
 | |
|       }
 | |
|     handler.fatal(message);
 | |
| 
 | |
|     // "can't happen"
 | |
|     throw new SAXException(message);
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Report a serious error.
 | |
|    * @param message The error message.
 | |
|    * @param textFound The text that caused the error (or null).
 | |
|    */
 | |
|   private void error(String message, char textFound, String textExpected)
 | |
|     throws SAXException
 | |
|   {
 | |
|     error(message, Character.toString(textFound), textExpected);
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Report typical case fatal errors.
 | |
|    */
 | |
|   private void error(String message)
 | |
|     throws SAXException
 | |
|   {
 | |
|     handler.fatal(message);
 | |
|   }
 | |
| 
 | |
|   //////////////////////////////////////////////////////////////////////
 | |
|   // Major syntactic productions.
 | |
|   //////////////////////////////////////////////////////////////////////
 | |
| 
 | |
|   /**
 | |
|    * Parse an XML document.
 | |
|    * <pre>
 | |
|    * [1] document ::= prolog element Misc*
 | |
|    * </pre>
 | |
|    * <p>This is the top-level parsing function for a single XML
 | |
|    * document.  As a minimum, a well-formed document must have
 | |
|    * a document element, and a valid document must have a prolog
 | |
|    * (one with doctype) as well.
 | |
|    */
 | |
|   private void parseDocument()
 | |
|     throws Exception
 | |
|   {
 | |
|     try
 | |
|       {                                       // added by MHK
 | |
|         boolean sawDTD = parseProlog();
 | |
|         require('<');
 | |
|         parseElement(!sawDTD);
 | |
|       }
 | |
|     catch (EOFException ee)
 | |
|       {                 // added by MHK
 | |
|         error("premature end of file", "[EOF]", null);
 | |
|       }
 | |
| 
 | |
|     try
 | |
|       {
 | |
|         parseMisc();   //skip all white, PIs, and comments
 | |
|         char c = readCh();    //if this doesn't throw an exception...
 | |
|         error("unexpected characters after document end", c, null);
 | |
|       }
 | |
|     catch (EOFException e)
 | |
|       {
 | |
|         return;
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   static final char[] startDelimComment = { '<', '!', '-', '-' };
 | |
|   static final char[] endDelimComment = { '-', '-' };
 | |
| 
 | |
|   /**
 | |
|    * Skip a comment.
 | |
|    * <pre>
 | |
|    * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* "-->"
 | |
|    * </pre>
 | |
|    * <p> (The <code><!--</code> has already been read.)
 | |
|    */
 | |
|   private void parseComment()
 | |
|     throws Exception
 | |
|   {
 | |
|     char c;
 | |
|     boolean saved = expandPE;
 | |
| 
 | |
|     expandPE = false;
 | |
|     parseUntil(endDelimComment);
 | |
|     require('>');
 | |
|     expandPE = saved;
 | |
|     handler.comment(dataBuffer, 0, dataBufferPos);
 | |
|     dataBufferPos = 0;
 | |
|   }
 | |
| 
 | |
|   static final char[] startDelimPI = { '<', '?' };
 | |
|   static final char[] endDelimPI = { '?', '>' };
 | |
| 
 | |
|   /**
 | |
|    * Parse a processing instruction and do a call-back.
 | |
|    * <pre>
 | |
|    * [16] PI ::= '<?' PITarget
 | |
|    *    (S (Char* - (Char* '?>' Char*)))?
 | |
|    *    '?>'
 | |
|    * [17] PITarget ::= Name - ( ('X'|'x') ('M'|m') ('L'|l') )
 | |
|    * </pre>
 | |
|    * <p> (The <code><?</code> has already been read.)
 | |
|    */
 | |
|   private void parsePI()
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     String name;
 | |
|     boolean saved = expandPE;
 | |
| 
 | |
|     expandPE = false;
 | |
|     name = readNmtoken(true);
 | |
|     //NE08
 | |
|     if (name.indexOf(':') >= 0)
 | |
|       {
 | |
|         error("Illegal character(':') in processing instruction name ",
 | |
|               name, null);
 | |
|       }
 | |
|     if ("xml".equalsIgnoreCase(name))
 | |
|       {
 | |
|         error("Illegal processing instruction target", name, null);
 | |
|       }
 | |
|     if (!tryRead(endDelimPI))
 | |
|       {
 | |
|         requireWhitespace();
 | |
|         parseUntil(endDelimPI);
 | |
|       }
 | |
|     expandPE = saved;
 | |
|     handler.processingInstruction(name, dataBufferToString());
 | |
|   }
 | |
| 
 | |
|   static final char[] endDelimCDATA = { ']', ']', '>' };
 | |
| 
 | |
|   private boolean isDirtyCurrentElement;
 | |
| 
 | |
|   /**
 | |
|    * Parse a CDATA section.
 | |
|    * <pre>
 | |
|    * [18] CDSect ::= CDStart CData CDEnd
 | |
|    * [19] CDStart ::= '<![CDATA['
 | |
|    * [20] CData ::= (Char* - (Char* ']]>' Char*))
 | |
|    * [21] CDEnd ::= ']]>'
 | |
|    * </pre>
 | |
|    * <p> (The '<![CDATA[' has already been read.)
 | |
|    */
 | |
|   private void parseCDSect()
 | |
|     throws Exception
 | |
|   {
 | |
|     parseUntil(endDelimCDATA);
 | |
|     dataBufferFlush();
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse the prolog of an XML document.
 | |
|    * <pre>
 | |
|    * [22] prolog ::= XMLDecl? Misc* (Doctypedecl Misc*)?
 | |
|    * </pre>
 | |
|    * <p>We do not look for the XML declaration here, because it was
 | |
|    * handled by pushURL ().
 | |
|    * @see pushURL
 | |
|    * @return true if a DTD was read.
 | |
|    */
 | |
|   private boolean parseProlog()
 | |
|     throws Exception
 | |
|   {
 | |
|     parseMisc();
 | |
| 
 | |
|     if (tryRead("<!DOCTYPE"))
 | |
|       {
 | |
|         parseDoctypedecl();
 | |
|         parseMisc();
 | |
|         return true;
 | |
|       }
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   private void checkLegalVersion(String version)
 | |
|     throws SAXException
 | |
|   {
 | |
|     int len = version.length();
 | |
|     for (int i = 0; i < len; i++)
 | |
|       {
 | |
|         char c = version.charAt(i);
 | |
|         if ('0' <= c && c <= '9')
 | |
|           {
 | |
|             continue;
 | |
|           }
 | |
|         if (c == '_' || c == '.' || c == ':' || c == '-')
 | |
|           {
 | |
|             continue;
 | |
|           }
 | |
|         if ('a' <= c && c <= 'z')
 | |
|           {
 | |
|             continue;
 | |
|           }
 | |
|         if ('A' <= c && c <= 'Z')
 | |
|           {
 | |
|             continue;
 | |
|           }
 | |
|         error ("illegal character in version", version, "1.0");
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse the XML declaration.
 | |
|    * <pre>
 | |
|    * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
 | |
|    * [24] VersionInfo ::= S 'version' Eq
 | |
|    *    ("'" VersionNum "'" | '"' VersionNum '"' )
 | |
|    * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')*
 | |
|    * [32] SDDecl ::= S 'standalone' Eq
 | |
|    *    ( "'"" ('yes' | 'no') "'"" | '"' ("yes" | "no") '"' )
 | |
|    * [80] EncodingDecl ::= S 'encoding' Eq
 | |
|    *    ( "'" EncName "'" | "'" EncName "'" )
 | |
|    * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
 | |
|    * </pre>
 | |
|    * <p> (The <code><?xml</code> and whitespace have already been read.)
 | |
|    * @return the encoding in the declaration, uppercased; or null
 | |
|    * @see #parseTextDecl
 | |
|    * @see #setupDecoding
 | |
|    */
 | |
|   private String parseXMLDecl(boolean ignoreEncoding)
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     String version;
 | |
|     String encodingName = null;
 | |
|     String standalone = null;
 | |
|     int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
 | |
|     String inputEncoding = null;
 | |
| 
 | |
|     switch (this.encoding)
 | |
|       {
 | |
|       case ENCODING_EXTERNAL:
 | |
|       case ENCODING_UTF_8:
 | |
|         inputEncoding = "UTF-8";
 | |
|         break;
 | |
|       case ENCODING_ISO_8859_1:
 | |
|         inputEncoding = "ISO-8859-1";
 | |
|         break;
 | |
|       case ENCODING_UCS_2_12:
 | |
|         inputEncoding = "UTF-16BE";
 | |
|         break;
 | |
|       case ENCODING_UCS_2_21:
 | |
|         inputEncoding = "UTF-16LE";
 | |
|         break;
 | |
|       }
 | |
| 
 | |
|     // Read the version.
 | |
|     require("version");
 | |
|     parseEq();
 | |
|     checkLegalVersion(version = readLiteral(flags));
 | |
|     if (!version.equals("1.0"))
 | |
|       {
 | |
|         if (version.equals("1.1"))
 | |
|           {
 | |
|             handler.warn("expected XML version 1.0, not: " + version);
 | |
|             xmlVersion = XML_11;
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             error("illegal XML version", version, "1.0 or 1.1");
 | |
|           }
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         xmlVersion = XML_10;
 | |
|       }
 | |
|     // Try reading an encoding declaration.
 | |
|     boolean white = tryWhitespace();
 | |
| 
 | |
|     if (tryRead("encoding"))
 | |
|       {
 | |
|         if (!white)
 | |
|           {
 | |
|             error("whitespace required before 'encoding='");
 | |
|           }
 | |
|         parseEq();
 | |
|         encodingName = readLiteral(flags);
 | |
|         if (!ignoreEncoding)
 | |
|           {
 | |
|             setupDecoding(encodingName);
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     // Try reading a standalone declaration
 | |
|     if (encodingName != null)
 | |
|       {
 | |
|         white = tryWhitespace();
 | |
|       }
 | |
|     if (tryRead("standalone"))
 | |
|       {
 | |
|         if (!white)
 | |
|           {
 | |
|             error("whitespace required before 'standalone='");
 | |
|           }
 | |
|         parseEq();
 | |
|         standalone = readLiteral(flags);
 | |
|         if ("yes".equals(standalone))
 | |
|           {
 | |
|             docIsStandalone = true;
 | |
|           }
 | |
|         else if (!"no".equals(standalone))
 | |
|           {
 | |
|             error("standalone flag must be 'yes' or 'no'");
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     skipWhitespace();
 | |
|     require("?>");
 | |
| 
 | |
|     if (inputEncoding == null)
 | |
|       {
 | |
|         inputEncoding = encodingName;
 | |
|       }
 | |
|     return encodingName;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse a text declaration.
 | |
|    * <pre>
 | |
|    * [79] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
 | |
|    * [80] EncodingDecl ::= S 'encoding' Eq
 | |
|    *    ( '"' EncName '"' | "'" EncName "'" )
 | |
|    * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
 | |
|    * </pre>
 | |
|    * <p> (The <code><?xml</code>' and whitespace have already been read.)
 | |
|    * @return the encoding in the declaration, uppercased; or null
 | |
|    * @see #parseXMLDecl
 | |
|    * @see #setupDecoding
 | |
|    */
 | |
|   private String parseTextDecl(boolean ignoreEncoding)
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     String encodingName = null;
 | |
|     int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
 | |
| 
 | |
|     // Read an optional version.
 | |
|     if (tryRead ("version"))
 | |
|       {
 | |
|         String version;
 | |
|         parseEq();
 | |
|         checkLegalVersion(version = readLiteral(flags));
 | |
| 
 | |
|         if (version.equals("1.1"))
 | |
|           {
 | |
|             if (xmlVersion == XML_10)
 | |
|               {
 | |
|                 error("external subset has later version number.", "1.0",
 | |
|                       version);
 | |
|               }
 | |
|             handler.warn("expected XML version 1.0, not: " + version);
 | |
|             xmlVersion = XML_11;
 | |
|           }
 | |
|         else if (!version.equals("1.0"))
 | |
|           {
 | |
|             error("illegal XML version", version, "1.0 or 1.1");
 | |
|           }
 | |
|         requireWhitespace();
 | |
|       }
 | |
| 
 | |
|     // Read the encoding.
 | |
|     require("encoding");
 | |
|     parseEq();
 | |
|     encodingName = readLiteral(flags);
 | |
|     if (!ignoreEncoding)
 | |
|       {
 | |
|         setupDecoding(encodingName);
 | |
|       }
 | |
|     skipWhitespace();
 | |
|     require("?>");
 | |
| 
 | |
|     return encodingName;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Sets up internal state so that we can decode an entity using the
 | |
|    * specified encoding.  This is used when we start to read an entity
 | |
|    * and we have been given knowledge of its encoding before we start to
 | |
|    * read any data (e.g. from a SAX input source or from a MIME type).
 | |
|    *
 | |
|    * <p> It is also used after autodetection, at which point only very
 | |
|    * limited adjustments to the encoding may be used (switching between
 | |
|    * related builtin decoders).
 | |
|    *
 | |
|    * @param encodingName The name of the encoding specified by the user.
 | |
|    * @exception IOException if the encoding isn't supported either
 | |
|    *  internally to this parser, or by the hosting JVM.
 | |
|    * @see #parseXMLDecl
 | |
|    * @see #parseTextDecl
 | |
|      */
 | |
|   private void setupDecoding(String encodingName)
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     encodingName = encodingName.toUpperCase();
 | |
| 
 | |
|     // ENCODING_EXTERNAL indicates an encoding that wasn't
 | |
|     // autodetected ... we can use builtin decoders, or
 | |
|     // ones from the JVM (InputStreamReader).
 | |
| 
 | |
|     // Otherwise we can only tweak what was autodetected, and
 | |
|     // only for single byte (ASCII derived) builtin encodings.
 | |
| 
 | |
|     // ASCII-derived encodings
 | |
|     if (encoding == ENCODING_UTF_8 || encoding == ENCODING_EXTERNAL)
 | |
|       {
 | |
|         if (encodingName.equals("ISO-8859-1")
 | |
|             || encodingName.equals("8859_1")
 | |
|             || encodingName.equals("ISO8859_1"))
 | |
|           {
 | |
|             encoding = ENCODING_ISO_8859_1;
 | |
|             return;
 | |
|           }
 | |
|         else if (encodingName.equals("US-ASCII")
 | |
|                  || encodingName.equals("ASCII"))
 | |
|           {
 | |
|             encoding = ENCODING_ASCII;
 | |
|             return;
 | |
|           }
 | |
|         else if (encodingName.equals("UTF-8")
 | |
|                  || encodingName.equals("UTF8"))
 | |
|           {
 | |
|             encoding = ENCODING_UTF_8;
 | |
|             return;
 | |
|           }
 | |
|         else if (encoding != ENCODING_EXTERNAL)
 | |
|           {
 | |
|             // used to start with a new reader ...
 | |
|             throw new UnsupportedEncodingException(encodingName);
 | |
|           }
 | |
|         // else fallthrough ...
 | |
|         // it's ASCII-ish and something other than a builtin
 | |
|       }
 | |
| 
 | |
|     // Unicode and such
 | |
|     if (encoding == ENCODING_UCS_2_12 || encoding == ENCODING_UCS_2_21)
 | |
|       {
 | |
|         if (!(encodingName.equals("ISO-10646-UCS-2")
 | |
|               || encodingName.equals("UTF-16")
 | |
|               || encodingName.equals("UTF-16BE")
 | |
|               || encodingName.equals("UTF-16LE")))
 | |
|           {
 | |
|             error("unsupported Unicode encoding", encodingName, "UTF-16");
 | |
|           }
 | |
|         return;
 | |
|       }
 | |
| 
 | |
|     // four byte encodings
 | |
|     if (encoding == ENCODING_UCS_4_1234
 | |
|         || encoding == ENCODING_UCS_4_4321
 | |
|         || encoding == ENCODING_UCS_4_2143
 | |
|         || encoding == ENCODING_UCS_4_3412)
 | |
|       {
 | |
|         // Strictly:  "UCS-4" == "UTF-32BE"; also, "UTF-32LE" exists
 | |
|         if (!encodingName.equals("ISO-10646-UCS-4"))
 | |
|           {
 | |
|             error("unsupported 32-bit encoding", encodingName,
 | |
|                   "ISO-10646-UCS-4");
 | |
|           }
 | |
|         return;
 | |
|       }
 | |
| 
 | |
|     // assert encoding == ENCODING_EXTERNAL
 | |
|     // if (encoding != ENCODING_EXTERNAL)
 | |
|     //     throw new RuntimeException ("encoding = " + encoding);
 | |
| 
 | |
|     if (encodingName.equals("UTF-16BE"))
 | |
|       {
 | |
|         encoding = ENCODING_UCS_2_12;
 | |
|         return;
 | |
|       }
 | |
|     if (encodingName.equals("UTF-16LE"))
 | |
|       {
 | |
|         encoding = ENCODING_UCS_2_21;
 | |
|         return;
 | |
|       }
 | |
| 
 | |
|     // We couldn't use the builtin decoders at all.  But we can try to
 | |
|     // create a reader, since we haven't messed up buffering.  Tweak
 | |
|     // the encoding name if necessary.
 | |
| 
 | |
|     if (encodingName.equals("UTF-16")
 | |
|         || encodingName.equals("ISO-10646-UCS-2"))
 | |
|       {
 | |
|         encodingName = "Unicode";
 | |
|       }
 | |
|     // Ignoring all the EBCDIC aliases here
 | |
| 
 | |
|     reader = new InputStreamReader(is, encodingName);
 | |
|     sourceType = INPUT_READER;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse miscellaneous markup outside the document element and DOCTYPE
 | |
|    * declaration.
 | |
|    * <pre>
 | |
|    * [27] Misc ::= Comment | PI | S
 | |
|    * </pre>
 | |
|    */
 | |
|   private void parseMisc()
 | |
|     throws Exception
 | |
|   {
 | |
|     while (true)
 | |
|       {
 | |
|         skipWhitespace();
 | |
|         if (tryRead(startDelimPI))
 | |
|           {
 | |
|             parsePI();
 | |
|           }
 | |
|         else if (tryRead(startDelimComment))
 | |
|           {
 | |
|             parseComment();
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             return;
 | |
|           }
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse a document type declaration.
 | |
|    * <pre>
 | |
|    * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
 | |
|    *    ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
 | |
|    * </pre>
 | |
|    * <p> (The <code><!DOCTYPE</code> has already been read.)
 | |
|    */
 | |
|   private void parseDoctypedecl()
 | |
|     throws Exception
 | |
|   {
 | |
|     String rootName;
 | |
|     ExternalIdentifiers ids;
 | |
| 
 | |
|     // Read the document type name.
 | |
|     requireWhitespace();
 | |
|     rootName = readNmtoken(true);
 | |
| 
 | |
|     // Read the External subset's IDs
 | |
|     skipWhitespace();
 | |
|     ids = readExternalIds(false, true);
 | |
| 
 | |
|     // report (a) declaration of name, (b) lexical info (ids)
 | |
|     handler.doctypeDecl(rootName, ids.publicId, ids.systemId);
 | |
| 
 | |
|     // Internal subset is parsed first, if present
 | |
|     skipWhitespace();
 | |
|     if (tryRead('['))
 | |
|       {
 | |
| 
 | |
|         // loop until the subset ends
 | |
|         while (true)
 | |
|           {
 | |
|             doReport = expandPE = true;
 | |
|             skipWhitespace();
 | |
|             doReport = expandPE = false;
 | |
|             if (tryRead(']'))
 | |
|               {
 | |
|                 break;     // end of subset
 | |
|               }
 | |
|             else
 | |
|               {
 | |
|                 // WFC, PEs in internal subset (only between decls)
 | |
|                 peIsError = expandPE = true;
 | |
|                 parseMarkupdecl();
 | |
|                 peIsError = expandPE = false;
 | |
|               }
 | |
|           }
 | |
|       }
 | |
|     skipWhitespace();
 | |
|     require('>');
 | |
| 
 | |
|     // Read the external subset, if any
 | |
|     InputSource subset;
 | |
| 
 | |
|     if (ids.systemId == null)
 | |
|       {
 | |
|         subset = handler.getExternalSubset(rootName,
 | |
|                                            handler.getSystemId());
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         subset = null;
 | |
|       }
 | |
|     if (ids.systemId != null || subset != null)
 | |
|       {
 | |
|         pushString(null, ">");
 | |
| 
 | |
|         // NOTE:  [dtd] is so we say what SAX2 expects,
 | |
|         // though it's misleading (subset, not entire dtd)
 | |
|         if (ids.systemId != null)
 | |
|           {
 | |
|             pushURL(true, "[dtd]", ids, null, null, null, true);
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             handler.warn("modifying document by adding external subset");
 | |
|             pushURL(true, "[dtd]",
 | |
|                     new ExternalIdentifiers(subset.getPublicId(),
 | |
|                                             subset.getSystemId(),
 | |
|                                             null),
 | |
|                     subset.getCharacterStream(),
 | |
|                     subset.getByteStream(),
 | |
|                     subset.getEncoding(),
 | |
|                     false);
 | |
|           }
 | |
| 
 | |
|         // Loop until we end up back at '>'
 | |
|         while (true)
 | |
|           {
 | |
|             doReport = expandPE = true;
 | |
|             skipWhitespace();
 | |
|             doReport = expandPE = false;
 | |
|             if (tryRead('>'))
 | |
|               {
 | |
|                 break;
 | |
|               }
 | |
|             else
 | |
|               {
 | |
|                 expandPE = true;
 | |
|                 parseMarkupdecl();
 | |
|                 expandPE = false;
 | |
|               }
 | |
|           }
 | |
| 
 | |
|         // the ">" string isn't popped yet
 | |
|         if (inputStack.size() != 1)
 | |
|           {
 | |
|             error("external subset has unmatched '>'");
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     // done dtd
 | |
|     handler.endDoctype();
 | |
|     expandPE = false;
 | |
|     doReport = true;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse a markup declaration in the internal or external DTD subset.
 | |
|    * <pre>
 | |
|    * [29] markupdecl ::= elementdecl | Attlistdecl | EntityDecl
 | |
|    *    | NotationDecl | PI | Comment
 | |
|    * [30] extSubsetDecl ::= (markupdecl | conditionalSect
 | |
|    *    | PEReference | S) *
 | |
|    * </pre>
 | |
|    * <p> Reading toplevel PE references is handled as a lexical issue
 | |
|    * by the caller, as is whitespace.
 | |
|    */
 | |
|   private void parseMarkupdecl()
 | |
|     throws Exception
 | |
|   {
 | |
|     char[] saved = null;
 | |
|     boolean savedPE = expandPE;
 | |
| 
 | |
|     // prevent "<%foo;" and ensures saved entity is right
 | |
|     require('<');
 | |
|     unread('<');
 | |
|     expandPE = false;
 | |
| 
 | |
|     if (tryRead("<!ELEMENT"))
 | |
|       {
 | |
|         saved = readBuffer;
 | |
|         expandPE = savedPE;
 | |
|         parseElementDecl();
 | |
|       }
 | |
|     else if (tryRead("<!ATTLIST"))
 | |
|       {
 | |
|         saved = readBuffer;
 | |
|         expandPE = savedPE;
 | |
|         parseAttlistDecl();
 | |
|       }
 | |
|     else if (tryRead("<!ENTITY"))
 | |
|       {
 | |
|         saved = readBuffer;
 | |
|         expandPE = savedPE;
 | |
|         parseEntityDecl();
 | |
|       }
 | |
|     else if (tryRead("<!NOTATION"))
 | |
|       {
 | |
|         saved = readBuffer;
 | |
|         expandPE = savedPE;
 | |
|         parseNotationDecl();
 | |
|       }
 | |
|     else if (tryRead(startDelimPI))
 | |
|       {
 | |
|         saved = readBuffer;
 | |
|         expandPE = savedPE;
 | |
|         parsePI();
 | |
|       }
 | |
|     else if (tryRead(startDelimComment))
 | |
|       {
 | |
|         saved = readBuffer;
 | |
|         expandPE = savedPE;
 | |
|         parseComment();
 | |
|       }
 | |
|     else if (tryRead("<!["))
 | |
|       {
 | |
|         saved = readBuffer;
 | |
|         expandPE = savedPE;
 | |
|         if (inputStack.size() > 0)
 | |
|           {
 | |
|             parseConditionalSect(saved);
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             error("conditional sections illegal in internal subset");
 | |
|           }
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         error("expected markup declaration");
 | |
|       }
 | |
| 
 | |
|     // VC: Proper Decl/PE Nesting
 | |
|     if (readBuffer != saved)
 | |
|       {
 | |
|         handler.verror("Illegal Declaration/PE nesting");
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse an element, with its tags.
 | |
|    * <pre>
 | |
|    * [39] element ::= EmptyElementTag | STag content ETag
 | |
|    * [40] STag ::= '<' Name (S Attribute)* S? '>'
 | |
|    * [44] EmptyElementTag ::= '<' Name (S Attribute)* S? '/>'
 | |
|    * </pre>
 | |
|    * <p> (The '<' has already been read.)
 | |
|    * <p>NOTE: this method actually chains onto parseContent (), if necessary,
 | |
|    * and parseContent () will take care of calling parseETag ().
 | |
|    */
 | |
|   private void parseElement(boolean maybeGetSubset)
 | |
|     throws Exception
 | |
|   {
 | |
|     String gi;
 | |
|     char c;
 | |
|     int oldElementContent = currentElementContent;
 | |
|     String oldElement = currentElement;
 | |
|     ElementDecl element;
 | |
| 
 | |
|     // This is the (global) counter for the
 | |
|     // array of specified attributes.
 | |
|     tagAttributePos = 0;
 | |
| 
 | |
|     // Read the element type name.
 | |
|     gi = readNmtoken(true);
 | |
| 
 | |
|     // If we saw no DTD, and this is the document root element,
 | |
|     // let the application modify the input stream by providing one.
 | |
|     if (maybeGetSubset)
 | |
|       {
 | |
|         InputSource subset = handler.getExternalSubset(gi,
 | |
|                                                        handler.getSystemId());
 | |
|         if (subset != null)
 | |
|           {
 | |
|             String publicId = subset.getPublicId();
 | |
|             String systemId = subset.getSystemId();
 | |
| 
 | |
|             handler.warn("modifying document by adding DTD");
 | |
|             handler.doctypeDecl(gi, publicId, systemId);
 | |
|             pushString(null, ">");
 | |
| 
 | |
|             // NOTE:  [dtd] is so we say what SAX2 expects,
 | |
|             // though it's misleading (subset, not entire dtd)
 | |
|             pushURL(true, "[dtd]",
 | |
|                     new ExternalIdentifiers(publicId, systemId, null),
 | |
|                     subset.getCharacterStream(),
 | |
|                     subset.getByteStream(),
 | |
|                     subset.getEncoding(),
 | |
|                     false);
 | |
| 
 | |
|             // Loop until we end up back at '>'
 | |
|             while (true)
 | |
|               {
 | |
|                 doReport = expandPE = true;
 | |
|                 skipWhitespace();
 | |
|                 doReport = expandPE = false;
 | |
|                 if (tryRead('>'))
 | |
|                   {
 | |
|                     break;
 | |
|                   }
 | |
|                 else
 | |
|                   {
 | |
|                     expandPE = true;
 | |
|                     parseMarkupdecl();
 | |
|                     expandPE = false;
 | |
|                   }
 | |
|               }
 | |
| 
 | |
|             // the ">" string isn't popped yet
 | |
|             if (inputStack.size() != 1)
 | |
|               {
 | |
|                 error("external subset has unmatched '>'");
 | |
|               }
 | |
| 
 | |
|             handler.endDoctype();
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     // Determine the current content type.
 | |
|     currentElement = gi;
 | |
|     element = (ElementDecl) elementInfo.get(gi);
 | |
|     currentElementContent = getContentType(element, CONTENT_ANY);
 | |
| 
 | |
|     // Read the attributes, if any.
 | |
|     // After this loop, "c" is the closing delimiter.
 | |
|     boolean white = tryWhitespace();
 | |
|     c = readCh();
 | |
|     while (c != '/' && c != '>')
 | |
|       {
 | |
|         unread(c);
 | |
|         if (!white)
 | |
|           {
 | |
|             error("need whitespace between attributes");
 | |
|           }
 | |
|         parseAttribute(gi);
 | |
|         white = tryWhitespace();
 | |
|         c = readCh();
 | |
|       }
 | |
| 
 | |
|     // Supply any defaulted attributes.
 | |
|     Iterator atts = declaredAttributes(element);
 | |
|     if (atts != null)
 | |
|       {
 | |
|         String aname;
 | |
| loop:
 | |
|         while (atts.hasNext())
 | |
|           {
 | |
|             aname = (String) atts.next();
 | |
|             // See if it was specified.
 | |
|             for (int i = 0; i < tagAttributePos; i++)
 | |
|               {
 | |
|                 if (tagAttributes[i] == aname)
 | |
|                   {
 | |
|                     continue loop;
 | |
|                   }
 | |
|               }
 | |
|             // ... or has a default
 | |
|             String value = getAttributeDefaultValue(gi, aname);
 | |
| 
 | |
|             if (value == null)
 | |
|               {
 | |
|                 continue;
 | |
|               }
 | |
|             handler.attribute(aname, value, false);
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     // Figure out if this is a start tag
 | |
|     // or an empty element, and dispatch an
 | |
|     // event accordingly.
 | |
|     switch (c)
 | |
|       {
 | |
|       case '>':
 | |
|         handler.startElement(gi);
 | |
|         parseContent();
 | |
|         break;
 | |
|       case '/':
 | |
|         require('>');
 | |
|         handler.startElement(gi);
 | |
|         handler.endElement(gi);
 | |
|         break;
 | |
|       }
 | |
| 
 | |
|     // Restore the previous state.
 | |
|     currentElement = oldElement;
 | |
|     currentElementContent = oldElementContent;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse an attribute assignment.
 | |
|    * <pre>
 | |
|    * [41] Attribute ::= Name Eq AttValue
 | |
|    * </pre>
 | |
|    * @param name The name of the attribute's element.
 | |
|    * @see SAXDriver#attribute
 | |
|    */
 | |
|   private void parseAttribute(String name)
 | |
|     throws Exception
 | |
|   {
 | |
|     String aname;
 | |
|     String type;
 | |
|     String value;
 | |
|     int flags = LIT_ATTRIBUTE |  LIT_ENTITY_REF;
 | |
| 
 | |
|     // Read the attribute name.
 | |
|     aname = readNmtoken(true);
 | |
|     type = getAttributeType(name, aname);
 | |
| 
 | |
|     // Parse '='
 | |
|     parseEq();
 | |
| 
 | |
|     // Read the value, normalizing whitespace
 | |
|     // unless it is CDATA.
 | |
|     if (handler.stringInterning)
 | |
|       {
 | |
|         if (type == "CDATA" || type == null)
 | |
|           {
 | |
|             value = readLiteral(flags);
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             value = readLiteral(flags | LIT_NORMALIZE);
 | |
|           }
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         if (type == null || type.equals("CDATA"))
 | |
|           {
 | |
|             value = readLiteral(flags);
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             value = readLiteral(flags | LIT_NORMALIZE);
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     // WFC: no duplicate attributes
 | |
|     for (int i = 0; i < tagAttributePos; i++)
 | |
|       {
 | |
|         if (aname.equals(tagAttributes [i]))
 | |
|           {
 | |
|             error("duplicate attribute", aname, null);
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     // Inform the handler about the
 | |
|     // attribute.
 | |
|     handler.attribute(aname, value, true);
 | |
|     dataBufferPos = 0;
 | |
| 
 | |
|     // Note that the attribute has been
 | |
|     // specified.
 | |
|     if (tagAttributePos == tagAttributes.length)
 | |
|       {
 | |
|         String newAttrib[] = new String[tagAttributes.length * 2];
 | |
|         System.arraycopy(tagAttributes, 0, newAttrib, 0, tagAttributePos);
 | |
|         tagAttributes = newAttrib;
 | |
|       }
 | |
|     tagAttributes[tagAttributePos++] = aname;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse an equals sign surrounded by optional whitespace.
 | |
|    * <pre>
 | |
|    * [25] Eq ::= S? '=' S?
 | |
|    * </pre>
 | |
|    */
 | |
|   private void parseEq()
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     skipWhitespace();
 | |
|     require('=');
 | |
|     skipWhitespace();
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse an end tag.
 | |
|    * <pre>
 | |
|    * [42] ETag ::= '</' Name S? '>'
 | |
|    * </pre>
 | |
|    * <p>NOTE: parseContent () chains to here, we already read the
 | |
|    * "</".
 | |
|    */
 | |
|   private void parseETag()
 | |
|     throws Exception
 | |
|   {
 | |
|     require(currentElement);
 | |
|     skipWhitespace();
 | |
|     require('>');
 | |
|     handler.endElement(currentElement);
 | |
|     // not re-reporting any SAXException re bogus end tags,
 | |
|     // even though that diagnostic might be clearer ...
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse the content of an element.
 | |
|    * <pre>
 | |
|    * [43] content ::= (element | CharData | Reference
 | |
|    *    | CDSect | PI | Comment)*
 | |
|    * [67] Reference ::= EntityRef | CharRef
 | |
|    * </pre>
 | |
|    * <p> NOTE: consumes ETtag.
 | |
|    */
 | |
|   private void parseContent()
 | |
|     throws Exception
 | |
|   {
 | |
|     char c;
 | |
| 
 | |
|     while (true)
 | |
|       {
 | |
|         // consume characters (or ignorable whitspace) until delimiter
 | |
|         parseCharData();
 | |
| 
 | |
|         // Handle delimiters
 | |
|         c = readCh();
 | |
|         switch (c)
 | |
|           {
 | |
|           case '&':       // Found "&"
 | |
|             c = readCh();
 | |
|             if (c == '#')
 | |
|               {
 | |
|                 parseCharRef();
 | |
|               }
 | |
|             else
 | |
|               {
 | |
|                 unread(c);
 | |
|                 parseEntityRef(true);
 | |
|               }
 | |
|             isDirtyCurrentElement = true;
 | |
|             break;
 | |
| 
 | |
|           case '<':       // Found "<"
 | |
|             dataBufferFlush();
 | |
|             c = readCh();
 | |
|             switch (c)
 | |
|               {
 | |
|               case '!':       // Found "<!"
 | |
|                 c = readCh();
 | |
|                 switch (c)
 | |
|                   {
 | |
|                   case '-':     // Found "<!-"
 | |
|                     require('-');
 | |
|                     isDirtyCurrentElement = false;
 | |
|                     parseComment();
 | |
|                     break;
 | |
|                   case '[':     // Found "<!["
 | |
|                     isDirtyCurrentElement = false;
 | |
|                     require("CDATA[");
 | |
|                     handler.startCDATA();
 | |
|                     inCDATA = true;
 | |
|                     parseCDSect();
 | |
|                     inCDATA = false;
 | |
|                     handler.endCDATA();
 | |
|                     break;
 | |
|                   default:
 | |
|                     error("expected comment or CDATA section", c, null);
 | |
|                     break;
 | |
|                   }
 | |
|                 break;
 | |
| 
 | |
|               case '?':     // Found "<?"
 | |
|                 isDirtyCurrentElement = false;
 | |
|                 parsePI();
 | |
|                 break;
 | |
| 
 | |
|               case '/':     // Found "</"
 | |
|                 isDirtyCurrentElement = false;
 | |
|                 parseETag();
 | |
|                 return;
 | |
| 
 | |
|               default:     // Found "<" followed by something else
 | |
|                 isDirtyCurrentElement = false;
 | |
|                 unread(c);
 | |
|                 parseElement(false);
 | |
|                 break;
 | |
|               }
 | |
|           }
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse an element type declaration.
 | |
|    * <pre>
 | |
|    * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
 | |
|    * </pre>
 | |
|    * <p> NOTE: the '<!ELEMENT' has already been read.
 | |
|    */
 | |
|   private void parseElementDecl()
 | |
|     throws Exception
 | |
|   {
 | |
|     String name;
 | |
| 
 | |
|     requireWhitespace();
 | |
|     // Read the element type name.
 | |
|     name = readNmtoken(true);
 | |
| 
 | |
|     requireWhitespace();
 | |
|     // Read the content model.
 | |
|     parseContentspec(name);
 | |
| 
 | |
|     skipWhitespace();
 | |
|     require('>');
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Content specification.
 | |
|    * <pre>
 | |
|    * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | elements
 | |
|    * </pre>
 | |
|    */
 | |
|   private void parseContentspec(String name)
 | |
|     throws Exception
 | |
|   {
 | |
|     // FIXME: move elementDecl() into setElement(), pass EMTPY/ANY ...
 | |
|     if (tryRead("EMPTY"))
 | |
|       {
 | |
|         setElement(name, CONTENT_EMPTY, null, null);
 | |
|         if (!skippedPE)
 | |
|           {
 | |
|             handler.getDeclHandler().elementDecl(name, "EMPTY");
 | |
|           }
 | |
|         return;
 | |
|       }
 | |
|     else if (tryRead("ANY"))
 | |
|       {
 | |
|         setElement(name, CONTENT_ANY, null, null);
 | |
|         if (!skippedPE)
 | |
|           {
 | |
|             handler.getDeclHandler().elementDecl(name, "ANY");
 | |
|           }
 | |
|         return;
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         String model;
 | |
|         char[] saved;
 | |
| 
 | |
|         require('(');
 | |
|         saved = readBuffer;
 | |
|         dataBufferAppend('(');
 | |
|         skipWhitespace();
 | |
|         if (tryRead("#PCDATA"))
 | |
|           {
 | |
|             dataBufferAppend("#PCDATA");
 | |
|             parseMixed(saved);
 | |
|             model = dataBufferToString();
 | |
|             setElement(name, CONTENT_MIXED, model, null);
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             parseElements(saved);
 | |
|             model = dataBufferToString();
 | |
|             setElement(name, CONTENT_ELEMENTS, model, null);
 | |
|           }
 | |
|         if (!skippedPE)
 | |
|           {
 | |
|             handler.getDeclHandler().elementDecl(name, model);
 | |
|           }
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse an element-content model.
 | |
|    * <pre>
 | |
|    * [47] elements ::= (choice | seq) ('?' | '*' | '+')?
 | |
|    * [49] choice ::= '(' S? cp (S? '|' S? cp)+ S? ')'
 | |
|    * [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')'
 | |
|    * </pre>
 | |
|    *
 | |
|    * <p> NOTE: the opening '(' and S have already been read.
 | |
|    *
 | |
|    * @param saved Buffer for entity that should have the terminal ')'
 | |
|    */
 | |
|   private void parseElements(char[] saved)
 | |
|     throws Exception
 | |
|   {
 | |
|     char c;
 | |
|     char sep;
 | |
| 
 | |
|     // Parse the first content particle
 | |
|     skipWhitespace();
 | |
|     parseCp();
 | |
| 
 | |
|     // Check for end or for a separator.
 | |
|     skipWhitespace();
 | |
|     c = readCh();
 | |
|     switch (c)
 | |
|       {
 | |
|       case ')':
 | |
|         // VC: Proper Group/PE Nesting
 | |
|         if (readBuffer != saved)
 | |
|           {
 | |
|             handler.verror("Illegal Group/PE nesting");
 | |
|           }
 | |
| 
 | |
|         dataBufferAppend(')');
 | |
|         c = readCh();
 | |
|         switch (c)
 | |
|           {
 | |
|           case '*':
 | |
|           case '+':
 | |
|           case '?':
 | |
|             dataBufferAppend(c);
 | |
|             break;
 | |
|           default:
 | |
|             unread(c);
 | |
|           }
 | |
|         return;
 | |
|       case ',':       // Register the separator.
 | |
|       case '|':
 | |
|         sep = c;
 | |
|         dataBufferAppend(c);
 | |
|         break;
 | |
|       default:
 | |
|         error("bad separator in content model", c, null);
 | |
|         return;
 | |
|       }
 | |
| 
 | |
|     // Parse the rest of the content model.
 | |
|     while (true)
 | |
|       {
 | |
|         skipWhitespace();
 | |
|         parseCp();
 | |
|         skipWhitespace();
 | |
|         c = readCh();
 | |
|         if (c == ')')
 | |
|           {
 | |
|             // VC: Proper Group/PE Nesting
 | |
|             if (readBuffer != saved)
 | |
|               {
 | |
|                 handler.verror("Illegal Group/PE nesting");
 | |
|               }
 | |
| 
 | |
|             dataBufferAppend(')');
 | |
|             break;
 | |
|           }
 | |
|         else if (c != sep)
 | |
|           {
 | |
|             error("bad separator in content model", c, null);
 | |
|             return;
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             dataBufferAppend(c);
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     // Check for the occurrence indicator.
 | |
|     c = readCh();
 | |
|     switch (c)
 | |
|       {
 | |
|       case '?':
 | |
|       case '*':
 | |
|       case '+':
 | |
|         dataBufferAppend(c);
 | |
|         return;
 | |
|       default:
 | |
|         unread(c);
 | |
|         return;
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse a content particle.
 | |
|    * <pre>
 | |
|    * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 | |
|    * </pre>
 | |
|    */
 | |
|   private void parseCp()
 | |
|     throws Exception
 | |
|   {
 | |
|     if (tryRead('('))
 | |
|       {
 | |
|         dataBufferAppend('(');
 | |
|         parseElements(readBuffer);
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         dataBufferAppend(readNmtoken(true));
 | |
|         char c = readCh();
 | |
|         switch (c)
 | |
|           {
 | |
|           case '?':
 | |
|           case '*':
 | |
|           case '+':
 | |
|             dataBufferAppend(c);
 | |
|             break;
 | |
|           default:
 | |
|             unread(c);
 | |
|             break;
 | |
|           }
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse mixed content.
 | |
|    * <pre>
 | |
|    * [51] Mixed ::= '(' S? ( '#PCDATA' (S? '|' S? Name)*) S? ')*'
 | |
|    *        | '(' S? ('#PCDATA') S? ')'
 | |
|    * </pre>
 | |
|    *
 | |
|    * @param saved Buffer for entity that should have the terminal ')'
 | |
|    */
 | |
|   private void parseMixed(char[] saved)
 | |
|     throws Exception
 | |
|   {
 | |
|     // Check for PCDATA alone.
 | |
|     skipWhitespace();
 | |
|     if (tryRead(')'))
 | |
|       {
 | |
|         // VC: Proper Group/PE Nesting
 | |
|         if (readBuffer != saved)
 | |
|           {
 | |
|             handler.verror("Illegal Group/PE nesting");
 | |
|           }
 | |
| 
 | |
|         dataBufferAppend(")*");
 | |
|         tryRead('*');
 | |
|         return;
 | |
|       }
 | |
| 
 | |
|     // Parse mixed content.
 | |
|     skipWhitespace();
 | |
|     while (!tryRead(")"))
 | |
|       {
 | |
|         require('|');
 | |
|         dataBufferAppend('|');
 | |
|         skipWhitespace();
 | |
|         dataBufferAppend(readNmtoken(true));
 | |
|         skipWhitespace();
 | |
|       }
 | |
| 
 | |
|     // VC: Proper Group/PE Nesting
 | |
|     if (readBuffer != saved)
 | |
|       {
 | |
|         handler.verror("Illegal Group/PE nesting");
 | |
|       }
 | |
| 
 | |
|     require('*');
 | |
|     dataBufferAppend(")*");
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse an attribute list declaration.
 | |
|    * <pre>
 | |
|    * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
 | |
|    * </pre>
 | |
|    * <p>NOTE: the '<!ATTLIST' has already been read.
 | |
|    */
 | |
|   private void parseAttlistDecl()
 | |
|     throws Exception
 | |
|   {
 | |
|     String elementName;
 | |
| 
 | |
|     requireWhitespace();
 | |
|     elementName = readNmtoken(true);
 | |
|     boolean white = tryWhitespace();
 | |
|     while (!tryRead('>'))
 | |
|       {
 | |
|         if (!white)
 | |
|           {
 | |
|             error("whitespace required before attribute definition");
 | |
|           }
 | |
|         parseAttDef(elementName);
 | |
|         white = tryWhitespace();
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse one attribute definition of an attribute list declaration.
 | |
|    * <pre>
 | |
|    * [53] AttDef ::= S Name S AttType S DefaultDecl
 | |
|    * </pre>
 | |
|    * <p>Parsing starts at the attribute name; the leading whitespace is
 | |
|    * not read by this method.
 | |
|    *
 | |
|    * @param elementName name of the owning element, handed on unchanged
 | |
|    *   to parseDefault
 | |
|    */
 | |
|   private void parseAttDef(String elementName)
 | |
|     throws Exception
 | |
|   {
 | |
|     // Name, mandatory whitespace, then the type.
 | |
|     String name = readNmtoken(true);
 | |
|     requireWhitespace();
 | |
|     String type = readAttType();
 | |
| 
 | |
|     // Only enumerated and notation types carry a list of values.  With
 | |
|     // string interning on, the type is compared by identity.
 | |
|     boolean hasValueList;
 | |
|     if (handler.stringInterning)
 | |
|       {
 | |
|         hasValueList = "ENUMERATION" == type || "NOTATION" == type;
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         hasValueList = "ENUMERATION".equals(type) || "NOTATION".equals(type);
 | |
|       }
 | |
|     String enumer = hasValueList ? dataBufferToString() : null;
 | |
| 
 | |
|     // Mandatory whitespace, then the default declaration.
 | |
|     requireWhitespace();
 | |
|     parseDefault(elementName, name, type, enumer);
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse the attribute type.
 | |
|    * <pre>
 | |
|    * [54] AttType ::= StringType | TokenizedType | EnumeratedType
 | |
|    * [55] StringType ::= 'CDATA'
 | |
|    * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY'
 | |
|    *    | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
 | |
|    * [57] EnumeratedType ::= NotationType | Enumeration
 | |
|    * </pre>
 | |
|    */
 | |
|   private String readAttType()
 | |
|     throws Exception
 | |
|   {
 | |
|     if (tryRead('('))
 | |
|       {
 | |
|         parseEnumeration(false);
 | |
|         return "ENUMERATION";
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         String typeString = readNmtoken(true);
 | |
|         if (handler.stringInterning)
 | |
|           {
 | |
|             if ("NOTATION" == typeString)
 | |
|               {
 | |
|                 parseNotationType();
 | |
|                 return typeString;
 | |
|               }
 | |
|             else if ("CDATA" == typeString
 | |
|                      || "ID" == typeString
 | |
|                      || "IDREF" == typeString
 | |
|                      || "IDREFS" == typeString
 | |
|                      || "ENTITY" == typeString
 | |
|                      || "ENTITIES" == typeString
 | |
|                      || "NMTOKEN" == typeString
 | |
|                      || "NMTOKENS" == typeString)
 | |
|               {
 | |
|                 return typeString;
 | |
|               }
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             if ("NOTATION".equals(typeString))
 | |
|               {
 | |
|                 parseNotationType();
 | |
|                 return typeString;
 | |
|               }
 | |
|             else if ("CDATA".equals(typeString)
 | |
|                      || "ID".equals(typeString)
 | |
|                      || "IDREF".equals(typeString)
 | |
|                      || "IDREFS".equals(typeString)
 | |
|                      || "ENTITY".equals(typeString)
 | |
|                      || "ENTITIES".equals(typeString)
 | |
|                      || "NMTOKEN".equals(typeString)
 | |
|                      || "NMTOKENS".equals(typeString))
 | |
|               {
 | |
|                 return typeString;
 | |
|               }
 | |
|           }
 | |
|         error("illegal attribute type", typeString, null);
 | |
|         return null;
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse a parenthesized, '|'-separated list of tokens and collect its
 | |
|    * text form, "(a|b|c)" without whitespace, through dataBufferAppend.
 | |
|    * <pre>
 | |
|    * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
 | |
|    * </pre>
 | |
|    * <p>NOTE: the '(' has already been read.
 | |
|    *
 | |
|    * @param isNames passed to readNmtoken for every token of the list
 | |
|    */
 | |
|   private void parseEnumeration(boolean isNames)
 | |
|     throws Exception
 | |
|   {
 | |
|     dataBufferAppend('(');
 | |
| 
 | |
|     // One token per iteration; the loop is left at the closing ')'.
 | |
|     for (;;)
 | |
|       {
 | |
|         skipWhitespace();
 | |
|         dataBufferAppend(readNmtoken(isNames));
 | |
|         skipWhitespace();
 | |
|         if (tryRead(')'))
 | |
|           {
 | |
|             break;
 | |
|           }
 | |
|         require('|');
 | |
|         dataBufferAppend('|');
 | |
|       }
 | |
| 
 | |
|     dataBufferAppend(')');
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse the name list of a NOTATION attribute type.
 | |
|    * <pre>
 | |
|    * [58] NotationType ::= 'NOTATION' S '(' S? Name
 | |
|    *    (S? '|' S? Name)* S? ')'
 | |
|    * </pre>
 | |
|    * <p>NOTE: the 'NOTATION' has already been read.
 | |
|    * <p>The list itself is handled by parseEnumeration(true).
 | |
|    */
 | |
|   private void parseNotationType()
 | |
|     throws Exception
 | |
|   {
 | |
|     // Whitespace and the opening parenthesis are mandatory here.
 | |
|     requireWhitespace();
 | |
|     require('(');
 | |
|     parseEnumeration(true);
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse the default value for an attribute.
 | |
|    * <pre>
 | |
|    * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
 | |
|    *    | (('#FIXED' S)? AttValue)
 | |
|    * </pre>
 | |
|    */
 | |
|   private void parseDefault(String elementName, String name,
 | |
|                             String type, String enumer)
 | |
|     throws Exception
 | |
|   {
 | |
|     int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
 | |
|     String value = null;
 | |
|     int flags = LIT_ATTRIBUTE;
 | |
|     boolean saved = expandPE;
 | |
|     String defaultType = null;
 | |
| 
 | |
|     // LIT_ATTRIBUTE forces '<' checks now (ASAP) and turns whitespace
 | |
|     // chars to spaces (doesn't matter when that's done if it doesn't
 | |
|     // interfere with char refs expanding to whitespace).
 | |
| 
 | |
|     if (!skippedPE)
 | |
|       {
 | |
|         flags |= LIT_ENTITY_REF;
 | |
|         if (handler.stringInterning)
 | |
|           {
 | |
|             if ("CDATA" != type)
 | |
|               {
 | |
|                 flags |= LIT_NORMALIZE;
 | |
|               }
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             if (!"CDATA".equals(type))
 | |
|               {
 | |
|                 flags |= LIT_NORMALIZE;
 | |
|               }
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     expandPE = false;
 | |
|     if (tryRead('#'))
 | |
|       {
 | |
|         if (tryRead("FIXED"))
 | |
|           {
 | |
|             defaultType = "#FIXED";
 | |
|             valueType = ATTRIBUTE_DEFAULT_FIXED;
 | |
|             requireWhitespace();
 | |
|             value = readLiteral(flags);
 | |
|           }
 | |
|         else if (tryRead("REQUIRED"))
 | |
|           {
 | |
|             defaultType = "#REQUIRED";
 | |
|             valueType = ATTRIBUTE_DEFAULT_REQUIRED;
 | |
|           }
 | |
|         else if (tryRead("IMPLIED"))
 | |
|           {
 | |
|             defaultType = "#IMPLIED";
 | |
|             valueType = ATTRIBUTE_DEFAULT_IMPLIED;
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             error("illegal keyword for attribute default value");
 | |
|           }
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         value = readLiteral(flags);
 | |
|       }
 | |
|     expandPE = saved;
 | |
|     setAttribute(elementName, name, type, enumer, value, valueType);
 | |
|     if (handler.stringInterning)
 | |
|       {
 | |
|         if ("ENUMERATION" == type)
 | |
|           {
 | |
|             type = enumer;
 | |
|           }
 | |
|         else if ("NOTATION" == type)
 | |
|           {
 | |
|             type = "NOTATION " + enumer;
 | |
|           }
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         if ("ENUMERATION".equals(type))
 | |
|           {
 | |
|             type = enumer;
 | |
|           }
 | |
|         else if ("NOTATION".equals(type))
 | |
|           {
 | |
|             type = "NOTATION " + enumer;
 | |
|           }
 | |
|       }
 | |
|     if (!skippedPE)
 | |
|       {
 | |
|         handler.getDeclHandler().attributeDecl(elementName, name, type,
 | |
|                                                defaultType, value);
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse a conditional section.
 | |
|    * <pre>
 | |
|    * [61] conditionalSect ::= includeSect | ignoreSect
 | |
|    * [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
 | |
|    *    extSubsetDecl ']]>'
 | |
|    * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
 | |
|    *    ignoreSectContents* ']]>'
 | |
|    * [64] ignoreSectContents ::= Ignore
 | |
|    *    ('<![' ignoreSectContents* ']]>' Ignore )*
 | |
|    * [65] Ignore ::= Char* - (Char* ( '<![' | ']]>') Char* )
 | |
|    * </pre>
 | |
|    * <p>NOTE: the '<![' has already been read.
 | |
|    * <p>An INCLUDE section is parsed as a sequence of markup declarations
 | |
|    * up to the closing ']]>'.  An IGNORE section is skipped character by
 | |
|    * character while counting nested '<![' ... ']]>' pairs.
 | |
|    *
 | |
|    * @param saved read buffer to compare with the current one after the
 | |
|    *   opening '[' (presumably the buffer that was current when the
 | |
|    *   section started); a mismatch is reported as a validity error
 | |
|    */
 | |
|   private void parseConditionalSect(char[] saved)
 | |
|     throws Exception
 | |
|   {
 | |
|     skipWhitespace();
 | |
|     if (tryRead("INCLUDE"))
 | |
|       {
 | |
|         skipWhitespace();
 | |
|         require('[');
 | |
|         // VC: Proper Conditional Section/PE Nesting
 | |
|         if (readBuffer != saved)
 | |
|           {
 | |
|             handler.verror("Illegal Conditional Section/PE nesting");
 | |
|           }
 | |
|         skipWhitespace();
 | |
|         while (!tryRead("]]>"))
 | |
|           {
 | |
|             parseMarkupdecl();
 | |
|             skipWhitespace();
 | |
|           }
 | |
|       }
 | |
|     else if (tryRead("IGNORE"))
 | |
|       {
 | |
|         skipWhitespace();
 | |
|         require('[');
 | |
|         // VC: Proper Conditional Section/PE Nesting
 | |
|         if (readBuffer != saved)
 | |
|           {
 | |
|             handler.verror("Illegal Conditional Section/PE nesting");
 | |
|           }
 | |
|         char c;
 | |
|         // expandPE is off while the ignored text is skipped.
 | |
|         expandPE = false;
 | |
|         // "nest" counts the sections still open; it starts at 1 for
 | |
|         // this section and the loop ends when its ']]>' is reached.
 | |
|         for (int nest = 1; nest > 0; )
 | |
|           {
 | |
|             c = readCh();
 | |
|             switch (c)
 | |
|               {
 | |
|               case '<':
 | |
|                 if (tryRead("!["))
 | |
|                   {
 | |
|                     nest++;
 | |
|                   }
 | |
|                 break;
 | |
|               case ']':
 | |
|                 if (tryRead("]>"))
 | |
|                   {
 | |
|                     nest--;
 | |
|                   }
 | |
|                 break;
 | |
|               }
 | |
|           }
 | |
|         // NOTE(review): re-enabled unconditionally, not restored to the
 | |
|         // value it had on entry.
 | |
|         expandPE = true;
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         error("conditional section must begin with INCLUDE or IGNORE");
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   private void parseCharRef()
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     parseCharRef(true /* do flushDataBuffer by default */);
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Try to read a character reference without consuming data from buffer.
 | |
|    * <pre>
 | |
|    * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
 | |
|    * </pre>
 | |
|    * <p>NOTE: the '&#' has already been read.
 | |
|    */
 | |
|   private void tryReadCharRef()
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     int value = 0;
 | |
|     char c;
 | |
| 
 | |
|     if (tryRead('x'))
 | |
|       {
 | |
| loop1:
 | |
|         while (true)
 | |
|           {
 | |
|             c = readCh();
 | |
|             if (c == ';')
 | |
|               {
 | |
|                 break loop1;
 | |
|               }
 | |
|             else
 | |
|               {
 | |
|                 int n = Character.digit(c, 16);
 | |
|                 if (n == -1)
 | |
|                   {
 | |
|                     error("illegal character in character reference", c, null);
 | |
|                     break loop1;
 | |
|                   }
 | |
|                 value *= 16;
 | |
|                 value += n;
 | |
|               }
 | |
|           }
 | |
|       }
 | |
|     else
 | |
|       {
 | |
| loop2:
 | |
|         while (true)
 | |
|           {
 | |
|             c = readCh();
 | |
|             if (c == ';')
 | |
|               {
 | |
|                 break loop2;
 | |
|               }
 | |
|             else
 | |
|               {
 | |
|                 int n = Character.digit(c, 10);
 | |
|                 if (n == -1)
 | |
|                   {
 | |
|                     error("illegal character in character reference", c, null);
 | |
|                     break loop2;
 | |
|                   }
 | |
|                 value *= 10;
 | |
|                 value += n;
 | |
|               }
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     // check for character refs being legal XML
 | |
|     if ((value < 0x0020
 | |
|          && ! (value == '\n' || value == '\t' || value == '\r'))
 | |
|         || (value >= 0xD800 && value <= 0xDFFF)
 | |
|         || value == 0xFFFE || value == 0xFFFF
 | |
|         || value > 0x0010ffff)
 | |
|       {
 | |
|         error("illegal XML character reference U+"
 | |
|               + Integer.toHexString(value));
 | |
|       }
 | |
| 
 | |
|     // Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz
 | |
|     //  (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz:
 | |
|     if (value > 0x0010ffff)
 | |
|       {
 | |
|         // too big for surrogate
 | |
|         error("character reference " + value + " is too large for UTF-16",
 | |
|               Integer.toString(value), null);
 | |
|       }
 | |
| 
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Read and interpret a character reference.
 | |
|    * <pre>
 | |
|    * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
 | |
|    * </pre>
 | |
|    * <p>NOTE: the '&#' has already been read.
 | |
|    */
 | |
|   private void parseCharRef(boolean doFlush)
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     int value = 0;
 | |
|     char c;
 | |
| 
 | |
|     if (tryRead('x'))
 | |
|       {
 | |
| loop1:
 | |
|         while (true)
 | |
|           {
 | |
|             c = readCh();
 | |
|             if (c == ';')
 | |
|               {
 | |
|                 break loop1;
 | |
|               }
 | |
|             else
 | |
|               {
 | |
|                 int n = Character.digit(c, 16);
 | |
|                 if (n == -1)
 | |
|                   {
 | |
|                     error("illegal character in character reference", c, null);
 | |
|                     break loop1;
 | |
|                   }
 | |
|                 value *= 16;
 | |
|                 value += n;
 | |
|               }
 | |
|           }
 | |
|       }
 | |
|     else
 | |
|       {
 | |
| loop2:
 | |
|         while (true)
 | |
|           {
 | |
|             c = readCh();
 | |
|             if (c == ';')
 | |
|               {
 | |
|                 break loop2;
 | |
|               }
 | |
|             else
 | |
|               {
 | |
|                 int n = Character.digit(c, 10);
 | |
|                 if (n == -1)
 | |
|                   {
 | |
|                     error("illegal character in character reference", c, null);
 | |
|                     break loop2;
 | |
|                   }
 | |
|                 value *= 10;
 | |
|                 value += c - '0';
 | |
|               }
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     // check for character refs being legal XML
 | |
|     if ((value < 0x0020
 | |
|          && ! (value == '\n' || value == '\t' || value == '\r'))
 | |
|         || (value >= 0xD800 && value <= 0xDFFF)
 | |
|         || value == 0xFFFE || value == 0xFFFF
 | |
|         || value > 0x0010ffff)
 | |
|       {
 | |
|         error("illegal XML character reference U+"
 | |
|               + Integer.toHexString(value));
 | |
|       }
 | |
| 
 | |
|     // Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz
 | |
|     //  (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz:
 | |
|     if (value <= 0x0000ffff)
 | |
|       {
 | |
|         // no surrogates needed
 | |
|         dataBufferAppend((char) value);
 | |
|       }
 | |
|     else if (value <= 0x0010ffff)
 | |
|       {
 | |
|         value -= 0x10000;
 | |
|         // > 16 bits, surrogate needed
 | |
|         dataBufferAppend((char) (0xd800 | (value >> 10)));
 | |
|         dataBufferAppend((char) (0xdc00 | (value & 0x0003ff)));
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         // too big for surrogate
 | |
|         error("character reference " + value + " is too large for UTF-16",
 | |
|               Integer.toString(value), null);
 | |
|       }
 | |
|     if (doFlush)
 | |
|       {
 | |
|         dataBufferFlush();
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse and expand an entity reference.
 | |
|    * <pre>
 | |
|    * [68] EntityRef ::= '&' Name ';'
 | |
|    * </pre>
 | |
|    * <p>NOTE: the '&' has already been read.
 | |
|    * <p>The name and the closing ';' are read, then the entity type
 | |
|    * decides what happens:
 | |
|    * <ul>
 | |
|    * <li>undeclared: a validity error (plus skippedEntity when external
 | |
|    * entities are allowed) if a PE was skipped and the document is not
 | |
|    * standalone, otherwise error();</li>
 | |
|    * <li>internal: the value is pushed as input, scanned once to check
 | |
|    * the syntax of the references it contains, and the read position is
 | |
|    * put back to the start of the pushed text;</li>
 | |
|    * <li>external text: pushed with pushURL when allowed, otherwise
 | |
|    * error();</li>
 | |
|    * <li>unparsed (NDATA): always error().</li>
 | |
|    * </ul>
 | |
|    * @param externalAllowed External entities are allowed here.
 | |
|    */
 | |
|   private void parseEntityRef(boolean externalAllowed)
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     String name;
 | |
| 
 | |
|     name = readNmtoken(true);
 | |
|     require(';');
 | |
|     switch (getEntityType(name))
 | |
|       {
 | |
|       case ENTITY_UNDECLARED:
 | |
|         // NOTE:  XML REC describes amazingly convoluted handling for
 | |
|         // this case.  Nothing as meaningful as being a WFness error
 | |
|         // unless the processor might _legitimately_ not have seen a
 | |
|         // declaration ... which is what this implements.
 | |
|         String message;
 | |
| 
 | |
|         message = "reference to undeclared general entity " + name;
 | |
|         if (skippedPE && !docIsStandalone)
 | |
|           {
 | |
|             handler.verror(message);
 | |
|             // we don't know this entity, and it might be external...
 | |
|             if (externalAllowed)
 | |
|               {
 | |
|                 handler.skippedEntity(name);
 | |
|               }
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             error(message);
 | |
|           }
 | |
|         break;
 | |
|       case ENTITY_INTERNAL:
 | |
|           pushString(name, getEntityValue(name));
 | |
| 
 | |
|           //workaround for possible input pop before marking
 | |
|           //the buffer reading position
 | |
|           char t = readCh();
 | |
|           unread(t);
 | |
|           int bufferPosMark = readBufferPos; // start of the pushed text
 | |
| 
 | |
|           int end = readBufferPos + getEntityValue(name).length();
 | |
|           for (int k = readBufferPos; k < end; k++)
 | |
|             {
 | |
|               t = readCh();
 | |
|               if (t == '&')
 | |
|                 {
 | |
|                   t = readCh();
 | |
|                   if (t  == '#')
 | |
|                     {
 | |
|                       //try to match a character ref
 | |
|                       tryReadCharRef();
 | |
| 
 | |
|                       //everything has been read
 | |
|                       if (readBufferPos >= end)
 | |
|                         {
 | |
|                           break;
 | |
|                         }
 | |
|                       k = readBufferPos; // resync with the read position
 | |
|                       continue;
 | |
|                     }
 | |
|                   else if (Character.isLetter(t))
 | |
|                     {
 | |
|                       //looks like an entity ref
 | |
|                       unread(t);
 | |
|                       readNmtoken(true); // name is only checked, not used
 | |
|                       require(';');
 | |
| 
 | |
|                       //everything has been read
 | |
|                       if (readBufferPos >= end)
 | |
|                         {
 | |
|                           break;
 | |
|                         }
 | |
|                       k = readBufferPos; // resync with the read position
 | |
|                       continue;
 | |
|                     }
 | |
|                   error(" malformed entity reference");
 | |
|                 }
 | |
| 
 | |
|             }
 | |
|           readBufferPos = bufferPosMark; // rewind: the scan was a check only
 | |
|           break;
 | |
|       case ENTITY_TEXT:
 | |
|           if (externalAllowed)
 | |
|             {
 | |
|               pushURL(false, name, getEntityIds(name),
 | |
|                       null, null, null, true);
 | |
|             }
 | |
|           else
 | |
|             {
 | |
|               error("reference to external entity in attribute value.",
 | |
|                     name, null);
 | |
|             }
 | |
|           break;
 | |
|       case ENTITY_NDATA:
 | |
|           if (externalAllowed)
 | |
|             {
 | |
|               error("unparsed entity reference in content", name, null);
 | |
|             }
 | |
|           else
 | |
|             {
 | |
|               error("reference to external entity in attribute value.",
 | |
|                     name, null);
 | |
|             }
 | |
|           break;
 | |
|       default:
 | |
|           throw new RuntimeException(); // unknown entity type
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse and expand a parameter entity reference.
 | |
|    * <pre>
 | |
|    * [69] PEReference ::= '%' Name ';'
 | |
|    * </pre>
 | |
|    * <p>NOTE: the '%' has already been read.
 | |
|    */
 | |
|   private void parsePEReference()
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     String name;
 | |
| 
 | |
|     name = "%" + readNmtoken(true);
 | |
|     require(';');
 | |
|     switch (getEntityType(name))
 | |
|       {
 | |
|       case ENTITY_UNDECLARED:
 | |
|         // VC: Entity Declared
 | |
|         handler.verror("reference to undeclared parameter entity " + name);
 | |
| 
 | |
|         // we should disable handling of all subsequent declarations
 | |
|         // unless this is a standalone document (info discarded)
 | |
|         break;
 | |
|       case ENTITY_INTERNAL:
 | |
|         if (inLiteral)
 | |
|           {
 | |
|             pushString(name, getEntityValue(name));
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             pushString(name, ' ' + getEntityValue(name) + ' ');
 | |
|           }
 | |
|         break;
 | |
|       case ENTITY_TEXT:
 | |
|         if (!inLiteral)
 | |
|           {
 | |
|             pushString(null, " ");
 | |
|           }
 | |
|         pushURL(true, name, getEntityIds(name), null, null, null, true);
 | |
|         if (!inLiteral)
 | |
|           {
 | |
|             pushString(null, " ");
 | |
|           }
 | |
|         break;
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse an entity declaration.
 | |
|    * <pre>
 | |
|    * [70] EntityDecl ::= GEDecl | PEDecl
 | |
|    * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
 | |
|    * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
 | |
|    * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
 | |
|    * [74] PEDef ::= EntityValue | ExternalID
 | |
|    * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
 | |
|    *       | 'PUBLIC' S PubidLiteral S SystemLiteral
 | |
|    * [76] NDataDecl ::= S 'NDATA' S Name
 | |
|    * </pre>
 | |
|    * <p>NOTE: the '<!ENTITY' has already been read.
 | |
|    */
 | |
|   private void parseEntityDecl()
 | |
|     throws Exception
 | |
|   {
 | |
|     boolean peFlag = false;
 | |
|     int flags = 0;
 | |
| 
 | |
|     // Check for a parameter entity.
 | |
|     expandPE = false;
 | |
|     requireWhitespace();
 | |
|     if (tryRead('%'))
 | |
|       {
 | |
|         peFlag = true;
 | |
|         requireWhitespace();
 | |
|       }
 | |
|     expandPE = true;
 | |
| 
 | |
|     // Read the entity name, and prepend
 | |
|     // '%' if necessary.
 | |
|     String name = readNmtoken(true);
 | |
|     //NE08
 | |
|     if (name.indexOf(':') >= 0)
 | |
|       {
 | |
|         error("Illegal character(':') in entity name ", name, null);
 | |
|       }
 | |
|     if (peFlag)
 | |
|       {
 | |
|         name = "%" + name;
 | |
|       }
 | |
| 
 | |
|     // Read the entity value.
 | |
|     requireWhitespace();
 | |
|     char c = readCh();
 | |
|     unread (c);
 | |
|     if (c == '"' || c == '\'')
 | |
|       {
 | |
|         // Internal entity ... replacement text has expanded refs
 | |
|         // to characters and PEs, but not to general entities
 | |
|         String value = readLiteral(flags);
 | |
|         setInternalEntity(name, value);
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         // Read the external IDs
 | |
|         ExternalIdentifiers ids = readExternalIds(false, false);
 | |
| 
 | |
|         // Check for NDATA declaration.
 | |
|         boolean white = tryWhitespace();
 | |
|         if (!peFlag && tryRead("NDATA"))
 | |
|           {
 | |
|             if (!white)
 | |
|               {
 | |
|                 error("whitespace required before NDATA");
 | |
|               }
 | |
|             requireWhitespace();
 | |
|             String notationName = readNmtoken(true);
 | |
|             if (!skippedPE)
 | |
|               {
 | |
|                 setExternalEntity(name, ENTITY_NDATA, ids, notationName);
 | |
|                 handler.unparsedEntityDecl(name, ids.publicId, ids.systemId,
 | |
|                                            ids.baseUri, notationName);
 | |
|               }
 | |
|           }
 | |
|         else if (!skippedPE)
 | |
|           {
 | |
|             setExternalEntity(name, ENTITY_TEXT, ids, null);
 | |
|             handler.getDeclHandler()
 | |
|               .externalEntityDecl(name, ids.publicId,
 | |
|                                    handler.resolveURIs()
 | |
|                                    // FIXME: ASSUMES not skipped
 | |
|                                    // "false" forces error on bad URI
 | |
|                                    ? handler.absolutize(ids.baseUri,
 | |
|                                                         ids.systemId,
 | |
|                                                         false)
 | |
|                                    : ids.systemId);
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     // Finish the declaration.
 | |
|     skipWhitespace();
 | |
|     require('>');
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse a notation declaration and register the notation.
 | |
|    * <pre>
 | |
|    * [82] NotationDecl ::= '<!NOTATION' S Name S
 | |
|    *    (ExternalID | PublicID) S? '>'
 | |
|    * [83] PublicID ::= 'PUBLIC' S PubidLiteral
 | |
|    * </pre>
 | |
|    * <p>NOTE: the '<!NOTATION' has already been read.
 | |
|    * <p>A colon in the notation name is reported as an error.
 | |
|    */
 | |
|   private void parseNotationDecl()
 | |
|     throws Exception
 | |
|   {
 | |
|     requireWhitespace();
 | |
|     String notation = readNmtoken(true);
 | |
|     //NE08
 | |
|     if (notation.indexOf(':') != -1)
 | |
|       {
 | |
|         error("Illegal character(':') in notation name ", notation, null);
 | |
|       }
 | |
|     requireWhitespace();
 | |
| 
 | |
|     // Read the identifiers and register the notation under its name.
 | |
|     ExternalIdentifiers identifiers = readExternalIds(true, false);
 | |
|     setNotation(notation, identifiers);
 | |
| 
 | |
|     // Optional whitespace and the closing '>'.
 | |
|     skipWhitespace();
 | |
|     require('>');
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Parse character data.
 | |
|    * <pre>
 | |
|    * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 | |
|    * </pre>
 | |
|    * <p>The read buffer is scanned in place and each scanned stretch is
 | |
|    * handed to the handler directly, as ignorable whitespace or as
 | |
|    * character data.  Scanning stops at '&' or '<' (normal end) or at
 | |
|    * ']]>' (reported as an error); when the buffer runs out, more input
 | |
|    * is fetched and scanning continues.  Line and column counters are
 | |
|    * updated along the way.
 | |
|    */
 | |
|   private void parseCharData()
 | |
|     throws Exception
 | |
|   {
 | |
|     char c;
 | |
|     int state = 0; // 0: keep scanning, 1: clean end, 2: found ']]>'
 | |
|     boolean pureWhite = false;
 | |
| 
 | |
|     // assert (dataBufferPos == 0);
 | |
| 
 | |
|     // are we expecting pure whitespace?  it might be dirty...
 | |
|     if ((currentElementContent == CONTENT_ELEMENTS) && !isDirtyCurrentElement)
 | |
|       {
 | |
|         pureWhite = true;
 | |
|       }
 | |
| 
 | |
|     // always report right out of readBuffer
 | |
|     // to minimize (pointless) buffer copies
 | |
|     while (true)
 | |
|       {
 | |
|         int lineAugment = 0;
 | |
|         int columnAugment = 0;
 | |
|         int i;
 | |
| 
 | |
| loop:
 | |
|         for (i = readBufferPos; i < readBufferLength; i++)
 | |
|           {
 | |
|             switch (c = readBuffer[i])
 | |
|               {
 | |
|               case '\n':
 | |
|                 lineAugment++;
 | |
|                 columnAugment = 0;
 | |
|                 // pureWhite unmodified
 | |
|                 break;
 | |
|               case '\r':  // should not happen!!
 | |
|               case '\t':
 | |
|               case ' ':
 | |
|                 // pureWhite unmodified
 | |
|                 columnAugment++;
 | |
|                 break;
 | |
|               case '&':
 | |
|               case '<':
 | |
|                 columnAugment++;
 | |
|                 // pureWhite unmodified
 | |
|                 // CLEAN end of text sequence
 | |
|                 state = 1;
 | |
|                 break loop;
 | |
|               case ']':
 | |
|                 // that's not a whitespace char, and
 | |
|                 // can not terminate pure whitespace either
 | |
|                 pureWhite = false;
 | |
|                 if ((i + 2) < readBufferLength)
 | |
|                   {
 | |
|                     if (readBuffer [i + 1] == ']'
 | |
|                         && readBuffer [i + 2] == '>')
 | |
|                       {
 | |
|                         // ERROR end of text sequence
 | |
|                         state = 2;
 | |
|                         break loop;
 | |
|                       }
 | |
|                   }
 | |
|                 else
 | |
|                   {
 | |
|                     // FIXME missing two end-of-buffer cases: a ']]>' whose
 | |
|                     // first or second ']' is among the last two characters
 | |
|                     // of the buffer is not detected here
 | |
|                   }
 | |
|                 columnAugment++;
 | |
|                 break;
 | |
|               default:
 | |
|                 if ((c < 0x0020 || c > 0xFFFD)
 | |
|                     || ((c >= 0x007f) && (c <= 0x009f) && (c != 0x0085)
 | |
|                         && xmlVersion == XML_11))
 | |
|                   {
 | |
|                     error("illegal XML character U+"
 | |
|                           + Integer.toHexString(c));
 | |
|                   }
 | |
|                 // that's not a whitespace char
 | |
|                 pureWhite = false;
 | |
|                 columnAugment++;
 | |
|               }
 | |
|           }
 | |
| 
 | |
|         // update the line/column position for the text scanned so far
 | |
|         if (lineAugment > 0)
 | |
|           {
 | |
|             line += lineAugment;
 | |
|             column = columnAugment;
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             column += columnAugment;
 | |
|           }
 | |
| 
 | |
|         // report characters/whitespace
 | |
|         int length = i - readBufferPos;
 | |
| 
 | |
|         if (length != 0)
 | |
|           {
 | |
|             if (pureWhite)
 | |
|               {
 | |
|                 handler.ignorableWhitespace(readBuffer,
 | |
|                                             readBufferPos, length);
 | |
|               }
 | |
|             else
 | |
|               {
 | |
|                 handler.charData(readBuffer, readBufferPos, length);
 | |
|               }
 | |
|             readBufferPos = i;
 | |
|           }
 | |
| 
 | |
|         if (state != 0)
 | |
|           {
 | |
|             break;
 | |
|           }
 | |
| 
 | |
|         // fill next buffer from this entity, or
 | |
|         // pop stack and continue with previous entity
 | |
|         unread(readCh());
 | |
|       }
 | |
|     if (!pureWhite)
 | |
|       {
 | |
|         isDirtyCurrentElement = true;
 | |
|       }
 | |
|     // finish, maybe with error
 | |
|     if (state != 1)  // anything but the clean end is an error
 | |
|       {
 | |
|         error("character data may not contain ']]>'");
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   //////////////////////////////////////////////////////////////////////
 | |
|   // High-level reading and scanning methods.
 | |
|   //////////////////////////////////////////////////////////////////////
 | |
| 
 | |
|   /**
 | |
|    * Read one character, report an error unless it is whitespace, and
 | |
|    * skip any whitespace that follows it.
 | |
|    */
 | |
|   private void requireWhitespace()
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     char first = readCh();
 | |
|     if (!isWhitespace(first))
 | |
|       {
 | |
|         error("whitespace required", first, null);
 | |
|         return;
 | |
|       }
 | |
|     skipWhitespace();
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Skip over whitespace, if there is any.
 | |
|    * <pre>
 | |
|    * [3] S ::= (#x20 | #x9 | #xd | #xa)+
 | |
|    * </pre>
 | |
|    * <p>With USE_CHEATS set, the current read buffer is scanned directly
 | |
|    * first.  The character-by-character path is used when the buffer
 | |
|    * runs out before a non-whitespace character is found, or when a '%'
 | |
|    * is met while expandPE is set.
 | |
|    */
 | |
|   private void skipWhitespace()
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     // Start with a little cheat.  Most of the time, the white space
 | |
|     // will fall within the current read buffer.
 | |
|     if (USE_CHEATS)
 | |
|       {
 | |
|         int newlines = 0;
 | |
|         int columns = 0;
 | |
| 
 | |
|         for (int pos = readBufferPos; pos < readBufferLength; pos++)
 | |
|           {
 | |
|             char ch = readBuffer[pos];
 | |
|             if (ch == ' ' || ch == '\t' || ch == '\r')
 | |
|               {
 | |
|                 columns++;
 | |
|                 continue;
 | |
|               }
 | |
|             if (ch == '\n')
 | |
|               {
 | |
|                 newlines++;
 | |
|                 columns = 0;
 | |
|                 continue;
 | |
|               }
 | |
|             if (ch == '%' && expandPE)
 | |
|               {
 | |
|                 // leave it to the slow path below; nothing is
 | |
|                 // committed here
 | |
|                 break;
 | |
|               }
 | |
| 
 | |
|             // First character that is not whitespace: commit the new
 | |
|             // read position and the line/column counters.
 | |
|             readBufferPos = pos;
 | |
|             if (newlines > 0)
 | |
|               {
 | |
|                 line += newlines;
 | |
|                 column = columns;
 | |
|               }
 | |
|             else
 | |
|               {
 | |
|                 column += columns;
 | |
|               }
 | |
|             return;
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     // OK, do it the slow way.
 | |
|     char c;
 | |
|     do
 | |
|       {
 | |
|         c = readCh ();
 | |
|       }
 | |
|     while (isWhitespace(c));
 | |
|     unread(c);
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Read a name or (when parsing an enumeration) name token.
 | |
|    * <pre>
 | |
|    * [5] Name ::= (Letter | '_' | ':') (NameChar)*
 | |
|    * [7] Nmtoken ::= (NameChar)+
 | |
|    * </pre>
 | |
|    */
 | |
|   private String readNmtoken(boolean isName)
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     char c;
 | |
| 
 | |
|     if (USE_CHEATS)
 | |
|       {
 | |
| loop:
 | |
|         for (int i = readBufferPos; i < readBufferLength; i++)
 | |
|           {
 | |
|             c = readBuffer[i];
 | |
|             switch (c)
 | |
|               {
 | |
|               case '%':
 | |
|                 if (expandPE)
 | |
|                   {
 | |
|                     break loop;
 | |
|                   }
 | |
|                 // else fall through...
 | |
| 
 | |
|                 // What may legitimately come AFTER a name/nmtoken?
 | |
|               case '<': case '>': case '&':
 | |
|               case ',': case '|': case '*': case '+': case '?':
 | |
|               case ')':
 | |
|               case '=':
 | |
|               case '\'': case '"':
 | |
|               case '[':
 | |
|               case ' ': case '\t': case '\r': case '\n':
 | |
|               case ';':
 | |
|               case '/':
 | |
|                 int start = readBufferPos;
 | |
|                 if (i == start)
 | |
|                   {
 | |
|                     error("name expected", readBuffer[i], null);
 | |
|                   }
 | |
|                 readBufferPos = i;
 | |
|                 return intern(readBuffer, start, i - start);
 | |
| 
 | |
|               default:
 | |
|                 // FIXME ... per IBM's OASIS test submission, these:
 | |
|                 //   ?    U+06dd
 | |
|                 //   Combining  U+309B
 | |
|                 //these switches are kind of ugly but at least we won't
 | |
|                 //have to go over the whole lits for each char
 | |
|                 if (isName && i == readBufferPos)
 | |
|                   {
 | |
|                     char c2 = (char) (c & 0x00f0);
 | |
|                     switch (c & 0xff00)
 | |
|                       {
 | |
|                         //starting with 01
 | |
|                       case 0x0100:
 | |
|                         switch (c2)
 | |
|                           {
 | |
|                           case 0x0030:
 | |
|                             if (c == 0x0132 || c == 0x0133 || c == 0x013f)
 | |
|                               {
 | |
|                                 error("Not a name start character, U+"
 | |
|                                       + Integer.toHexString(c));
 | |
|                               }
 | |
|                             break;
 | |
|                           case 0x0040:
 | |
|                             if (c == 0x0140 || c == 0x0149)
 | |
|                               {
 | |
|                                 error("Not a name start character, U+"
 | |
|                                       + Integer.toHexString(c));
 | |
|                               }
 | |
|                             break;
 | |
|                           case 0x00c0:
 | |
|                             if (c == 0x01c4 || c == 0x01cc)
 | |
|                               {
 | |
|                                 error("Not a name start character, U+"
 | |
|                                       + Integer.toHexString(c));
 | |
|                               }
 | |
|                             break;
 | |
|                           case 0x00f0:
 | |
|                             if (c == 0x01f1 || c == 0x01f3)
 | |
|                               {
 | |
|                                 error("Not a name start character, U+"
 | |
|                                       + Integer.toHexString(c));
 | |
|                               }
 | |
|                             break;
 | |
|                           case 0x00b0:
 | |
|                             if (c == 0x01f1 || c == 0x01f3)
 | |
|                               {
 | |
|                                 error("Not a name start character, U+"
 | |
|                                       + Integer.toHexString(c));
 | |
|                               }
 | |
|                             break;
 | |
|                           default:
 | |
|                             if (c == 0x017f)
 | |
|                               {
 | |
|                                 error("Not a name start character, U+"
 | |
|                                       + Integer.toHexString(c));
 | |
|                               }
 | |
|                           }
 | |
| 
 | |
|                         break;
 | |
|                         //starting with 11
 | |
|                       case 0x1100:
 | |
|                         switch (c2)
 | |
|                           {
 | |
|                           case 0x0000:
 | |
|                             if (c == 0x1104 || c == 0x1108 ||
 | |
|                                 c == 0x110a || c == 0x110d)
 | |
|                               {
 | |
|                                 error("Not a name start character, U+"
 | |
|                                       + Integer.toHexString(c));
 | |
|                               }
 | |
|                             break;
 | |
|                           case 0x0030:
 | |
|                             if (c == 0x113b || c == 0x113f)
 | |
|                               {
 | |
|                                 error("Not a name start character, U+"
 | |
|                                       + Integer.toHexString(c));
 | |
|                               }
 | |
|                             break;
 | |
|                           case 0x0040:
 | |
|                             if (c == 0x1141 || c == 0x114d
 | |
|                                 || c == 0x114f )
 | |
|                               {
 | |
|                                 error("Not a name start character, U+"
 | |
|                                       + Integer.toHexString(c));
 | |
|                               }
 | |
|                             break;
 | |
|                           case 0x0050:
 | |
|                             if (c == 0x1151 || c == 0x1156)
 | |
|                               {
 | |
|                                 error("Not a name start character, U+"
 | |
|                                       + Integer.toHexString(c));
 | |
|                               }
 | |
|                             break;
 | |
|                           case 0x0060:
 | |
|                             if (c == 0x1162 || c == 0x1164
 | |
|                                 || c == 0x1166 || c == 0x116b
 | |
|                                 || c == 0x116f)
 | |
|                               {
 | |
|                                 error("Not a name start character, U+"
 | |
|                                       + Integer.toHexString(c));
 | |
|                               }
 | |
|                             break;
 | |
|                           case 0x00b0:
 | |
|                             if (c == 0x11b6 || c == 0x11b9
 | |
|                                 || c == 0x11bb || c == 0x116f)
 | |
|                               {
 | |
|                                 error("Not a name start character, U+"
 | |
|                                       + Integer.toHexString(c));
 | |
|                               }
 | |
|                             break;
 | |
|                           default:
 | |
|                             if (c == 0x1174 || c == 0x119f
 | |
|                                 || c == 0x11ac || c == 0x11c3
 | |
|                                 || c == 0x11f1)
 | |
|                               {
 | |
|                                 error("Not a name start character, U+"
 | |
|                                       + Integer.toHexString(c));
 | |
|                               }
 | |
|                           }
 | |
|                         break;
 | |
|                       default:
 | |
|                         if (c == 0x0e46 || c == 0x1011
 | |
|                             || c == 0x212f || c == 0x0587
 | |
|                             || c == 0x0230 )
 | |
|                           {
 | |
|                             error("Not a name start character, U+"
 | |
|                                   + Integer.toHexString(c));
 | |
|                           }
 | |
|                       }
 | |
|                   }
 | |
|                 // punt on exact tests from Appendix A; approximate
 | |
|                 // them using the Unicode ID start/part rules
 | |
|                 if (i == readBufferPos && isName)
 | |
|                   {
 | |
|                     if (!Character.isUnicodeIdentifierStart(c)
 | |
|                         && c != ':' && c != '_')
 | |
|                       {
 | |
|                         error("Not a name start character, U+"
 | |
|                               + Integer.toHexString(c));
 | |
|                       }
 | |
|                   }
 | |
|                 else if (!Character.isUnicodeIdentifierPart(c)
 | |
|                          && c != '-' && c != ':' && c != '_' && c != '.'
 | |
|                          && !isExtender(c))
 | |
|                   {
 | |
|                     error("Not a name character, U+"
 | |
|                           + Integer.toHexString(c));
 | |
|                   }
 | |
|               }
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     nameBufferPos = 0;
 | |
| 
 | |
|     // Read the first character.
 | |
|     while (true)
 | |
|       {
 | |
|         c = readCh();
 | |
|         switch (c)
 | |
|           {
 | |
|           case '%':
 | |
|           case '<': case '>': case '&':
 | |
|           case ',': case '|': case '*': case '+': case '?':
 | |
|           case ')':
 | |
|           case '=':
 | |
|           case '\'': case '"':
 | |
|           case '[':
 | |
|           case ' ': case '\t': case '\n': case '\r':
 | |
|           case ';':
 | |
|           case '/':
 | |
|             unread(c);
 | |
|             if (nameBufferPos == 0)
 | |
|               {
 | |
|                 error ("name expected");
 | |
|               }
 | |
|             // punt on exact tests from Appendix A, but approximate them
 | |
|             if (isName
 | |
|                 && !Character.isUnicodeIdentifierStart(nameBuffer[0])
 | |
|                 && ":_".indexOf(nameBuffer[0]) == -1)
 | |
|               {
 | |
|                 error("Not a name start character, U+"
 | |
|                       + Integer.toHexString(nameBuffer[0]));
 | |
|               }
 | |
|             String s = intern(nameBuffer, 0, nameBufferPos);
 | |
|             nameBufferPos = 0;
 | |
|             return s;
 | |
|           default:
 | |
|             // punt on exact tests from Appendix A, but approximate them
 | |
| 
 | |
|             if ((nameBufferPos != 0 || !isName)
 | |
|                 && !Character.isUnicodeIdentifierPart(c)
 | |
|                 && ":-_.".indexOf(c) == -1
 | |
|                 && !isExtender(c))
 | |
|               {
 | |
|                 error("Not a name character, U+"
 | |
|                       + Integer.toHexString(c));
 | |
|               }
 | |
|             if (nameBufferPos >= nameBuffer.length)
 | |
|               {
 | |
|                 nameBuffer =
 | |
|                   (char[]) extendArray(nameBuffer,
 | |
|                                        nameBuffer.length, nameBufferPos);
 | |
|               }
 | |
|             nameBuffer[nameBufferPos++] = c;
 | |
|           }
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   private static boolean isExtender(char c)
 | |
|   {
 | |
|     // [88] Extender ::= ...
 | |
|     return c == 0x00b7 || c == 0x02d0 || c == 0x02d1 || c == 0x0387
 | |
|       || c == 0x0640 || c == 0x0e46 || c == 0x0ec6 || c == 0x3005
 | |
|       || (c >= 0x3031 && c <= 0x3035)
 | |
|       || (c >= 0x309d && c <= 0x309e)
 | |
|       || (c >= 0x30fc && c <= 0x30fe);
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Read a literal.  With matching single or double quotes as
 | |
|    * delimiters (and not embedded!) this is used to parse:
 | |
|    * <pre>
 | |
|    *  [9] EntityValue ::= ... ([^%&] | PEReference | Reference)* ...
 | |
|    *  [10] AttValue ::= ... ([^<&] | Reference)* ...
 | |
|    *  [11] SystemLiteral ::= ... (URLchar - "'")* ...
 | |
|    *  [12] PubidLiteral ::= ... (PubidChar - "'")* ...
 | |
|    * </pre>
 | |
|    * as well as the quoted strings in XML and text declarations
 | |
|    * (for version, encoding, and standalone) which have their
 | |
|    * own constraints.
 | |
|    */
 | |
|   private String readLiteral(int flags)
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     char delim, c;
 | |
|     int startLine = line;
 | |
|     boolean saved = expandPE;
 | |
|     boolean savedReport = doReport;
 | |
| 
 | |
|     // Find the first delimiter.
 | |
|     delim = readCh();
 | |
|     if (delim != '"' && delim != '\'')
 | |
|       {
 | |
|         error("expected '\"' or \"'\"", delim, null);
 | |
|         return null;
 | |
|       }
 | |
|     inLiteral = true;
 | |
|     if ((flags & LIT_DISABLE_PE) != 0)
 | |
|       {
 | |
|         expandPE = false;
 | |
|       }
 | |
|     doReport = false;
 | |
| 
 | |
|     // Each level of input source has its own buffer; remember
 | |
|     // ours, so we won't read the ending delimiter from any
 | |
|     // other input source, regardless of entity processing.
 | |
|     char[] ourBuf = readBuffer;
 | |
| 
 | |
|     // Read the literal.
 | |
|     try
 | |
|       {
 | |
|         c = readCh();
 | |
|         boolean ampRead = false;
 | |
| loop:
 | |
|         while (! (c == delim && readBuffer == ourBuf))
 | |
|           {
 | |
|             switch (c)
 | |
|               {
 | |
|                 // attributes and public ids are normalized
 | |
|                 // in almost the same ways
 | |
|               case '\n':
 | |
|               case '\r':
 | |
|                 if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)
 | |
|                   {
 | |
|                     c = ' ';
 | |
|                   }
 | |
|                 break;
 | |
|               case '\t':
 | |
|                 if ((flags & LIT_ATTRIBUTE) != 0)
 | |
|                   {
 | |
|                     c = ' ';
 | |
|                   }
 | |
|                 break;
 | |
|               case '&':
 | |
|                 c = readCh();
 | |
|                 // Char refs are expanded immediately, except for
 | |
|                 // all the cases where it's deferred.
 | |
|                 if (c == '#')
 | |
|                   {
 | |
|                     if ((flags & LIT_DISABLE_CREF) != 0)
 | |
|                       {
 | |
|                         dataBufferAppend('&');
 | |
|                         break;
 | |
|                       }
 | |
|                     parseCharRef(false /* Do not do flushDataBuffer */);
 | |
| 
 | |
|                     // exotic WFness risk: this is an entity literal,
 | |
|                     // dataBuffer [dataBufferPos - 1] == '&', and
 | |
|                     // following chars are a _partial_ entity/char ref
 | |
| 
 | |
|                     // It looks like an entity ref ...
 | |
|                   }
 | |
|                 else
 | |
|                   {
 | |
|                     unread(c);
 | |
|                     // Expand it?
 | |
|                     if ((flags & LIT_ENTITY_REF) > 0)
 | |
|                       {
 | |
|                         parseEntityRef(false);
 | |
|                         if (String.valueOf(readBuffer).equals("&"))
 | |
|                           {
 | |
|                             ampRead = true;
 | |
|                           }
 | |
|                         //Is it just data?
 | |
|                       }
 | |
|                     else if ((flags & LIT_DISABLE_EREF) != 0)
 | |
|                       {
 | |
|                         dataBufferAppend('&');
 | |
| 
 | |
|                         // OK, it will be an entity ref -- expanded later.
 | |
|                       }
 | |
|                     else
 | |
|                       {
 | |
|                         String name = readNmtoken(true);
 | |
|                         require(';');
 | |
|                         dataBufferAppend('&');
 | |
|                         dataBufferAppend(name);
 | |
|                         dataBufferAppend(';');
 | |
|                       }
 | |
|                   }
 | |
|                 c = readCh();
 | |
|                 continue loop;
 | |
| 
 | |
|               case '<':
 | |
|                 // and why?  Perhaps so "&foo;" expands the same
 | |
|                 // inside and outside an attribute?
 | |
|                 if ((flags & LIT_ATTRIBUTE) != 0)
 | |
|                   {
 | |
|                     error("attribute values may not contain '<'");
 | |
|                   }
 | |
|                 break;
 | |
| 
 | |
|                 // We don't worry about case '%' and PE refs, readCh does.
 | |
| 
 | |
|               default:
 | |
|                 break;
 | |
|               }
 | |
|             dataBufferAppend(c);
 | |
|             c = readCh();
 | |
|           }
 | |
|       }
 | |
|     catch (EOFException e)
 | |
|       {
 | |
|         error("end of input while looking for delimiter (started on line "
 | |
|               + startLine + ')', null, Character.toString(delim));
 | |
|       }
 | |
|     inLiteral = false;
 | |
|     expandPE = saved;
 | |
|     doReport = savedReport;
 | |
| 
 | |
|     // Normalise whitespace if necessary.
 | |
|     if ((flags & LIT_NORMALIZE) > 0)
 | |
|       {
 | |
|         dataBufferNormalize();
 | |
|       }
 | |
| 
 | |
|     // Return the value.
 | |
|     return dataBufferToString();
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Try reading external identifiers.
 | |
|    * A system identifier is not required for notations.
 | |
|    * @param inNotation Are we parsing a notation decl?
 | |
|    * @param isSubset Parsing external subset decl (may be omitted)?
 | |
|    * @return A three-member String array containing the identifiers,
 | |
|    *  or nulls. Order: public, system, baseURI.
 | |
|    */
 | |
|   private ExternalIdentifiers readExternalIds(boolean inNotation,
 | |
|                                               boolean isSubset)
 | |
|     throws Exception
 | |
|   {
 | |
|     char c;
 | |
|     ExternalIdentifiers ids = new ExternalIdentifiers();
 | |
|     int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
 | |
| 
 | |
|     if (tryRead("PUBLIC"))
 | |
|       {
 | |
|         requireWhitespace();
 | |
|         ids.publicId = readLiteral(LIT_NORMALIZE | LIT_PUBID | flags);
 | |
|         if (inNotation)
 | |
|           {
 | |
|             skipWhitespace();
 | |
|             c = readCh();
 | |
|             unread(c);
 | |
|             if (c == '"' || c == '\'')
 | |
|               {
 | |
|                 ids.systemId = readLiteral(flags);
 | |
|               }
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             requireWhitespace();
 | |
|             ids.systemId = readLiteral(flags);
 | |
|           }
 | |
| 
 | |
|         for (int i = 0; i < ids.publicId.length(); i++)
 | |
|           {
 | |
|             c = ids.publicId.charAt(i);
 | |
|             if (c >= 'a' && c <= 'z')
 | |
|               {
 | |
|                 continue;
 | |
|               }
 | |
|             if (c >= 'A' && c <= 'Z')
 | |
|               {
 | |
|                 continue;
 | |
|               }
 | |
|             if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(c) != -1)
 | |
|               {
 | |
|                 continue;
 | |
|               }
 | |
|             error("illegal PUBLIC id character U+"
 | |
|                   + Integer.toHexString(c));
 | |
|           }
 | |
|       }
 | |
|     else if (tryRead("SYSTEM"))
 | |
|       {
 | |
|         requireWhitespace();
 | |
|         ids.systemId = readLiteral(flags);
 | |
|       }
 | |
|     else if (!isSubset)
 | |
|       {
 | |
|         error("missing SYSTEM or PUBLIC keyword");
 | |
|       }
 | |
| 
 | |
|     if (ids.systemId != null)
 | |
|       {
 | |
|         if (ids.systemId.indexOf('#') != -1)
 | |
|           {
 | |
|             handler.verror("SYSTEM id has a URI fragment: " + ids.systemId);
 | |
|           }
 | |
|         ids.baseUri = handler.getSystemId();
 | |
|         if (ids.baseUri == null && uriWarnings)
 | |
|           {
 | |
|             handler.warn("No base URI; hope URI is absolute: "
 | |
|                          + ids.systemId);
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     return ids;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Test if a character is whitespace.
 | |
|    * <pre>
 | |
|    * [3] S ::= (#x20 | #x9 | #xd | #xa)+
 | |
|    * </pre>
 | |
|    * @param c The character to test.
 | |
|    * @return true if the character is whitespace.
 | |
|    */
 | |
|   private final boolean isWhitespace(char c)
 | |
|   {
 | |
|     if (c > 0x20)
 | |
|       {
 | |
|         return false;
 | |
|       }
 | |
|     if (c == 0x20 || c == 0x0a || c == 0x09 || c == 0x0d)
 | |
|       {
 | |
|         return true;
 | |
|       }
 | |
|     return false;  // illegal ...
 | |
|   }
 | |
| 
 | |
|   //////////////////////////////////////////////////////////////////////
 | |
|   // Utility routines.
 | |
|   //////////////////////////////////////////////////////////////////////
 | |
| 
 | |
|   /**
 | |
|    * Add a character to the data buffer.
 | |
|    */
 | |
|   private void dataBufferAppend(char c)
 | |
|   {
 | |
|     // Expand buffer if necessary.
 | |
|     if (dataBufferPos >= dataBuffer.length)
 | |
|       {
 | |
|         dataBuffer = (char[]) extendArray(dataBuffer,
 | |
|                                           dataBuffer.length, dataBufferPos);
 | |
|       }
 | |
|     dataBuffer[dataBufferPos++] = c;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Add a string to the data buffer.
 | |
|    */
 | |
|   private void dataBufferAppend(String s)
 | |
|   {
 | |
|     dataBufferAppend(s.toCharArray(), 0, s.length());
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Append (part of) a character array to the data buffer.
 | |
|    */
 | |
|   private void dataBufferAppend(char[] ch, int start, int length)
 | |
|   {
 | |
|     dataBuffer = (char[]) extendArray(dataBuffer, dataBuffer.length,
 | |
|                                       dataBufferPos + length);
 | |
| 
 | |
|     System.arraycopy(ch, start, dataBuffer, dataBufferPos, length);
 | |
|     dataBufferPos += length;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Normalise space characters in the data buffer.
 | |
|    */
 | |
|   private void dataBufferNormalize()
 | |
|   {
 | |
|     int i = 0;
 | |
|     int j = 0;
 | |
|     int end = dataBufferPos;
 | |
| 
 | |
|     // Skip spaces at the start.
 | |
|     while (j < end && dataBuffer[j] == ' ')
 | |
|       {
 | |
|         j++;
 | |
|       }
 | |
| 
 | |
|     // Skip whitespace at the end.
 | |
|     while (end > j && dataBuffer[end - 1] == ' ')
 | |
|       {
 | |
|         end --;
 | |
|       }
 | |
| 
 | |
|     // Start copying to the left.
 | |
|     while (j < end)
 | |
|       {
 | |
| 
 | |
|         char c = dataBuffer[j++];
 | |
| 
 | |
|         // Normalise all other spaces to
 | |
|         // a single space.
 | |
|         if (c == ' ')
 | |
|           {
 | |
|             while (j < end && dataBuffer[j++] == ' ')
 | |
|               {
 | |
|                 continue;
 | |
|               }
 | |
|             dataBuffer[i++] = ' ';
 | |
|             dataBuffer[i++] = dataBuffer[j - 1];
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             dataBuffer[i++] = c;
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     // The new length is <= the old one.
 | |
|     dataBufferPos = i;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Convert the data buffer to a string.
 | |
|    */
 | |
|   private String dataBufferToString()
 | |
|   {
 | |
|     String s = new String(dataBuffer, 0, dataBufferPos);
 | |
|     dataBufferPos = 0;
 | |
|     return s;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Flush the contents of the data buffer to the handler, as
 | |
|    * appropriate, and reset the buffer for new input.
 | |
|    */
 | |
|   private void dataBufferFlush()
 | |
|     throws SAXException
 | |
|   {
 | |
|     if (currentElementContent == CONTENT_ELEMENTS
 | |
|         && dataBufferPos > 0
 | |
|         && !inCDATA)
 | |
|       {
 | |
|         // We can't just trust the buffer to be whitespace, there
 | |
|         // are (error) cases when it isn't
 | |
|         for (int i = 0; i < dataBufferPos; i++)
 | |
|           {
 | |
|             if (!isWhitespace(dataBuffer[i]))
 | |
|               {
 | |
|                 handler.charData(dataBuffer, 0, dataBufferPos);
 | |
|                 dataBufferPos = 0;
 | |
|               }
 | |
|           }
 | |
|         if (dataBufferPos > 0)
 | |
|           {
 | |
|             handler.ignorableWhitespace(dataBuffer, 0, dataBufferPos);
 | |
|             dataBufferPos = 0;
 | |
|           }
 | |
|       }
 | |
|     else if (dataBufferPos > 0)
 | |
|       {
 | |
|         handler.charData(dataBuffer, 0, dataBufferPos);
 | |
|         dataBufferPos = 0;
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Require a string to appear, or throw an exception.
 | |
|    * <p><em>Precondition:</em> Entity expansion is not required.
 | |
|    * <p><em>Precondition:</em> data buffer has no characters that
 | |
|    * will get sent to the application.
 | |
|    */
 | |
|   private void require(String delim)
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     int length = delim.length();
 | |
|     char[] ch;
 | |
| 
 | |
|     if (length < dataBuffer.length)
 | |
|       {
 | |
|         ch = dataBuffer;
 | |
|         delim.getChars(0, length, ch, 0);
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         ch = delim.toCharArray();
 | |
|       }
 | |
| 
 | |
|     if (USE_CHEATS && length <= (readBufferLength - readBufferPos))
 | |
|       {
 | |
|         int offset = readBufferPos;
 | |
| 
 | |
|         for (int i = 0; i < length; i++, offset++)
 | |
|           {
 | |
|             if (ch[i] != readBuffer[offset])
 | |
|               {
 | |
|                 error ("required string", null, delim);
 | |
|               }
 | |
|           }
 | |
|         readBufferPos = offset;
 | |
| 
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         for (int i = 0; i < length; i++)
 | |
|           {
 | |
|             require(ch[i]);
 | |
|           }
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Require a character to appear, or throw an exception.
 | |
|    */
 | |
|   private void require(char delim)
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     char c = readCh();
 | |
| 
 | |
|     if (c != delim)
 | |
|       {
 | |
|         error("required character", c, Character.toString(delim));
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Create an interned string from a character array.
 | |
|    * Ælfred uses this method to create an interned version
 | |
|    * of all names and name tokens, so that it can test equality
 | |
|    * with <code>==</code> instead of <code>String.equals ()</code>.
 | |
|    *
 | |
|    * <p>This is much more efficient than constructing a non-interned
 | |
|    * string first, and then interning it.
 | |
|    *
 | |
|    * @param ch an array of characters for building the string.
 | |
|    * @param start the starting position in the array.
 | |
|    * @param length the number of characters to place in the string.
 | |
|    * @return an interned string.
 | |
|    * @see #intern (String)
 | |
|    * @see java.lang.String#intern
 | |
|    */
 | |
|   public String intern(char[] ch, int start, int length)
 | |
|   {
 | |
|     int index = 0;
 | |
|     int hash = 0;
 | |
|     Object[] bucket;
 | |
| 
 | |
|     // Generate a hash code.  This is a widely used string hash,
 | |
|     // often attributed to Brian Kernighan.
 | |
|     for (int i = start; i < start + length; i++)
 | |
|       {
 | |
|         hash = 31 * hash + ch[i];
 | |
|       }
 | |
|     hash = (hash & 0x7fffffff) % SYMBOL_TABLE_LENGTH;
 | |
| 
 | |
|     // Get the bucket -- consists of {array,String} pairs
 | |
|     if ((bucket = symbolTable[hash]) == null)
 | |
|       {
 | |
|         // first string in this bucket
 | |
|         bucket = new Object[8];
 | |
| 
 | |
|         // Search for a matching tuple, and
 | |
|         // return the string if we find one.
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         while (index < bucket.length)
 | |
|           {
 | |
|             char[] chFound = (char[]) bucket[index];
 | |
| 
 | |
|             // Stop when we hit an empty entry.
 | |
|             if (chFound == null)
 | |
|               {
 | |
|                 break;
 | |
|               }
 | |
| 
 | |
|             // If they're the same length, check for a match.
 | |
|             if (chFound.length == length)
 | |
|               {
 | |
|                 for (int i = 0; i < chFound.length; i++)
 | |
|                   {
 | |
|                     // continue search on failure
 | |
|                     if (ch[start + i] != chFound[i])
 | |
|                       {
 | |
|                         break;
 | |
|                       }
 | |
|                     else if (i == length - 1)
 | |
|                       {
 | |
|                         // That's it, we have a match!
 | |
|                         return (String) bucket[index + 1];
 | |
|                       }
 | |
|                   }
 | |
|               }
 | |
|             index += 2;
 | |
|           }
 | |
|         // Not found -- we'll have to add it.
 | |
| 
 | |
|         // Do we have to grow the bucket?
 | |
|         bucket = (Object[]) extendArray(bucket, bucket.length, index);
 | |
|       }
 | |
|     symbolTable[hash] = bucket;
 | |
| 
 | |
|     // OK, add it to the end of the bucket -- "local" interning.
 | |
|     // Intern "globally" to let applications share interning benefits.
 | |
|     // That is, "!=" and "==" work on our strings, not just equals().
 | |
|     String s = new String(ch, start, length).intern();
 | |
|     bucket[index] = s.toCharArray();
 | |
|     bucket[index + 1] = s;
 | |
|     return s;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Ensure the capacity of an array, allocating a new one if
 | |
|    * necessary.  Usually extends only for name hash collisions.
 | |
|    */
 | |
|   private Object extendArray(Object array, int currentSize, int requiredSize)
 | |
|   {
 | |
|     if (requiredSize < currentSize)
 | |
|       {
 | |
|         return array;
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         Object newArray = null;
 | |
|         int newSize = currentSize * 2;
 | |
| 
 | |
|         if (newSize <= requiredSize)
 | |
|           {
 | |
|             newSize = requiredSize + 1;
 | |
|           }
 | |
| 
 | |
|         if (array instanceof char[])
 | |
|           {
 | |
|             newArray = new char[newSize];
 | |
|           }
 | |
|         else if (array instanceof Object[])
 | |
|           {
 | |
|             newArray = new Object[newSize];
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             throw new RuntimeException();
 | |
|           }
 | |
| 
 | |
|         System.arraycopy(array, 0, newArray, 0, currentSize);
 | |
|         return newArray;
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   //////////////////////////////////////////////////////////////////////
 | |
|   // XML query routines.
 | |
|   //////////////////////////////////////////////////////////////////////
 | |
| 
 | |
|   boolean isStandalone()
 | |
|   {
 | |
|     return docIsStandalone;
 | |
|   }
 | |
| 
 | |
|   //
 | |
|   // Elements
 | |
|   //
 | |
| 
 | |
|   private int getContentType(ElementDecl element, int defaultType)
 | |
|   {
 | |
|     int retval;
 | |
| 
 | |
|     if (element == null)
 | |
|       {
 | |
|         return defaultType;
 | |
|       }
 | |
|     retval = element.contentType;
 | |
|     if (retval == CONTENT_UNDECLARED)
 | |
|       {
 | |
|         retval = defaultType;
 | |
|       }
 | |
|     return retval;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Look up the content type of an element.
 | |
|    * @param name The element type name.
 | |
|    * @return An integer constant representing the content type.
 | |
|    * @see #CONTENT_UNDECLARED
 | |
|    * @see #CONTENT_ANY
 | |
|    * @see #CONTENT_EMPTY
 | |
|    * @see #CONTENT_MIXED
 | |
|    * @see #CONTENT_ELEMENTS
 | |
|    */
 | |
|   public int getElementContentType(String name)
 | |
|   {
 | |
|     ElementDecl element = (ElementDecl) elementInfo.get(name);
 | |
|     return getContentType(element, CONTENT_UNDECLARED);
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Register an element.
 | |
|    * Array format:
 | |
|    *  [0] element type name
 | |
|    *  [1] content model (mixed, elements only)
 | |
|    *  [2] attribute hash table
 | |
|    */
 | |
|   private void setElement(String name, int contentType,
 | |
|                           String contentModel, HashMap attributes)
 | |
|     throws SAXException
 | |
|   {
 | |
|     if (skippedPE)
 | |
|       {
 | |
|         return;
 | |
|       }
 | |
| 
 | |
|     ElementDecl element = (ElementDecl) elementInfo.get(name);
 | |
| 
 | |
|     // first <!ELEMENT ...> or <!ATTLIST ...> for this type?
 | |
|     if (element == null)
 | |
|       {
 | |
|         element = new ElementDecl();
 | |
|         element.contentType = contentType;
 | |
|         element.contentModel = contentModel;
 | |
|         element.attributes = attributes;
 | |
|         elementInfo.put(name, element);
 | |
|         return;
 | |
|       }
 | |
| 
 | |
|     // <!ELEMENT ...> declaration?
 | |
|     if (contentType != CONTENT_UNDECLARED)
 | |
|       {
 | |
|         // ... following an associated <!ATTLIST ...>
 | |
|         if (element.contentType == CONTENT_UNDECLARED)
 | |
|           {
 | |
|             element.contentType = contentType;
 | |
|             element.contentModel = contentModel;
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             // VC: Unique Element Type Declaration
 | |
|             handler.verror("multiple declarations for element type: "
 | |
|                            + name);
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     // first <!ATTLIST ...>, before <!ELEMENT ...> ?
 | |
|     else if (attributes != null)
 | |
|       {
 | |
|         element.attributes = attributes;
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Look up the attribute hash table for an element.
 | |
|    * The hash table is the second item in the element array.
 | |
|    */
 | |
|   private HashMap getElementAttributes(String name)
 | |
|   {
 | |
|     ElementDecl element = (ElementDecl) elementInfo.get(name);
 | |
|     return (element == null) ? null : element.attributes;
 | |
|   }
 | |
| 
 | |
|   //
 | |
|   // Attributes
 | |
|   //
 | |
| 
 | |
|   /**
 | |
|    * Get the declared attributes for an element type.
 | |
|    * @param elname The name of the element type.
 | |
|    * @return An iterator over all the attributes declared for
 | |
|    *   a specific element type.  The results will be valid only
 | |
|    *   after the DTD (if any) has been parsed.
 | |
|    * @see #getAttributeType
 | |
|    * @see #getAttributeEnumeration
 | |
|    * @see #getAttributeDefaultValueType
 | |
|    * @see #getAttributeDefaultValue
 | |
|    * @see #getAttributeExpandedValue
 | |
|    */
 | |
|   private Iterator declaredAttributes(ElementDecl element)
 | |
|   {
 | |
|     HashMap attlist;
 | |
| 
 | |
|     if (element == null)
 | |
|       {
 | |
|         return null;
 | |
|       }
 | |
|     if ((attlist = element.attributes) == null)
 | |
|       {
 | |
|         return null;
 | |
|       }
 | |
|     return attlist.keySet().iterator();
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Get the declared attributes for an element type.
 | |
|    * @param elname The name of the element type.
 | |
|    * @return An iterator over all the attributes declared for
 | |
|    *   a specific element type.  The results will be valid only
 | |
|    *   after the DTD (if any) has been parsed.
 | |
|    * @see #getAttributeType
 | |
|    * @see #getAttributeEnumeration
 | |
|    * @see #getAttributeDefaultValueType
 | |
|    * @see #getAttributeDefaultValue
 | |
|    * @see #getAttributeExpandedValue
 | |
|    */
 | |
|   public Iterator declaredAttributes(String elname)
 | |
|   {
 | |
|     return declaredAttributes((ElementDecl) elementInfo.get(elname));
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Retrieve the declared type of an attribute.
 | |
|    * @param name The name of the associated element.
 | |
|    * @param aname The name of the attribute.
 | |
|    * @return An interend string denoting the type, or null
 | |
|    *  indicating an undeclared attribute.
 | |
|    */
 | |
|   public String getAttributeType(String name, String aname)
 | |
|   {
 | |
|     AttributeDecl attribute = getAttribute(name, aname);
 | |
|     return (attribute == null) ? null : attribute.type;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Retrieve the allowed values for an enumerated attribute type.
 | |
|    * @param name The name of the associated element.
 | |
|    * @param aname The name of the attribute.
 | |
|    * @return A string containing the token list.
 | |
|    */
 | |
|   public String getAttributeEnumeration(String name, String aname)
 | |
|   {
 | |
|     AttributeDecl attribute = getAttribute(name, aname);
 | |
|     // assert:  attribute.enumeration is "ENUMERATION" or "NOTATION"
 | |
|     return (attribute == null) ? null : attribute.enumeration;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Retrieve the default value of a declared attribute.
 | |
|    * @param name The name of the associated element.
 | |
|    * @param aname The name of the attribute.
 | |
|    * @return The default value, or null if the attribute was
 | |
|    *   #IMPLIED or simply undeclared and unspecified.
 | |
|    * @see #getAttributeExpandedValue
 | |
|    */
 | |
|   public String getAttributeDefaultValue(String name, String aname)
 | |
|   {
 | |
|     AttributeDecl attribute = getAttribute(name, aname);
 | |
|     return (attribute == null) ? null : attribute.value;
 | |
|   }
 | |
| 
 | |
|     /*
 | |
| 
 | |
| // FIXME:  Leaving this in, until W3C finally resolves the confusion
 | |
| // between parts of the XML 2nd REC about when entity declarations
 | |
| // are guaranteed to be known.  Current code matches what section 5.1
 | |
| // (conformance) describes, but some readings of the self-contradicting
 | |
| // text in 4.1 (the "Entity Declared" WFC and VC) seem to expect that
 | |
| // attribute expansion/normalization must be deferred in some cases
 | |
| // (just TRY to identify them!).
 | |
| 
 | |
|      * Retrieve the expanded value of a declared attribute.
 | |
|      * <p>General entities (and char refs) will be expanded (once).
 | |
|      * @param name The name of the associated element.
 | |
|      * @param aname The name of the attribute.
 | |
|      * @return The expanded default value, or null if the attribute was
 | |
|      *   #IMPLIED or simply undeclared
 | |
|      * @see #getAttributeDefaultValue
 | |
|     public String getAttributeExpandedValue (String name, String aname)
 | |
|     throws Exception
 | |
|     {
 | |
|   AttributeDecl attribute = getAttribute (name, aname);
 | |
| 
 | |
|   if (attribute == null) {
 | |
|       return null;
 | |
|   } else if (attribute.defaultValue == null && attribute.value != null) {
 | |
|       // we MUST use the same buf for both quotes else the literal
 | |
|       // can't be properly terminated
 | |
|       char buf [] = new char [1];
 | |
|       int  flags = LIT_ENTITY_REF | LIT_ATTRIBUTE;
 | |
|       String type = getAttributeType (name, aname);
 | |
| 
 | |
|       if (type != "CDATA" && type != null)
 | |
|     flags |= LIT_NORMALIZE;
 | |
|       buf [0] = '"';
 | |
|       pushCharArray (null, buf, 0, 1);
 | |
|       pushString (null, attribute.value);
 | |
|       pushCharArray (null, buf, 0, 1);
 | |
|       attribute.defaultValue = readLiteral (flags);
 | |
|   }
 | |
|   return attribute.defaultValue;
 | |
|     }
 | |
|      */
 | |
| 
 | |
|   /**
 | |
|    * Retrieve the default value mode of a declared attribute.
 | |
|    * @see #ATTRIBUTE_DEFAULT_SPECIFIED
 | |
|    * @see #ATTRIBUTE_DEFAULT_IMPLIED
 | |
|    * @see #ATTRIBUTE_DEFAULT_REQUIRED
 | |
|    * @see #ATTRIBUTE_DEFAULT_FIXED
 | |
|    */
 | |
|   public int getAttributeDefaultValueType(String name, String aname)
 | |
|   {
 | |
|     AttributeDecl attribute = getAttribute(name, aname);
 | |
|     return (attribute == null) ? ATTRIBUTE_DEFAULT_UNDECLARED :
 | |
|       attribute.valueType;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Register an attribute declaration for later retrieval.
 | |
|    * Format:
 | |
|    * - String type
 | |
|    * - String default value
 | |
|    * - int value type
 | |
|    * - enumeration
 | |
|    * - processed default value
 | |
|    */
 | |
|   private void setAttribute(String elName, String name, String type,
 | |
|                             String enumeration, String value, int valueType)
 | |
|     throws Exception
 | |
|   {
 | |
|     HashMap attlist;
 | |
| 
 | |
|     if (skippedPE)
 | |
|       {
 | |
|         return;
 | |
|       }
 | |
| 
 | |
|     // Create a new hashtable if necessary.
 | |
|     attlist = getElementAttributes(elName);
 | |
|     if (attlist == null)
 | |
|       {
 | |
|         attlist = new HashMap();
 | |
|       }
 | |
| 
 | |
|     // ignore multiple attribute declarations!
 | |
|     if (attlist.get(name) != null)
 | |
|       {
 | |
|         // warn ...
 | |
|         return;
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         AttributeDecl attribute = new AttributeDecl();
 | |
|         attribute.type = type;
 | |
|         attribute.value = value;
 | |
|         attribute.valueType = valueType;
 | |
|         attribute.enumeration = enumeration;
 | |
|         attlist.put(name, attribute);
 | |
| 
 | |
|         // save; but don't overwrite any existing <!ELEMENT ...>
 | |
|         setElement(elName, CONTENT_UNDECLARED, null, attlist);
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Retrieve the attribute declaration for the given element name and name.
 | |
|    */
 | |
|   private AttributeDecl getAttribute(String elName, String name)
 | |
|   {
 | |
|     HashMap attlist = getElementAttributes(elName);
 | |
|     return (attlist == null) ? null : (AttributeDecl) attlist.get(name);
 | |
|   }
 | |
| 
 | |
|   //
 | |
|   // Entities
 | |
|   //
 | |
| 
 | |
|   /**
 | |
|    * Find the type of an entity.
 | |
|    * @returns An integer constant representing the entity type.
 | |
|    * @see #ENTITY_UNDECLARED
 | |
|    * @see #ENTITY_INTERNAL
 | |
|    * @see #ENTITY_NDATA
 | |
|    * @see #ENTITY_TEXT
 | |
|    */
 | |
|   public int getEntityType(String ename)
 | |
|   {
 | |
|     EntityInfo entity = (EntityInfo) entityInfo.get(ename);
 | |
|     return (entity == null) ?  ENTITY_UNDECLARED : entity.type;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Return an external entity's identifiers.
 | |
|    * @param ename The name of the external entity.
 | |
|    * @return The entity's public identifier, system identifier, and base URI.
 | |
|    *  Null if the entity was not declared as an external entity.
 | |
|    * @see #getEntityType
 | |
|    */
 | |
|   public ExternalIdentifiers getEntityIds(String ename)
 | |
|   {
 | |
|     EntityInfo entity = (EntityInfo) entityInfo.get(ename);
 | |
|     return (entity == null) ? null : entity.ids;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Return an internal entity's replacement text.
 | |
|    * @param ename The name of the internal entity.
 | |
|    * @return The entity's replacement text, or null if
 | |
|    *   the entity was not declared as an internal entity.
 | |
|    * @see #getEntityType
 | |
|    */
 | |
|   public String getEntityValue(String ename)
 | |
|   {
 | |
|     EntityInfo entity = (EntityInfo) entityInfo.get(ename);
 | |
|     return (entity == null) ? null : entity.value;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Register an entity declaration for later retrieval.
 | |
|    */
 | |
|   private void setInternalEntity(String eName, String value)
 | |
|     throws SAXException
 | |
|   {
 | |
|     if (skippedPE)
 | |
|       {
 | |
|         return;
 | |
|       }
 | |
| 
 | |
|     if (entityInfo.get(eName) == null)
 | |
|       {
 | |
|         EntityInfo entity = new EntityInfo();
 | |
|         entity.type = ENTITY_INTERNAL;
 | |
|         entity.value = value;
 | |
|         entityInfo.put(eName, entity);
 | |
|       }
 | |
|     if (handler.stringInterning)
 | |
|       {
 | |
|         if ("lt" == eName || "gt" == eName || "quot" == eName
 | |
|             || "apos" == eName || "amp" == eName)
 | |
|           {
 | |
|             return;
 | |
|           }
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         if ("lt".equals(eName) || "gt".equals(eName) || "quot".equals(eName)
 | |
|             || "apos".equals(eName) || "amp".equals(eName))
 | |
|           {
 | |
|             return;
 | |
|           }
 | |
|       }
 | |
|     handler.getDeclHandler().internalEntityDecl(eName, value);
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Register an external entity declaration for later retrieval.
 | |
|    */
 | |
|   private void setExternalEntity(String eName, int eClass,
 | |
|                                  ExternalIdentifiers ids, String nName)
 | |
|   {
 | |
|     if (entityInfo.get(eName) == null)
 | |
|       {
 | |
|         EntityInfo entity = new EntityInfo();
 | |
|         entity.type = eClass;
 | |
|         entity.ids = ids;
 | |
|         entity.notationName = nName;
 | |
|         entityInfo.put(eName, entity);
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   //
 | |
|   // Notations.
 | |
|   //
 | |
| 
 | |
|   /**
 | |
|    * Report a notation declaration, checking for duplicates.
 | |
|    */
 | |
|   private void setNotation(String nname, ExternalIdentifiers ids)
 | |
|     throws SAXException
 | |
|   {
 | |
|     if (skippedPE)
 | |
|       {
 | |
|         return;
 | |
|       }
 | |
| 
 | |
|     handler.notationDecl(nname, ids.publicId, ids.systemId, ids.baseUri);
 | |
|     if (notationInfo.get(nname) == null)
 | |
|       {
 | |
|         notationInfo.put(nname, nname);
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         // VC: Unique Notation Name
 | |
|         handler.verror("Duplicate notation name decl: " + nname);
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   //
 | |
|   // Location.
 | |
|   //
 | |
| 
 | |
|   /**
 | |
|    * Return the current line number.
 | |
|    */
 | |
|   public int getLineNumber()
 | |
|   {
 | |
|     return line;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Return the current column number.
 | |
|    */
 | |
|   public int getColumnNumber()
 | |
|   {
 | |
|     return column;
 | |
|   }
 | |
| 
 | |
|   //////////////////////////////////////////////////////////////////////
 | |
|   // High-level I/O.
 | |
|   //////////////////////////////////////////////////////////////////////
 | |
| 
 | |
|   /**
 | |
|    * Read a single character from the readBuffer.
 | |
|    * <p>The readDataChunk () method maintains the buffer.
 | |
|    * <p>If we hit the end of an entity, try to pop the stack and
 | |
|    * keep going.
 | |
|    * <p> (This approach doesn't really enforce XML's rules about
 | |
|    * entity boundaries, but this is not currently a validating
 | |
|    * parser).
 | |
|    * <p>This routine also attempts to keep track of the current
 | |
|    * position in external entities, but it's not entirely accurate.
 | |
|    * @return The next available input character.
 | |
|    * @see #unread (char)
 | |
|    * @see #readDataChunk
 | |
|    * @see #readBuffer
 | |
|    * @see #line
 | |
|    * @return The next character from the current input source.
 | |
|    */
 | |
|   private char readCh()
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     // As long as there's nothing in the
 | |
|     // read buffer, try reading more data
 | |
|     // (for an external entity) or popping
 | |
|     // the entity stack (for either).
 | |
|     while (readBufferPos >= readBufferLength)
 | |
|       {
 | |
|         switch (sourceType)
 | |
|           {
 | |
|           case INPUT_READER:
 | |
|           case INPUT_STREAM:
 | |
|             readDataChunk();
 | |
|             while (readBufferLength < 1)
 | |
|               {
 | |
|                 popInput();
 | |
|                 if (readBufferLength < 1)
 | |
|                   {
 | |
|                     readDataChunk();
 | |
|                   }
 | |
|               }
 | |
|             break;
 | |
| 
 | |
|           default:
 | |
| 
 | |
|             popInput();
 | |
|             break;
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     char c = readBuffer[readBufferPos++];
 | |
| 
 | |
|     if (c == '\n')
 | |
|       {
 | |
|         line++;
 | |
|         column = 0;
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         if (c == '<')
 | |
|           {
 | |
|             /* the most common return to parseContent () ... NOP */
 | |
|           }
 | |
|         else if (((c < 0x0020 && (c != '\t') && (c != '\r')) || c > 0xFFFD)
 | |
|                  || ((c >= 0x007f) && (c <= 0x009f) && (c != 0x0085)
 | |
|                      && xmlVersion == XML_11))
 | |
|           {
 | |
|             error("illegal XML character U+" + Integer.toHexString(c));
 | |
|           }
 | |
| 
 | |
|         // If we're in the DTD and in a context where PEs get expanded,
 | |
|         // do so ... 1/14/2000 errata identify those contexts.  There
 | |
|         // are also spots in the internal subset where PE refs are fatal
 | |
|         // errors, hence yet another flag.
 | |
|         else if (c == '%' && expandPE)
 | |
|           {
 | |
|             if (peIsError)
 | |
|               {
 | |
|                 error("PE reference within decl in internal subset.");
 | |
|               }
 | |
|             parsePEReference();
 | |
|             return readCh();
 | |
|           }
 | |
|         column++;
 | |
|       }
 | |
| 
 | |
|     return c;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Push a single character back onto the current input stream.
 | |
|    * <p>This method usually pushes the character back onto
 | |
|    * the readBuffer.
 | |
|    * <p>I don't think that this would ever be called with
 | |
|    * readBufferPos = 0, because the methods always reads a character
 | |
|    * before unreading it, but just in case, I've added a boundary
 | |
|    * condition.
 | |
|    * @param c The character to push back.
 | |
|    * @see #readCh
 | |
|    * @see #unread (char[])
 | |
|    * @see #readBuffer
 | |
|    */
 | |
|   private void unread(char c)
 | |
|     throws SAXException
 | |
|   {
 | |
|     // Normal condition.
 | |
|     if (c == '\n')
 | |
|       {
 | |
|         line--;
 | |
|         column = -1;
 | |
|       }
 | |
|     if (readBufferPos > 0)
 | |
|       {
 | |
|         readBuffer[--readBufferPos] = c;
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         pushString(null, Character.toString(c));
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Push a char array back onto the current input stream.
 | |
|    * <p>NOTE: you must <em>never</em> push back characters that you
 | |
|    * haven't actually read: use pushString () instead.
 | |
|    * @see #readCh
 | |
|    * @see #unread (char)
 | |
|    * @see #readBuffer
 | |
|    * @see #pushString
 | |
|    */
 | |
|   private void unread(char[] ch, int length)
 | |
|     throws SAXException
 | |
|   {
 | |
|     for (int i = 0; i < length; i++)
 | |
|       {
 | |
|         if (ch[i] == '\n')
 | |
|           {
 | |
|             line--;
 | |
|             column = -1;
 | |
|           }
 | |
|       }
 | |
|     if (length < readBufferPos)
 | |
|       {
 | |
|         readBufferPos -= length;
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         pushCharArray(null, ch, 0, length);
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Push, or skip, a new external input source.
 | |
|    * The source will be some kind of parsed entity, such as a PE
 | |
|    * (including the external DTD subset) or content for the body.
 | |
|    *
 | |
|    * @param url The java.net.URL object for the entity.
 | |
|    * @see SAXDriver#resolveEntity
 | |
|    * @see #pushString
 | |
|    * @see #sourceType
 | |
|    * @see #pushInput
 | |
|    * @see #detectEncoding
 | |
|    * @see #sourceType
 | |
|    * @see #readBuffer
 | |
|    */
 | |
|   private void pushURL(boolean isPE,
 | |
|                        String ename,
 | |
|                        ExternalIdentifiers ids,
 | |
|                        Reader reader,
 | |
|                        InputStream stream,
 | |
|                        String encoding,
 | |
|                        boolean doResolve)
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     boolean ignoreEncoding;
 | |
|     String systemId;
 | |
|     InputSource source;
 | |
| 
 | |
|     if (!isPE)
 | |
|       {
 | |
|         dataBufferFlush();
 | |
|       }
 | |
| 
 | |
|     scratch.setPublicId(ids.publicId);
 | |
|     scratch.setSystemId(ids.systemId);
 | |
| 
 | |
|     // See if we should skip or substitute the entity.
 | |
|     // If we're not skipping, resolving reports startEntity()
 | |
|     // and updates the (handler's) stack of URIs.
 | |
|     if (doResolve)
 | |
|       {
 | |
|         // assert (stream == null && reader == null && encoding == null)
 | |
|         source = handler.resolveEntity(isPE, ename, scratch, ids.baseUri);
 | |
|         if (source == null)
 | |
|           {
 | |
|             handler.warn("skipping entity: " + ename);
 | |
|             handler.skippedEntity(ename);
 | |
|             if (isPE)
 | |
|               {
 | |
|                 skippedPE = true;
 | |
|               }
 | |
|             return;
 | |
|           }
 | |
| 
 | |
|         // we might be using alternate IDs/encoding
 | |
|         systemId = source.getSystemId();
 | |
|         // The following warning and setting systemId was deleted bcause
 | |
|         // the application has the option of not setting systemId
 | |
|         // provided that it has set the characte/byte stream.
 | |
|         /*
 | |
|            if (systemId == null) {
 | |
|            handler.warn ("missing system ID, using " + ids.systemId);
 | |
|            systemId = ids.systemId;
 | |
|            }
 | |
|          */
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         // "[document]", or "[dtd]" via getExternalSubset()
 | |
|         scratch.setCharacterStream(reader);
 | |
|         scratch.setByteStream(stream);
 | |
|         scratch.setEncoding(encoding);
 | |
|         source = scratch;
 | |
|         systemId = ids.systemId;
 | |
|         if (handler.stringInterning)
 | |
|           {
 | |
|             handler.startExternalEntity(ename, systemId,
 | |
|                                         "[document]" == ename);
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             handler.startExternalEntity(ename, systemId,
 | |
|                                         "[document]".equals(ename));
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     // we may have been given I/O streams directly
 | |
|     if (source.getCharacterStream() != null)
 | |
|       {
 | |
|         if (source.getByteStream() != null)
 | |
|           error("InputSource has two streams!");
 | |
|         reader = source.getCharacterStream();
 | |
|       }
 | |
|     else if (source.getByteStream() != null)
 | |
|       {
 | |
|         encoding = source.getEncoding();
 | |
|         if (encoding == null)
 | |
|           {
 | |
|             stream = source.getByteStream();
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             try
 | |
|               {
 | |
|                 reader = new InputStreamReader(source.getByteStream(),
 | |
|                                                encoding);
 | |
|               }
 | |
|             catch (IOException e)
 | |
|               {
 | |
|                 stream = source.getByteStream();
 | |
|               }
 | |
|           }
 | |
|       }
 | |
|     else if (systemId == null)
 | |
|       {
 | |
|         error("InputSource has no URI!");
 | |
|       }
 | |
|     scratch.setCharacterStream(null);
 | |
|     scratch.setByteStream(null);
 | |
|     scratch.setEncoding(null);
 | |
| 
 | |
|     // Push the existing status.
 | |
|     pushInput(ename);
 | |
| 
 | |
|     // Create a new read buffer.
 | |
|     // (Note the four-character margin)
 | |
|     readBuffer = new char[READ_BUFFER_MAX + 4];
 | |
|     readBufferPos = 0;
 | |
|     readBufferLength = 0;
 | |
|     readBufferOverflow = -1;
 | |
|     is = null;
 | |
|     line = 1;
 | |
|     column = 0;
 | |
|     currentByteCount = 0;
 | |
| 
 | |
|     // If there's an explicit character stream, just
 | |
|     // ignore encoding declarations.
 | |
|     if (reader != null)
 | |
|       {
 | |
|         sourceType = INPUT_READER;
 | |
|         this.reader = reader;
 | |
|         tryEncodingDecl(true);
 | |
|         return;
 | |
|       }
 | |
| 
 | |
|     // Else we handle the conversion, and need to ensure
 | |
|     // it's done right.
 | |
|     sourceType = INPUT_STREAM;
 | |
|     if (stream != null)
 | |
|       {
 | |
|         is = stream;
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         // We have to open our own stream to the URL.
 | |
|         URL url = new URL(systemId);
 | |
| 
 | |
|         externalEntity = url.openConnection();
 | |
|         externalEntity.connect();
 | |
|         is = externalEntity.getInputStream();
 | |
|       }
 | |
| 
 | |
|     // If we get to here, there must be
 | |
|     // an InputStream available.
 | |
|     if (!is.markSupported())
 | |
|       {
 | |
|         is = new BufferedInputStream(is);
 | |
|       }
 | |
| 
 | |
|     // Get any external encoding label.
 | |
|     if (encoding == null && externalEntity != null)
 | |
|       {
 | |
|         // External labels can be untrustworthy; filesystems in
 | |
|         // particular often have the wrong default for content
 | |
|         // that wasn't locally originated.  Those we autodetect.
 | |
|         if (!"file".equals(externalEntity.getURL().getProtocol()))
 | |
|           {
 | |
|             int temp;
 | |
| 
 | |
|             // application/xml;charset=something;otherAttr=...
 | |
|             // ... with many variants on 'something'
 | |
|             encoding = externalEntity.getContentType();
 | |
| 
 | |
|             // MHK code (fix for Saxon 5.5.1/007):
 | |
|             // protect against encoding==null
 | |
|             if (encoding == null)
 | |
|               {
 | |
|                 temp = -1;
 | |
|               }
 | |
|             else
 | |
|               {
 | |
|                 temp = encoding.indexOf("charset");
 | |
|               }
 | |
| 
 | |
|             // RFC 2376 sez MIME text defaults to ASCII, but since the
 | |
|             // JDK will create a MIME type out of thin air, we always
 | |
|             // autodetect when there's no explicit charset attribute.
 | |
|             if (temp < 0)
 | |
|               {
 | |
|                 encoding = null;  // autodetect
 | |
|               }
 | |
|             else
 | |
|               {
 | |
|                 // only this one attribute
 | |
|                 if ((temp = encoding.indexOf(';')) > 0)
 | |
|                   {
 | |
|                     encoding = encoding.substring(0, temp);
 | |
|                   }
 | |
| 
 | |
|                 if ((temp = encoding.indexOf('=', temp + 7)) > 0)
 | |
|                   {
 | |
|                     encoding = encoding.substring(temp + 1);
 | |
| 
 | |
|                     // attributes can have comment fields (RFC 822)
 | |
|                     if ((temp = encoding.indexOf('(')) > 0)
 | |
|                       {
 | |
|                         encoding = encoding.substring(0, temp);
 | |
|                       }
 | |
|                     // ... and values may be quoted
 | |
|                     if ((temp = encoding.indexOf('"')) > 0)
 | |
|                       {
 | |
|                         encoding =
 | |
|                           encoding.substring(temp + 1,
 | |
|                                              encoding.indexOf('"', temp + 2));
 | |
|                       }
 | |
|                     encoding = encoding.trim();
 | |
|                   }
 | |
|                 else
 | |
|                   {
 | |
|                     handler.warn("ignoring illegal MIME attribute: "
 | |
|                                  + encoding);
 | |
|                     encoding = null;
 | |
|                   }
 | |
|               }
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     // if we got an external encoding label, use it ...
 | |
|     if (encoding != null)
 | |
|       {
 | |
|         this.encoding = ENCODING_EXTERNAL;
 | |
|         setupDecoding(encoding);
 | |
|         ignoreEncoding = true;
 | |
| 
 | |
|         // ... else autodetect from first bytes.
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         detectEncoding();
 | |
|         ignoreEncoding = false;
 | |
|       }
 | |
| 
 | |
|     // Read any XML or text declaration.
 | |
|     // If we autodetected, it may tell us the "real" encoding.
 | |
|     try
 | |
|       {
 | |
|         tryEncodingDecl(ignoreEncoding);
 | |
|       }
 | |
|     catch (UnsupportedEncodingException x)
 | |
|       {
 | |
|         encoding = x.getMessage();
 | |
| 
 | |
|         // if we don't handle the declared encoding,
 | |
|         // try letting a JVM InputStreamReader do it
 | |
|         try
 | |
|           {
 | |
|             if (sourceType != INPUT_STREAM)
 | |
|               {
 | |
|                 throw x;
 | |
|               }
 | |
| 
 | |
|             is.reset();
 | |
|             readBufferPos = 0;
 | |
|             readBufferLength = 0;
 | |
|             readBufferOverflow = -1;
 | |
|             line = 1;
 | |
|             currentByteCount = column = 0;
 | |
| 
 | |
|             sourceType = INPUT_READER;
 | |
|             this.reader = new InputStreamReader(is, encoding);
 | |
|             is = null;
 | |
| 
 | |
|             tryEncodingDecl(true);
 | |
| 
 | |
|           }
 | |
|         catch (IOException e)
 | |
|           {
 | |
|             error("unsupported text encoding",
 | |
|                   encoding,
 | |
|                   null);
 | |
|           }
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Check for an encoding declaration.  This is the second part of the
 | |
|    * XML encoding autodetection algorithm, relying on detectEncoding to
 | |
|    * get to the point that this part can read any encoding declaration
 | |
|    * in the document (using only US-ASCII characters).
 | |
|    *
 | |
|    * <p> Because this part starts to fill parser buffers with this data,
 | |
|    * it's tricky to setup a reader so that Java's built-in decoders can be
 | |
|    * used for the character encodings that aren't built in to this parser
 | |
|    * (such as EUC-JP, KOI8-R, Big5, etc).
 | |
|    *
 | |
|    * @return any encoding in the declaration, uppercased; or null
 | |
|    * @see detectEncoding
 | |
|    */
 | |
|   private String tryEncodingDecl(boolean ignoreEncoding)
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     // Read the XML/text declaration.
 | |
|     if (tryRead("<?xml"))
 | |
|       {
 | |
|         if (tryWhitespace())
 | |
|           {
 | |
|             if (inputStack.size() > 0)
 | |
|               {
 | |
|                 return parseTextDecl(ignoreEncoding);
 | |
|               }
 | |
|             else
 | |
|               {
 | |
|                 return parseXMLDecl(ignoreEncoding);
 | |
|               }
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             // <?xml-stylesheet ...?> or similar
 | |
|             unread('l');
 | |
|             unread('m');
 | |
|             unread('x');
 | |
|             unread('?');
 | |
|             unread('<');
 | |
|           }
 | |
|       }
 | |
|     return null;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Attempt to detect the encoding of an entity.
 | |
|    * <p>The trick here (as suggested in the XML standard) is that
 | |
|    * any entity not in UTF-8, or in UCS-2 with a byte-order mark,
 | |
|    * <b>must</b> begin with an XML declaration or an encoding
 | |
|    * declaration; we simply have to look for "<?xml" in various
 | |
|    * encodings.
 | |
|    * <p>This method has no way to distinguish among 8-bit encodings.
 | |
|    * Instead, it sets up for UTF-8, then (possibly) revises its assumption
 | |
|    * later in setupDecoding ().  Any ASCII-derived 8-bit encoding
 | |
|    * should work, but most will be rejected later by setupDecoding ().
 | |
|    * @see #tryEncoding (byte[], byte, byte, byte, byte)
 | |
|    * @see #tryEncoding (byte[], byte, byte)
 | |
|    * @see #setupDecoding
 | |
|    */
 | |
|   private void detectEncoding()
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     byte[] signature = new byte[4];
 | |
| 
 | |
|     // Read the first four bytes for
 | |
|     // autodetection.
 | |
|     is.mark(4);
 | |
|     is.read(signature);
 | |
|     is.reset();
 | |
| 
 | |
|     //
 | |
|     // FIRST:  four byte encodings (who uses these?)
 | |
|     //
 | |
|     if (tryEncoding(signature, (byte) 0x00, (byte) 0x00,
 | |
|                     (byte) 0x00, (byte) 0x3c))
 | |
|       {
 | |
|         // UCS-4 must begin with "<?xml"
 | |
|         // 0x00 0x00 0x00 0x3c: UCS-4, big-endian (1234)
 | |
|         // "UTF-32BE"
 | |
|         encoding = ENCODING_UCS_4_1234;
 | |
|       }
 | |
|     else if (tryEncoding(signature, (byte) 0x3c, (byte) 0x00,
 | |
|                          (byte) 0x00, (byte) 0x00))
 | |
|       {
 | |
|         // 0x3c 0x00 0x00 0x00: UCS-4, little-endian (4321)
 | |
|         // "UTF-32LE"
 | |
|         encoding = ENCODING_UCS_4_4321;
 | |
|       }
 | |
|     else if (tryEncoding(signature, (byte) 0x00, (byte) 0x00,
 | |
|                          (byte) 0x3c, (byte) 0x00))
 | |
|       {
 | |
|         // 0x00 0x00 0x3c 0x00: UCS-4, unusual (2143)
 | |
|         encoding = ENCODING_UCS_4_2143;
 | |
|       }
 | |
|     else if (tryEncoding(signature, (byte) 0x00, (byte) 0x3c,
 | |
|                          (byte) 0x00, (byte) 0x00))
 | |
|       {
 | |
|         // 0x00 0x3c 0x00 0x00: UCS-4, unusual (3421)
 | |
|         encoding = ENCODING_UCS_4_3412;
 | |
| 
 | |
|         // 00 00 fe ff UCS_4_1234 (with BOM)
 | |
|         // ff fe 00 00 UCS_4_4321 (with BOM)
 | |
|       }
 | |
| 
 | |
|     //
 | |
|     // SECOND:  two byte encodings
 | |
|     // note ... with 1/14/2000 errata the XML spec identifies some
 | |
|     // more "broken UTF-16" autodetection cases, with no XML decl,
 | |
|     // which we don't handle here (that's legal too).
 | |
|     //
 | |
|     else if (tryEncoding(signature, (byte) 0xfe, (byte) 0xff))
 | |
|       {
 | |
|         // UCS-2 with a byte-order marker. (UTF-16)
 | |
|         // 0xfe 0xff: UCS-2, big-endian (12)
 | |
|         encoding = ENCODING_UCS_2_12;
 | |
|         is.read(); is.read();
 | |
|       }
 | |
|     else if (tryEncoding(signature, (byte) 0xff, (byte) 0xfe))
 | |
|       {
 | |
|         // UCS-2 with a byte-order marker. (UTF-16)
 | |
|         // 0xff 0xfe: UCS-2, little-endian (21)
 | |
|         encoding = ENCODING_UCS_2_21;
 | |
|         is.read(); is.read();
 | |
|       }
 | |
|     else if (tryEncoding(signature, (byte) 0x00, (byte) 0x3c,
 | |
|                          (byte) 0x00, (byte) 0x3f))
 | |
|       {
 | |
|         // UTF-16BE (otherwise, malformed UTF-16)
 | |
|         // 0x00 0x3c 0x00 0x3f: UCS-2, big-endian, no byte-order mark
 | |
|         encoding = ENCODING_UCS_2_12;
 | |
|         error("no byte-order mark for UCS-2 entity");
 | |
|       }
 | |
|     else if (tryEncoding(signature, (byte) 0x3c, (byte) 0x00,
 | |
|                          (byte) 0x3f, (byte) 0x00))
 | |
|       {
 | |
|         // UTF-16LE (otherwise, malformed UTF-16)
 | |
|         // 0x3c 0x00 0x3f 0x00: UCS-2, little-endian, no byte-order mark
 | |
|         encoding = ENCODING_UCS_2_21;
 | |
|         error("no byte-order mark for UCS-2 entity");
 | |
|       }
 | |
| 
 | |
|     //
 | |
|     // THIRD:  ASCII-derived encodings, fixed and variable lengths
 | |
|     //
 | |
|     else if (tryEncoding(signature, (byte) 0x3c, (byte) 0x3f,
 | |
|                          (byte) 0x78, (byte) 0x6d))
 | |
|       {
 | |
|         // ASCII derived
 | |
|         // 0x3c 0x3f 0x78 0x6d: UTF-8 or other 8-bit markup (read ENCODING)
 | |
|         encoding = ENCODING_UTF_8;
 | |
|         prefetchASCIIEncodingDecl();
 | |
|       }
 | |
|     else if (signature[0] == (byte) 0xef
 | |
|              && signature[1] == (byte) 0xbb
 | |
|              && signature[2] == (byte) 0xbf)
 | |
|       {
 | |
|         // 0xef 0xbb 0xbf: UTF-8 BOM (not part of document text)
 | |
|         // this un-needed notion slipped into XML 2nd ed through a
 | |
|         // "non-normative" erratum; now required by MSFT and UDDI,
 | |
|         // and E22 made it normative.
 | |
|         encoding = ENCODING_UTF_8;
 | |
|         is.read(); is.read(); is.read();
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         // 4c 6f a7 94 ... we don't understand EBCDIC flavors
 | |
|         // ... but we COULD at least kick in some fixed code page
 | |
| 
 | |
|         // (default) UTF-8 without encoding/XML declaration
 | |
|         encoding = ENCODING_UTF_8;
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Check for a four-byte signature.
 | |
|    * <p>Utility routine for detectEncoding ().
 | |
|    * <p>Always looks for some part of "<?XML" in a specific encoding.
 | |
|    * @param sig The first four bytes read.
 | |
|    * @param b1 The first byte of the signature
 | |
|    * @param b2 The second byte of the signature
 | |
|    * @param b3 The third byte of the signature
 | |
|    * @param b4 The fourth byte of the signature
 | |
|    * @see #detectEncoding
 | |
|    */
 | |
|   private static boolean tryEncoding(byte[] sig, byte b1, byte b2,
 | |
|                                      byte b3, byte b4)
 | |
|   {
 | |
|     return (sig[0] == b1 && sig[1] == b2
 | |
|             && sig[2] == b3 && sig[3] == b4);
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Check for a two-byte signature.
 | |
|    * <p>Looks for a UCS-2 byte-order mark.
 | |
|    * <p>Utility routine for detectEncoding ().
 | |
|    * @param sig The first four bytes read.
 | |
|    * @param b1 The first byte of the signature
 | |
|    * @param b2 The second byte of the signature
 | |
|    * @see #detectEncoding
 | |
|    */
 | |
|   private static boolean tryEncoding(byte[] sig, byte b1, byte b2)
 | |
|   {
 | |
|     return ((sig[0] == b1) && (sig[1] == b2));
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * This method pushes a string back onto input.
 | |
|    * <p>It is useful either as the expansion of an internal entity,
 | |
|    * or for backtracking during the parse.
 | |
|    * <p>Call pushCharArray () to do the actual work.
 | |
|    * @param s The string to push back onto input.
 | |
|    * @see #pushCharArray
 | |
|    */
 | |
|   private void pushString(String ename, String s)
 | |
|     throws SAXException
 | |
|   {
 | |
|     char[] ch = s.toCharArray();
 | |
|     pushCharArray(ename, ch, 0, ch.length);
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Push a new internal input source.
 | |
|    * <p>This method is useful for expanding an internal entity,
 | |
|    * or for unreading a string of characters.  It creates a new
 | |
|    * readBuffer containing the characters in the array, instead
 | |
|    * of characters converted from an input byte stream.
 | |
|    * @param ch The char array to push.
 | |
|    * @see #pushString
 | |
|    * @see #pushURL
 | |
|    * @see #readBuffer
 | |
|    * @see #sourceType
 | |
|    * @see #pushInput
 | |
|    */
 | |
|   private void pushCharArray(String ename, char[] ch, int start, int length)
 | |
|     throws SAXException
 | |
|   {
 | |
|     // Push the existing status
 | |
|     pushInput(ename);
 | |
|     if (ename != null && doReport)
 | |
|       {
 | |
|         dataBufferFlush();
 | |
|         handler.startInternalEntity(ename);
 | |
|       }
 | |
|     sourceType = INPUT_INTERNAL;
 | |
|     readBuffer = ch;
 | |
|     readBufferPos = start;
 | |
|     readBufferLength = length;
 | |
|     readBufferOverflow = -1;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Save the current input source onto the stack.
 | |
|    * <p>This method saves all of the global variables associated with
 | |
|    * the current input source, so that they can be restored when a new
 | |
|    * input source has finished.  It also tests for entity recursion.
 | |
|    * <p>The method saves the following global variables onto a stack
 | |
|    * using a fixed-length array:
 | |
|    * <ol>
 | |
|    * <li>sourceType
 | |
|    * <li>externalEntity
 | |
|    * <li>readBuffer
 | |
|    * <li>readBufferPos
 | |
|    * <li>readBufferLength
 | |
|    * <li>line
 | |
|    * <li>encoding
 | |
|    * </ol>
 | |
|    * @param ename The name of the entity (if any) causing the new input.
 | |
|    * @see #popInput
 | |
|    * @see #sourceType
 | |
|    * @see #externalEntity
 | |
|    * @see #readBuffer
 | |
|    * @see #readBufferPos
 | |
|    * @see #readBufferLength
 | |
|    * @see #line
 | |
|    * @see #encoding
 | |
|    */
 | |
|   private void pushInput(String ename)
 | |
|     throws SAXException
 | |
|   {
 | |
|     // Check for entity recursion.
 | |
|     if (ename != null)
 | |
|       {
 | |
|         Iterator entities = entityStack.iterator();
 | |
|         while (entities.hasNext())
 | |
|           {
 | |
|             String e = (String) entities.next();
 | |
|             if (e != null && e == ename)
 | |
|               {
 | |
|                 error("recursive reference to entity", ename, null);
 | |
|               }
 | |
|           }
 | |
|       }
 | |
|     entityStack.addLast(ename);
 | |
| 
 | |
|     // Don't bother if there is no current input.
 | |
|     if (sourceType == INPUT_NONE)
 | |
|       {
 | |
|         return;
 | |
|       }
 | |
| 
 | |
|     // Set up a snapshot of the current
 | |
|     // input source.
 | |
|     Input input = new Input();
 | |
| 
 | |
|     input.sourceType = sourceType;
 | |
|     input.externalEntity = externalEntity;
 | |
|     input.readBuffer = readBuffer;
 | |
|     input.readBufferPos = readBufferPos;
 | |
|     input.readBufferLength = readBufferLength;
 | |
|     input.line = line;
 | |
|     input.encoding = encoding;
 | |
|     input.readBufferOverflow = readBufferOverflow;
 | |
|     input.is = is;
 | |
|     input.currentByteCount = currentByteCount;
 | |
|     input.column = column;
 | |
|     input.reader = reader;
 | |
| 
 | |
|     // Push it onto the stack.
 | |
|     inputStack.addLast(input);
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Restore a previous input source.
 | |
|    * <p>This method restores all of the global variables associated with
 | |
|    * the current input source.
 | |
|    * @exception java.io.EOFException
 | |
|    *    If there are no more entries on the input stack.
 | |
|    * @see #pushInput
 | |
|    * @see #sourceType
 | |
|    * @see #externalEntity
 | |
|    * @see #readBuffer
 | |
|    * @see #readBufferPos
 | |
|    * @see #readBufferLength
 | |
|    * @see #line
 | |
|    * @see #encoding
 | |
|    */
 | |
|   private void popInput()
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     String ename = (String) entityStack.removeLast();
 | |
| 
 | |
|     if (ename != null && doReport)
 | |
|       {
 | |
|         dataBufferFlush();
 | |
|       }
 | |
|     switch (sourceType)
 | |
|       {
 | |
|       case INPUT_STREAM:
 | |
|         handler.endExternalEntity(ename);
 | |
|         is.close();
 | |
|         break;
 | |
|       case INPUT_READER:
 | |
|         handler.endExternalEntity(ename);
 | |
|         reader.close();
 | |
|         break;
 | |
|       case INPUT_INTERNAL:
 | |
|         if (ename != null && doReport)
 | |
|           {
 | |
|             handler.endInternalEntity(ename);
 | |
|           }
 | |
|         break;
 | |
|       }
 | |
| 
 | |
|     // Throw an EOFException if there
 | |
|     // is nothing else to pop.
 | |
|     if (inputStack.isEmpty())
 | |
|       {
 | |
|         throw new EOFException("no more input");
 | |
|       }
 | |
| 
 | |
|     Input input = (Input) inputStack.removeLast();
 | |
| 
 | |
|     sourceType = input.sourceType;
 | |
|     externalEntity = input.externalEntity;
 | |
|     readBuffer = input.readBuffer;
 | |
|     readBufferPos = input.readBufferPos;
 | |
|     readBufferLength = input.readBufferLength;
 | |
|     line = input.line;
 | |
|     encoding = input.encoding;
 | |
|     readBufferOverflow = input.readBufferOverflow;
 | |
|     is = input.is;
 | |
|     currentByteCount = input.currentByteCount;
 | |
|     column = input.column;
 | |
|     reader = input.reader;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Return true if we can read the expected character.
 | |
|    * <p>Note that the character will be removed from the input stream
 | |
|    * on success, but will be put back on failure.  Do not attempt to
 | |
|    * read the character again if the method succeeds.
 | |
|    * @param delim The character that should appear next.  For a
 | |
|    *        insensitive match, you must supply this in upper-case.
 | |
|    * @return true if the character was successfully read, or false if
 | |
|    *   it was not.
 | |
|    * @see #tryRead (String)
 | |
|    */
 | |
|   private boolean tryRead(char delim)
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     char c;
 | |
| 
 | |
|     // Read the character
 | |
|     c = readCh();
 | |
| 
 | |
|     // Test for a match, and push the character
 | |
|     // back if the match fails.
 | |
|     if (c == delim)
 | |
|       {
 | |
|         return true;
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         unread(c);
 | |
|         return false;
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Return true if we can read the expected string.
 | |
|    * <p>This is simply a convenience method.
 | |
|    * <p>Note that the string will be removed from the input stream
 | |
|    * on success, but will be put back on failure.  Do not attempt to
 | |
|    * read the string again if the method succeeds.
 | |
|    * <p>This method will push back a character rather than an
 | |
|    * array whenever possible (probably the majority of cases).
 | |
|    * @param delim The string that should appear next.
 | |
|    * @return true if the string was successfully read, or false if
 | |
|    *   it was not.
 | |
|    * @see #tryRead (char)
 | |
|    */
 | |
|   private boolean tryRead(String delim)
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     return tryRead(delim.toCharArray());
 | |
|   }
 | |
| 
 | |
|   private boolean tryRead(char[] ch)
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     char c;
 | |
| 
 | |
|     // Compare the input, character-
 | |
|     // by character.
 | |
| 
 | |
|     for (int i = 0; i < ch.length; i++)
 | |
|       {
 | |
|         c = readCh();
 | |
|         if (c != ch[i])
 | |
|           {
 | |
|             unread(c);
 | |
|             if (i != 0)
 | |
|               {
 | |
|                 unread(ch, i);
 | |
|               }
 | |
|             return false;
 | |
|           }
 | |
|       }
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Return true if we can read some whitespace.
 | |
|    * <p>This is simply a convenience method.
 | |
|    * <p>This method will push back a character rather than an
 | |
|    * array whenever possible (probably the majority of cases).
 | |
|    * @return true if whitespace was found.
 | |
|    */
 | |
|   private boolean tryWhitespace()
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     char c;
 | |
|     c = readCh();
 | |
|     if (isWhitespace(c))
 | |
|       {
 | |
|         skipWhitespace();
 | |
|         return true;
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         unread(c);
 | |
|         return false;
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Read all data until we find the specified string.
 | |
|    * This is useful for scanning CDATA sections and PIs.
 | |
|    * <p>This is inefficient right now, since it calls tryRead ()
 | |
|    * for every character.
 | |
|    * @param delim The string delimiter
 | |
|    * @see #tryRead (String, boolean)
 | |
|    * @see #readCh
 | |
|    */
 | |
|   private void parseUntil(String delim)
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     parseUntil(delim.toCharArray());
 | |
|   }
 | |
| 
 | |
|   private void parseUntil(char[] delim)
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     char c;
 | |
|     int startLine = line;
 | |
| 
 | |
|     try
 | |
|       {
 | |
|         while (!tryRead(delim))
 | |
|           {
 | |
|             c = readCh();
 | |
|             dataBufferAppend(c);
 | |
|           }
 | |
|       }
 | |
|     catch (EOFException e)
 | |
|       {
 | |
|         error("end of input while looking for delimiter "
 | |
|               + "(started on line " + startLine
 | |
|               + ')', null, new String(delim));
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   //////////////////////////////////////////////////////////////////////
 | |
|   // Low-level I/O.
 | |
|   //////////////////////////////////////////////////////////////////////
 | |
| 
 | |
|   /**
 | |
|    * Prefetch US-ASCII XML/text decl from input stream into read buffer.
 | |
|    * Doesn't buffer more than absolutely needed, so that when an encoding
 | |
|    * decl says we need to create an InputStreamReader, we can discard our
 | |
|    * buffer and reset().  Caller knows the first chars of the decl exist
 | |
|    * in the input stream.
 | |
|    */
 | |
|   private void prefetchASCIIEncodingDecl()
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     int ch;
 | |
|     readBufferPos = readBufferLength = 0;
 | |
| 
 | |
|     is.mark(readBuffer.length);
 | |
|     while (true)
 | |
|       {
 | |
|         ch = is.read();
 | |
|         readBuffer[readBufferLength++] = (char) ch;
 | |
|         switch (ch)
 | |
|           {
 | |
|           case (int) '>':
 | |
|             return;
 | |
|           case -1:
 | |
|             error("file ends before end of XML or encoding declaration.",
 | |
|                   null, "?>");
 | |
|           }
 | |
|         if (readBuffer.length == readBufferLength)
 | |
|           {
 | |
|             error("unfinished XML or encoding declaration");
 | |
|           }
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Read a chunk of data from an external input source.
 | |
|    * <p>This is simply a front-end that fills the rawReadBuffer
 | |
|    * with bytes, then calls the appropriate encoding handler.
 | |
|    * @see #encoding
 | |
|    * @see #rawReadBuffer
 | |
|    * @see #readBuffer
 | |
|    * @see #filterCR
 | |
|    * @see #copyUtf8ReadBuffer
 | |
|    * @see #copyIso8859_1ReadBuffer
 | |
|    * @see #copyUcs_2ReadBuffer
 | |
|    * @see #copyUcs_4ReadBuffer
 | |
|    */
 | |
|   private void readDataChunk()
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     int count;
 | |
| 
 | |
|     // See if we have any overflow (filterCR sets for CR at end)
 | |
|     if (readBufferOverflow > -1)
 | |
|       {
 | |
|         readBuffer[0] = (char) readBufferOverflow;
 | |
|         readBufferOverflow = -1;
 | |
|         readBufferPos = 1;
 | |
|         sawCR = true;
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         readBufferPos = 0;
 | |
|         sawCR = false;
 | |
|       }
 | |
| 
 | |
|     // input from a character stream.
 | |
|     if (sourceType == INPUT_READER)
 | |
|       {
 | |
|         count = reader.read(readBuffer,
 | |
|                             readBufferPos, READ_BUFFER_MAX - readBufferPos);
 | |
|         if (count < 0)
 | |
|           {
 | |
|             readBufferLength = readBufferPos;
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             readBufferLength = readBufferPos + count;
 | |
|           }
 | |
|         if (readBufferLength > 0)
 | |
|           {
 | |
|             filterCR(count >= 0);
 | |
|           }
 | |
|         sawCR = false;
 | |
|         return;
 | |
|       }
 | |
| 
 | |
|     // Read as many bytes as possible into the raw buffer.
 | |
|     count = is.read(rawReadBuffer, 0, READ_BUFFER_MAX);
 | |
| 
 | |
|     // Dispatch to an encoding-specific reader method to populate
 | |
|     // the readBuffer.  In most parser speed profiles, these routines
 | |
|     // show up at the top of the CPU usage chart.
 | |
|     if (count > 0)
 | |
|       {
 | |
|         switch (encoding)
 | |
|           {
 | |
|             // one byte builtins
 | |
|           case ENCODING_ASCII:
 | |
|             copyIso8859_1ReadBuffer(count, (char) 0x0080);
 | |
|             break;
 | |
|           case ENCODING_UTF_8:
 | |
|             copyUtf8ReadBuffer(count);
 | |
|             break;
 | |
|           case ENCODING_ISO_8859_1:
 | |
|             copyIso8859_1ReadBuffer(count, (char) 0);
 | |
|             break;
 | |
| 
 | |
|             // two byte builtins
 | |
|           case ENCODING_UCS_2_12:
 | |
|             copyUcs2ReadBuffer(count, 8, 0);
 | |
|             break;
 | |
|           case ENCODING_UCS_2_21:
 | |
|             copyUcs2ReadBuffer(count, 0, 8);
 | |
|             break;
 | |
| 
 | |
|             // four byte builtins
 | |
|           case ENCODING_UCS_4_1234:
 | |
|             copyUcs4ReadBuffer(count, 24, 16, 8, 0);
 | |
|             break;
 | |
|           case ENCODING_UCS_4_4321:
 | |
|             copyUcs4ReadBuffer(count, 0, 8, 16, 24);
 | |
|             break;
 | |
|           case ENCODING_UCS_4_2143:
 | |
|             copyUcs4ReadBuffer(count, 16, 24, 0, 8);
 | |
|             break;
 | |
|           case ENCODING_UCS_4_3412:
 | |
|             copyUcs4ReadBuffer(count, 8, 0, 24, 16);
 | |
|             break;
 | |
|           }
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         readBufferLength = readBufferPos;
 | |
|       }
 | |
| 
 | |
|     readBufferPos = 0;
 | |
| 
 | |
|     // Filter out all carriage returns if we've seen any
 | |
|     // (including any saved from a previous read)
 | |
|     if (sawCR)
 | |
|       {
 | |
|         filterCR(count >= 0);
 | |
|         sawCR = false;
 | |
| 
 | |
|         // must actively report EOF, lest some CRs get lost.
 | |
|         if (readBufferLength == 0 && count >= 0)
 | |
|           {
 | |
|             readDataChunk();
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     if (count > 0)
 | |
|       {
 | |
|         currentByteCount += count;
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Filter carriage returns in the read buffer.
 | |
|    * CRLF becomes LF; CR becomes LF.
 | |
|    * @param moreData true iff more data might come from the same source
 | |
|    * @see #readDataChunk
 | |
|    * @see #readBuffer
 | |
|    * @see #readBufferOverflow
 | |
|    */
 | |
|   private void filterCR(boolean moreData)
 | |
|   {
 | |
|     int i, j;
 | |
| 
 | |
|     readBufferOverflow = -1;
 | |
| 
 | |
| loop:
 | |
|     for (i = j = readBufferPos; j < readBufferLength; i++, j++)
 | |
|       {
 | |
|         switch (readBuffer[j])
 | |
|           {
 | |
|           case '\r':
 | |
|             if (j == readBufferLength - 1)
 | |
|               {
 | |
|                 if (moreData)
 | |
|                   {
 | |
|                     readBufferOverflow = '\r';
 | |
|                     readBufferLength--;
 | |
|                   }
 | |
|                 else   // CR at end of buffer
 | |
|                   {
 | |
|                     readBuffer[i++] = '\n';
 | |
|                   }
 | |
|                 break loop;
 | |
|               }
 | |
|             else if (readBuffer[j + 1] == '\n')
 | |
|               {
 | |
|                 j++;
 | |
|               }
 | |
|             readBuffer[i] = '\n';
 | |
|             break;
 | |
| 
 | |
|           case '\n':
 | |
|           default:
 | |
|             readBuffer[i] = readBuffer[j];
 | |
|             break;
 | |
|           }
 | |
|       }
 | |
|     readBufferLength = i;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Convert a buffer of UTF-8-encoded bytes into UTF-16 characters.
 | |
|    * <p>When readDataChunk () calls this method, the raw bytes are in
 | |
|    * rawReadBuffer, and the final characters will appear in
 | |
|    * readBuffer.
 | |
|    * <p>Note that as of Unicode 3.1, good practice became a requirement,
 | |
|    * so that each Unicode character has exactly one UTF-8 representation.
 | |
|    * @param count The number of bytes to convert.
 | |
|    * @see #readDataChunk
 | |
|    * @see #rawReadBuffer
 | |
|    * @see #readBuffer
 | |
|    * @see #getNextUtf8Byte
 | |
|    */
 | |
|   private void copyUtf8ReadBuffer(int count)
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     int i = 0;
 | |
|     int j = readBufferPos;
 | |
|     int b1;
 | |
|     char c = 0;
 | |
| 
 | |
|     /*
 | |
|     // check once, so the runtime won't (if it's smart enough)
 | |
|     if (count < 0 || count > rawReadBuffer.length)
 | |
|     throw new ArrayIndexOutOfBoundsException (Integer.toString (count));
 | |
|      */
 | |
| 
 | |
|     while (i < count)
 | |
|       {
 | |
|         b1 = rawReadBuffer[i++];
 | |
| 
 | |
|         // Determine whether we are dealing
 | |
|         // with a one-, two-, three-, or four-
 | |
|         // byte sequence.
 | |
|         if (b1 < 0)
 | |
|           {
 | |
|             if ((b1 & 0xe0) == 0xc0)
 | |
|               {
 | |
|                 // 2-byte sequence: 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
 | |
|                 c = (char) (((b1 & 0x1f) << 6)
 | |
|                             | getNextUtf8Byte(i++, count));
 | |
|                 if (c < 0x0080)
 | |
|                   {
 | |
|                     encodingError("Illegal two byte UTF-8 sequence",
 | |
|                                   c, 0);
 | |
|                   }
 | |
| 
 | |
|                 //Sec 2.11
 | |
|                 // [1] the two-character sequence #xD #xA
 | |
|                 // [2] the two-character sequence #xD #x85
 | |
|                 if ((c == 0x0085 || c == 0x000a) && sawCR)
 | |
|                   {
 | |
|                     continue;
 | |
|                   }
 | |
| 
 | |
|                 // Sec 2.11
 | |
|                 // [3] the single character #x85
 | |
| 
 | |
|                 if (c == 0x0085 && xmlVersion == XML_11)
 | |
|                   {
 | |
|                     readBuffer[j++] = '\r';
 | |
|                   }
 | |
|               }
 | |
|             else if ((b1 & 0xf0) == 0xe0)
 | |
|               {
 | |
|                 // 3-byte sequence:
 | |
|                 // zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx
 | |
|                 // most CJKV characters
 | |
|                 c = (char) (((b1 & 0x0f) << 12) |
 | |
|                             (getNextUtf8Byte(i++, count) << 6) |
 | |
|                             getNextUtf8Byte(i++, count));
 | |
|                 //sec 2.11
 | |
|                 //[4] the single character #x2028
 | |
|                 if (c == 0x2028 && xmlVersion == XML_11)
 | |
|                   {
 | |
|                     readBuffer[j++] = '\r';
 | |
|                     sawCR = true;
 | |
|                     continue;
 | |
|                   }
 | |
|                 if (c < 0x0800 || (c >= 0xd800 && c <= 0xdfff))
 | |
|                   {
 | |
|                     encodingError("Illegal three byte UTF-8 sequence",
 | |
|                                   c, 0);
 | |
|                   }
 | |
|               }
 | |
|             else if ((b1 & 0xf8) == 0xf0)
 | |
|               {
 | |
|                 // 4-byte sequence: 11101110wwwwzzzzyy + 110111yyyyxxxxxx
 | |
|                 //     = 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
 | |
|                 // (uuuuu = wwww + 1)
 | |
|                 // "Surrogate Pairs" ... from the "Astral Planes"
 | |
|                 // Unicode 3.1 assigned the first characters there
 | |
|                 int iso646 = b1 & 07;
 | |
|                 iso646 = (iso646 << 6) + getNextUtf8Byte(i++, count);
 | |
|                 iso646 = (iso646 << 6) + getNextUtf8Byte(i++, count);
 | |
|                 iso646 = (iso646 << 6) + getNextUtf8Byte(i++, count);
 | |
| 
 | |
|                 if (iso646 <= 0xffff)
 | |
|                   {
 | |
|                     encodingError("Illegal four byte UTF-8 sequence",
 | |
|                                   iso646, 0);
 | |
|                   }
 | |
|                 else
 | |
|                   {
 | |
|                     if (iso646 > 0x0010ffff)
 | |
|                       {
 | |
|                         encodingError("UTF-8 value out of range for Unicode",
 | |
|                                       iso646, 0);
 | |
|                       }
 | |
|                     iso646 -= 0x010000;
 | |
|                     readBuffer[j++] = (char) (0xd800 | (iso646 >> 10));
 | |
|                     readBuffer[j++] = (char) (0xdc00 | (iso646 & 0x03ff));
 | |
|                     continue;
 | |
|                   }
 | |
|               }
 | |
|             else
 | |
|               {
 | |
|                 // The five and six byte encodings aren't supported;
 | |
|                 // they exceed the Unicode (and XML) range.
 | |
|                 encodingError("unsupported five or six byte UTF-8 sequence",
 | |
|                               0xff & b1, i);
 | |
|                 // NOTREACHED
 | |
|                 c = 0;
 | |
|               }
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             // 1-byte sequence: 000000000xxxxxxx = 0xxxxxxx
 | |
|             // (US-ASCII character, "common" case, one branch to here)
 | |
|             c = (char) b1;
 | |
|           }
 | |
|         readBuffer[j++] = c;
 | |
|         if (c == '\r')
 | |
|           {
 | |
|             sawCR = true;
 | |
|           }
 | |
|       }
 | |
|     // How many characters have we read?
 | |
|     readBufferLength = j;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Return the next byte value in a UTF-8 sequence.
 | |
|    * If it is not possible to get a byte from the current
 | |
|    * entity, throw an exception.
 | |
|    * @param pos The current position in the rawReadBuffer.
 | |
|    * @param count The number of bytes in the rawReadBuffer
 | |
|    * @return The significant six bits of a non-initial byte in
 | |
|    *   a UTF-8 sequence.
 | |
|    * @exception EOFException If the sequence is incomplete.
 | |
|    */
 | |
|   private int getNextUtf8Byte(int pos, int count)
 | |
|     throws SAXException, IOException
 | |
|   {
 | |
|     int val;
 | |
| 
 | |
|     // Take a character from the buffer
 | |
|     // or from the actual input stream.
 | |
|     if (pos < count)
 | |
|       {
 | |
|         val = rawReadBuffer[pos];
 | |
|       }
 | |
|     else
 | |
|       {
 | |
|         val = is.read();
 | |
|         if (val == -1)
 | |
|           {
 | |
|             encodingError("unfinished multi-byte UTF-8 sequence at EOF",
 | |
|                           -1, pos);
 | |
|           }
 | |
|       }
 | |
| 
 | |
|     // Check for the correct bits at the start.
 | |
|     if ((val & 0xc0) != 0x80)
 | |
|       {
 | |
|         encodingError("bad continuation of multi-byte UTF-8 sequence",
 | |
|                       val, pos + 1);
 | |
|       }
 | |
| 
 | |
|     // Return the significant bits.
 | |
|     return (val & 0x3f);
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Convert a buffer of US-ASCII or ISO-8859-1-encoded bytes into
 | |
|    * UTF-16 characters.
 | |
|    *
 | |
|    * <p>When readDataChunk () calls this method, the raw bytes are in
 | |
|    * rawReadBuffer, and the final characters will appear in
 | |
|    * readBuffer.
 | |
|    *
 | |
|    * @param count The number of bytes to convert.
 | |
|    * @param mask For ASCII conversion, 0x7f; else, 0xff.
 | |
|    * @see #readDataChunk
 | |
|    * @see #rawReadBuffer
 | |
|    * @see #readBuffer
 | |
|    */
 | |
|   private void copyIso8859_1ReadBuffer(int count, char mask)
 | |
|     throws IOException
 | |
|   {
 | |
|     int i, j;
 | |
|     for (i = 0, j = readBufferPos; i < count; i++, j++)
 | |
|       {
 | |
|         char c = (char) (rawReadBuffer[i] & 0xff);
 | |
|         if ((c & mask) != 0)
 | |
|           {
 | |
|             throw new CharConversionException("non-ASCII character U+"
 | |
|                                               + Integer.toHexString(c));
 | |
|           }
 | |
|         if (c == 0x0085 && xmlVersion == XML_11)
 | |
|           {
 | |
|             c = '\r';
 | |
|           }
 | |
|         readBuffer[j] = c;
 | |
|         if (c == '\r')
 | |
|           {
 | |
|             sawCR = true;
 | |
|           }
 | |
|       }
 | |
|     readBufferLength = j;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Convert a buffer of UCS-2-encoded bytes into UTF-16 characters
 | |
|    * (as used in Java string manipulation).
 | |
|    *
 | |
|    * <p>When readDataChunk () calls this method, the raw bytes are in
 | |
|    * rawReadBuffer, and the final characters will appear in
 | |
|    * readBuffer.
 | |
|    * @param count The number of bytes to convert.
 | |
|    * @param shift1 The number of bits to shift byte 1.
 | |
|    * @param shift2 The number of bits to shift byte 2
 | |
|    * @see #readDataChunk
 | |
|    * @see #rawReadBuffer
 | |
|    * @see #readBuffer
 | |
|    */
 | |
|   private void copyUcs2ReadBuffer(int count, int shift1, int shift2)
 | |
|     throws SAXException
 | |
|   {
 | |
|     int j = readBufferPos;
 | |
| 
 | |
|     if (count > 0 && (count % 2) != 0)
 | |
|       {
 | |
|         encodingError("odd number of bytes in UCS-2 encoding", -1, count);
 | |
|       }
 | |
|     // The loops are faster with less internal brancing; hence two
 | |
|     if (shift1 == 0)
 | |
|       {  // "UTF-16-LE"
 | |
|         for (int i = 0; i < count; i += 2)
 | |
|           {
 | |
|             char c = (char) (rawReadBuffer[i + 1] << 8);
 | |
|             c |= 0xff & rawReadBuffer[i];
 | |
|             readBuffer[j++] = c;
 | |
|             if (c == '\r')
 | |
|               {
 | |
|                 sawCR = true;
 | |
|               }
 | |
|           }
 | |
|       }
 | |
|     else
 | |
|       {  // "UTF-16-BE"
 | |
|         for (int i = 0; i < count; i += 2)
 | |
|           {
 | |
|             char c = (char) (rawReadBuffer[i] << 8);
 | |
|             c |= 0xff & rawReadBuffer[i + 1];
 | |
|             readBuffer[j++] = c;
 | |
|             if (c == '\r')
 | |
|               {
 | |
|                 sawCR = true;
 | |
|               }
 | |
|           }
 | |
|       }
 | |
|     readBufferLength = j;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Convert a buffer of UCS-4-encoded bytes into UTF-16 characters.
 | |
|    *
 | |
|    * <p>When readDataChunk () calls this method, the raw bytes are in
 | |
|    * rawReadBuffer, and the final characters will appear in
 | |
|    * readBuffer.
 | |
|    * <p>Java has Unicode chars, and this routine uses surrogate pairs
 | |
|    * for ISO-10646 values between 0x00010000 and 0x000fffff.  An
 | |
|    * exception is thrown if the ISO-10646 character has no Unicode
 | |
|    * representation.
 | |
|    *
 | |
|    * @param count The number of bytes to convert.
 | |
|    * @param shift1 The number of bits to shift byte 1.
 | |
|    * @param shift2 The number of bits to shift byte 2
 | |
|    * @param shift3 The number of bits to shift byte 2
 | |
|    * @param shift4 The number of bits to shift byte 2
 | |
|    * @see #readDataChunk
 | |
|    * @see #rawReadBuffer
 | |
|    * @see #readBuffer
 | |
|    */
 | |
|   private void copyUcs4ReadBuffer(int count, int shift1, int shift2,
 | |
|                                   int shift3, int shift4)
 | |
|     throws SAXException
 | |
|   {
 | |
|     int j = readBufferPos;
 | |
| 
 | |
|     if (count > 0 && (count % 4) != 0)
 | |
|       {
 | |
|         encodingError("number of bytes in UCS-4 encoding " +
 | |
|                       "not divisible by 4",
 | |
|                       -1, count);
 | |
|       }
 | |
|     for (int i = 0; i < count; i += 4)
 | |
|       {
 | |
|         int value = (((rawReadBuffer [i] & 0xff) << shift1) |
 | |
|                      ((rawReadBuffer [i + 1] & 0xff) << shift2) |
 | |
|                      ((rawReadBuffer [i + 2] & 0xff) << shift3) |
 | |
|                      ((rawReadBuffer [i + 3] & 0xff) << shift4));
 | |
|         if (value < 0x0000ffff)
 | |
|           {
 | |
|             readBuffer [j++] = (char) value;
 | |
|             if (value == (int) '\r')
 | |
|               {
 | |
|                 sawCR = true;
 | |
|               }
 | |
|           }
 | |
|         else if (value < 0x0010ffff)
 | |
|           {
 | |
|             value -= 0x010000;
 | |
|             readBuffer[j++] = (char) (0xd8 | ((value >> 10) & 0x03ff));
 | |
|             readBuffer[j++] = (char) (0xdc | (value & 0x03ff));
 | |
|           }
 | |
|         else
 | |
|           {
 | |
|             encodingError("UCS-4 value out of range for Unicode",
 | |
|                           value, i);
 | |
|           }
 | |
|       }
 | |
|     readBufferLength = j;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Report a character encoding error.
 | |
|    */
 | |
|   private void encodingError(String message, int value, int offset)
 | |
|     throws SAXException
 | |
|   {
 | |
|     if (value != -1)
 | |
|       {
 | |
|         message = message + " (character code: 0x" +
 | |
|           Integer.toHexString(value) + ')';
 | |
|         error(message);
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   //////////////////////////////////////////////////////////////////////
 | |
|   // Local Variables.
 | |
|   //////////////////////////////////////////////////////////////////////
 | |
| 
 | |
|   /**
 | |
|    * Re-initialize the variables for each parse.
 | |
|    */
 | |
|   private void initializeVariables()
 | |
|   {
 | |
|     // First line
 | |
|     line = 1;
 | |
|     column = 0;
 | |
| 
 | |
|     // Set up the buffers for data and names
 | |
|     dataBufferPos = 0;
 | |
|     dataBuffer = new char[DATA_BUFFER_INITIAL];
 | |
|     nameBufferPos = 0;
 | |
|     nameBuffer = new char[NAME_BUFFER_INITIAL];
 | |
| 
 | |
|     // Set up the DTD hash tables
 | |
|     elementInfo = new HashMap();
 | |
|     entityInfo = new HashMap();
 | |
|     notationInfo = new HashMap();
 | |
|     skippedPE = false;
 | |
| 
 | |
|     // Set up the variables for the current
 | |
|     // element context.
 | |
|     currentElement = null;
 | |
|     currentElementContent = CONTENT_UNDECLARED;
 | |
| 
 | |
|     // Set up the input variables
 | |
|     sourceType = INPUT_NONE;
 | |
|     inputStack = new LinkedList();
 | |
|     entityStack = new LinkedList();
 | |
|     externalEntity = null;
 | |
|     tagAttributePos = 0;
 | |
|     tagAttributes = new String[100];
 | |
|     rawReadBuffer = new byte[READ_BUFFER_MAX];
 | |
|     readBufferOverflow = -1;
 | |
| 
 | |
|     scratch = new InputSource();
 | |
| 
 | |
|     inLiteral = false;
 | |
|     expandPE = false;
 | |
|     peIsError = false;
 | |
| 
 | |
|     doReport = false;
 | |
| 
 | |
|     inCDATA = false;
 | |
| 
 | |
|     symbolTable = new Object[SYMBOL_TABLE_LENGTH][];
 | |
|   }
 | |
| 
 | |
|   static class ExternalIdentifiers
 | |
|   {
 | |
| 
 | |
|     String publicId;
 | |
|     String systemId;
 | |
|     String baseUri;
 | |
| 
 | |
|     ExternalIdentifiers()
 | |
|     {
 | |
|     }
 | |
| 
 | |
|     ExternalIdentifiers(String publicId, String systemId, String baseUri)
 | |
|     {
 | |
|       this.publicId = publicId;
 | |
|       this.systemId = systemId;
 | |
|       this.baseUri = baseUri;
 | |
|     }
 | |
| 
 | |
|   }
 | |
| 
 | |
|   static class EntityInfo
 | |
|   {
 | |
| 
 | |
|     int type;
 | |
|     ExternalIdentifiers ids;
 | |
|     String value;
 | |
|     String notationName;
 | |
| 
 | |
|   }
 | |
| 
 | |
|   static class AttributeDecl
 | |
|   {
 | |
| 
 | |
|     String type;
 | |
|     String value;
 | |
|     int valueType;
 | |
|     String enumeration;
 | |
|     String defaultValue;
 | |
| 
 | |
|   }
 | |
| 
 | |
|   static class ElementDecl
 | |
|   {
 | |
| 
 | |
|     int contentType;
 | |
|     String contentModel;
 | |
|     HashMap attributes;
 | |
| 
 | |
|   }
 | |
| 
 | |
|   static class Input
 | |
|   {
 | |
| 
 | |
|     int sourceType;
 | |
|     URLConnection externalEntity;
 | |
|     char[] readBuffer;
 | |
|     int readBufferPos;
 | |
|     int readBufferLength;
 | |
|     int line;
 | |
|     int encoding;
 | |
|     int readBufferOverflow;
 | |
|     InputStream is;
 | |
|     int currentByteCount;
 | |
|     int column;
 | |
|     Reader reader;
 | |
| 
 | |
|   }
 | |
| 
 | |
| }
 |