mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			
		
			
				
	
	
		
			1932 lines
		
	
	
		
			68 KiB
		
	
	
	
		
			Java
		
	
	
	
			
		
		
	
	
			1932 lines
		
	
	
		
			68 KiB
		
	
	
	
		
			Java
		
	
	
	
/* XMLWriter.java --
 | 
						|
   Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
 | 
						|
 | 
						|
This file is part of GNU Classpath.
 | 
						|
 | 
						|
GNU Classpath is free software; you can redistribute it and/or modify
 | 
						|
it under the terms of the GNU General Public License as published by
 | 
						|
the Free Software Foundation; either version 2, or (at your option)
 | 
						|
any later version.
 | 
						|
 | 
						|
GNU Classpath is distributed in the hope that it will be useful, but
 | 
						|
WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
						|
General Public License for more details.
 | 
						|
 | 
						|
You should have received a copy of the GNU General Public License
 | 
						|
along with GNU Classpath; see the file COPYING.  If not, write to the
 | 
						|
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 | 
						|
02110-1301 USA.
 | 
						|
 | 
						|
Linking this library statically or dynamically with other modules is
 | 
						|
making a combined work based on this library.  Thus, the terms and
 | 
						|
conditions of the GNU General Public License cover the whole
 | 
						|
combination.
 | 
						|
 | 
						|
As a special exception, the copyright holders of this library give you
 | 
						|
permission to link this library with independent modules to produce an
 | 
						|
executable, regardless of the license terms of these independent
 | 
						|
modules, and to copy and distribute the resulting executable under
 | 
						|
terms of your choice, provided that you also meet, for each linked
 | 
						|
independent module, the terms and conditions of the license of that
 | 
						|
module.  An independent module is a module which is not derived from
 | 
						|
or based on this library.  If you modify this library, you may extend
 | 
						|
this exception to your version of the library, but you are not
 | 
						|
obligated to do so.  If you do not wish to do so, delete this
 | 
						|
exception statement from your version. */
 | 
						|
 | 
						|
package gnu.xml.util;
 | 
						|
 | 
						|
import gnu.java.lang.CPStringBuilder;
 | 
						|
 | 
						|
import java.io.BufferedWriter;
 | 
						|
import java.io.CharConversionException;
 | 
						|
import java.io.IOException;
 | 
						|
import java.io.OutputStream;
 | 
						|
import java.io.OutputStreamWriter;
 | 
						|
import java.io.Writer;
 | 
						|
import java.util.Stack;
 | 
						|
 | 
						|
import org.xml.sax.*;
 | 
						|
import org.xml.sax.ext.*;
 | 
						|
import org.xml.sax.helpers.*;
 | 
						|
 | 
						|
 | 
						|
/**
 | 
						|
 * This class is a SAX handler which writes all its input as a well formed
 | 
						|
 * XML or XHTML document.  If driven using SAX2 events, this output may
 | 
						|
 * include a recreated document type declaration, subject to limitations
 | 
						|
 * of SAX (no internal subset exposed) or DOM (the important declarations,
 | 
						|
 * with their documentation, are discarded).
 | 
						|
 *
 | 
						|
 * <p> By default, text is generated "as-is", but some optional modes
 | 
						|
 * are supported.  Pretty-printing is supported, to make life easier
 | 
						|
 * for people reading the output.  XHTML (1.0) output can be made
 | 
						|
 * particularly pretty; all the built-in character entities are known.
 | 
						|
 * Canonical XML can also be generated, assuming the input is properly
 | 
						|
 * formed.
 | 
						|
 *
 | 
						|
 * <hr>
 | 
						|
 *
 | 
						|
 * <p> Some of the methods on this class are intended for applications to
 | 
						|
 * use directly, rather than as pure SAX2 event callbacks.  Some of those
 | 
						|
 * methods access the JavaBeans properties (used to tweak output formats,
 | 
						|
 * for example canonicalization and pretty printing).  Subclasses
 | 
						|
 * are expected to add new behaviors, not to modify current behavior, so
 | 
						|
 * many such methods are final.</p>
 | 
						|
 *
 | 
						|
 * <p> The <em>write*()</em> methods may be slightly simpler for some
 | 
						|
 * applications to use than direct callbacks.  For example, they support
 | 
						|
 * a simple policy for encoding data items as the content of a single element.
 | 
						|
 *
 | 
						|
 * <p> To reuse an XMLWriter you must provide it with a new Writer, since
 | 
						|
 * this handler closes the writer it was given as part of its endDocument()
 | 
						|
 * handling.  (XML documents have an end of input, and the way to encode
 | 
						|
 * that on a stream is to close it.) </p>
 | 
						|
 *
 | 
						|
 * <hr>
 | 
						|
 *
 | 
						|
 * <p> Note that any relative URIs in the source document, as found in
 | 
						|
 * entity and notation declarations, ought to have been fully resolved by
 | 
						|
 * the parser providing events to this handler.  This means that the
 | 
						|
 * output text should only have fully resolved URIs, which may not be
 | 
						|
 * the desired behavior in cases where later binding is desired. </p>
 | 
						|
 *
 | 
						|
 * <p> <em>Note that due to SAX2 defaults, you may need to manually
 | 
						|
 * ensure that the input events are XML-conformant with respect to namespace
 | 
						|
 * prefixes and declarations.  {@link gnu.xml.pipeline.NSFilter} is
 | 
						|
 * one solution to this problem, in the context of processing pipelines.</em>
 | 
						|
 * Something as simple as connecting this handler to a parser might not
 | 
						|
 * generate the correct output.  Another workaround is to ensure that the
 | 
						|
 * <em>namespace-prefixes</em> feature is always set to true, if you're
 | 
						|
 * hooking this directly up to some XMLReader implementation.
 | 
						|
 *
 | 
						|
 * @see gnu.xml.pipeline.TextConsumer
 | 
						|
 *
 | 
						|
 * @author David Brownell
 | 
						|
 *
 | 
						|
 * @deprecated Please use the javax.xml.stream APIs instead
 | 
						|
 */
 | 
						|
public class XMLWriter
 | 
						|
    implements ContentHandler, LexicalHandler, DTDHandler, DeclHandler
 | 
						|
{
 | 
						|
    // text prints/escapes differently depending on context
 | 
						|
    //  CTX_ENTITY ... entity literal value
 | 
						|
    //  CTX_ATTRIBUTE ... attribute literal value
 | 
						|
    //  CTX_CONTENT ... content of an element
 | 
						|
    //  CTX_UNPARSED ... CDATA, comment, PI, names, etc
 | 
						|
    //  CTX_NAME ... name or nmtoken, no escapes possible
 | 
						|
    private static final int    CTX_ENTITY = 1;
 | 
						|
    private static final int    CTX_ATTRIBUTE = 2;
 | 
						|
    private static final int    CTX_CONTENT = 3;
 | 
						|
    private static final int    CTX_UNPARSED = 4;
 | 
						|
    private static final int    CTX_NAME = 5;
 | 
						|
 | 
						|
// FIXME: names (element, attribute, PI, notation, etc) are not
 | 
						|
// currently written out with range checks (escapeChars).
 | 
						|
// In non-XHTML, some names can't be directly written; panic!
 | 
						|
 | 
						|
    /**
     * Default line end for new writers:  the "line.separator" system
     * property when it is readable and is a legal XML line end, else "\n".
     */
    private static String       sysEOL;

    static {
        String platformEOL;

        try {
            platformEOL = System.getProperty ("line.separator", "\n");
        } catch (SecurityException e) {
            // not permitted to read the property; fall back to "\n"
            platformEOL = "\n";
        }

        // don't use the system's EOL if it's illegal XML.
        sysEOL = isLineEnd (platformEOL) ? platformEOL : "\n";
    }
 | 
						|
 | 
						|
    private static boolean isLineEnd (String eol)
 | 
						|
    {
 | 
						|
        return "\n".equals (eol)
 | 
						|
                    || "\r".equals (eol)
 | 
						|
                    || "\r\n".equals (eol);
 | 
						|
    }
 | 
						|
 | 
						|
    private Writer              out;
 | 
						|
    private boolean             inCDATA;
 | 
						|
    private int                 elementNestLevel;
 | 
						|
    private String              eol = sysEOL;
 | 
						|
 | 
						|
    private short               dangerMask;
 | 
						|
    private CPStringBuilder     stringBuf;
 | 
						|
    private Locator             locator;
 | 
						|
    private ErrorHandler        errHandler;
 | 
						|
 | 
						|
    private boolean             expandingEntities = false;
 | 
						|
    private int                 entityNestLevel;
 | 
						|
    private boolean             xhtml;
 | 
						|
    private boolean             startedDoctype;
 | 
						|
    private String              encoding;
 | 
						|
 | 
						|
    private boolean             canonical;
 | 
						|
    private boolean             inDoctype;
 | 
						|
    private boolean             inEpilogue;
 | 
						|
 | 
						|
    // pretty printing controls
 | 
						|
    private boolean             prettyPrinting;
 | 
						|
    private int                 column;
 | 
						|
    private boolean             noWrap;
 | 
						|
    private Stack               space = new Stack ();
 | 
						|
 | 
						|
    // this is not a hard'n'fast rule -- longer lines are OK,
 | 
						|
    // but are to be avoided.  Here, prettyprinting is more to
 | 
						|
    // show structure "cleanly" than to be precise about it.
 | 
						|
    // better to have ragged layout than one line 24Kb long.
 | 
						|
    private static final int    lineLength = 75;
 | 
						|
 | 
						|
 | 
						|
    /**
     * Creates a handler that writes its SAX events to System.out using
     * the UTF-8 encoding.  Only use this when it is safe for System.out
     * to be closed at the end of the document.
     */
    public XMLWriter () throws IOException
    {
        this (System.out);
    }
 | 
						|
 | 
						|
    /**
     * Creates a handler that writes everything to the given output
     * stream using the UTF-8 encoding, closing the stream when
     * endDocument is called.  (The exception is annoying but can't be
     * avoided, since it's barely possible that some JDK exists which
     * doesn't know how to emit UTF-8.)
     *
     * @param out stream receiving the UTF-8 encoded XML text.
     */
    public XMLWriter (OutputStream out) throws IOException
    {
        // "UTF8" is the Java-specific name of the UTF-8 encoding
        this (new OutputStreamWriter (out, "UTF8"));
    }
 | 
						|
 | 
						|
    /**
     * Creates a handler that writes everything to the given writer and
     * closes that writer when the document ends.  When an XML declaration
     * is written and this class is able to determine the name of the
     * writer's character encoding, the declaration includes that name.
     *
     * <P> See the description of the constructor which takes an encoding
     * name for important information about selection of encodings.
     *
     * @param writer XML text is written to this writer.
     */
    public XMLWriter (Writer writer)
    {
        // a null encoding name means "work it out from the writer"
        this (writer, null);
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * Constructs a handler which writes all input to the writer, and then
 | 
						|
     * closes the writer when the document ends.  If an XML declaration is
 | 
						|
     * written onto the output, this class will use the specified encoding
 | 
						|
     * name in that declaration.  If no encoding name is specified, no
 | 
						|
     * encoding name will be declared unless this class can otherwise
 | 
						|
     * determine the name of the character encoding for this writer.
 | 
						|
     *
 | 
						|
     * <P> At this time, only the UTF-8 ("UTF8") and UTF-16 ("Unicode")
 | 
						|
     * output encodings are fully lossless with respect to XML data.  If you
 | 
						|
     * use any other encoding you risk having your data be silently mangled
 | 
						|
     * on output, as the standard Java character encoding subsystem silently
 | 
						|
     * maps non-encodable characters to a question mark ("?") and will not
 | 
						|
     * report such errors to applications.
 | 
						|
     *
 | 
						|
     * <p> For a few other encodings the risk can be reduced. If the writer is
 | 
						|
     * a java.io.OutputStreamWriter, and uses either the ISO-8859-1 ("8859_1",
 | 
						|
     * "ISO8859_1", etc) or US-ASCII ("ASCII") encodings, content which
 | 
						|
     * can't be encoded in those encodings will be written safely.  Where
 | 
						|
     * relevant, the XHTML entity names will be used; otherwise, numeric
 | 
						|
     * character references will be emitted.
 | 
						|
     *
 | 
						|
     * <P> However, there remain a number of cases where substituting such
 | 
						|
     * entity or character references is not an option.  Such references are
 | 
						|
     * not usable within a DTD, comment, PI, or CDATA section.  Neither may
 | 
						|
     * they be used when element, attribute, entity, or notation names have
 | 
						|
     * the problematic characters.
 | 
						|
     *
 | 
						|
     * @param writer XML text is written to this writer.
 | 
						|
     * @param encoding if non-null, and an XML declaration is written,
 | 
						|
     *  this is the name that will be used for the character encoding.
 | 
						|
     */
 | 
						|
    public XMLWriter (Writer writer, String encoding)
    {
        // all of the setup is shared with resetting for a new document
        this.setWriter (writer, encoding);
    }
 | 
						|
 | 
						|
    private void setEncoding (String encoding)
 | 
						|
    {
 | 
						|
        if (encoding == null && out instanceof OutputStreamWriter)
 | 
						|
            encoding = ((OutputStreamWriter)out).getEncoding ();
 | 
						|
 | 
						|
        if (encoding != null) {
 | 
						|
            encoding = encoding.toUpperCase ();
 | 
						|
 | 
						|
            // Use official encoding names where we know them,
 | 
						|
            // avoiding the Java-only names.  When using common
 | 
						|
            // encodings where we can easily tell if characters
 | 
						|
            // are out of range, we'll escape out-of-range
 | 
						|
            // characters using character refs for safety.
 | 
						|
 | 
						|
            // I _think_ these are all the main synonyms for these!
 | 
						|
            if ("UTF8".equals (encoding)) {
 | 
						|
                encoding = "UTF-8";
 | 
						|
            } else if ("US-ASCII".equals (encoding)
 | 
						|
                    || "ASCII".equals (encoding)) {
 | 
						|
                dangerMask = (short) 0xff80;
 | 
						|
                encoding = "US-ASCII";
 | 
						|
            } else if ("ISO-8859-1".equals (encoding)
 | 
						|
                    || "8859_1".equals (encoding)
 | 
						|
                    || "ISO8859_1".equals (encoding)) {
 | 
						|
                dangerMask = (short) 0xff00;
 | 
						|
                encoding = "ISO-8859-1";
 | 
						|
            } else if ("UNICODE".equals (encoding)
 | 
						|
                    || "UNICODE-BIG".equals (encoding)
 | 
						|
                    || "UNICODE-LITTLE".equals (encoding)) {
 | 
						|
                encoding = "UTF-16";
 | 
						|
 | 
						|
                // TODO: UTF-16BE, UTF-16LE ... no BOM; what
 | 
						|
                // release of JDK supports those Unicode names?
 | 
						|
            }
 | 
						|
 | 
						|
            if (dangerMask != 0)
 | 
						|
                stringBuf = new CPStringBuilder ();
 | 
						|
        }
 | 
						|
 | 
						|
        this.encoding = encoding;
 | 
						|
    }
 | 
						|
 | 
						|
 | 
						|
    /**
 | 
						|
     * Resets the handler to write a new text document.
 | 
						|
     *
 | 
						|
     * @param writer XML text is written to this writer.
 | 
						|
     * @param encoding if non-null, and an XML declaration is written,
 | 
						|
     *  this is the name that will be used for the character encoding.
 | 
						|
     *
 | 
						|
     * @exception IllegalStateException if the current
 | 
						|
     *  document hasn't yet ended (with {@link #endDocument})
 | 
						|
     */
 | 
						|
    final public void setWriter (Writer writer, String encoding)
 | 
						|
    {
 | 
						|
        if (out != null)
 | 
						|
            throw new IllegalStateException (
 | 
						|
                "can't change stream in mid course");
 | 
						|
        out = writer;
 | 
						|
        if (out != null)
 | 
						|
            setEncoding (encoding);
 | 
						|
        if (!(out instanceof BufferedWriter))
 | 
						|
            out = new BufferedWriter (out);
 | 
						|
        space.push ("default");
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * Assigns the line ending style to be used on output.
 | 
						|
     * @param eolString null to use the system default; else
 | 
						|
     *  "\n", "\r", or "\r\n".
 | 
						|
     */
 | 
						|
    final public void setEOL (String eolString)
 | 
						|
    {
 | 
						|
        if (eolString == null)
 | 
						|
            eol = sysEOL;
 | 
						|
        else if (!isLineEnd (eolString))
 | 
						|
            eol = eolString;
 | 
						|
        else
 | 
						|
            throw new IllegalArgumentException (eolString);
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * Assigns the error handler to be used to present most fatal
 | 
						|
     * errors.
 | 
						|
     */
 | 
						|
    public void setErrorHandler (ErrorHandler handler)
 | 
						|
    {
 | 
						|
        errHandler = handler;
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * Used internally and by subclasses, this encapsulates the logic
 | 
						|
     * involved in reporting fatal errors.  It uses locator information
 | 
						|
     * for good diagnostics, if available, and gives the application's
 | 
						|
     * ErrorHandler the opportunity to handle the error before throwing
 | 
						|
     * an exception.
 | 
						|
     */
 | 
						|
    protected void fatal (String message, Exception e)
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        SAXParseException       x;
 | 
						|
 | 
						|
        if (locator == null)
 | 
						|
            x = new SAXParseException (message, null, null, -1, -1, e);
 | 
						|
        else
 | 
						|
            x = new SAXParseException (message, locator, e);
 | 
						|
        if (errHandler != null)
 | 
						|
            errHandler.fatalError (x);
 | 
						|
        throw x;
 | 
						|
    }
 | 
						|
 | 
						|
 | 
						|
    // JavaBeans properties
 | 
						|
 | 
						|
    /**
 | 
						|
     * Controls whether the output should attempt to follow the "transitional"
 | 
						|
     * XHTML rules so that it meets the "HTML Compatibility Guidelines"
 | 
						|
     * appendix in the XHTML specification.  A "transitional" Document Type
 | 
						|
     * Declaration (DTD) is placed near the beginning of the output document,
 | 
						|
     * instead of whatever DTD would otherwise have been placed there, and
 | 
						|
     * XHTML empty elements are printed specially.  When writing text in
 | 
						|
     * US-ASCII or ISO-8859-1 encodings, the predefined XHTML internal
 | 
						|
     * entity names are used (in preference to character references) when
 | 
						|
     * writing content characters which can't be expressed in those encodings.
 | 
						|
     *
 | 
						|
     * <p> When this option is enabled, it is the caller's responsibility
 | 
						|
     * to ensure that the input is otherwise valid as XHTML.  Things to
 | 
						|
     * be careful of in all cases, as described in the appendix referenced
 | 
						|
     * above, include:  <ul>
 | 
						|
     *
 | 
						|
     *  <li> Element and attribute names must be in lower case, both
 | 
						|
     *          in the document and in any CSS style sheet.
 | 
						|
     *  <li> All XML constructs must be valid as defined by the XHTML
 | 
						|
     *          "transitional" DTD (including all familiar constructs,
 | 
						|
     *          even deprecated ones).
 | 
						|
     *  <li> The root element must be "html".
 | 
						|
     *  <li> Elements that must be empty (such as <em>&lt;br&gt;</em>)
 | 
						|
     *          must have no content.
 | 
						|
     *  <li> Use both <em>lang</em> and <em>xml:lang</em> attributes
 | 
						|
     *          when specifying language.
 | 
						|
     *  <li> Similarly, use both <em>id</em> and <em>name</em> attributes
 | 
						|
     *          when defining elements that may be referred to through
 | 
						|
     *          URI fragment identifiers ... and make sure that the
 | 
						|
     *          value is a legal NMTOKEN, since not all such HTML 4.0
 | 
						|
     *          identifiers are valid in XML.
 | 
						|
     *  <li> Be careful with character encodings; make sure you provide
 | 
						|
     *          a <em>&lt;meta http-equiv="Content-type"
 | 
						|
     *          content="text/xml;charset=..." /&gt;</em> element in
 | 
						|
     *          the HTML "head" element, naming the same encoding
 | 
						|
     *          used to create this handler.  Also, if that encoding
 | 
						|
     *          is anything other than US-ASCII, make sure that if
 | 
						|
     *          the document is given a MIME content type, it has
 | 
						|
     *          a <em>charset=...</em> attribute with that encoding.
 | 
						|
     *  </ul>
 | 
						|
     *
 | 
						|
     * <p> Additionally, some of the oldest browsers have additional
 | 
						|
     * quirks, to address with guidelines such as: <ul>
 | 
						|
     *
 | 
						|
     *  <li> Processing instructions may be rendered, so avoid them.
 | 
						|
     *          (Similarly for an XML declaration.)
 | 
						|
     *  <li> Embedded style sheets and scripts should not contain XML
 | 
						|
     *          markup delimiters:  &amp;, &lt;, and ]]&gt; are trouble.
 | 
						|
     *  <li> Attribute values should not have line breaks or multiple
 | 
						|
     *          consecutive white space characters.
 | 
						|
     *  <li> Use no more than one of the deprecated (transitional)
 | 
						|
     *          <em>&lt;isindex&gt;</em> elements.
 | 
						|
     *  <li> Some boolean attributes (such as <em>compact, checked,
 | 
						|
     *          disabled, readonly, selected,</em> and more) confuse
 | 
						|
     *          some browsers, since they only understand minimized
 | 
						|
     *          versions which are illegal in XML.
 | 
						|
     *  </ul>
 | 
						|
     *
 | 
						|
     * <p> Also, some characteristics of the resulting output may be
 | 
						|
     * a function of whether the document is later given a MIME
 | 
						|
     * content type of <em>text/html</em> rather than one indicating
 | 
						|
     * XML (<em>application/xml</em> or <em>text/xml</em>).  Worse,
 | 
						|
     * some browsers ignore MIME content types and prefer to rely on URI
 | 
						|
     * name suffixes -- so an "index.xml" could always be XML, never
 | 
						|
     * XHTML, no matter its MIME type.
 | 
						|
     */
 | 
						|
    final public void setXhtml (boolean value)
 | 
						|
    {
 | 
						|
        if (locator != null)
 | 
						|
            throw new IllegalStateException ("started parsing");
 | 
						|
        xhtml = value;
 | 
						|
        if (xhtml)
 | 
						|
            canonical = false;
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * Returns true if the output attempts to echo the input following
 | 
						|
     * "transitional" XHTML rules and matching the "HTML Compatibility
 | 
						|
     * Guidelines" so that an HTML version 3 browser can read the output
 | 
						|
     * as HTML; returns false (the default) othewise.
 | 
						|
     */
 | 
						|
    final public boolean isXhtml ()
 | 
						|
    {
 | 
						|
        return xhtml;
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * Controls whether the output text contains references to
 | 
						|
     * entities (the default), or instead contains the expanded
 | 
						|
     * values of those entities.
 | 
						|
     */
 | 
						|
    final public void setExpandingEntities (boolean value)
 | 
						|
    {
 | 
						|
        if (locator != null)
 | 
						|
            throw new IllegalStateException ("started parsing");
 | 
						|
        expandingEntities = value;
 | 
						|
        if (!expandingEntities)
 | 
						|
            canonical = false;
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * Returns true if the output will have no entity references;
 | 
						|
     * returns false (the default) otherwise.
 | 
						|
     */
 | 
						|
    final public boolean isExpandingEntities ()
 | 
						|
    {
 | 
						|
        return expandingEntities;
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * Controls pretty-printing, which by default is not enabled
 | 
						|
     * (and currently is most useful for XHTML output).
 | 
						|
     * Pretty printing enables structural indentation, sorting of attributes
 | 
						|
     * by name, line wrapping, and potentially other mechanisms for making
 | 
						|
     * output more or less readable.
 | 
						|
     *
 | 
						|
     * <p> At this writing, structural indentation and line wrapping are
 | 
						|
     * enabled when pretty printing is enabled and the <em>xml:space</em>
 | 
						|
     * attribute has the value <em>default</em> (its other legal value is
 | 
						|
     * <em>preserve</em>, as defined in the XML specification).  The three
 | 
						|
     * XHTML element types which use another value are recognized by their
 | 
						|
     * names (namespaces are ignored).
 | 
						|
     *
 | 
						|
     * <p> Also, for the record, the "pretty" aspect of printing here
 | 
						|
     * is more to provide basic structure on outputs that would otherwise
 | 
						|
     * risk being a single long line of text.  For now, expect the
 | 
						|
     * structure to be ragged ... unless you'd like to submit a patch
 | 
						|
     * to make this be more strictly formatted!
 | 
						|
     *
 | 
						|
     * @exception IllegalStateException thrown if this method is invoked
 | 
						|
     *  after output has begun.
 | 
						|
     */
 | 
						|
    final public void setPrettyPrinting (boolean value)
 | 
						|
    {
 | 
						|
        if (locator != null)
 | 
						|
            throw new IllegalStateException ("started parsing");
 | 
						|
        prettyPrinting = value;
 | 
						|
        if (prettyPrinting)
 | 
						|
            canonical = false;
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * Returns value of flag controlling pretty printing.
 | 
						|
     */
 | 
						|
    final public boolean isPrettyPrinting ()
 | 
						|
    {
 | 
						|
        return prettyPrinting;
 | 
						|
    }
 | 
						|
 | 
						|
 | 
						|
    /**
 | 
						|
     * Sets the output style to be canonicalized.  Input events must
 | 
						|
     * meet requirements that are slightly more stringent than the
 | 
						|
     * basic well-formedness ones, and include:  <ul>
 | 
						|
     *
 | 
						|
     *  <li> Namespace prefixes must not have been changed from those
 | 
						|
     *  in the original document.  (This may only be ensured by setting
 | 
						|
     *  the SAX2 XMLReader <em>namespace-prefixes</em> feature flag;
 | 
						|
     *  by default, it is cleared.)
 | 
						|
     *
 | 
						|
     *  <li> Redundant namespace declaration attributes have been
 | 
						|
     *  removed.  (If an ancestor element defines a namespace prefix
 | 
						|
     *  and that declaration hasn't been overridden, an element must
 | 
						|
     *  not redeclare it.)
 | 
						|
     *
 | 
						|
     *  <li> If comments are not to be included in the canonical output,
 | 
						|
     *  they must first be removed from the input event stream; this is
 | 
						|
     *  <em>Canonical XML with comments</em> by default.
 | 
						|
     *
 | 
						|
     *  <li> If the input character encoding was not UCS-based, the
 | 
						|
     *  character data must have been normalized using Unicode
 | 
						|
     *  Normalization Form C.  (UTF-8 and UTF-16 are UCS-based.)
 | 
						|
     *
 | 
						|
     *  <li> Attribute values must have been normalized, as is done
 | 
						|
     *  by any conformant XML processor which processes all external
 | 
						|
     *  parameter entities.
 | 
						|
     *
 | 
						|
     *  <li> Similarly, attribute value defaulting has been performed.
 | 
						|
     *
 | 
						|
     *  </ul>
 | 
						|
     *
 | 
						|
     * <p> Note that fragments of XML documents, as specified by an XPath
 | 
						|
     * node set, may be canonicalized.  In such cases, elements may need
 | 
						|
     * some fixup (for <em>xml:*</em> attributes and application-specific
 | 
						|
     * context).
 | 
						|
     *
 | 
						|
     * @exception IllegalArgumentException if the output encoding
 | 
						|
     *  is anything other than UTF-8.
 | 
						|
     */
 | 
						|
    final public void setCanonical (boolean value)
 | 
						|
    {
 | 
						|
        if (value && !"UTF-8".equals (encoding))
 | 
						|
            throw new IllegalArgumentException ("encoding != UTF-8");
 | 
						|
        canonical = value;
 | 
						|
        if (canonical) {
 | 
						|
            prettyPrinting = xhtml = false;
 | 
						|
            expandingEntities = true;
 | 
						|
            eol = "\n";
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
 | 
						|
    /**
 | 
						|
     * Returns value of flag controlling canonical output.
 | 
						|
     */
 | 
						|
    final public boolean isCanonical ()
 | 
						|
    {
 | 
						|
        return canonical;
 | 
						|
    }
 | 
						|
 | 
						|
 | 
						|
    /**
 | 
						|
     * Flushes the output stream.  When this handler is used in long lived
 | 
						|
     * pipelines, it can be important to flush buffered state, for example
 | 
						|
     * so that it can reach the disk as part of a state checkpoint.
 | 
						|
     */
 | 
						|
    final public void flush ()
 | 
						|
    throws IOException
 | 
						|
    {
 | 
						|
        if (out != null)
 | 
						|
            out.flush ();
 | 
						|
    }
 | 
						|
 | 
						|
 | 
						|
    // convenience routines
 | 
						|
 | 
						|
// FIXME:  probably want a subclass that holds a lot of these...
 | 
						|
// and maybe more!
 | 
						|
 | 
						|
    /**
 | 
						|
     * Writes the string as if characters() had been called on the contents
 | 
						|
     * of the string.  This is particularly useful when applications act as
 | 
						|
     * producers and write data directly to event consumers.
 | 
						|
     */
 | 
						|
    final public void write (String data)
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        char    buf [] = data.toCharArray ();
 | 
						|
        characters (buf, 0, buf.length);
 | 
						|
    }
 | 
						|
 | 
						|
 | 
						|
    /**
 | 
						|
     * Writes an element that has content consisting of a single string.
 | 
						|
     * @see #writeEmptyElement
 | 
						|
     * @see #startElement
 | 
						|
     */
 | 
						|
    public void writeElement (
 | 
						|
        String uri,
 | 
						|
        String localName,
 | 
						|
        String qName,
 | 
						|
        Attributes atts,
 | 
						|
        String content
 | 
						|
    ) throws SAXException
 | 
						|
    {
 | 
						|
        if (content == null || content.length () == 0) {
 | 
						|
            writeEmptyElement (uri, localName, qName, atts);
 | 
						|
            return;
 | 
						|
        }
 | 
						|
        startElement (uri, localName, qName, atts);
 | 
						|
        char chars [] = content.toCharArray ();
 | 
						|
        characters (chars, 0, chars.length);
 | 
						|
        endElement (uri, localName, qName);
 | 
						|
    }
 | 
						|
 | 
						|
 | 
						|
    /**
 | 
						|
     * Writes an element that has content consisting of a single integer,
 | 
						|
     * encoded as a decimal string.
 | 
						|
     * @see #writeEmptyElement
 | 
						|
     * @see #startElement
 | 
						|
     */
 | 
						|
    public void writeElement (
 | 
						|
        String uri,
 | 
						|
        String localName,
 | 
						|
        String qName,
 | 
						|
        Attributes atts,
 | 
						|
        int content
 | 
						|
    ) throws SAXException
 | 
						|
    {
 | 
						|
        writeElement (uri, localName, qName, atts, Integer.toString (content));
 | 
						|
    }
 | 
						|
 | 
						|
 | 
						|
    // SAX1 ContentHandler
 | 
						|
    /** <b>SAX1</b>:  provides parser status information */
 | 
						|
    final public void setDocumentLocator (Locator l)
 | 
						|
    {
 | 
						|
        locator = l;
 | 
						|
    }
 | 
						|
 | 
						|
 | 
						|
    // URL for dtd that validates against all normal HTML constructs
 | 
						|
    private static final String xhtmlFullDTD =
 | 
						|
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";
 | 
						|
 | 
						|
 | 
						|
    /**
 | 
						|
     * <b>SAX1</b>:  indicates the beginning of a document parse.
 | 
						|
     * If you're writing (well formed) fragments of XML, neither
 | 
						|
     * this nor endDocument should be called.
 | 
						|
     */
 | 
						|
    // NOT final
 | 
						|
    public void startDocument ()
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        try {
 | 
						|
            if (out == null)
 | 
						|
                throw new IllegalStateException (
 | 
						|
                    "null Writer given to XMLWriter");
 | 
						|
 | 
						|
            // Not all parsers provide the locator we want; this also
 | 
						|
            // flags whether events are being sent to this object yet.
 | 
						|
            // We could only have this one call if we only printed whole
 | 
						|
            // documents ... but we also print fragments, so most of the
 | 
						|
            // callbacks here replicate this test.
 | 
						|
 | 
						|
            if (locator == null)
 | 
						|
                locator = new LocatorImpl ();
 | 
						|
 | 
						|
            // Unless the data is in US-ASCII or we're canonicalizing, write
 | 
						|
            // the XML declaration if we know the encoding.  US-ASCII won't
 | 
						|
            // normally get mangled by web server confusion about the
 | 
						|
            // character encodings used.  Plus, it's an easy way to
 | 
						|
            // ensure we can write ASCII that's unlikely to confuse
 | 
						|
            // elderly HTML parsers.
 | 
						|
 | 
						|
            if (!canonical
 | 
						|
                    && dangerMask != (short) 0xff80
 | 
						|
                    && encoding != null) {
 | 
						|
                rawWrite ("<?xml version='1.0'");
 | 
						|
                rawWrite (" encoding='" + encoding + "'");
 | 
						|
                rawWrite ("?>");
 | 
						|
                newline ();
 | 
						|
            }
 | 
						|
 | 
						|
            if (xhtml) {
 | 
						|
 | 
						|
                rawWrite ("<!DOCTYPE html PUBLIC");
 | 
						|
                newline ();
 | 
						|
                rawWrite ("  '-//W3C//DTD XHTML 1.0 Transitional//EN'");
 | 
						|
                newline ();
 | 
						|
                rawWrite ("  '");
 | 
						|
                    // NOTE:  URL (above) matches the REC
 | 
						|
                rawWrite (xhtmlFullDTD);
 | 
						|
                rawWrite ("'>");
 | 
						|
                newline ();
 | 
						|
                newline ();
 | 
						|
 | 
						|
                // fake the rest of the handler into ignoring
 | 
						|
                // everything until the root element, so any
 | 
						|
                // XHTML DTD comments, PIs, etc are ignored
 | 
						|
                startedDoctype = true;
 | 
						|
            }
 | 
						|
 | 
						|
            entityNestLevel = 0;
 | 
						|
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * <b>SAX1</b>:  indicates the completion of a parse.
 | 
						|
     * Note that all complete SAX event streams make this call, even
 | 
						|
     * if an error is reported during a parse.
 | 
						|
     */
 | 
						|
    // NOT final
 | 
						|
    public void endDocument ()
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        try {
 | 
						|
            if (!canonical) {
 | 
						|
                newline ();
 | 
						|
                newline ();
 | 
						|
            }
 | 
						|
            out.close ();
 | 
						|
            out = null;
 | 
						|
            locator = null;
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    // XHTML elements declared as EMPTY print differently
 | 
						|
    final private static boolean isEmptyElementTag (String tag)
 | 
						|
    {
 | 
						|
        switch (tag.charAt (0)) {
 | 
						|
          case 'a':     return "area".equals (tag);
 | 
						|
          case 'b':     return "base".equals (tag)
 | 
						|
                            || "basefont".equals (tag)
 | 
						|
                            || "br".equals (tag);
 | 
						|
          case 'c':     return "col".equals (tag);
 | 
						|
          case 'f':     return "frame".equals (tag);
 | 
						|
          case 'h':     return "hr".equals (tag);
 | 
						|
          case 'i':     return "img".equals (tag)
 | 
						|
                            || "input".equals (tag)
 | 
						|
                            || "isindex".equals (tag);
 | 
						|
          case 'l':     return "link".equals (tag);
 | 
						|
          case 'm':     return "meta".equals (tag);
 | 
						|
          case 'p':     return "param".equals (tag);
 | 
						|
        }
 | 
						|
        return false;
 | 
						|
    }
 | 
						|
 | 
						|
    private static boolean indentBefore (String tag)
 | 
						|
    {
 | 
						|
        // basically indent before block content
 | 
						|
        // and within structure like tables, lists
 | 
						|
        switch (tag.charAt (0)) {
 | 
						|
          case 'a':     return "applet".equals (tag);
 | 
						|
          case 'b':     return "body".equals (tag)
 | 
						|
                            || "blockquote".equals (tag);
 | 
						|
          case 'c':     return "center".equals (tag);
 | 
						|
          case 'f':     return "frame".equals (tag)
 | 
						|
                            || "frameset".equals (tag);
 | 
						|
          case 'h':     return "head".equals (tag);
 | 
						|
          case 'm':     return "meta".equals (tag);
 | 
						|
          case 'o':     return "object".equals (tag);
 | 
						|
          case 'p':     return "param".equals (tag)
 | 
						|
                            || "pre".equals (tag);
 | 
						|
          case 's':     return "style".equals (tag);
 | 
						|
          case 't':     return "title".equals (tag)
 | 
						|
                            || "td".equals (tag)
 | 
						|
                            || "th".equals (tag);
 | 
						|
        }
 | 
						|
        // ... but not inline elements like "em", "b", "font"
 | 
						|
        return false;
 | 
						|
    }
 | 
						|
 | 
						|
    private static boolean spaceBefore (String tag)
 | 
						|
    {
 | 
						|
        // blank line AND INDENT before certain structural content
 | 
						|
        switch (tag.charAt (0)) {
 | 
						|
          case 'h':     return "h1".equals (tag)
 | 
						|
                            || "h2".equals (tag)
 | 
						|
                            || "h3".equals (tag)
 | 
						|
                            || "h4".equals (tag)
 | 
						|
                            || "h5".equals (tag)
 | 
						|
                            || "h6".equals (tag)
 | 
						|
                            || "hr".equals (tag);
 | 
						|
          case 'l':     return "li".equals (tag);
 | 
						|
          case 'o':     return "ol".equals (tag);
 | 
						|
          case 'p':     return "p".equals (tag);
 | 
						|
          case 't':     return "table".equals (tag)
 | 
						|
                            || "tr".equals (tag);
 | 
						|
          case 'u':     return "ul".equals (tag);
 | 
						|
        }
 | 
						|
        return false;
 | 
						|
    }
 | 
						|
 | 
						|
    // XHTML DTDs say these three have xml:space="preserve"
 | 
						|
    private static boolean spacePreserve (String tag)
 | 
						|
    {
 | 
						|
        return "pre".equals (tag)
 | 
						|
                || "style".equals (tag)
 | 
						|
                || "script".equals (tag);
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * <b>SAX2</b>:  ignored.
 | 
						|
     */
 | 
						|
    final public void startPrefixMapping (String prefix, String uri)
 | 
						|
        {}
 | 
						|
 | 
						|
    /**
 | 
						|
     * <b>SAX2</b>:  ignored.
 | 
						|
     */
 | 
						|
    final public void endPrefixMapping (String prefix)
 | 
						|
        {}
 | 
						|
 | 
						|
    private void writeStartTag (
 | 
						|
        String name,
 | 
						|
        Attributes atts,
 | 
						|
        boolean isEmpty
 | 
						|
    ) throws SAXException, IOException
 | 
						|
    {
 | 
						|
        rawWrite ('<');
 | 
						|
        rawWrite (name);
 | 
						|
 | 
						|
        // write out attributes ... sorting is particularly useful
 | 
						|
        // with output that's been heavily defaulted.
 | 
						|
        if (atts != null && atts.getLength () != 0) {
 | 
						|
 | 
						|
            // Set up to write, with optional sorting
 | 
						|
            int         indices [] = new int [atts.getLength ()];
 | 
						|
 | 
						|
            for (int i= 0; i < indices.length; i++)
 | 
						|
                indices [i] = i;
 | 
						|
 | 
						|
            // optionally sort
 | 
						|
 | 
						|
// FIXME:  canon xml demands xmlns nodes go first,
 | 
						|
// and sorting by URI first (empty first) then localname
 | 
						|
// it should maybe use a different sort
 | 
						|
 | 
						|
            if (canonical || prettyPrinting) {
 | 
						|
 | 
						|
                // insertion sort by attribute name
 | 
						|
                for (int i = 1; i < indices.length; i++) {
 | 
						|
                    int n = indices [i], j;
 | 
						|
                    String      s = atts.getQName (n);
 | 
						|
 | 
						|
                    for (j = i - 1; j >= 0; j--) {
 | 
						|
                        if (s.compareTo (atts.getQName (indices [j]))
 | 
						|
                                >= 0)
 | 
						|
                            break;
 | 
						|
                        indices [j + 1] = indices [j];
 | 
						|
                    }
 | 
						|
                    indices [j + 1] = n;
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            // write, sorted or no
 | 
						|
            for (int i= 0; i < indices.length; i++) {
 | 
						|
                String  s = atts.getQName (indices [i]);
 | 
						|
 | 
						|
                    if (s == null || "".equals (s))
 | 
						|
                        throw new IllegalArgumentException ("no XML name");
 | 
						|
                rawWrite (" ");
 | 
						|
                rawWrite (s);
 | 
						|
                rawWrite ("=");
 | 
						|
                writeQuotedValue (atts.getValue (indices [i]),
 | 
						|
                    CTX_ATTRIBUTE);
 | 
						|
            }
 | 
						|
        }
 | 
						|
        if (isEmpty)
 | 
						|
            rawWrite (" /");
 | 
						|
        rawWrite ('>');
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * <b>SAX2</b>:  indicates the start of an element.
 | 
						|
     * When XHTML is in use, avoid attribute values with
 | 
						|
     * line breaks or multiple whitespace characters, since
 | 
						|
     * not all user agents handle them correctly.
 | 
						|
     */
 | 
						|
    final public void startElement (
 | 
						|
        String uri,
 | 
						|
        String localName,
 | 
						|
        String qName,
 | 
						|
        Attributes atts
 | 
						|
    ) throws SAXException
 | 
						|
    {
 | 
						|
        startedDoctype = false;
 | 
						|
 | 
						|
        if (locator == null)
 | 
						|
            locator = new LocatorImpl ();
 | 
						|
 | 
						|
        if (qName == null || "".equals (qName))
 | 
						|
            throw new IllegalArgumentException ("no XML name");
 | 
						|
 | 
						|
        try {
 | 
						|
            if (entityNestLevel != 0)
 | 
						|
                return;
 | 
						|
            if (prettyPrinting) {
 | 
						|
                String whitespace = null;
 | 
						|
 | 
						|
                if (xhtml && spacePreserve (qName))
 | 
						|
                    whitespace = "preserve";
 | 
						|
                else if (atts != null)
 | 
						|
                    whitespace = atts.getValue ("xml:space");
 | 
						|
                if (whitespace == null)
 | 
						|
                    whitespace = (String) space.peek ();
 | 
						|
                space.push (whitespace);
 | 
						|
 | 
						|
                if ("default".equals (whitespace)) {
 | 
						|
                    if (xhtml) {
 | 
						|
                        if (spaceBefore (qName)) {
 | 
						|
                            newline ();
 | 
						|
                            doIndent ();
 | 
						|
                        } else if (indentBefore (qName))
 | 
						|
                            doIndent ();
 | 
						|
                        // else it's inlined, modulo line length
 | 
						|
                        // FIXME: incrementing element nest level
 | 
						|
                        // for inlined elements causes ugliness
 | 
						|
                    } else
 | 
						|
                        doIndent ();
 | 
						|
                }
 | 
						|
            }
 | 
						|
            elementNestLevel++;
 | 
						|
            writeStartTag (qName, atts, xhtml && isEmptyElementTag (qName));
 | 
						|
 | 
						|
            if (xhtml) {
 | 
						|
// FIXME: if this is an XHTML "pre" element, turn
 | 
						|
// off automatic wrapping.
 | 
						|
            }
 | 
						|
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * Writes an empty element.
 | 
						|
     * @see #startElement
 | 
						|
     */
 | 
						|
    public void writeEmptyElement (
 | 
						|
        String uri,
 | 
						|
        String localName,
 | 
						|
        String qName,
 | 
						|
        Attributes atts
 | 
						|
    ) throws SAXException
 | 
						|
    {
 | 
						|
        if (canonical) {
 | 
						|
            startElement (uri, localName, qName, atts);
 | 
						|
            endElement (uri, localName, qName);
 | 
						|
        } else {
 | 
						|
            try {
 | 
						|
                writeStartTag (qName, atts, true);
 | 
						|
            } catch (IOException e) {
 | 
						|
                fatal ("can't write", e);
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
 | 
						|
    /** <b>SAX2</b>:  indicates the end of an element */
 | 
						|
    final public void endElement (String uri, String localName, String qName)
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        if (qName == null || "".equals (qName))
 | 
						|
            throw new IllegalArgumentException ("no XML name");
 | 
						|
 | 
						|
        try {
 | 
						|
            elementNestLevel--;
 | 
						|
            if (entityNestLevel != 0)
 | 
						|
                return;
 | 
						|
            if (xhtml && isEmptyElementTag (qName))
 | 
						|
                return;
 | 
						|
            rawWrite ("</");
 | 
						|
            rawWrite (qName);
 | 
						|
            rawWrite ('>');
 | 
						|
 | 
						|
            if (prettyPrinting) {
 | 
						|
                if (!space.empty ())
 | 
						|
                    space.pop ();
 | 
						|
                else
 | 
						|
                    fatal ("stack discipline", null);
 | 
						|
            }
 | 
						|
            if (elementNestLevel == 0)
 | 
						|
                inEpilogue = true;
 | 
						|
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /** <b>SAX1</b>:  reports content characters */
 | 
						|
    final public void characters (char ch [], int start, int length)
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        if (locator == null)
 | 
						|
            locator = new LocatorImpl ();
 | 
						|
 | 
						|
        try {
 | 
						|
            if (entityNestLevel != 0)
 | 
						|
                return;
 | 
						|
            if (inCDATA) {
 | 
						|
                escapeChars (ch, start, length, CTX_UNPARSED);
 | 
						|
            } else {
 | 
						|
                escapeChars (ch, start, length, CTX_CONTENT);
 | 
						|
            }
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /** <b>SAX1</b>:  reports ignorable whitespace */
 | 
						|
    final public void ignorableWhitespace (char ch [], int start, int length)
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        if (locator == null)
 | 
						|
            locator = new LocatorImpl ();
 | 
						|
 | 
						|
        try {
 | 
						|
            if (entityNestLevel != 0)
 | 
						|
                return;
 | 
						|
            // don't forget to map NL to CRLF, CR, etc
 | 
						|
            escapeChars (ch, start, length, CTX_CONTENT);
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * <b>SAX1</b>:  reports a PI.
 | 
						|
     * This doesn't check for illegal target names, such as "xml" or "XML",
 | 
						|
     * or namespace-incompatible ones like "big:dog"; the caller is
 | 
						|
     * responsible for ensuring those names are legal.
 | 
						|
     */
 | 
						|
    final public void processingInstruction (String target, String data)
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        if (locator == null)
 | 
						|
            locator = new LocatorImpl ();
 | 
						|
 | 
						|
        // don't print internal subset for XHTML
 | 
						|
        if (xhtml && startedDoctype)
 | 
						|
            return;
 | 
						|
 | 
						|
        // ancient HTML browsers might render these ... their loss.
 | 
						|
        // to prevent:  "if (xhtml) return;".
 | 
						|
 | 
						|
        try {
 | 
						|
            if (entityNestLevel != 0)
 | 
						|
                return;
 | 
						|
            if (canonical && inEpilogue)
 | 
						|
                newline ();
 | 
						|
            rawWrite ("<?");
 | 
						|
            rawWrite (target);
 | 
						|
            rawWrite (' ');
 | 
						|
            escapeChars (data.toCharArray (), -1, -1, CTX_UNPARSED);
 | 
						|
            rawWrite ("?>");
 | 
						|
            if (elementNestLevel == 0 && !(canonical && inEpilogue))
 | 
						|
                newline ();
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /** <b>SAX1</b>: indicates a non-expanded entity reference */
 | 
						|
    public void skippedEntity (String name)
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        try {
 | 
						|
            rawWrite ("&");
 | 
						|
            rawWrite (name);
 | 
						|
            rawWrite (";");
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    // SAX2 LexicalHandler
 | 
						|
 | 
						|
    /** <b>SAX2</b>:  called before parsing CDATA characters */
 | 
						|
    final public void startCDATA ()
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        if (locator == null)
 | 
						|
            locator = new LocatorImpl ();
 | 
						|
 | 
						|
        if (canonical)
 | 
						|
            return;
 | 
						|
 | 
						|
        try {
 | 
						|
            inCDATA = true;
 | 
						|
            if (entityNestLevel == 0)
 | 
						|
                rawWrite ("<![CDATA[");
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /** <b>SAX2</b>:  called after parsing CDATA characters */
 | 
						|
    final public void endCDATA ()
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        if (canonical)
 | 
						|
            return;
 | 
						|
 | 
						|
        try {
 | 
						|
            inCDATA = false;
 | 
						|
            if (entityNestLevel == 0)
 | 
						|
                rawWrite ("]]>");
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * <b>SAX2</b>:  called when the doctype is partially parsed
 | 
						|
     * Note that this, like other doctype related calls, is ignored
 | 
						|
     * when XHTML is in use.
 | 
						|
     */
 | 
						|
    final public void startDTD (String name, String publicId, String systemId)
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        if (locator == null)
 | 
						|
            locator = new LocatorImpl ();
 | 
						|
        if (xhtml)
 | 
						|
            return;
 | 
						|
        try {
 | 
						|
            inDoctype = startedDoctype = true;
 | 
						|
            if (canonical)
 | 
						|
                return;
 | 
						|
            rawWrite ("<!DOCTYPE ");
 | 
						|
            rawWrite (name);
 | 
						|
            rawWrite (' ');
 | 
						|
 | 
						|
            if (!expandingEntities) {
 | 
						|
                if (publicId != null)
 | 
						|
                    rawWrite ("PUBLIC '" + publicId + "' '" + systemId + "' ");
 | 
						|
                else if (systemId != null)
 | 
						|
                    rawWrite ("SYSTEM '" + systemId + "' ");
 | 
						|
            }
 | 
						|
 | 
						|
            rawWrite ('[');
 | 
						|
            newline ();
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /** <b>SAX2</b>:  called after the doctype is parsed */
 | 
						|
    final public void endDTD ()
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        inDoctype = false;
 | 
						|
        if (canonical || xhtml)
 | 
						|
            return;
 | 
						|
        try {
 | 
						|
            rawWrite ("]>");
 | 
						|
            newline ();
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * <b>SAX2</b>:  called before parsing a general entity in content
 | 
						|
     */
 | 
						|
    final public void startEntity (String name)
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        try {
 | 
						|
            boolean     writeEOL = true;
 | 
						|
 | 
						|
            // Predefined XHTML entities (for characters) will get
 | 
						|
            // mapped back later.
 | 
						|
            if (xhtml || expandingEntities)
 | 
						|
                return;
 | 
						|
 | 
						|
            entityNestLevel++;
 | 
						|
            if (name.equals ("[dtd]"))
 | 
						|
                return;
 | 
						|
            if (entityNestLevel != 1)
 | 
						|
                return;
 | 
						|
            if (!name.startsWith ("%")) {
 | 
						|
                writeEOL = false;
 | 
						|
                rawWrite ('&');
 | 
						|
            }
 | 
						|
            rawWrite (name);
 | 
						|
            rawWrite (';');
 | 
						|
            if (writeEOL)
 | 
						|
                newline ();
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * <b>SAX2</b>:  called after parsing a general entity in content
 | 
						|
     */
 | 
						|
    final public void endEntity (String name)
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        if (xhtml || expandingEntities)
 | 
						|
            return;
 | 
						|
        entityNestLevel--;
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * <b>SAX2</b>:  called when comments are parsed.
 | 
						|
     * When XHTML is used, the old HTML tradition of using comments
 | 
						|
     * to for inline CSS, or for JavaScript code is  discouraged.
 | 
						|
     * This is because XML processors are encouraged to discard, on
 | 
						|
     * the grounds that comments are for users (and perhaps text
 | 
						|
     * editors) not programs.  Instead, use external scripts
 | 
						|
     */
 | 
						|
    final public void comment (char ch [], int start, int length)
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        if (locator == null)
 | 
						|
            locator = new LocatorImpl ();
 | 
						|
 | 
						|
        // don't print internal subset for XHTML
 | 
						|
        if (xhtml && startedDoctype)
 | 
						|
            return;
 | 
						|
        // don't print comment in doctype for canon xml
 | 
						|
        if (canonical && inDoctype)
 | 
						|
            return;
 | 
						|
 | 
						|
        try {
 | 
						|
            boolean indent;
 | 
						|
 | 
						|
            if (prettyPrinting && space.empty ())
 | 
						|
                fatal ("stack discipline", null);
 | 
						|
            indent = prettyPrinting && "default".equals (space.peek ());
 | 
						|
            if (entityNestLevel != 0)
 | 
						|
                return;
 | 
						|
            if (indent)
 | 
						|
                doIndent ();
 | 
						|
            if (canonical && inEpilogue)
 | 
						|
                newline ();
 | 
						|
            rawWrite ("<!--");
 | 
						|
            escapeChars (ch, start, length, CTX_UNPARSED);
 | 
						|
            rawWrite ("-->");
 | 
						|
            if (indent)
 | 
						|
                doIndent ();
 | 
						|
            if (elementNestLevel == 0 && !(canonical && inEpilogue))
 | 
						|
                newline ();
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    // SAX1 DTDHandler
 | 
						|
 | 
						|
    /** <b>SAX1</b>:  called on notation declarations */
 | 
						|
    final public void notationDecl (String name,
 | 
						|
        String publicId, String systemId)
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        if (xhtml)
 | 
						|
            return;
 | 
						|
        try {
 | 
						|
            // At this time, only SAX2 callbacks start these.
 | 
						|
            if (!startedDoctype)
 | 
						|
                return;
 | 
						|
 | 
						|
            if (entityNestLevel != 0)
 | 
						|
                return;
 | 
						|
            rawWrite ("<!NOTATION " + name + " ");
 | 
						|
            if (publicId != null)
 | 
						|
                rawWrite ("PUBLIC \"" + publicId + '"');
 | 
						|
            else
 | 
						|
                rawWrite ("SYSTEM ");
 | 
						|
            if (systemId != null)
 | 
						|
                rawWrite ('"' + systemId + '"');
 | 
						|
            rawWrite (">");
 | 
						|
            newline ();
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /** <b>SAX1</b>:  called on unparsed entity declarations */
 | 
						|
    final public void unparsedEntityDecl (String name,
 | 
						|
        String publicId, String systemId,
 | 
						|
        String notationName)
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        if (xhtml)
 | 
						|
            return;
 | 
						|
        try {
 | 
						|
            // At this time, only SAX2 callbacks start these.
 | 
						|
            if (!startedDoctype)  {
 | 
						|
                // FIXME: write to temporary buffer, and make the start
 | 
						|
                // of the root element write these declarations.
 | 
						|
                return;
 | 
						|
            }
 | 
						|
 | 
						|
            if (entityNestLevel != 0)
 | 
						|
                return;
 | 
						|
            rawWrite ("<!ENTITY " + name + " ");
 | 
						|
            if (publicId != null)
 | 
						|
                rawWrite ("PUBLIC \"" + publicId + '"');
 | 
						|
            else
 | 
						|
                rawWrite ("SYSTEM ");
 | 
						|
            rawWrite ('"' + systemId + '"');
 | 
						|
            rawWrite (" NDATA " + notationName + ">");
 | 
						|
            newline ();
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    // SAX2 DeclHandler
 | 
						|
 | 
						|
    /** <b>SAX2</b>:  called on attribute declarations */
 | 
						|
    final public void attributeDecl (String eName, String aName,
 | 
						|
            String type, String mode, String value)
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        if (xhtml)
 | 
						|
            return;
 | 
						|
        try {
 | 
						|
            // At this time, only SAX2 callbacks start these.
 | 
						|
            if (!startedDoctype)
 | 
						|
                return;
 | 
						|
            if (entityNestLevel != 0)
 | 
						|
                return;
 | 
						|
            rawWrite ("<!ATTLIST " + eName + ' ' + aName + ' ');
 | 
						|
            rawWrite (type);
 | 
						|
            rawWrite (' ');
 | 
						|
            if (mode != null)
 | 
						|
                rawWrite (mode + ' ');
 | 
						|
            if (value != null)
 | 
						|
                writeQuotedValue (value, CTX_ATTRIBUTE);
 | 
						|
            rawWrite ('>');
 | 
						|
            newline ();
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /** <b>SAX2</b>:  called on element declarations */
 | 
						|
    final public void elementDecl (String name, String model)
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        if (xhtml)
 | 
						|
            return;
 | 
						|
        try {
 | 
						|
            // At this time, only SAX2 callbacks start these.
 | 
						|
            if (!startedDoctype)
 | 
						|
                return;
 | 
						|
            if (entityNestLevel != 0)
 | 
						|
                return;
 | 
						|
            rawWrite ("<!ELEMENT " + name + ' ' + model + '>');
 | 
						|
            newline ();
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /** <b>SAX2</b>:  called on external entity declarations */
 | 
						|
    final public void externalEntityDecl (
 | 
						|
        String name,
 | 
						|
        String publicId,
 | 
						|
        String systemId)
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        if (xhtml)
 | 
						|
            return;
 | 
						|
        try {
 | 
						|
            // At this time, only SAX2 callbacks start these.
 | 
						|
            if (!startedDoctype)
 | 
						|
                return;
 | 
						|
            if (entityNestLevel != 0)
 | 
						|
                return;
 | 
						|
            rawWrite ("<!ENTITY ");
 | 
						|
            if (name.startsWith ("%")) {
 | 
						|
                rawWrite ("% ");
 | 
						|
                rawWrite (name.substring (1));
 | 
						|
            } else
 | 
						|
                rawWrite (name);
 | 
						|
            if (publicId != null)
 | 
						|
                rawWrite (" PUBLIC \"" + publicId + '"');
 | 
						|
            else
 | 
						|
                rawWrite (" SYSTEM ");
 | 
						|
            rawWrite ('"' + systemId + "\">");
 | 
						|
            newline ();
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /** <b>SAX2</b>:  called on internal entity declarations */
 | 
						|
    final public void internalEntityDecl (String name, String value)
 | 
						|
    throws SAXException
 | 
						|
    {
 | 
						|
        if (xhtml)
 | 
						|
            return;
 | 
						|
        try {
 | 
						|
            // At this time, only SAX2 callbacks start these.
 | 
						|
            if (!startedDoctype)
 | 
						|
                return;
 | 
						|
            if (entityNestLevel != 0)
 | 
						|
                return;
 | 
						|
            rawWrite ("<!ENTITY ");
 | 
						|
            if (name.startsWith ("%")) {
 | 
						|
                rawWrite ("% ");
 | 
						|
                rawWrite (name.substring (1));
 | 
						|
            } else
 | 
						|
                rawWrite (name);
 | 
						|
            rawWrite (' ');
 | 
						|
            writeQuotedValue (value, CTX_ENTITY);
 | 
						|
            rawWrite ('>');
 | 
						|
            newline ();
 | 
						|
        } catch (IOException e) {
 | 
						|
            fatal ("can't write", e);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    private void writeQuotedValue (String value, int code)
 | 
						|
    throws SAXException, IOException
 | 
						|
    {
 | 
						|
        char    buf [] = value.toCharArray ();
 | 
						|
        int     off = 0, len = buf.length;
 | 
						|
 | 
						|
        // we can't add line breaks to attribute/entity/... values
 | 
						|
        noWrap = true;
 | 
						|
        rawWrite ('"');
 | 
						|
        escapeChars (buf, off, len, code);
 | 
						|
        rawWrite ('"');
 | 
						|
        noWrap = false;
 | 
						|
    }
 | 
						|
 | 
						|
    // From "HTMLlat1x.ent" ... names of entities for ISO-8859-1
 | 
						|
    // (Latin/1) characters, all codes:  160-255 (0xA0-0xFF).
 | 
						|
    // Codes 128-159 have no assigned values.
 | 
						|
    private static final String HTMLlat1x [] = {
 | 
						|
        // 160
 | 
						|
        "nbsp", "iexcl", "cent", "pound", "curren",
 | 
						|
        "yen", "brvbar", "sect", "uml", "copy",
 | 
						|
 | 
						|
        // 170
 | 
						|
        "ordf", "laquo", "not", "shy", "reg",
 | 
						|
        "macr", "deg", "plusmn", "sup2", "sup3",
 | 
						|
 | 
						|
        // 180
 | 
						|
        "acute", "micro", "para", "middot", "cedil",
 | 
						|
        "sup1", "ordm", "raquo", "frac14", "frac12",
 | 
						|
 | 
						|
        // 190
 | 
						|
        "frac34", "iquest", "Agrave", "Aacute", "Acirc",
 | 
						|
        "Atilde", "Auml", "Aring", "AElig", "Ccedil",
 | 
						|
 | 
						|
        // 200
 | 
						|
        "Egrave", "Eacute", "Ecirc", "Euml", "Igrave",
 | 
						|
        "Iacute", "Icirc", "Iuml", "ETH", "Ntilde",
 | 
						|
 | 
						|
        // 210
 | 
						|
        "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml",
 | 
						|
        "times", "Oslash", "Ugrave", "Uacute", "Ucirc",
 | 
						|
 | 
						|
        // 220
 | 
						|
        "Uuml", "Yacute", "THORN", "szlig", "agrave",
 | 
						|
        "aacute", "acirc", "atilde", "auml", "aring",
 | 
						|
 | 
						|
        // 230
 | 
						|
        "aelig", "ccedil", "egrave", "eacute", "ecirc",
 | 
						|
        "euml", "igrave", "iacute", "icirc", "iuml",
 | 
						|
 | 
						|
        // 240
 | 
						|
        "eth", "ntilde", "ograve", "oacute", "ocirc",
 | 
						|
        "otilde", "ouml", "divide", "oslash", "ugrave",
 | 
						|
 | 
						|
        // 250
 | 
						|
        "uacute", "ucirc", "uuml", "yacute", "thorn",
 | 
						|
        "yuml"
 | 
						|
    };
 | 
						|
 | 
						|
    // From "HTMLsymbolx.ent" ... some of the symbols that
 | 
						|
    // we can conveniently handle.  Entities for the Greek.
 | 
						|
    // alphabet (upper and lower cases) are compact.
 | 
						|
    private static final String HTMLsymbolx_GR [] = {
 | 
						|
        // 913
 | 
						|
        "Alpha", "Beta", "Gamma", "Delta", "Epsilon",
 | 
						|
        "Zeta", "Eta", "Theta", "Iota", "Kappa",
 | 
						|
 | 
						|
        // 923
 | 
						|
        "Lambda", "Mu", "Nu", "Xi", "Omicron",
 | 
						|
        "Pi", "Rho", null, "Sigma", "Tau",
 | 
						|
 | 
						|
        // 933
 | 
						|
        "Upsilon", "Phi", "Chi", "Psi", "Omega"
 | 
						|
    };
 | 
						|
 | 
						|
    private static final String HTMLsymbolx_gr [] = {
 | 
						|
        // 945
 | 
						|
        "alpha", "beta", "gamma", "delta", "epsilon",
 | 
						|
        "zeta", "eta", "theta", "iota", "kappa",
 | 
						|
 | 
						|
        // 955
 | 
						|
        "lambda", "mu", "nu", "xi", "omicron",
 | 
						|
        "pi", "rho", "sigmaf", "sigma", "tau",
 | 
						|
 | 
						|
        // 965
 | 
						|
        "upsilon", "phi", "chi", "psi", "omega"
 | 
						|
    };
 | 
						|
 | 
						|
 | 
						|
    // General routine to write text and substitute predefined
 | 
						|
    // entities (XML, and a special case for XHTML) as needed.
 | 
						|
    private void escapeChars (char buf [], int off, int len, int code)
 | 
						|
    throws SAXException, IOException
 | 
						|
    {
 | 
						|
        int     first = 0;
 | 
						|
 | 
						|
        if (off < 0) {
 | 
						|
            off = 0;
 | 
						|
            len = buf.length;
 | 
						|
        }
 | 
						|
        for (int i = 0; i < len; i++) {
 | 
						|
            String      esc;
 | 
						|
            char        c = buf [off + i];
 | 
						|
 | 
						|
            switch (c) {
 | 
						|
              // Note that CTX_ATTRIBUTE isn't explicitly tested here;
 | 
						|
              // all syntax delimiters are escaped in CTX_ATTRIBUTE,
 | 
						|
              // otherwise it's similar to CTX_CONTENT
 | 
						|
 | 
						|
              // ampersand flags entity references; entity replacement
 | 
						|
              // text has unexpanded references, other text doesn't.
 | 
						|
              case '&':
 | 
						|
                if (code == CTX_ENTITY || code == CTX_UNPARSED)
 | 
						|
                    continue;
 | 
						|
                esc = "amp";
 | 
						|
                break;
 | 
						|
 | 
						|
              // attributes and text may NOT have literal '<', but
 | 
						|
              // entities may have markup constructs
 | 
						|
              case '<':
 | 
						|
                if (code == CTX_ENTITY || code == CTX_UNPARSED)
 | 
						|
                    continue;
 | 
						|
                esc = "lt";
 | 
						|
                break;
 | 
						|
 | 
						|
              // as above re markup constructs; but otherwise
 | 
						|
              // except when canonicalizing, this is for consistency
 | 
						|
              case '>':
 | 
						|
                if (code == CTX_ENTITY || code == CTX_UNPARSED)
 | 
						|
                    continue;
 | 
						|
                esc = "gt";
 | 
						|
                break;
 | 
						|
              case '\'':
 | 
						|
                if (code == CTX_CONTENT || code == CTX_UNPARSED)
 | 
						|
                    continue;
 | 
						|
                if (canonical)
 | 
						|
                    continue;
 | 
						|
                esc = "apos";
 | 
						|
                break;
 | 
						|
 | 
						|
              // needed when printing quoted attribute/entity values
 | 
						|
              case '"':
 | 
						|
                if (code == CTX_CONTENT || code == CTX_UNPARSED)
 | 
						|
                    continue;
 | 
						|
                esc = "quot";
 | 
						|
                break;
 | 
						|
 | 
						|
              // make line ends work per host OS convention
 | 
						|
              case '\n':
 | 
						|
                esc = eol;
 | 
						|
                break;
 | 
						|
 | 
						|
              //
 | 
						|
              // No other characters NEED special treatment ... except
 | 
						|
              // for encoding-specific issues, like whether the character
 | 
						|
              // can really be represented in that encoding.
 | 
						|
              //
 | 
						|
              default:
 | 
						|
                //
 | 
						|
                // There are characters we can never write safely; getting
 | 
						|
                // them is an error.
 | 
						|
                //
 | 
						|
                //   (a) They're never legal in XML ... detected by range
 | 
						|
                //      checks, and (eventually) by remerging surrogate
 | 
						|
                //      pairs on output.  (Easy error for apps to prevent.)
 | 
						|
                //
 | 
						|
                //   (b) This encoding can't represent them, and we
 | 
						|
                //      can't make reference substitution (e.g. inside
 | 
						|
                //      CDATA sections, names, PI data, etc).  (Hard for
 | 
						|
                //      apps to prevent, except by using UTF-8 or UTF-16
 | 
						|
                //      as their output encoding.)
 | 
						|
                //
 | 
						|
                // We know a very little bit about what characters
 | 
						|
                // the US-ASCII and ISO-8859-1 encodings support.  For
 | 
						|
                // other encodings we can't detect the second type of
 | 
						|
                // error at all.  (Never an issue for UTF-8 or UTF-16.)
 | 
						|
                //
 | 
						|
 | 
						|
// FIXME:  CR in CDATA is an error; in text, turn to a char ref
 | 
						|
 | 
						|
// FIXME:  CR/LF/TAB in attributes should become char refs
 | 
						|
 | 
						|
                if ((c > 0xfffd)
 | 
						|
                        || ((c < 0x0020) && !((c == 0x0009)
 | 
						|
                                || (c == 0x000A) || (c == 0x000D)))
 | 
						|
                        || (((c & dangerMask) != 0)
 | 
						|
                            && (code == CTX_UNPARSED))) {
 | 
						|
 | 
						|
                    // if case (b) in CDATA, we might end the section,
 | 
						|
                    // write a reference, then restart ... possible
 | 
						|
                    // in one DOM L3 draft.
 | 
						|
 | 
						|
                    throw new CharConversionException (
 | 
						|
                            "Illegal or non-writable character: U+"
 | 
						|
                            + Integer.toHexString (c));
 | 
						|
                }
 | 
						|
 | 
						|
                //
 | 
						|
                // If the output encoding represents the character
 | 
						|
                // directly, let it do so!  Else we'll escape it.
 | 
						|
                //
 | 
						|
                if ((c & dangerMask) == 0)
 | 
						|
                    continue;
 | 
						|
                esc = null;
 | 
						|
 | 
						|
                // Avoid numeric refs where symbolic ones exist, as
 | 
						|
                // symbolic ones make more sense to humans reading!
 | 
						|
                if (xhtml) {
 | 
						|
                    // all the HTMLlat1x.ent entities
 | 
						|
                    // (all the "ISO-8859-1" characters)
 | 
						|
                    if (c >= 160 && c <= 255)
 | 
						|
                        esc = HTMLlat1x [c - 160];
 | 
						|
 | 
						|
                    // not quite half the HTMLsymbolx.ent entities
 | 
						|
                    else if (c >= 913 && c <= 937)
 | 
						|
                        esc = HTMLsymbolx_GR [c - 913];
 | 
						|
                    else if (c >= 945 && c <= 969)
 | 
						|
                        esc = HTMLsymbolx_gr [c - 945];
 | 
						|
 | 
						|
                    else switch (c) {
 | 
						|
                        // all of the HTMLspecialx.ent entities
 | 
						|
                        case  338: esc = "OElig";       break;
 | 
						|
                        case  339: esc = "oelig";       break;
 | 
						|
                        case  352: esc = "Scaron";      break;
 | 
						|
                        case  353: esc = "scaron";      break;
 | 
						|
                        case  376: esc = "Yuml";        break;
 | 
						|
                        case  710: esc = "circ";        break;
 | 
						|
                        case  732: esc = "tilde";       break;
 | 
						|
                        case 8194: esc = "ensp";        break;
 | 
						|
                        case 8195: esc = "emsp";        break;
 | 
						|
                        case 8201: esc = "thinsp";      break;
 | 
						|
                        case 8204: esc = "zwnj";        break;
 | 
						|
                        case 8205: esc = "zwj";         break;
 | 
						|
                        case 8206: esc = "lrm";         break;
 | 
						|
                        case 8207: esc = "rlm";         break;
 | 
						|
                        case 8211: esc = "ndash";       break;
 | 
						|
                        case 8212: esc = "mdash";       break;
 | 
						|
                        case 8216: esc = "lsquo";       break;
 | 
						|
                        case 8217: esc = "rsquo";       break;
 | 
						|
                        case 8218: esc = "sbquo";       break;
 | 
						|
                        case 8220: esc = "ldquo";       break;
 | 
						|
                        case 8221: esc = "rdquo";       break;
 | 
						|
                        case 8222: esc = "bdquo";       break;
 | 
						|
                        case 8224: esc = "dagger";      break;
 | 
						|
                        case 8225: esc = "Dagger";      break;
 | 
						|
                        case 8240: esc = "permil";      break;
 | 
						|
                        case 8249: esc = "lsaquo";      break;
 | 
						|
                        case 8250: esc = "rsaquo";      break;
 | 
						|
                        case 8364: esc = "euro";        break;
 | 
						|
 | 
						|
                        // the other HTMLsymbox.ent entities
 | 
						|
                        case  402: esc = "fnof";        break;
 | 
						|
                        case  977: esc = "thetasym";    break;
 | 
						|
                        case  978: esc = "upsih";       break;
 | 
						|
                        case  982: esc = "piv";         break;
 | 
						|
                        case 8226: esc = "bull";        break;
 | 
						|
                        case 8230: esc = "hellip";      break;
 | 
						|
                        case 8242: esc = "prime";       break;
 | 
						|
                        case 8243: esc = "Prime";       break;
 | 
						|
                        case 8254: esc = "oline";       break;
 | 
						|
                        case 8260: esc = "frasl";       break;
 | 
						|
                        case 8472: esc = "weierp";      break;
 | 
						|
                        case 8465: esc = "image";       break;
 | 
						|
                        case 8476: esc = "real";        break;
 | 
						|
                        case 8482: esc = "trade";       break;
 | 
						|
                        case 8501: esc = "alefsym";     break;
 | 
						|
                        case 8592: esc = "larr";        break;
 | 
						|
                        case 8593: esc = "uarr";        break;
 | 
						|
                        case 8594: esc = "rarr";        break;
 | 
						|
                        case 8595: esc = "darr";        break;
 | 
						|
                        case 8596: esc = "harr";        break;
 | 
						|
                        case 8629: esc = "crarr";       break;
 | 
						|
                        case 8656: esc = "lArr";        break;
 | 
						|
                        case 8657: esc = "uArr";        break;
 | 
						|
                        case 8658: esc = "rArr";        break;
 | 
						|
                        case 8659: esc = "dArr";        break;
 | 
						|
                        case 8660: esc = "hArr";        break;
 | 
						|
                        case 8704: esc = "forall";      break;
 | 
						|
                        case 8706: esc = "part";        break;
 | 
						|
                        case 8707: esc = "exist";       break;
 | 
						|
                        case 8709: esc = "empty";       break;
 | 
						|
                        case 8711: esc = "nabla";       break;
 | 
						|
                        case 8712: esc = "isin";        break;
 | 
						|
                        case 8713: esc = "notin";       break;
 | 
						|
                        case 8715: esc = "ni";          break;
 | 
						|
                        case 8719: esc = "prod";        break;
 | 
						|
                        case 8721: esc = "sum";         break;
 | 
						|
                        case 8722: esc = "minus";       break;
 | 
						|
                        case 8727: esc = "lowast";      break;
 | 
						|
                        case 8730: esc = "radic";       break;
 | 
						|
                        case 8733: esc = "prop";        break;
 | 
						|
                        case 8734: esc = "infin";       break;
 | 
						|
                        case 8736: esc = "ang";         break;
 | 
						|
                        case 8743: esc = "and";         break;
 | 
						|
                        case 8744: esc = "or";          break;
 | 
						|
                        case 8745: esc = "cap";         break;
 | 
						|
                        case 8746: esc = "cup";         break;
 | 
						|
                        case 8747: esc = "int";         break;
 | 
						|
                        case 8756: esc = "there4";      break;
 | 
						|
                        case 8764: esc = "sim";         break;
 | 
						|
                        case 8773: esc = "cong";        break;
 | 
						|
                        case 8776: esc = "asymp";       break;
 | 
						|
                        case 8800: esc = "ne";          break;
 | 
						|
                        case 8801: esc = "equiv";       break;
 | 
						|
                        case 8804: esc = "le";          break;
 | 
						|
                        case 8805: esc = "ge";          break;
 | 
						|
                        case 8834: esc = "sub";         break;
 | 
						|
                        case 8835: esc = "sup";         break;
 | 
						|
                        case 8836: esc = "nsub";        break;
 | 
						|
                        case 8838: esc = "sube";        break;
 | 
						|
                        case 8839: esc = "supe";        break;
 | 
						|
                        case 8853: esc = "oplus";       break;
 | 
						|
                        case 8855: esc = "otimes";      break;
 | 
						|
                        case 8869: esc = "perp";        break;
 | 
						|
                        case 8901: esc = "sdot";        break;
 | 
						|
                        case 8968: esc = "lceil";       break;
 | 
						|
                        case 8969: esc = "rceil";       break;
 | 
						|
                        case 8970: esc = "lfloor";      break;
 | 
						|
                        case 8971: esc = "rfloor";      break;
 | 
						|
                        case 9001: esc = "lang";        break;
 | 
						|
                        case 9002: esc = "rang";        break;
 | 
						|
                        case 9674: esc = "loz";         break;
 | 
						|
                        case 9824: esc = "spades";      break;
 | 
						|
                        case 9827: esc = "clubs";       break;
 | 
						|
                        case 9829: esc = "hearts";      break;
 | 
						|
                        case 9830: esc = "diams";       break;
 | 
						|
                    }
 | 
						|
                }
 | 
						|
 | 
						|
                // else escape with numeric char refs
 | 
						|
                if (esc == null) {
 | 
						|
                    stringBuf.setLength (0);
 | 
						|
                    stringBuf.append ("#x");
 | 
						|
                    stringBuf.append (Integer.toHexString (c).toUpperCase ());
 | 
						|
                    esc = stringBuf.toString ();
 | 
						|
 | 
						|
                    // FIXME:  We don't write surrogate pairs correctly.
 | 
						|
                    // They should work as one ref per character, since
 | 
						|
                    // each pair is one character.  For reading back into
 | 
						|
                    // Unicode, it matters beginning in Unicode 3.1 ...
 | 
						|
                }
 | 
						|
                break;
 | 
						|
            }
 | 
						|
            if (i != first)
 | 
						|
                rawWrite (buf, off + first, i - first);
 | 
						|
            first = i + 1;
 | 
						|
            if (esc == eol)
 | 
						|
                newline ();
 | 
						|
            else {
 | 
						|
                rawWrite ('&');
 | 
						|
                rawWrite (esc);
 | 
						|
                rawWrite (';');
 | 
						|
            }
 | 
						|
        }
 | 
						|
        if (first < len)
 | 
						|
            rawWrite (buf, off + first, len - first);
 | 
						|
    }
 | 
						|
 | 
						|
 | 
						|
 | 
						|
    private void newline ()
 | 
						|
    throws SAXException, IOException
 | 
						|
    {
 | 
						|
        out.write (eol);
 | 
						|
        column = 0;
 | 
						|
    }
 | 
						|
 | 
						|
    private void doIndent ()
 | 
						|
    throws SAXException, IOException
 | 
						|
    {
 | 
						|
        int     space = elementNestLevel * 2;
 | 
						|
 | 
						|
        newline ();
 | 
						|
        column = space;
 | 
						|
        // track tabs only at line starts
 | 
						|
        while (space > 8) {
 | 
						|
            out.write ("\t");
 | 
						|
            space -= 8;
 | 
						|
        }
 | 
						|
        while (space > 0) {
 | 
						|
            out.write ("  ");
 | 
						|
            space -= 2;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    private void rawWrite (char c)
 | 
						|
    throws IOException
 | 
						|
    {
 | 
						|
        out.write (c);
 | 
						|
        column++;
 | 
						|
    }
 | 
						|
 | 
						|
    private void rawWrite (String s)
 | 
						|
    throws SAXException, IOException
 | 
						|
    {
 | 
						|
        if (prettyPrinting && "default".equals (space.peek ())) {
 | 
						|
            char data [] = s.toCharArray ();
 | 
						|
            rawWrite (data, 0, data.length);
 | 
						|
        } else {
 | 
						|
            out.write (s);
 | 
						|
            column += s.length ();
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    // NOTE:  if xhtml, the REC gives some rules about whitespace
 | 
						|
    // which we could follow ... notably, many places where conformant
 | 
						|
    // agents "must" consolidate/normalize whitespace.  Line ends can
 | 
						|
    // be removed there, etc.  This may not be the right place to do
 | 
						|
    // such mappings though.
 | 
						|
 | 
						|
    // Line buffering may help clarify algorithms and improve results.
 | 
						|
 | 
						|
    // It's likely xml:space needs more attention.
 | 
						|
 | 
						|
    private void rawWrite (char buf [], int offset, int length)
 | 
						|
    throws SAXException, IOException
 | 
						|
    {
 | 
						|
        boolean         wrap;
 | 
						|
 | 
						|
        if (prettyPrinting && space.empty ())
 | 
						|
            fatal ("stack discipline", null);
 | 
						|
 | 
						|
        wrap = prettyPrinting && "default".equals (space.peek ());
 | 
						|
        if (!wrap) {
 | 
						|
            out.write (buf, offset, length);
 | 
						|
            column += length;
 | 
						|
            return;
 | 
						|
        }
 | 
						|
 | 
						|
        // we're pretty printing and want to fill lines out only
 | 
						|
        // to the desired line length.
 | 
						|
        while (length > 0) {
 | 
						|
            int         target = lineLength - column;
 | 
						|
            boolean     wrote = false;
 | 
						|
 | 
						|
            // Do we even have a problem?
 | 
						|
            if (target > length || noWrap) {
 | 
						|
                out.write (buf, offset, length);
 | 
						|
                column += length;
 | 
						|
                return;
 | 
						|
            }
 | 
						|
 | 
						|
            // break the line at a space character, trying to fill
 | 
						|
            // as much of the line as possible.
 | 
						|
            char        c;
 | 
						|
 | 
						|
            for (int i = target - 1; i >= 0; i--) {
 | 
						|
                if ((c = buf [offset + i]) == ' ' || c == '\t') {
 | 
						|
                    i++;
 | 
						|
                    out.write (buf, offset, i);
 | 
						|
                    doIndent ();
 | 
						|
                    offset += i;
 | 
						|
                    length -= i;
 | 
						|
                    wrote = true;
 | 
						|
                    break;
 | 
						|
                }
 | 
						|
            }
 | 
						|
            if (wrote)
 | 
						|
                continue;
 | 
						|
 | 
						|
            // no space character permitting break before target
 | 
						|
            // line length is filled.  So, take the next one.
 | 
						|
            if (target < 0)
 | 
						|
                target = 0;
 | 
						|
            for (int i = target; i < length; i++)
 | 
						|
                if ((c = buf [offset + i]) == ' ' || c == '\t') {
 | 
						|
                    i++;
 | 
						|
                    out.write (buf, offset, i);
 | 
						|
                    doIndent ();
 | 
						|
                    offset += i;
 | 
						|
                    length -= i;
 | 
						|
                    wrote = true;
 | 
						|
                    break;
 | 
						|
                }
 | 
						|
            if (wrote)
 | 
						|
                continue;
 | 
						|
 | 
						|
            // no such luck.
 | 
						|
            out.write (buf, offset, length);
 | 
						|
            column += length;
 | 
						|
            break;
 | 
						|
        }
 | 
						|
    }
 | 
						|
}
 |