Package net.sf.saxon.evpull

Source Code of net.sf.saxon.evpull.StaxToEventBridge$StaxErrorReporter

package net.sf.saxon.evpull;

import net.sf.saxon.Configuration;
import net.sf.saxon.event.*;
import net.sf.saxon.expr.ExpressionLocation;
import net.sf.saxon.om.*;
import net.sf.saxon.pull.UnparsedEntity;
import net.sf.saxon.tinytree.CharSlice;
import net.sf.saxon.trans.SaxonErrorCode;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.type.Type;
import net.sf.saxon.value.Whitespace;

import javax.xml.stream.*;
import javax.xml.stream.events.EntityDeclaration;
import javax.xml.transform.TransformerException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

/**
* This class implements the Saxon EventIterator API on top of a standard StAX parser
* (or any other StAX XMLStreamReader implementation)
*/

public class StaxToEventBridge implements EventIterator, SaxonLocator, SourceLocationProvider {

    private Configuration config;
    private XMLStreamReader reader;
    private PipelineConfiguration pipe;
    private List unparsedEntities = null;
    PullEvent currentEvent = null;
    int depth = 0;
    boolean ignoreIgnorable = false;

    /**
     * Create a new instance of the class
     */

    public StaxToEventBridge() {

    }

    /**
     * Supply an input stream containing XML to be parsed. A StAX parser is created using
     * the JAXP XMLInputFactory.
     * @param systemId The Base URI of the input document
     * @param inputStream the stream containing the XML to be parsed
     * @throws net.sf.saxon.trans.XPathException if an error occurs creating the StAX parser
     */

    public void setInputStream(String systemId, InputStream inputStream) throws XPathException {
        try {
            XMLInputFactory factory = XMLInputFactory.newInstance();
            //XMLInputFactory factory = new WstxInputFactory();
            factory.setXMLReporter(new StaxErrorReporter());
            reader = factory.createXMLStreamReader(systemId, inputStream);
        } catch (XMLStreamException e) {
            throw new XPathException(e);
        }
    }

    /**
     * Supply an XMLStreamReader: the events reported by this XMLStreamReader will be translated
     * into EventIterator events
     * @param reader the supplier of XML events, typically an XML parser
     */

    public void setXMLStreamReader(XMLStreamReader reader) {
        this.reader = reader;
    }

    /**
     * Set configuration information. This must only be called before any events
     * have been read.
     * @param pipe the pipeline configuration
     */

    public void setPipelineConfiguration(PipelineConfiguration pipe) {
        this.pipe = new PipelineConfiguration(pipe);
        this.pipe.setLocationProvider(this);
        config = pipe.getConfiguration();
        ignoreIgnorable = config.getStripsWhiteSpace() != Whitespace.NONE;
    }

    /**
     * Get configuration information.
     * @return the pipeline configuration
     */

    public PipelineConfiguration getPipelineConfiguration() {
        return pipe;
    }

    /**
     * Get the XMLStreamReader used by this StaxBridge. This is available only after
     * setInputStream() or setXMLStreamReader() has been called
     * @return the instance of XMLStreamReader allocated when setInputStream() was called,
     * or the instance supplied directly to setXMLStreamReader()
     */

    public XMLStreamReader getXMLStreamReader() {
        return reader;
    }

    /**
     * Get the name pool
     * @return the name pool
     */

    public NamePool getNamePool() {
        return pipe.getConfiguration().getNamePool();
    }

    /**
     * Get the next event
     * @return the next event; or null to indicate the end of the event stream
     */

    public PullEvent next() throws XPathException {
        if (currentEvent == null) {
            // StAX isn't reporting START_DOCUMENT so we supply it ourselves
            currentEvent = StartDocumentEvent.getInstance();
            return currentEvent;
        }
        if (currentEvent instanceof EndDocumentEvent) {
            try {
                reader.close();
            } catch (XMLStreamException e) {
                //
            }
            return null;
        }
        try {
            if (reader.hasNext()) {
                int event = reader.next();
                //System.err.println("Read event " + event);
                currentEvent = translate(event);
            } else {
                currentEvent = null;
            }
        } catch (XMLStreamException e) {
            String message = e.getMessage();
            // Following code recognizes the messages produced by the Sun Zephyr parser
            if (message.startsWith("ParseError at")) {
                int c = message.indexOf("\nMessage: ");
                if (c > 0) {
                    message = message.substring(c + 10);
                }
            }
            XPathException err = new XPathException("Error reported by XML parser: " + message);
            err.setErrorCode(SaxonErrorCode.SXXP0003);
            err.setLocator(translateLocation(e.getLocation()));
            throw err;
        }
        return currentEvent;
    }

    /**
     * Translate a StAX event into a Saxon PullEvent
     * @param event the StAX event
     * @return the Saxon PullEvent
     * @throws XPathException
     */

    private PullEvent translate(int event) throws XPathException {
            //System.err.println("EVENT " + event);
            switch (event) {
                case XMLStreamConstants.ATTRIBUTE:
                    return next();          // attributes are reported as part of StartElement
                case XMLStreamConstants.CDATA:
                case XMLStreamConstants.CHARACTERS:
                    if (depth == 0 && reader.isWhiteSpace()) {
                        return next();
                    } else {
                        Orphan o = new Orphan(config);
                        o.setNodeKind(Type.TEXT);
                        CharSlice value = new CharSlice(
                            reader.getTextCharacters(), reader.getTextStart(), reader.getTextLength());
                        o.setStringValue(value);
                        return o;
                    }
                case XMLStreamConstants.COMMENT: {
                    Orphan o = new Orphan(config);
                    o.setNodeKind(Type.COMMENT);
                    CharSlice value = new CharSlice(
                            reader.getTextCharacters(), reader.getTextStart(), reader.getTextLength());
                    o.setStringValue(value);
                    return o;
                }
                case XMLStreamConstants.DTD:
                    unparsedEntities = (List)reader.getProperty("javax.xml.stream.entities");
                    return next();
                case XMLStreamConstants.END_DOCUMENT:
                    return EndDocumentEvent.getInstance();
                case XMLStreamConstants.END_ELEMENT:
                    depth--;
                    return EndElementEvent.getInstance();
                case XMLStreamConstants.ENTITY_DECLARATION:
                    return next();
                case XMLStreamConstants.ENTITY_REFERENCE:
                    return next();
                case XMLStreamConstants.NAMESPACE:
                    return next();      // namespaces are reported as part of StartElement
                case XMLStreamConstants.NOTATION_DECLARATION:
                    return next();
                case XMLStreamConstants.PROCESSING_INSTRUCTION:{
                    Orphan o = new Orphan(config);
                    o.setNodeKind(Type.PROCESSING_INSTRUCTION);
                    String local = reader.getPITarget();
                    o.setNameCode(getNamePool().allocate("", "", local));
                    o.setStringValue(reader.getText());
                    return o;
                }
                case XMLStreamConstants.SPACE:
                    if (depth == 0) {
                        return next();
                    } else if (ignoreIgnorable) {
                        // (Brave attempt, but Woodstox doesn't seem to report ignorable whitespace)
                        return next();
                    } else {
                        Orphan o = new Orphan(config);
                        o.setNodeKind(Type.TEXT);
                        o.setStringValue(reader.getText());
                        return o;
                    }
                case XMLStreamConstants.START_DOCUMENT:
                    return next()// we supplied the START_DOCUMENT ourselves
                case XMLStreamConstants.START_ELEMENT:
                    depth++;
                    StartElementEvent see = new StartElementEvent(pipe);
                    String elocal = reader.getLocalName();
                    String euri = reader.getNamespaceURI();
                    String eprefix = reader.getPrefix();
                    if (eprefix == null) {
                        eprefix = "";
                    }
                    if (euri == null) {
                        euri = "";
                    }
                    int enc = getNamePool().allocate(eprefix, euri, elocal);
                    see.setNameCode(enc);
                    see.setTypeCode(StandardNames.XS_UNTYPED);
                    int attCount = reader.getAttributeCount();
                    for (int index=0; index<attCount; index++) {
                        String local = reader.getAttributeLocalName(index);
                        String uri = reader.getAttributeNamespace(index);
                        String prefix = reader.getAttributePrefix(index);
                        if (prefix == null) {
                            prefix = "";
                        }
                        if (uri == null) {
                            uri = "";
                        }
                        int nc = getNamePool().allocate(prefix, uri, local);
                        Orphan o = new Orphan(config);
                        o.setNodeKind(Type.ATTRIBUTE);
                        o.setNameCode(nc);
                        o.setStringValue(reader.getAttributeValue(index));
                        see.addAttribute(o);
                    }
                    see.namespaceFixup();
                    return see;
                default:
                    throw new IllegalStateException("Unknown StAX event " + event);


            }
    }


    /**
     * Return the public identifier for the current document event.
     * <p/>
     * <p>The return value is the public identifier of the document
     * entity or of the external parsed entity in which the markup
     * triggering the event appears.</p>
     *
     * @return A string containing the public identifier, or
     *         null if none is available.
     * @see #getSystemId
     */
    public String getPublicId() {
        return reader.getLocation().getPublicId();
    }

    /**
     * Return the system identifier for the current document event.
     * <p/>
     * <p>The return value is the system identifier of the document
     * entity or of the external parsed entity in which the markup
     * triggering the event appears.</p>
     * <p/>
     * <p>If the system identifier is a URL, the parser must resolve it
     * fully before passing it to the application.  For example, a file
     * name must always be provided as a <em>file:...</em> URL, and other
     * kinds of relative URI are also resolved against their bases.</p>
     *
     * @return A string containing the system identifier, or null
     *         if none is available.
     * @see #getPublicId
     */
    public String getSystemId() {
        return reader.getLocation().getSystemId();
    }

    /**
     * Return the line number where the current document event ends.
     * Lines are delimited by line ends, which are defined in
     * the XML specification.
     * <p/>
     * <p><strong>Warning:</strong> The return value from the method
     * is intended only as an approximation for the sake of diagnostics;
     * it is not intended to provide sufficient information
     * to edit the character content of the original XML document.
     * In some cases, these "line" numbers match what would be displayed
     * as columns, and in others they may not match the source text
     * due to internal entity expansion.  </p>
     * <p/>
     * <p>The return value is an approximation of the line number
     * in the document entity or external parsed entity where the
     * markup triggering the event appears.</p>
     * <p/>
     * <p>If possible, the SAX driver should provide the line position
     * of the first character after the text associated with the document
     * event.  The first line is line 1.</p>
     *
     * @return The line number, or -1 if none is available.
     * @see #getColumnNumber
     */
    public int getLineNumber() {
        return reader.getLocation().getLineNumber();
    }

    /**
     * Return the column number where the current document event ends.
     * This is one-based number of Java <code>char</code> values since
     * the last line end.
     * <p/>
     * <p><strong>Warning:</strong> The return value from the method
     * is intended only as an approximation for the sake of diagnostics;
     * it is not intended to provide sufficient information
     * to edit the character content of the original XML document.
     * For example, when lines contain combining character sequences, wide
     * characters, surrogate pairs, or bi-directional text, the value may
     * not correspond to the column in a text editor's display. </p>
     * <p/>
     * <p>The return value is an approximation of the column number
     * in the document entity or external parsed entity where the
     * markup triggering the event appears.</p>
     * <p/>
     * <p>If possible, the SAX driver should provide the line position
     * of the first character after the text associated with the document
     * event.  The first column in each line is column 1.</p>
     *
     * @return The column number, or -1 if none is available.
     * @see #getLineNumber
     */
    public int getColumnNumber() {
        return reader.getLocation().getColumnNumber();
    }

    public String getSystemId(long locationId) {
        return getSystemId();
    }

    public int getLineNumber(long locationId) {
        return getLineNumber();
    }

    public int getColumnNumber(long locationId) {
        return getColumnNumber();
    }    

    /**
     * Get a list of unparsed entities.
     *
     * @return a list of unparsed entities, or null if the information is not available, or
     *         an empty list if there are no unparsed entities. Each item in the list will
     *         be an instance of {@link net.sf.saxon.pull.UnparsedEntity}
     */

    public List getUnparsedEntities() {
        if (unparsedEntities == null) {
            return null;
        }
        List list = new ArrayList(unparsedEntities.size());
        for (int i=0; i<unparsedEntities.size(); i++) {
            Object ent = unparsedEntities.get(i);
            String name = null;
            String systemId = null;
            String publicId = null;
            String baseURI = null;
            if (ent instanceof EntityDeclaration) {
                // This is what we would expect from the StAX API spec
                EntityDeclaration ed = (EntityDeclaration)ent;
                name = ed.getName();
                systemId = ed.getSystemId();
                publicId = ed.getPublicId();
                baseURI = ed.getBaseURI();
            } else if (ent.getClass().getName().equals("com.ctc.wstx.ent.UnparsedExtEntity")) {
                // Woodstox 3.0.0 returns this: use introspection to get the data we need
                try {
                    Class woodstoxClass = ent.getClass();
                    Class[] noArgs = new Class[0];
                    Method method = woodstoxClass.getMethod("getName", noArgs);
                    name = (String)method.invoke(ent, (Object[]) noArgs);
                    method = woodstoxClass.getMethod("getSystemId", noArgs);
                    systemId = (String)method.invoke(ent, (Object[]) noArgs);
                    method = woodstoxClass.getMethod("getPublicId", noArgs);
                    publicId = (String)method.invoke(ent, (Object[]) noArgs);
                    method = woodstoxClass.getMethod("getBaseURI", noArgs);
                    baseURI = (String)method.invoke(ent, (Object[]) noArgs);
                } catch (NoSuchMethodException e) {
                    //
                } catch (IllegalAccessException e) {
                    //
                } catch (InvocationTargetException e) {
                    //
                }
            }
            if (name != null) {
                try {
                    systemId = new URI(baseURI).resolve(systemId).toString();
                } catch (URISyntaxException err) {
                    //
                }
                UnparsedEntity ue = new UnparsedEntity();
                ue.setName(name);
                ue.setSystemId(systemId);
                ue.setPublicId(publicId);
                ue.setBaseURI(baseURI);
                list.add(ue);
            }
        }
        return list;
    }

    /**
      * Translate a StAX Location object to a Saxon Locator
      * @param location the StAX Location object
      * @return a Saxon/SAX SourceLocator object
      */

     private ExpressionLocation translateLocation(Location location) {
         ExpressionLocation loc = new ExpressionLocation();
         if (location != null) {
             loc.setLineNumber(location.getLineNumber());
             loc.setColumnNumber(location.getColumnNumber());
             loc.setSystemId(location.getSystemId());
             //loc.setPublicId(location.getPublicId());
         }
         return loc;
     }


    /**
     * Error reporting class for StAX parser errors
     */

    private class StaxErrorReporter implements XMLReporter {

        public void report(String message, String errorType,
                           Object relatedInformation, Location location)
                throws XMLStreamException {
            ExpressionLocation loc = translateLocation(location);
            XPathException err = new XPathException("Error reported by XML parser: " + message + " (" + errorType + ')');
            err.setLocator(loc);
            try {
                pipe.getErrorListener().error(err);
            } catch (TransformerException e) {
                throw new XMLStreamException(e);
            }
        }

    }

    /**
     * Simple test program
     * Usage: java StaxBridge in.xml [out.xml]
     * @param args command line arguments
     */

    public static void main(String[] args) throws Exception {
        for (int i=0; i<1; i++) {
            long startTime = System.currentTimeMillis();
            PipelineConfiguration pipe = new Configuration().makePipelineConfiguration();
            StaxToEventBridge puller = new StaxToEventBridge();
            File f = new File(args[0]);
            puller.setInputStream(f.toURI().toString(), new FileInputStream(f));
            puller.setPipelineConfiguration(pipe);
            XMLEmitter emitter = new XMLEmitter();
            emitter.setPipelineConfiguration(pipe);
            emitter.setOutputProperties(new Properties());
            if (args.length > 1) {
                emitter.setOutputStream(new FileOutputStream(args[1]));
            } else {
                emitter.setOutputStream(System.out);
            }
            NamespaceReducer r = new NamespaceReducer(emitter);

            EventIteratorToReceiver.copy(puller, r);
            System.err.println("Elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
        }
    }


    /**
     * Determine whether the EventIterator returns a flat sequence of events, or whether it can return
     * nested event iterators
     *
     * @return true if the next() method is guaranteed never to return an EventIterator
     */

    public boolean isFlatSequence() {
        return false;
    }
}

//
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
// you may not use this file except in compliance with the License. You may obtain a copy of the
// License at http://www.mozilla.org/MPL/
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the License for the specific language governing rights and limitations under the License.
//
// The Original Code is: all this file
//
// The Initial Developer of the Original Code is Michael H. Kay.
//
// Contributor(s):
//

TOP

Related Classes of net.sf.saxon.evpull.StaxToEventBridge$StaxErrorReporter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.