/*
* Copyright (C) Chaperon. All rights reserved.
* -------------------------------------------------------------------------
* This software is published under the terms of the Apache Software License
* version 1.1, a copy of which has been included with this distribution in
* the LICENSE file.
*/
package net.sourceforge.chaperon.process.extended;
import net.sourceforge.chaperon.common.Decoder;
import net.sourceforge.chaperon.model.extended.ExtendedGrammar;
import org.apache.commons.logging.Log;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.LocatorImpl;
/**
* This class represents a simulation of a pushdown automaton using the parser automaton class.
*
* @author <a href="mailto:stephan@apache.org">Stephan Michels</a>
* @version CVS $Id: ExtendedBacktrackingParserProcessor.java,v 1.1 2004/01/04 16:49:12 benedikta Exp $
*/
public class ExtendedBacktrackingParserProcessor implements ContentHandler, LexicalHandler
{
  /** Namespace of the consumed SAX events. */
  public static final String NS = "http://chaperon.sourceforge.net/schema/text/1.0";

  /** Local name of the consumed element whose character content is parsed. */
  public static final String TEXT = "text";

  /** Namespace of the generated SAX events. */
  public static final String NS_OUTPUT = "http://chaperon.sourceforge.net/schema/syntaxtree/2.0";

  /** Local name of the element which wraps the generated SAX events. */
  public static final String OUTPUT = "output";

  /** Element name for errors; not referenced within this class. */
  public static final String ERROR = "error";

  /** Content handler and locator facilities */
  private ContentHandler contentHandler = null;
  private LexicalHandler lexicalHandler = null;

  /** Locator supplied by the event source, may be null. */
  private Locator locator = null;

  /** Copy of the locator which is handed to the content handler, may be null. */
  private LocatorImpl locatorImpl = null;

  /** State of consumed input: outside or inside of a text element. */
  private static final int STATE_OUTER = 0;
  private static final int STATE_INNER = 1;
  private int state = STATE_OUTER;

  /** Internals */
  private ExtendedParserAutomaton automaton;
  private ExtendedGrammar grammar;

  /** Stored by setFlatten, but not evaluated within this class. */
  private boolean flatten = false;

  /** Current top of the parser stack. */
  private StackNode stackNode = null;
  private Log log;

  /** Collected character content of the current text element. */
  private CharBuffer buffer = new CharBuffer();

  /** Node whose position is reported if the text could not be recognized. */
  private StackNode topmost = null;

  /**
   * Create a new parser processor.
   */
  public ExtendedBacktrackingParserProcessor() {}

  /**
   * Create a new parser processor.
   *
   * @param automaton Parser automaton, which the processor should use.
   * @param log Log, which should be used.
   */
  public ExtendedBacktrackingParserProcessor(ExtendedParserAutomaton automaton, Log log)
  {
    this.automaton = automaton;

    // The grammar is needed to detect the accepting reduction, so it has to be
    // available here too, not only when the automaton is set through the setter.
    if (automaton!=null)
      this.grammar = automaton.getExtendedGrammar();

    this.log = log;
  }

  /**
   * Set the parser automaton for the processor. The grammar is taken from the automaton.
   *
   * @param automaton Parser automaton.
   */
  public void setExtendedParserAutomaton(ExtendedParserAutomaton automaton)
  {
    this.automaton = automaton;
    this.grammar = (automaton!=null) ? automaton.getExtendedGrammar() : null;
  }

  /**
   * Set the <code>ContentHandler</code> that will receive XML data.
   */
  public void setContentHandler(ContentHandler handler)
  {
    this.contentHandler = handler;
  }

  /**
   * Set the <code>LexicalHandler</code> that will receive XML data.
   */
  public void setLexicalHandler(LexicalHandler handler)
  {
    this.lexicalHandler = handler;
  }

  /**
   * Provide processor with a log.
   *
   * @param log The log.
   */
  public void setLog(Log log)
  {
    this.log = log;
  }

  /**
   * If the adapter should produce a more flatten XML hierarchy, which means elements with the
   * same name will be collapsed.
   *
   * @param flatten True, if a more flatten hierarchy should be produced.
   */
  public void setFlatten(boolean flatten)
  {
    this.flatten = flatten;
  }

  /**
   * Receive an object for locating the origin of SAX document events. A copy of the locator is
   * passed to the content handler, if one is already set.
   */
  public void setDocumentLocator(Locator locator)
  {
    this.locator = locator;
    if (locator!=null)
    {
      this.locatorImpl = new LocatorImpl(locator);
      if (contentHandler!=null)
        contentHandler.setDocumentLocator(locatorImpl);
    }
  }

  /**
   * Copies the current position of the source locator into the locator of the content handler.
   * Does nothing if no locator was supplied, because event sources are not required to
   * provide one.
   */
  private void updateLocation()
  {
    if ((locator!=null) && (locatorImpl!=null))
    {
      locatorImpl.setLineNumber(locator.getLineNumber());
      locatorImpl.setColumnNumber(locator.getColumnNumber());
    }
  }

  /**
   * Returns true, if a log is present and debug output is enabled.
   */
  private boolean isDebugEnabled()
  {
    return (log!=null) && (log.isDebugEnabled());
  }

  /**
   * Receive notification of the beginning of a document.
   */
  public void startDocument() throws SAXException
  {
    updateLocation();

    contentHandler.startDocument();
    state = STATE_OUTER;
  }

  /**
   * Receive notification of the beginning of an element. Elements outside of the text namespace
   * are passed through, a text element starts a new parsing process.
   *
   * @throws SAXException If an element occurs within a text element, or if an unknown element
   *         of the text namespace occurs.
   */
  public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
    throws SAXException
  {
    updateLocation();

    if (state==STATE_INNER)
      throw new SAXException("Unexpected element "+qName);

    if ((namespaceURI==null) || (!namespaceURI.equals(NS)))
    {
      contentHandler.startElement(namespaceURI, localName, qName, atts);
      return;
    }

    if (!localName.equals(TEXT))
      throw new SAXException("Unknown element "+qName);

    state = STATE_INNER;

    // ======================= Start Text Document =======================
    buffer.clear();
    stackNode = new TerminalStackNode('\u0000', automaton.first, null);
    topmost = stackNode;
  }

  /**
   * Receive notification of character data. Outside of a text element the characters are passed
   * through, inside they are collected for the parsing process.
   */
  public void characters(char[] text, int textstart, int textlength)
    throws SAXException
  {
    updateLocation();

    if (state==STATE_OUTER)
    {
      contentHandler.characters(text, textstart, textlength);
      return;
    }

    if (isDebugEnabled())
      log.debug("getting text "+Decoder.toString(new String(text, textstart, textlength)));

    buffer.push(text, textstart, textlength);
  }

  /**
   * Receive notification of the end of an element. The end of a text element triggers the
   * parsing of the collected text, all other elements are passed through.
   *
   * @throws SAXException If the element is not expected, or the text could not be recognized.
   */
  public void endElement(String namespaceURI, String localName, String qName)
    throws SAXException
  {
    updateLocation();

    if (state==STATE_OUTER)
    {
      contentHandler.endElement(namespaceURI, localName, qName);

      // There is no text to parse outside of a text element. Without this return the
      // parser loop below was entered for every passed through element.
      return;
    }

    if ((namespaceURI==null) || (!namespaceURI.equals(NS)))
      throw new SAXException("Unexpected element "+qName);

    if (!localName.equals(TEXT))
      throw new SAXException("Unknown element "+qName);

    // ======================= End Text Document =======================
    parse();
  }

  /**
   * Runs the shift/reduce loop over the collected text. On acceptance the events are fired and
   * the processor returns into the outer state.
   *
   * @throws SAXException If the text could not be recognized.
   */
  private void parse() throws SAXException
  {
    while (stackNode!=null)
    {
      if ((isDebugEnabled()) && (buffer.available()))
        log.debug("process "+Decoder.toChar(buffer.peek()));

      /* ============================ Reduce =================================== */
      ShiftAction shiftAction = null;

      if (buffer.available())
        shiftAction = reduceUntilShift();
      else if (reduceAtEndOfInput())
      {
        fireEvents();
        state = STATE_OUTER;
        return;
      }

      /* ==================================== Shift =================================== */
      if (shiftAction!=null)
      {
        if (isDebugEnabled())
          log.debug("State "+automaton.indexOf(stackNode.state)+" "+shiftAction);

        stackNode = new TerminalStackNode(buffer.read(), shiftAction.state, stackNode);
        topmost = topmost.getTopMost(stackNode);
      }
      else
      {
        if (isDebugEnabled())
        {
          if (buffer.available())
            log.debug("State "+automaton.indexOf(stackNode.state)+" error "+
                      Decoder.toChar(buffer.peek()));
          else
            log.debug("State "+automaton.indexOf(stackNode.state)+" error EOF");
        }

        backtrack();
      }
    }

    if (buffer.available())
      throw new IllegalArgumentException("Character "+Decoder.toChar(buffer.peek())+
                                         " is not expected");
    else
      throw new IllegalArgumentException("Eon of file is not expected");
  }

  /**
   * Performs reductions until the next character can be shifted, or until no reduction with a
   * goto transition is left. Only called while characters are available.
   *
   * @return Shift action for the next character, or null if none was reached.
   */
  private ShiftAction reduceUntilShift()
  {
    ShiftAction shiftAction;

    while ((shiftAction = stackNode.state.getShiftAction(buffer.peek()))==null)
    {
      ReduceAction[] reduceActions = stackNode.state.getReduceActions();
      boolean reduced = false;

      for (int index = 0; (index<reduceActions.length) && (!reduced); index++)
      {
        ReduceAction reduceAction = reduceActions[index];
        GotoAction gotoAction = findGotoAction(reduceAction, previousOf(reduceAction, stackNode));

        if (gotoAction!=null)
        {
          if (isDebugEnabled())
            log.debug("State "+automaton.indexOf(stackNode.state)+" "+reduceAction);

          stackNode = reduce(reduceAction, index, stackNode, gotoAction);
          topmost = topmost.getTopMost(stackNode);
          reduced = true;
        }
      }

      if (!reduced)
        break;
    }

    return shiftAction;
  }

  /**
   * Performs reductions after the whole text was consumed. The first reduction of a state which
   * either accepts or has a goto transition is used. Reductions without a goto transition are
   * skipped, like in the loop for available characters.
   *
   * @return True, if the text was accepted. The stack node is then the root of the result.
   *         False, if no further reduction is possible.
   */
  private boolean reduceAtEndOfInput()
  {
    while (true)
    {
      ReduceAction[] reduceActions = stackNode.state.getReduceActions();
      boolean reduced = false;

      for (int index = 0; (index<reduceActions.length) && (!reduced); index++)
      {
        ReduceAction reduceAction = reduceActions[index];
        StackNode second = (reduceAction.length==2) ? stackNode : null;
        StackNode first = (reduceAction.length==2) ? second.ancestor : null;
        StackNode previousStackNode = (reduceAction.length==2) ? first.ancestor : stackNode;
        GotoAction gotoAction = findGotoAction(reduceAction, previousStackNode);

        if ((automaton.first==previousStackNode.state) &&
            (grammar.getStartSymbol().equals(reduceAction.symbol)))
        {
          if (isDebugEnabled())
            log.debug("State "+automaton.indexOf(stackNode.state)+" accept");

          stackNode =
            new DefinitionStackNode(reduceAction, index, first, second, null, previousStackNode);
          return true;
        }

        if (gotoAction!=null)
        {
          if (isDebugEnabled())
            log.debug("State "+automaton.indexOf(stackNode.state)+" "+reduceAction);

          stackNode = reduce(reduceAction, index, stackNode, gotoAction);
          reduced = true;
        }
      }

      if (!reduced)
        return false;
    }
  }

  /**
   * Returns the stack node on which the stack is based after the reduction: the node below
   * the two reduced nodes for a reduction of length 2, otherwise the top node itself.
   */
  private StackNode previousOf(ReduceAction reduceAction, StackNode top)
  {
    return (reduceAction.length==2) ? top.ancestor.ancestor : top;
  }

  /**
   * Looks up the goto transition of a reduction, either by its symbol or by its pattern.
   *
   * @return Goto action, or null if the state of the previous node has none.
   */
  private GotoAction findGotoAction(ReduceAction reduceAction, StackNode previousStackNode)
  {
    return (reduceAction.symbol!=null)
           ? previousStackNode.state.getGotoAction(reduceAction.symbol)
           : previousStackNode.state.getGotoAction(reduceAction.pattern);
  }

  /**
   * Creates the stack node for a reduction on top of the given node.
   *
   * @param reduceAction Reduction to perform.
   * @param index Index of the reduction within the reductions of the state.
   * @param top Current top of the stack.
   * @param gotoAction Goto transition of the reduction, must not be null.
   *
   * @return New top of the stack.
   */
  private StackNode reduce(ReduceAction reduceAction, int index, StackNode top,
                           GotoAction gotoAction)
  {
    StackNode second = (reduceAction.length==2) ? top : null;
    StackNode first = (reduceAction.length==2) ? second.ancestor : null;
    StackNode previousStackNode = (reduceAction.length==2) ? first.ancestor : top;

    return new DefinitionStackNode(reduceAction, index, first, second, gotoAction.state,
                                   previousStackNode);
  }

  /**
   * Withdraws decisions from the stack until an alternative reduction is found. A withdrawn
   * reduction is replaced by a following reduction of the same state, a withdrawn shift by
   * a reduction of the state. Reductions without a goto transition are skipped.
   *
   * NOTE(review): an alternative which would accept at the end of the input is not detected
   * here, only reductions with a goto transition are considered.
   *
   * @throws SAXException If the first state is reached without finding an alternative.
   */
  private void backtrack() throws SAXException
  {
    while (automaton.first!=stackNode.state)
    {
      if (isDebugEnabled())
        log.debug("State "+automaton.indexOf(stackNode.state)+" backtracking");

      int nextIndex;

      if (stackNode instanceof DefinitionStackNode)
      {
        // withdraw the reduction, the following reductions are the alternatives
        DefinitionStackNode definitionStackNode = (DefinitionStackNode)stackNode;

        stackNode =
          (definitionStackNode.action.length==0) ? stackNode.ancestor : definitionStackNode.second;
        nextIndex = definitionStackNode.index+1;
      }
      else
      {
        // withdraw the shift and push the character back into the buffer
        stackNode = stackNode.ancestor;
        buffer.back();
        nextIndex = 0;
      }

      ReduceAction[] reduceActions = stackNode.state.getReduceActions();

      for (int index = nextIndex; index<reduceActions.length; index++)
      {
        ReduceAction reduceAction = reduceActions[index];
        GotoAction gotoAction = findGotoAction(reduceAction, previousOf(reduceAction, stackNode));

        if (gotoAction!=null)
        {
          if (isDebugEnabled())
            log.debug("State "+automaton.indexOf(stackNode.state)+" "+reduceAction);

          stackNode = reduce(reduceAction, index, stackNode, gotoAction);

          // reparse text
          return;
        }
      }

      // else no other action is possible, going deeper
    }

    throw new SAXException("Could not recognize text at ["+topmost.lineNumber+":"+
                           topmost.columnNumber+"]");
  }

  /**
   * Sends the result of the parsing process to the content handler, wrapped by an output
   * element.
   */
  private void fireEvents() throws SAXException
  {
    contentHandler.startPrefixMapping("", NS_OUTPUT);
    contentHandler.startElement(NS_OUTPUT, OUTPUT, OUTPUT, new AttributesImpl());

    stackNode.toXML(contentHandler);

    contentHandler.endElement(NS_OUTPUT, OUTPUT, OUTPUT);
    contentHandler.endPrefixMapping("");
  }

  /**
   * Receive notification of ignorable whitespace in element content. Only passed through
   * outside of a text element.
   */
  public void ignorableWhitespace(char[] ch, int start, int length)
    throws SAXException
  {
    updateLocation();

    if (state==STATE_OUTER)
      contentHandler.ignorableWhitespace(ch, start, length);
  }

  /**
   * Begin the scope of a prefix-URI Namespace mapping.
   */
  public void startPrefixMapping(String prefix, String uri)
    throws SAXException
  {
    updateLocation();

    contentHandler.startPrefixMapping(prefix, uri);
  }

  /**
   * End the scope of a prefix-URI mapping.
   */
  public void endPrefixMapping(String prefix) throws SAXException
  {
    updateLocation();

    contentHandler.endPrefixMapping(prefix);
  }

  /**
   * Receive notification of a processing instruction. Only passed through outside of a text
   * element.
   */
  public void processingInstruction(String target, String data)
    throws SAXException
  {
    updateLocation();

    if (state==STATE_OUTER)
      contentHandler.processingInstruction(target, data);
  }

  /**
   * Receive notification of a skipped entity. Only passed through outside of a text element.
   */
  public void skippedEntity(String name) throws SAXException
  {
    updateLocation();

    if (state==STATE_OUTER)
      contentHandler.skippedEntity(name);
  }

  /**
   * Receive notification of the end of a document.
   */
  public void endDocument() throws SAXException
  {
    updateLocation();

    contentHandler.endDocument();
  }

  /**
   * Report the start of DTD declarations, if any.
   */
  public void startDTD(String name, String publicId, String systemId)
    throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.startDTD(name, publicId, systemId);
  }

  /**
   * Report the end of DTD declarations.
   */
  public void endDTD() throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.endDTD();
  }

  /**
   * Report the beginning of an entity.
   */
  public void startEntity(String name) throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.startEntity(name);
  }

  /**
   * Report the end of an entity.
   */
  public void endEntity(String name) throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.endEntity(name);
  }

  /**
   * Report the start of a CDATA section.
   */
  public void startCDATA() throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.startCDATA();
  }

  /**
   * Report the end of a CDATA section.
   */
  public void endCDATA() throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.endCDATA();
  }

  /**
   * Report an XML comment anywhere in the document.
   */
  public void comment(char[] ch, int start, int len) throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.comment(ch, start, len);
  }
}