/*
* Copyright (C) Chaperon. All rights reserved.
* -------------------------------------------------------------------------
* This software is published under the terms of the Apache Software License
* version 1.1, a copy of which has been included with this distribution in
* the LICENSE file.
*/
package net.sourceforge.chaperon.process;
import net.sourceforge.chaperon.build.Automaton;
import net.sourceforge.chaperon.build.ReduceAction;
import net.sourceforge.chaperon.build.ShiftAction;
import net.sourceforge.chaperon.build.State;
import net.sourceforge.chaperon.model.grammar.Grammar;
import net.sourceforge.chaperon.model.grammar.Production;
import net.sourceforge.chaperon.model.symbol.Symbol;
import net.sourceforge.chaperon.model.symbol.Terminal;
import org.apache.commons.logging.Log;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.LocatorImpl;
import java.util.Stack;
//import org.xml.sax.ext.LexicalHandler;
/**
 * SAX filter which simulates a pushdown automaton by means of a parser {@link Automaton}.
 *
 * <p>
 * All events outside of a <code>lexemes</code> element (namespace
 * <code>http://chaperon.sourceforge.net/schema/lexemes/1.0</code>) are passed through to the
 * configured <code>ContentHandler</code>. Every <code>lexeme</code> element inside of it is fed
 * as token into the automaton. When the <code>lexemes</code> element ends, the accepted parse
 * tree(s) are emitted as SAX events inside of an <code>output</code> element (namespace
 * <code>http://chaperon.sourceforge.net/schema/syntaxtree/1.0</code>).
 * </p>
 *
 * <p>
 * The processor keeps a collection of alternative state nodes alive at the same time. Alternatives
 * which reach the same state from the same ancestor are merged, the production to keep is chosen
 * by comparing <code>Grammar.getPriority(Production)</code>.
 * </p>
 *
 * @author <a href="mailto:stephan@apache.org">Stephan Michels</a>
 * @version CVS $Id: GeneralParserProcessor.java,v 1.19 2003/12/14 09:41:35 benedikta Exp $
 */
public class GeneralParserProcessor implements ContentHandler, LexicalHandler
{
  /** Namespace of the consumed lexeme events. */
  private static final String NS = "http://chaperon.sourceforge.net/schema/lexemes/1.0";
  private static final String LEXEMES = "lexemes";
  private static final String LEXEME = "lexeme";

  /** Namespace for the generated SAX events. */
  private static final String NS_OUTPUT = "http://chaperon.sourceforge.net/schema/syntaxtree/1.0";
  private static final String OUTPUT = "output";

  /** The filter is outside of a lexemes element, events are passed through. */
  private static final int STATE_OUTER = 0;

  /** The filter is inside of the lexemes element. */
  private static final int STATE_LEXEMES = 1;

  /** The filter is inside of a lexeme element. */
  private static final int STATE_LEXEME = 2;

  /**
   * Upper bound for the iterations of the reduce loop per token, the loop is aborted with an
   * <code>IllegalStateException</code> if the counter exceeds this value.
   */
  private static final int MAX_REDUCE_ITERATIONS = 20;

  private ContentHandler contentHandler = null;
  private LexicalHandler lexicalHandler = null;

  /** Locator of the event source, may be null since SAX parsers need not supply one. */
  private Locator locator = null;

  /** Locator handed to the content handler, only available if a locator was supplied. */
  private LocatorImpl locatorImpl = null;
  private int state = STATE_OUTER;
  private Automaton automaton;
  private Grammar grammar;
  private boolean flatten = false;

  /** State nodes which are processed in the current phase. */
  private Stack current = new Stack();

  /** State nodes which are collected for the next phase. */
  private Stack next = new Stack();

  /** Optional log, every use has to be guarded against null. */
  private Log log;

  /** Maximum count of simultaneously active state nodes when a token arrives. */
  private int maxActiveStates = 50;

  /**
   * Create a new parser processor.
   */
  public GeneralParserProcessor() {}

  /**
   * Create a new parser processor.
   *
   * @param automaton Parser automaton, which the processor should use.
   * @param log Log, which should be used.
   */
  public GeneralParserProcessor(Automaton automaton, Log log)
  {
    this.automaton = automaton;

    // The grammar is required for the priorities and the start symbol. Derive it here like
    // setParserAutomaton does, otherwise parsing fails with a NullPointerException.
    this.grammar = (automaton!=null) ? automaton.getGrammar() : null;
    this.log = log;
  }

  /**
   * Set the parser automaton for the processor. The grammar is taken from the automaton.
   *
   * @param automaton Parser automaton.
   */
  public void setParserAutomaton(Automaton automaton)
  {
    this.automaton = automaton;
    this.grammar = automaton.getGrammar();
  }

  /**
   * Set the <code>ContentHandler</code> that will receive XML data.
   *
   * @param handler Handler, which receives the generated events.
   */
  public void setContentHandler(ContentHandler handler)
  {
    this.contentHandler = handler;
  }

  /**
   * Set the <code>LexicalHandler</code> that will receive XML data.
   *
   * @param handler Handler, which receives the lexical events. May be null.
   */
  public void setLexicalHandler(LexicalHandler handler)
  {
    this.lexicalHandler = handler;
  }

  /**
   * Provide processor with a log.
   *
   * @param log The log.
   */
  public void setLog(Log log)
  {
    this.log = log;
  }

  /**
   * If the adapter should produce a flatter XML hierarchy, which means that a production element
   * is omitted if its parent has the same symbol.
   *
   * @param flatten True, if a flatter hierarchy should be produced.
   */
  public void setFlatten(boolean flatten)
  {
    this.flatten = flatten;
  }

  /**
   * Receive an object for locating the origin of SAX document events.
   *
   * @param locator Locator of the event source, may be null.
   */
  public void setDocumentLocator(Locator locator)
  {
    this.locator = locator;
    if (locator!=null)
    {
      this.locatorImpl = new LocatorImpl(locator);

      // The locator may arrive before a content handler was configured.
      if (contentHandler!=null)
        contentHandler.setDocumentLocator(locatorImpl);
    }
  }

  /**
   * Receive notification of the beginning of a document.
   *
   * @throws SAXException If the content handler fails.
   */
  public void startDocument() throws SAXException
  {
    updateLocator();
    contentHandler.startDocument();
    state = STATE_OUTER;
  }

  /**
   * Receive notification of the beginning of an element. A <code>lexemes</code> element starts
   * a new parsing process, a <code>lexeme</code> element inside of it is processed as token. All
   * elements outside are passed through.
   *
   * @param namespaceURI Namespace of the element.
   * @param localName Local name of the element.
   * @param qName Qualified name of the element.
   * @param atts Attributes of the element.
   *
   * @throws SAXException If an element occurs where it is not expected.
   */
  public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
    throws SAXException
  {
    updateLocator();

    if (state==STATE_OUTER)
    {
      if (NS.equals(namespaceURI) && LEXEMES.equals(localName))
      {
        processStartDocument();
        state = STATE_LEXEMES;
      }
      else
        contentHandler.startElement(namespaceURI, localName, qName, atts);
    }
    else if (state==STATE_LEXEMES)
    {
      if (NS.equals(namespaceURI) && LEXEME.equals(localName))
      {
        processLexeme(atts.getValue("symbol"), atts.getValue("text"));
        state = STATE_LEXEME;
      }
      else
        throw new SAXException("Unexpected start element.");
    }
    else if (state==STATE_LEXEME)
      throw new SAXException("Unexpected start element.");
  }

  /**
   * Receive notification of the end of an element. The end of the <code>lexemes</code> element
   * finishes the parsing process and emits the result wrapped into an <code>output</code> element.
   *
   * @param namespaceURI Namespace of the element.
   * @param localName Local name of the element.
   * @param qName Qualified name of the element.
   *
   * @throws SAXException If an element ends where it is not expected.
   */
  public void endElement(String namespaceURI, String localName, String qName)
    throws SAXException
  {
    updateLocator();

    if (state==STATE_OUTER)
      contentHandler.endElement(namespaceURI, localName, qName);
    else if (state==STATE_LEXEMES)
    {
      if (NS.equals(namespaceURI) && LEXEMES.equals(localName))
      {
        contentHandler.startPrefixMapping("", NS_OUTPUT);
        contentHandler.startElement(NS_OUTPUT, OUTPUT, OUTPUT, new AttributesImpl());

        processEndDocument();

        contentHandler.endElement(NS_OUTPUT, OUTPUT, OUTPUT);
        contentHandler.endPrefixMapping("");
        state = STATE_OUTER;
      }
      else
        throw new SAXException("Unexpected end element.");
    }
    else if (state==STATE_LEXEME)
      state = STATE_LEXEMES;
  }

  /**
   * Receive notification of character data. Only passed through outside of the lexemes element.
   *
   * @param ch The characters.
   * @param start Start position in the array.
   * @param length Count of characters to read.
   *
   * @throws SAXException If the content handler fails.
   */
  public void characters(char[] ch, int start, int length)
    throws SAXException
  {
    updateLocator();
    if (state==STATE_OUTER)
      contentHandler.characters(ch, start, length);
  }

  /**
   * Receive notification of ignorable whitespace in element content. Only passed through outside
   * of the lexemes element.
   *
   * @param ch The characters.
   * @param start Start position in the array.
   * @param length Count of characters to read.
   *
   * @throws SAXException If the content handler fails.
   */
  public void ignorableWhitespace(char[] ch, int start, int length)
    throws SAXException
  {
    updateLocator();
    if (state==STATE_OUTER)
      contentHandler.ignorableWhitespace(ch, start, length);
  }

  /**
   * Begin the scope of a prefix-URI Namespace mapping. Always passed through.
   *
   * @param prefix Namespace prefix.
   * @param uri Namespace URI.
   *
   * @throws SAXException If the content handler fails.
   */
  public void startPrefixMapping(String prefix, String uri)
    throws SAXException
  {
    updateLocator();
    contentHandler.startPrefixMapping(prefix, uri);
  }

  /**
   * End the scope of a prefix-URI mapping. Always passed through.
   *
   * @param prefix Namespace prefix.
   *
   * @throws SAXException If the content handler fails.
   */
  public void endPrefixMapping(String prefix) throws SAXException
  {
    updateLocator();
    contentHandler.endPrefixMapping(prefix);
  }

  /**
   * Receive notification of a processing instruction. Only passed through outside of the lexemes
   * element.
   *
   * @param target Target of the processing instruction.
   * @param data Data of the processing instruction.
   *
   * @throws SAXException If the content handler fails.
   */
  public void processingInstruction(String target, String data)
    throws SAXException
  {
    updateLocator();
    if (state==STATE_OUTER)
      contentHandler.processingInstruction(target, data);
  }

  /**
   * Receive notification of a skipped entity. Only passed through outside of the lexemes element.
   *
   * @param name Name of the skipped entity.
   *
   * @throws SAXException If the content handler fails.
   */
  public void skippedEntity(String name) throws SAXException
  {
    updateLocator();
    if (state==STATE_OUTER)
      contentHandler.skippedEntity(name);
  }

  /**
   * Receive notification of the end of a document. Only passed through outside of the lexemes
   * element.
   *
   * @throws SAXException If the content handler fails.
   */
  public void endDocument() throws SAXException
  {
    updateLocator();
    if (state==STATE_OUTER)
      contentHandler.endDocument();
  }

  /**
   * Report the start of DTD declarations, if any.
   */
  public void startDTD(String name, String publicId, String systemId)
    throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.startDTD(name, publicId, systemId);
  }

  /**
   * Report the end of DTD declarations.
   */
  public void endDTD() throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.endDTD();
  }

  /**
   * Report the beginning of an entity.
   */
  public void startEntity(String name) throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.startEntity(name);
  }

  /**
   * Report the end of an entity.
   */
  public void endEntity(String name) throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.endEntity(name);
  }

  /**
   * Report the start of a CDATA section.
   */
  public void startCDATA() throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.startCDATA();
  }

  /**
   * Report the end of a CDATA section.
   */
  public void endCDATA() throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.endCDATA();
  }

  /**
   * Report an XML comment anywhere in the document.
   */
  public void comment(char[] ch, int start, int len) throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.comment(ch, start, len);
  }

  /**
   * Copy the position of the source locator into the locator which was handed to the content
   * handler. Does nothing if the event source did not supply a locator.
   */
  private void updateLocator()
  {
    if ((locator!=null) && (locatorImpl!=null))
    {
      locatorImpl.setLineNumber(locator.getLineNumber());
      locatorImpl.setColumnNumber(locator.getColumnNumber());
    }
  }

  /**
   * Test if debug messages should be written.
   *
   * @return True, if a log is present and its debug level is enabled.
   */
  private boolean isDebugEnabled()
  {
    return (log!=null) && (log.isDebugEnabled());
  }

  /**
   * Write the currently active state nodes into the debug log.
   */
  private void logActiveStates()
  {
    if (!isDebugEnabled())
      return;

    log.debug("Current states");
    for (int i = 0; i<current.size(); i++)
      log.debug(String.valueOf(current.get(i)));
  }

  /**
   * Exchange the stack of the current phase with the stack of the next phase.
   */
  private void swapStacks()
  {
    Stack swap = next;
    next = current;
    current = swap;
  }

  /**
   * Receives the notification, that the lexical processor starts reading a new document. Resets
   * the parsing process to the first state of the automaton.
   */
  private void processStartDocument()
  {
    current.clear();
    current.push(new StateNode(automaton.getState(0), null, null));
    next.clear();

    // Guarded, because the string representation of the automaton is built for the message.
    if (isDebugEnabled())
      log.debug("Automaton:\n"+automaton);
  }

  /**
   * Receives the notification, that the lexical processor has recognized a lexeme. First all
   * possible reductions are applied to the active state nodes, then the token is shifted onto
   * every state node which accepts it.
   *
   * @param symbolname Name of the symbol of the lexeme.
   * @param text Recognized text.
   *
   * @throws IllegalStateException If no state node is active, too many state nodes are active
   *         or the reduce loop exceeds its iteration limit.
   * @throws IllegalArgumentException If no active state node accepts the token.
   */
  private void processLexeme(String symbolname, String text)
  {
    Terminal symbol = new Terminal(symbolname);

    if (isDebugEnabled())
      log.debug("Process "+symbolname);

    if (current.isEmpty())
      throw new IllegalStateException("Parsing process is aborted");

    logActiveStates();

    if (current.size()>maxActiveStates)
      throw new IllegalStateException("Processor occupied too many states");

    /* ============================ Reduce =================================== */
    int watchdog = 0;
    while (!current.isEmpty())
    {
      if (watchdog++>MAX_REDUCE_ITERATIONS)
        throw new IllegalStateException("overflow");

      StateNode statenode = (StateNode)current.pop();

      // Every processed node stays a candidate for the shift phase.
      next.push(statenode);

      ReduceAction[] reduceactions = statenode.state.getReduceActions();
      for (int i = 0; i<reduceactions.length; i++)
      {
        Production production = reduceactions[i].production;

        if (isDebugEnabled())
          log.debug(" reduce "+production.getSymbol());

        ProductionNode productionnode = new ProductionNode(production);
        StateNode ancestor = reduce(productionnode, statenode);

        ShiftAction shiftaction = ancestor.state.getShiftAction(productionnode.symbol);
        if (shiftaction!=null)
          pushOrMerge(current, shiftaction.state, ancestor, productionnode);
      }
    }

    swapStacks();
    logActiveStates();

    /* ==================================== Shift =================================== */
    TokenNode tokennode = new TokenNode(symbol, text);
    if (locator!=null)
    {
      tokennode.linenumber = locator.getLineNumber();
      tokennode.columnnumber = locator.getColumnNumber();
    }

    while (!current.isEmpty())
    {
      StateNode statenode = (StateNode)current.pop();
      ShiftAction shiftaction = statenode.state.getShiftAction(symbol);

      if (shiftaction!=null)
      {
        if (isDebugEnabled())
          log.debug(" shift token "+symbolname+" ("+symbol+")");

        next.push(new StateNode(shiftaction.state, statenode, tokennode));
      }
    }

    if (next.isEmpty())
      throw new IllegalArgumentException("Token "+symbolname+" is not expected in this state");

    swapStacks();
    logActiveStates();
  }

  /**
   * Receives the notification, that the lexical processor accepted the complete document, and
   * stops with reading. Applies the remaining reductions, collects every alternative which
   * reduces to the start symbol at the first state, and fires the SAX events for them.
   *
   * @throws SAXException If the content handler fails.
   */
  private void processEndDocument() throws SAXException
  {
    if (isDebugEnabled())
      log.debug("Process EOF");

    while (!current.isEmpty())
    {
      StateNode statenode = (StateNode)current.pop();
      ReduceAction[] reduceactions = statenode.state.getReduceActions();

      for (int i = 0; i<reduceactions.length; i++)
      {
        Production production = reduceactions[i].production;
        ProductionNode productionnode = new ProductionNode(production);
        StateNode ancestor = reduce(productionnode, statenode);

        if ((automaton.getState(0)==ancestor.state) &&
            (productionnode.symbol.equals(grammar.getStartSymbol())))
        {
          if (isDebugEnabled())
            log.debug("State "+state+" accept");

          // Accepted alternatives are marked by a state node without state.
          pushOrMerge(next, null, ancestor, productionnode);
        }
        else
        {
          if (isDebugEnabled())
            log.debug(" reduce "+production.getSymbol()+" ("+production+")");

          // Like in processLexeme, an alternative without transition is a dead end.
          ShiftAction shiftaction = ancestor.state.getShiftAction(productionnode.symbol);
          if (shiftaction!=null)
            pushOrMerge(current, shiftaction.state, ancestor, productionnode);
        }

        logActiveStates();
      }
    }

    // Remember the count before the results are popped, the stack is empty afterwards.
    int alternatives = next.size();

    if (isDebugEnabled())
      log.debug("Parser found "+alternatives+" alternatives");

    while (!next.isEmpty())
    {
      StateNode result = (StateNode)next.pop();
      fireEvents(null, result.treenode);
    }

    if ((alternatives>1) && (log!=null))
      log.warn("Grammar is ambig, found "+alternatives+" alternative trees");
  }

  /**
   * Apply a reduction: collect as many tree nodes as the production has symbols by walking the
   * ancestor chain, beginning at the given state node, and store them as descendants of the
   * production node. The position of the production node is taken from its first descendant, or
   * from the locator if there is no descendant and a locator is present.
   *
   * @param productionnode Production node, which receives the descendants.
   * @param top State node, where the reduction starts.
   *
   * @return State node which precedes the reduced state nodes.
   */
  private StateNode reduce(ProductionNode productionnode, StateNode top)
  {
    int symbolcount = productionnode.production.getDefinition().getSymbolCount();
    TreeNode[] descendants = new TreeNode[symbolcount];

    StateNode ancestor = top;
    for (int j = symbolcount-1; j>=0; j--)
    {
      descendants[j] = ancestor.treenode;
      ancestor = ancestor.ancestor;
    }

    productionnode.descendants = descendants;

    if ((symbolcount>0) && (descendants[0]!=null))
    {
      productionnode.linenumber = descendants[0].linenumber;
      productionnode.columnnumber = descendants[0].columnnumber;
    }
    else if (locator!=null)
    {
      productionnode.linenumber = locator.getLineNumber();
      productionnode.columnnumber = locator.getColumnNumber();
    }

    return ancestor;
  }

  /**
   * Register the result of a reduction. If the stack already holds a state node with the same
   * state and the same ancestor, both alternatives are merged: the tree of the existing node is
   * replaced, if the priority value of its production is greater than the priority value of the
   * new production. Otherwise a new state node is pushed.
   *
   * @param stack Stack, which should hold the state node.
   * @param target State which is reached by the reduction, null for accepted alternatives.
   * @param ancestor State node which precedes the new state node.
   * @param productionnode Tree of the reduction.
   */
  private void pushOrMerge(Stack stack, State target, StateNode ancestor,
                           ProductionNode productionnode)
  {
    StateNode existing = getStateNode(stack, target, ancestor);

    if (existing==null)
    {
      if ((target!=null) && isDebugEnabled())
        log.debug("new state node: new state="+automaton.indexOf(target)+" ancestor state="+
                  automaton.indexOf(ancestor.state));

      stack.push(new StateNode(target, ancestor, productionnode));
      return;
    }

    ProductionNode oldproductionnode = (ProductionNode)existing.treenode;
    boolean replace =
      grammar.getPriority(oldproductionnode.production)>grammar.getPriority(productionnode.production);

    if (isDebugEnabled())
    {
      log.debug("merging state node");
      log.debug("priority("+productionnode.production+(replace ? ") < priority(" : ") >= priority(")+
                oldproductionnode.production+")");
    }

    if (replace)
      existing.treenode = productionnode;
  }

  /**
   * Search a stack for a state node with the given state and ancestor. Both are compared by
   * identity.
   *
   * @param stack Stack to search.
   * @param state State of the state node.
   * @param ancestor Ancestor of the state node.
   *
   * @return The first matching state node, or null if there is none.
   */
  private StateNode getStateNode(Stack stack, State state, StateNode ancestor)
  {
    for (int j = 0; j<stack.size(); j++)
    {
      StateNode statenode = (StateNode)stack.get(j);

      if ((statenode.ancestor==ancestor) && (statenode.state==state))
        return statenode;
    }

    return null;
  }

  /**
   * Fire the SAX events by traversing the hierarchy.
   *
   * @param parent Parent node.
   * @param node Current node.
   *
   * @throws SAXException If the content handler fails.
   */
  private void fireEvents(ProductionNode parent, TreeNode node)
    throws SAXException
  {
    if (node instanceof ProductionNode)
    {
      ProductionNode production = (ProductionNode)node;

      if (locatorImpl!=null)
      {
        locatorImpl.setLineNumber(production.linenumber);
        locatorImpl.setColumnNumber(production.columnnumber);
      }

      // In flatten mode an element is omitted, if its parent has the same symbol.
      boolean visible =
        (!flatten) || (parent==null) || (!parent.symbol.equals(production.symbol));
      String name = production.symbol.getName();

      if (visible)
        contentHandler.startElement(NS_OUTPUT, name, name, new AttributesImpl());

      for (int i = 0; i<production.descendants.length; i++)
        fireEvents(production, production.descendants[i]);

      if (visible)
        contentHandler.endElement(NS_OUTPUT, name, name);
    }
    else
    {
      TokenNode token = (TokenNode)node;

      if (locatorImpl!=null)
      {
        locatorImpl.setLineNumber(token.linenumber);
        locatorImpl.setColumnNumber(token.columnnumber);
      }

      String name = token.symbol.getName();

      contentHandler.startElement(NS_OUTPUT, name, name, new AttributesImpl());

      // The text attribute of a lexeme element may be absent.
      if (token.text!=null)
        contentHandler.characters(token.text.toCharArray(), 0, token.text.length());

      contentHandler.endElement(NS_OUTPUT, name, name);
    }
  }

  /**
   * Entry of a parse stack: an automaton state, the tree which was recognized on the way into
   * this state, and the preceding entry.
   */
  private class StateNode
  {
    public StateNode(State state, StateNode ancestor, TreeNode treenode)
    {
      this.state = state;
      this.treenode = treenode;
      this.ancestor = ancestor;
    }

    public State state = null;
    public StateNode ancestor = null;
    public TreeNode treenode = null;

    /**
     * Renders the chain of state indices, beginning with the oldest ancestor.
     */
    public String toString()
    {
      StringBuffer buffer = new StringBuffer();

      if (ancestor!=null)
      {
        buffer.append(ancestor.toString());
        buffer.append(" <- ");
      }

      buffer.append("<");
      buffer.append(automaton.indexOf(state));
      buffer.append(">");
      return buffer.toString();
    }
  }

  /**
   * Node of the parse tree, carries the symbol and the position within the source.
   */
  private abstract static class TreeNode
  {
    public Symbol symbol = null;
    public int linenumber = 1;
    public int columnnumber = 1;
  }

  /**
   * Leaf of the parse tree, represents a recognized lexeme.
   */
  private static class TokenNode extends TreeNode
  {
    public TokenNode(Terminal symbol, String text)
    {
      this.symbol = symbol;
      this.text = text;
    }

    public String text = null;

    public String toString()
    {
      StringBuffer buffer = new StringBuffer();

      buffer.append("{");
      buffer.append(symbol);
      buffer.append(":");
      buffer.append(text);
      buffer.append("}");
      return buffer.toString();
    }
  }

  /**
   * Inner node of the parse tree, represents a reduced production.
   */
  private static class ProductionNode extends TreeNode
  {
    public ProductionNode(Production production)
    {
      this.production = production;
      this.symbol = production.getSymbol();
    }

    public Production production = null;
    public TreeNode[] descendants = null;

    public String toString()
    {
      StringBuffer buffer = new StringBuffer();

      buffer.append("{");
      buffer.append(symbol);
      buffer.append(":");

      // The descendants are assigned after the construction, so they may still be missing.
      if (descendants!=null)
        for (int i = 0; i<descendants.length; i++)
          buffer.append(descendants[i]);

      buffer.append("}");
      return buffer.toString();
    }
  }
}