Package net.sourceforge.chaperon.process

Source Code of net.sourceforge.chaperon.process.GeneralParserProcessor$TreeNode

/*
*  Copyright (C) Chaperon. All rights reserved.
*  -------------------------------------------------------------------------
*  This software is published under the terms of the Apache Software License
*  version 1.1, a copy of which has been included  with this distribution in
*  the LICENSE file.
*/

package net.sourceforge.chaperon.process;

import net.sourceforge.chaperon.build.Automaton;
import net.sourceforge.chaperon.build.ReduceAction;
import net.sourceforge.chaperon.build.ShiftAction;
import net.sourceforge.chaperon.build.State;
import net.sourceforge.chaperon.model.grammar.Grammar;
import net.sourceforge.chaperon.model.grammar.Production;
import net.sourceforge.chaperon.model.symbol.Symbol;
import net.sourceforge.chaperon.model.symbol.Terminal;

import org.apache.commons.logging.Log;

import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.LocatorImpl;

import java.util.Stack;

//import org.xml.sax.ext.LexicalHandler;

/**
* This class represents a simulation of a pushdown automata using the parser automaton class.
*
* @author <a href="mailto:stephan@apache.org">Stephan Michels</a>
* @version CVS $Id: GeneralParserProcessor.java,v 1.19 2003/12/14 09:41:35 benedikta Exp $
*/
public class GeneralParserProcessor implements ContentHandler, LexicalHandler
{
  private static final String NS = "http://chaperon.sourceforge.net/schema/lexemes/1.0";
  private static final String LEXEMES = "lexemes";
  private static final String LEXEME = "lexeme";

  /** Namespace for the generated SAX events. */
  private static final String NS_OUTPUT = "http://chaperon.sourceforge.net/schema/syntaxtree/1.0";
  private static final String OUTPUT = "output";
  private ContentHandler contentHandler = null;
  private LexicalHandler lexicalHandler = null;
  private Locator locator = null;
  private LocatorImpl locatorImpl = null;
  private static final int STATE_OUTER = 0;
  private static final int STATE_LEXEMES = 1;
  private static final int STATE_LEXEME = 2;
  private int state = STATE_OUTER;
  private Automaton automaton;
  private Grammar grammar;
  private boolean flatten = false;

  //private ParserHandler handler;
  private Stack current = new Stack();
  private Stack next = new Stack();
  private Log log;

  //private boolean recovery = false;
  private int maxActiveStates = 50;

  /**
   * Create a new parser processor.
   */
  public GeneralParserProcessor() {}

  /**
   * Create a new parser processor.
   *
   * @param automaton Parser automaton, which the processor should ues.
   * @param handler Handler, which should receives the parser events.
   * @param log Log, which should used.
   */
  public GeneralParserProcessor(Automaton automaton, Log log)
  {
    this.automaton = automaton;
    this.log = log;
  }

  /**
   * Set the parser automaton for the processor.
   *
   * @param automaton Parser automaton.
   */
  public void setParserAutomaton(Automaton automaton)
  {
    this.automaton = automaton;
    this.grammar = automaton.getGrammar();
  }

  /**
   * Set the <code>ContentHandler</code> that will receive XML data.
   */
  public void setContentHandler(ContentHandler handler)
  {
    this.contentHandler = handler;
  }

  /**
   * Set the <code>LexicalHandler</code> that will receive XML data.
   */
  public void setLexicalHandler(LexicalHandler handler)
  {
    this.lexicalHandler = handler;
  }

  /**
   * Provide processor with a log.
   *
   * @param log The log.
   */
  public void setLog(Log log)
  {
    this.log = log;
  }

  /**
   * If the adapter should produce a more flatten XML hirachy, which means elements which the same
   * name will be collapsed
   *
   * @param flatten True, if a more flatten hirachy should be produced.
   */
  public void setFlatten(boolean flatten)
  {
    this.flatten = flatten;
  }

  /**
   * Receive an object for locating the origin of SAX document events.
   *
   * @param locator
   */
  public void setDocumentLocator(Locator locator)
  {
    this.locator = locator;

    if (locator!=null)
    {
      this.locatorImpl = new LocatorImpl(locator);
      contentHandler.setDocumentLocator(locatorImpl);
    }
  }

  /**
   * Receive notification of the beginning of a document.
   *
   * @throws SAXException
   */
  public void startDocument() throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());
    contentHandler.startDocument();
    state = STATE_OUTER;
  }

  /**
   * Receive notification of the beginning of an element.
   *
   * @param namespaceURI
   * @param localName
   * @param qName
   * @param atts
   *
   * @throws SAXException
   */
  public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
    throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());

    if (state==STATE_OUTER)
    {
      if ((namespaceURI!=null) && (namespaceURI.equals(NS)) && (localName.equals(LEXEMES)))
      {
        processStartDocument();
        state = STATE_LEXEMES;
      }
      else
        contentHandler.startElement(namespaceURI, localName, qName, atts);
    }
    else if (state==STATE_LEXEMES)
    {
      if ((namespaceURI!=null) && (namespaceURI.equals(NS)) && (localName.equals(LEXEME)))
      {
        processLexeme(atts.getValue("symbol"), atts.getValue("text"));
        state = STATE_LEXEME;
      }
      else
        throw new SAXException("Unexpected start element.");
    }
    else if (state==STATE_LEXEME)
      throw new SAXException("Unexpected start element.");
  }

  /**
   * Receive notification of the end of an element.
   *
   * @param namespaceURI
   * @param localName
   * @param qName
   *
   * @throws SAXException
   */
  public void endElement(String namespaceURI, String localName, String qName)
    throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());

    if (state==STATE_OUTER)
      contentHandler.endElement(namespaceURI, localName, qName);
    else if (state==STATE_LEXEMES)
    {
      if ((namespaceURI!=null) && (namespaceURI.equals(NS)) && (localName.equals(LEXEMES)))
      {
        contentHandler.startPrefixMapping("", NS_OUTPUT);
        contentHandler.startElement(NS_OUTPUT, OUTPUT, OUTPUT, new AttributesImpl());

        processEndDocument();

        contentHandler.endElement(NS_OUTPUT, OUTPUT, OUTPUT);
        contentHandler.endPrefixMapping("");

        state = STATE_OUTER;
      }
      else
        throw new SAXException("Unexpected end element.");
    }
    else if (state==STATE_LEXEME)
      state = STATE_LEXEMES;
  }

  /**
   * Receive notification of character data.
   *
   * @param ch
   * @param start
   * @param length
   *
   * @throws SAXException
   */
  public void characters(char[] ch, int start, int length)
    throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());

    if (state==STATE_OUTER)
      contentHandler.characters(ch, start, length);
  }

  /**
   * Receive notification of ignorable whitespace in element content.
   *
   * @param ch
   * @param start
   * @param length
   *
   * @throws SAXException
   */
  public void ignorableWhitespace(char[] ch, int start, int length)
    throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());

    if (state==STATE_OUTER)
      contentHandler.ignorableWhitespace(ch, start, length);
  }

  /**
   * Begin the scope of a prefix-URI Namespace mapping.
   *
   * @param prefix
   * @param uri
   *
   * @throws SAXException
   */
  public void startPrefixMapping(String prefix, String uri)
    throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());

    contentHandler.startPrefixMapping(prefix, uri);
  }

  /**
   * End the scope of a prefix-URI mapping.
   *
   * @param prefix
   *
   * @throws SAXException
   */
  public void endPrefixMapping(String prefix) throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());

    contentHandler.endPrefixMapping(prefix);
  }

  /**
   * Receive notification of a processing instruction.
   *
   * @param target
   * @param data
   *
   * @throws SAXException
   */
  public void processingInstruction(String target, String data)
    throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());

    if (state==STATE_OUTER)
      contentHandler.processingInstruction(target, data);
  }

  /**
   * Receive notification of a skipped entity.
   *
   * @param name
   *
   * @throws SAXException
   */
  public void skippedEntity(String name) throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());

    if (state==STATE_OUTER)
      contentHandler.skippedEntity(name);
  }

  /**
   * Receive notification of the end of a document.
   *
   * @throws SAXException
   */
  public void endDocument() throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());

    if (state==STATE_OUTER)
      contentHandler.endDocument();
  }

  /**
   * Report the start of DTD declarations, if any.
   */
  public void startDTD(String name, String publicId, String systemId)
    throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.startDTD(name, publicId, systemId);
  }

  /**
   * Report the end of DTD declarations.
   */
  public void endDTD() throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.endDTD();
  }

  /**
   * Report the beginning of an entity.
   */
  public void startEntity(String name) throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.startEntity(name);
  }

  /**
   * Report the end of an entity.
   */
  public void endEntity(String name) throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.endEntity(name);
  }

  /**
   * Report the start of a CDATA section.
   */
  public void startCDATA() throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.startCDATA();
  }

  /**
   * Report the end of a CDATA section.
   */
  public void endCDATA() throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.endCDATA();
  }

  /**
   * Report an XML comment anywhere in the document.
   */
  public void comment(char[] ch, int start, int len) throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.comment(ch, start, len);
  }

  /**
   * Receives the notification, that the lexical processor starts reading a new document.
   *
   * @throws Exception If a exception occurs.
   */
  private void processStartDocument()
  {
    current.clear();
    current.push(new StateNode(automaton.getState(0), null, null));
    next.clear();

    count = 0;

    System.out.println("Automaton:\n"+automaton);

    //handler.handleStartDocument();
  }

  private static int count = 0;

  /**
   * Receives the notification, that the lexical processor has recognized a lexeme.
   *
   * @param symbol Symbol of the lexeme.
   * @param text Recognized text.
   *
   * @throws Exception If a exception occurs.
   */
  private void processLexeme(String symbolname, String text)
  {
    Terminal symbol = new Terminal(symbolname);

    System.out.println("\n===================================\nProcess "+symbolname);

    if (current.isEmpty())
      throw new IllegalStateException("Parsing process is aborted");

    System.out.println("Current states");

    for (int i = 0; i<current.size(); i++)
      System.out.println(current.get(i));

    System.out.println();

    if (current.size()>maxActiveStates)
      throw new IllegalStateException("Processor occupied too many states");

    /* ============================ Reduce =================================== */
    int watchdog = 0;

    while (!current.isEmpty())
    {
      if (watchdog++>20)
        throw new IllegalStateException("overflow");

      StateNode statenode = (StateNode)current.pop();

      next.push(statenode);

      ReduceAction[] reduceactions = statenode.state.getReduceActions();

      if (reduceactions.length>0)
      {
        for (int i = 0; i<reduceactions.length; i++)
        {
          Production production = reduceactions[i].production;

          if ((log!=null) && (log.isDebugEnabled()))
            log.debug(
            /*"State "+node.state+*/
            " reduce "+production.getSymbol());

          /*+
                                   " ("+production+")");*/
          ProductionNode productionnode = new ProductionNode(production);
          TreeNode[] descendants = new TreeNode[production.getDefinition().getSymbolCount()];

          StateNode ancestor = statenode;

          for (int j = production.getDefinition().getSymbolCount()-1; j>=0; j--)
          {
            descendants[j] = ancestor.treenode;
            ancestor = ancestor.ancestor;
          }

          productionnode.descendants = descendants;

          if (descendants.length>0)
          {
            productionnode.linenumber = descendants[0].linenumber;
            productionnode.columnnumber = descendants[0].columnnumber;
          }
          else
          {
            productionnode.linenumber = locator.getLineNumber();
            productionnode.columnnumber = locator.getColumnNumber();
          }

          ShiftAction shiftaction = ancestor.state.getShiftAction(productionnode.symbol);

          if (shiftaction!=null)
          {
            StateNode newstatenode = getStateNode(current, shiftaction.state, ancestor);

            if (newstatenode==null)
            {
              System.out.println("new state node: new state="+automaton.indexOf(shiftaction.state)+
                                 " ancestor state="+automaton.indexOf(ancestor.state));
              newstatenode = new StateNode(shiftaction.state, ancestor, productionnode);
              current.push(newstatenode);
            }
            else
            {
              System.out.println("merging state node");

              ProductionNode oldproductionnode = (ProductionNode)newstatenode.treenode;

              if (grammar.getPriority(oldproductionnode.production)>grammar.getPriority(production))
              {
                System.out.println("priority("+production+") < priority("+
                                   oldproductionnode.production+")");
                newstatenode.treenode = productionnode;
              }
              else
                System.out.println("priority("+production+") >= priority("+
                                   oldproductionnode.production+")");
            }
          }
        }
      }
    }

    Stack dummy = next;
    next = current;
    current = dummy;

    System.out.println("Current states");

    for (int i = 0; i<current.size(); i++)
      System.out.println(current.get(i));

    System.out.println();

    /* ==================================== Shift  =================================== */
    TokenNode tokennode = new TokenNode(symbol, text);

    if (locator!=null)
    {
      tokennode.linenumber = locator.getLineNumber();
      tokennode.columnnumber = locator.getColumnNumber();
    }

    while (!current.isEmpty())
    {
      StateNode statenode = (StateNode)current.pop();

      ShiftAction shiftaction = statenode.state.getShiftAction(symbol);

      if (shiftaction!=null)
      {
        if ((log!=null) && (log.isDebugEnabled()))
          log.debug(
          /*"State "+state+*/
          " shift token "+symbolname+" ("+symbol+")");

        next.push(new StateNode(shiftaction.state, statenode, tokennode));
      }
    }

    if (next.isEmpty())
      throw new IllegalArgumentException("Token "+symbolname+" is not expected in this state");

    dummy = next;
    next = current;
    current = dummy;

    System.out.println("Current states");

    for (int i = 0; i<current.size(); i++)
      System.out.println(current.get(i));

    System.out.println();
  }

  /**
   * Receives the notification, that the lexical processor accepted the complete document, and
   * stops with reading.
   *
   * @throws Exception If a exception occurs.
   */
  private void processEndDocument() throws SAXException
  {
    System.out.println("\n===================================\nProcess EOF");

    while (!current.isEmpty())
    {
      StateNode statenode = (StateNode)current.pop();

      ReduceAction[] reduceactions = statenode.state.getReduceActions();

      if (reduceactions.length>0)
      {
        for (int i = 0; i<reduceactions.length; i++)
        {
          Production production = reduceactions[i].production;

          ProductionNode productionnode = new ProductionNode(production);
          TreeNode[] descendants = new TreeNode[production.getDefinition().getSymbolCount()];

          StateNode ancestor = statenode;

          for (int j = production.getDefinition().getSymbolCount()-1; j>=0; j--)
          {
            descendants[j] = ancestor.treenode;
            ancestor = ancestor.ancestor;
          }

          productionnode.descendants = descendants;

          ShiftAction shiftaction = ancestor.state.getShiftAction(productionnode.symbol);

          //System.out.println("current state:\n"+ancestor.state+"\ntransition for "+productionnode.symbol+" = "+shiftaction);
          if ((automaton.getState(0)==ancestor.state) &&
              (productionnode.symbol.equals(grammar.getStartSymbol())))
          {
            if ((log!=null) && (log.isDebugEnabled()))
              log.debug("State "+state+" accept");

            StateNode newstatenode = getStateNode(next, null, ancestor);

            if (newstatenode==null)
            {
              newstatenode = new StateNode(null, ancestor, productionnode);
              next.push(newstatenode);
            }
            else
            {
              System.out.println("merging state node");

              ProductionNode oldproductionnode = (ProductionNode)newstatenode.treenode;

              if (grammar.getPriority(oldproductionnode.production)>grammar.getPriority(production))
              {
                System.out.println("priority("+production+") < priority("+
                                   oldproductionnode.production+")");
                newstatenode.treenode = productionnode;
              }
              else
                System.out.println("priority("+production+") >= priority("+
                                   oldproductionnode.production+")");
            }
          }
          else
          {
            if ((log!=null) && (log.isDebugEnabled()))
              log.debug(
              /*"State "+node.state+*/
              " reduce "+production.getSymbol()+" ("+production+")");

            /*          StateNode newstatenode = new
            StateNode(ancestor.state.getShiftAction(productionnode.symbol).state, ancestor, productionnode);

                      current.push(newstatenode);*/
            StateNode newstatenode = getStateNode(current, shiftaction.state, ancestor);

            if (newstatenode==null)
            {
              newstatenode = new StateNode(shiftaction.state, ancestor, productionnode);
              current.push(newstatenode);
            }
            else
            {
              System.out.println("merging state node");

              ProductionNode oldproductionnode = (ProductionNode)newstatenode.treenode;

              if (grammar.getPriority(oldproductionnode.production)>grammar.getPriority(production))
              {
                System.out.println("priority("+production+") < priority("+
                                   oldproductionnode.production+")");
                newstatenode.treenode = productionnode;
              }
              else
                System.out.println("priority("+production+") >= priority("+
                                   oldproductionnode.production+")");
            }
          }

          System.out.println("Current states");

          for (int k = 0; k<current.size(); k++)
            System.out.println(current.get(k));

          System.out.println();
        }
      }
    }

    if (log.isDebugEnabled())
      log.debug("Parser found "+next.size()+" alternatives");

    System.out.println();

    int index = 1;

    while (!next.isEmpty())
    {
      StateNode state = (StateNode)next.pop();

      //System.out.println(index+". result: "+((StateNode)next.pop()).treenode);
      fireEvents(null, state.treenode);
      index++;
    }

    if (next.size()>1)
      log.warn("Grammar is ambig, found "+next.size()+" alternative trees");
  }

  private StateNode getStateNode(Stack stack, State state, StateNode ancestor)
  {
    StateNode statenode = null;

    for (int j = 0; j<stack.size(); j++)
    {
      statenode = (StateNode)stack.get(j);

      if ((statenode.ancestor==ancestor) && (statenode.state==state))
        return statenode;
    }

    return null;
  }

  /**
   * Fire the SAX events by traverseing the hirachy.
   *
   * @param parent Parent node.
   * @param node Current node.
   *
   * @throws Exception If an exception occurs.
   */
  private void fireEvents(ProductionNode parent, TreeNode node)
    throws SAXException
  {
    if (node instanceof ProductionNode)
    {
      ProductionNode production = (ProductionNode)node;

      if (locatorImpl!=null)
      {
        locatorImpl.setLineNumber(production.linenumber);
        locatorImpl.setColumnNumber(production.columnnumber);
      }

      if ((!flatten) || (parent==null) || (!parent.symbol.equals(production.symbol)))
        contentHandler.startElement(NS_OUTPUT, production.symbol.getName(),
                                    production.symbol.getName(), new AttributesImpl());

      for (int i = 0; i<production.descendants.length; i++)
        fireEvents(production, production.descendants[i]);

      if ((!flatten) || (parent==null) || (!parent.symbol.equals(production.symbol)))
        contentHandler.endElement(NS_OUTPUT, production.symbol.getName(),
                                  production.symbol.getName());
    }
    else
    {
      TokenNode token = (TokenNode)node;

      if (locatorImpl!=null)
      {
        locatorImpl.setLineNumber(token.linenumber);
        locatorImpl.setColumnNumber(token.columnnumber);
      }

      contentHandler.startElement(NS_OUTPUT, token.symbol.getName(), token.symbol.getName(),
                                  new AttributesImpl());
      contentHandler.characters(token.text.toCharArray(), 0, token.text.length());
      contentHandler.endElement(NS_OUTPUT, token.symbol.getName(), token.symbol.getName());
    }
  }

  private class StateNode
  {
    public StateNode(State state, StateNode ancestor, TreeNode treenode)
    {
      this.state = state;
      this.treenode = treenode;
      this.ancestor = ancestor;
    }

    public State state = null;
    public StateNode ancestor = null;
    public TreeNode treenode = null;

    public String toString()
    {
      StringBuffer buffer = new StringBuffer();

      if (ancestor!=null)
      {
        buffer.append(ancestor.toString());
        buffer.append(" <- ");
      }

      buffer.append("<");
      buffer.append(automaton.indexOf(state));

      /*buffer.append(",");
      if (ancestor!=null)
        buffer.append(automaton.indexOf(ancestor.state));*/
      buffer.append(">");

      return buffer.toString();
    }
  }

  private abstract class TreeNode
  {
    public Symbol symbol = null;
    public int linenumber = 1;
    public int columnnumber = 1;
  }

  private class TokenNode extends TreeNode
  {
    public TokenNode(Terminal symbol, String text)
    {
      this.symbol = symbol;
      this.text = text;
    }

    public String text = null;

    public String toString()
    {
      StringBuffer buffer = new StringBuffer();

      buffer.append("{");
      buffer.append(symbol);
      buffer.append(":");
      buffer.append(text);
      buffer.append("}");

      return buffer.toString();
    }
  }

  private class ProductionNode extends TreeNode
  {
    /*public ProductionNode(Nonterminal symbol)
    {
      this.symbol = symbol;
    }*/
    public ProductionNode(Production production)
    {
      this.production = production;
      this.symbol = production.getSymbol();
    }

    public Production production = null;
    public TreeNode[] descendants = null;

    public String toString()
    {
      StringBuffer buffer = new StringBuffer();

      buffer.append("{");
      buffer.append(symbol);
      buffer.append(":");

      for (int i = 0; i<descendants.length; i++)
        buffer.append(descendants[i].toString());

      buffer.append("}");

      return buffer.toString();
    }
  }
}
TOP

Related Classes of net.sourceforge.chaperon.process.GeneralParserProcessor$TreeNode

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.