Package net.sourceforge.chaperon.process.extended

Source Code of net.sourceforge.chaperon.process.extended.ExtendedDirectParserProcessor

/*
*  Copyright (C) Chaperon. All rights reserved.
*  -------------------------------------------------------------------------
*  This software is published under the terms of the Apache Software License
*  version 1.1, a copy of which has been included  with this distribution in
*  the LICENSE file.
*/

package net.sourceforge.chaperon.process.extended;

import net.sourceforge.chaperon.common.Decoder;
import net.sourceforge.chaperon.model.Violations;
import net.sourceforge.chaperon.model.extended.ExtendedGrammar;
import net.sourceforge.chaperon.model.extended.Pattern;
import net.sourceforge.chaperon.model.extended.PatternIterator;

import org.apache.commons.logging.Log;

import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.LocatorImpl;

import java.util.Stack;

/**
* This class simulates a pushdown automaton that is driven directly by an extended grammar.
*
* @author <a href="mailto:stephan@apache.org">Stephan Michels</a>
* @version CVS $Id: ExtendedDirectParserProcessor.java,v 1.12 2004/01/09 10:34:51 benedikta Exp $
*/
public class ExtendedDirectParserProcessor implements ContentHandler, LexicalHandler
{
  // Namespace and local name of the input element whose character content gets parsed.
  private static final String NS = "http://chaperon.sourceforge.net/schema/text/1.0";
  private static final String TEXT = "text";

  /** Namespace for the generated SAX events. */
  public static final String NS_OUTPUT = "http://chaperon.sourceforge.net/schema/syntaxtree/2.0";
  // Local name of the wrapper element emitted around the generated tree.
  private static final String OUTPUT = "output";

  // SAX variables
  // Downstream handlers which receive the forwarded and the generated events.
  private ContentHandler contentHandler = null;
  private LexicalHandler lexicalHandler = null;
  // Locator supplied through setDocumentLocator and the copy which is handed downstream.
  private Locator locator = null;
  private LocatorImpl locatorImpl = null;

  // State of SAX events
  // STATE_OUTER: events are passed through; STATE_INNER: inside a text element being parsed.
  private static final int STATE_OUTER = 0;
  private static final int STATE_INNER = 1;
  private int state = STATE_OUTER;

  // Help variables
  private ExtendedGrammar grammar;
  // Stored by setFlatten; not read anywhere else in this class.
  private boolean flatten = false;
  // Nodes still to be processed for the current input, and nodes collected by shift for the
  // following character (exchanged by swapStacks).
  private StackNodeSet current = new StackNodeSet();
  private StackNodeSet next = new StackNodeSet();
  private Log log;
  // Accepted parse result, set by reduce and consumed by fireEvents.
  private StackNodeList root;
  // Position within the parsed text, starting at 1; used in error messages and for the locator.
  private int line = 1;
  private int column = 1;
  // Limit for the watchdog counters of the node sets; parsing is aborted above it.
  private static final int MAXWATCHDOG = 1000;

  /**
   * Create a new parser processor.
   */
  public ExtendedDirectParserProcessor() {}

  /**
   * Create a new parser processor.
   *
   * @param automaton Parser automaton, which the processor should ues.
   * @param handler Handler, which should receives the parser events.
   * @param log Log, which should used.
   */
  public ExtendedDirectParserProcessor(ExtendedGrammar grammar, Log log)
  {
    setExtendedGrammar(grammar);
    this.log = log;
  }

  /**
   * Set the parser automaton for the processor.
   *
   * @param automaton Parser automaton.
   */
  public void setExtendedGrammar(ExtendedGrammar grammar)
  {
    this.grammar = grammar;

    Violations violations = grammar.validate();

    if ((violations!=null) && (violations.getViolationCount()>0))
      throw new IllegalArgumentException("Grammar is not valid: "+violations.getViolation(0));

    if ((log!=null) && (log.isDebugEnabled()))
      log.debug("grammar:\n"+grammar);

    grammar.update();

    if ((log!=null) && (log.isDebugEnabled()))
    {
      StringBuffer buffer = new StringBuffer();
      buffer.append("Successors:\n");
      for(PatternIterator i=grammar.getAllPattern().getPattern(); i.hasNext();)
      {
        Pattern pattern = i.next();
        if (pattern.getSuccessors().hasNext())
        {
          buffer.append(pattern+"->{");
          for(PatternIterator j=pattern.getSuccessors(); j.hasNext();)
          {
            buffer.append(j.next());
            if (j.hasNext())
              buffer.append(",");
          }
          buffer.append("}\n");
        }
      }

      buffer.append("\nAscending successors:\n");
      for(PatternIterator i=grammar.getAllPattern().getPattern(); i.hasNext();)
      {
        Pattern pattern = i.next();
        if (pattern.getAscendingSuccessors().hasNext())
        {
          buffer.append(pattern+"->{");
          for(PatternIterator j=pattern.getAscendingSuccessors(); j.hasNext();)
          {
            buffer.append(j.next());
            if (j.hasNext())
              buffer.append(",");
          }
          buffer.append("}\n");
        }
      }

      buffer.append("\nDescending successors:\n");
      for(PatternIterator i=grammar.getAllPattern().getPattern(); i.hasNext();)
      {
        Pattern pattern = i.next();
        if (pattern.getDescendingSuccessors().hasNext())
        {
          buffer.append(pattern+"->{");
          for(PatternIterator j=pattern.getDescendingSuccessors(); j.hasNext();)
          {
            buffer.append(j.next());
            if (j.hasNext())
              buffer.append(",");
          }
          buffer.append("}\n");
        }
      }
      log.debug(buffer.toString());
    }
  }

  /**
   * Set the <code>ContentHandler</code> that will receive XML data.
   */
  public void setContentHandler(ContentHandler handler)
  {
    this.contentHandler = handler;
  }

  /**
   * Set the <code>LexicalHandler</code> that will receive XML data.
   */
  public void setLexicalHandler(LexicalHandler handler)
  {
    this.lexicalHandler = handler;
  }

  /**
   * Provide processor with a log.
   *
   * @param log The log.
   */
  public void setLog(Log log)
  {
    this.log = log;
  }

  /**
   * If the adapter should produce a more flatten XML hirachy, which means elements which the same
   * name will be collapsed
   *
   * @param flatten True, if a more flatten hirachy should be produced.
   */
  public void setFlatten(boolean flatten)
  {
    this.flatten = flatten;
  }

  /**
   * Receive an object for locating the origin of SAX document events.
   */
  public void setDocumentLocator(Locator locator)
  {
    this.locator = locator;

    if (locator!=null)
    {
      this.locatorImpl = new LocatorImpl(locator);
      contentHandler.setDocumentLocator(locatorImpl);
    }
  }

  /**
   * Receive notification of the beginning of a document.
   */
  public void startDocument() throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());
    contentHandler.startDocument();
    state = STATE_OUTER;
  }

  /**
   * Receive notification of the beginning of an element.
   */
  public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
    throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());

    if (state==STATE_INNER)
      throw new SAXException("Unexpected element "+qName);

    if (state==STATE_OUTER)
    {
      if ((namespaceURI!=null) && (namespaceURI.equals(NS)))
      {
        if (!localName.equals(TEXT))
          throw new SAXException("Unknown element "+qName);
      }
      else
      {
        contentHandler.startElement(namespaceURI, localName, qName, atts);
        return;
      }
    }

    state = STATE_INNER;

    // ======================= Start Text Document =======================
    current.clear();
    current.push(new TerminalStackNode(null, 0, grammar.getStartPattern(), null));
    next.clear();
    line = 1;
    column = 1;
  }

  /**
   * Receive notification of character data.
   */
  public void characters(char[] text, int textstart, int textlength)
    throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());

    if (state==STATE_OUTER)
    {
      contentHandler.characters(text, textstart, textlength);

      return;
    }

    for (int position = textstart; position<(textstart+textlength); position++)
    {
      if ((log!=null) && (log.isDebugEnabled()))
        log.debug("===================================\nProcess "+Decoder.toChar(text[position]));

      if (current.isEmpty())
        throw new IllegalStateException("Parsing process is aborted");

      if ((log!=null) && (log.isDebugEnabled()))
        log.debug(getStatesAsString());

      while (!current.isEmpty())
      {
        StackNode node = current.pop();

        for (PatternIterator nextPattern = node.pattern.getDescendingSuccessors();
             nextPattern.hasNext();)
          if (nextPattern.next().contains(text[position]))
          {
            reduce(node.pattern.getDefinition().getSymbol(), node, null);
            break;
          }

        reduceEmpty(node);

        shift(node, text, position);

        if ((current.watchdog>MAXWATCHDOG) || (next.watchdog>MAXWATCHDOG))
        {
          if ((log!=null) && (log.isInfoEnabled()))
            log.info(getStatesAsString());
          throw new IllegalStateException("Aborted parsing because of a high ambiguous grammar"+
                                          " ["+line+":"+column+"]");
        }
      }

      if ((log!=null) && (log.isDebugEnabled()))
        log.debug(getStatesAsString());

      if (next.isEmpty())
      {
        if ((log!=null) && (log.isInfoEnabled()))
          log.info(getStatesAsString());
        throw new IllegalArgumentException("Character '"+text[position]+"' is not expected"+" ["+
                                           line+":"+column+"]");
      }

      swapStacks();

      increasePosition(text, position, position+1);
    }
  }

  /**
   * Receive notification of the end of an element.
   */
  public void endElement(String namespaceURI, String localName, String qName)
    throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());

    if (state==STATE_OUTER)
    {
      contentHandler.endElement(namespaceURI, localName, qName);
      return;
    }

    if (state==STATE_INNER)
    {
      if ((namespaceURI!=null) && (namespaceURI.equals(NS)))
      {
        if (!localName.equals(TEXT))
          throw new SAXException("Unknown element "+qName);
      }
      else
        throw new SAXException("Unexpected element "+qName);
    }

    state = STATE_OUTER;

    // ======================= End Text Document =======================
    if ((log!=null) && (log.isDebugEnabled()))
      log.debug("===================================\nProcess end of text");

    root = null;

    Pattern eot = grammar.getEndPattern();

    while (!current.isEmpty())
    {
      StackNode node = current.pop();

      for (PatternIterator nextPattern = node.pattern.getDescendingSuccessors();
           nextPattern.hasNext();)
        if (nextPattern.next()==eot)
        {
          reduce(node.pattern.getDefinition().getSymbol(), node, null);
          break;
        }

      reduceEmpty(node);

      if ((current.watchdog>MAXWATCHDOG) || (next.watchdog>MAXWATCHDOG))
      {
        if ((log!=null) && (log.isInfoEnabled()))
          log.info(getStatesAsString());
        throw new IllegalStateException("Aborted parsing because of a high ambiguous grammar"+" ["+
                                        line+":"+column+"]");
      }
    }

    if ((log!=null) && (log.isDebugEnabled()))
      log.debug(getStatesAsString());

    if (root==null)
    {
      if ((log!=null) && (log.isInfoEnabled()))
        log.info(getStatesAsString());
      throw new IllegalStateException("Unexpected end of text"+" ["+line+":"+column+"]");
    }

    fireEvents();
  }

  /**
   * Receive notification of ignorable whitespace in element content.
   */
  public void ignorableWhitespace(char[] ch, int start, int length)
    throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());

    if (state==STATE_OUTER)
      contentHandler.ignorableWhitespace(ch, start, length);
  }

  /**
   * Begin the scope of a prefix-URI Namespace mapping.
   */
  public void startPrefixMapping(String prefix, String uri)
    throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());

    contentHandler.startPrefixMapping(prefix, uri);
  }

  /**
   * End the scope of a prefix-URI mapping.
   */
  public void endPrefixMapping(String prefix) throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());

    contentHandler.endPrefixMapping(prefix);
  }

  /**
   * Receive notification of a processing instruction.
   */
  public void processingInstruction(String target, String data)
    throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());

    if (state==STATE_OUTER)
      contentHandler.processingInstruction(target, data);
  }

  /**
   * Receive notification of a skipped entity.
   */
  public void skippedEntity(String name) throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());

    if (state==STATE_OUTER)
      contentHandler.skippedEntity(name);
  }

  /**
   * Receive notification of the end of a document.
   */
  public void endDocument() throws SAXException
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());

    contentHandler.endDocument();
  }

  /**
   * Report the start of DTD declarations, if any.
   */
  public void startDTD(String name, String publicId, String systemId)
    throws SAXException
  {
    lexicalHandler.startDTD(name, publicId, systemId);
  }

  /**
   * Report the end of DTD declarations.
   */
  public void endDTD() throws SAXException
  {
    lexicalHandler.endDTD();
  }

  /**
   * Report the beginning of an entity.
   */
  public void startEntity(String name) throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.startEntity(name);
  }

  /**
   * Report the end of an entity.
   */
  public void endEntity(String name) throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.endEntity(name);
  }

  /**
   * Report the start of a CDATA section.
   */
  public void startCDATA() throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.startCDATA();
  }

  /**
   * Report the end of a CDATA section.
   */
  public void endCDATA() throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.endCDATA();
  }

  /**
   * Report an XML comment anywhere in the document.
   */
  public void comment(char[] ch, int start, int len) throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.comment(ch, start, len);
  }

  private String getStatesAsString()
  {
    StringBuffer buffer = new StringBuffer();
    buffer.append("current:\n");
    buffer.append(current);
    buffer.append(current.dump());
    buffer.append("Count of states:");
    buffer.append(current.size());
    buffer.append("\nnext:\n");
    buffer.append(next);
    buffer.append(next.dump());
    buffer.append("Count of states:");
    buffer.append(next.size());
    return buffer.toString();
  }

  private void swapStacks()
  {
    StackNodeSet dummy = next;
    next = current;
    current = dummy;
    next.clear();
  }

  private void shift(StackNode node, char[] text, int position)
  {
    for (PatternIterator i = node.pattern.getSuccessors(); i.hasNext();)
    {
      Pattern nextPattern = i.next();

      if (nextPattern.contains(text[position]))
      {
        if (node instanceof NonterminalStackNode)
        {
          for (PatternIterator j = node.last.pattern.getSuccessors(); j.hasNext();)
            if (j.next().contains(text[position]))
              return;

          for (PatternIterator j = node.last.pattern.getAscendingSuccessors(); j.hasNext();)
            if (j.next().contains(text[position]))
              return;
        }

        StackNode newNode = new TerminalStackNode(text, position, nextPattern, node);

        if ((log!=null) && (log.isDebugEnabled()))
          log.debug("shift "+newNode);

        next.push(newNode);
      }
    }

    for (PatternIterator i = node.pattern.getAscendingSuccessors(); i.hasNext();)
    {
      Pattern firstPattern = i.next();

      if (firstPattern.contains(text[position]))
      {
        if (node instanceof NonterminalStackNode)
        {
          for (PatternIterator j = node.last.pattern.getSuccessors(); j.hasNext();)
            if (j.next().contains(text[position]))
              return;

          for (PatternIterator j = node.last.pattern.getAscendingSuccessors(); j.hasNext();)
            if (j.next().contains(text[position]))
              return;
        }

        StackNode newNode = new TerminalStackNode(text, position, firstPattern, node);

        if ((log!=null) && (log.isDebugEnabled()))
          log.debug("shift "+newNode);

        next.push(newNode);
      }
    }
  }

  /**
   * Reduces the node sequence which ends at the given node to nonterminal nodes for the symbol.
   * The new nodes are pushed to the current set, so the caller's loop over the current set
   * processes them as well. If the reduction yields the start symbol directly behind the start
   * pattern, the collected list is stored as root (only the first time).
   *
   * @param symbol Symbol of the definition which is reduced.
   * @param node Last node of the sequence.
   * @param list Nodes collected so far by an enclosing call, or null.
   */
  private void reduce(String symbol, StackNode node, StackNodeList list)
  {
    // Run the same reduction for the sibling of the node first, with the list collected so far.
    if (node.sibling!=null)
      reduce(symbol, node.sibling, list);
    list = new StackNodeList(node, list);

    // Walk up the ancestors as long as the pattern of the node is a successor of the pattern of
    // its ancestor, and collect every visited node in the list.
    while(node.ancestor.pattern.hasSuccessor(node.pattern))
    {
      node = node.ancestor;
      if (node.sibling!=null)
        reduce(symbol, node.sibling, list);
      list = new StackNodeList(node, list);
    }

    // node.ancestor is now the node in front of the collected sequence. Every successor pattern
    // of it, which carries the reduced symbol, continues the parsing with a nonterminal node.
    for (PatternIterator i = node.ancestor.pattern.getSuccessors(); i.hasNext();)
    {
      Pattern nextPattern = i.next();
      if (symbol.equals(nextPattern.getSymbol()))
      {
        StackNode newNode = new NonterminalStackNode(list, nextPattern, node.ancestor);

        if ((log!=null) && (log.isDebugEnabled()))
          log.debug("reduce "+newNode+" with "+list);

        current.push(newNode);
      }
    }

    // The same for the ascending successors.
    for (PatternIterator i = node.ancestor.pattern.getAscendingSuccessors(); i.hasNext();)
    {
      Pattern firstPattern = i.next();
      if (symbol.equals(firstPattern.getSymbol()))
      {
        StackNode newNode = new NonterminalStackNode(list, firstPattern, node.ancestor);

        if ((log!=null) && (log.isDebugEnabled()))
          log.debug("reduce "+newNode+" with "+list);

        current.push(newNode);
      }
    }

    // Accept: no root stored yet, the sequence begins directly behind the start pattern, and
    // the reduced symbol is the start symbol of the grammar.
    if ((root==null) && (node.ancestor.pattern==grammar.getStartPattern()) &&
        (symbol.equals(grammar.getStartSymbol())))
    {
      root = list;

      if ((log!=null) && (log.isDebugEnabled()))
        log.debug("accept "+symbol+" with "+list);
    }
  }

  private void reduceEmpty(StackNode node)
  {
    for (PatternIterator i = node.pattern.getSuccessors(); i.hasNext();)
    {
      Pattern nextPattern = i.next();
      if ((nextPattern.getSymbol()!=null) && (grammar.isNullable(nextPattern.getSymbol())))
      {
        StackNode newNode = new NonterminalStackNode(null, nextPattern, node);

        if ((log!=null) && (log.isDebugEnabled()))
          log.debug("reduce "+newNode);

        current.push(newNode);
      }
    }

    for (PatternIterator i = node.pattern.getAscendingSuccessors(); i.hasNext();)
    {
      Pattern firstPattern = i.next();
      if ((firstPattern.getSymbol()!=null) && (grammar.isNullable(firstPattern.getSymbol())))
      {
        // TODO: check for empty elements, which can occur in the ascending successors
        if (firstPattern==node.pattern)
        {
          //System.out.println("prevent empty element "+firstPattern);
          continue;
        }

        StackNode newNode = new NonterminalStackNode(null, firstPattern, node);

        if ((log!=null) && (log.isDebugEnabled()))
          log.debug("reduce "+newNode);

        current.push(newNode);
      }
    }
  }

  private void increasePosition(char[] text, int position, int lastposition)
  {
    for (int i = position; i<lastposition; i++)
    {
      if (text[i]=='\n')
      {
        column = 1;
        line++;
      }
      else if ((text[i]=='\r') && ((i==(text.length-1)) || (text[i+1]!='\n')))
      {
        column = 1;
        line++;
      }
      else
        column++;
    }
  }

  private void fireEvents() throws SAXException
  {
    contentHandler.startPrefixMapping("", NS_OUTPUT);
    contentHandler.startElement(NS_OUTPUT, OUTPUT, OUTPUT, new AttributesImpl());

    String symbol = grammar.getStartSymbol();
    contentHandler.startElement(NS_OUTPUT, symbol, symbol, new AttributesImpl());

    Stack stack = new Stack();
    StackNodeList next = root;
    char[] text = null;
    int position = 0;
    int lastposition = 0;
    line = 1;
    column = 1;

    if (locatorImpl!=null)
    {
      locatorImpl.setLineNumber(line);
      locatorImpl.setColumnNumber(column);
    }

    while (next!=null)
    {
      if (next.node instanceof NonterminalStackNode)
      {
        if (text!=null)
        {
          contentHandler.characters(text, position, (lastposition+1)-position);
          increasePosition(text, position, (lastposition+1)-position);

          if (locatorImpl!=null)
          {
            locatorImpl.setLineNumber(line);
            locatorImpl.setColumnNumber(column);
          }

          text = null;
        }

        NonterminalStackNode nonterminal = (NonterminalStackNode)next.node;

        AttributesImpl atts = new AttributesImpl();

        /*if (localizable)
        {
          atts.addAttribute("", "line", "line", "CDATA", String.valueOf(next.linenumber));
          atts.addAttribute("", "column", "column", "CDATA", String.valueOf(next.columnnumber));
        }*/
        contentHandler.startElement(NS_OUTPUT, next.node.pattern.getSymbol(),
                                    next.node.pattern.getSymbol(), atts);
        stack.push(next);
        next = nonterminal.definition;
      }
      else
      {
        TerminalStackNode terminal = (TerminalStackNode)next.node;
        if (text==null)
        {
          text = terminal.text;
          position = terminal.position;
        }
        else if (text!=terminal.text)
        {
          contentHandler.characters(text, position, (lastposition+1)-position);
          increasePosition(text, position, (lastposition+1)-position);

          if (locatorImpl!=null)
          {
            locatorImpl.setLineNumber(line);
            locatorImpl.setColumnNumber(column);
          }

          text = terminal.text;
          position = terminal.position;
        }

        lastposition = terminal.position;

        next = next.next;
      }

      while ((next==null) && (!stack.isEmpty()))
      {
        next = (StackNodeList)stack.pop();

        if (text!=null)
        {
          contentHandler.characters(text, position, (lastposition+1)-position);
          increasePosition(text, position, (lastposition+1)-position);

          if (locatorImpl!=null)
          {
            locatorImpl.setLineNumber(line);
            locatorImpl.setColumnNumber(column);
          }

          text = null;
        }

        contentHandler.endElement(NS_OUTPUT, next.node.pattern.getSymbol(),
                                  next.node.pattern.getSymbol());
        next = next.next;
      }
    }

    if (text!=null)
    {
      contentHandler.characters(text, position, (lastposition+1)-position);
      increasePosition(text, position, (lastposition+1)-position);

      if (locatorImpl!=null)
      {
        locatorImpl.setLineNumber(line);
        locatorImpl.setColumnNumber(column);
      }

      text = null;
    }

    contentHandler.endElement(NS_OUTPUT, symbol, symbol);

    contentHandler.endElement(NS_OUTPUT, OUTPUT, OUTPUT);
    contentHandler.endPrefixMapping("");
  }
}
TOP

Related Classes of net.sourceforge.chaperon.process.extended.ExtendedDirectParserProcessor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.