Package net.sourceforge.chaperon.parser.generator

Source Code of net.sourceforge.chaperon.parser.generator.ParserTableGenerator

/*
*  Copyright (C) Chaperon. All rights reserved.                              
*  -------------------------------------------------------------------------
*  This software is published under the terms of the Apache Software License
*  version 1.1, a copy of which has been included  with this distribution in
*  the LICENSE file.                                                        
*/

package net.sourceforge.chaperon.parser.generator;

import java.util.ArrayList;

import org.apache.avalon.framework.logger.LogEnabled;
import org.apache.avalon.framework.logger.Logger;

import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
import net.sourceforge.chaperon.grammar.Grammar;
import net.sourceforge.chaperon.grammar.SyntaxErrorException;
import net.sourceforge.chaperon.grammar.production.ProductionList;
import net.sourceforge.chaperon.grammar.production.ReduceType;
import net.sourceforge.chaperon.grammar.symbol.Symbol;
import net.sourceforge.chaperon.grammar.symbol.SymbolList;
import net.sourceforge.chaperon.grammar.token.Associativity;
//import net.sourceforge.chaperon.grammar.token.Comment;
import net.sourceforge.chaperon.grammar.token.Token;
import net.sourceforge.chaperon.grammar.token.TokenList;
//import net.sourceforge.chaperon.grammar.token.Whitespace;
import net.sourceforge.chaperon.helpers.IntegerList;
import net.sourceforge.chaperon.parser.RegexAutomate;
import net.sourceforge.chaperon.parser.ParserTable;
import net.sourceforge.chaperon.parser.generator.conflict.Conflict;
import net.sourceforge.chaperon.parser.generator.conflict.ConflictList;
import net.sourceforge.chaperon.parser.generator.conflict.ShiftReduceConflict;
import net.sourceforge.chaperon.parser.generator.conflict.ReduceReduceConflict;

/**
* This generator generates a parser table from a grammar
*
* @author Stephan Michels
* @version CVS $Id: ParserTableGenerator.java,v 1.12 2002/05/07 16:40:56 benedikta Exp $
*/
public class ParserTableGenerator implements LogEnabled
{

  /** Validated clone of the grammar handed to the constructor. */
  private Grammar        _grammar;

  /*private ProductionList _grammar.getProductionList();
  private TokenList      _grammar.getTokenList();*/
  /** All symbols of the grammar, as returned by <code>_grammar.getSymbols()</code>. */
  private SymbolList     _symbols;
  /** Terminal symbols taken from <code>_symbols</code>; their index is used as the action column of the table. */
  private SymbolList     _tsymbols;
  /** Non terminal symbols taken from <code>_symbols</code>; their index is used as the transition column of the table. */
  private SymbolList     _ntsymbols;

  /*private Whitespace     _whitespace;
  private Comment        _comment;
  private Symbol         _grammar.getStartSymbol();*/

  /** First sets of the grammar; created in generate() before the states are calculated. */
  private FirstSets      _firstsets;

  /** Collection of item sets (states) returned by items(). */
  private Collection     _C;

  /** The generated parser table; null until getParserTable() triggers the generation. */
  private ParserTable    _table;

  /**
   * Shift/reduce conflicts, which could neither be resolved by priority nor
   * by associativity. NOTE(review): the list is only written within this
   * class, never read.
   */
  private ConflictList   _conflicts = new ConflictList();

  /** Optional logger; null until enableLogging() is called, every use is guarded by a null check. */
  private Logger         _logger;

  /**
   * Creates a generator
   *
   * @param grammar Grammar, which should generate the parser table
   *
   * @throws SyntaxErrorException
   */
  public ParserTableGenerator(Grammar grammar) throws SyntaxErrorException
  {
    try
    {
      _grammar = (Grammar)grammar.clone();
    } catch (CloneNotSupportedException cnse)
    {
      throw new IllegalArgumentException("Grammar is nor cloneable");
    }

    SyntaxErrorException exception = _grammar.validate();

    if (exception != null)
      throw exception;

    //_grammar.getProductionList() = _grammar.getProductionList();
    //_grammar.getTokenList() = _grammar.getTokenList();
    _symbols = _grammar.getSymbols();
    _tsymbols = _symbols.getTerminalSymbols();
    _ntsymbols = _symbols.getNonTerminalSymbols();

    //_whitespace = _grammar.getWhitespace();
    //_comment = _grammar.getComment();
    //_grammar.getStartSymbol() = _grammar.getStartSymbol();
  }

  /**
   * Returns the generated parser table
   *
   * @return The parser table
   */
  public ParserTable getParserTable()
  {
    if (_table==null)
      generate();

    return _table;
  }

  /**
   * Provide component with a logger.
   *
   * @param logger the logger
   */
  public void enableLogging(Logger logger)
  {
    _logger = logger;
  }

  /**
   * Calculates all state and transitions
   * and combine all states with the same core.
   *
   * @return Collection of itemsets
   */
  private Collection items()
  {
    // C = closure( [S'=^S,EOF] )
    Collection C = new Collection(_grammar);

    IntegerList changedState = new IntegerList(); // 0=not changed 1=changed

    ItemSet I = new ItemSet(_grammar, _firstsets);
    IntegerList startlist = _grammar.getProductionList().getProductionList(_grammar.getStartSymbol());
    Symbol endsymbol = _tsymbols.getSymbol("EOF");

    for (int i = 0; i < startlist.getSize(); i++)
      I.addItem(startlist.get(i), 0, endsymbol);

    C.add(I.closure());
    changedState.add(1);

    ItemSet J;
    int index;
    boolean mustrepeat, changed;

    for (int i = 0; i < C.getSize(); i++)
      if (changedState.get(i) != 0)
      {
        changedState.set(i, 0);
        I = C.get(i);

        if (_logger!=null)
          _logger.debug("Inspecting state "+i+"\n"+I);

        // J = goto(I,X) add to C, for all non terminal and terminal elements X

        // for the non terminal symbols
        for (int j = 0; j < _ntsymbols.getSymbolCount(); j++)
        {
          J = I.gotoX(_ntsymbols.getSymbol(j));

          if (!J.isEmpty())
          {
            index = C.indexOfCore(J);
            if (index < 0)                   // if C doesn't contain J
            {
              index = C.add(J);
              changedState.add(1);
            }
            else                             // otherwise the found state extends through J
            {
              changed = C.get(index).add(J); // if the found state change

              if (changed)
              {
                if (index < changedState.getSize())
                  changedState.set(index, 1);
                else
                  changedState.add(1);
              }

              if ((index <= i) && (changed)) // if J before I, and J
                // was changed then must the loop repeat
                mustrepeat = true;
            }

            I.setTransition(_ntsymbols.getSymbol(j), index); // stores the transition for this symbol

            if (_logger!=null)
              _logger.debug("State "+i+" + "+_ntsymbols.getSymbol(j)+" -> State "+index);
          }
        }

        // and for the terminal symbls
        for (int j = 0; j < _tsymbols.getSymbolCount(); j++)
        {
          J = I.gotoX(_tsymbols.getSymbol(j));

          if (!J.isEmpty())
          {
            index = C.indexOfCore(J);
            if (index < 0)                   // if C doesn't contain J
            {
              index = C.add(J);
              changedState.add(1);
            }
            else                             // otherwise the found state extends through J
            {
              changed = C.get(index).add(J); // if the found state change

              if (changed)
              {
                if (index < changedState.getSize())
                  changedState.set(index, 1);
                else
                  changedState.add(1);
              }

              if ((index <= i) && (changed)) // if J before I, and J
                // was changed then must the loop repeat
                mustrepeat = true;
            }

            I.setTransition(_tsymbols.getSymbol(j), index); // stores the transition for this symbol

            if (_logger!=null)
              _logger.debug("State "+i+" + "+_tsymbols.getSymbol(j)+" -> State "+index);
          }
        }
      }


    SymbolList symbols;
    do
    {
      mustrepeat = false;

      for (int i = 0; i < C.getSize(); i++)
        if (changedState.get(i) != 0)
        {
          changedState.set(i, 0);
          I = C.get(i);

          if (_logger!=null)
          {
            _logger.debug("Inspecting state "+i+"\n"+I);
          }

          symbols = I.getShiftSymbols();
         
          for (int j = 0; j < symbols.getSymbolCount(); j++)
          {
            J = I.gotoX(symbols.getSymbol(j));
            index = I.getTransition(symbols.getSymbol(j));
            changed = C.get(index).add(J); // if the found state change

            if (changed)
            {
              if (index < changedState.getSize())
                changedState.set(index, 1);
              else
                changedState.add(1);

              if (index <= i) // if J before I, and J
                // was changed then must the loop repeat
                mustrepeat = true;
            }

            if (_logger!=null)
              _logger.debug("State "+i+" + "+symbols.getSymbol(j)+" -> State "+index);
          }
        }

    }
    while (mustrepeat);                        // Repeat till no state changed

    return C;
  }

  /**
   * Generate the parser table
   */
  private void generate()
  {
    long time = System.currentTimeMillis();
    int i, j, k;

    // generate all first sets
    if (_logger!=null)
      _logger.debug("Generating first sets");
    _firstsets = new FirstSets(_grammar, _logger);

    // calculation of alle states and transitions
    if (_logger!=null)
      _logger.debug("Generating states and transitions");
    _C = items();

    if (_logger!=null)
      _logger.debug("Generating parser table");
    _table = new ParserTable(_grammar.getURI(), _tsymbols.getSymbolCount(),
                             _grammar.getIgnorableTokenList().getTokenCount(), _ntsymbols.getSymbolCount(),
                             _grammar.getProductionList().getProductionCount(), _C.getSize());

    if (_logger!=null)
      _logger.debug("Grammar:"+_grammar);

    // symbol for the end of file
    Symbol endsymbol = _tsymbols.getSymbol("EOF");

    // for alle terminal symbols
    for (i = 0; i < _tsymbols.getSymbolCount(); i++)
      _table.setTerminalSymbol(i, _tsymbols.getSymbol(i).getName());

    // for the regex automates of the terminal symbols
    RegexAutomate tdefinition;

    for (i = 0; i < _grammar.getTokenList().getTokenCount(); i++)
    {
      _table.setTokenDefinition(i, (new RegexAutomateGenerator(
        _grammar.getTokenList().getToken(i).getDefinition())).getRegexAutomate());
    }

    for (i = 0; i < _grammar.getIgnorableTokenList().getTokenCount(); i++)
    {
      _table.setIgnorableTokenSymbol(i, _grammar.getIgnorableTokenList().getToken(i).getSymbol().getName());
      _table.setIgnorableTokenDefinition(i, (new RegexAutomateGenerator(
        _grammar.getIgnorableTokenList().getToken(i).getDefinition())).getRegexAutomate());
    }

    // for the non terminal symbols
    for (i = 0; i < _ntsymbols.getSymbolCount(); i++)
      _table.setNonTerminalSymbol(i, _ntsymbols.getSymbol(i).getName());

    // for all productions
    for (i = 0; i < _grammar.getProductionList().getProductionCount(); i++)
    {
      _table.setProductionSymbol(i, _ntsymbols.indexOf(
          _grammar.getProductionList().getProduction(i).getSymbol()));
      _table.setProductionLength(i, _grammar.getProductionList().getProduction(i).getLength());

      ReduceType reducetype = _grammar.getProductionList().getProduction(i).getReduceType();
      if (reducetype==ReduceType.NORMAL)
        _table.setProductionReduceType(i, ParserTable.NORMAL);
      else if (reducetype==ReduceType.APPEND)
        _table.setProductionReduceType(i, ParserTable.APPEND);
      else if (reducetype==ReduceType.RESOLVE)
        _table.setProductionReduceType(i, ParserTable.RESOLVE);
      else if (reducetype==ReduceType.NEGLECT)
        _table.setProductionReduceType(i, ParserTable.NEGLECT);
    }

    // set all actions on error
    for (i = 0; i < _C.getSize(); i++)
      for (j = 0; j < _tsymbols.getSymbolCount(); j++)
        _table.setErrorAction(i, j, 0);

    for (i = 0; i < _C.getSize(); i++)
      for (j = 0; j < _ntsymbols.getSymbolCount(); j++)
        _table.setTransition(i, j, 0);

    // for all itemsets I in collection C
    ItemSet I;
    Token token;
    IntegerList reduceproductions;
    int highestproduction, priority;
    SymbolList shiftsymbols, reducesymbols;

    for (i = 0; i < _C.getSize(); i++)
    {
      I = _C.get(i);

      shiftsymbols = I.getShiftSymbols(); // Transition symbols for shift actions
      reducesymbols = I.getReduceSymbols(); // Lookahead symbols for reduce actions

      for (j = 0; j < _tsymbols.getSymbolCount(); j++)
      {
        reduceproductions = I.getReduceProductions(_tsymbols.getSymbol(j));
        priority = -1;
        highestproduction = -1;
        for (k = 0; k < reduceproductions.getSize(); k++)
        {
          /*if ((_grammar.getProductionList().getProduction(reduceproductions.get(k)).getSymbol().equals(
                      _grammar.getStartSymbol())) && (_tsymbols.getSymbol(j).equals(endsymbol)))
            _table.setAcceptAction(i, j, reduceproductions.get(k));*/

          if (_grammar.getProductionList().getProduction(reduceproductions.get(k)).getPriority()
                  > priority)
          {
            highestproduction = reduceproductions.get(k);
            priority = _grammar.getProductionList().getProduction(highestproduction).getPriority();
          }
        }

        //if (!_table.isAcceptAction(i, j))

        if (shiftsymbols.contains(_tsymbols.getSymbol(j)))
        {
          if (reducesymbols.contains(_tsymbols.getSymbol(j)))
          {
            token = _grammar.getTokenList().getToken(_tsymbols.getSymbol(j));

            if (token.getPriority() > priority)
            {
              _table.setShiftAction(i, j, I.getTransition(_tsymbols.getSymbol(j)));

              if (_logger!=null)
                _logger.warn("Shift/Reduce Conflict State "+i+" between"+
                             System.getProperty(("line.separator"))+
                             token+System.getProperty(("line.separator"))+
                             _grammar.getProductionList().getProduction(highestproduction)+
                             System.getProperty(("line.separator"))+
                             "The parser will shift");
            }
            else if (token.getPriority() < priority)
            {
              /*if ((_grammar.getProductionList().getProduction(highestproduction).getSymbol().equals(
                      _grammar.getStartSymbol()))
                   && (_tsymbols.getSymbol(j).equals(endsymbol)))
                _table.setAcceptAction(i, j, highestproduction);
              else*/
                _table.setReduceAction(i, j, highestproduction);

              if (_logger!=null)   
                _logger.warn("Shift/Reduce Conflict State "+i+" between"+
                             System.getProperty(("line.separator"))+
                             token+System.getProperty(("line.separator"))+
                             _grammar.getProductionList().getProduction(highestproduction)+
                             System.getProperty(("line.separator"))+
                             "The parser will reduce");
            }
            else
            {
              if (token.getAssociativity()==Associativity.RIGHT)
              {
                _table.setShiftAction(i, j, I.getTransition(_tsymbols.getSymbol(j)));

                if (_logger!=null)   
                  _logger.warn("Shift/Reduce Conflict State "+i+" between"+
                               System.getProperty(("line.separator"))+
                               token+System.getProperty(("line.separator"))+
                               _grammar.getProductionList().getProduction(highestproduction)+
                               System.getProperty(("line.separator"))+
                               "The parser will shift");
              }
              else if (token.getAssociativity()==Associativity.LEFT)
              {
                /*if ((_grammar.getProductionList().getProduction(highestproduction).getSymbol().equals(
                        _grammar.getStartSymbol()))
                    && (_tsymbols.getSymbol(j).equals(endsymbol)))
                  _table.setAcceptAction(i, j, highestproduction);
                else*/
                  _table.setReduceAction(i, j, highestproduction);

                if (_logger!=null)   
                  _logger.warn("Shift/Reduce Conflict State "+i+" between"+
                               System.getProperty(("line.separator"))+
                               token+System.getProperty(("line.separator"))+
                               _grammar.getProductionList().getProduction(highestproduction)+
                               System.getProperty(("line.separator"))+
                               "The parser will reduce");
              }
              else
              {
                _table.setShiftAction(i, j, I.getTransition(_tsymbols.getSymbol(j)));

                if (_logger!=null)   
                  _logger.info("Shift/Reduce Conflict State "+i+" between"+
                               System.getProperty(("line.separator"))+
                               token+System.getProperty(("line.separator"))+
                               _grammar.getProductionList().getProduction(highestproduction)+
                               System.getProperty(("line.separator"))+
                               "The parser will shift");
                _conflicts.add(new ShiftReduceConflict(I, _tsymbols.getSymbol(j)));
              }
            }
          }
          else
            _table.setShiftAction(i, j, I.getTransition(_tsymbols.getSymbol(j)));
        }
        else if (reducesymbols.contains(_tsymbols.getSymbol(j)))
        {
          /*if ((_grammar.getProductionList().getProduction(highestproduction).getSymbol().equals(
                  _grammar.getStartSymbol()))
               && (_tsymbols.getSymbol(j).equals(endsymbol)))
            _table.setAcceptAction(i, j, highestproduction);
          else*/
            _table.setReduceAction(i, j, highestproduction);
        }

        for (k = 0; k < reduceproductions.getSize(); k++)
          if ((_grammar.getProductionList().getProduction(reduceproductions.get(k)).getSymbol().equals(
                      _grammar.getStartSymbol())) && (_tsymbols.getSymbol(j).equals(endsymbol)))
            _table.setAcceptAction(i, j, reduceproductions.get(k));
      }

      for (j = 0; j < _ntsymbols.getSymbolCount(); j++)
        if (shiftsymbols.contains(_ntsymbols.getSymbol(j)))
          _table.setTransition(i, j, I.getTransition(_ntsymbols.getSymbol(j)));
    }

    /*_table.setAcceptAction(_C.get(0).getTransition(_grammar.getStartSymbol()),
                           _ntsymbols.indexOf(_grammar.getStartSymbol()), 0);*/

    if (_logger!=null)   
      _logger.info("Time for the generation from parsertable: "
                   + (System.currentTimeMillis() - time) + " ms");
  }
}
TOP

Related Classes of net.sourceforge.chaperon.parser.generator.ParserTableGenerator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.