/*
* Copyright (C) Chaperon. All rights reserved.
* -------------------------------------------------------------------------
* This software is published under the terms of the Apache Software License
* version 1.1, a copy of which has been included with this distribution in
* the LICENSE file.
*/
package net.sourceforge.chaperon.parser.generator;
import java.util.ArrayList;
import org.apache.avalon.framework.logger.LogEnabled;
import org.apache.avalon.framework.logger.Logger;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
import net.sourceforge.chaperon.grammar.Grammar;
import net.sourceforge.chaperon.grammar.SyntaxErrorException;
import net.sourceforge.chaperon.grammar.production.ProductionList;
import net.sourceforge.chaperon.grammar.production.ReduceType;
import net.sourceforge.chaperon.grammar.symbol.Symbol;
import net.sourceforge.chaperon.grammar.symbol.SymbolList;
import net.sourceforge.chaperon.grammar.token.Associativity;
//import net.sourceforge.chaperon.grammar.token.Comment;
import net.sourceforge.chaperon.grammar.token.Token;
import net.sourceforge.chaperon.grammar.token.TokenList;
//import net.sourceforge.chaperon.grammar.token.Whitespace;
import net.sourceforge.chaperon.helpers.IntegerList;
import net.sourceforge.chaperon.parser.RegexAutomate;
import net.sourceforge.chaperon.parser.ParserTable;
import net.sourceforge.chaperon.parser.generator.conflict.Conflict;
import net.sourceforge.chaperon.parser.generator.conflict.ConflictList;
import net.sourceforge.chaperon.parser.generator.conflict.ShiftReduceConflict;
import net.sourceforge.chaperon.parser.generator.conflict.ReduceReduceConflict;
/**
* Ther generator generates a parser table from grammar
*
* @author Stephan Michels
* @version CVS $Id: ParserTableGenerator.java,v 1.12 2002/05/07 16:40:56 benedikta Exp $
*/
public class ParserTableGenerator implements LogEnabled
{
private Grammar _grammar;
/*private ProductionList _grammar.getProductionList();
private TokenList _grammar.getTokenList();*/
private SymbolList _symbols;
private SymbolList _tsymbols;
private SymbolList _ntsymbols;
/*private Whitespace _whitespace;
private Comment _comment;
private Symbol _grammar.getStartSymbol();*/
private FirstSets _firstsets;
private Collection _C;
private ParserTable _table;
private ConflictList _conflicts = new ConflictList();
private Logger _logger;
/**
* Creates a generator
*
* @param grammar Grammar, which should generate the parser table
*
* @throws SyntaxErrorException
*/
public ParserTableGenerator(Grammar grammar) throws SyntaxErrorException
{
try
{
_grammar = (Grammar)grammar.clone();
} catch (CloneNotSupportedException cnse)
{
throw new IllegalArgumentException("Grammar is nor cloneable");
}
SyntaxErrorException exception = _grammar.validate();
if (exception != null)
throw exception;
//_grammar.getProductionList() = _grammar.getProductionList();
//_grammar.getTokenList() = _grammar.getTokenList();
_symbols = _grammar.getSymbols();
_tsymbols = _symbols.getTerminalSymbols();
_ntsymbols = _symbols.getNonTerminalSymbols();
//_whitespace = _grammar.getWhitespace();
//_comment = _grammar.getComment();
//_grammar.getStartSymbol() = _grammar.getStartSymbol();
}
/**
* Returns the generated parser table
*
* @return The parser table
*/
public ParserTable getParserTable()
{
if (_table==null)
generate();
return _table;
}
/**
* Provide component with a logger.
*
* @param logger the logger
*/
public void enableLogging(Logger logger)
{
_logger = logger;
}
/**
* Calculates all state and transitions
* and combine all states with the same core.
*
* @return Collection of itemsets
*/
private Collection items()
{
// C = closure( [S'=^S,EOF] )
Collection C = new Collection(_grammar);
IntegerList changedState = new IntegerList(); // 0=not changed 1=changed
ItemSet I = new ItemSet(_grammar, _firstsets);
IntegerList startlist = _grammar.getProductionList().getProductionList(_grammar.getStartSymbol());
Symbol endsymbol = _tsymbols.getSymbol("EOF");
for (int i = 0; i < startlist.getSize(); i++)
I.addItem(startlist.get(i), 0, endsymbol);
C.add(I.closure());
changedState.add(1);
ItemSet J;
int index;
boolean mustrepeat, changed;
for (int i = 0; i < C.getSize(); i++)
if (changedState.get(i) != 0)
{
changedState.set(i, 0);
I = C.get(i);
if (_logger!=null)
_logger.debug("Inspecting state "+i+"\n"+I);
// J = goto(I,X) add to C, for all non terminal and terminal elements X
// for the non terminal symbols
for (int j = 0; j < _ntsymbols.getSymbolCount(); j++)
{
J = I.gotoX(_ntsymbols.getSymbol(j));
if (!J.isEmpty())
{
index = C.indexOfCore(J);
if (index < 0) // if C doesn't contain J
{
index = C.add(J);
changedState.add(1);
}
else // otherwise the found state extends through J
{
changed = C.get(index).add(J); // if the found state change
if (changed)
{
if (index < changedState.getSize())
changedState.set(index, 1);
else
changedState.add(1);
}
if ((index <= i) && (changed)) // if J before I, and J
// was changed then must the loop repeat
mustrepeat = true;
}
I.setTransition(_ntsymbols.getSymbol(j), index); // stores the transition for this symbol
if (_logger!=null)
_logger.debug("State "+i+" + "+_ntsymbols.getSymbol(j)+" -> State "+index);
}
}
// and for the terminal symbls
for (int j = 0; j < _tsymbols.getSymbolCount(); j++)
{
J = I.gotoX(_tsymbols.getSymbol(j));
if (!J.isEmpty())
{
index = C.indexOfCore(J);
if (index < 0) // if C doesn't contain J
{
index = C.add(J);
changedState.add(1);
}
else // otherwise the found state extends through J
{
changed = C.get(index).add(J); // if the found state change
if (changed)
{
if (index < changedState.getSize())
changedState.set(index, 1);
else
changedState.add(1);
}
if ((index <= i) && (changed)) // if J before I, and J
// was changed then must the loop repeat
mustrepeat = true;
}
I.setTransition(_tsymbols.getSymbol(j), index); // stores the transition for this symbol
if (_logger!=null)
_logger.debug("State "+i+" + "+_tsymbols.getSymbol(j)+" -> State "+index);
}
}
}
SymbolList symbols;
do
{
mustrepeat = false;
for (int i = 0; i < C.getSize(); i++)
if (changedState.get(i) != 0)
{
changedState.set(i, 0);
I = C.get(i);
if (_logger!=null)
{
_logger.debug("Inspecting state "+i+"\n"+I);
}
symbols = I.getShiftSymbols();
for (int j = 0; j < symbols.getSymbolCount(); j++)
{
J = I.gotoX(symbols.getSymbol(j));
index = I.getTransition(symbols.getSymbol(j));
changed = C.get(index).add(J); // if the found state change
if (changed)
{
if (index < changedState.getSize())
changedState.set(index, 1);
else
changedState.add(1);
if (index <= i) // if J before I, and J
// was changed then must the loop repeat
mustrepeat = true;
}
if (_logger!=null)
_logger.debug("State "+i+" + "+symbols.getSymbol(j)+" -> State "+index);
}
}
}
while (mustrepeat); // Repeat till no state changed
return C;
}
/**
* Generate the parser table
*/
private void generate()
{
long time = System.currentTimeMillis();
int i, j, k;
// generate all first sets
if (_logger!=null)
_logger.debug("Generating first sets");
_firstsets = new FirstSets(_grammar, _logger);
// calculation of alle states and transitions
if (_logger!=null)
_logger.debug("Generating states and transitions");
_C = items();
if (_logger!=null)
_logger.debug("Generating parser table");
_table = new ParserTable(_grammar.getURI(), _tsymbols.getSymbolCount(),
_grammar.getIgnorableTokenList().getTokenCount(), _ntsymbols.getSymbolCount(),
_grammar.getProductionList().getProductionCount(), _C.getSize());
if (_logger!=null)
_logger.debug("Grammar:"+_grammar);
// symbol for the end of file
Symbol endsymbol = _tsymbols.getSymbol("EOF");
// for alle terminal symbols
for (i = 0; i < _tsymbols.getSymbolCount(); i++)
_table.setTerminalSymbol(i, _tsymbols.getSymbol(i).getName());
// for the regex automates of the terminal symbols
RegexAutomate tdefinition;
for (i = 0; i < _grammar.getTokenList().getTokenCount(); i++)
{
_table.setTokenDefinition(i, (new RegexAutomateGenerator(
_grammar.getTokenList().getToken(i).getDefinition())).getRegexAutomate());
}
for (i = 0; i < _grammar.getIgnorableTokenList().getTokenCount(); i++)
{
_table.setIgnorableTokenSymbol(i, _grammar.getIgnorableTokenList().getToken(i).getSymbol().getName());
_table.setIgnorableTokenDefinition(i, (new RegexAutomateGenerator(
_grammar.getIgnorableTokenList().getToken(i).getDefinition())).getRegexAutomate());
}
// for the non terminal symbols
for (i = 0; i < _ntsymbols.getSymbolCount(); i++)
_table.setNonTerminalSymbol(i, _ntsymbols.getSymbol(i).getName());
// for all productions
for (i = 0; i < _grammar.getProductionList().getProductionCount(); i++)
{
_table.setProductionSymbol(i, _ntsymbols.indexOf(
_grammar.getProductionList().getProduction(i).getSymbol()));
_table.setProductionLength(i, _grammar.getProductionList().getProduction(i).getLength());
ReduceType reducetype = _grammar.getProductionList().getProduction(i).getReduceType();
if (reducetype==ReduceType.NORMAL)
_table.setProductionReduceType(i, ParserTable.NORMAL);
else if (reducetype==ReduceType.APPEND)
_table.setProductionReduceType(i, ParserTable.APPEND);
else if (reducetype==ReduceType.RESOLVE)
_table.setProductionReduceType(i, ParserTable.RESOLVE);
else if (reducetype==ReduceType.NEGLECT)
_table.setProductionReduceType(i, ParserTable.NEGLECT);
}
// set all actions on error
for (i = 0; i < _C.getSize(); i++)
for (j = 0; j < _tsymbols.getSymbolCount(); j++)
_table.setErrorAction(i, j, 0);
for (i = 0; i < _C.getSize(); i++)
for (j = 0; j < _ntsymbols.getSymbolCount(); j++)
_table.setTransition(i, j, 0);
// for all itemsets I in collection C
ItemSet I;
Token token;
IntegerList reduceproductions;
int highestproduction, priority;
SymbolList shiftsymbols, reducesymbols;
for (i = 0; i < _C.getSize(); i++)
{
I = _C.get(i);
shiftsymbols = I.getShiftSymbols(); // Transition symbols for shift actions
reducesymbols = I.getReduceSymbols(); // Lookahead symbols for reduce actions
for (j = 0; j < _tsymbols.getSymbolCount(); j++)
{
reduceproductions = I.getReduceProductions(_tsymbols.getSymbol(j));
priority = -1;
highestproduction = -1;
for (k = 0; k < reduceproductions.getSize(); k++)
{
/*if ((_grammar.getProductionList().getProduction(reduceproductions.get(k)).getSymbol().equals(
_grammar.getStartSymbol())) && (_tsymbols.getSymbol(j).equals(endsymbol)))
_table.setAcceptAction(i, j, reduceproductions.get(k));*/
if (_grammar.getProductionList().getProduction(reduceproductions.get(k)).getPriority()
> priority)
{
highestproduction = reduceproductions.get(k);
priority = _grammar.getProductionList().getProduction(highestproduction).getPriority();
}
}
//if (!_table.isAcceptAction(i, j))
if (shiftsymbols.contains(_tsymbols.getSymbol(j)))
{
if (reducesymbols.contains(_tsymbols.getSymbol(j)))
{
token = _grammar.getTokenList().getToken(_tsymbols.getSymbol(j));
if (token.getPriority() > priority)
{
_table.setShiftAction(i, j, I.getTransition(_tsymbols.getSymbol(j)));
if (_logger!=null)
_logger.warn("Shift/Reduce Conflict State "+i+" between"+
System.getProperty(("line.separator"))+
token+System.getProperty(("line.separator"))+
_grammar.getProductionList().getProduction(highestproduction)+
System.getProperty(("line.separator"))+
"The parser will shift");
}
else if (token.getPriority() < priority)
{
/*if ((_grammar.getProductionList().getProduction(highestproduction).getSymbol().equals(
_grammar.getStartSymbol()))
&& (_tsymbols.getSymbol(j).equals(endsymbol)))
_table.setAcceptAction(i, j, highestproduction);
else*/
_table.setReduceAction(i, j, highestproduction);
if (_logger!=null)
_logger.warn("Shift/Reduce Conflict State "+i+" between"+
System.getProperty(("line.separator"))+
token+System.getProperty(("line.separator"))+
_grammar.getProductionList().getProduction(highestproduction)+
System.getProperty(("line.separator"))+
"The parser will reduce");
}
else
{
if (token.getAssociativity()==Associativity.RIGHT)
{
_table.setShiftAction(i, j, I.getTransition(_tsymbols.getSymbol(j)));
if (_logger!=null)
_logger.warn("Shift/Reduce Conflict State "+i+" between"+
System.getProperty(("line.separator"))+
token+System.getProperty(("line.separator"))+
_grammar.getProductionList().getProduction(highestproduction)+
System.getProperty(("line.separator"))+
"The parser will shift");
}
else if (token.getAssociativity()==Associativity.LEFT)
{
/*if ((_grammar.getProductionList().getProduction(highestproduction).getSymbol().equals(
_grammar.getStartSymbol()))
&& (_tsymbols.getSymbol(j).equals(endsymbol)))
_table.setAcceptAction(i, j, highestproduction);
else*/
_table.setReduceAction(i, j, highestproduction);
if (_logger!=null)
_logger.warn("Shift/Reduce Conflict State "+i+" between"+
System.getProperty(("line.separator"))+
token+System.getProperty(("line.separator"))+
_grammar.getProductionList().getProduction(highestproduction)+
System.getProperty(("line.separator"))+
"The parser will reduce");
}
else
{
_table.setShiftAction(i, j, I.getTransition(_tsymbols.getSymbol(j)));
if (_logger!=null)
_logger.info("Shift/Reduce Conflict State "+i+" between"+
System.getProperty(("line.separator"))+
token+System.getProperty(("line.separator"))+
_grammar.getProductionList().getProduction(highestproduction)+
System.getProperty(("line.separator"))+
"The parser will shift");
_conflicts.add(new ShiftReduceConflict(I, _tsymbols.getSymbol(j)));
}
}
}
else
_table.setShiftAction(i, j, I.getTransition(_tsymbols.getSymbol(j)));
}
else if (reducesymbols.contains(_tsymbols.getSymbol(j)))
{
/*if ((_grammar.getProductionList().getProduction(highestproduction).getSymbol().equals(
_grammar.getStartSymbol()))
&& (_tsymbols.getSymbol(j).equals(endsymbol)))
_table.setAcceptAction(i, j, highestproduction);
else*/
_table.setReduceAction(i, j, highestproduction);
}
for (k = 0; k < reduceproductions.getSize(); k++)
if ((_grammar.getProductionList().getProduction(reduceproductions.get(k)).getSymbol().equals(
_grammar.getStartSymbol())) && (_tsymbols.getSymbol(j).equals(endsymbol)))
_table.setAcceptAction(i, j, reduceproductions.get(k));
}
for (j = 0; j < _ntsymbols.getSymbolCount(); j++)
if (shiftsymbols.contains(_ntsymbols.getSymbol(j)))
_table.setTransition(i, j, I.getTransition(_ntsymbols.getSymbol(j)));
}
/*_table.setAcceptAction(_C.get(0).getTransition(_grammar.getStartSymbol()),
_ntsymbols.indexOf(_grammar.getStartSymbol()), 0);*/
if (_logger!=null)
_logger.info("Time for the generation from parsertable: "
+ (System.currentTimeMillis() - time) + " ms");
}
}