Source Code of org.antlr.tool.Interpreter$LexerActionGetTokenType

/*
 * [The "BSD license"]
 *  Copyright (c) 2010 Terence Parr
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.antlr.tool;


import org.antlr.analysis.DFA;
import org.antlr.analysis.*;
import org.antlr.misc.IntervalSet;
import org.antlr.runtime.*;
import org.antlr.runtime.debug.BlankDebugEventListener;
import org.antlr.runtime.debug.DebugEventListener;
import org.antlr.runtime.debug.ParseTreeBuilder;
import org.antlr.runtime.tree.ParseTree;


import java.util.List;
import java.util.Stack;


/** The recognition interpreter/engine for grammars.  Separated
 *  out of Grammar as it's related, but technically not a Grammar function.
 *  You create an interpreter for a grammar and an input stream.  This object
 *  can act as a TokenSource so that you can hook up two grammars (via
 *  a CommonTokenStream) to lex/parse.  Being a token source only makes sense
 *  for a lexer grammar of course.
 */
public class Interpreter implements TokenSource {
  protected Grammar grammar;
  protected IntStream input;


  /** A lexer listener that just creates token objects as they
   *  are matched.  scan() use this listener to get a single object.
   *  To get a stream of tokens, you must call scan() multiple times,
   *  recording the token object result after each call.
   */
  class LexerActionGetTokenType extends BlankDebugEventListener {
    public CommonToken token;
    Grammar g;
    public LexerActionGetTokenType(Grammar g) {
      this.g = g;
    }


    @Override
    public void exitRule(String grammarFileName, String ruleName) {
      if ( !ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ){
        int type = g.getTokenType(ruleName);
        int channel = Token.DEFAULT_CHANNEL;
        token = new CommonToken((CharStream)input,type,channel,0,0);
      }
    }
  }


  public Interpreter(Grammar grammar, IntStream input) {
    this.grammar = grammar;
    this.input = input;
  }


  @Override
  public Token nextToken() {
    if ( grammar.type!=Grammar.LEXER ) {
      return null;
    }
    if ( input.LA(1)==CharStream.EOF ) {
            return new CommonToken((CharStream)input,Token.EOF,Token.DEFAULT_CHANNEL,input.index(),input.index());
    }
    int start = input.index();
    int charPos = ((CharStream)input).getCharPositionInLine();
    CommonToken token = null;
    loop:
    while (input.LA(1)!=CharStream.EOF) {
      try {
        token = scan(Grammar.ARTIFICIAL_TOKENS_RULENAME, null);
        break;
      }
      catch (RecognitionException re) {
        // report a problem and try for another
        reportScanError(re);
        continue loop;
      }
    }
    // the scan can only set type
    // we must set the line, and other junk here to make it a complete token
    int stop = input.index()-1;
    if ( token==null ) {
            return new CommonToken((CharStream)input,Token.EOF,Token.DEFAULT_CHANNEL,start,start);
    }
    token.setLine(((CharStream)input).getLine());
    token.setStartIndex(start);
    token.setStopIndex(stop);
    token.setCharPositionInLine(charPos);
    return token;
  }


  /** For a given input char stream, try to match against the NFA
   *  starting at startRule.  This is a deterministic parse even though
   *  it is using an NFA because it uses DFAs at each decision point to
   *  predict which alternative will succeed.  This is exactly what the
   *  generated parser will do.
   *
   *  This only does lexer grammars.
   *
   *  Return the token type associated with the final rule end state.
   */
  public void scan(String startRule,
           DebugEventListener actions,
           List<NFAState> visitedStates)
    throws RecognitionException
  {
    if ( grammar.type!=Grammar.LEXER ) {
      return;
    }


    //System.out.println("scan("+startRule+",'"+in.substring(in.index(),in.size()-1)+"')");
    // Build NFAs/DFAs from the grammar AST if NFAs haven't been built yet
    if ( grammar.getRuleStartState(startRule)==null ) {
      grammar.buildNFA();
    }


    if ( !grammar.allDecisionDFAHaveBeenCreated() ) {
      // Create the DFA predictors for each decision
      grammar.createLookaheadDFAs();
    }


    // do the parse
    Stack<NFAState> ruleInvocationStack = new Stack<NFAState>();
    NFAState start = grammar.getRuleStartState(startRule);
    NFAState stop = grammar.getRuleStopState(startRule);
    parseEngine(startRule, start, stop, input, ruleInvocationStack,
          actions, visitedStates);
  }


  public CommonToken scan(String startRule)
    throws RecognitionException
  {
    return scan(startRule, null);
  }


  public CommonToken scan(String startRule,
              List<NFAState> visitedStates)
    throws RecognitionException
  {
    LexerActionGetTokenType actions = new LexerActionGetTokenType(grammar);
    scan(startRule, actions, visitedStates);
    return actions.token;
  }


  public void parse(String startRule,
            DebugEventListener actions,
            List<NFAState> visitedStates)
    throws RecognitionException
  {
    //System.out.println("parse("+startRule+")");
    // Build NFAs/DFAs from the grammar AST if NFAs haven't been built yet
    if ( grammar.getRuleStartState(startRule)==null ) {
      grammar.buildNFA();
    }
    if ( !grammar.allDecisionDFAHaveBeenCreated() ) {
      // Create the DFA predictors for each decision
      grammar.createLookaheadDFAs();
    }
    // do the parse
    Stack<NFAState> ruleInvocationStack = new Stack<NFAState>();
    NFAState start = grammar.getRuleStartState(startRule);
    NFAState stop = grammar.getRuleStopState(startRule);
    parseEngine(startRule, start, stop, input, ruleInvocationStack,
          actions, visitedStates);
  }


  public ParseTree parse(String startRule)
    throws RecognitionException
  {
    return parse(startRule, null);
  }


  public ParseTree parse(String startRule, List<NFAState> visitedStates)
    throws RecognitionException
  {
    ParseTreeBuilder actions = new ParseTreeBuilder(grammar.name);
    try {
      parse(startRule, actions, visitedStates);
    }
    catch (RecognitionException re) {
      // Errors are tracked via the ANTLRDebugInterface
      // Exceptions are used just to blast out of the parse engine
      // The error will be in the parse tree.
    }
    return actions.getTree();
  }


  /** Fill a list of all NFA states visited during the parse */
  protected void parseEngine(String startRule,
                 NFAState start,
                 NFAState stop,
                 IntStream input,
                 Stack<NFAState> ruleInvocationStack,
                 DebugEventListener actions,
                 List<NFAState> visitedStates)
    throws RecognitionException
  {
    NFAState s = start;
    if ( actions!=null ) {
      actions.enterRule(s.nfa.grammar.getFileName(), start.enclosingRule.name);
    }
    int t = input.LA(1);
    while ( s!=stop ) {
      if ( visitedStates!=null ) {
        visitedStates.add(s);
      }
      /*
      System.out.println("parse state "+s.stateNumber+" input="+
        s.nfa.grammar.getTokenDisplayName(t));
        */
      // CASE 1: decision state
      if ( s.getDecisionNumber()>0 && s.nfa.grammar.getNumberOfAltsForDecisionNFA(s)>1 ) {
        // decision point, must predict and jump to alt
        DFA dfa = s.nfa.grammar.getLookaheadDFA(s.getDecisionNumber());
        /*
        if ( s.nfa.grammar.type!=Grammar.LEXER ) {
          System.out.println("decision: "+
                   dfa.getNFADecisionStartState().getDescription()+
                   " input="+s.nfa.grammar.getTokenDisplayName(t));
        }
        */
        int m = input.mark();
        int predictedAlt = predict(dfa);
        if ( predictedAlt == NFA.INVALID_ALT_NUMBER ) {
          String description = dfa.getNFADecisionStartState().getDescription();
          NoViableAltException nvae =
            new NoViableAltException(description,
                            dfa.getDecisionNumber(),
                            s.stateNumber,
                            input);
          if ( actions!=null ) {
            actions.recognitionException(nvae);
          }
          input.consume(); // recover
          throw nvae;
        }
        input.rewind(m);
        int parseAlt =
          s.translateDisplayAltToWalkAlt(predictedAlt);
        /*
        if ( s.nfa.grammar.type!=Grammar.LEXER ) {
          System.out.println("predicted alt "+predictedAlt+", parseAlt "+
                     parseAlt);
        }
        */
        NFAState alt;
        if ( parseAlt > s.nfa.grammar.getNumberOfAltsForDecisionNFA(s) ) {
          // implied branch of loop etc...
          alt = s.nfa.grammar.nfa.getState( s.endOfBlockStateNumber );
        }
        else {
          alt = s.nfa.grammar.getNFAStateForAltOfDecision(s, parseAlt);
        }
        s = (NFAState)alt.transition[0].target;
        continue;
      }


      // CASE 2: finished matching a rule
      if ( s.isAcceptState() ) { // end of rule node
        if ( actions!=null ) {
          actions.exitRule(s.nfa.grammar.getFileName(), s.enclosingRule.name);
        }
        if ( ruleInvocationStack.empty() ) {
          // done parsing.  Hit the start state.
          //System.out.println("stack empty in stop state for "+s.getEnclosingRule());
          break;
        }
        // pop invoking state off the stack to know where to return to
        NFAState invokingState = ruleInvocationStack.pop();
        RuleClosureTransition invokingTransition =
            (RuleClosureTransition)invokingState.transition[0];
        // move to node after state that invoked this rule
        s = invokingTransition.followState;
        continue;
      }


      Transition trans = s.transition[0];
      Label label = trans.label;
      if ( label.isSemanticPredicate() ) {
        FailedPredicateException fpe =
          new FailedPredicateException(input,
                         s.enclosingRule.name,
                         "can't deal with predicates yet");
        if ( actions!=null ) {
          actions.recognitionException(fpe);
        }
      }


      // CASE 3: epsilon transition
      if ( label.isEpsilon() ) {
        // CASE 3a: rule invocation state
        if ( trans instanceof RuleClosureTransition ) {
          ruleInvocationStack.push(s);
          s = (NFAState)trans.target;
          //System.out.println("call "+s.enclosingRule.name+" from "+s.nfa.grammar.getFileName());
          if ( actions!=null ) {
            actions.enterRule(s.nfa.grammar.getFileName(), s.enclosingRule.name);
          }
          // could be jumping to new grammar, make sure DFA created
          if ( !s.nfa.grammar.allDecisionDFAHaveBeenCreated() ) {
            s.nfa.grammar.createLookaheadDFAs();
          }
        }
        // CASE 3b: plain old epsilon transition, just move
        else {
          s = (NFAState)trans.target;
        }
      }


      // CASE 4: match label on transition
      else if ( label.matches(t) ) {
        if ( actions!=null ) {
          if ( s.nfa.grammar.type == Grammar.PARSER ||
             s.nfa.grammar.type == Grammar.COMBINED )
          {
            actions.consumeToken(((TokenStream)input).LT(1));
          }
        }
        s = (NFAState)s.transition[0].target;
        input.consume();
        t = input.LA(1);
      }


      // CASE 5: error condition; label is inconsistent with input
      else {
        if ( label.isAtom() ) {
          MismatchedTokenException mte =
            new MismatchedTokenException(label.getAtom(), input);
          if ( actions!=null ) {
            actions.recognitionException(mte);
          }
          input.consume(); // recover
          throw mte;
        }
        else if ( label.isSet() ) {
          MismatchedSetException mse =
            new MismatchedSetException(((IntervalSet)label.getSet()).toRuntimeBitSet(),
                           input);
          if ( actions!=null ) {
            actions.recognitionException(mse);
          }
          input.consume(); // recover
          throw mse;
        }
        else if ( label.isSemanticPredicate() ) {
          FailedPredicateException fpe =
            new FailedPredicateException(input,
                           s.enclosingRule.name,
                           label.getSemanticContext().toString());
          if ( actions!=null ) {
            actions.recognitionException(fpe);
          }
          input.consume(); // recover
          throw fpe;
        }
        else {
          throw new RecognitionException(input); // unknown error
        }
      }
    }
    //System.out.println("hit stop state for "+stop.getEnclosingRule());
    if ( actions!=null ) {
      actions.exitRule(s.nfa.grammar.getFileName(), stop.enclosingRule.name);
    }
  }


  /** Given an input stream, return the unique alternative predicted by
   *  matching the input.  Upon error, return NFA.INVALID_ALT_NUMBER
   *  The first symbol of lookahead is presumed to be primed; that is,
   *  input.lookahead(1) must point at the input symbol you want to start
   *  predicting with.
   */
  public int predict(DFA dfa) {
    DFAState s = dfa.startState;
    int c = input.LA(1);
    Transition eotTransition = null;
  dfaLoop:
    while ( !s.isAcceptState() ) {
      /*
      System.out.println("DFA.predict("+s.getStateNumber()+", "+
          dfa.getNFA().getGrammar().getTokenName(c)+")");
      */
      // for each edge of s, look for intersection with current char
      for (int i=0; i<s.getNumberOfTransitions(); i++) {
        Transition t = s.transition(i);
        // special case: EOT matches any char
        if ( t.label.matches(c) ) {
          // take transition i
          s = (DFAState)t.target;
          input.consume();
          c = input.LA(1);
          continue dfaLoop;
        }
        if ( t.label.getAtom()==Label.EOT ) {
          eotTransition = t;
        }
      }
      if ( eotTransition!=null ) {
        s = (DFAState)eotTransition.target;
        continue dfaLoop;
      }
      /*
      ErrorManager.error(ErrorManager.MSG_NO_VIABLE_DFA_ALT,
                 s,
                 dfa.nfa.grammar.getTokenName(c));
      */
      return NFA.INVALID_ALT_NUMBER;
    }
    // woohoo!  We know which alt to predict
    // nothing emanates from a stop state; must terminate anyway
    /*
    System.out.println("DFA stop state "+s.getStateNumber()+" predicts "+
        s.getUniquelyPredictedAlt());
    */
    return s.getUniquelyPredictedAlt();
  }


  public void reportScanError(RecognitionException re) {
    CharStream cs = (CharStream)input;
    // print as good of a message as we can, given that we do not have
    // a Lexer object and, hence, cannot call the routine to get a
    // decent error message.
    System.err.println("problem matching token at "+
      cs.getLine()+":"+cs.getCharPositionInLine()+" "+re);
  }


  @Override
  public String getSourceName() {
    return input.getSourceName();
  }


}
Source Code of org.antlr.tool.Interpreter$LexerActionGetTokenType

Related Classes of org.antlr.tool.Interpreter$LexerActionGetTokenType