Package org.antlr.analysis

Source Code of org.antlr.analysis.DFA

/*
* [The "BSD license"]
*  Copyright (c) 2010 Terence Parr
*  All rights reserved.
*
*  Redistribution and use in source and binary forms, with or without
*  modification, are permitted provided that the following conditions
*  are met:
*  1. Redistributions of source code must retain the above copyright
*      notice, this list of conditions and the following disclaimer.
*  2. Redistributions in binary form must reproduce the above copyright
*      notice, this list of conditions and the following disclaimer in the
*      documentation and/or other materials provided with the distribution.
*  3. The name of the author may not be used to endorse or promote products
*      derived from this software without specific prior written permission.
*
*  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
*  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
*  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
*  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
*  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
*  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
*  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
*  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
*  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.analysis;

import org.antlr.codegen.CodeGenerator;
import org.antlr.misc.IntSet;
import org.antlr.misc.IntervalSet;
import org.antlr.misc.Utils;
import org.antlr.runtime.IntStream;
import org.stringtemplate.v4.ST;
import org.antlr.tool.*;

import java.util.*;

/** A DFA (converted from a grammar's NFA).
*  DFAs are used as prediction machine for alternative blocks in all kinds
*  of recognizers (lexers, parsers, tree walkers).
*/
public class DFA {
  public static final int REACHABLE_UNKNOWN = -2;
  public static final int REACHABLE_BUSY = -1; // in process of computing
  public static final int REACHABLE_NO = 0;
  public static final int REACHABLE_YES = 1;

  public static final int CYCLIC_UNKNOWN = -2;
  public static final int CYCLIC_BUSY = -1; // in process of computing
  public static final int CYCLIC_DONE = 0;
 
  /** Prevent explosion of DFA states during conversion. The max number
   *  of states per alt in a single decision's DFA.
  public static final int MAX_STATES_PER_ALT_IN_DFA = 450;
   */

  /** Set to 0 to not terminate early (time in ms) */
  public static int MAX_TIME_PER_DFA_CREATION = 1*1000;

  /** How many edges can each DFA state have before a "special" state
   *  is created that uses IF expressions instead of a table?
   */
  public static int MAX_STATE_TRANSITIONS_FOR_TABLE = 65534;

  /** What's the start state for this DFA? */
    public DFAState startState;

  /** This DFA is being built for which decision? */
  public int decisionNumber = 0;

    /** From what NFAState did we create the DFA? */
    public NFAState decisionNFAStartState;

  /** The printable grammar fragment associated with this DFA */
  public String description;

  /** A set of all uniquely-numbered DFA states.  Maps hash of DFAState
     *  to the actual DFAState object.  We use this to detect
     *  existing DFA states.  Map<DFAState,DFAState>.  Use Map so
   *  we can get old state back (Set only allows you to see if it's there).
   *  Not used during fixed k lookahead as it's a waste to fill it with
   *  a dup of states array.
     */
    protected Map<DFAState, DFAState> uniqueStates = new HashMap<DFAState, DFAState>();

  /** Maps the state number to the actual DFAState.  Use a Vector as it
   *  grows automatically when I set the ith element.  This contains all
   *  states, but the states are not unique.  s3 might be same as s1 so
   *  s3 &rarr; s1 in this table.  This is how cycles occur.  If fixed k,
   *  then these states will all be unique as states[i] always points
   *  at state i when no cycles exist.
   *
   *  This is managed in parallel with uniqueStates and simply provides
   *  a way to go from state number to DFAState rather than via a
   *  hash lookup.
   */
  protected Vector<DFAState> states = new Vector<DFAState>();

  /** Unique state numbers per DFA */
  protected int stateCounter = 0;

  /** count only new states not states that were rejected as already present */
  protected int numberOfStates = 0;

  /** User specified max fixed lookahead.  If 0, nothing specified.  -1
   *  implies we have not looked at the options table yet to set k.
   */
  protected int user_k = -1;

  /** While building the DFA, track max lookahead depth if not cyclic */
  protected int max_k = -1;

    /** Is this DFA reduced?  I.e., can all states lead to an accept state? */
    protected boolean reduced = true;

    /** Are there any loops in this DFA?
   *  Computed by doesStateReachAcceptState()
   */
    protected boolean cyclic = false;

  /** Track whether this DFA has at least one sem/syn pred encountered
   *  during a closure operation.  This is useful for deciding whether
   *  to retry a non-LL(*) with k=1.  If no pred, it will not work w/o
   *  a pred so don't bother.  It would just give another error message.
   */
  public boolean predicateVisible = false;

  public boolean hasPredicateBlockedByAction = false;

  /** Each alt in an NFA derived from a grammar must have a DFA state that
     *  predicts it lest the parser not know what to do.  Nondeterminisms can
     *  lead to this situation (assuming no semantic predicates can resolve
     *  the problem) and when for some reason, I cannot compute the lookahead
     *  (which might arise from an error in the algorithm or from
     *  left-recursion etc...).  This list starts out with all alts contained
     *  and then in method doesStateReachAcceptState() I remove the alts I
     *  know to be uniquely predicted.
     */
    protected List<Integer> unreachableAlts;

  protected int nAlts = 0;

  /** We only want one accept state per predicted alt; track here */
  protected DFAState[] altToAcceptState;

  /** Track whether an alt discovers recursion for each alt during
   *  NFA to DFA conversion; &gt;1 alt with recursion implies nonregular.
   */
  public IntSet recursiveAltSet = new IntervalSet();

  /** Which NFA are we converting (well, which piece of the NFA)? */
    public NFA nfa;

  protected NFAToDFAConverter nfaConverter;

  /** This probe tells you a lot about a decision and is useful even
   *  when there is no error such as when a syntactic nondeterminism
   *  is solved via semantic predicates.  Perhaps a GUI would want
   *  the ability to show that.
   */
  public DecisionProbe probe = new DecisionProbe(this);

  /** Track absolute time of the conversion so we can have a failsafe:
   *  if it takes too long, then terminate.  Assume bugs are in the
   *  analysis engine.
   */
  //protected long conversionStartTime;

  /** Map an edge transition table to a unique set number; ordered so
   *  we can push into the output template as an ordered list of sets
   *  and then ref them from within the transition[][] table.  Like this
   *  for C# target:
   *     public static readonly DFA30_transition0 =
   *       new short[] { 46, 46, -1, 46, 46, -1, -1, -1, -1, -1, -1, -1,...};
   *         public static readonly DFA30_transition1 =
   *       new short[] { 21 };
   *      public static readonly short[][] DFA30_transition = {
   *         DFA30_transition0,
   *         DFA30_transition0,
   *         DFA30_transition1,
   *         ...
   *      };
   */
  public Map<List<Integer>, Integer> edgeTransitionClassMap = new LinkedHashMap<List<Integer>, Integer>();

  /** The unique edge transition class number; every time we see a new
   *  set of edges emanating from a state, we number it so we can reuse
   *  if it's every seen again for another state.  For Java grammar,
   *  some of the big edge transition tables are seen about 57 times.
   */
  protected int edgeTransitionClass =0;

  /* This DFA can be converted to a transition[state][char] table and
   * the following tables are filled by createStateTables upon request.
   * These are injected into the templates for code generation.
   * See March 25, 2006 entry for description:
   *   http://www.antlr.org/blog/antlr3/codegen.tml
   * Often using Vector as can't set ith position in a List and have
   * it extend list size; bizarre.
   */

  /** List of special DFAState objects */
  public List<DFAState> specialStates;
  /** List of ST for special states. */
  public List<ST> specialStateSTs;
  public Vector<Integer> accept;
  public Vector<Integer> eot;
  public Vector<Integer> eof;
  public Vector<Integer> min;
  public Vector<Integer> max;
  public Vector<Integer> special;
  public Vector<Vector<Integer>> transition;
  /** just the Vector&lt;Integer&gt; indicating which unique edge table is at
   *  position i.
   */
  public Vector<Integer> transitionEdgeTables; // not used by java yet
  protected int uniqueCompressedSpecialStateNum = 0;

  /** Which generator to use if we're building state tables */
  protected CodeGenerator generator = null;

  protected DFA() {}

  public DFA(int decisionNumber, NFAState decisionStartState) {
    this.decisionNumber = decisionNumber;
        this.decisionNFAStartState = decisionStartState;
        nfa = decisionStartState.nfa;
        nAlts = nfa.grammar.getNumberOfAltsForDecisionNFA(decisionStartState);
        //setOptions( nfa.grammar.getDecisionOptions(getDecisionNumber()) );
        initAltRelatedInfo();

    //long start = System.currentTimeMillis();
        nfaConverter = new NFAToDFAConverter(this);
    try {
      nfaConverter.convert();

      // figure out if there are problems with decision
      verify();

      if ( !probe.isDeterministic() || probe.analysisOverflowed() ) {
        probe.issueWarnings();
      }

      // must be after verify as it computes cyclic, needed by this routine
      // should be after warnings because early termination or something
      // will not allow the reset to operate properly in some cases.
      resetStateNumbersToBeContiguous();

      //long stop = System.currentTimeMillis();
      //System.out.println("verify cost: "+(int)(stop-start)+" ms");
    }
//    catch (AnalysisTimeoutException at) {
//      probe.reportAnalysisTimeout();
//      if ( !okToRetryDFAWithK1() ) {
//        probe.issueWarnings();
//      }
//    }
    catch (NonLLStarDecisionException nonLL) {
      probe.reportNonLLStarDecision(this);
      // >1 alt recurses, k=* and no auto backtrack nor manual sem/syn
      if ( !okToRetryDFAWithK1() ) {
        probe.issueWarnings();
      }
    }
    }

  /** Walk all states and reset their numbers to be a contiguous sequence
   *  of integers starting from 0.  Only cyclic DFA can have unused positions
   *  in states list.  State i might be identical to a previous state j and
   *  will result in states[i] == states[j].  We don't want to waste a state
   *  number on this.  Useful mostly for code generation in tables.
   *
   *  At the start of this routine, states[i].stateNumber &lt;= i by definition.
   *  If states[50].stateNumber is 50 then a cycle during conversion may
   *  try to add state 103, but we find that an identical DFA state, named
   *  50, already exists, hence, states[103]==states[50] and both have
   *  stateNumber 50 as they point at same object.  Afterwards, the set
   *  of state numbers from all states should represent a contiguous range
   *  from 0..n-1 where n is the number of unique states.
   */
  public void resetStateNumbersToBeContiguous() {
    if ( getUserMaxLookahead()>0 ) {
      // all numbers are unique already; no states are thrown out.
      return;
    }

        // walk list of DFAState objects by state number,
    // setting state numbers to 0..n-1
    int snum=0;
    for (int i = 0; i <= getMaxStateNumber(); i++) {
      DFAState s = getState(i);
            // some states are unused after creation most commonly due to cycles
            // or conflict resolution.
            if ( s==null ) {
                continue;
            }
      // state i is mapped to DFAState with state number set to i originally
      // so if it's less than i, then we renumbered it already; that
      // happens when states have been merged or cycles occurred I think.
      // states[50] will point to DFAState with s50 in it but
      // states[103] might also point at this same DFAState.  Since
      // 50 < 103 then it's already been renumbered as it points downwards.
      boolean alreadyRenumbered = s.stateNumber<i;
      if ( !alreadyRenumbered ) {
        // state i is a valid state, reset it's state number
        s.stateNumber = snum; // rewrite state numbers to be 0..n-1
        snum++;
      }
    }
        if ( snum!=getNumberOfStates() ) {
      ErrorManager.internalError("DFA "+decisionNumber+": "+
        decisionNFAStartState.getDescription()+" num unique states "+getNumberOfStates()+
        "!= num renumbered states "+snum);
    }
  }

  // JAVA-SPECIFIC Accessors!!!!!  It is so impossible to get arrays
  // or even consistently formatted strings acceptable to java that
  // I am forced to build the individual char elements here

  public List<? extends String> getJavaCompressedAccept() { return getRunLengthEncoding(accept); }
  public List<? extends String> getJavaCompressedEOT() { return getRunLengthEncoding(eot); }
  public List<? extends String> getJavaCompressedEOF() { return getRunLengthEncoding(eof); }
  public List<? extends String> getJavaCompressedMin() { return getRunLengthEncoding(min); }
  public List<? extends String> getJavaCompressedMax() { return getRunLengthEncoding(max); }
  public List<? extends String> getJavaCompressedSpecial() { return getRunLengthEncoding(special); }
  public List<List<? extends String>> getJavaCompressedTransition() {
    if ( transition==null || transition.isEmpty() ) {
      return null;
    }
    List<List<? extends String>> encoded = new ArrayList<List<? extends String>>(transition.size());
    // walk Vector<Vector<FormattedInteger>> which is the transition[][] table
    for (int i = 0; i < transition.size(); i++) {
      Vector<Integer> transitionsForState = transition.elementAt(i);
      encoded.add(getRunLengthEncoding(transitionsForState));
    }
    return encoded;
  }

  /** Compress the incoming data list so that runs of same number are
   *  encoded as number,value pair sequences.  3 -1 -1 -1 28 is encoded
   *  as 1 3 3 -1 1 28.  I am pretty sure this is the lossless compression
   *  that GIF files use.  Transition tables are heavily compressed by
   *  this technique.  I got the idea from JFlex http://jflex.de/
   *
   *  Return List&lt;String&gt; where each string is either \xyz for 8bit char
   *  and \uFFFF for 16bit.  Hideous and specific to Java, but it is the
   *  only target bad enough to need it.
   */
  public List<? extends String> getRunLengthEncoding(List<Integer> data) {
    if ( data==null || data.isEmpty() ) {
      // for states with no transitions we want an empty string ""
      // to hold its place in the transitions array.
      List<String> empty = new ArrayList<String>();
      empty.add("");
      return empty;
    }
    int size = Math.max(2,data.size()/2);
    List<String> encoded = new ArrayList<String>(size); // guess at size
    // scan values looking for runs
    int i = 0;
    Integer emptyValue = Utils.integer(-1);
    while ( i < data.size() ) {
      Integer I = data.get(i);
      if ( I==null ) {
        I = emptyValue;
      }
      // count how many v there are?
      int n = 0;
      for (int j = i; j < data.size(); j++) {
        Integer v = data.get(j);
        if ( v==null ) {
          v = emptyValue;
        }
        if ( I.equals(v) ) {
          n++;
        }
        else {
          break;
        }
      }
      encoded.add(generator.target.encodeIntAsCharEscape((char)n));
      encoded.add(generator.target.encodeIntAsCharEscape((char)I.intValue()));
      i+=n;
    }
    return encoded;
  }

  public void createStateTables(CodeGenerator generator) {
    //System.out.println("createTables:\n"+this);
    this.generator = generator;
    description = getNFADecisionStartState().getDescription();
    description =
      generator.target.getTargetStringLiteralFromString(description);

    // create all the tables
    special = new Vector<Integer>(this.getNumberOfStates()); // Vector<short>
    special.setSize(this.getNumberOfStates());
    specialStates = new ArrayList<DFAState>();        // List<DFAState>
    specialStateSTs = new ArrayList<ST>();        // List<ST>
    accept = new Vector<Integer>(this.getNumberOfStates()); // Vector<int>
    accept.setSize(this.getNumberOfStates());
    eot = new Vector<Integer>(this.getNumberOfStates()); // Vector<int>
    eot.setSize(this.getNumberOfStates());
    eof = new Vector<Integer>(this.getNumberOfStates()); // Vector<int>
    eof.setSize(this.getNumberOfStates());
    min = new Vector<Integer>(this.getNumberOfStates()); // Vector<int>
    min.setSize(this.getNumberOfStates());
    max = new Vector<Integer>(this.getNumberOfStates()); // Vector<int>
    max.setSize(this.getNumberOfStates());
    transition = new Vector<Vector<Integer>>(this.getNumberOfStates()); // Vector<Vector<int>>
    transition.setSize(this.getNumberOfStates());
    transitionEdgeTables = new Vector<Integer>(this.getNumberOfStates()); // Vector<int>
    transitionEdgeTables.setSize(this.getNumberOfStates());

    // for each state in the DFA, fill relevant tables.
    Iterator<DFAState> it;
    if ( getUserMaxLookahead()>0 ) {
      it = states.iterator();
    }
    else {
      it = getUniqueStates().values().iterator();
    }
    while ( it.hasNext() ) {
      DFAState s = it.next();
      if ( s==null ) {
        // ignore null states; some acylic DFA see this condition
        // when inlining DFA (due to lacking of exit branch pruning?)
        continue;
      }
      if ( s.isAcceptState() ) {
        // can't compute min,max,special,transition on accepts
        accept.set(s.stateNumber,
               Utils.integer(s.getUniquelyPredictedAlt()));
      }
      else {
        createMinMaxTables(s);
        createTransitionTableEntryForState(s);
        createSpecialTable(s);
        createEOTAndEOFTables(s);
      }
    }

    // now that we have computed list of specialStates, gen code for 'em
    for (int i = 0; i < specialStates.size(); i++) {
      DFAState ss = specialStates.get(i);
      ST stateST =
        generator.generateSpecialState(ss);
      specialStateSTs.add(stateST);
    }

    // check that the tables are not messed up by encode/decode
    /*
    testEncodeDecode(min);
    testEncodeDecode(max);
    testEncodeDecode(accept);
    testEncodeDecode(special);
    System.out.println("min="+min);
    System.out.println("max="+max);
    System.out.println("eot="+eot);
    System.out.println("eof="+eof);
    System.out.println("accept="+accept);
    System.out.println("special="+special);
    System.out.println("transition="+transition);
    */
  }

  /*
  private void testEncodeDecode(List data) {
    System.out.println("data="+data);
    List encoded = getRunLengthEncoding(data);
    StringBuffer buf = new StringBuffer();
    for (int i = 0; i < encoded.size(); i++) {
      String I = (String)encoded.get(i);
      int v = 0;
      if ( I.startsWith("\\u") ) {
        v = Integer.parseInt(I.substring(2,I.length()), 16);
      }
      else {
        v = Integer.parseInt(I.substring(1,I.length()), 8);
      }
      buf.append((char)v);
    }
    String encodedS = buf.toString();
    short[] decoded = org.antlr.runtime.DFA.unpackEncodedString(encodedS);
    //System.out.println("decoded:");
    for (int i = 0; i < decoded.length; i++) {
      short x = decoded[i];
      if ( x!=((Integer)data.get(i)).intValue() ) {
        System.err.println("problem with encoding");
      }
      //System.out.print(", "+x);
    }
    //System.out.println();
  }
  */

  protected void createMinMaxTables(DFAState s) {
    int smin = Label.MAX_CHAR_VALUE + 1;
    int smax = Label.MIN_ATOM_VALUE - 1;
    for (int j = 0; j < s.getNumberOfTransitions(); j++) {
      Transition edge = s.transition(j);
      Label label = edge.label;
      if ( label.isAtom() ) {
        if ( label.getAtom()>=Label.MIN_CHAR_VALUE ) {
          if ( label.getAtom()<smin ) {
            smin = label.getAtom();
          }
          if ( label.getAtom()>smax ) {
            smax = label.getAtom();
          }
        }
      }
      else if ( label.isSet() ) {
        IntervalSet labels = (IntervalSet)label.getSet();
        int lmin = labels.getMinElement();
        // if valid char (don't do EOF) and less than current min
        if ( lmin<smin && lmin>=Label.MIN_CHAR_VALUE ) {
          smin = labels.getMinElement();
        }
        if ( labels.getMaxElement()>smax ) {
          smax = labels.getMaxElement();
        }
      }
    }

    if ( smax<0 ) {
      // must be predicates or pure EOT transition; just zero out min, max
      smin = Label.MIN_CHAR_VALUE;
      smax = Label.MIN_CHAR_VALUE;
    }

    min.set(s.stateNumber, Utils.integer((char)smin));
    max.set(s.stateNumber, Utils.integer((char)smax));

    if ( smax<0 || smin>Label.MAX_CHAR_VALUE || smin<0 ) {
      ErrorManager.internalError("messed up: min="+min+", max="+max);
    }
  }

  protected void createTransitionTableEntryForState(DFAState s) {
    /*
    System.out.println("createTransitionTableEntryForState s"+s.stateNumber+
      " dec "+s.dfa.decisionNumber+" cyclic="+s.dfa.isCyclic());
      */
    int smax = max.get(s.stateNumber);
    int smin = min.get(s.stateNumber);

    Vector<Integer> stateTransitions = new Vector<Integer>(smax-smin+1);
    stateTransitions.setSize(smax-smin+1);
    transition.set(s.stateNumber, stateTransitions);
    for (int j = 0; j < s.getNumberOfTransitions(); j++) {
      Transition edge = s.transition(j);
      Label label = edge.label;
      if ( label.isAtom() && label.getAtom()>=Label.MIN_CHAR_VALUE ) {
        int labelIndex = label.getAtom()-smin; // offset from 0
        stateTransitions.set(labelIndex,
                   Utils.integer(edge.target.stateNumber));
      }
      else if ( label.isSet() ) {
        IntervalSet labels = (IntervalSet)label.getSet();
        int[] atoms = labels.toArray();
        for (int a = 0; a < atoms.length; a++) {
          // set the transition if the label is valid (don't do EOF)
          if ( atoms[a]>=Label.MIN_CHAR_VALUE ) {
            int labelIndex = atoms[a]-smin; // offset from 0
            stateTransitions.set(labelIndex,
                       Utils.integer(edge.target.stateNumber));
          }
        }
      }
    }
    // track unique state transition tables so we can reuse
    Integer edgeClass = edgeTransitionClassMap.get(stateTransitions);
    if ( edgeClass!=null ) {
      //System.out.println("we've seen this array before; size="+stateTransitions.size());
      transitionEdgeTables.set(s.stateNumber, edgeClass);
    }
    else {
      edgeClass = Utils.integer(edgeTransitionClass);
      transitionEdgeTables.set(s.stateNumber, edgeClass);
      edgeTransitionClassMap.put(stateTransitions, edgeClass);
      edgeTransitionClass++;
    }
  }

  /** Set up the EOT and EOF tables; we cannot put -1 min/max values so
   *  we need another way to test that in the DFA transition function.
   */
  protected void createEOTAndEOFTables(DFAState s) {
    for (int j = 0; j < s.getNumberOfTransitions(); j++) {
      Transition edge = s.transition(j);
      Label label = edge.label;
      if ( label.isAtom() ) {
        if ( label.getAtom()==Label.EOT ) {
          // eot[s] points to accept state
          eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
        }
        else if ( label.getAtom()==Label.EOF ) {
          // eof[s] points to accept state
          eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
        }
      }
      else if ( label.isSet() ) {
        IntervalSet labels = (IntervalSet)label.getSet();
        int[] atoms = labels.toArray();
        for (int a = 0; a < atoms.length; a++) {
          if ( atoms[a]==Label.EOT ) {
            // eot[s] points to accept state
            eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
          }
          else if ( atoms[a]==Label.EOF ) {
            eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
          }
        }
      }
    }
  }

  protected void createSpecialTable(DFAState s) {
    // number all special states from 0...n-1 instead of their usual numbers
    boolean hasSemPred = false;

    // TODO this code is very similar to canGenerateSwitch.  Refactor to share
    for (int j = 0; j < s.getNumberOfTransitions(); j++) {
      Transition edge = s.transition(j);
      Label label = edge.label;
      // can't do a switch if the edges have preds or are going to
      // require gated predicates
      if ( label.isSemanticPredicate() ||
         ((DFAState)edge.target).getGatedPredicatesInNFAConfigurations()!=null)
      {
        hasSemPred = true;
        break;
      }
    }
    // if has pred or too big for table, make it special
    int smax = max.get(s.stateNumber);
    int smin = min.get(s.stateNumber);
    if ( hasSemPred || smax-smin>MAX_STATE_TRANSITIONS_FOR_TABLE ) {
      special.set(s.stateNumber,
            Utils.integer(uniqueCompressedSpecialStateNum));
      uniqueCompressedSpecialStateNum++;
      specialStates.add(s);
    }
    else {
      special.set(s.stateNumber, Utils.integer(-1)); // not special
    }
  }

  public int predict(IntStream input) {
    Interpreter interp = new Interpreter(nfa.grammar, input);
    return interp.predict(this);
  }

  /** Add a new DFA state to this DFA if not already present.
     *  To force an acyclic, fixed maximum depth DFA, just always
   *  return the incoming state.  By not reusing old states,
   *  no cycles can be created.  If we're doing fixed k lookahead
   *  don't updated uniqueStates, just return incoming state, which
   *  indicates it's a new state.
     */
    protected DFAState addState(DFAState d) {
    if ( getUserMaxLookahead()>0 ) {
      return d;
    }
    // does a DFA state exist already with everything the same
    // except its state number?
    DFAState existing = uniqueStates.get(d);
    if ( existing != null ) {
            /*
            System.out.println("state "+d.stateNumber+" exists as state "+
                existing.stateNumber);
                */
            // already there...get the existing DFA state
      return existing;
    }

    // if not there, then add new state.
    uniqueStates.put(d,d);
        numberOfStates++;
    return d;
  }

  public void removeState(DFAState d) {
    DFAState it = uniqueStates.remove(d);
    if ( it!=null ) {
      numberOfStates--;
    }
  }

  public Map<DFAState, DFAState> getUniqueStates() {
    return uniqueStates;
  }

  /** What is the max state number ever created?  This may be beyond
   *  getNumberOfStates().
   */
  public int getMaxStateNumber() {
    return states.size()-1;
  }

  public DFAState getState(int stateNumber) {
    return states.get(stateNumber);
  }

  public void setState(int stateNumber, DFAState d) {
    states.set(stateNumber, d);
  }

  /** Is the DFA reduced?  I.e., does every state have a path to an accept
     *  state?  If not, don't delete as we need to generate an error indicating
     *  which paths are "dead ends".  Also tracks list of alts with no accept
     *  state in the DFA.  Must call verify() first before this makes sense.
     */
    public boolean isReduced() {
        return reduced;
    }

    /** Is this DFA cyclic?  That is, are there any loops?  If not, then
     *  the DFA is essentially an LL(k) predictor for some fixed, max k value.
     *  We can build a series of nested IF statements to match this.  In the
     *  presence of cycles, we need to build a general DFA and interpret it
     *  to distinguish between alternatives.
     */
    public boolean isCyclic() {
        return cyclic && getUserMaxLookahead()==0;
    }

  public boolean isClassicDFA() {
    return !isCyclic() &&
         !nfa.grammar.decisionsWhoseDFAsUsesSemPreds.contains(this) &&
         !nfa.grammar.decisionsWhoseDFAsUsesSynPreds.contains(this);
  }

  public boolean canInlineDecision() {
    return !isCyclic() &&
        !probe.isNonLLStarDecision() &&
      getNumberOfStates() < CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE;
  }

  /** Is this DFA derived from the NFA for the Tokens rule? */
  public boolean isTokensRuleDecision() {
    if ( nfa.grammar.type!=Grammar.LEXER ) {
      return false;
    }
    NFAState nfaStart = getNFADecisionStartState();
    Rule r = nfa.grammar.getLocallyDefinedRule(Grammar.ARTIFICIAL_TOKENS_RULENAME);
    NFAState TokensRuleStart = r.startState;
    NFAState TokensDecisionStart =
      (NFAState)TokensRuleStart.transition[0].target;
    return nfaStart == TokensDecisionStart;
  }

  /** The user may specify a max, acyclic lookahead for any decision.  No
   *  DFA cycles are created when this value, k, is greater than 0.
   *  If this decision has no k lookahead specified, then try the grammar.
   */
  public int getUserMaxLookahead() {
    if ( user_k>=0 ) { // cache for speed
      return user_k;
    }
    user_k = nfa.grammar.getUserMaxLookahead(decisionNumber);
    return user_k;
  }

  public boolean getAutoBacktrackMode() {
    return nfa.grammar.getAutoBacktrackMode(decisionNumber);
  }

  public void setUserMaxLookahead(int k) {
    this.user_k = k;
  }

  /** Return k if decision is LL(k) for some k else return max int
     */
  public int getMaxLookaheadDepth() {
    if ( hasCycle() ) return Integer.MAX_VALUE;
    // compute to be sure
    return _getMaxLookaheadDepth(startState, 0);
  }

  int _getMaxLookaheadDepth(DFAState d, int depth) {
    // not cyclic; don't worry about termination
    // fail if pred edge.
    int max = depth;
    for (int i=0; i<d.getNumberOfTransitions(); i++) {
      Transition t = d.transition(i);
//      if ( t.isSemanticPredicate() ) return Integer.MAX_VALUE;
      if ( !t.isSemanticPredicate() ) {
        // if pure pred not gated, it must target stop state; don't count
        DFAState edgeTarget = (DFAState)t.target;
        int m = _getMaxLookaheadDepth(edgeTarget, depth+1);
        max = Math.max(max, m);
      }
    }
    return max;
  }

  /** Count all disambiguating syn preds (ignore synpred tests
   *  for gated edges, which occur for nonambig input sequences).
   *  E.g.,
   *  x  : (X)=&gt; (X|Y)\n" +
   *     | X\n" +
   *     ;
   *
   *  gives
   *
   * .s0-X-&gt;.s1
   * .s0-Y&amp;&amp;{synpred1_t}?-&gt;:s2=&gt;1
   * .s1-{synpred1_t}?-&gt;:s2=&gt;1
   * .s1-{true}?-&gt;:s3=&gt;2
   */
  public boolean hasSynPred() {
    boolean has = _hasSynPred(startState, new HashSet<DFAState>());
//    if ( !has ) {
//      System.out.println("no synpred in dec "+decisionNumber);
//      FASerializer serializer = new FASerializer(nfa.grammar);
//      String result = serializer.serialize(startState);
//      System.out.println(result);
//    }
    return has;
  }

  public boolean getHasSynPred() { return hasSynPred(); } // for ST 

  boolean _hasSynPred(DFAState d, Set<DFAState> busy) {
    busy.add(d);
    for (int i=0; i<d.getNumberOfTransitions(); i++) {
      Transition t = d.transition(i);
      if ( t.isSemanticPredicate() ) {
        SemanticContext ctx = t.label.getSemanticContext();
//        if ( ctx.toString().indexOf("synpred")>=0 ) {
//          System.out.println("has pred "+ctx.toString()+" "+ctx.isSyntacticPredicate());
//          System.out.println(((SemanticContext.Predicate)ctx).predicateAST.token);
//        }
        if ( ctx.isSyntacticPredicate() ) return true;
      }
      DFAState edgeTarget = (DFAState)t.target;
      if ( !busy.contains(edgeTarget) && _hasSynPred(edgeTarget, busy) ) return true;
    }

    return false;
  }

  public boolean hasSemPred() { // has user-defined sempred
    boolean has = _hasSemPred(startState, new HashSet<DFAState>());
    return has;
  }

  boolean _hasSemPred(DFAState d, Set<DFAState> busy) {
    busy.add(d);
    for (int i=0; i<d.getNumberOfTransitions(); i++) {
      Transition t = d.transition(i);
      if ( t.isSemanticPredicate() ) {
        SemanticContext ctx = t.label.getSemanticContext();
        if ( ctx.hasUserSemanticPredicate() ) return true;
      }
      DFAState edgeTarget = (DFAState)t.target;
      if ( !busy.contains(edgeTarget) && _hasSemPred(edgeTarget, busy) ) return true;
    }

    return false;
  }

  /** Compute cyclic w/o relying on state computed during analysis. just check. */
  public boolean hasCycle() {
    boolean cyclic = _hasCycle(startState, new HashMap<DFAState, Integer>());
    return cyclic;
  }

  boolean _hasCycle(DFAState d, Map<DFAState, Integer> busy) {
    busy.put(d, CYCLIC_BUSY);
    for (int i=0; i<d.getNumberOfTransitions(); i++) {
      Transition t = d.transition(i);
      DFAState target = (DFAState)t.target;
      int cond = CYCLIC_UNKNOWN;
      if ( busy.get(target)!=null ) cond = busy.get(target);
      if ( cond==CYCLIC_BUSY ) return true;
      if ( cond!=CYCLIC_DONE && _hasCycle(target, busy) ) return true;
    }
    busy.put(d, CYCLIC_DONE);
    return false;
  }


    /** Return a list of Integer alt numbers for which no lookahead could
     *  be computed or for which no single DFA accept state predicts those
     *  alts.  Must call verify() first before this makes sense.
     */
    public List<Integer> getUnreachableAlts() {
        return unreachableAlts;
    }

  /** Once this DFA has been built, need to verify that:
   *
   *  1. it's reduced
   *  2. all alts have an accept state
   *
   *  Elsewhere, in the NFA converter, we need to verify that:
   *
   *  3. alts i and j have disjoint lookahead if no sem preds
   *  4. if sem preds, nondeterministic alts must be sufficiently covered
   *
   *  This is avoided if analysis bails out for any reason.
   */
  public void verify() {
    doesStateReachAcceptState(startState);
  }

    /** figure out if this state eventually reaches an accept state and
     *  modify the instance variable 'reduced' to indicate if we find
     *  at least one state that cannot reach an accept state.  This implies
     *  that the overall DFA is not reduced.  This algorithm should be
     *  linear in the number of DFA states.
     *
     *  The algorithm also tracks which alternatives have no accept state,
     *  indicating a nondeterminism.
   *
   *  Also computes whether the DFA is cyclic.
   *
     *  TODO: I call getUniquelyPredicatedAlt too much; cache predicted alt
     */
    protected boolean doesStateReachAcceptState(DFAState d) {
    if ( d.isAcceptState() ) {
            // accept states have no edges emanating from them so we can return
            d.setAcceptStateReachable(REACHABLE_YES);
            // this alt is uniquely predicted, remove from nondeterministic list
            int predicts = d.getUniquelyPredictedAlt();
            unreachableAlts.remove(Utils.integer(predicts));
            return true;
        }

        // avoid infinite loops
        d.setAcceptStateReachable(REACHABLE_BUSY);

        boolean anEdgeReachesAcceptState = false;
        // Visit every transition, track if at least one edge reaches stop state
    // Cannot terminate when we know this state reaches stop state since
    // all transitions must be traversed to set status of each DFA state.
    for (int i=0; i<d.getNumberOfTransitions(); i++) {
            Transition t = d.transition(i);
            DFAState edgeTarget = (DFAState)t.target;
            int targetStatus = edgeTarget.getAcceptStateReachable();
            if ( targetStatus==REACHABLE_BUSY ) { // avoid cycles; they say nothing
                cyclic = true;
                continue;
            }
            if ( targetStatus==REACHABLE_YES ) { // avoid unnecessary work
                anEdgeReachesAcceptState = true;
                continue;
            }
            if ( targetStatus==REACHABLE_NO ) {  // avoid unnecessary work
                continue;
            }
      // target must be REACHABLE_UNKNOWN (i.e., unvisited)
            if ( doesStateReachAcceptState(edgeTarget) ) {
                anEdgeReachesAcceptState = true;
                // have to keep looking so don't break loop
                // must cover all states even if we find a path for this state
            }
        }
        if ( anEdgeReachesAcceptState ) {
            d.setAcceptStateReachable(REACHABLE_YES);
        }
        else {
            d.setAcceptStateReachable(REACHABLE_NO);
      reduced = false;
        }
        return anEdgeReachesAcceptState;
    }

  /** Walk all accept states and find the manually-specified synpreds.
   *  Gated preds are not always hoisted
   *  I used to do this in the code generator, but that is too late.
   *  This converter tries to avoid computing DFA for decisions in
   *  syntactic predicates that are not ever used such as those
   *  created by autobacktrack mode.
   */
  public void findAllGatedSynPredsUsedInDFAAcceptStates() {
    int nAlts = getNumberOfAlts();
    for (int i=1; i<=nAlts; i++) {
      DFAState a = getAcceptState(i);
      //System.out.println("alt "+i+": "+a);
      if ( a!=null ) {
        Set<? extends SemanticContext> synpreds = a.getGatedSyntacticPredicatesInNFAConfigurations();
        if ( synpreds!=null ) {
          // add all the predicates we find (should be just one, right?)
          for (SemanticContext semctx : synpreds) {
            // System.out.println("synpreds: "+semctx);
            nfa.grammar.synPredUsedInDFA(this, semctx);
          }
        }
      }
    }
  }

  public NFAState getNFADecisionStartState() {
        return decisionNFAStartState;
    }

  public DFAState getAcceptState(int alt) {
    return altToAcceptState[alt];
  }

  public void setAcceptState(int alt, DFAState acceptState) {
    altToAcceptState[alt] = acceptState;
  }

  public String getDescription() {
    return description;
  }

  public int getDecisionNumber() {
        return decisionNFAStartState.getDecisionNumber();
    }

  /** If this DFA failed to finish during construction, we might be
   *  able to retry with k=1 but we need to know whether it will
   *  potentially succeed.  Can only succeed if there is a predicate
   *  to resolve the issue.  Don't try if k=1 already as it would
   *  cycle forever.  Timeout can retry with k=1 even if no predicate
   *  if k!=1.
   */
  public boolean okToRetryDFAWithK1() {
    boolean nonLLStarOrOverflowAndPredicateVisible =
      (probe.isNonLLStarDecision()||probe.analysisOverflowed()) &&
        predicateVisible; // auto backtrack or manual sem/syn
    return getUserMaxLookahead()!=1 &&
       nonLLStarOrOverflowAndPredicateVisible;
  }

  public String getReasonForFailure() {
    StringBuilder buf = new StringBuilder();
    if ( probe.isNonLLStarDecision() ) {
      buf.append("non-LL(*)");
      if ( predicateVisible ) {
        buf.append(" && predicate visible");
      }
    }
    if ( probe.analysisOverflowed() ) {
      buf.append("recursion overflow");
      if ( predicateVisible ) {
        buf.append(" && predicate visible");
      }
    }
    buf.append("\n");
    return buf.toString();
  }

  /** What GrammarAST node (derived from the grammar) is this DFA
     *  associated with?  It will point to the start of a block or
     *  the loop back of a (...)+ block etc...
     */
    public GrammarAST getDecisionASTNode() {
        return decisionNFAStartState.associatedASTNode;
    }

    public boolean isGreedy() {
    GrammarAST blockAST = nfa.grammar.getDecisionBlockAST(decisionNumber);
    Object v = nfa.grammar.getBlockOption(blockAST,"greedy");
    if ( v!=null && v.equals("false") ) {
      return false;
    }
        return true;

  }

    public DFAState newState() {
        DFAState n = new DFAState(this);
        n.stateNumber = stateCounter;
        stateCounter++;
    states.setSize(n.stateNumber+1);
    states.set(n.stateNumber, n); // track state num to state
        return n;
    }

  public int getNumberOfStates() {
    if ( getUserMaxLookahead()>0 ) {
      // if using fixed lookahead then uniqueSets not set
      return states.size();
    }
    return numberOfStates;
  }

  public int getNumberOfAlts() {
    return nAlts;
  }

//  public boolean analysisTimedOut() {
//    return probe.analysisTimedOut();
//  }

    protected void initAltRelatedInfo() {
        unreachableAlts = new LinkedList<Integer>();
        for (int i = 1; i <= nAlts; i++) {
            unreachableAlts.add(Utils.integer(i));
        }
    altToAcceptState = new DFAState[nAlts+1];
    }

  @Override
  public String toString() {
    FASerializer serializer = new FASerializer(nfa.grammar);
    if ( startState==null ) {
      return "";
    }
    return serializer.serialize(startState, false);
  }

  /** EOT (end of token) is a label that indicates when the DFA conversion
   *  algorithm would "fall off the end of a lexer rule".  It normally
   *  means the default clause.  So for ('a'..'z')+ you would see a DFA
   *  with a state that has a..z and EOT emanating from it.  a..z would
   *  jump to a state predicting alt 1 and EOT would jump to a state
   *  predicting alt 2 (the exit loop branch).  EOT implies anything other
   *  than a..z.  If for some reason, the set is "all char" such as with
   *  the wildcard '.', then EOT cannot match anything.  For example,
   *
   *     BLOCK : '{' (.)* '}'
   *
   *  consumes all char until EOF when greedy=true.  When all edges are
   *  combined for the DFA state after matching '}', you will find that
   *  it is all char.  The EOT transition has nothing to match and is
   *  unreachable.  The findNewDFAStatesAndAddDFATransitions() method
   *  must know to ignore the EOT, so we simply remove it from the
   *  reachable labels.  Later analysis will find that the exit branch
   *  is not predicted by anything.  For greedy=false, we leave only
   *  the EOT label indicating that the DFA should stop immediately
   *  and predict the exit branch. The reachable labels are often a
   *  set of disjoint values like: [<EOT>, 42, {0..41, 43..65534}]
   *  due to DFA conversion so must construct a pure set to see if
   *  it is same as Label.ALLCHAR.
   *
   *  Only do this for Lexers.
   *
   *  If EOT coexists with ALLCHAR:
   *  1. If not greedy, modify the labels parameter to be EOT
   *  2. If greedy, remove EOT from the labels set
  protected boolean reachableLabelsEOTCoexistsWithAllChar(OrderedHashSet labels)
  {
    Label eot = new Label(Label.EOT);
    if ( !labels.containsKey(eot) ) {
      return false;
    }
    System.out.println("### contains EOT");
    boolean containsAllChar = false;
    IntervalSet completeVocab = new IntervalSet();
    int n = labels.size();
    for (int i=0; i<n; i++) {
      Label rl = (Label)labels.get(i);
      if ( !rl.equals(eot) ) {
        completeVocab.addAll(rl.getSet());
      }
    }
    System.out.println("completeVocab="+completeVocab);
    if ( completeVocab.equals(Label.ALLCHAR) ) {
      System.out.println("all char");
      containsAllChar = true;
    }
    return containsAllChar;
  }
   */
}
 
TOP

Related Classes of org.antlr.analysis.DFA

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.