// Source Code of org.antlr.v4.test.TestATNParserPrediction

/*
 * [The "BSD license"]
 *  Copyright (c) 2012 Terence Parr
 *  Copyright (c) 2012 Sam Harwell
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *     derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


package org.antlr.v4.test;


import org.antlr.v4.Tool;
import org.antlr.v4.automata.ParserATNFactory;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.NoViableAltException;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.TokenStream;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.LexerATNSimulator;
import org.antlr.v4.runtime.atn.PredictionContextCache;
import org.antlr.v4.runtime.dfa.DFA;
import org.antlr.v4.runtime.misc.IntegerList;
import org.antlr.v4.tool.DOTGenerator;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.LexerGrammar;
import org.antlr.v4.tool.Rule;
import org.junit.Test;


import static org.junit.Assert.assertEquals;


  // NOTICE: THE TOKENS DEFINED IN THE LEXER GRAMMAR AND IN THE PARSER GRAMMAR
  // OF A TEST MUST BE THE SAME; OTHERWISE THE TOKEN TYPES DO NOT MATCH
  // (TOKEN TYPE MISMATCH).


public class TestATNParserPrediction extends BaseTest {
  @Test public void testAorB() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n" +
      "A : 'a' ;\n" +
      "B : 'b' ;\n" +
      "C : 'c' ;\n");
    Grammar g = new Grammar(
      "parser grammar T;\n"+
      "a : A{;} | B ;");
    int decision = 0;
    checkPredictedAlt(lg, g, decision, "a", 1);
    checkPredictedAlt(lg, g, decision, "b", 2);


    // After matching these inputs for decision, what is DFA after each prediction?
    String[] inputs = {
      "a",
      "b",
      "a"
    };
    String[] dfa = {
      "s0-'a'->:s1=>1\n",


      "s0-'a'->:s1=>1\n" +
      "s0-'b'->:s2=>2\n",


      "s0-'a'->:s1=>1\n" + // don't change after it works
      "s0-'b'->:s2=>2\n",
    };
    checkDFAConstruction(lg, g, decision, inputs, dfa);
  }


  @Test public void testEmptyInput() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n" +
      "A : 'a' ;\n" +
      "B : 'b' ;\n" +
      "C : 'c' ;\n");
    Grammar g = new Grammar(
      "parser grammar T;\n"+
      "a : A | ;");
    int decision = 0;
    checkPredictedAlt(lg, g, decision, "a", 1);
    checkPredictedAlt(lg, g, decision, "", 2);


    // After matching these inputs for decision, what is DFA after each prediction?
    String[] inputs = {
      "a",
      "",
    };
    String[] dfa = {
      "s0-'a'->:s1=>1\n",


      "s0-EOF->:s2=>2\n" +
      "s0-'a'->:s1=>1\n",
    };
    checkDFAConstruction(lg, g, decision, inputs, dfa);
  }


  @Test public void testPEGAchillesHeel() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n" +
      "A : 'a' ;\n" +
      "B : 'b' ;\n" +
      "C : 'c' ;\n");
    Grammar g = new Grammar(
      "parser grammar T;\n"+
      "a : A | A B ;");
    checkPredictedAlt(lg, g, 0, "a", 1);
    checkPredictedAlt(lg, g, 0, "ab", 2);
    checkPredictedAlt(lg, g, 0, "abc", 2);


    String[] inputs = {
      "a",
      "ab",
      "abc"
    };
    String[] dfa = {
      "s0-'a'->s1\n" +
      "s1-EOF->:s2=>1\n",


      "s0-'a'->s1\n" +
      "s1-EOF->:s2=>1\n" +
      "s1-'b'->:s3=>2\n",


      "s0-'a'->s1\n" +
      "s1-EOF->:s2=>1\n" +
      "s1-'b'->:s3=>2\n"
    };
    checkDFAConstruction(lg, g, 0, inputs, dfa);
  }


  @Test public void testRuleRefxory() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n" +
      "A : 'a' ;\n" +
      "B : 'b' ;\n" +
      "C : 'c' ;\n");
    Grammar g = new Grammar(
      "parser grammar T;\n"+
      "a : x | y ;\n" +
      "x : A ;\n" +
      "y : B ;\n");
    int decision = 0;
    checkPredictedAlt(lg, g, decision, "a", 1);
    checkPredictedAlt(lg, g, decision, "b", 2);


    // After matching these inputs for decision, what is DFA after each prediction?
    String[] inputs = {
      "a",
      "b",
      "a"
    };
    String[] dfa = {
      "s0-'a'->:s1=>1\n",


      "s0-'a'->:s1=>1\n" +
      "s0-'b'->:s2=>2\n",


      "s0-'a'->:s1=>1\n" + // don't change after it works
      "s0-'b'->:s2=>2\n",
    };
    checkDFAConstruction(lg, g, decision, inputs, dfa);
  }


  @Test public void testOptionalRuleChasesGlobalFollow() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n" +
      "A : 'a' ;\n" +
      "B : 'b' ;\n" +
      "C : 'c' ;\n");
    Grammar g = new Grammar(
      "parser grammar T;\n"+
      "tokens {A,B,C}\n" +
      "a : x B ;\n" +
      "b : x C ;\n" +
      "x : A | ;\n");
    int decision = 0;
    checkPredictedAlt(lg, g, decision, "a", 1);
    checkPredictedAlt(lg, g, decision, "b", 2);
    checkPredictedAlt(lg, g, decision, "c", 2);


    // After matching these inputs for decision, what is DFA after each prediction?
    String[] inputs = {
      "a",
      "b",
      "c",
      "c",
    };
    String[] dfa = {
      "s0-'a'->:s1=>1\n",


      "s0-'a'->:s1=>1\n" +
      "s0-'b'->:s2=>2\n",


      "s0-'a'->:s1=>1\n" +
      "s0-'b'->:s2=>2\n" +
      "s0-'c'->:s3=>2\n",


      "s0-'a'->:s1=>1\n" +
      "s0-'b'->:s2=>2\n" +
      "s0-'c'->:s3=>2\n",
    };
    checkDFAConstruction(lg, g, decision, inputs, dfa);
  }


  @Test public void testLL1Ambig() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n" +
      "A : 'a' ;\n" +
      "B : 'b' ;\n" +
      "C : 'c' ;\n");
    Grammar g = new Grammar(
      "parser grammar T;\n"+
      "a : A | A | A B ;");
    int decision = 0;
    checkPredictedAlt(lg, g, decision, "a", 1);
    checkPredictedAlt(lg, g, decision, "ab", 3);


    // After matching these inputs for decision, what is DFA after each prediction?
    String[] inputs = {
      "a",
      "ab",
      "ab"
    };
    String[] dfa = {
      "s0-'a'->s1\n" +
      "s1-EOF->:s2^=>1\n",


      "s0-'a'->s1\n" +
      "s1-EOF->:s2^=>1\n" +
      "s1-'b'->:s3=>3\n",


      "s0-'a'->s1\n" +
      "s1-EOF->:s2^=>1\n" +
      "s1-'b'->:s3=>3\n",
    };
    checkDFAConstruction(lg, g, decision, inputs, dfa);
  }


  @Test public void testLL2Ambig() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n" +
      "A : 'a' ;\n" +
      "B : 'b' ;\n" +
      "C : 'c' ;\n");
    Grammar g = new Grammar(
      "parser grammar T;\n"+
      "a : A B | A B | A B C ;");
    int decision = 0;
    checkPredictedAlt(lg, g, decision, "ab", 1);
    checkPredictedAlt(lg, g, decision, "abc", 3);


    // After matching these inputs for decision, what is DFA after each prediction?
    String[] inputs = {
      "ab",
      "abc",
      "ab"
    };
    String[] dfa = {
      "s0-'a'->s1\n" +
      "s1-'b'->s2\n" +
      "s2-EOF->:s3^=>1\n",


      "s0-'a'->s1\n" +
      "s1-'b'->s2\n" +
      "s2-EOF->:s3^=>1\n" +
      "s2-'c'->:s4=>3\n",


      "s0-'a'->s1\n" +
      "s1-'b'->s2\n" +
      "s2-EOF->:s3^=>1\n" +
      "s2-'c'->:s4=>3\n",
    };
    checkDFAConstruction(lg, g, decision, inputs, dfa);
  }


  @Test public void testRecursiveLeftPrefix() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n" +
      "A : 'a' ;\n" +
      "B : 'b' ;\n" +
      "C : 'c' ;\n" +
      "LP : '(' ;\n" +
      "RP : ')' ;\n" +
      "INT : '0'..'9'+ ;\n"
    );
    Grammar g = new Grammar(
      "parser grammar T;\n"+
      "tokens {A,B,C,LP,RP,INT}\n" +
      "a : e B | e C ;\n" +
      "e : LP e RP\n" +
      "  | INT\n" +
      "  ;");
    int decision = 0;
    checkPredictedAlt(lg, g, decision, "34b", 1);
    checkPredictedAlt(lg, g, decision, "34c", 2);
    checkPredictedAlt(lg, g, decision, "((34))b", 1);
    checkPredictedAlt(lg, g, decision, "((34))c", 2);


    // After matching these inputs for decision, what is DFA after each prediction?
    String[] inputs = {
      "34b",
      "34c",
      "((34))b",
      "((34))c"
    };
    String[] dfa = {
      "s0-INT->s1\n" +
      "s1-'b'->:s2=>1\n",


      "s0-INT->s1\n" +
      "s1-'b'->:s2=>1\n" +
      "s1-'c'->:s3=>2\n",


      "s0-'('->s4\n" +
      "s0-INT->s1\n" +
      "s1-'b'->:s2=>1\n" +
      "s1-'c'->:s3=>2\n" +
      "s4-'('->s5\n" +
      "s5-INT->s6\n" +
      "s6-')'->s7\n" +
      "s7-')'->s1\n",


      "s0-'('->s4\n" +
      "s0-INT->s1\n" +
      "s1-'b'->:s2=>1\n" +
      "s1-'c'->:s3=>2\n" +
      "s4-'('->s5\n" +
      "s5-INT->s6\n" +
      "s6-')'->s7\n" +
      "s7-')'->s1\n",
    };
    checkDFAConstruction(lg, g, decision, inputs, dfa);
  }


  @Test public void testRecursiveLeftPrefixWithAorABIssue() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n" +
      "A : 'a' ;\n" +
      "B : 'b' ;\n" +
      "C : 'c' ;\n" +
      "LP : '(' ;\n" +
      "RP : ')' ;\n" +
      "INT : '0'..'9'+ ;\n"
    );
    Grammar g = new Grammar(
      "parser grammar T;\n"+
      "tokens {A,B,C,LP,RP,INT}\n" +
      "a : e A | e A B ;\n" +
      "e : LP e RP\n" +
      "  | INT\n" +
      "  ;");
    int decision = 0;
    checkPredictedAlt(lg, g, decision, "34a", 1);
    checkPredictedAlt(lg, g, decision, "34ab", 2); // PEG would miss this one!
    checkPredictedAlt(lg, g, decision, "((34))a", 1);
    checkPredictedAlt(lg, g, decision, "((34))ab", 2);


    // After matching these inputs for decision, what is DFA after each prediction?
    String[] inputs = {
      "34a",
      "34ab",
      "((34))a",
      "((34))ab",
    };
    String[] dfa = {
      "s0-INT->s1\n" +
      "s1-'a'->s2\n" +
      "s2-EOF->:s3=>1\n",


      "s0-INT->s1\n" +
      "s1-'a'->s2\n" +
      "s2-EOF->:s3=>1\n" +
      "s2-'b'->:s4=>2\n",


      "s0-'('->s5\n" +
      "s0-INT->s1\n" +
      "s1-'a'->s2\n" +
      "s2-EOF->:s3=>1\n" +
      "s2-'b'->:s4=>2\n" +
      "s5-'('->s6\n" +
      "s6-INT->s7\n" +
      "s7-')'->s8\n" +
      "s8-')'->s1\n",


      "s0-'('->s5\n" +
      "s0-INT->s1\n" +
      "s1-'a'->s2\n" +
      "s2-EOF->:s3=>1\n" +
      "s2-'b'->:s4=>2\n" +
      "s5-'('->s6\n" +
      "s6-INT->s7\n" +
      "s7-')'->s8\n" +
      "s8-')'->s1\n",
    };
    checkDFAConstruction(lg, g, decision, inputs, dfa);
  }


  @Test public void testContinuePrediction() throws Exception {
    // Sam found prev def of ambiguity was too restrictive.
    // E.g., (13, 1, []), (13, 2, []), (12, 2, []) should not
    // be declared ambig since (12, 2, []) can take us to
    // unambig state maybe. keep going.
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n" +
      "ID : 'a'..'z' ;\n" + // one char
      "SEMI : ';' ;\n"+
      "INT : '0'..'9'+ ;\n"
    );
    Grammar g = new Grammar(
      "parser grammar T;\n"+
      "tokens {ID,SEMI,INT}\n" +
      "a : (ID | ID ID?) SEMI ;");
    int decision = 1;
    checkPredictedAlt(lg, g, decision, "a;", 1);
    checkPredictedAlt(lg, g, decision, "ab;", 2);
  }


  @Test public void testContinuePrediction2() throws Exception {
    // ID is ambig for first two alts, but ID SEMI lets us move forward with alt 3
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n" +
      "ID : 'a'..'z' ;\n" + // one char
      "SEMI : ';' ;\n"+
      "INT : '0'..'9'+ ;\n"
    );
    Grammar g = new Grammar(
      "parser grammar T;\n"+
      "tokens {ID,SEMI,INT}\n" +
      "a : ID | ID | ID SEMI ;\n");
    int decision = 0;
    checkPredictedAlt(lg, g, decision, "a", 1);
    checkPredictedAlt(lg, g, decision, "a;", 3);
  }


  /** first check that the ATN predicts right alt.
   *  Then check adaptive prediction.
   */
  public void checkPredictedAlt(LexerGrammar lg, Grammar g, int decision,
                  String inputString, int expectedAlt)
  {
    Tool.internalOption_ShowATNConfigsInDFA = true;
    ATN lexatn = createATN(lg, true);
    LexerATNSimulator lexInterp =
      new LexerATNSimulator(lexatn,new DFA[] { new DFA(lexatn.modeToStartState.get(Lexer.DEFAULT_MODE)) },new PredictionContextCache());
    IntegerList types = getTokenTypesViaATN(inputString, lexInterp);
    System.out.println(types);


    semanticProcess(lg);
    g.importVocab(lg);
    semanticProcess(g);


    ParserATNFactory f = new ParserATNFactory(g);
    ATN atn = f.createATN();


    DOTGenerator dot = new DOTGenerator(g);


    Rule r = g.getRule("a");
    if ( r!=null) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
    r = g.getRule("b");
    if ( r!=null) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
    r = g.getRule("e");
    if ( r!=null) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
    r = g.getRule("ifstat");
    if ( r!=null) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
    r = g.getRule("block");
    if ( r!=null) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));


    // Check ATN prediction
//    ParserATNSimulator interp = new ParserATNSimulator(atn);
    TokenStream input = new IntTokenStream(types);
    ParserInterpreterForTesting interp = new ParserInterpreterForTesting(g, input);
    int alt = interp.adaptivePredict(input, decision, ParserRuleContext.EMPTY);


    assertEquals(expectedAlt, alt);


    // Check adaptive prediction
    input.seek(0);
    alt = interp.adaptivePredict(input, decision, null);
    assertEquals(expectedAlt, alt);
    // run 2x; first time creates DFA in atn
    input.seek(0);
    alt = interp.adaptivePredict(input, decision, null);
    assertEquals(expectedAlt, alt);
  }


  public void checkDFAConstruction(LexerGrammar lg, Grammar g, int decision,
                   String[] inputString, String[] dfaString)
  {
//    Tool.internalOption_ShowATNConfigsInDFA = true;
    ATN lexatn = createATN(lg, true);
    LexerATNSimulator lexInterp =
      new LexerATNSimulator(lexatn,new DFA[] { new DFA(lexatn.getDecisionState(Lexer.DEFAULT_MODE)) }, new PredictionContextCache());


    semanticProcess(lg);
    g.importVocab(lg);
    semanticProcess(g);


    ParserInterpreterForTesting interp = new ParserInterpreterForTesting(g, null);
    for (int i=0; i<inputString.length; i++) {
      // Check DFA
      IntegerList types = getTokenTypesViaATN(inputString[i], lexInterp);
      System.out.println(types);
      TokenStream input = new IntTokenStream(types);
      try {
        interp.adaptivePredict(input, decision, ParserRuleContext.EMPTY);
      }
      catch (NoViableAltException nvae) {
        nvae.printStackTrace(System.err);
      }
      DFA dfa = interp.parser.decisionToDFA[decision];
      assertEquals(dfaString[i], dfa.toString(g.getVocabulary()));
    }
  }
}
// Source Code of org.antlr.v4.test.TestATNParserPrediction
//
// Related Classes of org.antlr.v4.test.TestATNParserPrediction