Package org.antlr.v4.test

Source Code of org.antlr.v4.test.TestATNLexerInterpreter

/*
* [The "BSD license"]
*  Copyright (c) 2012 Terence Parr
*  Copyright (c) 2012 Sam Harwell
*  All rights reserved.
*
*  Redistribution and use in source and binary forms, with or without
*  modification, are permitted provided that the following conditions
*  are met:
*
*  1. Redistributions of source code must retain the above copyright
*     notice, this list of conditions and the following disclaimer.
*  2. Redistributions in binary form must reproduce the above copyright
*     notice, this list of conditions and the following disclaimer in the
*     documentation and/or other materials provided with the distribution.
*  3. The name of the author may not be used to endorse or promote products
*     derived from this software without specific prior written permission.
*
*  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
*  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
*  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
*  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
*  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
*  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
*  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
*  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
*  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package org.antlr.v4.test;

import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.misc.Utils;
import org.antlr.v4.tool.DOTGenerator;
import org.antlr.v4.tool.LexerGrammar;
import org.junit.Test;

import java.util.List;

import static org.junit.Assert.*;

/**
* Lexer rules are little quirky when it comes to wildcards. Problem
* stems from the fact that we want the longest match to win among
* several rules and even within a rule. However, that conflicts
* with the notion of non-greedy, which by definition tries to match
* the fewest possible. During ATN construction, non-greedy loops
* have their entry and exit branches reversed so that the ATN
* simulator will see the exit branch 1st, giving it a priority. The
* 1st path to the stop state kills any other paths for that rule
* that begin with the wildcard. In general, this does everything we
* want, but occasionally there are some quirks as you'll see from
* the tests below.
*/
public class TestATNLexerInterpreter extends BaseTest {
  @Test public void testLexerTwoRules() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "A : 'a' ;\n" +
      "B : 'b' ;\n");
    String expecting = "A, B, A, B, EOF";
    checkLexerMatches(lg, "abab", expecting);
  }

  @Test public void testShortLongRule() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "A : 'xy'\n" +
      "  | 'xyz'\n" + // this alt is preferred since there are no non-greedy configs
      "  ;\n" +
      "Z : 'z'\n" +
      "  ;\n");
    checkLexerMatches(lg, "xy", "A, EOF");
    checkLexerMatches(lg, "xyz", "A, EOF");
  }

  @Test public void testShortLongRule2() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "A : 'xyz'\n" // make sure nongreedy mech cut off doesn't kill this alt
      "  | 'xy'\n" +
      "  ;\n");
    checkLexerMatches(lg, "xy", "A, EOF");
    checkLexerMatches(lg, "xyz", "A, EOF");
  }

  @Test public void testWildOnEndFirstAlt() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "A : 'xy' .\n" + // should pursue '.' since xyz hits stop first, before 2nd alt
      "  | 'xy'\n" +
      "  ;\n" +
      "Z : 'z'\n" +
      "  ;\n");
    checkLexerMatches(lg, "xy", "A, EOF");
    checkLexerMatches(lg, "xyz", "A, EOF");
  }

  @Test public void testWildOnEndLastAlt() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "A : 'xy'\n" +
      "  | 'xy' .\n" // this alt is preferred since there are no non-greedy configs
      "  ;\n" +
      "Z : 'z'\n" +
      "  ;\n");
    checkLexerMatches(lg, "xy", "A, EOF");
    checkLexerMatches(lg, "xyz", "A, EOF");
  }

  @Test public void testWildcardNonQuirkWhenSplitBetweenTwoRules() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "A : 'xy' ;\n" +
      "B : 'xy' . 'z' ;\n");
    checkLexerMatches(lg, "xy", "A, EOF");
    checkLexerMatches(lg, "xyqz", "B, EOF");
  }

  @Test public void testLexerLoops() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "INT : '0'..'9'+ ;\n" +
      "ID : 'a'..'z'+ ;\n");
    String expecting = "ID, INT, ID, INT, EOF";
    checkLexerMatches(lg, "a34bde3", expecting);
  }

  @Test public void testLexerNotSet() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "ID : ~('a'|'b')\n ;");
    String expecting = "ID, EOF";
    checkLexerMatches(lg, "c", expecting);
  }

  @Test public void testLexerKeywordIDAmbiguity() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "KEND : 'end' ;\n" +
      "ID : 'a'..'z'+ ;\n" +
      "WS : (' '|'\\n')+ ;");
    String expecting = "ID, EOF";
    //checkLexerMatches(lg, "e", expecting);
    expecting = "KEND, EOF";
    checkLexerMatches(lg, "end", expecting);
    expecting = "ID, EOF";
    checkLexerMatches(lg, "ending", expecting);
    expecting = "ID, WS, KEND, WS, ID, EOF";
    checkLexerMatches(lg, "a end bcd", expecting);
  }

  @Test public void testLexerRuleRef() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "INT : DIGIT+ ;\n" +
      "fragment DIGIT : '0'..'9' ;\n" +
      "WS : (' '|'\\n')+ ;");
    String expecting = "INT, WS, INT, EOF";
    checkLexerMatches(lg, "32 99", expecting);
  }

  @Test public void testRecursiveLexerRuleRef() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "CMT : '/*' (CMT | ~'*')+ '*/' ;\n" +
      "WS : (' '|'\\n')+ ;");
    String expecting = "CMT, WS, CMT, EOF";
    checkLexerMatches(lg, "/* ick */\n/* /*nested*/ */", expecting);
  }

  @Test public void testRecursiveLexerRuleRefWithWildcard() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "CMT : '/*' (CMT | .)*? '*/' ;\n" +
      "WS : (' '|'\\n')+ ;");

    String expecting = "CMT, WS, CMT, WS, EOF";
    checkLexerMatches(lg,
              "/* ick */\n" +
              "/* /* */\n" +
              "/* /*nested*/ */\n",
              expecting);
  }

  @Test public void testLexerWildcardGreedyLoopByDefault() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "CMT : '//' .* '\\n' ;\n");
    String expecting = "CMT, EOF";
    checkLexerMatches(lg, "//x\n//y\n", expecting);
  }

  @Test public void testLexerWildcardLoopExplicitNonGreedy() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "CMT : '//' .*? '\\n' ;\n");
    String expecting = "CMT, CMT, EOF";
    checkLexerMatches(lg, "//x\n//y\n", expecting);
  }

  @Test public void testLexerEscapeInString() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "STR : '[' ('~' ']' | .)* ']' ;\n");
    checkLexerMatches(lg, "[a~]b]", "STR, EOF");
    checkLexerMatches(lg, "[a]", "STR, EOF");
  }

  @Test public void testLexerWildcardGreedyPlusLoopByDefault() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "CMT : '//' .+ '\\n' ;\n");
    String expecting = "CMT, EOF";
    checkLexerMatches(lg, "//x\n//y\n", expecting);
  }

  @Test public void testLexerWildcardExplicitNonGreedyPlusLoop() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "CMT : '//' .+? '\\n' ;\n");
    String expecting = "CMT, CMT, EOF";
    checkLexerMatches(lg, "//x\n//y\n", expecting);
  }

  // does not fail since ('*/')? can't match and have rule succeed
  @Test public void testLexerGreedyOptionalShouldWorkAsWeExpect() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "CMT : '/*' ('*/')? '*/' ;\n");
    String expecting = "CMT, EOF";
    checkLexerMatches(lg, "/**/", expecting);
  }

  @Test public void testGreedyBetweenRules() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "A : '<a>' ;\n" +
      "B : '<' .+ '>' ;\n");
    String expecting = "B, EOF";
    checkLexerMatches(lg, "<a><x>", expecting);
  }

  @Test public void testNonGreedyBetweenRules() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "A : '<a>' ;\n" +
      "B : '<' .+? '>' ;\n");
    String expecting = "A, B, EOF";
    checkLexerMatches(lg, "<a><x>", expecting);
  }

  @Test public void testEOFAtEndOfLineComment() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "CMT : '//' ~('\\n')* ;\n");
    String expecting = "CMT, EOF";
    checkLexerMatches(lg, "//x", expecting);
  }

  @Test public void testEOFAtEndOfLineComment2() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "CMT : '//' ~('\\n'|'\\r')* ;\n");
    String expecting = "CMT, EOF";
    checkLexerMatches(lg, "//x", expecting);
  }

  /** only positive sets like (EOF|'\n') can match EOF and not in wildcard or ~foo sets
   *  EOF matches but does not advance cursor.
   */
  @Test public void testEOFInSetAtEndOfLineComment() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "CMT : '//' .* (EOF|'\\n') ;\n");
    String expecting = "CMT, EOF";
    checkLexerMatches(lg, "//", expecting);
  }

  @Test public void testEOFSuffixInSecondRule() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "A : 'a' ;\n"+ // shorter than 'a' EOF, despite EOF being 0 width
      "B : 'a' EOF ;\n");
    String expecting = "B, EOF";
    checkLexerMatches(lg, "a", expecting);
  }

  @Test public void testEOFSuffixInFirstRule() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "A : 'a' EOF ;\n"+
      "B : 'a';\n");
    String expecting = "A, EOF";
    checkLexerMatches(lg, "a", expecting);
  }

  @Test public void testEOFByItself() throws Exception {
    LexerGrammar lg = new LexerGrammar(
      "lexer grammar L;\n"+
      "DONE : EOF ;\n"+
      "A : 'a';\n");
    String expecting = "A, DONE, EOF";
    checkLexerMatches(lg, "a", expecting);
  }

  protected void checkLexerMatches(LexerGrammar lg, String inputString, String expecting) {
    ATN atn = createATN(lg, true);
    CharStream input = new ANTLRInputStream(inputString);
    ATNState startState = atn.modeNameToStartState.get("DEFAULT_MODE");
    DOTGenerator dot = new DOTGenerator(lg);
    System.out.println(dot.getDOT(startState, true));

    List<String> tokenTypes = getTokenTypes(lg, atn, input);

    String result = Utils.join(tokenTypes.iterator(), ", ");
    System.out.println(tokenTypes);
    assertEquals(expecting, result);
  }

}
TOP

Related Classes of org.antlr.v4.test.TestATNLexerInterpreter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.