Package org.jboss.forge.addon.text.highlight.scanner

Source Code of org.jboss.forge.addon.text.highlight.scanner.GroovyScanner

package org.jboss.forge.addon.text.highlight.scanner;

import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;

import org.jboss.forge.addon.text.highlight.Encoder;
import org.jboss.forge.addon.text.highlight.Scanner;
import org.jboss.forge.addon.text.highlight.StringScanner;
import org.jboss.forge.addon.text.highlight.TokenType;
import org.jboss.forge.addon.text.highlight.WordList;

/*
* https://github.com/rubychan/coderay/blob/master/lib/coderay/scanners/groovy.rb
* Last update sha: e1d5e111d968639fa03a6074cf90535ecc90d0dd
*
*/
public class GroovyScanner implements Scanner
{
   private static final String[] GROOVY_KEYWORDS = new String[] {
                                "as", "assert", "def", "in"};


   private static final WordList<Boolean> KEYWORDS_EXPECTING_VALUE = new WordList<Boolean>(false)
               .add(new String[] {
                  "case", "instanceof", "new", "return", "throw", "typeof", "while", "as", "assert", "in"
               }, true);

   private static final String[] GROOVY_MAGIC_VARIABLES = new String[] {"it"};

   public static final WordList<TokenType> IDENT_KIND = JavaScanner.IDENT_KIND.clone()
               .add(GROOVY_KEYWORDS, TokenType.keyword)
               .add(GROOVY_MAGIC_VARIABLES, TokenType.local_variable);

   private static final Pattern ESCAPE = Pattern.compile(" [bfnrtv$\\n\\\\'\"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} ", Pattern.COMMENTS);
   private static final Pattern UNICODE_ESCAPE = Pattern.compile(" u[a-fA-F0-9]{4} ", Pattern.COMMENTS);
   private static final Pattern REGEXP_ESCAPE = Pattern.compile(" [bfnrtv\\n\\\\'\"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | \\d | [bBdDsSwW\\/] ", Pattern.COMMENTS);

   private static final Map<String, Pattern> STRING_CONTENT_PATTERN = new HashMap<String, Pattern>();
   {
      STRING_CONTENT_PATTERN.put("'", Pattern.compile("(?>\\\\[^\\\\'\\n]+|[^\\\\'\\n]+)+"));
      STRING_CONTENT_PATTERN.put("\"", Pattern.compile("[^\\\\$\"\\n]+"));
      STRING_CONTENT_PATTERN.put("'''", Pattern.compile("(?>[^\\\\']+|'(?!''))+"));
      STRING_CONTENT_PATTERN.put("\"\"\"", Pattern.compile("(?>[^\\\\$\"]+|\"(?!\"\"))+"));
      STRING_CONTENT_PATTERN.put("/", Pattern.compile("[^\\\\$\\/\\n]+"));
   }

   private static final Pattern SPACE = Pattern.compile(" \\s+ | \\\\\\n ", Pattern.COMMENTS);
   private static final Pattern COMMENT = Pattern.compile("   // [^\\n\\\\]* (?: \\\\. [^\\n\\\\]* )* | /\\* (?: .*? \\*/ | .* ) ", Pattern.DOTALL|Pattern.COMMENTS);
   private static final Pattern DOCTYPE = Pattern.compile("\\#!.*", Pattern.COMMENTS);
   private static final Pattern INCLUDE = Pattern.compile(" (?!as) " + JavaScanner.IDENT.pattern() + " (?: \\. " + JavaScanner.IDENT.pattern() + " )* (?: \\.\\* )? ", Pattern.COMMENTS);
   private static final Pattern IDENT = Pattern.compile(" " + JavaScanner.IDENT.pattern() + " | \\[\\] ", Pattern.COMMENTS);
   private static final Pattern AFTER_DEF = Pattern.compile("\\s*[({]");
   private static final Pattern SEMI_COLON = Pattern.compile(";");
   private static final Pattern START_BRACKET = Pattern.compile("\\{");
   private static final Pattern OPERATOR = Pattern.compile(" \\.\\.<? | \\*?\\.(?!\\d)@? | \\.& | \\?:? | [,?:(\\[] | -[->] | \\+\\+ |\n" +
         "              && | \\|\\| | \\*\\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? ", Pattern.COMMENTS);
   private static final Pattern END_BRACKET = Pattern.compile(" [)\\]}] ", Pattern.COMMENTS);
   private static final Pattern NUMBER = Pattern.compile("[\\d.]");
   private static final Pattern HEX = Pattern.compile("0[xX][0-9A-Fa-f]+");
   private static final Pattern OCTAL = Pattern.compile("(?>0[0-7]+)(?![89.eEfF])");
   private static final Pattern FLOAT = Pattern.compile("\\d+[fFdD]|\\d*\\.\\d+(?:[eE][+-]?\\d+)?[fFdD]?|\\d+[eE][+-]?\\d+[fFdD]?");
   private static final Pattern INTEGER = Pattern.compile("\\d+[lLgG]?");

   private static final Pattern MULTI_LINE_DELIMITER = Pattern.compile("'''|\"\"\"");
   private static final Pattern STRING_DELIMITER = Pattern.compile("[\"']");
   private static final Pattern START_REGEXP = Pattern.compile("\\/");
   private static final Pattern ANNOTATION = Pattern.compile(" @ " + JavaScanner.IDENT.pattern() + " ", Pattern.COMMENTS);
   private static final Pattern END_OPERATOR = Pattern.compile("\\/");
   private static final Pattern CONTENT = Pattern.compile(" \\\\ (?: " + ESCAPE.pattern() + " | " + UNICODE_ESCAPE.pattern() + " ) ", Pattern.DOTALL|Pattern.COMMENTS);

   private static final Pattern REGEXP_CONTENT = Pattern.compile(" \\\\ (?: " + REGEXP_ESCAPE.pattern() + " | " + UNICODE_ESCAPE.pattern() + " ) ", Pattern.DOTALL|Pattern.COMMENTS);
   private static final Pattern INLINE_IDENT = Pattern.compile("\\$ " + JavaScanner.IDENT.pattern() +" ", Pattern.DOTALL|Pattern.COMMENTS);
   private static final Pattern INLINE_DELIMITER = Pattern.compile(" \\$ \\{ ", Pattern.COMMENTS);
   private static final Pattern CONTENT_2 = Pattern.compile(" \\$ ", Pattern.DOTALL|Pattern.COMMENTS);
   private static final Pattern CONTENT_3 = Pattern.compile("\\\\. ", Pattern.DOTALL|Pattern.COMMENTS);
   private static final Pattern END_NEWLINE = Pattern.compile(" \\\\ | \\n ", Pattern.COMMENTS);

   public enum State
   {
      initial,
      multiline_string,
      regexp,
      string
   }

   public static final String OPTION_START_STATE = "state";

   public static final Type TYPE = new Type("GROOVY", "\\.(groovy|gvy|gradle)$");

   @Override
   public Type getType() {
      return TYPE;
   }

   @Override
   public void scan(StringScanner source, Encoder encoder, Map<String, Object> options) {
      State state = State.initial;
      if (options.containsKey(OPTION_START_STATE))
      {
         state = State.valueOf(String.valueOf(options.get(OPTION_START_STATE)));
      }
      // Object[] {State, stringDelimiter, inlineBlockParenDepth}
      Stack<Object[]> inlineBlockStack = new Stack<Object[]>();
      int inlineBlockParenDepth = 0;
      String stringDelimiter = null;
      String last_token = null;
      boolean import_clause = false;
      boolean class_name_follows = false;
      boolean after_def = false;
      boolean value_expected = true;
      TokenType kind = null;

      while (source.hasMore())
      {
         MatchResult m = null;

         switch (state)
         {
         case initial:

            if ((m = source.scan(SPACE)) != null)
            {
               encoder.textToken(m.group(), TokenType.space);
               if(m.group().indexOf("\n") != -1)
               {
                  import_clause = after_def = false;
                  if(!value_expected)
                  {
                     value_expected = true;
                  }
               }
               continue;
            }
            else if ((m = source.scan(COMMENT)) != null)
            {
               value_expected = true;
               after_def = false;
               encoder.textToken(m.group(), TokenType.comment);
            }
            else if ((m = source.scan(DOCTYPE)) != null)
            {
               encoder.textToken(m.group(), TokenType.doctype);
            }
            else if (import_clause && (m = source.scan(INCLUDE)) != null)
            {
               after_def = value_expected = false;
               encoder.textToken(m.group(), TokenType.include);
            }
            else if ((m = source.scan(IDENT)) != null)
            {
               kind = IDENT_KIND.lookup(m.group());
               value_expected = (kind == TokenType.keyword) && KEYWORDS_EXPECTING_VALUE.lookup(m.group());
               if (".".equals(last_token))
               {
                  kind = TokenType.ident;
               }
               else if (class_name_follows)
               {
                  kind = TokenType.class_;
                  class_name_follows = false;
               }
               else if (after_def && source.check(AFTER_DEF) != null)
               {
                  kind = TokenType.method;
                  after_def = false;
               }
               else if (kind == TokenType.ident && !"?".equals(last_token) && source.check(":") != null)
               {
                  kind = TokenType.key;
               }
               else
               {
                  if (m.group().equals("class") || (import_clause && m.group().equals("as")))
                  {
                     class_name_follows = true;
                  }
                  import_clause = (m.group().equals("import"));
                  if(m.group().equals("def"))
                  {
                     after_def = true;
                  }
               }
               encoder.textToken(m.group(), kind);
            }
            else if ((m = source.scan(SEMI_COLON)) != null)
            {
               import_clause = after_def = false;
               value_expected = true;
               encoder.textToken(m.group(), TokenType.operator);
            }
            else if ((m = source.scan(START_BRACKET)) != null)
            {
               class_name_follows = after_def = false;
               value_expected = true;
               encoder.textToken(m.group(), TokenType.operator);
               if (!inlineBlockStack.isEmpty())
               {
                  inlineBlockParenDepth += 1;
               }
            }
            else if ((m = source.scan(OPERATOR)) != null)
            {
               value_expected = true;
               //value_expected = :regexp if match == '~';
               after_def = false;

               encoder.textToken(m.group(), TokenType.operator);
            }
            else if ((m = source.scan(END_BRACKET)) != null)
            {
               value_expected = after_def = false;
               if (!inlineBlockStack.isEmpty() && m.group().equals("}"))
               {
                  inlineBlockParenDepth -= 1;
                  if (inlineBlockParenDepth == 0 ) // # closing brace of inline block reached
                  {
                     encoder.textToken(m.group(),  TokenType.inline_delimiter);
                     encoder.endGroup(TokenType.inline);
                     Object[] inlineBlock = inlineBlockStack.pop();
                     state = (State)inlineBlock[0];
                     stringDelimiter = (String)inlineBlock[1];
                     inlineBlockParenDepth = (int)inlineBlock[2];
                     continue;
                  }
               }
               encoder.textToken(m.group(), TokenType.operator);
            }
            else if (source.check(NUMBER) != null)
            {
               after_def = value_expected = false;
               if ((m = source.scan(HEX)) != null)
               {
                  encoder.textToken(m.group(), TokenType.hex);
               }
               else if ((m = source.scan(OCTAL)) != null)
               {
                  encoder.textToken(m.group(), TokenType.octal);
               }
               else if ((m = source.scan(FLOAT)) != null)
               {
                  encoder.textToken(m.group(), TokenType.float_);
               }
               else if ((m = source.scan(INTEGER)) != null)
               {
                  encoder.textToken(m.group(), TokenType.integer);
               }
            }
            else if ((m = source.scan(MULTI_LINE_DELIMITER)) != null)
            {
               after_def = value_expected = false;
               state = State.multiline_string;
               encoder.beginGroup(TokenType.string);
               stringDelimiter = m.group();
               encoder.textToken(m.group(), TokenType.delimiter);
            }
            else if ((m = source.scan(STRING_DELIMITER)) != null)
            {
               after_def = value_expected = false;
               state = m.group().equals("/") ? State.regexp :State.string;
               encoder.beginGroup(TokenType.valueOf(state.name()));
               stringDelimiter = m.group();
               encoder.textToken(m.group(), TokenType.delimiter);
            }
            else if (value_expected && (m = source.scan(START_REGEXP)) != null)
            {
               after_def = value_expected = false;
               encoder.beginGroup(TokenType.regexp);
               state = State.regexp;
               stringDelimiter = "/";
               encoder.textToken(m.group(), TokenType.delimiter);
            }
            else if ((m = source.scan(ANNOTATION)) != null)
            {
               after_def = value_expected = false;
               encoder.textToken(m.group(), TokenType.annotation);
            }
            else if ((m = source.scan(END_OPERATOR)) != null)
            {
               after_def = false;
               value_expected = true;
               encoder.textToken(m.group(), TokenType.operator);
            }
            else
            {
               encoder.textToken(source.next(), TokenType.error);
            }
            break;

         case string:
         case regexp:
         case multiline_string:

            if ((m = source.scan(STRING_CONTENT_PATTERN.get(stringDelimiter))) != null)
            {
               encoder.textToken(m.group(), TokenType.content);
            }
            else if ((m = source.scan(state == State.multiline_string ? "'''|\"\"\"":"[\"'\\/]")) != null)
            {
               encoder.textToken(m.group(), TokenType.delimiter);
               if (state == State.regexp)
               {
                  MatchResult modifiers = source.scan("[ix]+");
                  if (modifiers != null && !modifiers.group().equals(""))
                  {
                     encoder.textToken(modifiers.group(), TokenType.modifier);
                  }
               }
               if (state == State.multiline_string)
               {
                  state = State.string;
               }
               encoder.endGroup(TokenType.string);
               stringDelimiter = null;
               after_def = value_expected = false;
               state = State.initial;
               continue;
            }
            else if ((state == State.string || state == State.multiline_string) && (m = source.scan(CONTENT)) != null)
            {
               if (stringDelimiter.charAt(0) == '\'' && !(m.group().equals("\\\\") || m.group().equals("\\'")))
               {
                  encoder.textToken(m.group(), TokenType.content);
               }
               else
               {
                  encoder.textToken(m.group(), TokenType.char_);
               }
            }
            else if (state == State.regexp && (m = source.scan(REGEXP_CONTENT)) != null)
            {
               encoder.textToken(m.group(), TokenType.char_);
            }
            else if ((m = source.scan(INLINE_IDENT)) != null)
            {
               encoder.beginGroup(TokenType.inline);
               encoder.textToken("$", TokenType.inline_delimiter);
               String ident = m.group().substring(1);
               encoder.textToken(ident, IDENT_KIND.lookup(ident));
               encoder.endGroup(TokenType.inline);
            }
            else if ((m = source.scan(INLINE_DELIMITER)) != null)
            {
               encoder.beginGroup(TokenType.inline);
               encoder.textToken(m.group(), TokenType.inline_delimiter);
               inlineBlockStack.push(new Object[] {state, stringDelimiter, inlineBlockParenDepth});
               inlineBlockParenDepth = 1;
               state = State.initial;
            }
            else if ((m = source.scan(CONTENT_2)) != null)
            {
               encoder.textToken(m.group(), TokenType.content);
            }
            else if ((m = source.scan(CONTENT_3)) != null)
            {
               encoder.textToken(m.group(), TokenType.content);
            }
            else if ((m = source.scan(END_NEWLINE)) != null)
            {
               encoder.endGroup(state == State.regexp? TokenType.regexp:TokenType.string);
               encoder.textToken(m.group(), TokenType.error);
               after_def = value_expected = false;
               state = State.initial;
            }
            else
            {
               throw new RuntimeException("Else case reached; unhandled " + source.peek(1));
            }
            break;
         default:
            throw new RuntimeException("Unknown state " + state);
         }
         if (kind != TokenType.space && kind != TokenType.comment && kind != TokenType.doctype)
         {
            last_token = m == null ? null:m.group();
         }
      }
      if (state == State.multiline_string || state == State.string || state == State.regexp)
      {
         encoder.endGroup(state == State.regexp? TokenType.regexp:TokenType.string);
      }

      while (!inlineBlockStack.isEmpty())
      {
         state = (State)inlineBlockStack.pop()[0];
         encoder.endGroup(TokenType.inline);
         encoder.endGroup(state == State.regexp? TokenType.regexp:TokenType.string);
      }
   }

}
TOP

Related Classes of org.jboss.forge.addon.text.highlight.scanner.GroovyScanner

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.