Package cambridge.parser

Source Code of cambridge.parser.TemplateTokenizer

package cambridge.parser;

import cambridge.model.ExtensionPoint;
import cambridge.parser.tokens.AssignToken;
import cambridge.parser.tokens.AttributeNameToken;
import cambridge.parser.tokens.AttributeValueToken;
import cambridge.parser.tokens.CDATAToken;
import cambridge.parser.tokens.CloseTagToken;
import cambridge.parser.tokens.CommentToken;
import cambridge.parser.tokens.DocTypeToken;
import cambridge.parser.tokens.EOFToken;
import cambridge.parser.tokens.EOLToken;
import cambridge.parser.tokens.ExpressionToken;
import cambridge.parser.tokens.OpenTagToken;
import cambridge.parser.tokens.ParserDirectiveToken;
import cambridge.parser.tokens.StringToken;
import cambridge.parser.tokens.TagEndToken;
import cambridge.parser.tokens.TagStringToken;
import cambridge.parser.tokens.Token;
import cambridge.parser.tokens.WSToken;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Locale;


/**
* The tokenizer reads from an InputStream or Reader and generates
* tokens that will be consumed by a TemplateParser.
* <p/>
* The generated tokens are objects of type Token and every Token
* has a TokenType.
*
* @see cambridge.parser.tokens.Token
* @see cambridge.parser.tokens.TokenType
* @see TemplateParser
*/
public class TemplateTokenizer extends Tokenizer {
   /**
    * Creates a tokenizer over character input.
    *
    * @param reader the source handed unchanged to the {@code Tokenizer} superclass constructor
    * @throws IOException if the superclass constructor throws it
    */
   public TemplateTokenizer(Reader reader) throws IOException {
      super(reader);
   }

   /**
    * Creates a tokenizer over byte input.
    *
    * @param in the stream handed unchanged to the {@code Tokenizer} superclass constructor
    * @throws IOException if the superclass constructor throws it
    */
   public TemplateTokenizer(InputStream in) throws IOException {
      super(in);
   }

   void setDirective(String property, String value) {
      if ("consumeScriptTag".equals(property)) {
         consumeScriptTag = "true".equals(value);
      }
   }

   // When true and the current tag is "script", plain text is scanned as one run that
   // only stops at end of input, "</script", "${" or "%{". Changed through setDirective.
   private boolean consumeScriptTag = true;

   /** Tokenizer modes; nextToken() picks the handler for the current mode. */
   enum State {
      INITIAL_STATE, // Outside of any tag
      TAG, // After <X
      TAG_EXPECTING_ATT_VALUE, // After '=' inside a tag
      TAG_EXPECTING_SQ, // Reading a single-quoted attribute value
      TAG_EXPECTING_DQ // Reading a double-quoted attribute value
   }

   // Lower-cased name of the most recently opened tag; tagHandler resets it to null on "/>".
   private String currentTag;

   // Current tokenizer mode.
   private State state = State.INITIAL_STATE;

   public Token nextToken() throws IOException {
      int col = getColumn();
      int line = getLineNo();
      char c = nextChar();

      if (c == Tokenizer.EOL) {
         state = State.INITIAL_STATE;
         return new EOFToken(line, col, null, getLineNo(), 0);

         // END OF LINE
      } else if (c == '\r') {
         if (peek(1) == '\n') {
            nextChar();
            return new EOLToken(line, col, "\r\n", getLineNo(), 0);
         }
         return new EOLToken(line, col, "\r", line + 1, 0);
      } else if (c == '\n') {
         return new EOLToken(line, col, "\n", line + 1, 0);

         // WHITE SPACE
      } else if (Character.isWhitespace(c)) {
         StringBuilder builder = new StringBuilder();
         builder.append(c);
         char peek = peek(1);
         while (Character.isWhitespace(peek) && peek != '\r' && peek != '\n') {
            builder.append(nextChar());
            peek = peek(1);
         }

         return new WSToken(line, col, builder.toString(), getLineNo(), getColumn());
      }

      if (state == State.INITIAL_STATE) {
         return initialStateHandler(c, col, line);
      } else if (state == State.TAG) {
         return tagHandler(c, col, line);
      } else if (state == State.TAG_EXPECTING_ATT_VALUE) {
         return expectingAttributeValueHandler(c, col, line);
      } else if (state == State.TAG_EXPECTING_DQ) {
         return expectingDQHandler(c, col, line);
      } else if (state == State.TAG_EXPECTING_SQ) {
         return expectingSQHandler(c, col, line);
      }

      return new StringToken(line, col, "" + c, getLineNo(), getColumn());
   }

   private Token expectingSQHandler(char c, int col, int line) throws IOException {
      if (c == '\'') {
         state = State.TAG;

         AttributeValueToken tok = new AttributeValueToken(line, col, "", getLineNo(), getColumn());
         tok.setQuotes(AttributeValueToken.SINGLE_QUOTES);
         return tok;
      }
      StringBuilder builder = new StringBuilder();

      builder.append(c);
      while (true) {
         c = nextChar();
         if (c == Tokenizer.EOL || c == '\'') break;

         if (c == '\\' && peek(1) == '\'') {
            nextChar();
            builder.append("'");
         } else {
            builder.append(c);
         }
      }

      state = State.TAG;
      AttributeValueToken tok = new AttributeValueToken(line, col, builder.toString(), getLineNo(), getColumn());
      tok.setQuotes(AttributeValueToken.SINGLE_QUOTES);
      return tok;
   }

   private Token expectingDQHandler(char c, int col, int line) throws IOException {
      StringBuilder builder = new StringBuilder();

      if (c == '"') {
         state = State.TAG;
         AttributeValueToken tok = new AttributeValueToken(line, col, "", getLineNo(), getColumn());
         tok.setQuotes(AttributeValueToken.DOUBLE_QUOTES);
         return tok;
      }
      builder.append(c);
      while (true) {
         c = nextChar();
         if (c == Tokenizer.EOL || c == '"') break;

         if (c == '\\' && peek(1) == '"') {
            nextChar();
            builder.append("\"");
         } else {
            builder.append(c);
         }
      }

      state = State.TAG;

      AttributeValueToken tok = new AttributeValueToken(line, col, builder.toString(), getLineNo(), getColumn());
      tok.setQuotes(AttributeValueToken.DOUBLE_QUOTES);
      return tok;
   }

   private Token expectingAttributeValueHandler(char c, int col, int line) throws IOException {
      if (c == '\'') {
         state = State.TAG_EXPECTING_SQ;
         return expectingSQHandler(nextChar(), col, line);
      }
      if (c == '"') {
         state = State.TAG_EXPECTING_DQ;
         return expectingDQHandler(nextChar(), col, line);
      }

      if (c == '$' && peek(1) == '{') {
         return expressionToken(col, line, false);
      }

      if (c == '%' && peek(1) == '{') {
         return expressionToken(col, line, true);
      }

      ArrayList<ExtensionPoint> extensionPoints = TemplateParser.getExtensionPoints();
      if (extensionPoints != null) {
         for (ExtensionPoint p : extensionPoints) {
            String opener = p.getTagOpener();
            int length = opener.length();
            if (length > 1 && c == opener.charAt(0) && opener.substring(1).equals(peekString(length - 1))) {
               nextChar(length - 1);
               return p.getToken(this, col, line);
            }
         }
      }

      if (c == '>') {
         state = State.INITIAL_STATE;
         return new TagEndToken(line, col, ">", getLineNo(), getColumn());
      }

      StringBuilder builder = new StringBuilder();

      while (true) {
         char peek = peek(1);
         builder.append(c);
         if (Character.isWhitespace(peek) || peek == Tokenizer.EOL || peek == '>' || peek == '=' || peek == '"' || peek == '\'') {
            break;
         }


         //builder.append(peek);
         c = nextChar();
      }

      state = State.TAG;
      AttributeValueToken tok = new AttributeValueToken(line, col, builder.toString(), getLineNo(), getColumn());
      tok.setQuotes(AttributeValueToken.NO_QUOTES);
      return tok;
   }

   private Token expressionToken(int col, int line, boolean raw) throws IOException {
      char c;
      nextChar(); // Consume {
      StringBuilder builder = new StringBuilder();
      int state = 1;

      c = nextChar();
      while (state != 0) {
         if (c == '{') {
            state++;
         } else if (c == '}') {
            state--;
         }

         if (state == 0) {
            break;
         }

         builder.append(c);

         if (peek(1) == Tokenizer.EOL) {
            break;
         }

         c = nextChar();
      }

      if (peek(1) == '(') {
         nextChar();
         c = nextChar();
         ArrayList<String> filters = new ArrayList<String>();
         StringBuilder filter = new StringBuilder();
         while (c != ')') {
            if (c == '|') {
               if (filter.length() != 0) {
                  filters.add(filter.toString());
               }
               filter.setLength(0);
            } else {
               filter.append(c);
            }

            if (peek(1) == Tokenizer.EOL) {
               break;
            }

            c = nextChar();
         }

         if (filter.length() != 0 && filters.size() == 0) {
            filters.add(filter.toString());
         }

         return new ExpressionToken(line, col, builder.toString(), getLineNo(), getColumn(), raw, filters);
      }

      return new ExpressionToken(line, col, builder.toString(), getLineNo(), getColumn(), raw);
   }

   private Token tagHandler(char c, int col, int line) throws IOException {
      if (c == '/') {
         if (peek(1) == '>') {
            nextChar();
            state = State.INITIAL_STATE;

            currentTag = null;

            return new TagEndToken(line, col, "/>", getLineNo(), getColumn());
         } else {
            return new TagStringToken(line, col, "/", getLineNo(), getColumn());
         }

         // TAG END
      } else if (c == '>') {
         state = State.INITIAL_STATE;
         return new TagEndToken(line, col, ">", getLineNo(), getColumn());
      } else if (c == '$' && peek(1) == '{') {
         return expressionToken(col, line, false);
      } else if (c == '%' && peek(1) == '{') {
         return expressionToken(col, line, true);
      } else if (c == '=') {
         state = State.TAG_EXPECTING_ATT_VALUE;
         return new AssignToken(line, col, "=", getLineNo(), getColumn());
         // ATTRIBUTES -- Somewhere betweeen <X and >
      } else if (CharUtil.isName(c)) {
         // These characters should not be here...

         if (c == '\'') {
            return new TagStringToken(line, col, "'", getLineNo(), getColumn());
         }
         if (c == '"') {
            return new TagStringToken(line, col, "\"", getLineNo(), getColumn());
         }

         StringBuilder builder = new StringBuilder();
         char peek = peek(1);
         builder.append(c);
         while (CharUtil.isNameChar(peek)) {
            builder.append(nextChar());
            peek = peek(1);
         }

         return new AttributeNameToken(line, col, builder.toString(), getLineNo(), getColumn());
      } else {
         ArrayList<ExtensionPoint> extensionPoints = TemplateParser.getExtensionPoints();
         if (extensionPoints != null) {
            for (ExtensionPoint p : extensionPoints) {
               String opener = p.getTagOpener();
               int length = opener.length();
               if (length > 1 && c == opener.charAt(0) && opener.substring(1).equals(peekString(length - 1))) {
                  nextChar(length - 1);
                  return p.getToken(this, col, line);
               }
            }
         }

         StringBuilder builder = new StringBuilder();
         builder.append(c);
         while (!Character.isWhitespace(peek(1)) && peek(1) != '>' && !CharUtil.isName(peek(1))) {
            builder.append(nextChar());
         }


         return new TagStringToken(line, col, builder.toString(), getLineNo(), getColumn());
      }
   }

   private Token initialStateHandler(char c, int col, int line) throws IOException {
      // TAGS, COMMENTS, PARSER DIRECTIVES AND DOCTYPES
      if (c == '<') {
         StringBuilder builder = new StringBuilder();
         // COMMENTS, PARSER DIRECTIVES AND DOCTYPES

         if (peek(1) == '!') {
            builder.append(c);
            c = nextChar();
            if (peek(1) == '-' && peek(2) == '-' && peek(3) == '$') {
               nextChar(2);
               // Comment block
               builder.append("!--");

               String directive = "";
               String args;

               while (true) {
                  if (peek(1) == Tokenizer.EOL) {
                     return new CommentToken(line, col, builder.toString(), getLineNo(), getColumn());
                  }

                  if ("".equals(directive) && Character.isWhitespace(peek(1))) {
                     directive = builder.substring(5);
                  }

                  if (peek(1) == '-' && peek(2) == '-' && peek(3) == '>') {
                     args = builder.substring(5 + directive.length() + 1).trim();
                     break;
                  }
                  builder.append(nextChar());
               }
               nextChar(3);

               builder.append("-->");

               ParserDirectiveToken tok = new ParserDirectiveToken(line, col, builder.toString(), getLineNo(), getColumn(), directive, args);
               if (peek(1) == '\r') {
                  if (peek(2) == '\n') {
                     nextChar(2);
                     tok.setTrailingSpace("\r\n");
                  } else {
                     tok.setTrailingSpace("\r");
                     nextChar();
                  }
               } else if (peek(1) == '\n') {
                  tok.setTrailingSpace("\n");
                  nextChar();
               }
               return tok;
            } else if (peek(1) == '-' && peek(2) == '-') {
               nextChar(2);
               // Comment block
               builder.append("!--");

               while (true) {
                  if (peek(1) == Tokenizer.EOL) {
                     return new CommentToken(line, col, builder.toString(), getLineNo(), getColumn());
                  }
                  if (peek(1) == '-' && peek(2) == '-' && peek(3) == '>') break;
                  builder.append(nextChar());
               }
               nextChar(3);
               builder.append("-->");
               return new CommentToken(line, col, builder.toString(), getLineNo(), getColumn());


               // CDATA Blocks
            } else if ("[CDATA[".equals(peekString(7))) {
               c = nextChar(7);
               builder.append("![CDATA");
               while (true) {
                  builder.append(c);
                  if (peek(1) == Tokenizer.EOL) {
                     return new CDATAToken(line, col, builder.toString(), getLineNo(), getColumn());
                  } else {
                     if (peek(1) == ']' && peek(2) == ']' && peek(3) == '>') {
                        builder.append("]]>");
                        break;
                     }
                  }
                  c = nextChar();
               }
               nextChar(3);
               return new CDATAToken(line, col, builder.toString(), getLineNo(), getColumn());

               // DOCTYPE DECLARATIONS
            } else {
               while (c != '>') {
                  builder.append(c);
                  if (peek(1) == Tokenizer.EOL) {
                     break;
                  }
                  c = nextChar();
               }

               if (c == '>') {
                  builder.append(c);
               }
               return new DocTypeToken(line, col, builder.toString(), getLineNo(), getColumn());
            }
            // TAG CLOSE </X>
         } else if (peek(1) == '/') {
            builder.append(c);
            c = nextChar();
            String tagName = null;
            while (c != '>') {
               builder.append(c);
               if (peek(1) == Tokenizer.EOL) {
                  break;
               }
               c = nextChar();

               if (tagName == null && (Character.isWhitespace(c) || c == '>')) {
                  tagName = builder.substring(2).toLowerCase();
               }
            }

            if (c == '>') {
               builder.append(c);
            }

            state = State.INITIAL_STATE;
            CloseTagToken tok = new CloseTagToken(line, col, builder.toString(), getLineNo(), getColumn());

            tok.setTagName(tagName);

            return tok;
            // OPEN TAG <X
         } else if (CharUtil.isLetter((int) peek(1))) {
            c = nextChar();
            builder.append(c);
            c = peek(1);

            // TAG
            // @todo tum valid karakter range'leri girilmeli
            while (CharUtil.isNameChar((int) c)) {
               builder.append(nextChar());
               c = peek(1);
            }
            currentTag = builder.substring(0).toLowerCase();
            state = State.TAG;
            return new OpenTagToken(line, col, builder.toString(), getLineNo(), getColumn());
         } else {
            builder.append(c);
            return new StringToken(line, col, builder.toString(), getLineNo(), getColumn());
         }
         // Expression
      } else if (c == '$' && peek(1) == '{') {
         return expressionToken(col, line, false);
      } else if (c == '%' && peek(1) == '{') {
         return expressionToken(col, line, true);
      } else {

         ArrayList<ExtensionPoint> extensionPoints = TemplateParser.getExtensionPoints();
         if (extensionPoints != null) {
            for (ExtensionPoint p : extensionPoints) {
               String opener = p.getTagOpener();
               int length = opener.length();
               if (length > 1 && c == opener.charAt(0) && opener.substring(1).equals(peekString(length - 1))) {
                  nextChar(length - 1);
                  return p.getToken(this, col, line);
               }
            }
         }

         StringBuilder builder = new StringBuilder();
         builder.append(c);

         if (consumeScriptTag && "script".equals(currentTag)) {
            while (true) {
               if (peek(1) == Tokenizer.EOL
                  || ("</script".equalsIgnoreCase(peekString(8)))
                  || (peek(1) == '%' && peek(2) == '{')
                  || (peek(1) == '$' && peek(2) == '{')) {
                  break;
               }
               builder.append(nextChar());
            }
         } else {
            while (true) {
               if (peek(1) == Tokenizer.EOL
                  || (peek(1) == '<' && CharUtil.isName(peek(2)))
                  || (peek(1) == '<' && peek(2) == '!')
                  || (peek(1) == '<' && peek(2) == '/' && CharUtil.isName(peek(3)))
                  || (peek(1) == '$' && peek(2) == '{')
                  || (peek(1) == '%' && peek(2) == '{')
                  ) {
                  break;
               }
               builder.append(nextChar());
            }
         }

         return new StringToken(line, col, builder.toString(), getLineNo(), getColumn());
      }
   }
}
TOP

Related Classes of cambridge.parser.TemplateTokenizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.