Source Code of com.google.java.contract.core.util.JavaTokenizer$Token

/*
 * Copyright 2010 Google Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 */
package com.google.java.contract.core.util;


import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;


import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.util.Iterator;
import java.util.NoSuchElementException;


/**
 * A tokenizer that processes Java expressions. It knows about words,
 * quoted strings, comments, and white space. Symbols are <em>not</em>
 * aggregated into operators.
 *
 * @author nhat.minh.le@huoc.org (Nhat Minh Lê)
 */
@Invariant("reader != null")
public class JavaTokenizer implements Iterator<JavaTokenizer.Token> {
  /**
   * The kind of a token.
   */
  public static enum TokenKind {
    /**
     * An identifier or keyword.
     */
    WORD,


    /**
     * A quoted literal.
     */
    QUOTE,


    /**
     * A punctuation symbol.
     */
    SYMBOL,


    /**
     * A comment.
     */
    COMMENT,


    /**
     * A sequence of whitespace.
     */
    SPACE;
  }


  /**
   * A token returned by an instance of JavaTokenizer.
   */
  public class Token {
    public final TokenKind kind;
    public final String text;
    public final int offset;


    protected Token(TokenKind kind, String text, int offset) {
      this.kind = kind;
      this.text = text;
      this.offset = offset;
    }
  }


  /**
   * The source of characters to tokenize.
   */
  protected PushbackReader reader;


  /**
   * The next token returned by this tokenizer, or {@code null} if
   * none (end of file or not read yet).
   */
  protected Token nextToken;


  /**
   * {@code true} if an error was found during tokenization.
   */
  protected boolean hasErrors_;


  /**
   * The error message, if an error has occurred. Can be {@code null}.
   */
  protected String errorMessage;


  /**
   * The number of characters read by this tokenizer.
   */
  protected int currentOffset;


  /**
   * Constructs a new JavaTokenizer reading characters from
   * {@code reader}.
   */
  @Requires("reader != null")
  public JavaTokenizer(Reader reader) {
    this.reader = new PushbackReader(reader);
    nextToken = null;
    hasErrors_ = false;
    currentOffset = 0;
  }


  /**
   * Reads a character from {@code reader}. In this implementation,
   * all read accesses to the reader pass through this method.
   *
   * @param allowEOF if {@code false}, throws IOException if EOF is
   * reached
   */
  @Ensures({
    "!allowEOF ? result >= 0 : result >= -1"
  })
  protected int readChar(boolean allowEOF) throws IOException {
    int c = reader.read();
    if (c != -1) {
      ++currentOffset;
    } else {
      if (!allowEOF) {
        throw new IOException();
      }
    }
    return c;
  }


  /**
   * Pushes back a character into {@code reader}. In this
   * implementation, all unread accesses to the reader pass through
   * this method.
   */
  protected void unreadChar(int c) throws IOException {
    if (c == -1) {
      return;
    }
    --currentOffset;
    reader.unread(c);
  }


  /**
   * Reads the next token into {@code nextToken}.
   *
   * @return {@code true} if a new token was successfully read
   */
  @Requires("nextToken == null")
  @Ensures("result == (nextToken != null)")
  protected boolean lex() throws IOException {
    int startOffset = currentOffset;
    StringBuilder buffer = new StringBuilder();
    int c = readChar(true);
    if (c == -1) {
      return false;
    }
    buffer.append((char) c);
    switch (c) {
      case '/':
        /* Comments. */
        c = readChar(true);
        switch (c) {
          case '/':
            buffer.append((char) c);
            do {
              c = readChar(false);
              buffer.append((char) c);
            } while (c != '\n');
            nextToken = new Token(TokenKind.COMMENT, buffer.toString(),
                                  startOffset);
            break;
          case '*':
            buffer.append((char) c);
            for (;;) {
              c = readChar(false);
              buffer.append((char) c);
              if (c == '*') {
                c = readChar(false);
                buffer.append((char) c);
                if (c == '/') {
                  break;
                }
              }
            }
            nextToken = new Token(TokenKind.COMMENT, buffer.toString(),
                                  startOffset);
            break;
          default:
            unreadChar(c);
            nextToken = new Token(TokenKind.SYMBOL, buffer.toString(),
                                  startOffset);
        }
        break;


      case '\'':
      case '"':
        /* Quoted strings. */
        int delim = c;
        for (;;) {
          c = readChar(false);
          buffer.append((char) c);
          if (c == delim) {
            break;
          }
          if (c == '\\') {
            buffer.append((char) readChar(false));
          }
        }
        nextToken = new Token(TokenKind.QUOTE, buffer.toString(), startOffset);
        break;


      default:
        if (Character.isJavaIdentifierStart(c)) {
          /* Identifiers. */
          while ((c = readChar(true)) != -1
                 && Character.isJavaIdentifierPart(c)) {
            buffer.append((char) c);
          }
          unreadChar(c);
          nextToken = new Token(TokenKind.WORD, buffer.toString(), startOffset);
        } else if (Character.isWhitespace(c)) {
          /* White space. */
          while ((c = readChar(true)) != -1 && Character.isWhitespace(c)) {
            buffer.append((char) c);
          }
          unreadChar(c);
          nextToken = new Token(TokenKind.SPACE, buffer.toString(),
                                startOffset);
        } else {
          /* Symbol. */
          nextToken = new Token(TokenKind.SYMBOL, buffer.toString(),
                                startOffset);
        }
    }
    return true;
  }


  /**
   * Returns the next token without consuming it.
   */
  public Token getNextToken() {
    if (nextToken == null) {
      try {
        if (!lex()) {
          throw new NoSuchElementException();
        }
      } catch (IOException e) {
        errorMessage = e.getMessage();
        hasErrors_ = true;
        throw new NoSuchElementException();
      }
    }
    return nextToken;
  }


  public int getCurrentOffset() {
    return currentOffset;
  }


  /**
   * Returns {@code true} if an error has been found.
   */
  public boolean hasErrors() {
    return hasErrors_;
  }


  @Requires("hasErrors()")
  public String getErrorMessage() {
    return errorMessage;
  }


  @Override
  public boolean hasNext() {
    if (nextToken != null) {
      return true;
    } else {
      try {
        return lex();
      } catch (IOException e) {
        errorMessage = e.getMessage();
        hasErrors_ = true;
        return false;
      }
    }
  }


  @Override
  public Token next() {
    Token token = getNextToken();
    nextToken = null;
    return token;
  }


  @Override
  public void remove() {
    throw new UnsupportedOperationException();
  }
}
Source Code of com.google.java.contract.core.util.JavaTokenizer$Token

Related Classes of com.google.java.contract.core.util.JavaTokenizer$Token