// Source code of org.eclipse.assemblyformatter.ir.Tokenizer

package org.eclipse.assemblyformatter.ir;


import org.eclipse.assemblyformatter.ir.lowlevel.CharacterLiteral;
import org.eclipse.assemblyformatter.ir.lowlevel.Comment;
import org.eclipse.assemblyformatter.ir.lowlevel.IntegerLiteral;
import org.eclipse.assemblyformatter.ir.lowlevel.LineSeparator;
import org.eclipse.assemblyformatter.ir.lowlevel.Symbol;
import org.eclipse.assemblyformatter.ir.lowlevel.WhiteSpace;
import org.eclipse.assemblyformatter.ir.lowlevel.Comment.Type;


/**
 * This class is for getting tokens for the enclosing document.
 * 
 * The main method is run(). Token types (low-level elements):
 * <ul>
 * <li>Symbol</li>
 * <li>IntegerLiteral</li>
 * <li>WhiteSpace</li>
 * <li>LineSeparator</li>
 * <li>Comment</li>
 * <li>CharacterLiteral</li>
 * </ul>
 * 
 * Other elements are high-level and are obtained from tokens.
 * 
 */
public class Tokenizer {
  private String content;
  private String lineSeparator;
  private int position;
  private char character; // current character
  private boolean moveSuccess = false;


  public void setContent(String content) {
    this.content = content;
    determineLineSpearator();
  }


  public void determineLineSpearator() {
    int index = content.indexOf('\n');
    if (index >= 0) {
      lineSeparator = "\n"; // Unix?
      if (index > 0) {
        if (content.charAt(index - 1) == '\r') {
          lineSeparator = "\r\n"; // Windows
        }
      }
    } else {
      index = content.indexOf('\r');
      if (index >= 0) {
        lineSeparator = "\r"; // Mac
      }
    }
  }






  private void moveForward() {
    if (position < content.length() - 1) {
      position++;
      moveSuccess = true;
    } else {
      moveSuccess = false;
    }
    if (position >= 0) {
      character = content.charAt(position);
    } else {
      // position is invalid
      // character is undefined
    }
  }


  /**
   * Moves the buffer pointer if prefix follows.
   * 
   * @param prefix
   *            The prefix required in buffer at current position.
   */
  private void moveForward(String prefix) {
    moveSuccess = false;
    final int prefixLength = prefix.length();
    if ((position + prefixLength) < (content.length() - 1)) {
      boolean flag = true;
      for (int i = 0; i < prefixLength; i++) {
        if (content.charAt(position + i) != prefix.charAt(i)) {
          flag = false;
          break;
        }
      }
      if (flag) {
        position += prefixLength;
        moveBackward();
        moveSuccess = true;
      }
    }
  }


  private void moveFarForward() {
    position = content.length() - 1;
  }


  private void moveBackward() {
    if (position >= 0) {
      position--;
      moveSuccess = true;
    } else {
      position = -1;
      moveSuccess = true;
    }
  }


  /**
   * Call this to get the linked list of document sections.
   */
  public Section run() {
    Section base = null;
    Section current = null;


    position = -1;


    while (true) {
      Section section = null;


      section = determineNextSection();


      if (section != null) {
        if (base == null) {
          base = section;
        }
        if (current != null) {
          current.setNextSection(section);
        }
        current = section;
      } else {
        break;
      }
    }
    
    return base;
  }


  /**
   * Gets the next document section, whatever it is.
   * 
   * This function is in essence a state machine.
   */
  private Section determineNextSection() {
    Section section = null;


    moveForward();
    if (!moveSuccess) {
      return null;
    }


    int i;
    switch (character) {
    case ' ':
    case '\t':
      section = new WhiteSpace();
      section.setOffset(position);
      { // WSPACE branch
        boolean flag = true;
        while (flag) {
          moveForward();
          if (moveSuccess) {
            if ((character == ' ') || (character == '\t')) {
              ;
            } else {
              section.setEnd(position);
              moveBackward();
              flag = false;
            }
          } else {
            section.setEnd(position);
            flag = false;
          }
        }
      }
      break;
    case ';':
      section = new Comment(Type.A);
      section.setOffset(position);
      i = content.indexOf(lineSeparator, position);


      if (i > position) {
        position = i - 1;
        section.setEnd(i);
      } else {
        // line separator is not found
        moveFarForward();
        section.setEnd(content.length());
      }
      break;
    case '/': // similar with case ';'
      moveForward();
      if (!moveSuccess) {
        break;
      }
      if (character == '/') {
        section = new Comment(Type.CPP);
        section.setOffset(position - 1);
        i = content.indexOf(lineSeparator, position);


        if (i > position) {
          position = i - 1;
          section.setEnd(i);
        } else {
          // line separator is not found
          moveFarForward();
          section.setEnd(content.length());
        }
      } else {
        if (character == '*') {
          section = new Comment(Type.C);
          section.setOffset(position - 1);


          boolean flag = true;
          while (flag) {
            moveForward();
            if (moveSuccess) {
              if (character == '*') {
                moveForward();
                if (moveSuccess) {
                  if (character == '/') {
                    flag = false;
                  }
                } else {
                  moveFarForward();
                  flag = false;
                }
              } else {
                ;
              }
            } else {
              moveFarForward();
              flag = false;
            }
          }


          section.setEnd(position);
        } else {
          moveBackward();
        }
      }
      break;
    default:
      if (Character.isDigit(character)) {
        // INT BRANCH
        section = new IntegerLiteral();
        section.setOffset(position);
        boolean flag = true;
        while (flag) {
          moveForward();
          if (moveSuccess) {
            if (Character.isDigit(character)) {
              ;
            } else {
              section.setEnd(position);
              moveBackward();
              flag = false;
            }
          } else {
            section.setEnd(position);
            flag = false;
          }
        }
      } else {
        // TODO Review symbol format (__ is permitted)
        if (Character.isLetter(character) || (character == '_')) {
          // SYMBOL BRANCH
          section = new Symbol();
          section.setOffset(position);
          boolean flag = true;
          while (flag) {
            moveForward();
            if (moveSuccess) {
              if (Character.isLetter(character)
                  || Character.isDigit(character)
                  || (character == '_')) {
                ;
              } else {
                section.setEnd(position);
                moveBackward();
                flag = false;
              }
            } else {
              section.setEnd(position);
              flag = false;
            }
          }
        } else {
          // verify line separator case
          final int p = position;
          moveForward(lineSeparator);
          if (moveSuccess) {
            section = new LineSeparator();
            section.setOffset(p);
            section.setLength(lineSeparator.length());
          }
        }
      }
    }


    if (section == null) {
      // GENERAL CHARACTER BRANCH
      section = new CharacterLiteral();
      section.setOffset(position);
      section.setLength(1);
    }


    return section;
  }
}
// Source code of org.eclipse.assemblyformatter.ir.Tokenizer

// Related classes of org.eclipse.assemblyformatter.ir.Tokenizer