package org.eclipse.assemblyformatter.ir;
import org.eclipse.assemblyformatter.ir.lowlevel.CharacterLiteral;
import org.eclipse.assemblyformatter.ir.lowlevel.Comment;
import org.eclipse.assemblyformatter.ir.lowlevel.IntegerLiteral;
import org.eclipse.assemblyformatter.ir.lowlevel.LineSeparator;
import org.eclipse.assemblyformatter.ir.lowlevel.Symbol;
import org.eclipse.assemblyformatter.ir.lowlevel.WhiteSpace;
import org.eclipse.assemblyformatter.ir.lowlevel.Comment.Type;
/**
 * Splits the content of a document into a linked list of low-level tokens.
 *
 * The entry point is {@link #run()}. Token types (low-level elements):
 * <ul>
 * <li>Symbol</li>
 * <li>IntegerLiteral</li>
 * <li>WhiteSpace</li>
 * <li>LineSeparator</li>
 * <li>Comment</li>
 * <li>CharacterLiteral</li>
 * </ul>
 *
 * Other elements are high-level and are obtained from tokens.
 *
 * Every token gets its start via {@code setOffset} and either an end via
 * {@code setEnd} or a length via {@code setLength}. The end passed to
 * {@code setEnd} is always the offset just past the last character of the
 * token.
 */
public class Tokenizer {
    /** Separator assumed when the content contains no line break at all. */
    private static final String DEFAULT_LINE_SEPARATOR = "\n";

    /** Character classes that can form a multi-character run token. */
    private enum RunKind {
        WHITE_SPACE, DIGITS, SYMBOL
    }

    private String content = "";
    private String lineSeparator = DEFAULT_LINE_SEPARATOR;
    /** Index of the current character; -1 means "before the first one". */
    private int position;
    private char character; // current character
    private boolean moveSuccess = false;

    /**
     * Sets the text to tokenize and detects its line separator.
     *
     * @param content
     *            The document text; {@code null} is treated as empty text.
     */
    public void setContent(String content) {
        this.content = (content == null) ? "" : content;
        determineLineSpearator();
    }

    /**
     * Detects the line separator from the first line break in the content:
     * "\r\n" if the first '\n' is preceded by '\r', "\n" for a bare '\n',
     * "\r" if only '\r' occurs. When the content has no line break the
     * separator falls back to "\n", so it is never null and never left over
     * from a previously set content.
     */
    public void determineLineSpearator() {
        lineSeparator = DEFAULT_LINE_SEPARATOR;
        final int lineFeed = content.indexOf('\n');
        if (lineFeed >= 0) {
            if ((lineFeed > 0) && (content.charAt(lineFeed - 1) == '\r')) {
                lineSeparator = "\r\n"; // Windows
            }
        } else if (content.indexOf('\r') >= 0) {
            lineSeparator = "\r"; // Mac
        }
    }

    /**
     * Advances to the next character if there is one and reloads
     * {@code character}. {@code moveSuccess} reports whether the pointer
     * actually moved; at the end of the content it stays on the last
     * character.
     */
    private void moveForward() {
        if (position < content.length() - 1) {
            position++;
            moveSuccess = true;
        } else {
            moveSuccess = false;
        }
        if ((position >= 0) && (position < content.length())) {
            character = content.charAt(position);
        }
    }

    /**
     * Moves the buffer pointer if prefix follows.
     *
     * On success the pointer is left on the last character of the prefix, so
     * that the next {@link #moveForward()} lands on the character after it.
     * On failure the pointer is not moved.
     *
     * @param prefix
     *            The prefix required in buffer at current position.
     */
    private void moveForward(String prefix) {
        moveSuccess = false;
        if (prefix.length() == 0) {
            // An empty prefix would move the pointer backwards.
            return;
        }
        // startsWith() does the bounds check, so a prefix that ends exactly
        // at the end of the content is matched as well.
        if ((position >= 0) && content.startsWith(prefix, position)) {
            position += prefix.length() - 1;
            moveSuccess = true;
        }
    }

    /** Places the pointer on the last character of the content. */
    private void moveFarForward() {
        position = content.length() - 1;
    }

    /** Steps one character back, never going below -1. */
    private void moveBackward() {
        if (position >= 0) {
            position--;
        } else {
            position = -1;
        }
        moveSuccess = true;
    }

    /**
     * Call this to get the linked list of document sections.
     *
     * @return The first section, or {@code null} when the content is empty.
     */
    public Section run() {
        Section base = null;
        Section current = null;
        position = -1;
        while (true) {
            final Section section = determineNextSection();
            if (section == null) {
                break;
            }
            if (base == null) {
                base = section;
            }
            if (current != null) {
                current.setNextSection(section);
            }
            current = section;
        }
        return base;
    }

    /**
     * Gets the next document section, whatever it is.
     *
     * This function is in essence a state machine: it looks at the next
     * character and dispatches to the matching token reader. Anything that is
     * not recognised becomes a one-character CharacterLiteral.
     *
     * @return The next section, or {@code null} at the end of the content.
     */
    private Section determineNextSection() {
        moveForward();
        if (!moveSuccess) {
            return null;
        }
        Section section = null;
        switch (character) {
        case ' ':
        case '\t':
            section = new WhiteSpace();
            consumeRun(section, RunKind.WHITE_SPACE);
            break;
        case ';':
            section = new Comment(Type.A);
            section.setOffset(position);
            consumeLineComment(section);
            break;
        case '/':
            section = readSlashComment();
            break;
        default:
            if (Character.isDigit(character)) {
                section = new IntegerLiteral();
                consumeRun(section, RunKind.DIGITS);
            } else if (Character.isLetter(character) || (character == '_')) {
                // TODO Review symbol format (__ is permitted)
                section = new Symbol();
                consumeRun(section, RunKind.SYMBOL);
            } else {
                // verify line separator case
                final int start = position;
                moveForward(lineSeparator);
                if (moveSuccess) {
                    section = new LineSeparator();
                    section.setOffset(start);
                    section.setLength(lineSeparator.length());
                }
            }
        }
        if (section == null) {
            // GENERAL CHARACTER BRANCH
            section = new CharacterLiteral();
            section.setOffset(position);
            section.setLength(1);
        }
        return section;
    }

    /**
     * Extends a run token that starts at the current character for as long
     * as the following characters belong to the same class, then sets its
     * offset and end. The pointer is left on the last character of the run.
     */
    private void consumeRun(Section section, RunKind kind) {
        section.setOffset(position);
        while (true) {
            moveForward();
            if (!moveSuccess) {
                // End of content: the current character is the last one of
                // the run, so the end lies one past it.
                section.setEnd(position + 1);
                return;
            }
            if (!continuesRun(kind, character)) {
                section.setEnd(position);
                moveBackward();
                return;
            }
        }
    }

    /** Tells whether c may continue a run of the given kind. */
    private static boolean continuesRun(RunKind kind, char c) {
        switch (kind) {
        case WHITE_SPACE:
            return (c == ' ') || (c == '\t');
        case DIGITS:
            return Character.isDigit(c);
        default:
            return Character.isLetter(c) || Character.isDigit(c) || (c == '_');
        }
    }

    /**
     * Ends a line comment at the next line separator, or at the end of the
     * content when none follows. The pointer is left on the last character of
     * the comment; the separator itself is not consumed.
     */
    private void consumeLineComment(Section section) {
        final int separatorIndex = content.indexOf(lineSeparator, position);
        if (separatorIndex > position) {
            position = separatorIndex - 1;
            section.setEnd(separatorIndex);
        } else {
            // line separator is not found
            moveFarForward();
            section.setEnd(content.length());
        }
    }

    /**
     * Handles a '/' at the current position: reads a "//" line comment or a
     * block comment opened by "/" followed by "*".
     *
     * @return The comment, or {@code null} when the '/' does not start one;
     *         in that case the pointer is back on the '/'.
     */
    private Section readSlashComment() {
        final int start = position;
        moveForward();
        if (!moveSuccess) {
            // '/' is the last character of the content
            return null;
        }
        if (character == '/') {
            final Section comment = new Comment(Type.CPP);
            comment.setOffset(start);
            consumeLineComment(comment);
            return comment;
        }
        if (character == '*') {
            final Section comment = new Comment(Type.C);
            comment.setOffset(start);
            // Search after the opening '*' so that "/*/" is not taken as a
            // complete comment. Searching for the two-character terminator
            // also finds it when it is preceded by further '*' characters.
            final int terminator = content.indexOf("*/", position + 1);
            if (terminator >= 0) {
                position = terminator + 1; // on the closing '/'
            } else {
                // unterminated: the comment runs to the end of the content
                moveFarForward();
            }
            comment.setEnd(position + 1);
            return comment;
        }
        moveBackward();
        return null;
    }
}