package org.eclipse.assemblyformatter.parsers;
import org.eclipse.assemblyformatter.ir.Directive;
import org.eclipse.assemblyformatter.ir.Instruction;
import org.eclipse.assemblyformatter.ir.Label;
import org.eclipse.assemblyformatter.ir.Parameter;
import org.eclipse.assemblyformatter.ir.Section;
import org.eclipse.assemblyformatter.ir.lowlevel.CharacterLiteral;
import org.eclipse.assemblyformatter.ir.lowlevel.Comment;
import org.eclipse.assemblyformatter.ir.lowlevel.LineSeparator;
import org.eclipse.assemblyformatter.ir.lowlevel.Symbol;
import org.eclipse.assemblyformatter.ir.lowlevel.WhiteSpace;
import org.eclipse.jface.text.BadLocationException;
/**
* <p>
* The format of an assembler source line is as follows: [label [:]] [operation]
* [operands] [; comment] where the components are as follows:
* </p>
* <ul>
* <li><b>label</b> - A definition of a label, which is a symbol that represents
* an address. If the label starts in the first column&mdash;that is, at the far left
* on the line&mdash;the : (colon) is optional.</li>
* <li><b>operation</b> - An assembler instruction or directive. This must not
* start in the first column&mdash;there must be some whitespace to the left of it.</li>
* <li><b>operands</b> - A list of operands, separated by commas.</li>
* <li><b>comment</b> - Comment, preceded by a ; (semicolon). C or C++ comments
* are also allowed.</li>
* </ul>
*
* <p>
* <i>Reference manual: "C:\Program Files (x86)\IAR Systems\Embedded Workbench
* 5.4 Kickstart\arm\doc\EWARM_AssemblerReference.ENU.pdf"</i>
* </p>
*/
public class IARParser extends Parser {

    /** States of the automaton used by {@link #identifyAnyLabel()}. */
    private enum LabelState {
        /** Nothing more can match on the current line; waiting for a LINE_SEPARATOR. */
        AWAIT_LINE_START,
        /** Positioned on the first section of a line (also the state at document start). */
        LINE_START,
        /** A SYMBOL was found in the first column. */
        FIRST_COLUMN_SYMBOL,
        /** The line starts with WHITE_SPACE. */
        INDENTED,
        /** The line starts with WHITE_SPACE followed by a SYMBOL. */
        INDENTED_SYMBOL
    }

    /** States of the automaton used by {@link #identifyAnyParameter()}. */
    private enum ParameterIdentificationState {
        /** Looking for the next INSTRUCTION section. */
        INSTRUCTION,
        /** Skipping the WHITE_SPACE that follows an INSTRUCTION. */
        WHITE_SPACE,
        /** Collecting operand sections until the end of the line. */
        PARAMETER
    }

    /**
     * Runs the identification passes over the section list that starts at
     * {@code base}: labels first, then instructions, then parameters. The
     * parameter pass looks for the {@link Instruction} sections created by the
     * instruction pass, so the order must be kept.
     *
     * @throws BadLocationException
     *             if a section offset lies outside the document
     */
    @Override
    public void run() throws BadLocationException {
        identifyAnyLabel();
        // Directive identification is deliberately not run; see
        // identifyAnyDirective().
        identifyAnyInstruction();
        identifyAnyParameter();
    }

    /**
     * Replaces {@code original} in the section list with {@code replacement},
     * which takes over the position and the successor of {@code original}.
     *
     * @param predecessor
     *            the section linked directly in front of {@code original}, or
     *            {@code null} when {@code original} is the head of the list
     * @param original
     *            the section to take out of the list
     * @param replacement
     *            the section to link in instead
     */
    private void replaceSection(Section predecessor, Section original,
            Section replacement) {
        replacement.copyPosition(original);
        replacement.setNextSection(original.getNextSection());
        if (predecessor != null) {
            predecessor.setNextSection(replacement);
        } else if (base == original) {
            // The replaced section was the head of the list.
            base = replacement;
        }
    }

    /**
     * Tells whether {@code section} is a CHARACTER_LITERAL whose first
     * character in the document is a colon.
     *
     * @throws BadLocationException
     *             if the section offset lies outside the document
     */
    private boolean isColon(Section section) throws BadLocationException {
        return (section instanceof CharacterLiteral)
                && (document.getChar(section.getOffset()) == ':');
    }

    /**
     * <h3>State evolution for identifying a label</h3>
     *
     * <pre>
     * LINE_START -- SYMBOL --[FIRST_COLUMN_SYMBOL]-- WHITE_SPACE or CHARACTER_LITERAL(:) or LINE_SEPARATOR or end of input --[match]
     * LINE_START -- WHITE_SPACE --[INDENTED]-- SYMBOL --[INDENTED_SYMBOL]-- CHARACTER_LITERAL(:) --[match]
     * </pre>
     *
     * <p>
     * On a match the SYMBOL section is replaced with a {@link Label}. Any other
     * input moves to AWAIT_LINE_START. Every LINE_SEPARATOR, including one that
     * ends a match or a failed attempt, starts a new line, so labels on
     * consecutive lines and labels after blank lines are recognized. The start
     * of the document counts as a line start as well.
     * </p>
     *
     * @return the head of the section list ({@code base}), which changes when
     *         the first section itself becomes a label
     * @throws BadLocationException
     *             if a section offset lies outside the document
     */
    private Section identifyAnyLabel() throws BadLocationException {
        // Section linked directly in front of the candidate symbol; stays null
        // only while the candidate is the head of the list.
        Section predecessor = null;
        Symbol candidate = null;
        LabelState state = LabelState.LINE_START;
        Section section = base;
        while (section != null) {
            final Section nextSection = section.getNextSection();
            boolean matched = false;
            switch (state) {
            case LINE_START:
                if (section instanceof LineSeparator) {
                    // Blank line: the newest separator is the one that
                    // precedes whatever comes next.
                    predecessor = section;
                } else if (section instanceof Symbol) {
                    candidate = (Symbol) section;
                    state = LabelState.FIRST_COLUMN_SYMBOL;
                } else if (section instanceof WhiteSpace) {
                    predecessor = section;
                    state = LabelState.INDENTED;
                } else {
                    state = LabelState.AWAIT_LINE_START;
                }
                break;
            case FIRST_COLUMN_SYMBOL:
                // The colon is optional for a symbol in the first column.
                matched = (section instanceof WhiteSpace)
                        || (section instanceof LineSeparator)
                        || isColon(section);
                state = LabelState.AWAIT_LINE_START;
                break;
            case INDENTED:
                if (section instanceof Symbol) {
                    candidate = (Symbol) section;
                    state = LabelState.INDENTED_SYMBOL;
                } else {
                    state = LabelState.AWAIT_LINE_START;
                }
                break;
            case INDENTED_SYMBOL:
                // An indented symbol is a label only with an explicit colon.
                matched = isColon(section);
                state = LabelState.AWAIT_LINE_START;
                break;
            case AWAIT_LINE_START:
            default:
                break;
            }
            if (matched) {
                replaceSection(predecessor, candidate, new Label());
            }
            if ((state == LabelState.AWAIT_LINE_START)
                    && (section instanceof LineSeparator)) {
                // The separator that ended this line starts the next one.
                predecessor = section;
                state = LabelState.LINE_START;
            }
            section = nextSection;
        }
        if (state == LabelState.FIRST_COLUMN_SYMBOL) {
            // The input ended right after a first-column symbol; the end of
            // the input terminates the line like a LINE_SEPARATOR does.
            replaceSection(predecessor, candidate, new Label());
        }
        return base;
    }

    /**
     * Tells whether {@code name} equals, ignoring case, one of the entries of
     * {@link Directive#IAR_LIST}.
     */
    private static boolean isDirectiveName(String name) {
        for (String directiveName : Directive.IAR_LIST) {
            if (name.equalsIgnoreCase(directiveName)) {
                return true;
            }
        }
        return false;
    }

    /**
     * <h3>Rule for identifying a directive (pseudo-instruction)</h3>
     * <p>
     * If SYMBOL is in set {AREA, TEXT, DATA, CODE, EXPORT, EXTERN, EXTRN, ...}
     * then SYMBOL is DIRECTIVE
     * </p>
     *
     * <p>
     * Only one directive is identified per line: after a replacement no further
     * SYMBOL is considered until a LINE_SEPARATOR has been passed.
     * </p>
     *
     * <p>
     * NOTE: Directive identification is not necessary now (only instruction
     * identification), so {@link #run()} does not call this method.
     * </p>
     *
     * @return the head of the section list ({@code base}), which changes when
     *         the first section itself becomes a directive
     * @throws BadLocationException
     *             declared for reading section content from the document
     */
    private Section identifyAnyDirective() throws BadLocationException {
        Section previous = null;
        boolean directiveFoundOnLine = false;
        Section section = base;
        while (section != null) {
            // The section that is part of the list once this step is done.
            Section current = section;
            if (section instanceof LineSeparator) {
                directiveFoundOnLine = false;
            } else if (!directiveFoundOnLine && (section instanceof Symbol)
                    && isDirectiveName(section.getContent(document))) {
                // Replace SYMBOL with DIRECTIVE.
                final Section directive = new Directive();
                replaceSection(previous, section, directive);
                current = directive;
                directiveFoundOnLine = true;
            }
            previous = current;
            section = current.getNextSection();
        }
        return base;
    }

    /**
     * Replaces {@code symbol} with an {@link Instruction} section.
     *
     * @param whiteSpace
     *            the section linked directly in front of {@code symbol}
     * @param symbol
     *            the SYMBOL section to replace
     * @return the section that follows the new instruction; may be
     *         {@code null}
     */
    private Section replaceWithInstruction(Section whiteSpace, Symbol symbol) {
        replaceSection(whiteSpace, symbol, new Instruction());
        return symbol.getNextSection();
    }

    /**
     * <h3>Rule for identifying an instruction</h3>
     * <p>
     * In case Label identification is passed: LINE_SEPARATOR + WHITE_SPACE +
     * SYMBOL -> LINE_SEPARATOR + WHITE_SPACE + INSTRUCTION
     * </p>
     *
     * <p>
     * The first line of the document has no LINE_SEPARATOR in front of it, so
     * a list that starts with WHITE_SPACE + SYMBOL is handled the same way.
     * </p>
     *
     * @return the head of the section list ({@code base})
     */
    private Section identifyAnyInstruction() {
        Section section = base;
        if ((base instanceof WhiteSpace)
                && (base.getNextSection() instanceof Symbol)) {
            // Indented symbol on the very first line.
            section = replaceWithInstruction(base,
                    (Symbol) base.getNextSection());
        }
        while (section != null) {
            Section nextSection = section.getNextSection();
            // NOTE(review): nextIs(...) and getNextIs__staticData(...) are
            // defined in Section; assumed here to test the two sections that
            // follow this one and to expose the matched sections by index.
            if ((section instanceof LineSeparator)
                    && section.nextIs(WhiteSpace.class, Symbol.class)) {
                final Symbol symbolSection = (Symbol) Section
                        .getNextIs__staticData(1);
                // Replace SYMBOL with INSTRUCTION and continue behind it.
                nextSection = replaceWithInstruction(nextSection,
                        symbolSection);
            }
            section = nextSection;
        }
        return base;
    }

    /**
     * Tells whether {@code section} ends the operand list of an instruction: a
     * LINE_SEPARATOR, or a COMMENT of type {@code A} or {@code CPP}. Comments
     * of other types do not end the list.
     */
    private static boolean endsOperands(Section section) {
        if (section instanceof LineSeparator) {
            return true;
        }
        if (section instanceof Comment) {
            final Comment.Type type = ((Comment) section).getType();
            return (type == Comment.Type.A) || (type == Comment.Type.CPP);
        }
        return false;
    }

    /**
     * Links a new {@link Parameter} into the list in place of the sections
     * from the successor of {@code before} up to and including {@code last}.
     *
     * @param before
     *            the section directly in front of the first operand section
     * @param last
     *            the last non-white-space operand section
     */
    private void insertParameter(Section before, Section last) {
        final Parameter parameter = new Parameter();
        parameter.copyOffset(before.getNextSection());
        parameter.setEnd(last.getOffset() + last.getLength());
        parameter.setNextSection(last.getNextSection());
        before.setNextSection(parameter);
    }

    /**
     * <h3>Rule for identifying a parameter</h3> INSTRUCTION + ... +
     * LINE_SEPARATOR|COMMENT(A|CPP) -> INSTRUCTION + PARAMETER +
     * LINE_SEPARATOR|COMMENT(A|CPP)
     *
     * <p>
     * White space directly after the instruction and white space in front of
     * the terminating section stay outside the parameter. The end of the input
     * terminates a pending operand list like a LINE_SEPARATOR does.
     * </p>
     *
     * TODO Review (parameter identification problem is difficult)
     *
     * @return the head of the section list ({@code base})
     */
    private Section identifyAnyParameter() {
        Section before = null; // section in front of the first operand
        Section last = null; // last non-white-space operand seen so far
        ParameterIdentificationState state = ParameterIdentificationState.INSTRUCTION;
        for (Section section = base; section != null; section = section
                .getNextSection()) {
            if (state == ParameterIdentificationState.INSTRUCTION) {
                if (section instanceof Instruction) {
                    before = section;
                    state = ParameterIdentificationState.WHITE_SPACE;
                }
                continue;
            }
            if (state == ParameterIdentificationState.WHITE_SPACE) {
                if (section instanceof WhiteSpace) {
                    before = section;
                    continue;
                }
                // First non-white-space section: handle it as operand input
                // right away.
                last = null;
                state = ParameterIdentificationState.PARAMETER;
            }
            if (endsOperands(section)) {
                if (last != null) {
                    insertParameter(before, last);
                }
                // With or without operand content, look for the next
                // instruction.
                state = ParameterIdentificationState.INSTRUCTION;
            } else if (!(section instanceof WhiteSpace)) {
                last = section;
            }
        }
        if ((state == ParameterIdentificationState.PARAMETER)
                && (last != null)) {
            // Input ended without a terminating section.
            insertParameter(before, last);
        }
        return base;
    }
}