/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* The following code was generated by JFlex 1.4.1 on 20/12/06 21:02 */
package javax.swing.text.html.parser;
import java_cup.runtime.Symbol;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTML.Tag;
import java.util.ArrayList;
import java.util.List;
import java.math.BigInteger;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.4.1
* on 20/12/06 21:02 from the specification file
* <tt>/home/asanchez/workspace/ParserHtml/parser-spec/Lexer.lex</tt>
*/
class Lexer implements java_cup.runtime.Scanner {
/** This character denotes the end of file */
public static final int YYEOF = -1;
/** initial size of the lookahead buffer */
private static final int ZZ_BUFFERSIZE = 16384;
/**
* lexical states
* <p>
* These are the values accepted by yybegin(int) and reported by yystate().
* NOTE(review): TAG, CDATA_ENDTAG_NAME, STARTENDTAG and CDATA_ENDTAG_END all
* carry the value 1, so they cannot be told apart at run time; presumably the
* generator emitted them this way - confirm against the specification file.
*/
public static final int STARTTAG = 7;
public static final int TAG = 1;
public static final int ATTREQUALS = 8;
public static final int ATTRVALLIT_SQM = 11;
public static final int ENDTAG = 4;
public static final int CHECK_IF_CDATA = 16;
public static final int ATTRVALSTART = 9;
public static final int YYINITIAL_NOTEXT = 17;
public static final int CDATA_ENDTAG_NAME = 1;
public static final int TAG_TRAILINGWS = 13;
public static final int STARTENDTAG = 1;
public static final int MDSTART = 12;
public static final int ATTRVAL = 3;
public static final int CDATA = 14;
public static final int COMMENT = 6;
public static final int IGNORED_Q_TAG = 19;
public static final int CDATA_ENDTAG_START = 15;
public static final int MD = 5;
public static final int CDATA_ENDTAG_END = 1;
public static final int ATTRVALLIT_DQM = 10;
public static final int YYINITIAL = 0;
public static final int TAG_IGNORE_ATTS = 18;
public static final int ATTR = 2;
/**
* Packed form of the character-to-character-class map.
* <p>
* The string is a sequence of (count, value) character pairs that
* zzUnpackCMap expands into consecutive entries of the map.
*/
private static final String ZZ_CMAP_PACKED =
"\11\0\1\5\1\6\2\0\1\7\22\0\1\4\1\16\1\11\1\20"+
"\2\0\1\10\1\12\5\0\1\3\1\21\1\15\12\1\1\25\1\17"+
"\1\13\1\26\1\14\1\24\1\0\6\23\21\2\1\22\2\2\6\0"+
"\6\23\21\2\1\22\2\2\uff85\0";
/**
* Translates characters to character classes.
* <p>
* Indexed by the input character; the scanning loop adds the resulting class
* to a ZZ_ROWMAP offset to find the transition for the current state.
*/
private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
/**
* Translates DFA states to action switch labels.
* <p>
* Indexed by DFA state; the scanning method switches on the entry of the last
* accepting state. The initializer may textually precede ZZ_ACTION_PACKED_0
* because that string is a compile-time constant.
*/
private static final int [] ZZ_ACTION = zzUnpackAction();
/**
* Packed (count, value) pairs that zzUnpackAction expands into ZZ_ACTION.
*/
private static final String ZZ_ACTION_PACKED_0 =
"\6\0\1\1\3\0\1\2\1\3\10\0\1\4\1\5"+
"\1\6\1\7\1\4\1\10\1\11\1\12\1\13\1\14"+
"\1\15\1\16\1\17\1\20\1\21\1\20\1\22\1\23"+
"\1\24\1\20\1\25\1\26\1\27\1\30\1\31\1\32"+
"\1\33\1\34\1\33\1\35\1\36\1\37\1\1\1\40"+
"\1\41\1\42\1\43\1\44\1\45\1\46\1\47\1\50"+
"\1\51\1\52\1\53\1\17\1\2\1\54\1\55\1\56"+
"\1\2\1\57\1\3\1\60\1\61\1\62\1\3\1\63"+
"\1\64\1\65\1\17\1\66\1\67\1\70\1\71\2\72"+
"\1\73\1\74\2\75\1\76\1\77\1\100\2\101\1\102"+
"\1\103\1\104\1\105\1\0\1\106\1\107\1\0\1\110"+
"\1\111\1\112\1\113\1\0\1\114\1\115\1\116\2\0"+
"\1\117\1\120\1\121\1\0\1\122\1\123\1\0\1\124"+
"\1\125\1\126\2\0\1\127\1\130\1\0\1\131\1\132"+
"\1\133\1\134\1\135\1\107\1\0\1\107\1\136\1\113"+
"\1\0\1\113\1\0\1\137\1\140\1\121\1\0\1\121"+
"\1\141\1\123\1\0\1\123\1\142\1\126\1\0\1\126"+
"\1\0\1\143\1\107\1\113\1\121\1\123\1\126\1\144";
/**
* Builds the action table by expanding ZZ_ACTION_PACKED_0.
*
* @return a freshly allocated table with one entry per DFA state
*/
private static int [] zzUnpackAction() {
    final int[] table = new int[163];
    zzUnpackAction(ZZ_ACTION_PACKED_0, 0, table);
    return table;
}
/**
* Expands run-length encoded (count, value) character pairs into
* <code>result</code>, starting at <code>offset</code>.
* A pair always writes at least one entry, even when its count is zero.
*
* @param packed the packed pairs
* @param offset index of the first entry to write
* @param result the table receiving the expanded values
* @return the index just past the last entry written
*/
private static int zzUnpackAction(String packed, int offset, int [] result) {
    final int packedLength = packed.length();
    int out = offset;
    for (int in = 0; in < packedLength; in += 2) {
        int runLength = packed.charAt(in);
        final int entry = packed.charAt(in + 1);
        do {
            result[out++] = entry;
        } while (--runLength > 0);
    }
    return out;
}
/**
* Translates a state to a row index in the transition table
* <p>
* The scanning loop adds the character class of the current input to this
* offset to locate the transition entry in ZZ_TRANS.
*/
private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
/**
* Packed form of ZZ_ROWMAP: every entry is stored as two characters, the high
* 16 bits followed by the low 16 bits (see zzUnpackRowMap).
*/
private static final String ZZ_ROWMAP_PACKED_0 =
"\0\0\0\27\0\56\0\105\0\134\0\163\0\212\0\241"+
"\0\270\0\317\0\346\0\375\0\u0114\0\u012b\0\u0142\0\u0159"+
"\0\u0170\0\u0187\0\u019e\0\u01b5\0\u01cc\0\u01e3\0\27\0\u01fa"+
"\0\u0211\0\27\0\27\0\u0228\0\u023f\0\27\0\u0256\0\27"+
"\0\27\0\u026d\0\u0284\0\u029b\0\u02b2\0\27\0\u02c9\0\u02e0"+
"\0\27\0\27\0\27\0\u02f7\0\27\0\u030e\0\27\0\u0325"+
"\0\u033c\0\u0353\0\27\0\u036a\0\u0381\0\u0398\0\27\0\27"+
"\0\27\0\27\0\27\0\u03af\0\27\0\u03c6\0\27\0\27"+
"\0\27\0\u026d\0\u03dd\0\u03f4\0\u03dd\0\u040b\0\u0422\0\27"+
"\0\u0439\0\u0450\0\u0439\0\u0467\0\u047e\0\27\0\u0495\0\27"+
"\0\u04ac\0\27\0\u04c3\0\27\0\u04da\0\u04f1\0\u0508\0\27"+
"\0\u051f\0\u0536\0\u054d\0\27\0\u0564\0\27\0\27\0\u057b"+
"\0\27\0\27\0\u0592\0\u05a9\0\u05c0\0\27\0\u05d7\0\u05ee"+
"\0\27\0\u0605\0\27\0\u061c\0\u0633\0\27\0\27\0\27"+
"\0\u064a\0\u0661\0\27\0\u03dd\0\u0678\0\u068f\0\u0439\0\u06a6"+
"\0\u06bd\0\27\0\27\0\u06d4\0\u06eb\0\u0702\0\u0719\0\27"+
"\0\u0730\0\27\0\27\0\27\0\27\0\27\0\u0747\0\u075e"+
"\0\u0775\0\27\0\u078c\0\u07a3\0\u07ba\0\u07d1\0\27\0\27"+
"\0\u07e8\0\u07ff\0\u0816\0\27\0\u082d\0\u0844\0\u085b\0\27"+
"\0\u0872\0\u0889\0\u08a0\0\u08b7\0\u08ce\0\u08e5\0\u08fc\0\u0913"+
"\0\u092a\0\u0941\0\27";
/**
* Builds the row map by expanding ZZ_ROWMAP_PACKED_0.
*
* @return a freshly allocated table with one row offset per DFA state
*/
private static int [] zzUnpackRowMap() {
    final int[] table = new int[163];
    zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, 0, table);
    return table;
}
/**
* Decodes pairs of characters into 32-bit values: the first character of a
* pair supplies the high 16 bits, the second the low 16 bits.
*
* @param packed the packed character pairs
* @param offset index of the first entry to write
* @param result the table receiving the decoded values
* @return the index just past the last entry written
*/
private static int zzUnpackRowMap(String packed, int offset, int [] result) {
    final int packedLength = packed.length();
    int out = offset;
    int in = 0;
    while (in < packedLength) {
        final int upperHalf = packed.charAt(in) << 16;
        final int lowerHalf = packed.charAt(in + 1);
        in += 2;
        result[out] = upperHalf | lowerHalf;
        out++;
    }
    return out;
}
/**
* The transition table of the DFA
* <p>
* Looked up as ZZ_TRANS[ZZ_ROWMAP[state] + ZZ_CMAP[input]]; an entry of -1
* means there is no transition and scanning of the current token stops.
*/
private static final int [] ZZ_TRANS = zzUnpackTrans();
/**
* Packed (count, value) pairs for ZZ_TRANS. Each stored value is one greater
* than the table entry it stands for; zzUnpackTrans subtracts one, so a
* packed 0 becomes -1.
*/
private static final String ZZ_TRANS_PACKED_0 =
"\4\25\2\26\1\27\1\30\1\31\2\25\1\32\13\25"+
"\27\0\2\33\1\34\1\33\2\35\1\36\1\37\4\33"+
"\1\40\1\41\4\33\2\34\3\33\1\42\1\43\1\42"+
"\1\44\2\45\1\46\1\47\1\50\1\51\1\52\1\0"+
"\1\40\12\42\4\53\2\54\1\55\1\56\4\53\1\40"+
"\12\53\2\57\1\60\1\61\2\62\1\63\1\64\12\57"+
"\2\60\3\57\3\65\1\66\23\65\4\67\2\35\1\36"+
"\1\37\3\67\1\70\1\71\1\41\7\67\1\72\1\67"+
"\2\73\1\34\1\73\2\74\1\75\1\76\4\73\1\40"+
"\1\41\4\73\2\34\2\73\1\77\1\42\1\43\1\42"+
"\1\44\2\35\1\36\1\37\1\50\1\100\2\101\1\40"+
"\1\102\11\42\4\103\2\104\1\105\1\106\1\107\1\110"+
"\15\103\4\111\2\112\1\113\1\114\1\115\1\111\1\116"+
"\14\111\14\117\1\40\12\117\4\120\2\121\1\122\1\123"+
"\4\120\1\124\12\120\4\125\1\126\1\127\1\130\1\131"+
"\1\132\2\125\1\133\13\125\2\134\1\135\3\134\1\0"+
"\5\134\1\40\5\134\2\135\3\134\27\136\6\137\1\0"+
"\4\137\1\140\13\137\6\141\1\142\1\143\3\141\1\70"+
"\1\71\12\141\24\144\1\145\2\144\4\25\5\0\2\25"+
"\1\0\13\25\4\0\2\26\27\0\1\146\22\0\1\147"+
"\15\0\1\150\1\0\2\147\4\0\3\34\15\0\3\34"+
"\7\0\2\35\27\0\1\151\20\0\1\42\1\0\2\42"+
"\11\0\12\42\1\152\1\43\2\152\11\0\12\152\1\42"+
"\1\43\2\42\11\0\12\42\4\0\2\45\27\0\1\153"+
"\22\0\1\154\15\0\1\155\1\0\2\154\7\0\2\54"+
"\27\0\1\156\21\0\3\60\15\0\3\60\6\0\1\157"+
"\27\0\2\62\27\0\1\160\20\0\3\65\1\161\26\65"+
"\1\162\23\65\4\0\2\74\27\0\1\163\20\0\10\103"+
"\2\0\21\103\2\104\2\103\2\0\23\103\1\164\1\103"+
"\2\0\15\103\2\0\1\165\15\0\1\166\1\0\2\165"+
"\3\0\10\111\1\0\1\111\1\0\20\111\2\112\2\111"+
"\1\0\1\111\1\0\22\111\1\167\1\111\1\0\1\111"+
"\1\0\14\111\2\0\1\170\15\0\1\171\1\0\2\170"+
"\3\0\14\117\1\0\12\117\4\0\2\121\27\0\1\172"+
"\20\0\4\125\1\0\1\125\3\0\2\125\1\0\13\125"+
"\4\0\2\126\21\0\4\125\1\126\1\127\3\0\2\125"+
"\1\0\13\125\6\0\1\173\22\0\1\174\15\0\1\175"+
"\1\0\2\174\20\0\1\176\12\0\3\135\15\0\3\135"+
"\5\0\1\177\11\0\1\200\1\201\1\202\3\0\2\177"+
"\1\203\10\0\1\204\20\0\24\144\1\0\2\144\14\0"+
"\1\205\14\0\1\147\14\0\1\206\2\0\2\147\4\0"+
"\1\207\1\147\1\210\16\0\1\211\1\147\3\0\4\152"+
"\11\0\12\152\2\0\1\154\14\0\1\212\2\0\2\154"+
"\4\0\1\213\1\154\1\214\16\0\1\215\1\154\3\0"+
"\3\65\1\216\37\65\1\217\12\65\2\0\1\165\14\0"+
"\1\220\2\0\2\165\4\0\1\221\1\165\1\222\16\0"+
"\1\223\1\165\5\0\1\170\14\0\1\224\2\0\2\170"+
"\4\0\1\225\1\170\1\226\16\0\1\227\1\170\5\0"+
"\1\174\14\0\1\230\2\0\2\174\4\0\1\231\1\174"+
"\1\232\16\0\1\233\1\174\5\0\1\234\17\0\2\234"+
"\4\0\3\177\15\0\3\177\5\0\1\235\17\0\2\235"+
"\4\0\1\207\15\0\1\206\10\0\1\207\26\0\1\236"+
"\1\147\14\0\1\206\2\0\1\147\1\211\4\0\1\213"+
"\15\0\1\212\10\0\1\213\26\0\1\237\1\154\14\0"+
"\1\212\2\0\1\154\1\215\3\0\14\65\1\0\12\65"+
"\1\0\1\221\15\0\1\220\10\0\1\221\26\0\1\240"+
"\1\165\14\0\1\220\2\0\1\165\1\223\4\0\1\225"+
"\15\0\1\224\10\0\1\225\26\0\1\241\1\170\14\0"+
"\1\224\2\0\1\170\1\227\4\0\1\231\15\0\1\230"+
"\10\0\1\231\26\0\1\242\1\174\14\0\1\230\2\0"+
"\1\174\1\233\4\0\3\234\10\0\1\243\4\0\3\234"+
"\4\0\3\235\15\0\3\235\4\0\1\236\15\0\1\206"+
"\3\0\1\236\4\0\1\237\15\0\1\212\3\0\1\237"+
"\4\0\1\240\15\0\1\220\3\0\1\240\4\0\1\241"+
"\15\0\1\224\3\0\1\241\4\0\1\242\15\0\1\230"+
"\3\0\1\242\3\0";
/**
* Builds the transition table by expanding ZZ_TRANS_PACKED_0.
*
* @return a freshly allocated transition table
*/
private static int [] zzUnpackTrans() {
    final int[] table = new int[2392];
    zzUnpackTrans(ZZ_TRANS_PACKED_0, 0, table);
    return table;
}
/**
* Expands run-length encoded (count, value) character pairs into
* <code>result</code>, storing <code>value - 1</code> for every entry so that
* a packed 0 yields -1. A pair always writes at least one entry.
*
* @param packed the packed pairs
* @param offset index of the first entry to write
* @param result the table receiving the expanded values
* @return the index just past the last entry written
*/
private static int zzUnpackTrans(String packed, int offset, int [] result) {
    final int packedLength = packed.length();
    int out = offset;
    for (int in = 0; in < packedLength; in += 2) {
        int runLength = packed.charAt(in);
        final int target = packed.charAt(in + 1) - 1;
        do {
            result[out++] = target;
        } while (--runLength > 0);
    }
    return out;
}
/* error codes; each one is also the index of its message in ZZ_ERROR_MSG */
private static final int ZZ_UNKNOWN_ERROR = 0;
private static final int ZZ_NO_MATCH = 1;
private static final int ZZ_PUSHBACK_2BIG = 2;
/* error messages for the codes above; zzScanError falls back to entry 0 for any other code */
private static final String ZZ_ERROR_MSG[] = {
"Unkown internal scanner error",
"Error: could not match input",
"Error: pushback value was too large"
};
/**
* ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
* <p>
* The scanning loop tests two bits: bit 1 marks a state whose position is
* remembered as the end of the match, and bit 8 (checked only together with
* bit 1) makes the loop stop at that state without reading further input.
*/
private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
/**
* Packed (count, value) pairs that zzUnpackAttribute expands into ZZ_ATTRIBUTE.
*/
private static final String ZZ_ATTRIBUTE_PACKED_0 =
"\1\0\1\10\4\0\1\1\3\0\2\1\10\0\2\1"+
"\1\11\2\1\2\11\2\1\1\11\1\1\2\11\4\1"+
"\1\11\2\1\3\11\1\1\1\11\1\1\1\11\3\1"+
"\1\11\3\1\5\11\1\1\1\11\1\1\3\11\6\1"+
"\1\11\5\1\1\11\1\1\1\11\1\1\1\11\1\1"+
"\1\11\3\1\1\11\3\1\1\11\1\1\2\11\1\1"+
"\2\11\2\1\1\0\1\11\1\1\1\0\1\11\1\1"+
"\1\11\1\1\1\0\3\11\2\0\1\11\2\1\1\0"+
"\2\1\1\0\2\11\1\1\2\0\1\1\1\11\1\0"+
"\5\11\1\1\1\0\1\1\1\11\1\1\1\0\1\1"+
"\1\0\2\11\1\1\1\0\1\1\1\11\1\1\1\0"+
"\1\1\1\11\1\1\1\0\1\1\1\0\6\1\1\11";
/**
* Builds the state attribute table by expanding ZZ_ATTRIBUTE_PACKED_0.
*
* @return a freshly allocated table with one entry per DFA state
*/
private static int [] zzUnpackAttribute() {
    final int[] table = new int[163];
    zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, 0, table);
    return table;
}
/**
* Expands run-length encoded (count, value) character pairs into
* <code>result</code>, starting at <code>offset</code>.
* A pair always writes at least one entry, even when its count is zero.
*
* @param packed the packed pairs
* @param offset index of the first entry to write
* @param result the table receiving the expanded values
* @return the index just past the last entry written
*/
private static int zzUnpackAttribute(String packed, int offset, int [] result) {
    final int packedLength = packed.length();
    int out = offset;
    int in = 0;
    while (in < packedLength) {
        int remaining = packed.charAt(in);
        final int attributes = packed.charAt(in + 1);
        in += 2;
        do {
            result[out] = attributes;
            out++;
            remaining--;
        } while (remaining > 0);
    }
    return out;
}
/** the input device */
private java.io.Reader zzReader;
/** the current state of the DFA */
private int zzState;
/** the current lexical state */
private int zzLexicalState = YYINITIAL;
/** this buffer contains the current text to be matched and is
the source of the yytext() string */
private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
/** the textposition at the last accepting state */
private int zzMarkedPos;
/** the textposition at the last state to be included in yytext */
private int zzPushbackPos;
/** the current text position in the buffer */
private int zzCurrentPos;
/** startRead marks the beginning of the yytext() string in the buffer */
private int zzStartRead;
/** endRead marks the last character in the buffer, that has been read
from input */
private int zzEndRead;
/** number of newlines encountered up to the start of the matched text */
private int yyline;
/** the number of characters up to the start of the matched text */
private int yychar;
/**
* the number of characters from the last newline up to the start of the
* matched text
*/
private int yycolumn;
/**
* zzAtBOL == true <=> the scanner is currently at the beginning of a line
*/
private boolean zzAtBOL = true;
/** zzAtEOF == true <=> the scanner is at the EOF */
private boolean zzAtEOF;
/** denotes if the user-EOF-code has already been executed */
private boolean zzEOFDone;
/* user code: */
/**
* Collects the pieces added by appendText until flushBufferedText turns them
* into a text token.
*/
private StringBuilder buffer = new StringBuilder();
/**
* Occurrence counters for the three line terminator forms, incremented by the
* scanner actions; getEOLString reports "\r", "\r\n" or "\n" according to the
* largest of CRcount, CRNLcount and NLcount.
*/
private int CRcount;
private int CRNLcount;
private int NLcount;
/**
* Closing tag text ("&lt;/name&gt;") of the CDATA or script element whose
* start tag was seen last, or null when the last start tag was of another kind.
*/
private String currentCDATAClosingTag;
/**
* Position reported for the text token and end tag name produced while
* leaving a CDATA section. It is not assigned in the part of the file
* reviewed here - TODO confirm where it is updated.
*/
private int offsetCDATA=0;
/** Text handling mode; selects the branch taken by appendText. */
private LexerTextStateType textState = LexerTextStateType.DEFAULT;
/**
* Counts preformatted start tags that have not yet been matched by an end
* tag: incremented when such a start tag is scanned, decremented by yybegin
* when an end tag of a preformatted element is entered.
*/
private int preEntryCounter;
/**
* Describes whether a piece of text (no white space) was parsed.
*/
private boolean textParsed;
/**
* Describes whether a line terminator has already been skipped.
*/
private boolean terminatorSkipped;
/**
* Stores the position where a piece of text begins
*/
private int textPos;
/**
* Set by appendText in the default mode when white space follows the text
* parsed so far; passed to the HTMLText built by flushBufferedText.
*/
private boolean hasTrailingSpaces;
/**
* Set by appendText in the default mode when white space is seen before any
* text was parsed; passed to the HTMLText built by flushBufferedText.
*/
private boolean hasLeadingSpaces;
/**
* Indicates if the strict mode of the current Parser instance is set
*/
private boolean strict;
/**
* A reference of the Parser's DTD
*/
private DTD dtd;
/**
* Stores all the TAGS that are set as preformatted at <code>HTML.getAllTags()</code>
*/
private static List<String> preformattedTags;
/**
* NOTE(review): never assigned in the part of the file reviewed here;
* presumably meant to hold the tags that break the flow - confirm usage.
*/
private static List<String> breaksFlowTags;
/**
* This flag is used in CHECK_IF_CDATA state
*/
private boolean syncronizeSent;
/**
* Used by preformatted purposes. Describe if the last token was a line terminator
* in order to collapse the white spaces as the RI does
*/
private boolean lastWasLineTerminator;
/**
* Used by preformatted purposes. Describe if the token text returned as a SYMBOL
* may have to collapse the last token (line terminator)
*/
private boolean skipLastLineTerminator;
/* private boolean lastTokenWasBreaksFlow; */
/**
* A reference to the current CUP object
*/
private ParserCup cup;
/**
* Sets <code>preformattedTags</code> with all the tags (<code>HTML.Tag</code>)
* that are set as preformatted, by examining isPreformatted() method.
* <br>
* Then, each time the method <code>isPreformatted(String)</code> is called
* search at this <code>ArrayList</code>
*/
static {
preformattedTags = new ArrayList<String>();
for (Tag tag : HTML.getAllTags()) {
if (tag.isPreformatted()) {
preformattedTags.add(tag.toString().toLowerCase());
}
}
}
/**
* Stores the strict-mode flag of the parser that owns this scanner.
*
* @param strict the flag value to remember
*/
public void setStrict(boolean strict) {
    this.strict = strict;
}
/**
* Reports the number of characters up to the start of the matched text.
*
* @return the current value of <code>yychar</code>
*/
public int getOffset() {
    return this.yychar;
}
/**
* Stores the CUP parser object this scanner consults.
*
* @param cup the parser instance to remember
*/
public void setCup (ParserCup cup) {
    this.cup = cup;
}
/**
* Stores the DTD used to look up entities and elements.
*
* @param dtd the DTD to remember
*/
public void setDTD(DTD dtd) {
    this.dtd = dtd;
}
/**
* Reports the number of newlines encountered up to the start of the
* matched text.
*
* @return the current value of the <code>yyline</code> counter
*/
public int yyline() {
    return this.yyline;
}
/**
* Tells whether a tag name belongs to a preformatted element.
* <br>
* The answer is a plain lookup in <code>preformattedTags</code>, whose
* entries are lower case, so the comparison is case sensitive.
*
* @param tokenTag the tag name to look up
* @return true if <code>preformattedTags</code> contains the name,
* false otherwise
*/
private boolean isPreformatted (String tokenTag) {
    final boolean listed = preformattedTags.contains(tokenTag);
    return listed;
}
/**
* Adds the text matched last to the pending text buffer, following the rules
* of the current text mode (<code>textState</code>).
* <br>
* The start position of the text is recorded when the buffer is still empty.
* Entities are replaced through <code>replaceEntity</code>; everything else is
* taken verbatim from <code>yytext()</code>.
*
* @param textType the kind of token that was matched
*/
private void appendText (LexerTextType textType) {
    if (buffer.length() == 0) {
        textPos = yychar;
    }
    switch (textState) {
    case DEFAULT:
        /* white space is collapsed: it is only remembered through flags */
        switch (textType) {
        case SPACE:
        case LINE_TERMINATOR:
            if (!textParsed) {
                hasLeadingSpaces = true;
            }
            hasTrailingSpaces = true;
            break;
        case TEXT:
        case ENTITY_SEMI:
        case ENTITY_NO_SEMI:
            /* pending white space between two pieces of text becomes one blank */
            if (textParsed && hasTrailingSpaces) {
                buffer.append(" ");
            }
            buffer.append(textType == LexerTextType.TEXT
                    ? yytext()
                    : replaceEntity(textType == LexerTextType.ENTITY_SEMI));
            textParsed = true;
            hasTrailingSpaces = false;
            break;
        }
        break;
    case PREFORMATTED:
        /* white space and line terminators are kept as they are */
        switch (textType) {
        case SPACE:
        case LINE_TERMINATOR:
            buffer.append(yytext());
            if (textType == LexerTextType.LINE_TERMINATOR) {
                terminatorSkipped = false;
            }
            break;
        case TEXT:
        case ENTITY_SEMI:
        case ENTITY_NO_SEMI:
            buffer.append(textType == LexerTextType.TEXT
                    ? yytext()
                    : replaceEntity(textType == LexerTextType.ENTITY_SEMI));
            textParsed = true;
            break;
        }
        break;
    case CDATA:
        switch (textType) {
        case LINE_TERMINATOR:
            /* the first line terminator is dropped, later ones are kept */
            if (terminatorSkipped) {
                buffer.append(yytext());
            } else {
                terminatorSkipped = true;
            }
            break;
        case TEXT:
        case SPACE:
        case ENTITY_SEMI:
        case ENTITY_NO_SEMI:
            if (textType == LexerTextType.ENTITY_SEMI) {
                buffer.append(replaceEntity(true));
            } else if (textType == LexerTextType.ENTITY_NO_SEMI) {
                buffer.append(replaceEntity(false));
            } else {
                buffer.append(yytext());
            }
            textParsed = true;
            break;
        }
        break;
    }
}
/**
* Translates the entity reference matched last into its replacement text.
* <br>
* The leading '&amp;' and, if present, the trailing ';' are stripped from
* <code>yytext()</code>. A name starting with '#' is read as a numeric
* reference (hexadecimal when followed by 'x' or 'X', decimal otherwise) and
* looked up by number; any other name is looked up by name.
* <br>
* A numeric reference that cannot be parsed as an int (no digits, or a value
* out of range) is treated like an unknown entity instead of letting a runtime
* exception escape from the scanner.
*
* @param endsWithSemi true if the matched text ends with ';'
* @return the data of the entity found in the DTD, or "&amp;" followed by the
* stripped reference when no entity is found
*/
private String replaceEntity(boolean endsWithSemi){
    final String matched = yytext();
    final String str = endsWithSemi
            ? matched.substring(1, matched.length() - 1)
            : matched.substring(1);
    Entity entity = null;
    if (!str.startsWith("#")) {
        entity = dtd.getEntity(str);
    } else if (str.length() > 1) {
        final char marker = str.charAt(1);
        try {
            if (marker == 'X' || marker == 'x') {
                entity = dtd.getEntity(Integer.parseInt(str.substring(2), 16));
            } else {
                entity = dtd.getEntity(Integer.parseInt(str.substring(1)));
            }
        } catch (NumberFormatException ignored) {
            // Not a usable number: fall through and emit the reference as text.
            entity = null;
        }
    }
    return entity == null ? "&" + str : String.valueOf(entity.data);
}
/**
* Empties the pending text buffer and resets the per-text flags.
* <br>
* A token is only produced when real text (not just white space) was parsed.
* In preformatted mode the last character is dropped when
* <code>skipLastLineTerminator</code> is set (same as RI).
*
* @return the text token built from the buffered text, or null if no text
* was parsed
*/
private HTMLText flushBufferedText() {
    final String pending = buffer.toString();
    buffer = new StringBuilder();
    HTMLText textToken = null;
    if (textParsed) {
        String content = pending;
        final boolean dropLastChar = skipLastLineTerminator
                && textState == LexerTextStateType.PREFORMATTED;
        if (dropLastChar) {
            content = pending.substring(0, pending.length() - 1);
            skipLastLineTerminator = false;
        }
        textToken = new HTMLText(content, textPos, hasLeadingSpaces, hasTrailingSpaces);
    }
    /* start the next piece of text from a clean state */
    lastWasLineTerminator = false;
    terminatorSkipped = false;
    textParsed = false;
    hasLeadingSpaces = false;
    hasTrailingSpaces = false;
    return textToken;
}
/**
* Reports the line terminator that was counted most often.
* <br>
* On a tie "\n" is preferred over "\r\n", which is preferred over "\r";
* this also makes "\n" the answer when nothing was counted yet.
*
* @return "\n", "\r\n" or "\r"
*/
public String getEOLString() {
    final int mostFrequent = Math.max(CRcount, Math.max(NLcount, CRNLcount));
    if (mostFrequent == NLcount) {
        return String.valueOf('\n');
    }
    if (mostFrequent == CRNLcount) {
        return String.valueOf("\r\n");
    }
    if (mostFrequent == CRcount) {
        return String.valueOf('\r');
    }
    throw new AssertionError();
}
/**
* Creates a new scanner that reads from the given reader.
* There is also a java.io.InputStream version of this constructor.
*
* @param in the java.io.Reader to read input from.
*/
Lexer(java.io.Reader in) {
    zzReader = in;
}
/**
* Creates a new scanner.
* There is also java.io.Reader version of this constructor.
* <br>
* The stream is wrapped in a java.io.InputStreamReader created without an
* explicit charset, so bytes are decoded with the default charset.
*
* @param in the java.io.Inputstream to read input from.
*/
Lexer(java.io.InputStream in) {
this(new java.io.InputStreamReader(in));
}
/**
* Unpacks the compressed character translation table.
* <br>
* The packed string is a sequence of (count, value) character pairs; every
* pair fills at least one entry of the map. The whole string is consumed,
* whatever its length, so the method does not depend on the size of one
* particular table.
*
* @param packed the packed character translation table
* @return the unpacked character translation table
*/
private static char [] zzUnpackCMap(String packed) {
    char [] map = new char[0x10000];
    final int packedLength = packed.length();
    int i = 0; /* index in packed string */
    int j = 0; /* index in unpacked array */
    while (i < packedLength) {
        int count = packed.charAt(i++);
        char value = packed.charAt(i++);
        do map[j++] = value; while (--count > 0);
    }
    return map;
}
/**
* Refills the input buffer.
* <br>
* Text before <code>zzStartRead</code> is discarded by moving the rest to the
* front of the buffer, the buffer is doubled when the current position has
* reached its end, and the free space is then filled from the reader.
*
* @return <code>false</code>, iff there was new input.
*
* @exception java.io.IOException if any I/O-Error occurs
*/
private boolean zzRefill() throws java.io.IOException {
    /* first: make room (if you can) */
    if (zzStartRead > 0) {
        System.arraycopy(zzBuffer, zzStartRead,
                         zzBuffer, 0,
                         zzEndRead-zzStartRead);
        /* translate stored positions */
        zzEndRead-= zzStartRead;
        zzCurrentPos-= zzStartRead;
        zzMarkedPos-= zzStartRead;
        zzPushbackPos-= zzStartRead;
        zzStartRead = 0;
    }
    /* is the buffer big enough? */
    if (zzCurrentPos >= zzBuffer.length) {
        /* if not: blow it up */
        char newBuffer[] = new char[zzCurrentPos*2];
        System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
        zzBuffer = newBuffer;
    }
    /* finally: fill the buffer with new input */
    int numRead = zzReader.read(zzBuffer, zzEndRead,
                                zzBuffer.length-zzEndRead);
    if (numRead > 0) {
        zzEndRead+= numRead;
        return false;
    }
    /*
     * unlikely but not impossible: the reader delivered 0 characters without
     * being at the end of the stream. Reporting that as new input would make
     * the caller consume a buffer slot that was never filled, so ask for a
     * single character, which either yields one or signals the end.
     */
    if (numRead == 0) {
        int c = zzReader.read();
        if (c == -1) {
            return true;
        }
        zzBuffer[zzEndRead++] = (char) c;
        return false;
    }
    /* numRead < 0: end of stream */
    return true;
}
/**
* Closes the input stream and marks the scanner as being at the end of
* file; the buffered input is invalidated.
*
* @exception java.io.IOException if closing the reader fails
*/
public final void yyclose() throws java.io.IOException {
    /* indicate end of file */
    zzAtEOF = true;
    /* invalidate buffer */
    zzEndRead = zzStartRead;
    if (zzReader == null) {
        return;
    }
    zzReader.close();
}
/**
* Resets the scanner to read from a new input stream.
* Does not close the old reader.
*
* All internal variables of the generated scanner are reset, the old input
* stream <b>cannot</b> be reused (internal buffer is discarded and lost).
* Lexical state is set to <tt>YYINITIAL</tt>.
* <br>
* The end-of-file marker for the user EOF code is cleared as well, so that
* the new reader is closed when its end is reached, like the first one.
* <br>
* NOTE(review): the user-code state (pending text buffer, text mode, line
* terminator counters) is left untouched here - confirm whether callers
* expect it to survive a reset.
*
* @param reader the new input stream
*/
public final void yyreset(java.io.Reader reader) {
    zzReader = reader;
    zzAtBOL = true;
    zzAtEOF = false;
    zzEOFDone = false;
    zzEndRead = zzStartRead = 0;
    zzCurrentPos = zzMarkedPos = zzPushbackPos = 0;
    yyline = yychar = yycolumn = 0;
    zzLexicalState = YYINITIAL;
}
/**
* Returns the current lexical state.
*
* @return the state set by the last yybegin(int) or yyreset call, or
* YYINITIAL if neither was called yet
*/
public final int yystate() {
    return this.zzLexicalState;
}
/**
* Enters a new lexical state
* <br>
* When ENDTAG is entered and the matched text is the beginning of an end tag
* ("&lt;/" followed by the name) of a preformatted element, one level of
* preformatted nesting is left and the text mode is updated accordingly.
* <br>
* The tag name is compared in lower case, as done for start tags, so that
* &lt;/PRE&gt; closes what &lt;pre&gt; opened. The nesting counter never drops
* below zero, so an end tag without a matching start tag leaves the text mode
* at DEFAULT instead of switching to PREFORMATTED.
*
* @param newState the new lexical state
*/
public final void yybegin(int newState) {
    if (newState == ENDTAG) {
        final String matched = yytext();
        /*
         * ENDTAG is also re-entered for short tokens such as a lone line
         * terminator; those cannot carry a tag name and must not be sliced.
         */
        if (matched.length() > 2
                && isPreformatted(matched.substring(2).toLowerCase(java.util.Locale.ENGLISH))) {
            if (preEntryCounter > 0) {
                preEntryCounter--;
            }
            textState = (preEntryCounter == 0) ? LexerTextStateType.DEFAULT :
                    LexerTextStateType.PREFORMATTED;
        }
    }
    zzLexicalState = newState;
}
/**
* Returns the text matched by the current regular expression.
*
* @return a new string holding the buffer content between the start of the
* match and the marked position
*/
public final String yytext() {
    final int matchedLength = zzMarkedPos - zzStartRead;
    return String.valueOf(zzBuffer, zzStartRead, matchedLength);
}
/**
* Returns the character at position <tt>pos</tt> from the
* matched text.
*
* Gives the same character as yytext().charAt(pos) without building
* the string first.
*
* @param pos the position of the character to fetch.
* A value from 0 to yylength()-1.
*
* @return the character at position pos
*/
public final char yycharat(int pos) {
    final int bufferIndex = zzStartRead + pos;
    return zzBuffer[bufferIndex];
}
/**
* Returns the length of the matched text region.
*
* @return the distance between the start of the match and the marked position
*/
public final int yylength() {
    final int matchedLength = zzMarkedPos - zzStartRead;
    return matchedLength;
}
/**
* Reports an error that occurred while scanning.
*
* In a wellformed scanner (no or only correct usage of
* yypushback(int) and a match-all fallback rule) this method
* will only be called with things that "Can't Possibly Happen".
* If this method is called, something is seriously wrong
* (e.g. a JFlex bug producing a faulty scanner etc.).
*
* Usual syntax/scanner level error handling should be done
* in error fallback rules.
*
* The method never returns normally: it always throws an Error carrying the
* message of the given code, or the "unknown error" message when the code has
* no message of its own.
*
* @param errorCode the code of the errormessage to display
*/
private void zzScanError(int errorCode) {
    final boolean knownCode = errorCode >= 0 && errorCode < ZZ_ERROR_MSG.length;
    final String message = ZZ_ERROR_MSG[knownCode ? errorCode : ZZ_UNKNOWN_ERROR];
    throw new Error(message);
}
/**
* Pushes the specified amount of characters back into the input stream.
*
* They will be read again by the next call of the scanning method.
* Asking for more characters than were matched is reported through
* zzScanError.
*
* @param number the number of characters to be read again.
* This number must not be greater than yylength()!
*/
public void yypushback(int number) {
    final int matchedLength = zzMarkedPos - zzStartRead;
    if (number > matchedLength) {
        zzScanError(ZZ_PUSHBACK_2BIG);
    }
    zzMarkedPos = zzMarkedPos - number;
}
/**
* Contains user EOF-code, which will be executed exactly once,
* when the end of file is reached. Here that code closes the input.
*
* @exception java.io.IOException if closing the input fails
*/
private void zzDoEOF() throws java.io.IOException {
    if (zzEOFDone) {
        return;
    }
    zzEOFDone = true;
    yyclose();
}
/**
* Resumes scanning until the next regular expression is matched,
* the end of input is encountered or an I/O-Error occurs.
*
* @return the next token
* @exception java.io.IOException if any I/O-Error occurs
*/
public java_cup.runtime.Symbol next_token() throws java.io.IOException {
int zzInput;
int zzAction;
// cached fields:
int zzCurrentPosL;
int zzMarkedPosL;
int zzEndReadL = zzEndRead;
char [] zzBufferL = zzBuffer;
char [] zzCMapL = ZZ_CMAP;
int [] zzTransL = ZZ_TRANS;
int [] zzRowMapL = ZZ_ROWMAP;
int [] zzAttrL = ZZ_ATTRIBUTE;
while (true) {
zzMarkedPosL = zzMarkedPos;
yychar+= zzMarkedPosL-zzStartRead;
boolean zzR = false;
for (zzCurrentPosL = zzStartRead; zzCurrentPosL < zzMarkedPosL;
zzCurrentPosL++) {
switch (zzBufferL[zzCurrentPosL]) {
case '\u000B':
case '\u000C':
case '\u0085':
case '\u2028':
case '\u2029':
yyline++;
yycolumn = 0;
zzR = false;
break;
case '\r':
yyline++;
yycolumn = 0;
zzR = true;
break;
case '\n':
if (zzR)
zzR = false;
else {
yyline++;
yycolumn = 0;
}
break;
default:
zzR = false;
yycolumn++;
}
}
if (zzR) {
// peek one character ahead if it is \n (if we have counted one line too much)
boolean zzPeek;
if (zzMarkedPosL < zzEndReadL)
zzPeek = zzBufferL[zzMarkedPosL] == '\n';
else if (zzAtEOF)
zzPeek = false;
else {
boolean eof = zzRefill();
zzEndReadL = zzEndRead;
zzMarkedPosL = zzMarkedPos;
zzBufferL = zzBuffer;
if (eof)
zzPeek = false;
else
zzPeek = zzBufferL[zzMarkedPosL] == '\n';
}
if (zzPeek) yyline--;
}
zzAction = -1;
zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
zzState = zzLexicalState;
zzForAction: {
while (true) {
if (zzCurrentPosL < zzEndReadL)
zzInput = zzBufferL[zzCurrentPosL++];
else if (zzAtEOF) {
zzInput = YYEOF;
break zzForAction;
}
else {
// store back cached positions
zzCurrentPos = zzCurrentPosL;
zzMarkedPos = zzMarkedPosL;
boolean eof = zzRefill();
// get translated positions and possibly new buffer
zzCurrentPosL = zzCurrentPos;
zzMarkedPosL = zzMarkedPos;
zzBufferL = zzBuffer;
zzEndReadL = zzEndRead;
if (eof) {
zzInput = YYEOF;
break zzForAction;
}
else {
zzInput = zzBufferL[zzCurrentPosL++];
}
}
int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
if (zzNext == -1) break zzForAction;
zzState = zzNext;
int zzAttributes = zzAttrL[zzState];
if ( (zzAttributes & 1) == 1 ) {
zzAction = zzState;
zzMarkedPosL = zzCurrentPosL;
if ( (zzAttributes & 8) == 8 ) break zzForAction;
}
}
}
// store back cached position
zzMarkedPos = zzMarkedPosL;
switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
case 28:
{ yybegin(MDSTART);
return new Symbol (ParserSym.MUDECL, yytext());
}
case 101: break;
case 50:
{ yybegin(ATTRVALLIT_SQM);
CRcount++;
}
case 102: break;
case 49:
{ yybegin(ATTRVALLIT_SQM);
NLcount++;
}
case 103: break;
case 18:
{ yybegin(ATTRVAL);
}
case 104: break;
case 32:
{ yybegin(COMMENT);
return new Symbol (ParserSym.LEXERR,yytext());
}
case 105: break;
case 43:
{ yybegin(ATTRVALSTART);
return new Symbol (ParserSym.LEXERR,yytext());
}
case 106: break;
case 98:
{ yybegin(CDATA);
appendText(LexerTextType.ENTITY_SEMI);
}
case 107: break;
case 62:
{ yybegin(CDATA_ENDTAG_START);
return new Symbol (ParserSym.LEXERR,yytext());
}
case 108: break;
case 96:
{ yybegin(ATTRVALLIT_DQM);
return new Symbol (ParserSym.ATTRVAL_LIT, replaceEntity(true));
}
case 109: break;
case 7:
{ yybegin(YYINITIAL);
appendText(LexerTextType.LINE_TERMINATOR);
CRcount++;
}
case 110: break;
case 6:
{ yybegin(YYINITIAL);
appendText(LexerTextType.LINE_TERMINATOR);
NLcount++;
}
case 111: break;
case 61:
{ yybegin(CDATA);
appendText(LexerTextType.TEXT);
}
case 112: break;
case 59:
{ yybegin(CDATA);
NLcount++;
appendText(LexerTextType.LINE_TERMINATOR);
}
case 113: break;
case 31:
{ yybegin(MD);
CRcount++;
}
case 114: break;
case 30:
{ yybegin(MD);
NLcount++;
}
case 115: break;
case 9:
{ yybegin(ATTR);
return new Symbol (ParserSym.LEXERR,yytext());
}
case 116: break;
case 38:
{ yybegin(ATTREQUALS);
}
case 117: break;
case 100:
{ if (yytext().equalsIgnoreCase(currentCDATAClosingTag)) {
yypushback(yytext().length()-2);
yybegin(CDATA_ENDTAG_START);
String aux = buffer.toString();
if(aux.endsWith("\r\n")){
aux=aux.substring(0,aux.length()-2);
}
else if(aux.endsWith("\n") || aux.endsWith("\r")){
aux=aux.substring(0,aux.length()-1);
}
flushBufferedText(); // for initialization purposes
return new Symbol (ParserSym.TEXT, new HTMLText(aux, offsetCDATA, false, false));
} else {
yybegin(CDATA);
appendText(LexerTextType.TEXT);
}
}
case 118: break;
case 85:
{ yybegin(CDATA);
CRNLcount++;
appendText(LexerTextType.LINE_TERMINATOR);
}
case 119: break;
case 5:
{ yybegin(YYINITIAL);
appendText(LexerTextType.SPACE);
}
case 120: break;
case 35:
{ yybegin(CHECK_IF_CDATA);
syncronizeSent=false;
return new Symbol (ParserSym.TAG_CLOSE, yychar, yyline, new Character(yytext().charAt(0)));
}
case 121: break;
case 14:
{ yybegin(YYINITIAL);
return new Symbol (ParserSym.TAG_CLOSE, yychar, yyline, new Character(yytext().charAt(0)));
}
case 122: break;
case 93:
{ yybegin(YYINITIAL);
appendText(LexerTextType.ENTITY_SEMI);
}
case 123: break;
case 23:
{ yybegin(ENDTAG);
return new Symbol (ParserSym.LEXERR,yytext());
}
case 124: break;
case 74:
{ yybegin(ATTRVAL);
CRNLcount++;
}
case 125: break;
case 1:
{ yybegin(COMMENT);
return new Symbol (ParserSym.COMM_CONTENT, yytext());
}
case 126: break;
case 37:
{ yybegin(ATTREQUALS);
return new Symbol (ParserSym.LEXERR,yytext());
}
case 127: break;
case 17:
{ yybegin(ATTRVALSTART);
return new Symbol (ParserSym.ATTRVAL_NUM, yytext());
}
case 128: break;
case 88:
{ yybegin(YYINITIAL);
return new Symbol (ParserSym.EMPTY_TAG, yytext());
}
case 129: break;
case 57:
{ yybegin(CDATA);
appendText(LexerTextType.TEXT);
}
case 130: break;
case 81:
{ yybegin(ATTRVALLIT_DQM);
return new Symbol (ParserSym.ATTRVAL_LIT, replaceEntity(false));
}
case 131: break;
case 20:
{ yybegin(ATTRVAL);
CRcount++;
}
case 132: break;
case 19:
{ yybegin(ATTRVAL);
NLcount++;
}
case 133: break;
case 40:
{ yybegin(ATTREQUALS);
CRcount++;
}
case 134: break;
case 10:
{ yybegin(ATTREQUALS);
return new Symbol (ParserSym.ATTR_NAME, yytext());
}
case 135: break;
case 39:
{ yybegin(ATTREQUALS);
NLcount++;
}
case 136: break;
case 64:
{ yypushback(yytext().length());
if(!syncronizeSent){
syncronizeSent=true;
return new Symbol (ParserSym.SYNC, yytext());
}
if (cup.action_obj.isLastStartTagCreatedOk() && currentCDATAClosingTag!=null) {
textState = LexerTextStateType.CDATA;
yybegin(CDATA);
} else {
yybegin(YYINITIAL);
}
}
case 137: break;
case 75:
{ yybegin(ATTRVALSTART);
return new Symbol (ParserSym.ATTRVAL_NMTK, replaceEntity(false));
}
case 138: break;
case 65:
{ yybegin(YYINITIAL);
return new Symbol (ParserSym.LEXERR_EXPECTED_TAG_NAME,yytext());
}
case 139: break;
case 91:
{ yybegin(TAG_IGNORE_ATTS);
CRNLcount++;
}
case 140: break;
case 90:
{ yybegin(IGNORED_Q_TAG);
}
case 141: break;
case 63:
{ yybegin(CDATA_ENDTAG_START);
return new Symbol (ParserSym.END_TAG_NAME, offsetCDATA, yyline, yytext());
}
case 142: break;
case 70:
{ yybegin(YYINITIAL);
appendText(LexerTextType.LINE_TERMINATOR);
CRNLcount++;
}
case 143: break;
case 82:
{ yybegin(ATTRVALLIT_SQM);
CRNLcount++;
}
case 144: break;
case 79:
{ yybegin(ATTREQUALS);
CRNLcount++;
}
case 145: break;
case 72:
{ yybegin(ATTR);
CRNLcount++;
}
case 146: break;
case 33:
{ yybegin(STARTTAG);
return new Symbol (ParserSym.LEXERR,yytext());
}
case 147: break;
case 71:
{ yybegin(YYINITIAL);
appendText(LexerTextType.ENTITY_NO_SEMI);
}
case 148: break;
case 2:
{ yybegin(ATTRVALLIT_DQM);
return new Symbol (ParserSym.ATTRVAL_LIT, yytext());
}
case 149: break;
case 60:
{ yybegin(CDATA);
CRcount++;
appendText(LexerTextType.LINE_TERMINATOR);
}
case 150: break;
case 83:
{ yybegin(ATTRVALLIT_SQM);
return new Symbol (ParserSym.ATTRVAL_LIT, replaceEntity(false));
}
case 151: break;
case 99:
{ yybegin(ENDTAG);
String tagName = yytext().substring(2);
return new Symbol (ParserSym.END_TAG_NAME, yychar, yyline, tagName);
}
case 152: break;
case 76:
{ yybegin(ENDTAG);
CRNLcount++;
}
case 153: break;
case 53:
{ yybegin(YYINITIAL);
yypushback(yytext().length());
return new Symbol (ParserSym.TAG_SLASH_CLOSE, yytext());
}
case 154: break;
case 16:
{ yybegin(ATTRVALSTART);
return new Symbol (ParserSym.ATTRVAL_NMTK, yytext());
}
case 155: break;
case 22:
{ yybegin(ATTRVALLIT_SQM);
return new Symbol (ParserSym.SQM, new Character(yytext().charAt(0)));
}
case 156: break;
case 80:
{ yybegin(ATTRVALLIT_DQM);
CRNLcount++;
}
case 157: break;
case 77:
{ yybegin(COMMENT);
return new Symbol (ParserSym.COMM, yytext());
}
case 158: break;
case 47:
{ yybegin(ATTR);
return new Symbol (ParserSym.DQM, new Character(yytext().charAt(0)));
}
case 159: break;
case 73:
{ yybegin(ATTRVALSTART);
return new Symbol (ParserSym.ATTRVAL_NUMTK, yytext());
}
case 160: break;
case 86:
{ yybegin(CDATA);
appendText(LexerTextType.ENTITY_NO_SEMI);
// to collapse next LINE TERMINATOR (same as RI)
terminatorSkipped = false;
}
case 161: break;
case 36:
{ yybegin(TAG_IGNORE_ATTS);
return new Symbol (ParserSym.LEXERR,yytext());
}
case 162: break;
case 4:
{ yybegin(YYINITIAL);
appendText(LexerTextType.TEXT);
}
case 163: break;
case 56:
{ yybegin(YYINITIAL);
return new Symbol (ParserSym.TAG_SLASH_CLOSE, yytext());
}
case 164: break;
case 94:
{ yybegin(ATTRVALSTART);
return new Symbol (ParserSym.ATTRVAL_NMTK, replaceEntity(true));
}
case 165: break;
case 41:
{ yybegin(ATTRVAL);
return new Symbol (ParserSym.EQUALS, new Character(yytext().charAt(0)));
}
case 166: break;
case 15:
{ yybegin(TAG_TRAILINGWS);
}
case 167: break;
case 26:
{ yybegin(ENDTAG);
CRcount++;
}
case 168: break;
case 95:
{ yybegin(YYINITIAL);
return new Symbol (ParserSym.TAG_COMM_CLOSE, yytext());
}
case 169: break;
case 24:
{ yybegin(ENDTAG);
}
case 170: break;
case 25:
{ yybegin(ENDTAG);
NLcount++;
}
case 171: break;
case 44:
{ yybegin(ATTRVALLIT_DQM);
}
case 172: break;
case 13:
{ yybegin(ATTR);
CRcount++;
}
case 173: break;
case 89:
{ yybegin(MD);
return new Symbol (ParserSym.TAG_OPEN_EXM, yychar, yyline, yytext());
}
case 174: break;
case 12:
{ yybegin(ATTR);
NLcount++;
}
case 175: break;
case 58:
{ yybegin(CDATA);
appendText(LexerTextType.SPACE);
}
case 176: break;
case 92:
{ yybegin(YYINITIAL);
}
case 177: break;
case 84:
{ yybegin(TAG_TRAILINGWS);
CRNLcount++;
}
case 178: break;
case 46:
{ yybegin(ATTRVALLIT_DQM);
CRcount++;
}
case 179: break;
case 45:
{ yybegin(ATTRVALLIT_DQM);
NLcount++;
}
case 180: break;
// Action 87: a tag name was matched. The name is the matched text without its
// first character (presumably the opening '<' -- the rule pattern is not
// visible here; TODO confirm).
case 87:
// The DTD lookup uses the lower-cased name.
{ String tagName = yytext().substring(1).toLowerCase();
Element e = dtd.elementHash.get(tagName);
// Known element that is CDATA-typed or a script: remember the literal closing
// tag ("</name>") and continue in TAG_IGNORE_ATTS.
if (e != null && ((e.getType() == DTDConstants.CDATA) || e.isScript())) {
currentCDATAClosingTag = "</" + tagName + ">";
yybegin(TAG_IGNORE_ATTS);
} else {
// Any other tag: if it is preformatted, count the entry and switch the text
// state to PREFORMATTED; then clear the remembered closing tag and continue
// in STARTTAG.
if (isPreformatted(tagName)) {
preEntryCounter ++;
textState = LexerTextStateType.PREFORMATTED;
}
currentCDATAClosingTag = null;
yybegin(STARTTAG);
}
// The symbol value keeps the name as written in the input (not lower-cased);
// yychar and yyline are passed as the symbol's two integer position arguments.
return new Symbol (ParserSym.TAG_NAME, yychar, yyline, yytext().substring(1, yytext().length()));
}
case 181: break;
case 3:
{ yybegin(ATTRVALLIT_SQM);
return new Symbol (ParserSym.ATTRVAL_LIT, yytext());
}
case 182: break;
case 68:
{ yybegin(TAG_IGNORE_ATTS);
CRcount++;
}
case 183: break;
case 29:
{ yybegin(MD);
}
case 184: break;
case 55:
{ yybegin(TAG_TRAILINGWS);
CRcount++;
}
case 185: break;
case 42:
{ yybegin(ATTRVALSTART);
return new Symbol (ParserSym.DQM, new Character(yytext().charAt(0)));
}
case 186: break;
case 67:
{ yybegin(TAG_IGNORE_ATTS);
NLcount++;
}
case 187: break;
case 54:
{ yybegin(TAG_TRAILINGWS);
NLcount++;
}
case 188: break;
case 21:
{ yybegin(ATTRVALLIT_DQM);
return new Symbol (ParserSym.DQM, new Character(yytext().charAt(0)));
}
case 189: break;
case 27:
{ yybegin(MD);
return new Symbol (ParserSym.LEXERR,yytext());
}
case 190: break;
case 11:
{ yybegin(ATTR);
}
case 191: break;
// Action 8: flush whatever text has been buffered, push the single matched
// character back onto the input, and switch to YYINITIAL_NOTEXT.
case 8:
{ HTMLText textToken = flushBufferedText();
yypushback(1);
yybegin(YYINITIAL_NOTEXT);
// A TEXT symbol is returned only when the flush produced a token; otherwise
// the action completes without returning anything.
if (textToken != null) {
return new Symbol (ParserSym.TEXT, textToken);
}
}
case 192: break;
case 48:
{ yybegin(ATTRVALLIT_SQM);
}
case 193: break;
case 51:
{ yybegin(ATTR);
return new Symbol (ParserSym.SQM, new Character(yytext().charAt(0)));
}
case 194: break;
case 97:
{ yybegin(ATTRVALLIT_SQM);
return new Symbol (ParserSym.ATTRVAL_LIT, replaceEntity(true));
}
case 195: break;
case 66:
{ yybegin(TAG_IGNORE_ATTS);
}
case 196: break;
case 52:
{ yybegin(MDSTART);
return new Symbol (ParserSym.MUDECL_CONTENT, yytext());
}
case 197: break;
case 78:
{ yybegin(MD);
CRNLcount++;
}
case 198: break;
case 34:
{ yybegin(CHECK_IF_CDATA);
return new Symbol (ParserSym.TAG_OPEN, new Character(yytext().charAt(0)));
}
case 199: break;
case 69:
{
}
case 200: break;
// No numbered action applies. Either this is end of input with no characters
// consumed for the current token (EOF handling below), or the input matched
// no rule (scan error).
default:
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
zzAtEOF = true;
zzDoEOF();
// EOF action, checked in this order:
// 1. Text still pending in the buffer is emitted as a TEXT symbol and the
//    buffer is replaced by a fresh, empty one.
{ if (buffer.length() > 0 ) {
String aux = buffer.toString();
buffer= new StringBuilder();
return new Symbol (ParserSym.TEXT, new HTMLText(aux, offsetCDATA, false, false));
}
// 2. EOF reached while in the CDATA state: go back to YYINITIAL and return
//    EOF_LITERAL carrying currentCDATAClosingTag without its first two and
//    last characters (the "</" and ">" added when it is built in action 87).
//    NOTE(review): this dereferences currentCDATAClosingTag without a null
//    check -- presumably it is always set while in CDATA; confirm.
if(yystate()==CDATA){
yybegin(YYINITIAL);
return new Symbol(ParserSym.EOF_LITERAL,currentCDATAClosingTag.substring(2,currentCDATAClosingTag.length()-1 ));
}
// 3. Otherwise return the plain EOF symbol.
return new Symbol (ParserSym.EOF,yytext());
}
}
else {
// Not at EOF and nothing matched: report ZZ_NO_MATCH via zzScanError.
zzScanError(ZZ_NO_MATCH);
}
}
}
}
}