/*
* Copyright (c) 1998-2011 Caucho Technology -- all rights reserved
*
* Caucho Technology permits redistribution, modification and use
* of this file in source and binary form ("the Software") under the
* Caucho Developer Source License ("the License"). The following
* conditions must be met:
*
* 1. Each copy or derived work of the Software must preserve the copyright
* notice and this notice unmodified.
*
* 2. Redistributions of the Software in source or binary form must include
* an unmodified copy of the License, normally in a plain ASCII text
*
* 3. The names "Resin" or "Caucho" are trademarks of Caucho Technology and
* may not be used to endorse products derived from this software.
* "Resin" or "Caucho" may not appear in the names of products derived
* from this software.
*
* This Software is provided "AS IS," without a warranty of any kind.
* ALL EXPRESS OR IMPLIED REPRESENTATIONS AND WARRANTIES, INCLUDING ANY
* IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
* OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED.
*
* CAUCHO TECHNOLOGY AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY DAMAGES
* SUFFERED BY LICENSEE OR ANY THIRD PARTY AS A RESULT OF USING OR
* DISTRIBUTING SOFTWARE. IN NO EVENT WILL CAUCHO OR ITS LICENSORS BE LIABLE
* FOR ANY LOST REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, SPECIAL,
* CONSEQUENTIAL, INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER CAUSED AND
* REGARDLESS OF THE THEORY OF LIABILITY, ARISING OUT OF THE USE OF OR
* INABILITY TO USE SOFTWARE, EVEN IF HE HAS BEEN ADVISED OF THE POSSIBILITY
* OF SUCH DAMAGES.
*
* @author Sam
*/
package com.caucho.doc.javadoc;
import com.caucho.log.Log;
import com.caucho.util.CharBuffer;
import com.caucho.util.L10N;
import com.caucho.vfs.IOExceptionWrapper;
import com.caucho.vfs.ReadStream;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* Parses a javadoc-generated HTML index file and reports each index entry
* it finds to a {@link Callback}.
*/
public class IndexParser {
/** Logger used for parse diagnostics. */
protected static final Logger log = Log.open(IndexParser.class);
/** Localizes the messages built by this class. */
static final L10N L = new L10N(IndexParser.class);

// Sentinels compared against the results of the character-level parse methods.
private static final int EOF = -1;   // value that stops the read loops
private static final int EODL = -2;  // a closing </DL> tag was seen

// Entry kinds handed to Callback.item() as `typ`; each one is a distinct bit.
public static final int TYPE_PACKAGE = 0x01;
public static final int TYPE_CLASS = 0x02;
public static final int TYPE_INTERFACE = 0x04;
public static final int TYPE_ENUM = 0x08;
public static final int TYPE_ANNOTATION = 0x10;
public static final int TYPE_EXCEPTION = 0x20;
public static final int TYPE_ERROR = 0x40;
public static final int TYPE_CONSTRUCTOR = 0x80;
public static final int TYPE_METHOD = 0x100;
public static final int TYPE_VARIABLE = 0x200;

// Handed to Callback.item() as `modifier` for static methods and variables.
public static final int MODIFIER_STATIC = 0x1000;

/** Stream the index html is read from. */
private ReadStream _rs;
/** Receiver of the parsed entries. */
private Callback _callback;
/** Count of line ends consumed so far by readChar() and readLine(). */
private int _currLine = 0;
/** Value of _currLine when collection of the current entry began. */
private int _lastDTLine = -1;
/**
 * Creates a parser that reads from the given stream.
 *
 * @param rs the stream containing the index html
 * @param callback receives one call for every index entry that is parsed
 */
public IndexParser(ReadStream rs, Callback callback)
{
  this._callback = callback;
  this._rs = rs;
}
/**
 * Reads the whole stream, looking for every {@code <DL} tag (either case)
 * and parsing the list that follows it. Scanning resumes after each list
 * and only stops once a read yields EOF.
 *
 * @throws IOException if reading fails or an entry cannot be parsed
 */
public void parse()
  throws IOException
{
  int ch = 0;

  while (ch != EOF) {
    ch = readChar();
    if (ch != '<')
      continue;

    ch = readChar();
    if (ch != 'D' && ch != 'd')
      continue;

    ch = readChar();
    if (ch != 'L' && ch != 'l')
      continue;

    readChar(); // eat the character following "<DL", normally '>'
    ch = parseDL();
  }
}
/**
 * Receives the entries found while parsing an index file.
 */
public interface Callback {
  /**
   * Called once for every index entry.
   *
   * @param path the entry's href with any leading "../" segments removed
   *        and without the part after '#'
   * @param anchor the part of the href after '#', or null if there is none
   * @param name the text shown in bold for the entry
   * @param fullname the dotted name derived from the href
   * @param typ one of the TYPE_ constants of IndexParser
   * @param modifier 0, or IndexParser.MODIFIER_STATIC
   * @param description the entry text that follows its first sentence
   */
  void item(String path,
            String anchor,
            String name,
            String fullname,
            int typ,
            int modifier,
            String description);
}
/**
 * Describes the current parse position in the form {@code path:line},
 * using the path of the stream being read and the number of lines
 * consumed so far.
 */
public String getLineInfo()
{
  String source = _rs.getPath().toString();

  CharBuffer info = CharBuffer.allocate();
  info.append(source);
  info.append(':');
  info.append(_currLine);

  return info.close();
}
/**
 * Reads one line from the stream into {@code cb}, counting it as a line
 * when the stream reports success.
 *
 * @param cb buffer handed to the stream's readLine
 * @return whatever the stream's readLine returned
 */
protected boolean readLine(CharBuffer cb)
  throws IOException
{
  if (!_rs.readLine(cb))
    return false;

  _currLine++;
  return true;
}
/**
 * Reads one character from the stream while keeping the line count up to
 * date. A "\r\n" pair is consumed as a whole and returned as a single
 * '\n'; a '\r' that is not followed by '\n' is returned unchanged and is
 * not counted as a line.
 *
 * @return the character read, as returned by the stream
 */
protected int readChar()
  throws IOException
{
  int ch = _rs.readChar();

  switch (ch) {
  case '\n':
    _currLine++;
    break;

  case '\r':
    if (_rs.readChar() == '\n') {
      _currLine++;
      ch = '\n';
    }
    else {
      // not a line end after all, push the look-ahead character back
      _rs.unread();
    }
    break;

  default:
    break;
  }

  return ch;
}
/**
 * Parses the entries of one definition list by calling parseDT() until it
 * reports the end of the list or the end of the input.
 *
 * @return EOF or EODL
 */
private int parseDL()
  throws IOException
{
  // everything before the first <DT> is junk, so the first pass is ignored
  boolean skipEntry = true;
  int result;

  do {
    readChar(); // one character is consumed (and dropped) before each entry
    _lastDTLine = _currLine;
    result = parseDT(skipEntry);
    skipEntry = false;
  } while (result != EOF && result != EODL);

  return result;
}
/**
 * Collects the text up to the next {@code <DT} or {@code </DL} tag (either
 * case) and, unless told to ignore it, passes the collected text to
 * readDT().
 *
 * @param ignore true to only skip forward, used to reach the first DT
 * @return EOF, EODL, or the last character read
 */
private int parseDT(boolean ignore)
  throws IOException
{
  CharBuffer entry = ignore ? null : CharBuffer.allocate();
  // characters of a possible tag, held back until it is known not to be
  // one of the two terminating tags
  CharBuffer pending = CharBuffer.allocate();
  int ch;

  for (;;) {
    ch = readChar();
    if (ch == EOF)
      break;

    if (ch == '<') {
      boolean startOfNext = false; // saw "<DT"
      boolean endOfList = false;   // saw "</DL"

      pending.append((char) ch);
      ch = readChar();

      if (ch == 'D' || ch == 'd') {
        pending.append((char) ch);
        ch = readChar();
        startOfNext = (ch == 'T' || ch == 't');
      }
      else if (ch == '/') {
        pending.append((char) ch);
        ch = readChar();

        if (ch == 'D' || ch == 'd') {
          pending.append((char) ch);
          ch = readChar();
          endOfList = (ch == 'L' || ch == 'l');
        }
      }

      if (startOfNext || endOfList) {
        if (startOfNext)
          pending.clear();
        if (!ignore)
          readDT(entry);
        if (endOfList)
          ch = EODL;
        break;
      }

      // an ordinary tag: the held back characters belong to the entry
      if (entry != null)
        entry.append(pending);
      pending.clear();
    }

    if (entry != null)
      entry.append((char) ch);
  }

  pending.free();
  if (entry != null)
    entry.free();

  return ch;
}
/**
 * Parses the text of one index entry and reports it to the callback.
 *
 * The entry is expected to contain, in this order: a quoted href, the
 * entry name inside {@code <B>}, a '-' and then the description, which
 * starts with a phrase naming the kind of entry. The contents of
 * {@code cb} are replaced while the description is cleaned up.
 *
 * @param cb the text of the entry
 * @throws IOException if the entry does not have the expected form; the
 *         message names the step that failed and the current line
 */
private void readDT(CharBuffer cb)
  throws IOException
{
  String parseDescr = ""; // the step being performed, reported on failure
  CharBuffer t = CharBuffer.allocate();
  try {
    if (log.isLoggable(Level.FINEST))
      log.finest(L.l("<DT> entry from line {0} is [[{1}]]",String.valueOf(_lastDTLine),cb.toString()));

    String path;
    String anchor = null;
    String fullname;
    String name;
    int typ;
    int modifier;
    String description;
    int i = 0;

    // the href is the first double-quoted string of the entry
    parseDescr = "parsing href, looking for first \"";
    i = readToAndEat(cb,i,'\"',null);
    parseDescr = "parsing href, looking for next \"";
    i = readToAndEat(cb,i,'\"',t);
    while (t.startsWith("../"))
      t.delete(0,3);

    int ai = t.indexOf('#');
    if (ai > -1) {
      path = t.substring(0,ai);
      anchor = t.substring(ai + 1);
    } else {
      path = t.toString();
    }
    t.clear();

    if (log.isLoggable(Level.FINEST)) {
      log.finest(L.l("path: [{0}]",path));
      log.finest(L.l("anchor: [{0}]",anchor));
    }

    // fullname is the path without ".html", with '/' turned into '.',
    // followed by ".anchor" when there is an anchor
    parseDescr = "using href to determine fullName";
    t.append(path);
    // drop .html, but only when the path really ends with it, so that
    // other paths do not lose five arbitrary characters
    if (t.endsWith(".html"))
      t.setLength(t.length() - 5);
    for (int ti = t.length() - 1; ti >= 0; ti--) {
      if (t.charAt(ti) == '/')
        t.setCharAt(ti,'.');
    }
    if (anchor != null) {
      t.append('.');
      t.append(anchor);
    }
    if (t.endsWith(".package-summary"))
      t.setLength(t.length() - 16);
    fullname = t.toString();
    t.clear();

    if (log.isLoggable(Level.FINEST)) {
      log.finest(L.l("fullname: [{0}]",fullname));
    }

    parseDescr = "parsing name, looking for opening <B>";
    i = readToAndEat(cb,i,"<B>",null);
    parseDescr = "parsing name, looking for closing </B>";
    i = readToAndEat(cb,i,"<",t);
    name = t.toString();
    t.clear();

    if (log.isLoggable(Level.FINEST)) {
      log.finest(L.l("name: [{0}]",name));
    }

    parseDescr = "parsing description, `-' marks beginning";
    i = readToAndEat(cb,i,'-',null);
    parseDescr = "parsing description, removing markup";
    clean(cb,i);

    parseDescr = "parsing description";
    int kind = entryKind(cb);
    if (kind < 0) {
      // toString() rather than close(): cb belongs to the caller and is
      // read again by the catch block below.
      // NOTE(review): close() appears to also release the buffer - confirm
      throw new IndexOutOfBoundsException(L.l("cannot determine type from `{0}'",cb.toString()));
    }
    typ = kind & ~MODIFIER_STATIC;
    modifier = kind & MODIFIER_STATIC;

    if (log.isLoggable(Level.FINEST)) {
      log.finest(L.l("type: [{0}]",typ));
    }

    parseDescr = "parsing description, remove first sentence";
    eatSentence(cb);
    description = cb.toString();

    if (log.isLoggable(Level.FINEST))
      log.finest(L.l("description: [{0}]",description));

    // do the callback
    _callback.item(path,anchor,name,fullname,typ,modifier,description);
  } catch (IndexOutOfBoundsException ex) {
    String msg = L.l("parsing error {0}: {1}, {2}",parseDescr, ex.getMessage(),getLineInfo());
    if (log.isLoggable(Level.FINE)) {
      log.fine(msg);
      log.fine(L.l("buffer was [[{0}]]",cb.toString()));
    }
    throw new IOExceptionWrapper(msg,ex);
  } finally {
    t.free();
  }
}

/**
 * Determines the kind of entry from the phrase the cleaned description
 * starts with.
 *
 * @return a TYPE_ constant, combined with MODIFIER_STATIC for static
 *         methods and variables, or -1 if the phrase is not recognized
 */
private static int entryKind(CharBuffer cb)
{
  // < 1.4 has "package ", 1.5 has "Package "; same for the other types
  if (cb.startsWith("package ") || cb.startsWith("Package "))
    return TYPE_PACKAGE;
  if (cb.startsWith("class ") || cb.startsWith("Class "))
    return TYPE_CLASS;
  if (cb.startsWith("enum ") || cb.startsWith("Enum "))
    return TYPE_ENUM;
  if (cb.startsWith("annotation ") || cb.startsWith("Annotation "))
    return TYPE_ANNOTATION;
  if (cb.startsWith("interface ") || cb.startsWith("Interface "))
    return TYPE_INTERFACE;
  if (cb.startsWith("exception ") || cb.startsWith("Exception "))
    return TYPE_EXCEPTION;
  if (cb.startsWith("error ") || cb.startsWith("Error "))
    return TYPE_ERROR;
  if (cb.startsWith("Constructor"))
    return TYPE_CONSTRUCTOR;
  if (cb.startsWith("Method"))
    return TYPE_METHOD;
  if (cb.startsWith("Static method"))
    return TYPE_METHOD | MODIFIER_STATIC;
  if (cb.startsWith("Variable"))
    return TYPE_VARIABLE;
  if (cb.startsWith("Static variable"))
    return TYPE_VARIABLE | MODIFIER_STATIC;

  return -1;
}
/**
 * Scans {@code in} from index {@code i} for the character {@code after},
 * optionally copying the characters that are passed over.
 *
 * @param in the text to scan
 * @param i the index to start at
 * @param after the character to look for
 * @param out if not null, receives the characters before {@code after}
 * @return the index just past the found character
 * @throws IndexOutOfBoundsException if the character does not occur at or
 *         after {@code i}, including when {@code i} is already at the end
 */
private int readToAndEat(CharBuffer in, int i, char after, CharBuffer out)
{
  int end = in.length();

  // the bound is tested before every read, so the buffer is never
  // accessed past its length
  for (; i < end; i++) {
    char ch = in.charAt(i);

    if (ch == after)
      return i + 1;

    if (out != null)
      out.append(ch);
  }

  throw new IndexOutOfBoundsException(L.l("error looking for `{0}'",Character.valueOf(after)));
}
/**
 * Scans {@code in} from index {@code i} for the string {@code after},
 * optionally copying the characters that are passed over.
 *
 * @param in the text to scan
 * @param i the index to start at
 * @param after the string to look for
 * @param out if not null, receives the characters before {@code after}
 * @return the index just past the found string
 * @throws IndexOutOfBoundsException if the string does not occur at or
 *         after {@code i}, including when {@code i} is already at the end
 */
private int readToAndEat(CharBuffer in, int i, String after, CharBuffer out)
{
  int al = after.length();

  // the bound is tested before every access, so neither the match nor the
  // copy ever starts past the end of the buffer
  for (; i < in.length(); i++) {
    if (in.regionMatches(i,after,0,al))
      return i + al;

    if (out != null)
      out.append(in.charAt(i));
  }

  throw new IndexOutOfBoundsException(L.l("error looking for `{0}'",after));
}
/**
 * Removes the first sentence, and any whitespace or '.' following it, from
 * the start of {@code cb}.
 *
 * For a package entry ("package x" or "Package x") the sentence ends at
 * the second space, or at the end of the text if there is none. For every
 * other entry it ends at the first '.' that is followed by whitespace or
 * by the end of the text.
 *
 * @param cb the cleaned description, modified in place
 */
private void eatSentence(CharBuffer cb)
{
  // guarded so the message is only built when it will be logged
  if (log.isLoggable(Level.FINEST))
    log.finest("eat sentence [[" + cb.toString() + "]]");

  int cbl = cb.length();
  int i = 0;

  // both spellings are package entries; without the capitalized form a
  // "Package a.b" entry is searched for ". " and cut inside the name
  if (cb.startsWith("package ") || cb.startsWith("Package ")) {
    // second " " marks end of first sentence
    i = cb.indexOf(' ') + 1;
    if (i < cbl)
      i = cb.indexOf(' ',i) + 1;
    // no second space: the sentence is the whole text
    if (i <= 0)
      i = cbl;
  }
  else {
    // ". " marks end of first sentence
    // NOTE(review): when a '.' is found but none is followed by
    // whitespace, i is left just past the last '.' examined - confirm
    // that this is intended
    do {
      int d = cb.indexOf('.',i);
      if (d > -1) {
        i = d + 1;
        if (i >= cbl || Character.isWhitespace(cb.charAt(i)))
          break;
        else {
          i++;
        }
      }
      else
        break;
    } while (i < cbl);
  }

  // strip whitespace and '.' from beginning of what remains
  while (i < cbl && (Character.isWhitespace(cb.charAt(i)) || cb.charAt(i) == '.')) {
    i++;
  }

  if (i >= cbl) {
    cb.clear();
  }
  else {
    cb.delete(0,i);
  }
}
/**
 * Replaces the contents of {@code cb} with a plain-text version of its
 * text from index {@code i} on:
 * <ul>
 * <li>leading whitespace and '.' are skipped
 * <li>newlines and carriage returns become spaces
 * <li>a whitespace character that follows another one is dropped
 * <li>markup, i.e. a '&lt;' followed by a letter or by '/' and a letter,
 *     is removed up to its '&gt;'; a tag starting with D or d leaves a
 *     space and a closing tag starting with A leaves ". ", which ends the
 *     first sentence
 * <li>the non-breaking-space entity becomes a space
 * <li>trailing whitespace is removed
 * </ul>
 *
 * @param cb the text to clean, modified in place
 * @param i the index in {@code cb} where the text of interest starts
 * @throws IndexOutOfBoundsException if a tag is not closed with '&gt;'
 */
private void clean(CharBuffer cb, int i)
{
  // The six-character entity. The code compares six characters and skips
  // six, which a one-character literal can never satisfy. Spelled as a
  // concatenation so that tools which decode HTML entities in source text
  // cannot collapse it into a single character.
  final String nbsp = "&" + "nbsp;";

  CharBuffer r = CharBuffer.allocate();
  try {
    for (;;) {
      i = eatWhitespace(cb,i);
      if (i < cb.length() && cb.charAt(i) == '.')
        i++;
      else
        break;
    }

    boolean lastws = false; // reduce multiple ws to a single space
    while (i < cb.length()) {
      char ch = cb.charAt(i);
      if (ch == '\n' || ch == '\r')
        ch = ' ';

      if (lastws && Character.isWhitespace(ch)) {
        i++;
        continue;
      }

      if (ch == '<') {
        // have to watch for stray < that are not really markup
        // only something that matches "</?[A-Za-z]" counts as markup;
        // every look-ahead is bounds checked so a '<' at the very end of
        // the text is treated as an ordinary character
        int len = cb.length();
        boolean closing = i + 1 < len && cb.charAt(i + 1) == '/';
        int nameAt = closing ? i + 2 : i + 1;
        int cn = nameAt < len ? cb.charAt(nameAt) : -1;

        if (closing && cn == 'A')
          r.append(". ");

        if ((cn >= 'a' && cn <= 'z') || (cn >= 'A' && cn <= 'Z')) {
          i = eatUntil(cb,i + 1,'>');
          if (cn == 'D' || cn == 'd')
            r.append(' ');
          i++;
          continue;
        }
      }

      if (cb.regionMatches(i,nbsp,0,nbsp.length())) {
        r.append(' ');
        i += nbsp.length() - 1;
        lastws = true;
      }
      else {
        r.append(ch);
        lastws = Character.isWhitespace(ch);
      }
      i++;
    }

    // trim trailing whitespace, including a result that is nothing but a
    // single whitespace character
    int l = r.length() - 1;
    while (l >= 0 && Character.isWhitespace(r.charAt(l))) {
      r.setLength(l--);
    }

    cb.clear();
    cb.append(r);
  } finally {
    // release the scratch buffer, as the other methods of this class do
    r.free();
  }
}
/**
 * Skips whitespace in {@code cb} starting at index {@code i}.
 *
 * @return the index of the first character at or after {@code i} that is
 *         not whitespace, or the length of {@code cb} if there is none
 */
private int eatWhitespace(CharBuffer cb, int i)
{
  for (; i < cb.length(); i++) {
    if (!Character.isWhitespace(cb.charAt(i)))
      break;
  }

  return i;
}
/**
 * Finds the next occurrence of {@code until} in {@code cb}, starting at
 * index {@code i}.
 *
 * @return the index of the found character (it is not skipped)
 * @throws IndexOutOfBoundsException if the character does not occur at or
 *         after {@code i}, including when {@code i} is already at the end
 */
private int eatUntil(CharBuffer cb, int i, char until)
{
  // the bound is tested before every read, so the buffer is never
  // accessed past its length
  for (int end = cb.length(); i < end; i++) {
    if (cb.charAt(i) == until)
      return i;
  }

  throw new IndexOutOfBoundsException(L.l("error looking for `{0}'",Character.valueOf(until)));
}
}