Package org.apache.lenya.lucene.parser

Source Code of org.apache.lenya.lucene.parser.PreParser

/*
* PreParser.java
*
* Created on 30. März 2003, 13:37
*/

package org.apache.lenya.lucene.parser;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import org.apache.lenya.util.CommandLineLogger;

/**
* The Java HTML parser cannot handle self-closing text.
* This class converts all "/>" strings to ">" to avoid this problem.
*
* @author  nobby
*/
public class PreParser {
   
    /** Creates a new instance of PreParser */
    public PreParser() {
        new CommandLineLogger(getClass()).log("creating new object");
    }

    /**
     * Parses HTML from a reader.
     */
    public Reader parse(Reader reader)
            throws IOException {
       
        StringBuffer buffer = new StringBuffer();
        boolean pending = false;
       
        char chars[] = new char[1];
        while (reader.read(chars) != -1) {
           
            int lastPosition = buffer.length() - 1;
            if (chars[0] == '>' && buffer.charAt(lastPosition) == '/') {
                buffer.deleteCharAt(lastPosition);
            }
            buffer.append(chars[0]);
        }
       
        return new StringReader(buffer.toString());
    }
   
}
TOP

Related Classes of org.apache.lenya.lucene.parser.PreParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.