Package org.vietspider.html.parser

Source Code of org.vietspider.html.parser.HTMLParser2

/*
* Copyright 2004-2006 The VietSpider        All rights reserved.
*
* Created on January 24, 2006, 7:48 PM
*/

package org.vietspider.html.parser;

import java.io.File;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.vietspider.chars.CharsDecoder;
import org.vietspider.common.io.DataReader;
import org.vietspider.html.HTMLDocument;
import org.vietspider.html.HTMLNode;
import org.vietspider.html.util.HTMLParserDetector;
import org.vietspider.token.TypeToken;
/**
* @author nhuthuan
* Email: nhudinhthuan@yahoo.com
*/
public class HTMLParser2 {
 
  public final static HTMLNode clone(HTMLNode node)  {
    NodeImpl nodeImpl = (NodeImpl)node;
    int type = nodeImpl.getType();
    if(type == TypeToken.CONTENT
        || type == TypeToken.COMMENT || type == TypeToken.CODE_CONTENT) {
      return new NodeImpl(nodeImpl.getValue(), nodeImpl.getName());
    }
    return new NodeImpl(nodeImpl.getValue(), nodeImpl.getName(), nodeImpl.getType());
  }
 
  public List<NodeImpl> createTokens(char[] data) throws Exception {
    CharsToken tokens = new CharsToken();
    ParserService parserService = new ParserService();
    TokenCreator tokenCreator = new TokenCreator(parserService, tokens);
    parserService.getTokenParser().createBeans(tokens, data, tokenCreator);
    List<NodeImpl> list = new ArrayList<NodeImpl>();
    while(tokens.hasNext()){   
      list.add(tokens.pop());
    }
    return list;
  }
 
  public HTMLDocument createDocument(char[] data) throws Exception {
    CharsToken tokens = new CharsToken();
    ParserService parserService = new ParserService();
    TokenCreator tokenCreator = new TokenCreator(parserService, tokens);
    parserService.getTokenParser().createBeans(tokens, data, tokenCreator);
    parserService.parse(tokens, tokens.getDocument());
    return tokens.getDocument();
  }
 
  public HTMLDocument createDocument(CharsToken tokens) throws Exception {
    ParserService parserService = new ParserService();
    parserService.parse(tokens, tokens.getDocument());
    return tokens.getDocument();
  }
 
  public HTMLDocument createDocument(List<NodeImpl> list) throws Exception {
    CharsToken tokens = new CharsToken();
    for(int i = 0; i < list.size(); i++) {
      tokens.push(list.get(i));
    }
    new ParserService().parse(tokens, tokens.getDocument());
    return tokens.getDocument();
  }

  public  HTMLDocument createDocument(String text) throws Exception {
    return createDocument(text.toCharArray());
  }

  public HTMLDocument createDocument(byte[] data, String charset) throws Exception {
    if(charset == null) {
      HTMLParserDetector parserDetector = new HTMLParserDetector();
      return parserDetector.createDocument(data);
    }
    char [] chars = CharsDecoder.decode(charset, data, 0, data.length);
    return createDocument(chars);
 

  public  HTMLDocument createDocument(InputStream input, String charset) throws Exception {
    DataReader reader = new DataReader();
    return createDocument(reader.loadInputStream(input).toByteArray(), charset)
  }

  public HTMLDocument createDocument(File file, String charset) throws Exception {
    DataReader reader = new DataReader();
    return createDocument(reader.load(file), charset);
  }

}
TOP

Related Classes of org.vietspider.html.parser.HTMLParser2

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.