Package org.vietspider.chars.refs

Examples of org.vietspider.chars.refs.RefsDecoder


    return document == null || ref == null ?
        document : new HTMLAnchorUtil().searchDocument(document, ref);
  }
 
  public NodePath findNodeByText(HTMLNode node, String start, String end) throws Exception {
    RefsDecoder decoder = new RefsDecoder();
    NodePathParser pathParser = new NodePathParser();
    TextHandler textHandler = new TextHandler();
    if(start == null || start.trim().length() == 0) return pathParser.toPath(node);
    start = textHandler.trim(start);
    HTMLNode startNode = textHandler.findByText(node, start, decoder);
View Full Code Here


   
    return values;
  }
 
  protected HTMLNode searchBody(HTMLDocument document) throws Exception {
    RefsDecoder decoder = new RefsDecoder();
    NodeIterator iterator = document.getRoot().iterator();
    while(iterator.hasNext()) {
      HTMLNode node = iterator.next();
      if(!node.isNode(Name.CONTENT)) continue;
      char [] chars = node.getValue();
      chars = decoder.decode(chars);

      chars = CharsUtil.cutAndTrim(chars, 0, chars.length);
      chars =  java.text.Normalizer.normalize(new String(chars), Normalizer.Form.NFC).toCharArray();
      node.setValue(chars);             
   
View Full Code Here

    List<HTMLNode> ignores = searchBadNodes2(body, checkers);
    return new ContentRenderer(body, ignores, linkNodeChecker);
  }

  public static HTMLNode searchBody(HTMLDocument document) throws Exception {
    RefsDecoder decoder = new RefsDecoder();
    NodeIterator iterator = document.getRoot().iterator();
    while(iterator.hasNext()) {
      HTMLNode node = iterator.next();
      if(!node.isNode(Name.CONTENT)) continue;
      char [] chars = node.getValue();
      chars = decoder.decode(chars);

      chars = CharsUtil.cutAndTrim(chars, 0, chars.length);
      chars =  java.text.Normalizer.normalize(new String(chars), Normalizer.Form.NFC).toCharArray();
      node.setValue(chars);             
   
View Full Code Here

    nodeName = nodeName.toLowerCase();
   
    text = text.substring(nodeName.length());
    text = trimText(text);

    RefsDecoder refsDecoder = new RefsDecoder();
    while(text.length() > 0) {
      Object [] components = getName(text);
      if(components == null) return list;
      String name = (String)components[0];
      text = (String)components[1];
     
      if(components.length > 2) {
        list.add(new Attribute(name, null));
        continue;
      }
     
      String value = "";
      char mark = '\"';
      if(text.length() > 0) {
        components = getValue(text);
        value = (String)components[0];
        text = (String)components[1];
        mark = (Character)components[2];
      }
     
      if(mark != '\"' && mark != '\'') mark = '\"';
      if(value.indexOf(REF_START) > -1) {
        value = new String(refsDecoder.decode(value.toCharArray()));
      }
      list.addElement(new Attribute(name, value, mark));
    }
    parseStyle(list);
    return list;
View Full Code Here

 
  final static public Attribute[] parse(String text) {
    List<Attribute> list = new ArrayList<Attribute>();
    text = trimText(text);

    RefsDecoder refsDecoder = new RefsDecoder();
    while(text.length() > 0) {
      Object [] components = getName(text);
      if(components == null) return list.toArray(new Attribute[list.size()]);
      String name = (String)components[0];
      text = (String)components[1];
     
      if(components.length > 2) {
        list.add(new Attribute(name, null));
        continue;
      }
     
      String value = "";
      char mark = '\"';
      if(text.length() > 0) {
        components = getValue(text);
        value = (String)components[0];
//        System.out.println(" ========== > "+ value);
        text = (String)components[1];
        mark = (Character)components[2];
      }
     
      if(mark != '\"' && mark != '\'') mark = '\"';
      if(value.indexOf(REF_START) > -1) {
        value = new String(refsDecoder.decode(value.toCharArray()));
      }
      list.add(new Attribute(name, value));
    }
    return list.toArray(new Attribute[list.size()]);
 
View Full Code Here

  // Sorts the URICS array in place once, when the class is initialized.
  // NOTE(review): presumably done so later lookups can rely on a sorted array
  // (e.g. Arrays.binarySearch) — confirm against the usages of URICS.
  static {
    Arrays.sort(URICS);
  }
 
  public URLUtilsBak() {
    decoder = new RefsDecoder();
  }
View Full Code Here

 
 
  public HTMLDocument createDocument(byte [] bytes) throws Exception {
    if(charset != null) {
      char [] chars = CharsDecoder.decode(charset, bytes, 0, bytes.length);
      if(decode) chars = new RefsDecoder().decode(chars);
      return createDocument(chars);
    }
    return detectDocument(bytes);
  }
View Full Code Here

  }
 
  private HTMLDocument detectDocument(byte [] bytes) throws Exception {
    this.charset = detectCharset(bytes);
    char [] chars = CharsDecoder.decode(charset, bytes, 0, bytes.length);
    if(decode) chars = new RefsDecoder().decode(chars);
    return createDocument(chars);
  }
View Full Code Here

  }
 
  public List<NodeImpl> createTokens(byte [] bytes) throws Exception {
    if(charset != null) {
      char [] chars = CharsDecoder.decode(charset, bytes, 0, bytes.length);
      if(decode) chars = new RefsDecoder().decode(chars);
      return createTokens(chars);
    }
    this.charset = detectCharset(bytes);
    char [] chars = CharsDecoder.decode(charset, bytes, 0, bytes.length);
    if(decode) chars = new RefsDecoder().decode(chars);
    return createTokens(chars);
  }
View Full Code Here

*          Email:nhudinhthuan@yahoo.com
* Nov 19, 2006
*/
public class DecodeExample {
  public static void main(String arg[]){
    RefsDecoder ref = new RefsDecoder();
    String text = "&nbsp;&nbsp;&nbsp;" ;
    String value = new String(ref.decode(text.toCharArray()));
    System.out.println("|"+value+"|");
    RefsEncoder encoder = new RefsEncoder(false);
    value = value + "&";
    text = new String(encoder.encode(value.toCharArray()));
    System.out.println(text);
View Full Code Here

TOP

Related Classes of org.vietspider.chars.refs.RefsDecoder

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.