Examples of detectCharset()


Examples of net.yacy.document.parser.html.ScraperInputStream.detectCharset()

                            if((path.endsWith("html") || path.endsWith("htm"))) {
                                // save position
                                fis.mark(1000);
                                // scrape document to look up charset
                                final ScraperInputStream htmlFilter = new ScraperInputStream(fis,"UTF-8",new DigestURI("http://localhost"),null,false);
                                final String charset = htmlParser.patchCharsetEncoding(htmlFilter.detectCharset());
                                if(charset != null)
                                    mimeType = mimeType + "; charset="+charset;
                                // reset position
                                fis.reset();
                            }
View Full Code Here

Examples of net.yacy.document.parser.html.ScraperInputStream.detectCharset()

        // nothing found: try to find a meta-tag
        if (charset == null) {
            try {
                final ScraperInputStream htmlFilter = new ScraperInputStream(sourceStream,documentCharset,location,null,false);
                sourceStream = htmlFilter;
                charset = htmlFilter.detectCharset();
            } catch (IOException e1) {
                throw new Parser.Failure("Charset error:" + e1.getMessage(), location);
            }
        }
View Full Code Here

Examples of net.yacy.document.parser.html.ScraperInputStream.detectCharset()

                            if((path.endsWith("html") || path.endsWith("htm"))) {
                                // save position
                                fis.mark(1000);
                                // scrape document to look up charset
                                final ScraperInputStream htmlFilter = new ScraperInputStream(fis,"UTF-8",new DigestURI("http://localhost"),null,false);
                                final String charset = htmlParser.patchCharsetEncoding(htmlFilter.detectCharset());
                                if(charset != null)
                                    mimeType = mimeType + "; charset="+charset;
                                // reset position
                                fis.reset();
                            }
View Full Code Here

Examples of net.yacy.document.parser.html.ScraperInputStream.detectCharset()

        // nothing found: try to find a meta-tag
        if (charset == null) {
            try {
                final ScraperInputStream htmlFilter = new ScraperInputStream(sourceStream,documentCharset,location,null,false);
                sourceStream = htmlFilter;
                charset = htmlFilter.detectCharset();
            } catch (final IOException e1) {
                throw new Parser.Failure("Charset error:" + e1.getMessage(), location);
            }
        }
View Full Code Here

Examples of org.vietspider.html.util.HTMLParserDetector.detectCharset()

    if(level == depth) return ;
    System.out.println("\nstart download "+ address +" level "+level +" depth "+depth +" ...");
    byte [] bytes = download(parent.toString(), address);
    if(bytes == null || bytes.length < 0) return;
    HTMLParserDetector parser = new HTMLParserDetector();
    String charset = parser.detectCharset(bytes);
    char [] chars = CharsDecoder.decode(charset, bytes, 0, bytes.length);

    List<NodeImpl> tokens = parser.createTokens(chars);

    List<Resource> resources = new ArrayList<Resource>();
View Full Code Here

Examples of org.vietspider.html.util.HTMLParserDetector.detectCharset()

      message = "Error: Not html data!";
      return ERROR;
    }
    if(charset == null) {
      HTMLParserDetector detector = new HTMLParserDetector();
      charset = detector.detectCharset(data);
    }
   
    char [] chars = CharsDecoder.decode(charset, data, 0, data.length);
    return post(referer, formName, url, chars, abort);
  }
View Full Code Here

Examples of org.vietspider.html.util.HTMLParserDetector.detectCharset()

  }
 
  private boolean checkTimeout(byte [] data) throws Exception {
    if(data == null) return true;
    HTMLParserDetector parser = new HTMLParserDetector();
    if(charset == null) charset = parser.detectCharset(data);
   
    char [] chars = CharsDecoder.decode(charset, data, 0, data.length);
    List<NodeImpl> tokens  = parser.createTokens(chars);
    if(tokens == null) return true;
View Full Code Here

Examples of org.vietspider.html.util.HTMLParserDetector.detectCharset()

      message = "Not html data!";
      return ERROR;
    }
    if(charset == null) {
      HTMLParserDetector detector = new HTMLParserDetector();
      charset = detector.detectCharset(data);
    }
   
    char [] chars = CharsDecoder.decode(charset, data, 0, data.length);
    return post(referer, url, chars);
  }
View Full Code Here

Examples of org.vietspider.html.util.HTMLParserDetector.detectCharset()

    // Read the response body as raw bytes.
    byte [] data = responseReader.readBody(response);
   
    HTMLParserDetector htmlParser2 = new HTMLParserDetector();
    // No body: report false to the caller.
    if(data == null) return false;
    // Detect the charset from the bytes only when none has been set yet.
    if(charset == null) {
      charset = htmlParser2.detectCharset(data);
    }
   
    // Decode the whole body with the chosen charset, then tokenize it.
    char [] chars = CharsDecoder.decode(charset, data, 0, data.length);
    List<NodeImpl> tokens  = htmlParser2.createTokens(chars);
    // A null token list is also reported as false.
    if(tokens == null) return false;
View Full Code Here

Examples of org.vietspider.html.util.HTMLParserDetector.detectCharset()

    if(level == depth) return ;
    System.out.println("\nstart download "+ address +" level "+level +" depth "+depth +" ...");
    byte [] bytes = download(parent.toString(), address);
    if(bytes == null || bytes.length < 0) return;
    HTMLParserDetector parser = new HTMLParserDetector();
    String charset = parser.detectCharset(bytes);
    char [] chars = CharsDecoder.decode(charset, bytes, 0, bytes.length);

    List<NodeImpl> tokens = parser.createTokens(chars);

    List<Resource> resources = new ArrayList<Resource>();
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.