Package org.mozilla.universalchardet

Examples of org.mozilla.universalchardet.UniversalDetector.handleData()


     * @return the likely charset; if detection fails, returns UTF-8
     */
    public static String guessEncoding(byte[] bytes) {
        String DEFAULT_ENCODING = "UTF-8";
        UniversalDetector detector = new UniversalDetector(null);
        detector.handleData(bytes, 0, bytes.length);
        detector.dataEnd();
        String encoding = detector.getDetectedCharset();
        detector.reset();
        if (encoding == null) {
            encoding = DEFAULT_ENCODING;
View Full Code Here


        UniversalDetector detector = new UniversalDetector(null);

        // (2)
        int nread;
        while ((nread = fis.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nread);
        }
        // (3)
        detector.dataEnd();

        // (4)
View Full Code Here

   * Detect encoding by analyzing characters in the array
   */
  public static String detectEncoding(byte[] bytes) {
    String DEFAULT_ENCODING = "UTF-8";
    UniversalDetector detector = new UniversalDetector(null);
    detector.handleData(bytes, 0, bytes.length);
    detector.dataEnd();
    String encoding = detector.getDetectedCharset();
    detector.reset();
    if (encoding == null) {
      encoding = DEFAULT_ENCODING;
View Full Code Here

      UniversalDetector detector = new UniversalDetector(null);
      ReadableByteChannel bc = Channels.newChannel(stream);
      ByteBuffer buffer = ByteBuffer.allocate(BUFFER_SIZE);
      int read = 0;
      while ((read = bc.read(buffer)) != -1) {
        detector.handleData(buffer.array(), buffer.position() - read, read);
        buffer = resizeBuffer(buffer);
      }
      detector.dataEnd();
      // copy the result back to a byte array
      String encoding = detector.getDetectedCharset();
View Full Code Here

    BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(file));
    final UniversalDetector universalDetector = new UniversalDetector(null);

    int numberOfBytesRead;
    while ((numberOfBytesRead = bufferedInputStream.read(buf)) > 0 && !universalDetector.isDone()) {
      universalDetector.handleData(buf, 0, numberOfBytesRead);
    }

    universalDetector.dataEnd();
    bufferedInputStream.close();
    String encoding = universalDetector.getDetectedCharset();
View Full Code Here

    // (2)
    resource.mark(MAX_CHARSET_READAHEAD);
    int len = resource.read(bbuffer, 0, MAX_CHARSET_READAHEAD);
    resource.reset();
    detector.handleData(bbuffer, 0, len);
    // (3)
    detector.dataEnd();
    // (4)
    charsetName = detector.getDetectedCharset();
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.