Package org.mozilla.universalchardet

Examples of org.mozilla.universalchardet.UniversalDetector.handleData()


            is = new FileInputStream(file);
            UniversalDetector detector = new UniversalDetector(null);
            byte[] buf = new byte[4096];
            int nread;
            while ((nread = is.read(buf)) > 0 && !detector.isDone()) {
                detector.handleData(buf, 0, nread);
            }
            detector.dataEnd();
            encoding = detector.getDetectedCharset();
        } catch (IOException e) {
            // nothing to do
View Full Code Here


    public static String getDetectedEncoding(InputStream is) throws IOException {
        UniversalDetector detector = new UniversalDetector(null);
        byte[] buf = new byte[4096];
        int nread;
        while ((nread = is.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nread);
        }
        detector.dataEnd();
        return detector.getDetectedCharset();
    }
View Full Code Here

      is = new FileInputStream(file);
      UniversalDetector detector = new UniversalDetector(null);
      byte[] buf = new byte[4096];
      int nread;
      while ((nread = is.read(buf)) > 0 && !detector.isDone()) {
        detector.handleData(buf, 0, nread);
      }
      detector.dataEnd();
      encoding = detector.getDetectedCharset();
    } catch (IOException e) {
      // nothing to do
View Full Code Here

                inStream = new DataInputStream(in);
      byte[] buf = new byte[(int)file.length()];
      int nrRead = inStream.read(buf);
   
      UniversalDetector detector = new UniversalDetector(null);
      detector.handleData(buf, 0, nrRead);
      detector.dataEnd();

      String encoding = detector.getDetectedCharset();
      detector.reset();
      if (encoding != null) {
View Full Code Here

    final UniversalDetector universalDetector;
    try (BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(file))) {
      universalDetector = new UniversalDetector(null);
      int numberOfBytesRead;
      while ((numberOfBytesRead = bufferedInputStream.read(buf)) > 0 && !universalDetector.isDone()) {
        universalDetector.handleData(buf, 0, numberOfBytesRead);
      }
    }
    universalDetector.dataEnd();
    String encoding = universalDetector.getDetectedCharset();
View Full Code Here

    }

    private static String detectEncoding(byte[] content)
    {
        UniversalDetector detector = new UniversalDetector(null);
        detector.handleData(content, 0, content.length);
        detector.dataEnd();
        String encoding = detector.getDetectedCharset();
        if (encoding != null)
        {
            LOGGER.debug(String.format("Detected encoding: %s\n", encoding));
View Full Code Here

   */
  private void checkForBadCharacters(String json) throws Exception
  {
    byte[] bytes = json.getBytes();   
    UniversalDetector ud = new UniversalDetector(null);
    ud.handleData(bytes, 0, bytes.length);
    ud.dataEnd();
    String encoding = ud.getDetectedCharset();     
    if ( encoding != null )
    {
      //do an extra check for charcode 65533 if encoding is utf-8   
View Full Code Here

   */
  private void checkForBadCharacters(String json) throws Exception
  {
    byte[] bytes = json.getBytes();   
    UniversalDetector ud = new UniversalDetector(null);
    ud.handleData(bytes, 0, bytes.length);
    ud.dataEnd();
    String encoding = ud.getDetectedCharset();     
    if ( encoding != null )
    {
      //do an extra check for charcode 65533 if encoding is utf-8   
View Full Code Here

        UniversalDetector detector = new UniversalDetector(null);
        int offset = 0;

        do {
            int blockSize = Math.min(4096, bytes.length - offset);
            detector.handleData(bytes, offset, blockSize);
            offset += blockSize;
        } while(offset < bytes.length);
        detector.dataEnd();

        return or(detector.getDetectedCharset(), "UTF-8");
View Full Code Here

        UniversalDetector detector = new UniversalDetector(null);
        byte[] buf = new byte[4096];
        int nRead;

        while ((nRead = is.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nRead);
        }
        detector.dataEnd();

        return or(detector.getDetectedCharset(), "UTF-8");
    }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.