Package com.ibm.icu.text

Examples of com.ibm.icu.text.CharsetDetector.enableInputFilter()


        }

        // the author didn't tell us the encoding, try the mozilla-heuristic
        if (charset == null) {
          CharsetDetector det = new CharsetDetector();
          det.enableInputFilter(true);
          InputStream detStream = new BufferedInputStream(sourceStream);
          det.setText(detStream);
          charset = det.detect().getName();
          sourceStream = detStream;
        }
View Full Code Here


        String s = "<a> <lot> <of> <English> <inside> <the> <markup> Un tr\u00E8s petit peu de Fran\u00E7ais. <to> <confuse> <the> <detector>";
        byte[] bytes = s.getBytes("ISO-8859-1");
        CharsetDetector det = new CharsetDetector();
        CharsetMatch m;
       
        det.enableInputFilter(true);
        if (!det.inputFilterEnabled()){
            errln("input filter should be enabled");
        }
       
        det.setText(bytes);
View Full Code Here

       
        if (! m.getLanguage().equals("fr")) {
            errln("input filter did not strip markup!");
        }
       
        det.enableInputFilter(false);
        det.setText(bytes);
        m = det.detect();
       
        if (! m.getLanguage().equals("en")) {
            errln("unfiltered input did not detect as English!");
View Full Code Here

        }

        // the author didn't tell us the encoding, try the mozilla-heuristic
        if (charset == null) {
            final CharsetDetector det = new CharsetDetector();
            det.enableInputFilter(true);
            final InputStream detStream = new BufferedInputStream(sourceStream);
            det.setText(detStream);
            charset = det.detect().getName();
            sourceStream = detStream;
        }
View Full Code Here

      // charset detection with icu
      try {
        bis = new BufferedInputStream(new FileInputStream(tempFile));
        CharsetDetector detector;
        detector = new CharsetDetector();
        detector.enableInputFilter(true);
        detector.setText(bis);
        if (declaredEncoding!=null && !"".equals(declaredEncoding))
          detector.setDeclaredEncoding(declaredEncoding);
        CharsetMatch[] matches = null;
        matches = detector.detectAll();
View Full Code Here

      // charset detection with icu
      bis = new BufferedInputStream(new FileInputStream(tempFile));
      CharsetDetector detector;
      detector = new CharsetDetector();
      detector.enableInputFilter(true);
      detector.setText(bis);
      if (declaredEncoding!=null && !"".equals(declaredEncoding))
        detector.setDeclaredEncoding(declaredEncoding);
      CharsetMatch[] matches = detector.detectAll();
      bis.close();
View Full Code Here

    @Override
    protected CoderResult implFlush(CharBuffer out) {
      if (usedDecoder == null) {
        CharsetDetector detector = new CharsetDetector();
        detector.enableInputFilter(filtered);
        byte[] data = buffer.toByteArray();
        detector.setText(data);
        CharsetMatch cm = detector.detect();
        try {
          usedDecoder = Charset.forName(cm == null ? "ISO-8859-1" : cm.getName()).newDecoder();
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by ORACLE Inc. Contact coftware#gmail.com.