Package com.ibm.icu.text

Examples of com.ibm.icu.text.CharsetMatch


      if (usedDecoder == null) {
        CharsetDetector detector = new CharsetDetector();
        detector.enableInputFilter(filtered);
        byte[] data = buffer.toByteArray();
        detector.setText(data);
        CharsetMatch cm = detector.detect();
        try {
          usedDecoder = Charset.forName(cm == null ? "ISO-8859-1" : cm.getName()).newDecoder();
        } catch (UnsupportedCharsetException ex) {
          usedDecoder = Charset.forName("ISO-8859-1").newDecoder();
        }
        usedDecoder.onUnmappableCharacter(unmappableCharacterAction());
        usedDecoder.onMalformedInput(malformedInputAction());
View Full Code Here


    }

    public static Reader readerWithCharsetDetect(InputStream is) {
        CharsetDetector detector = new CharsetDetector();
        try {
            CharsetMatch match = detector.setText(is).detect();
            is.reset();
            return new InputStreamReader(is, match.getName());
        } catch (IOException e) {
            e.printStackTrace();
            try {
                is.reset();
            } catch (IOException e1) {
View Full Code Here

   
    public Encoding sniff() throws IOException {
        try {
            CharsetDetector detector = new CharsetDetector();
            detector.setText(this);
            CharsetMatch match = detector.detect();
            Encoding enc = Encoding.forName(match.getName());
            Encoding actual = enc.getActualHtmlEncoding();
            if (actual != null) {
                enc = actual;
            }
            if (enc != Encoding.WINDOWS1252 && enc.isAsciiSuperset()) {
View Full Code Here

TOP

Related Classes of com.ibm.icu.text.CharsetMatch

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.