Package com.ibm.icu.text

Examples of com.ibm.icu.text.CharsetMatch


                {(byte)'A', (byte)'B', (byte)'C'},
                {(byte)'A', (byte)'B', (byte)'C', (byte)'D'}
            };
       
        CharsetDetector det = new CharsetDetector();
        CharsetMatch m;
        for (int i=0; i<shortBytes.length; i++) {
            det.setText(shortBytes[i]);
            m = det.detect();
            logln("i=" + i + " -> " + m.getName());
        }
    }
View Full Code Here


            null,
            "ISO-8859-1"
        };
       
        CharsetDetector det = new CharsetDetector();
        CharsetMatch match;

        det.setDeclaredEncoding("ISO-2022-JP");

        for (int idx = 0; idx < testStrings.length; idx += 1) {
            det.setText(testStrings[idx]);
            match = det.detect();

            if (match == null) {
                if (testResults[idx] != null) {
                    errln("Unexpectedly got no results at index " + idx);
                }
                else {
                    logln("Got no result as expected at index " + idx);
                }
                continue;
            }

            if (testResults[idx] == null || ! testResults[idx].equals(match.getName())) {
                errln("Unexpectedly got " + match.getName() + " instead of " + testResults[idx] +
                      " at index " + idx + " with confidence " + match.getConfidence());
                return;
            }
        }
    }
View Full Code Here

        }
    }

    private void checkMatch(CharsetDetector det, String testString, String encoding, String language, String id) throws Exception
    {
        CharsetMatch m = det.detect();
        String decoded;
       
        if (! m.getName().equals(encoding)) {
            errln(id + ": encoding detection failure - expected " + encoding + ", got " + m.getName());
            return;
        }
       
        String charsetMatchLanguage = m.getLanguage();
        if ((language != null && !charsetMatchLanguage.equals(language))
            || (language == null && charsetMatchLanguage != null)
            || (language != null && charsetMatchLanguage == null))
        {
            errln(id + ", " + encoding + ": language detection failure - expected " + language + ", got " + m.getLanguage());
        }
       
        if (encoding.startsWith("UTF-32")) {
            return;
        }
       
        decoded = m.getString();
       
        if (! testString.equals(decoded)) {
            errln(id + ", " + encoding + ": getString() didn't return the original string!");
        }
       
        decoded = stringFromReader(m.getReader());
       
        if (! testString.equals(decoded)) {
            errln(id + ", " + encoding + ": getReader() didn't yield the original string!");
        }
    }
View Full Code Here

        "\u0627\u0646 \u0644\u0633\u0643\u0627\u0646 \u062F\u0648\u0644\u0629 \u0627\u0633\u0631" +
        "\u0627\u0626\u064A\u0644 \u0628\u0648\u062C\u0647 \u0627\u0644\u0645\u062E\u0627\u0637" +
        "\u0631 \u0627\u0644\u0627\u0642\u062A\u0635\u0627\u062F\u064A\u0629 \u0648\u0627\u0644" +
        "\u0627\u062C\u062A\u0645\u0627\u0639\u064A\u0629.";
       
        CharsetMatch m = _test1256(s);
        String charsetMatch = m.getName();
        CheckAssert(charsetMatch.equals("windows-1256"));
       
        /* Create an encoder to get the bytes.
         * Using String.getBytes("IBM420") can produce inconsistent results
         * between different versions of the JDK.
         */
        CharsetEncoder encoder = new CharsetProviderICU().charsetForName("IBM420").newEncoder();
       
        m = _testIBM420_ar_rtl(s, encoder);
        charsetMatch = m.getName();
        CheckAssert(charsetMatch.equals("IBM420_rtl"));
       
         m = _testIBM420_ar_ltr(s, encoder);
        charsetMatch = m.getName();
        CheckAssert(charsetMatch.equals("IBM420_ltr"));

    }
View Full Code Here

    }

    private CharsetMatch _testIBM420_ar_rtl(String s, CharsetEncoder encoder) throws Exception {
        CharsetDetector det = new CharsetDetector();
        det.setText(encoder.encode(CharBuffer.wrap(s)).array());
        CharsetMatch m = det.detect();
        return m;
    }
View Full Code Here

        StringBuffer ltrStrBuf = new StringBuffer(s);
        ltrStrBuf = ltrStrBuf.reverse();
       
        CharsetDetector det = new CharsetDetector();
        det.setText(encoder.encode(CharBuffer.wrap(ltrStrBuf.toString())).array());
        CharsetMatch m = det.detect();
        return m;
    }
View Full Code Here

    private CharsetMatch _test1256(String s) throws Exception {
       
        byte [] bytes = s.getBytes("windows-1256");
        CharsetDetector det = new CharsetDetector();
        det.setText(bytes);
        CharsetMatch m = det.detect();
        return m;
    }
View Full Code Here

            " \u05DE\u05D1\u05E6\u05E2 \u05E2\u05D5\u05E4\u05E8\u05EA \u05D9\u05E6\u05D5\u05E7\u05D4\"." +
            " \u05DE\u05E0\u05D3\u05DC\u05D1\u05DC\u05D9\u05D8 \u05E7\u05D9\u05D1\u05DC \u05D0\u05EA" +
            " \u05D4\u05D7\u05DC\u05D8\u05EA\u05D5 \u05DC\u05D0\u05D7\u05E8 \u05E9\u05E2\u05D9\u05D9" +
            "\u05DF \u05D1\u05EA\u05DE\u05DC\u05D9\u05DC \u05D4\u05E2\u05D3\u05D5\u05D9\u05D5\u05EA";
       
        CharsetMatch m = _test1255(s);
        String charsetMatch = m.getName();
        CheckAssert(charsetMatch.equals("ISO-8859-8"));
       
        m = _testIBM424_he_rtl(s);
        charsetMatch = m.getName();
        CheckAssert(charsetMatch.equals("IBM424_rtl"));
       
        m = _testIBM424_he_ltr(s);
        charsetMatch = m.getName();
        CheckAssert(charsetMatch.equals("IBM424_ltr"));
    }
View Full Code Here

    private CharsetMatch _test1255(String s) throws Exception {
        byte [] bytes = s.getBytes("ISO-8859-8");
        CharsetDetector det = new CharsetDetector();
        det.setText(bytes);
        CharsetMatch m = det.detect();
        return m;
    }
View Full Code Here

   
    private CharsetMatch _testIBM424_he_rtl(String s) throws Exception {
        byte [] bytes = s.getBytes("IBM424");       
        CharsetDetector det = new CharsetDetector();
        det.setText(bytes);
        CharsetMatch m = det.detect();
        return m;
    }
View Full Code Here

TOP

Related Classes of com.ibm.icu.text.CharsetMatch

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.