Package com.ibm.icu.text

Examples of com.ibm.icu.text.Transliterator


    checkToken(Transliterator.createFromRules("test", rules, Transliterator.FORWARD), "caa", "cbd");
  }
 
  public void testOptimizer() throws Exception {
    String rules = "a > b; b > c;"; // convert a's to b's and b's to c's
    Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
    assertTrue(custom.getFilter() == null);
    new ICUTransformFilter(new KeywordTokenizer(new StringReader("")), custom);
    assertTrue(custom.getFilter().equals(new UnicodeSet("[ab]")));
  }
View Full Code Here


        "ABCDE", "abcde");
  }
 
  public void testOptimizerSurrogate() throws Exception {
    String rules = "\\U00020087 > x;"; // convert CJK UNIFIED IDEOGRAPH-20087 to an x
    Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
    assertTrue(custom.getFilter() == null);
    new ICUTransformFilter(new KeywordTokenizer(new StringReader("")), custom);
    assertTrue(custom.getFilter().equals(new UnicodeSet("[\\U00020087]")));
  }
View Full Code Here

    assertTokenStreamContents(ts, new String[] { expected });
  }
 
  /** blast some random strings through the analyzer */
  public void testRandomStrings() throws Exception {
    final Transliterator transform = Transliterator.getInstance("Any-Latin");
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
        return new TokenStreamComponents(tokenizer, new ICUTransformFilter(tokenizer, transform));
View Full Code Here

        return !Boolean.parseBoolean(obj.toString());
    }

    public static String cleanString(String str) {

        Transliterator accentsconverter = Transliterator.getInstance("Latin; NFD; [:Nonspacing Mark:] Remove; NFC;");

        str = accentsconverter.transliterate(str);

        // the character ? seems to not be changed to d by the transliterate function

        StringBuffer cleanedStr = new StringBuffer(str.trim());
        // delete special character
View Full Code Here

    checkToken(Transliterator.createFromRules("test", rules, Transliterator.FORWARD), "caa", "cbd");
  }
 
  public void testOptimizer() throws Exception {
    String rules = "a > b; b > c;"; // convert a's to b's and b's to c's
    Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
    assertTrue(custom.getFilter() == null);
    new ICUTransformFilter(new KeywordTokenizer(new StringReader("")), custom);
    assertTrue(custom.getFilter().equals(new UnicodeSet("[ab]")));
  }
View Full Code Here

        "ABCDE", "abcde");
  }
 
  public void testOptimizerSurrogate() throws Exception {
    String rules = "\\U00020087 > x;"; // convert CJK UNIFIED IDEOGRAPH-20087 to an x
    Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
    assertTrue(custom.getFilter() == null);
    new ICUTransformFilter(new KeywordTokenizer(new StringReader("")), custom);
    assertTrue(custom.getFilter().equals(new UnicodeSet("[\\U00020087]")));
  }
View Full Code Here

    assertTokenStreamContents(ts, new String[] { expected });
  }
 
  /** blast some random strings through the analyzer */
  public void testRandomStrings() throws Exception {
    final Transliterator transform = Transliterator.getInstance("Any-Latin");
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new ICUTransformFilter(tokenizer, transform));
View Full Code Here

   }

   public static String cleanString(String str)
   {

      Transliterator accentsconverter = Transliterator.getInstance("Latin; NFD; [:Nonspacing Mark:] Remove; NFC;");

      str = accentsconverter.transliterate(str);

      //the character ? seems to not be changed to d by the transliterate function

      StringBuffer cleanedStr = new StringBuffer(str.trim());
      // delete special character
View Full Code Here

    public static void registerTransliteratorFromFile(String dir, String id) {
        try {
            String filename = id.replace('-', '_') ".txt";
            String rules = getFileContents(dir, filename);
            Transliterator t;
            int pos = id.indexOf('-');
            String rid;
            if (pos < 0) {
                rid = id + "-Any";
                id = "Any-" + id;
View Full Code Here

        pw.println("td { text-align: Center; font-size: 200% }");
        pw.println("tt { font-size: 50% }");
        pw.println("td.miss { background-color: #CCCCFF }");
        pw.println("</style></head><body bgcolor='#FFFFFF'>");

        Transliterator anyToLatin = Transliterator.getInstance("any-latin");
       
        String testString = "\u0946\u093E";
       
        UnicodeSet failNorm = new UnicodeSet();
        Set latinFail = new TreeSet();
       
        for (int i = 0; i < indicScripts.length; ++i) {
            if (indicScripts[i] == UScript.LATIN) continue;
            String source = names[i];
            System.out.println(source);
            UnicodeSet sourceChars = sets[i];

            for (int j = 0; j < indicScripts.length; ++j) {
                if (i == j) continue;
                String target = names[j];
                Transliterator forward = Transliterator.getInstance(source + '-' + target);
                Transliterator backward = forward.getInverse();
                UnicodeSetIterator it = new UnicodeSetIterator(sourceChars);
                while (it.next()) {
                    if (lengthMarks.contains(it.codepoint)) continue;
                    String s = Normalizer.normalize(it.codepoint,Normalizer.NFC,0);
                    //if (!Normalizer.isNormalized(s,Normalizer.NFC,0)) continue;
                    if (!s.equals(Normalizer.normalize(s,Normalizer.NFD,0))) {
                        failNorm.add(it.codepoint);
                    }
                    String t = fix(forward.transliterate(s));
                    if (t.equals(testString)) {
                        System.out.println("debug");
                    }

                    String r = fix(backward.transliterate(t));
                    if (Normalizer.compare(s,r,0) == 0) {
                        if (indicScripts[j] != UScript.LATIN) eq.add(s,t);
                    } else {
                        if (indicScripts[j] == UScript.LATIN) {
                            latinFail.add(s + " - " + t + " - " + r);
View Full Code Here

TOP

Related Classes of com.ibm.icu.text.Transliterator

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.