// Source Code of gnu.testlet.java.text.RuleBasedCollator.jdk11

/*************************************************************************
/* Tests for java.text.RuleBasedCollator
/*
/* Copyright (c) 2003 Stephen C. Crawley (crawley@dstc.edu.au)
/*
/* This program is free software; you can redistribute it and/or modify
/* it under the terms of the GNU General Public License as published 
/* by the Free Software Foundation, either version 2 of the License, or
/* (at your option) any later version.
/*
/* This program is distributed in the hope that it will be useful, but
/* WITHOUT ANY WARRANTY; without even the implied warranty of
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
/* GNU General Public License for more details.
/*
/* You should have received a copy of the GNU General Public License
/* along with this program; if not, write to the Free Software Foundation
/* Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307 USA
/*************************************************************************/


// Tags: JDK1.1


package gnu.testlet.java.text.RuleBasedCollator;


import gnu.testlet.Testlet;
import gnu.testlet.TestHarness;
import java.text.RuleBasedCollator;
import java.text.Collator;
import java.text.ParseException;


public class jdk11 implements Testlet
{
  // The rule strings below are the values returned by calling getRules()
  // on the collators for various JDK 1.4.0 Locales.
  //
  // EN_US_RULES: judging by its name this is the en_US collator's rule
  // string (NOTE(review): the locale is implied by the name only -- confirm).
  // constructorTests() feeds it to the RuleBasedCollator constructor as a
  // "real" rule string that must parse without error.
  private final String EN_US_RULES = 
    "='\u200b'=\u200c=\u200d=\u200e=\u200f=\u0000=\u0001=\u0002=\u0003" +
    "=\u0004=\u0005=\u0006=\u0007=\u0008='\t'='\u000b'=\u000e" +
    "=\u000f='\u0010'=\u0011=\u0012=\u0013=\u0014=\u0015=\u0016" +
    "=\u0017=\u0018=\u0019=\u001a=\u001b=\u001c=\u001d=\u001e=\u001f" +
    "=\u007f=\u0080=\u0081=\u0082=\u0083=\u0084=\u0085=\u0086=\u0087" +
    "=\u0088=\u0089=\u008a=\u008b=\u008c=\u008d=\u008e=\u008f=\u0090" +
    "=\u0091=\u0092=\u0093=\u0094=\u0095=\u0096=\u0097=\u0098=\u0099" +
    "=\u009a=\u009b=\u009c=\u009d=\u009e=\u009f;' ';'\u00a0';'\u2000'" +
    ";'\u2001';'\u2002';'\u2003';'\u2004';'\u2005';'\u2006';'\u2007'" +
    ";'\u2008';'\u2009';'\u200a';'\u3000';'\ufeff';'\r';'\t'" +
    ";'\n';'\f';'\u000b';\u0301;\u0300;\u0306;\u0302;\u030c;\u030a" +
    ";\u030d;\u0308;\u030b;\u0303;\u0307;\u0304;\u0337;\u0327;\u0328" +
    ";\u0323;\u0332;\u0305;\u0309;\u030e;\u030f;\u0310;\u0311;\u0312" +
    ";\u0313;\u0314;\u0315;\u0316;\u0317;\u0318;\u0319;\u031a;\u031b" +
    ";\u031c;\u031d;\u031e;\u031f;\u0320;\u0321;\u0322;\u0324;\u0325" +
    ";\u0326;\u0329;\u032a;\u032b;\u032c;\u032d;\u032e;\u032f;\u0330" +
    ";\u0331;\u0333;\u0334;\u0335;\u0336;\u0338;\u0339;\u033a;\u033b" +
    ";\u033c;\u033d;\u033e;\u033f;\u0342;\u0344;\u0345;\u0360;\u0361" +
    ";\u0483;\u0484;\u0485;\u0486;\u20d0;\u20d1;\u20d2;\u20d3;\u20d4" +
    ";\u20d5;\u20d6;\u20d7;\u20d8;\u20d9;\u20da;\u20db;\u20dc;\u20dd" +
    ";\u20de;\u20df;\u20e0;\u20e1,'-';\u00ad;\u2010;\u2011;\u2012;\u2013" +
    ";\u2014;\u2015;\u2212<'_'<\u00af<','<';'<':'<'!'<\u00a1<'?'<\u00bf" +
    "<'/'<'.'<\u00b4<'`'<'^'<\u00a8<'~'<\u00b7<\u00b8<'''<'\"'<\u00ab" +
    "<\u00bb<'('<')'<'['<']'<'{'<'}'<\u00a7<\u00b6<\u00a9<\u00ae<'@'" +
    "<\u00a4<\u0e3f<\u00a2<\u20a1<\u20a2<'$'<\u20ab<\u20ac<\u20a3<\u20a4" +
    "<\u20a5<\u20a6<\u20a7<\u00a3<\u20a8<\u20aa<\u20a9<\u00a5<'*'<'\\'<'&'" +
    "<'#'<'%'<'+'<\u00b1<\u00f7<\u00d7<'<'<'='<'>'<\u00ac<'|'<\u00a6" +
    "<\u00b0<\u00b5<0<1<2<3<4<5<6<7<8<9<\u00bc<\u00bd<\u00be<a,A<b,B<c,C" +
    "<d,D<\u00f0,\u00d0<e,E<f,F<g,G<h,H<i,I<j,J<k,K<l,L<m,M<n,N<o,O<p,P" +
    "<q,Q<r,R<s, S & SS,\u00df<t,T& TH, \u00de &TH, \u00fe <u,U<v,V<w,W" +
    "<x,X<y,Y<z,Z&AE,\u00c6&AE,\u00e6&OE,\u0152&OE,\u0153";


  // FR_CA_RULES: presumably the rule string reported by getRules() for the
  // fr_CA collator (NOTE(review): the locale is implied by the name only --
  // confirm).  The string ends with the '@' modifier, which the
  // java.text.RuleBasedCollator documentation describes as turning on
  // backwards ordering of accents (secondary differences), as in French.
  // constructorTests() feeds it to the RuleBasedCollator constructor as a
  // "real" rule string that must parse without error.
  private final String FR_CA_RULES = 
    "='\u200b'=\u200c=\u200d=\u200e=\u200f=\u0000=\u0001=\u0002=\u0003" +
    "=\u0004=\u0005=\u0006=\u0007=\u0008='\t'='\u000b'=\u000e=\u000f" +
    "='\u0010'=\u0011=\u0012=\u0013=\u0014=\u0015=\u0016=\u0017=\u0018" +
    "=\u0019=\u001a=\u001b=\u001c=\u001d=\u001e=\u001f=\u007f=\u0080=\u0081" +
    "=\u0082=\u0083=\u0084=\u0085=\u0086=\u0087=\u0088=\u0089=\u008a=\u008b" +
    "=\u008c=\u008d=\u008e=\u008f=\u0090=\u0091=\u0092=\u0093=\u0094=\u0095" +
    "=\u0096=\u0097=\u0098=\u0099=\u009a=\u009b=\u009c=\u009d=\u009e=\u009f" +
    ";' ';'\u00a0';'\u2000';'\u2001';'\u2002';'\u2003';'\u2004';'\u2005'" +
    ";'\u2006';'\u2007';'\u2008';'\u2009';'\u200a';'\u3000';'\ufeff';'\r'" +
    ";'\t';'\n';'\f';'\u000b';\u0301;\u0300;\u0306;\u0302;\u030c;\u030a" +
    ";\u030d;\u0308;\u030b;\u0303;\u0307;\u0304;\u0337;\u0327;\u0328" +
    ";\u0323;\u0332;\u0305;\u0309;\u030e;\u030f;\u0310;\u0311;\u0312" +
    ";\u0313;\u0314;\u0315;\u0316;\u0317;\u0318;\u0319;\u031a;\u031b" +
    ";\u031c;\u031d;\u031e;\u031f;\u0320;\u0321;\u0322;\u0324;\u0325" +
    ";\u0326;\u0329;\u032a;\u032b;\u032c;\u032d;\u032e;\u032f;\u0330" +
    ";\u0331;\u0333;\u0334;\u0335;\u0336;\u0338;\u0339;\u033a;\u033b" +
    ";\u033c;\u033d;\u033e;\u033f;\u0342;\u0344;\u0345;\u0360;\u0361" +
    ";\u0483;\u0484;\u0485;\u0486;\u20d0;\u20d1;\u20d2;\u20d3;\u20d4" +
    ";\u20d5;\u20d6;\u20d7;\u20d8;\u20d9;\u20da;\u20db;\u20dc;\u20dd" +
    ";\u20de;\u20df;\u20e0;\u20e1,'-';\u00ad;\u2010;\u2011;\u2012;\u2013" +
    ";\u2014;\u2015;\u2212<'_'<\u00af<','<';'<':'<'!'<\u00a1<'?'<\u00bf" +
    "<'/'<'.'<\u00b4<'`'<'^'<\u00a8<'~'<\u00b7<\u00b8<'''<'\"'<\u00ab" +
    "<\u00bb<'('<')'<'['<']'<'{'<'}'<\u00a7<\u00b6<\u00a9<\u00ae<'@'" +
    "<\u00a4<\u0e3f<\u00a2<\u20a1<\u20a2<'$'<\u20ab<\u20ac<\u20a3<\u20a4" +
    "<\u20a5<\u20a6<\u20a7<\u00a3<\u20a8<\u20aa<\u20a9<\u00a5<'*'<'\\'<'&'" +
    "<'#'<'%'<'+'<\u00b1<\u00f7<\u00d7<'<'<'='<'>'<\u00ac<'|'<\u00a6" +
    "<\u00b0<\u00b5<0<1<2<3<4<5<6<7<8<9<\u00bc<\u00bd<\u00be<a,A<b,B<c,C" +
    "<d,D<\u00f0,\u00d0<e,E<f,F<g,G<h,H<i,I<j,J<k,K<l,L<m,M<n,N<o,O<p,P" +
    "<q,Q<r,R<s, S & SS,\u00df<t,T& TH, \u00de &TH, \u00fe <u,U<v,V<w,W" +
    "<x,X<y,Y<z,Z&AE,\u00c6&AE,\u00e6&OE,\u0152&OE,\u0153@";




  // Harness for the current run.  Assigned at the start of
  // test(TestHarness) and then used by every helper method in this class
  // to set checkpoints and record check results.
  private TestHarness harness;


  private void constructorTests()
  {
    harness.checkPoint("constructor rule parsing");
    RuleBasedCollator r;
    final String[] GOOD_RULES = {
      // Examples from the Sun javadocs
      "< a < b < c < d",
      ("< a,A< b,B< c,C< d,D< e,E< f,F< g,G< h,H< i,I< j,J < k,K< l,L< m,M" +
       "< n,N< o,O< p,P< q,Q< r,R< s,S< t,T < u,U< v,V< w,W< x,X< y,Y< z,Z " +
       "< \u00E5=a\u030A,\u00C5=A\u030A ;aa,AA< \u00E6,\u00C6< \u00F8,\u00D8"),
      ("=\u0301;\u0300;\u0302;\u0308;\u0327;\u0303;\u0304;\u0305" +
       ";\u0306;\u0307;\u0309;\u030A;\u030B;\u030C;\u030D;\u030E" +
       ";\u030F;\u0310;\u0311;\u0312< a , A ; ae, AE ; \u00e6 , \u00c6" +
       "< b , B < c, C < e, E & C < d, D & \u0300 ; \u0308 ; \u0302"),
      // Real collation rules
      EN_US_RULES, FR_CA_RULES,
      // Cases involving non-significant white-space
      "=A ", "=A\t", "=A\n", 
      "=A B", "=A\tB", "=A\nB", 
      "= A", "=\tA", "=\nA", 
      " =A", "\t=A", "\n=A",
      // Dodgy cases that JDKs accept
      " ",
      "='\n''\n'",
      "='\n'\n'\n'",
      // Dodgy cases with unbalanced quotes.  JDKs allow these (though a 
      // couple result in IndexOutOfBoundsExceptions).  However, the spec
      // does not say what they mean.
      "='", /* <- JDK 1.4.0 exception */ "=' ", "='=A", "='=A'", 
      "=''", "='' ","=''=A", "=''=A'", 
      "=''''", /* <- JDK 1.4.0 exception */ "=''''=A", "=''''=A'", 
    };
    
    for (int i = 0; i < GOOD_RULES.length; i++) {
      try {
  r = new RuleBasedCollator(GOOD_RULES[i]);
  harness.check(true);
      }
      catch (ParseException ex) {
  harness.debug(ex);
  harness.debug("unexpected ParseException (offset is " +
          ex.getErrorOffset() + ")");
  harness.check(false);
      }
      catch (Throwable ex) {
  harness.debug(ex);
  harness.check(false);
      }
    }


    try {
      r = new RuleBasedCollator(null);
      harness.check(false);
    }
    catch (ParseException ex) {
      harness.check(false);
    }
    catch (NullPointerException ex) {
      harness.check(true);
    }
    
    harness.checkPoint("constructor rule parsing errors");
    final String[] BAD_RULES = {
      // Empty rule list
      "", 
      // No relation
      "A",
      // No text following relation
      "=", "<", ";", ",", 
      // Special chars should be quoted
      "=\n", "=#", "==",
    };


    for (int i = 0; i < BAD_RULES.length; i++) {
      try {
  r = new RuleBasedCollator(BAD_RULES[i]);
  harness.check(false);
      }
      catch (ParseException ex) {
  harness.check(true);
      }
      catch (Throwable ex) {
  harness.debug(ex);
  harness.check(false);
      }
    }
  }


  private void doComparisons(RuleBasedCollator r, String[][] tests) 
  {
    for (int i = 0; i < tests.length; i++) {
      int res = r.compare(tests[i][0], tests[i][1]);
      if (res < 0) {
  harness.check(tests[i][2].equals("<"));
      }
      else if (res == 0) {
  harness.check(tests[i][2].equals("="));
      }
      else {
  harness.check(tests[i][2].equals(">"));
      }
    }
  }


  private void ignoreTests() 
  {
    harness.checkPoint("ignorable characters");
    final String TEST_RULES = "=Z<a,A<b,B<c,C";
    final String[][] TESTS = {
      {"abc", "ABC", "<"},
      {"abc", "abc", "="},
      {"Abc", "abc", ">"},
      {"aZbZc", "abc", "="},
      {"aZbZc", "aZbZc", "="},
      {"abc", "aZbZc", "="},
      {"aZbZc", "ABC", "<"},
      {"Z", "Z", "="},
      {"Abc", "aZbZc", ">"},
    };


    try {
      RuleBasedCollator r = new RuleBasedCollator(TEST_RULES);
      doComparisons(r, TESTS);
    }
    catch (ParseException ex) {
      harness.debug(ex);
      harness.fail("ignorable characters: ParseException (offset is " +
       ex.getErrorOffset() + ")");
    }
  }


  private void oneCharTests() 
  {
    checkStrengths();
    harness.checkPoint("single character ordering");
    final String TEST_RULES = "<a;A=0<b,B=1<c;C,d=2";
    final String[][][] TESTS = {
      { // PRIMARY
  {"", "", "="},
  {"abc", "abc", "="},
  {"abc", "ab", ">"},
  {"ab", "abc", "<"},
  {"abc", "Abc", "="},
  {"abc", "aBc", "="},
  {"abc", "abd", "="},
  {"abc", "abC", "="},
  {"abC", "abd", "="},
  {"Abc", "abc", "="},
  {"aBc", "abc", "="},
  {"abd", "abc", "="},
  {"abC", "abc", "="},
  {"abd", "abC", "="},
  {"abc", "012", "="},
  {"ABd", "012", "="},
  {"abc", "xyz", "<"}, 
  {"xyz", "abc", ">"},
  {"pqr", "xyz", "<"}, /* While the Sun Javadoc simply says that
        unmentioned characters appear at the end
        of the collation, the Sun JDK impl'ns 
        appears to order them by raw char value. */
        
      },
      { // SECONDARY
  {"", "", "="},
  {"abc", "abc", "="},
  {"abc", "ab", ">"},
  {"ab", "abc", "<"},
  {"abc", "Abc", "<"},
  {"abc", "aBc", "="},
  {"abc", "abd", "<"},
  {"abc", "abC", "<"},
  {"abC", "abd", "="},
  {"Abc", "abc", ">"},
  {"aBc", "abc", "="},
  {"abd", "abc", ">"},
  {"abC", "abc", ">"},
  {"abd", "abC", "="},
  {"abc", "012", "<"},
  {"ABd", "012", "="},
  {"abc", "xyz", "<"},
  {"xyz", "abc", ">"},
  {"pqr", "xyz", "<"},
      },
      { // TERTIARY
  {"", "", "="},
  {"abc", "abc", "="},
  {"abc", "ab", ">"},
  {"ab", "abc", "<"},
  {"abc", "Abc", "<"},
  {"abc", "aBc", "<"},
  {"abc", "abd", "<"},
  {"abc", "abC", "<"},
  {"abC", "abd", "<"},
  {"Abc", "abc", ">"},
  {"aBc", "abc", ">"},
  {"abd", "abc", ">"},
  {"abC", "abc", ">"},
  {"abd", "abC", ">"},
  {"abc", "012", "<"},
  {"ABd", "012", "="},
  {"abc", "xyz", "<"},
  {"xyz", "abc", ">"},
  {"pqr", "xyz", "<"},
      },
      { // IDENTICAL
  {"", "", "="},
  {"abc", "abc", "="},
  {"abc", "ab", ">"},
  {"ab", "abc", "<"},
  {"abc", "Abc", "<"},
  {"abc", "aBc", "<"},
  {"abc", "abd", "<"},
  {"abc", "abC", "<"},
  {"abC", "abd", "<"},
  {"Abc", "abc", ">"},
  {"aBc", "abc", ">"},
  {"abd", "abc", ">"},  
  {"abC", "abc", ">"},
  {"abd", "abC", ">"},
  {"abc", "012", "<"},
  {"ABd", "012", ">"},  /* It appears that Sun JDKs fall back on the
         raw character values when characters 
         are defined as equivalent by the rules. */
  {"abc", "xyz", "<"},
  {"xyz", "abc", ">"},
  {"pqr", "xyz", "<"},
      },
    };


    try {
      RuleBasedCollator r = new RuleBasedCollator(TEST_RULES);
      for (int i = 0; i < TESTS.length; i++) {
  r.setStrength(i);
  doComparisons(r, TESTS[i]);
      }
    }
    catch (ParseException ex) {
      harness.debug(ex);
      harness.fail("single character ordering: ParseException (offset is " +
       ex.getErrorOffset() + ")");
    }
  }


  private void contractionTests() 
  {
    checkStrengths();
    harness.checkPoint("contraction ordering");
    final String OLD_SPANISH_RULES = "<c,C<ch,cH,Ch,CH<d,D";
    final String[][][] TESTS = {
      {
  // PRIMARY
  {"cat", "cat", "="},
  {"cat", "Cat", "="},
  {"cat", "chat", "<"},
  {"cot", "chat", "<"},
  {"chat", "chit", "<"},
  {"chat", "dog", "<"},
      },
      {
  // SECONDARY
  {"cat", "cat", "="},
  {"cat", "Cat", "="},
  {"cat", "chat", "<"},
  {"cot", "chat", "<"},
  {"chat", "chit", "<"},
  {"chat", "dog", "<"},
      },
      {
  // TERTIARY
  {"cat", "cat", "="},
  {"cat", "Cat", "<"},
  {"cat", "chat", "<"},
  {"cot", "chat", "<"},
  {"chat", "chit", "<"},
  {"chat", "dog", "<"},
      },
      {
  // IDENTICAL
  {"cat", "cat", "="},
  {"cat", "Cat", "<"},
  {"cat", "chat", "<"},
  {"cot", "chat", "<"},
  {"chat", "chit", "<"},
  {"chat", "dog", "<"},
      },
    };
    
    try {
      RuleBasedCollator r = new RuleBasedCollator(OLD_SPANISH_RULES);
      for (int i = 0; i < TESTS.length; i++) {
  r.setStrength(i);
  doComparisons(r, TESTS[i]);
      }
    }
    catch (ParseException ex) {
      harness.debug(ex);
      harness.fail("contraction ordering: ParseException (offset is " +
       ex.getErrorOffset() + ")");
    }
  }


  private void expansionTests() 
  {
    checkStrengths();
    harness.checkPoint("expansion ordering");
    final String OLD_ENGLISH_RULES = ("<a,A<b,B<c,C<d,D<e,E<f,F" +
              " &AE,'\u00e6' &AE,'\u00c6'");
    final String[][][] TESTS = {
      {
  // PRIMARY
  {"ae", "\u00e6", "="},
  {"AE", "\u00e6", "="},
  {"ae", "\u00c6", "="},
  {"AE", "\u00c6", "="},
  {"cat", "cat", "="},
  {"cat", "Cat", "="},
  {"caet", "caet", "="},
  {"caet", "c\u00e6t", "="},
  {"c\u00e6t", "caet", "="},
  {"c\u00e6t", "c\u00e6t", "="},
  {"caet", "c\u00c6t", "="},         
  {"c\u00c6t", "caet", "="},
  {"c\u00c6t", "c\u00c6t", "="},  
  {"c\u00c6t", "c\u00e6t", "="},
  {"caet", "cat", "<"},
  {"c\u00e6t", "cat", "<"},
  {"C\u00c6T", "CAT", "<"},
  {"caet", "cab", ">"},
  {"c\u00e6t", "cab", ">"},
  {"C\u00c6T", "CAB", ">"},
      },
      {
  // SECONDARY
  {"ae", "\u00e6", "="},
  {"AE", "\u00e6", "="},
  {"ae", "\u00c6", "="},
  {"AE", "\u00c6", "="},
  {"cat", "cat", "="},
  {"cat", "Cat", "="},
  {"caet", "caet", "="},
  {"caet", "c\u00e6t", "="},
  {"c\u00e6t", "caet", "="},
  {"c\u00e6t", "c\u00e6t", "="},
  {"caet", "c\u00c6t", "="},        
  {"c\u00c6t", "caet", "="},
  {"c\u00c6t", "c\u00c6t", "="},  
  {"c\u00c6t", "c\u00e6t", "="},
  {"caet", "cat", "<"},
  {"c\u00e6t", "cat", "<"},
  {"C\u00c6T", "CAT", "<"},
  {"caet", "cab", ">"},
  {"c\u00e6t", "cab", ">"},
  {"C\u00c6T", "CAB", ">"},
      },
      {
  // TERTIARY
  {"ae", "\u00e6", "<"},
  {"AE", "\u00e6", "<"},            
  {"ae", "\u00c6", "<"},
  {"AE", "\u00c6", "<"},
  {"cat", "cat", "="},
  {"cat", "Cat", "<"},
  {"caet", "caet", "="},
  {"caet", "c\u00e6t", "<"},
  {"c\u00e6t", "caet", ">"},
  {"c\u00e6t", "c\u00e6t", "="},
  {"caet", "c\u00c6t", "<"},
  {"c\u00c6t", "caet", ">"},    
  {"c\u00c6t", "c\u00c6t", "="},  
  {"c\u00c6t", "c\u00e6t", "<"},
  {"caet", "cat", "<"},
  {"c\u00e6t", "cat", ">"}, // JDK is buggy. It fails here.
  {"C\u00c6T", "CAT", ">"}, // JDK is buggy. It fails here.
  {"caet", "cab", ">"},
  {"c\u00e6t", "cab", ">"},
  {"C\u00c6T", "CAB", ">"},
      },
      {
  // IDENTICAL
  {"ae", "\u00e6", "<"},
  {"AE", "\u00e6", "<"},   
  {"ae", "\u00c6", "<"},
  {"AE", "\u00c6", "<"},
  {"cat", "cat", "="},
  {"cat", "Cat", "<"},
  {"caet", "caet", "="},
  {"caet", "c\u00e6t", "<"},
  {"c\u00e6t", "caet", ">"},
  {"c\u00e6t", "c\u00e6t", "="},
  {"caet", "c\u00c6t", "<"},
  {"c\u00c6t", "caet", ">"},    
  {"c\u00c6t", "c\u00c6t", "="},  
  {"c\u00c6t", "c\u00e6t", "<"},
  {"caet", "cat", "<"},
  {"c\u00e6t", "cat", ">"}, // JDK is buggy. It fails here.
  {"C\u00c6T", "CAT", ">"}, // JDK is buggy. It fails here.
  {"caet", "cab", ">"},
  {"c\u00e6t", "cab", ">"},
  {"C\u00c6T", "CAB", ">"},
      },
    };
    
    try {
      RuleBasedCollator r = new RuleBasedCollator(OLD_ENGLISH_RULES);
      for (int i = 0; i < TESTS.length; i++) {
  r.setStrength(i);
  doComparisons(r, TESTS[i]);
      }
    }
    catch (ParseException ex) {
      harness.debug(ex);
      harness.fail("expansion ordering: ParseException (offset is " +
       ex.getErrorOffset() + ")");
    }
  }


  private void checkStrengths() 
  {
    harness.checkPoint("collator strengths");
    harness.check(Collator.PRIMARY == 0);
    harness.check(Collator.SECONDARY == 1);
    harness.check(Collator.TERTIARY == 2);
    harness.check(Collator.IDENTICAL == 3);
  }


  /**
   * Testlet entry point.  Stores the supplied harness in the instance
   * field used by the helper methods, then runs each group of tests in a
   * fixed order: constructor parsing, ignorable characters, single
   * character ordering, contractions and expansions.
   *
   * @param harness the harness through which checkpoints and check
   *                results are reported
   */
  public void test(TestHarness harness)
  {
    // Must be assigned first: every helper below reports through this field.
    this.harness = harness;
    constructorTests();
    ignoreTests();
    oneCharTests();
    contractionTests();
    expansionTests();
    // More tests in the pipeline
  }
  
} // class jdk11
// Source Code of gnu.testlet.java.text.RuleBasedCollator.jdk11

// Related Classes of gnu.testlet.java.text.RuleBasedCollator.jdk11