Package com.ibm.icu.impl

Examples of com.ibm.icu.impl.Trie2Writable


        int lineNum = 0;

        Vector<BuilderScriptSet> scriptSets = null;
        int rtScriptSetsCount = 2;

        Trie2Writable anyCaseTrie = new Trie2Writable(0, 0);
        Trie2Writable lowerCaseTrie = new Trie2Writable(0, 0);

        // The scriptSets vector provides a mapping from TRIE values to the set
        // of scripts.
        //
        // Reserved TRIE values:
        // 0: Code point has no whole script confusables.
        // 1: Code point is of script Common or Inherited.
        //    These code points do not participate in whole script confusable
        //    detection. (This is logically equivalent to saying that they
        //    contain confusables in all scripts.)
        //
        // Because Trie values are indexes into the ScriptSets vector, pre-fill
        // vector positions 0 and 1 to avoid conflicts with the reserved values.
        scriptSets = new Vector<BuilderScriptSet>();
        scriptSets.addElement(null);
        scriptSets.addElement(null);

        readWholeFileToString(confusablesWS, input);

        parseRegexp = Pattern.compile(parseExp);

        // Zap any Byte Order Mark at the start of input. Changing it to a space
        // is benign, given the syntax of the input.
        if (input.charAt(0) == 0xfeff) {
          input.setCharAt(0, (char) 0x20);
        }

        // Parse the input, one line per iteration of this loop.
        Matcher matcher = parseRegexp.matcher(input);
        while (matcher.find()) {
          lineNum++;
          if (matcher.start(1) >= 0) {
            // this was a blank or comment line.
            continue;
          }
          if (matcher.start(8) >= 0) {
            // input file syntax error.
            throw new ParseException("ConfusablesWholeScript, line " + lineNum + ": Unrecognized input: " + matcher.group(),
                matcher.start());
          }

          // Pick up the start and optional range end code points from the
          // parsed line.
          int startCodePoint = Integer.parseInt(matcher.group(2), 16);
          if (startCodePoint > 0x10ffff) {
            throw new ParseException("ConfusablesWholeScript, line " + lineNum + ": out of range code point: "
                + matcher.group(2), matcher.start(2));
          }
          int endCodePoint = startCodePoint;
          if (matcher.start(3) >= 0) {
            endCodePoint = Integer.parseInt(matcher.group(3), 16);
          }
          if (endCodePoint > 0x10ffff) {
            throw new ParseException("ConfusablesWholeScript, line " + lineNum + ": out of range code point: "
                + matcher.group(3), matcher.start(3));
          }

          // Extract the two script names from the source line.
          String srcScriptName = matcher.group(4);
          String targScriptName = matcher.group(5);
          int srcScript = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, srcScriptName);
          int targScript = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, targScriptName);
          if (srcScript == UScript.INVALID_CODE) {
            throw new ParseException(
                "ConfusablesWholeScript, line " + lineNum + ": Invalid script code t: " + matcher.group(4),
                matcher.start(4));
          }
          if (targScript == UScript.INVALID_CODE) {
            throw new ParseException(
                "ConfusablesWholeScript, line " + lineNum + ": Invalid script code t: " + matcher.group(5),
                matcher.start(5));
          }

          // select the table - (A) any case or (L) lower case only
          Trie2Writable table = anyCaseTrie;
          if (matcher.start(7) >= 0) {
            table = lowerCaseTrie;
          }

          // Build the set of scripts containing confusable characters for
          // the code point(s) specified in this input line.
          // Sanity check that the script of the source code point is the same
          // as the source script indicated in the input file. Failure of this
          // check is an error in the input file.
          //
          // Include the source script in the set (needed for Mixed Script
          // Confusable detection).
          //
          int cp;
          for (cp = startCodePoint; cp <= endCodePoint; cp++) {
            int setIndex = table.get(cp);
            BuilderScriptSet bsset = null;
            if (setIndex > 0) {
              assert (setIndex < scriptSets.size());
              bsset = scriptSets.elementAt(setIndex);
            } else {
              bsset = new BuilderScriptSet();
              bsset.codePoint = cp;
              bsset.trie = table;
              bsset.sset = new ScriptSet();
              setIndex = scriptSets.size();
              bsset.index = setIndex;
              bsset.rindex = 0;
              scriptSets.addElement(bsset);
              table.set(cp, setIndex);
            }
            bsset.sset.Union(targScript);
            bsset.sset.Union(srcScript);

            int cpScript = UScript.getScript(cp);
View Full Code Here

TOP

Related Classes of com.ibm.icu.impl.Trie2Writable

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.