Package com.ibm.icu.impl

Source Code of com.ibm.icu.impl.Normalizer2Impl$IsAcceptable

/*
*******************************************************************************
*   Copyright (C) 2009-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.impl;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;

import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.VersionInfo;

public final class Normalizer2Impl {
  public static final class Hangul {
    /* Korean Hangul and Jamo constants */
    public static final int JAMO_L_BASE = 0x1100; /* "lead" jamo */
    public static final int JAMO_V_BASE = 0x1161; /* "vowel" jamo */
    public static final int JAMO_T_BASE = 0x11a7; /* "trail" jamo */

    public static final int HANGUL_BASE = 0xac00;

    public static final int JAMO_L_COUNT = 19;
    public static final int JAMO_V_COUNT = 21;
    public static final int JAMO_T_COUNT = 28;

    public static final int JAMO_L_LIMIT = JAMO_L_BASE + JAMO_L_COUNT;
    public static final int JAMO_V_LIMIT = JAMO_V_BASE + JAMO_V_COUNT;

    public static final int JAMO_VT_COUNT = JAMO_V_COUNT * JAMO_T_COUNT;

    public static final int HANGUL_COUNT = JAMO_L_COUNT * JAMO_V_COUNT * JAMO_T_COUNT;
    public static final int HANGUL_LIMIT = HANGUL_BASE + HANGUL_COUNT;

    public static boolean isHangul(final int c) {
      return HANGUL_BASE <= c && c < HANGUL_LIMIT;
    }

    public static boolean isHangulWithoutJamoT(char c) {
      c -= HANGUL_BASE;
      return c < HANGUL_COUNT && c % JAMO_T_COUNT == 0;
    }

    public static boolean isJamoL(final int c) {
      return JAMO_L_BASE <= c && c < JAMO_L_LIMIT;
    }

    public static boolean isJamoV(final int c) {
      return JAMO_V_BASE <= c && c < JAMO_V_LIMIT;
    }

    /**
     * Decomposes c, which must be a Hangul syllable, into buffer and returns the length of the decomposition (2 or 3).
     */
    public static int decompose(int c, final Appendable buffer) {
      try {
        c -= HANGUL_BASE;
        int c2 = c % JAMO_T_COUNT;
        c /= JAMO_T_COUNT;
        buffer.append((char) (JAMO_L_BASE + c / JAMO_V_COUNT));
        buffer.append((char) (JAMO_V_BASE + c % JAMO_V_COUNT));
        if (c2 == 0) {
          return 2;
        } else {
          buffer.append((char) (JAMO_T_BASE + c2));
          return 3;
        }
      } catch (IOException e) {
        // Will not occur because we do not write to I/O.
        throw new RuntimeException(e);
      }
    }
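
    // Worked example (illustrative, not part of the original source): for U+AC01
    // (HANGUL SYLLABLE GAG), c-HANGUL_BASE==1, so c2==1%28==1 and c/=28 leaves 0;
    // decompose() appends JAMO_L_BASE+0/21==U+1100, JAMO_V_BASE+0%21==U+1161 and
    // JAMO_T_BASE+1==U+11A8, i.e. "\u1100\u1161\u11A8", and returns 3.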

    /**
     * Decomposes c, which must be a Hangul syllable, into buffer. This is the raw, not recursive, decomposition. Its length is always
     * 2.
     */
    public static void getRawDecomposition(int c, final Appendable buffer) {
      try {
        int orig = c;
        c -= HANGUL_BASE;
        int c2 = c % JAMO_T_COUNT;
        if (c2 == 0) {
          c /= JAMO_T_COUNT;
          buffer.append((char) (JAMO_L_BASE + c / JAMO_V_COUNT));
          buffer.append((char) (JAMO_V_BASE + c % JAMO_V_COUNT));
        } else {
          buffer.append((char) (orig - c2));  // LV syllable
          buffer.append((char) (JAMO_T_BASE + c2));
        }
      } catch (IOException e) {
        // Will not occur because we do not write to I/O.
        throw new RuntimeException(e);
      }
    }
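
    // Worked example (illustrative, not part of the original source): for the same
    // U+AC01, c2==1!=0, so getRawDecomposition() appends orig-c2==U+AC00 (the LV
    // syllable) followed by JAMO_T_BASE+1==U+11A8; for an LV syllable such as
    // U+AC00 itself (c2==0) it appends the two Jamos U+1100 U+1161.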
  }

  /**
   * Writable buffer that takes care of canonical ordering. Its Appendable methods behave like the C++ implementation's appendZeroCC()
   * methods.
   * <p>
   * If dest is a StringBuilder, then the buffer writes directly to it. Otherwise, the buffer maintains a StringBuilder for intermediate
   * text segments until no further changes are necessary and whole segments are appended. append() methods that take combining-class
   * values always write to the StringBuilder. Other append() methods flush and append to the Appendable.
   */
  public static final class ReorderingBuffer implements Appendable {
    public ReorderingBuffer(final Normalizer2Impl ni, final Appendable dest, final int destCapacity) {
      impl = ni;
      app = dest;
      if (app instanceof StringBuilder) {
        appIsStringBuilder = true;
        str = (StringBuilder) dest;
        // In Java, the constructor subsumes public void init(int destCapacity) {
        str.ensureCapacity(destCapacity);
        reorderStart = 0;
        if (str.length() == 0) {
          lastCC = 0;
        } else {
          setIterator();
          lastCC = previousCC();
          // Set reorderStart after the last code point with cc<=1 if there is one.
          if (lastCC > 1) {
            while (previousCC() > 1) {
            }
          }
          reorderStart = codePointLimit;
        }
      } else {
        appIsStringBuilder = false;
        str = new StringBuilder();
        reorderStart = 0;
        lastCC = 0;
      }
    }

    public boolean isEmpty() {
      return str.length() == 0;
    }

    public int length() {
      return str.length();
    }

    public int getLastCC() {
      return lastCC;
    }

    public StringBuilder getStringBuilder() {
      return str;
    }

    public boolean equals(final CharSequence s, final int start, final int limit) {
      return UTF16Plus.equal(str, 0, str.length(), s, start, limit);
    }

    // For Hangul composition, replacing the Leading consonant Jamo with the syllable.
    public void setLastChar(final char c) {
      str.setCharAt(str.length() - 1, c);
    }

    public void append(final int c, final int cc) {
      if (lastCC <= cc || cc == 0) {
        str.appendCodePoint(c);
        lastCC = cc;
        if (cc <= 1) {
          reorderStart = str.length();
        }
      } else {
        insert(c, cc);
      }
    }

    // s must be in NFD, otherwise change the implementation.
    public void append(final CharSequence s, int start, final int limit, int leadCC, final int trailCC) {
      if (start == limit) {
        return;
      }
      if (lastCC <= leadCC || leadCC == 0) {
        if (trailCC <= 1) {
          reorderStart = str.length() + (limit - start);
        } else if (leadCC <= 1) {
          reorderStart = str.length() + 1;  // Ok if not a code point boundary.
        }
        str.append(s, start, limit);
        lastCC = trailCC;
      } else {
        int c = Character.codePointAt(s, start);
        start += Character.charCount(c);
        insert(c, leadCC);  // insert first code point
        while (start < limit) {
          c = Character.codePointAt(s, start);
          start += Character.charCount(c);
          if (start < limit) {
            // s must be in NFD, otherwise we need to use getCC().
            leadCC = getCCFromYesOrMaybe(impl.getNorm16(c));
          } else {
            leadCC = trailCC;
          }
          append(c, leadCC);
        }
      }
    }

    // The following append() methods work like C++ appendZeroCC().
    // They assume that the cc or trailCC of their input is 0.
    // Most of them implement Appendable interface methods.
    // @Override when we switch to Java 6
    public ReorderingBuffer append(final char c) {
      str.append(c);
      lastCC = 0;
      reorderStart = str.length();
      return this;
    }

    public void appendZeroCC(final int c) {
      str.appendCodePoint(c);
      lastCC = 0;
      reorderStart = str.length();
    }

    // @Override when we switch to Java 6
    public ReorderingBuffer append(final CharSequence s) {
      if (s.length() != 0) {
        str.append(s);
        lastCC = 0;
        reorderStart = str.length();
      }
      return this;
    }

    // @Override when we switch to Java 6
    public ReorderingBuffer append(final CharSequence s, final int start, final int limit) {
      if (start != limit) {
        str.append(s, start, limit);
        lastCC = 0;
        reorderStart = str.length();
      }
      return this;
    }

    /**
     * Flushes from the intermediate StringBuilder to the Appendable, if they are different objects. Used after recomposition. Must be
     * called at the end when writing to a non-StringBuilder Appendable.
     */
    public void flush() {
      if (appIsStringBuilder) {
        reorderStart = str.length();
      } else {
        try {
          app.append(str);
          str.setLength(0);
          reorderStart = 0;
        } catch (IOException e) {
          throw new RuntimeException(e);  // Avoid declaring "throws IOException".
        }
      }
      lastCC = 0;
    }

    /**
     * Flushes from the intermediate StringBuilder to the Appendable, if they are different objects. Then appends the new text to the
     * Appendable or StringBuilder. Normally used after quick check loops find a non-empty sequence.
     */
    public ReorderingBuffer flushAndAppendZeroCC(final CharSequence s, final int start, final int limit) {
      if (appIsStringBuilder) {
        str.append(s, start, limit);
        reorderStart = str.length();
      } else {
        try {
          app.append(str).append(s, start, limit);
          str.setLength(0);
          reorderStart = 0;
        } catch (IOException e) {
          throw new RuntimeException(e);  // Avoid declaring "throws IOException".
        }
      }
      lastCC = 0;
      return this;
    }

    public void remove() {
      str.setLength(0);
      lastCC = 0;
      reorderStart = 0;
    }

    public void removeSuffix(final int suffixLength) {
      int oldLength = str.length();
      str.delete(oldLength - suffixLength, oldLength);
      lastCC = 0;
      reorderStart = str.length();
    }

    /*
     * TODO: Revisit whether it makes sense to track reorderStart.
     * It is set to after the last known character with cc<=1,
     * which stops previousCC() before it reads that character and looks up its cc.
     * previousCC() is normally only called from insert().
     * In other words, reorderStart speeds up the insertion of a combining mark
     * into a multi-combining mark sequence where it does not belong at the end.
     * This might not be worth the trouble.
     * On the other hand, it's not a huge amount of trouble.
     *
     * We probably need it for UNORM_SIMPLE_APPEND.
     */

    // Inserts c somewhere before the last character.
    // Requires 0<cc<lastCC which implies reorderStart<limit.
    private void insert(final int c, final int cc) {
      for (setIterator(), skipPrevious(); previousCC() > cc;) {
      }
      // insert c at codePointLimit, after the character with prevCC<=cc
      if (c <= 0xffff) {
        str.insert(codePointLimit, (char) c);
        if (cc <= 1) {
          reorderStart = codePointLimit + 1;
        }
      } else {
        str.insert(codePointLimit, Character.toChars(c));
        if (cc <= 1) {
          reorderStart = codePointLimit + 2;
        }
      }
    }
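
    // Worked example (illustrative, not part of the original source): after
    // append(0x0301, 230) (COMBINING ACUTE ACCENT, ccc=230), a subsequent
    // append(0x0316, 220) (COMBINING GRAVE ACCENT BELOW, ccc=220) takes the
    // insert() path above and places U+0316 before U+0301, so that the buffer
    // ends up in canonical order (220 before 230).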

    private final Normalizer2Impl impl;
    private final Appendable app;
    private final StringBuilder str;
    private final boolean appIsStringBuilder;
    private int reorderStart;
    private int lastCC;

    // private backward iterator
    private void setIterator() {
      codePointStart = str.length();
    }

    private void skipPrevious() {  // Requires 0<codePointStart.
      codePointLimit = codePointStart;
      codePointStart = str.offsetByCodePoints(codePointStart, -1);
    }

    private int previousCC() {  // Returns 0 if there is no previous character.
      codePointLimit = codePointStart;
      if (reorderStart >= codePointStart) {
        return 0;
      }
      int c = str.codePointBefore(codePointStart);
      codePointStart -= Character.charCount(c);
      if (c < MIN_CCC_LCCC_CP) {
        return 0;
      }
      return getCCFromYesOrMaybe(impl.getNorm16(c));
    }

    private int codePointStart, codePointLimit;
  }
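
  // Usage sketch for the ReorderingBuffer above (illustrative, not part of the original
  // source; impl is a loaded Normalizer2Impl and s a CharSequence):
  //   StringBuilder dest = new StringBuilder();
  //   ReorderingBuffer buffer = new ReorderingBuffer(impl, dest, s.length());
  //   impl.decompose(s, 0, s.length(), buffer);
  //   buffer.flush();  // required when dest is not a StringBuilder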

  // TODO: Propose as public API on the UTF16 class.
  // TODO: Propose widening UTF16 methods that take char to take int.
  // TODO: Propose widening UTF16 methods that take String to take CharSequence.
  public static final class UTF16Plus {
    /**
     * Assuming c is a surrogate code point (UTF16.isSurrogate(c)), is it a lead surrogate?
     *
     * @param c
     *            code unit or code point
     * @return true or false
     */
    public static boolean isSurrogateLead(final int c) {
      return (c & 0x400) == 0;
    }
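
    // Example (illustrative): lead surrogates U+D800..U+DBFF all have the 0x400 bit
    // clear and trail surrogates U+DC00..U+DFFF have it set, so
    // isSurrogateLead(0xD800)==true and isSurrogateLead(0xDC00)==false.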

    /**
     * Compares two CharSequence objects for binary equality.
     *
     * @param s1
     *            first sequence
     * @param s2
     *            second sequence
     * @return true if s1 contains the same text as s2
     */
    public static boolean equal(final CharSequence s1, final CharSequence s2) {
      if (s1 == s2) {
        return true;
      }
      int length = s1.length();
      if (length != s2.length()) {
        return false;
      }
      for (int i = 0; i < length; ++i) {
        if (s1.charAt(i) != s2.charAt(i)) {
          return false;
        }
      }
      return true;
    }

    /**
     * Compares two CharSequence subsequences for binary equality.
     *
     * @param s1
     *            first sequence
     * @param start1
     *            start offset in first sequence
     * @param limit1
     *            limit offset in first sequence
     * @param s2
     *            second sequence
     * @param start2
     *            start offset in second sequence
     * @param limit2
     *            limit offset in second sequence
     * @return true if s1.subSequence(start1, limit1) contains the same text as s2.subSequence(start2, limit2)
     */
    public static boolean equal(final CharSequence s1, int start1, final int limit1, final CharSequence s2, int start2, final int limit2) {
      if ((limit1 - start1) != (limit2 - start2)) {
        return false;
      }
      if (s1 == s2 && start1 == start2) {
        return true;
      }
      while (start1 < limit1) {
        if (s1.charAt(start1++) != s2.charAt(start2++)) {
          return false;
        }
      }
      return true;
    }
  }

  public Normalizer2Impl() {
  }

  private static final class IsAcceptable implements ICUBinary.Authenticate {
    // @Override when we switch to Java 6
    public boolean isDataVersionAcceptable(final byte version[]) {
      return version[0] == 2;
    }
  }

  private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
  private static final byte DATA_FORMAT[] = { 0x4e, 0x72, 0x6d, 0x32 };  // "Nrm2"
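  // The DATA_FORMAT bytes spell "Nrm2"; together with IS_ACCEPTABLE they are used to
  // validate the binary data header in load() below (ICUBinary.readHeaderAndDataVersion()).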

  public Normalizer2Impl load(final InputStream data) {
    try {
      BufferedInputStream bis = new BufferedInputStream(data);
      dataVersion = ICUBinary.readHeaderAndDataVersion(bis, DATA_FORMAT, IS_ACCEPTABLE);
      DataInputStream ds = new DataInputStream(bis);
      int indexesLength = ds.readInt() / 4;  // inIndexes[IX_NORM_TRIE_OFFSET]/4
      if (indexesLength <= IX_MIN_MAYBE_YES) {
        throw new IOException("Normalizer2 data: not enough indexes");
      }
      int[] inIndexes = new int[indexesLength];
      inIndexes[0] = indexesLength * 4;
      for (int i = 1; i < indexesLength; ++i) {
        inIndexes[i] = ds.readInt();
      }

      minDecompNoCP = inIndexes[IX_MIN_DECOMP_NO_CP];
      minCompNoMaybeCP = inIndexes[IX_MIN_COMP_NO_MAYBE_CP];

      minYesNo = inIndexes[IX_MIN_YES_NO];
      minYesNoMappingsOnly = inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
      minNoNo = inIndexes[IX_MIN_NO_NO];
      limitNoNo = inIndexes[IX_LIMIT_NO_NO];
      minMaybeYes = inIndexes[IX_MIN_MAYBE_YES];

      // Read the normTrie.
      int offset = inIndexes[IX_NORM_TRIE_OFFSET];
      int nextOffset = inIndexes[IX_EXTRA_DATA_OFFSET];
      normTrie = Trie2_16.createFromSerialized(ds);
      int trieLength = normTrie.getSerializedLength();
      if (trieLength > (nextOffset - offset)) {
        throw new IOException("Normalizer2 data: not enough bytes for normTrie");
      }
      ds.skipBytes((nextOffset - offset) - trieLength);  // skip padding after trie bytes

      // Read the composition and mapping data.
      offset = nextOffset;
      nextOffset = inIndexes[IX_SMALL_FCD_OFFSET];
      int numChars = (nextOffset - offset) / 2;
      char[] chars;
      if (numChars != 0) {
        chars = new char[numChars];
        for (int i = 0; i < numChars; ++i) {
          chars[i] = ds.readChar();
        }
        maybeYesCompositions = new String(chars);
        extraData = maybeYesCompositions.substring(MIN_NORMAL_MAYBE_YES - minMaybeYes);
      }

      // smallFCD: new in formatVersion 2
      offset = nextOffset;
      smallFCD = new byte[0x100];
      for (int i = 0; i < 0x100; ++i) {
        smallFCD[i] = ds.readByte();
      }

      // Build tccc180[].
      // gennorm2 enforces lccc=0 for c<MIN_CCC_LCCC_CP=U+0300.
      tccc180 = new int[0x180];
      int bits = 0;
      for (int c = 0; c < 0x180; bits >>= 1) {
        if ((c & 0xff) == 0) {
          bits = smallFCD[c >> 8];  // one byte per 0x100 code points
        }
        if ((bits & 1) != 0) {
          for (int i = 0; i < 0x20; ++i, ++c) {
            tccc180[c] = getFCD16FromNormData(c) & 0xff;
          }
        } else {
          c += 0x20;
        }
      }

      data.close();
      return this;
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  public Normalizer2Impl load(final String name) {
    return load(ICUData.getRequiredStream(name));
  }
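
  // Usage sketch (illustrative; the exact resource name/path depends on the ICU version
  // and data packaging):
  //   Normalizer2Impl nfcImpl = new Normalizer2Impl().load("nfc.nrm");
  // The name is resolved to a stream via ICUData.getRequiredStream() and parsed by
  // load(InputStream) above.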

  public void addPropertyStarts(final UnicodeSet set) {
    /* add the start code point of each same-value range of each trie */
    Iterator<Trie2.Range> trieIterator = normTrie.iterator();
    Trie2.Range range;
    while (trieIterator.hasNext() && !(range = trieIterator.next()).leadSurrogate) {
      /* add the start code point to the USet */
      set.add(range.startCodePoint);
    }

    /* add Hangul LV syllables and LV+1 because of skippables */
    for (int c = Hangul.HANGUL_BASE; c < Hangul.HANGUL_LIMIT; c += Hangul.JAMO_T_COUNT) {
      set.add(c);
      set.add(c + 1);
    }
    set.add(Hangul.HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */
  }

  public void addCanonIterPropertyStarts(final UnicodeSet set) {
    /* add the start code point of each same-value range of the canonical iterator data trie */
    ensureCanonIterData();
    // currently only used for the SEGMENT_STARTER property
    Iterator<Trie2.Range> trieIterator = canonIterData.iterator(segmentStarterMapper);
    Trie2.Range range;
    while (trieIterator.hasNext() && !(range = trieIterator.next()).leadSurrogate) {
      /* add the start code point to the USet */
      set.add(range.startCodePoint);
    }
  }

  private static final Trie2.ValueMapper segmentStarterMapper = new Trie2.ValueMapper() {
    public int map(final int in) {
      return in & CANON_NOT_SEGMENT_STARTER;
    }
  };

  // low-level properties ------------------------------------------------ ***

  public Trie2_16 getNormTrie() {
    return normTrie;
  }

  // Note: Normalizer2Impl.java r30983 (2011-nov-27)
  // still had getFCDTrie() which built and cached an FCD trie.
  // That provided faster access to FCD data than getFCD16FromNormData()
  // but required synchronization and consumed some 10kB of heap memory
  // in any process that uses FCD (e.g., via collation).
  // tccc180[] and smallFCD[] are intended to help with any loss of performance,
  // at least for Latin & CJK.

  /**
   * Builds the canonical-iterator data for this instance. This is required before any of {@link #isCanonSegmentStarter(int)} or
   * {@link #getCanonStartSet(int, UnicodeSet)} are called, or else they crash.
   *
   * @return this
   */
  public synchronized Normalizer2Impl ensureCanonIterData() {
    if (canonIterData == null) {
      Trie2Writable newData = new Trie2Writable(0, 0);
      canonStartSets = new ArrayList<UnicodeSet>();
      Iterator<Trie2.Range> trieIterator = normTrie.iterator();
      Trie2.Range range;
      while (trieIterator.hasNext() && !(range = trieIterator.next()).leadSurrogate) {
        final int norm16 = range.value;
        if (norm16 == 0 || (minYesNo <= norm16 && norm16 < minNoNo)) {
          // Inert, or 2-way mapping (including Hangul syllable).
          // We do not write a canonStartSet for any yesNo character.
          // Composites from 2-way mappings are added at runtime from the
          // starter's compositions list, and the other characters in
          // 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are
          // "maybe" characters.
          continue;
        }
        for (int c = range.startCodePoint; c <= range.endCodePoint; ++c) {
          final int oldValue = newData.get(c);
          int newValue = oldValue;
          if (norm16 >= minMaybeYes) {
            // not a segment starter if it occurs in a decomposition or has cc!=0
            newValue |= CANON_NOT_SEGMENT_STARTER;
            if (norm16 < MIN_NORMAL_MAYBE_YES) {
              newValue |= CANON_HAS_COMPOSITIONS;
            }
          } else if (norm16 < minYesNo) {
            newValue |= CANON_HAS_COMPOSITIONS;
          } else {
            // c has a one-way decomposition
            int c2 = c;
            int norm16_2 = norm16;
            while (limitNoNo <= norm16_2 && norm16_2 < minMaybeYes) {
              c2 = this.mapAlgorithmic(c2, norm16_2);
              norm16_2 = getNorm16(c2);
            }
            if (minYesNo <= norm16_2 && norm16_2 < limitNoNo) {
              // c decomposes, get everything from the variable-length extra data
              int firstUnit = extraData.charAt(norm16_2);
              int length = firstUnit & MAPPING_LENGTH_MASK;
              if ((firstUnit & MAPPING_HAS_CCC_LCCC_WORD) != 0) {
                if (c == c2 && (extraData.charAt(norm16_2 - 1) & 0xff) != 0) {
                  newValue |= CANON_NOT_SEGMENT_STARTER;  // original c has cc!=0
                }
              }
              // Skip empty mappings (no characters in the decomposition).
              if (length != 0) {
                ++norm16_2;  // skip over the firstUnit
                // add c to first code point's start set
                int limit = norm16_2 + length;
                c2 = extraData.codePointAt(norm16_2);
                addToStartSet(newData, c, c2);
                // Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a
                // one-way mapping. A 2-way mapping is possible here after
                // intermediate algorithmic mapping.
                if (norm16_2 >= minNoNo) {
                  while ((norm16_2 += Character.charCount(c2)) < limit) {
                    c2 = extraData.codePointAt(norm16_2);
                    int c2Value = newData.get(c2);
                    if ((c2Value & CANON_NOT_SEGMENT_STARTER) == 0) {
                      newData.set(c2, c2Value | CANON_NOT_SEGMENT_STARTER);
                    }
                  }
                }
              }
            } else {
              // c decomposed to c2 algorithmically; c has cc==0
              addToStartSet(newData, c, c2);
            }
          }
          if (newValue != oldValue) {
            newData.set(c, newValue);
          }
        }
      }
      canonIterData = newData.toTrie2_32();
    }
    return this;
  }

  public int getNorm16(final int c) {
    return normTrie.get(c);
  }

  public int getCompQuickCheck(final int norm16) {
    if (norm16 < minNoNo || MIN_YES_YES_WITH_CC <= norm16) {
      return 1;  // yes
    } else if (minMaybeYes <= norm16) {
      return 2;  // maybe
    } else {
      return 0;  // no
    }
  }

  public boolean isCompNo(final int norm16) {
    return minNoNo <= norm16 && norm16 < minMaybeYes;
  }

  public boolean isDecompYes(final int norm16) {
    return norm16 < minYesNo || minMaybeYes <= norm16;
  }

  public int getCC(final int norm16) {
    if (norm16 >= MIN_NORMAL_MAYBE_YES) {
      return norm16 & 0xff;
    }
    if (norm16 < minNoNo || limitNoNo <= norm16) {
      return 0;
    }
    return getCCFromNoNo(norm16);
  }

  public static int getCCFromYesOrMaybe(final int norm16) {
    return norm16 >= MIN_NORMAL_MAYBE_YES ? norm16 & 0xff : 0;
  }

  /**
   * Returns the FCD data for code point c.
   *
   * @param c
   *            A Unicode code point.
   * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
   */
  public int getFCD16(final int c) {
    if (c < 0) {
      return 0;
    } else if (c < 0x180) {
      return tccc180[c];
    } else if (c <= 0xffff) {
      if (!singleLeadMightHaveNonZeroFCD16(c)) {
        return 0;
      }
    }
    return getFCD16FromNormData(c);
  }
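
  // Worked example (illustrative, derived from getFCD16FromNormData() below): a plain
  // starter with no decomposition yields 0, while a combining mark with ccc=230 such as
  // U+0301 yields 0xE6E6, i.e. lccc=230 in bits 15..8 and tccc=230 in bits 7..0.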

  /** Returns the FCD data for U+0000<=c<U+0180. */
  public int getFCD16FromBelow180(final int c) {
    return tccc180[c];
  }

  /** Returns true if the single-or-lead code unit c might have non-zero FCD data. */
  public boolean singleLeadMightHaveNonZeroFCD16(final int lead) {
    // 0<=lead<=0xffff
    byte bits = smallFCD[lead >> 8];
    if (bits == 0) {
      return false;
    }
    return ((bits >> ((lead >> 5) & 7)) & 1) != 0;
  }

  /** Gets the FCD value from the regular normalization data. */
  public int getFCD16FromNormData(int c) {
    // Only loops for 1:1 algorithmic mappings.
    for (;;) {
      int norm16 = getNorm16(c);
      if (norm16 <= minYesNo) {
        // no decomposition or Hangul syllable, all zeros
        return 0;
      } else if (norm16 >= MIN_NORMAL_MAYBE_YES) {
        // combining mark
        norm16 &= 0xff;
        return norm16 | (norm16 << 8);
      } else if (norm16 >= minMaybeYes) {
        return 0;
      } else if (isDecompNoAlgorithmic(norm16)) {
        c = mapAlgorithmic(c, norm16);
      } else {
        // c decomposes, get everything from the variable-length extra data
        int firstUnit = extraData.charAt(norm16);
        if ((firstUnit & MAPPING_LENGTH_MASK) == 0) {
          // A character that is deleted (maps to an empty string) must
          // get the worst-case lccc and tccc values because arbitrary
          // characters on both sides will become adjacent.
          return 0x1ff;
        } else {
          int fcd16 = firstUnit >> 8;  // tccc
          if ((firstUnit & MAPPING_HAS_CCC_LCCC_WORD) != 0) {
            fcd16 |= extraData.charAt(norm16 - 1) & 0xff00;  // lccc
          }
          return fcd16;
        }
      }
    }
  }

  /**
   * Gets the decomposition for one code point.
   *
   * @param c
   *            code point
   * @return c's decomposition, if it has one; returns null if it does not have a decomposition
   */
  public String getDecomposition(int c) {
    int decomp = -1;
    int norm16;
    for (;;) {
      if (c < minDecompNoCP || isDecompYes(norm16 = getNorm16(c))) {
        // c does not decompose
      } else if (isHangul(norm16)) {
        // Hangul syllable: decompose algorithmically
        StringBuilder buffer = new StringBuilder();
        Hangul.decompose(c, buffer);
        return buffer.toString();
      } else if (isDecompNoAlgorithmic(norm16)) {
        decomp = c = mapAlgorithmic(c, norm16);
        continue;
      } else {
        // c decomposes, get everything from the variable-length extra data
        int length = extraData.charAt(norm16++) & MAPPING_LENGTH_MASK;
        return extraData.substring(norm16, norm16 + length);
      }
      if (decomp < 0) {
        return null;
      } else {
        return UTF16.valueOf(decomp);
      }
    }
  }
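
  // Example (illustrative, assuming the standard NFC data): getDecomposition(0x00C5)
  // (LATIN CAPITAL LETTER A WITH RING ABOVE) returns "A\u030A", and a code point
  // without a decomposition returns null.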

  /**
   * Gets the raw decomposition for one code point.
   *
   * @param c
   *            code point
   * @return c's raw decomposition, if it has one; returns null if it does not have a decomposition
   */
  public String getRawDecomposition(final int c) {
    // We do not loop in this method because an algorithmic mapping itself
    // becomes a final result rather than having to be decomposed recursively.
    int norm16;
    if (c < minDecompNoCP || isDecompYes(norm16 = getNorm16(c))) {
      // c does not decompose
      return null;
    } else if (isHangul(norm16)) {
      // Hangul syllable: decompose algorithmically
      StringBuilder buffer = new StringBuilder();
      Hangul.getRawDecomposition(c, buffer);
      return buffer.toString();
    } else if (isDecompNoAlgorithmic(norm16)) {
      return UTF16.valueOf(mapAlgorithmic(c, norm16));
    } else {
      // c decomposes, get everything from the variable-length extra data
      int firstUnit = extraData.charAt(norm16);
      int mLength = firstUnit & MAPPING_LENGTH_MASK;  // length of normal mapping
      if ((firstUnit & MAPPING_HAS_RAW_MAPPING) != 0) {
        // Read the raw mapping from before the firstUnit and before the optional ccc/lccc word.
        // Bit 7=MAPPING_HAS_CCC_LCCC_WORD
        int rawMapping = norm16 - ((firstUnit >> 7) & 1) - 1;
        char rm0 = extraData.charAt(rawMapping);
        if (rm0 <= MAPPING_LENGTH_MASK) {
          return extraData.substring(rawMapping - rm0, rawMapping);
        } else {
          // Copy the normal mapping and replace its first two code units with rm0.
          StringBuilder buffer = new StringBuilder(mLength - 1).append(rm0);
          norm16 += 1 + 2;  // skip over the firstUnit and the first two mapping code units
          return buffer.append(extraData, norm16, norm16 + mLength - 2).toString();
        }
      } else {
        norm16 += 1;  // skip over the firstUnit
        return extraData.substring(norm16, norm16 + mLength);
      }
    }
  }
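
  // Example (illustrative, assuming the standard NFC data): getRawDecomposition(0x212B)
  // (ANGSTROM SIGN) returns the one-level mapping "\u00C5", whereas getDecomposition(0x212B)
  // returns the fully decomposed "A\u030A".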

  /**
   * Returns true if code point c starts a canonical-iterator string segment. <b>{@link #ensureCanonIterData()} must have been called
   * before this method, or else this method will crash.</b>
   *
   * @param c
   *            A Unicode code point.
   * @return true if c starts a canonical-iterator string segment.
   */
  public boolean isCanonSegmentStarter(final int c) {
    return canonIterData.get(c) >= 0;
  }

  /**
   * Returns true if there are characters whose decomposition starts with c. If so, then the set is cleared and then filled with those
   * characters. <b>{@link #ensureCanonIterData()} must have been called before this method, or else this method will crash.</b>
   *
   * @param c
   *            A Unicode code point.
   * @param set
   *            A UnicodeSet to receive the characters whose decompositions start with c, if there are any.
   * @return true if there are characters whose decomposition starts with c.
   */
  public boolean getCanonStartSet(final int c, final UnicodeSet set) {
    int canonValue = canonIterData.get(c) & ~CANON_NOT_SEGMENT_STARTER;
    if (canonValue == 0) {
      return false;
    }
    set.clear();
    int value = canonValue & CANON_VALUE_MASK;
    if ((canonValue & CANON_HAS_SET) != 0) {
      set.addAll(canonStartSets.get(value));
    } else if (value != 0) {
      set.add(value);
    }
    if ((canonValue & CANON_HAS_COMPOSITIONS) != 0) {
      int norm16 = getNorm16(c);
      if (norm16 == JAMO_L) {
        int syllable = Hangul.HANGUL_BASE + (c - Hangul.JAMO_L_BASE) * Hangul.JAMO_VT_COUNT;
        set.add(syllable, syllable + Hangul.JAMO_VT_COUNT - 1);
      } else {
        addComposites(getCompositionsList(norm16), set);
      }
    }
    return true;
  }
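
  // Usage sketch (illustrative): build the canonical-iterator data first, then query it:
  //   impl.ensureCanonIterData();
  //   UnicodeSet set = new UnicodeSet();
  //   if (impl.getCanonStartSet('A', set)) {
  //     // set now holds the characters whose canonical decompositions start with 'A'
  //   }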

  public static final int MIN_CCC_LCCC_CP = 0x300;

  public static final int MIN_YES_YES_WITH_CC = 0xff01;
  public static final int JAMO_VT = 0xff00;
  public static final int MIN_NORMAL_MAYBE_YES = 0xfe00;
  public static final int JAMO_L = 1;
  public static final int MAX_DELTA = 0x40;

  // Byte offsets from the start of the data, after the generic header.
  public static final int IX_NORM_TRIE_OFFSET = 0;
  public static final int IX_EXTRA_DATA_OFFSET = 1;
  public static final int IX_SMALL_FCD_OFFSET = 2;
  public static final int IX_RESERVED3_OFFSET = 3;
  public static final int IX_TOTAL_SIZE = 7;

  // Code point thresholds for quick check codes.
  public static final int IX_MIN_DECOMP_NO_CP = 8;
  public static final int IX_MIN_COMP_NO_MAYBE_CP = 9;

  // Norm16 value thresholds for quick check combinations and types of extra data.
  // Mappings & compositions in [minYesNo..minYesNoMappingsOnly[.
  public static final int IX_MIN_YES_NO = 10;
  public static final int IX_MIN_NO_NO = 11;
  public static final int IX_LIMIT_NO_NO = 12;
  public static final int IX_MIN_MAYBE_YES = 13;

  // Mappings only in [minYesNoMappingsOnly..minNoNo[.
  public static final int IX_MIN_YES_NO_MAPPINGS_ONLY = 14;

  public static final int IX_COUNT = 16;

  public static final int MAPPING_HAS_CCC_LCCC_WORD = 0x80;
  public static final int MAPPING_HAS_RAW_MAPPING = 0x40;
  public static final int MAPPING_NO_COMP_BOUNDARY_AFTER = 0x20;
  public static final int MAPPING_LENGTH_MASK = 0x1f;

  public static final int COMP_1_LAST_TUPLE = 0x8000;
  public static final int COMP_1_TRIPLE = 1;
  public static final int COMP_1_TRAIL_LIMIT = 0x3400;
  public static final int COMP_1_TRAIL_MASK = 0x7ffe;
  public static final int COMP_1_TRAIL_SHIFT = 9;  // 10-1 for the "triple" bit
  public static final int COMP_2_TRAIL_SHIFT = 6;
  public static final int COMP_2_TRAIL_MASK = 0xffc0;

  // higher-level functionality ------------------------------------------ ***

  // Dual functionality:
  // buffer!=NULL: normalize
  // buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
  public int decompose(final CharSequence s, int src, final int limit, final ReorderingBuffer buffer) {
    int minNoCP = minDecompNoCP;

    int prevSrc;
    int c = 0;
    int norm16 = 0;

    // only for quick check
    int prevBoundary = src;
    int prevCC = 0;

    for (;;) {
      // count code units below the minimum or with irrelevant data for the quick check
      for (prevSrc = src; src != limit;) {
        if ((c = s.charAt(src)) < minNoCP || isMostDecompYesAndZeroCC(norm16 = normTrie.getFromU16SingleLead((char) c))) {
          ++src;
        } else if (!UTF16.isSurrogate((char) c)) {
          break;
        } else {
          char c2;
          if (UTF16Plus.isSurrogateLead(c)) {
            if ((src + 1) != limit && Character.isLowSurrogate(c2 = s.charAt(src + 1))) {
              c = Character.toCodePoint((char) c, c2);
            }
          } else /* trail surrogate */{
            if (prevSrc < src && Character.isHighSurrogate(c2 = s.charAt(src - 1))) {
              --src;
              c = Character.toCodePoint(c2, (char) c);
            }
          }
          if (isMostDecompYesAndZeroCC(norm16 = getNorm16(c))) {
            src += Character.charCount(c);
          } else {
            break;
          }
        }
      }
      // copy these code units all at once
      if (src != prevSrc) {
        if (buffer != null) {
          buffer.flushAndAppendZeroCC(s, prevSrc, src);
        } else {
          prevCC = 0;
          prevBoundary = src;
        }
      }
      if (src == limit) {
        break;
      }

      // Check one above-minimum, relevant code point.
      src += Character.charCount(c);
      if (buffer != null) {
        decompose(c, norm16, buffer);
      } else {
        if (isDecompYes(norm16)) {
          int cc = getCCFromYesOrMaybe(norm16);
          if (prevCC <= cc || cc == 0) {
            prevCC = cc;
            if (cc <= 1) {
              prevBoundary = src;
            }
            continue;
          }
        }
        return prevBoundary;  // "no" or cc out of order
      }
    }
    return src;
  }
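
  // Note on the dual use above (illustrative): with buffer==null the return value is a
  // span result; the prefix up to the returned index passes the decomposition quick check,
  // and a returned value less than limit means the answer for the whole range is "no".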

  public void decomposeAndAppend(final CharSequence s, final boolean doDecompose, final ReorderingBuffer buffer) {
    int limit = s.length();
    if (limit == 0) {
      return;
    }
    if (doDecompose) {
      decompose(s, 0, limit, buffer);
      return;
    }
    // Just merge the strings at the boundary.
    int c = Character.codePointAt(s, 0);
    int src = 0;
    int firstCC, prevCC, cc;
    firstCC = prevCC = cc = getCC(getNorm16(c));
    while (cc != 0) {
      prevCC = cc;
      src += Character.charCount(c);
      if (src >= limit) {
        break;
      }
      c = Character.codePointAt(s, src);
      cc = getCC(getNorm16(c));
    }
    buffer.append(s, 0, src, firstCC, prevCC);
    buffer.append(s, src, limit);
  }

  // Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
  // doCompose: normalize
  // !doCompose: isNormalized (buffer must be empty and initialized)
  public boolean compose(final CharSequence s, int src, final int limit, final boolean onlyContiguous, final boolean doCompose,
      final ReorderingBuffer buffer) {
    int minNoMaybeCP = minCompNoMaybeCP;

    /*
     * prevBoundary points to the last character before the current one
     * that has a composition boundary before it with ccc==0 and quick check "yes".
     * Keeping track of prevBoundary saves us looking for a composition boundary
     * when we find a "no" or "maybe".
     *
     * When we back out from prevSrc back to prevBoundary,
     * then we also remove those same characters (which had been simply copied
     * or canonically-order-inserted) from the ReorderingBuffer.
     * Therefore, at all times, the [prevBoundary..prevSrc[ source units
     * must correspond 1:1 to destination units at the end of the destination buffer.
     */
    int prevBoundary = src;
    int prevSrc;
    int c = 0;
    int norm16 = 0;

    // only for isNormalized
    int prevCC = 0;

    for (;;) {
      // count code units below the minimum or with irrelevant data for the quick check
      for (prevSrc = src; src != limit;) {
        if ((c = s.charAt(src)) < minNoMaybeCP || isCompYesAndZeroCC(norm16 = normTrie.getFromU16SingleLead((char) c))) {
          ++src;
        } else if (!UTF16.isSurrogate((char) c)) {
          break;
        } else {
          char c2;
          if (UTF16Plus.isSurrogateLead(c)) {
            if ((src + 1) != limit && Character.isLowSurrogate(c2 = s.charAt(src + 1))) {
              c = Character.toCodePoint((char) c, c2);
            }
          } else /* trail surrogate */{
            if (prevSrc < src && Character.isHighSurrogate(c2 = s.charAt(src - 1))) {
              --src;
              c = Character.toCodePoint(c2, (char) c);
            }
          }
          if (isCompYesAndZeroCC(norm16 = getNorm16(c))) {
            src += Character.charCount(c);
          } else {
            break;
          }
        }
      }
      // copy these code units all at once
      if (src != prevSrc) {
        if (src == limit) {
          if (doCompose) {
            buffer.flushAndAppendZeroCC(s, prevSrc, src);
          }
          break;
        }
        // Set prevBoundary to the last character in the quick check loop.
        prevBoundary = src - 1;
        if (Character.isLowSurrogate(s.charAt(prevBoundary)) && prevSrc < prevBoundary
            && Character.isHighSurrogate(s.charAt(prevBoundary - 1))) {
          --prevBoundary;
        }
        if (doCompose) {
          // The last "quick check yes" character is excluded from the
          // flush-and-append call in case it needs to be modified.
          buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary);
          buffer.append(s, prevBoundary, src);
        } else {
          prevCC = 0;
        }
        // The start of the current character (c).
        prevSrc = src;
      } else if (src == limit) {
        break;
      }

      src += Character.charCount(c);
      /*
       * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
       * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
       * or has ccc!=0.
       * Check for Jamo V/T, then for regular characters.
       * c is not a Hangul syllable or Jamo L because those have "yes" properties.
       */
      if (isJamoVT(norm16) && prevBoundary != prevSrc) {
        char prev = s.charAt(prevSrc - 1);
        boolean needToDecompose = false;
        if (c < Hangul.JAMO_T_BASE) {
          // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
          prev -= Hangul.JAMO_L_BASE;
          if (prev < Hangul.JAMO_L_COUNT) {
            if (!doCompose) {
              return false;
            }
            char syllable = (char) (Hangul.HANGUL_BASE + (prev * Hangul.JAMO_V_COUNT + (c - Hangul.JAMO_V_BASE))
                * Hangul.JAMO_T_COUNT);
            char t;
            if (src != limit && (t = (char) (s.charAt(src) - Hangul.JAMO_T_BASE)) < Hangul.JAMO_T_COUNT) {
              ++src;
              syllable += t;  // The next character was a Jamo T.
              prevBoundary = src;
              buffer.setLastChar(syllable);
              continue;
            }
            // If we see L+V+x where x!=T then we drop to the slow path,
            // decompose and recompose.
            // This is to deal with NFKC finding normal L and V but a
            // compatibility variant of a T. We need to either fully compose that
            // combination here (which would complicate the code and may not work
            // with strange custom data) or use the slow path -- or else our replacing
            // two input characters (L+V) with one output character (LV syllable)
            // would violate the invariant that [prevBoundary..prevSrc[ has the same
            // length as what we appended to the buffer since prevBoundary.
            needToDecompose = true;
          }
        } else if (Hangul.isHangulWithoutJamoT(prev)) {
          // c is a Jamo Trailing consonant,
          // compose with previous Hangul LV that does not contain a Jamo T.
          if (!doCompose) {
            return false;
          }
          buffer.setLastChar((char) (prev + c - Hangul.JAMO_T_BASE));
          prevBoundary = src;
          continue;
        }
        if (!needToDecompose) {
          // The Jamo V/T did not compose into a Hangul syllable.
          if (doCompose) {
            buffer.append((char) c);
          } else {
            prevCC = 0;
          }
          continue;
        }
      }
      /*
       * Source buffer pointers:
       *
       *  all done      quick check   current char  not yet
       *                "yes" but     (c)           processed
       *                may combine
       *                forward
       * [-------------[-------------[-------------[-------------[
       * |             |             |             |             |
       * orig. src     prevBoundary  prevSrc       src           limit
       *
       *
       * Destination buffer pointers inside the ReorderingBuffer:
       *
       *  all done      might take    not filled yet
       *                characters for
       *                reordering
       * [-------------[-------------[-------------[
       * |             |             |             |
       * start         reorderStart  limit         |
       *                             +remainingCap.+
       */
      if (norm16 >= MIN_YES_YES_WITH_CC) {
        int cc = norm16 & 0xff;  // cc!=0
        if (onlyContiguous &&  // FCC
            (doCompose ? buffer.getLastCC() : prevCC) == 0 && prevBoundary < prevSrc &&
            // buffer.getLastCC()==0 && prevBoundary<prevSrc tell us that
            // [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
            // passed the quick check "yes && ccc==0" test.
            // Check whether the last character was a "yesYes" or a "yesNo".
            // If a "yesNo", then we get its trailing ccc from its
            // mapping and check for canonical order.
            // All other cases are ok.
            getTrailCCFromCompYesAndZeroCC(s, prevBoundary, prevSrc) > cc) {
          // Fails FCD test, need to decompose and contiguously recompose.
          if (!doCompose) {
            return false;
          }
        } else if (doCompose) {
          buffer.append(c, cc);
          continue;
        } else if (prevCC <= cc) {
          prevCC = cc;
          continue;
        } else {
          return false;
        }
      } else if (!doCompose && !isMaybeOrNonZeroCC(norm16)) {
        return false;
      }

      /*
       * Find appropriate boundaries around this character,
       * decompose the source text from between the boundaries,
       * and recompose it.
       *
       * We may need to remove the last few characters from the ReorderingBuffer
       * to account for source text that was copied or appended
       * but needs to take part in the recomposition.
       */

      /*
       * Find the last composition boundary in [prevBoundary..src[.
       * It is either the decomposition of the current character (at prevSrc),
       * or prevBoundary.
       */
      if (hasCompBoundaryBefore(c, norm16)) {
        prevBoundary = prevSrc;
      } else if (doCompose) {
        buffer.removeSuffix(prevSrc - prevBoundary);
      }

      // Find the next composition boundary in [src..limit[ -
      // modifies src to point to the next starter.
      src = findNextCompBoundary(s, src, limit);

      // Decompose [prevBoundary..src[ into the buffer and then recompose that part of it.
      int recomposeStartIndex = buffer.length();
      decomposeShort(s, prevBoundary, src, buffer);
      recompose(buffer, recomposeStartIndex, onlyContiguous);
      if (!doCompose) {
        if (!buffer.equals(s, prevBoundary, src)) {
          return false;
        }
        buffer.remove();
        prevCC = 0;
      }

      // Move to the next starter. We never need to look back before this point again.
      prevBoundary = src;
    }
    return true;
  }

  /**
   * Very similar to compose(): Make the same changes in both places if relevant.
   * doSpan: spanQuickCheckYes (ignore bit 0 of the return value)
   * !doSpan: quickCheck
   *
   * @return bits 31..1: spanQuickCheckYes (==s.length() if "yes") and bit 0: set if "maybe"; otherwise, if the span length&lt;s.length()
   *         then the quick check result is "no"
   */
  public int composeQuickCheck(final CharSequence s, int src, final int limit, final boolean onlyContiguous, final boolean doSpan) {
    int qcResult = 0;
    int minNoMaybeCP = minCompNoMaybeCP;

    /*
     * prevBoundary points to the last character before the current one
     * that has a composition boundary before it with ccc==0 and quick check "yes".
     */
    int prevBoundary = src;
    int prevSrc;
    int c = 0;
    int norm16 = 0;
    int prevCC = 0;

    for (;;) {
      // count code units below the minimum or with irrelevant data for the quick check
      for (prevSrc = src;;) {
        if (src == limit) {
          return (src << 1) | qcResult;  // "yes" or "maybe"
        }
        if ((c = s.charAt(src)) < minNoMaybeCP || isCompYesAndZeroCC(norm16 = normTrie.getFromU16SingleLead((char) c))) {
          ++src;
        } else if (!UTF16.isSurrogate((char) c)) {
          break;
        } else {
          char c2;
          if (UTF16Plus.isSurrogateLead(c)) {
            if ((src + 1) != limit && Character.isLowSurrogate(c2 = s.charAt(src + 1))) {
              c = Character.toCodePoint((char) c, c2);
            }
          } else /* trail surrogate */{
            if (prevSrc < src && Character.isHighSurrogate(c2 = s.charAt(src - 1))) {
              --src;
              c = Character.toCodePoint(c2, (char) c);
            }
          }
          if (isCompYesAndZeroCC(norm16 = getNorm16(c))) {
            src += Character.charCount(c);
          } else {
            break;
          }
        }
      }
      if (src != prevSrc) {
        // Set prevBoundary to the last character in the quick check loop.
        prevBoundary = src - 1;
        if (Character.isLowSurrogate(s.charAt(prevBoundary)) && prevSrc < prevBoundary
            && Character.isHighSurrogate(s.charAt(prevBoundary - 1))) {
          --prevBoundary;
        }
        prevCC = 0;
        // The start of the current character (c).
        prevSrc = src;
      }

      src += Character.charCount(c);
      /*
       * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
       * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
       * or has ccc!=0.
       */
      if (isMaybeOrNonZeroCC(norm16)) {
        int cc = getCCFromYesOrMaybe(norm16);
        if (onlyContiguous &&  // FCC
            cc != 0 && prevCC == 0 && prevBoundary < prevSrc &&
            // prevCC==0 && prevBoundary<prevSrc tell us that
            // [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
            // passed the quick check "yes && ccc==0" test.
            // Check whether the last character was a "yesYes" or a "yesNo".
            // If a "yesNo", then we get its trailing ccc from its
            // mapping and check for canonical order.
            // All other cases are ok.
            getTrailCCFromCompYesAndZeroCC(s, prevBoundary, prevSrc) > cc) {
          // Fails FCD test.
        } else if (prevCC <= cc || cc == 0) {
          prevCC = cc;
          if (norm16 < MIN_YES_YES_WITH_CC) {
            if (!doSpan) {
              qcResult = 1;
            } else {
              return prevBoundary << 1;  // spanYes does not care to know it's "maybe"
            }
          }
          continue;
        }
      }
      return prevBoundary << 1;  // "no"
    }
  }
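
  // Unpacking sketch for the return value documented above (illustrative):
  //   int result = composeQuickCheck(s, 0, s.length(), false, false);
  //   int spanLength = result >>> 1;       // == s.length() when the result is not "no"
  //   boolean maybe = (result & 1) != 0;   // only meaningful when doSpan==false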

  public void composeAndAppend(final CharSequence s, final boolean doCompose, final boolean onlyContiguous, final ReorderingBuffer buffer) {
    int src = 0, limit = s.length();
    if (!buffer.isEmpty()) {
      int firstStarterInSrc = findNextCompBoundary(s, 0, limit);
      if (0 != firstStarterInSrc) {
        int lastStarterInDest = findPreviousCompBoundary(buffer.getStringBuilder(), buffer.length());
        StringBuilder middle = new StringBuilder((buffer.length() - lastStarterInDest) + firstStarterInSrc + 16);
        middle.append(buffer.getStringBuilder(), lastStarterInDest, buffer.length());
        buffer.removeSuffix(buffer.length() - lastStarterInDest);
        middle.append(s, 0, firstStarterInSrc);
        compose(middle, 0, middle.length(), onlyContiguous, true, buffer);
        src = firstStarterInSrc;
      }
    }
    if (doCompose) {
      compose(s, src, limit, onlyContiguous, true, buffer);
    } else {
      buffer.append(s, src, limit);
    }
  }

  // Dual functionality:
  // buffer!=NULL: normalize
  // buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
  public int makeFCD(final CharSequence s, int src, final int limit, final ReorderingBuffer buffer) {
    // Note: In this function we use buffer->appendZeroCC() because we track
    // the lead and trail combining classes here, rather than leaving it to
    // the ReorderingBuffer.
    // The exception is the call to decomposeShort() which uses the buffer
    // in the normal way.

    // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1.
    // Similar to the prevBoundary in the compose() implementation.
    int prevBoundary = src;
    int prevSrc;
    int c = 0;
    int prevFCD16 = 0;
    int fcd16 = 0;

    for (;;) {
      // count code units with lccc==0
      for (prevSrc = src; src != limit;) {
        if ((c = s.charAt(src)) < MIN_CCC_LCCC_CP) {
          prevFCD16 = ~c;
          ++src;
        } else if (!singleLeadMightHaveNonZeroFCD16(c)) {
          prevFCD16 = 0;
          ++src;
        } else {
          if (UTF16.isSurrogate((char) c)) {
            char c2;
            if (UTF16Plus.isSurrogateLead(c)) {
              if ((src + 1) != limit && Character.isLowSurrogate(c2 = s.charAt(src + 1))) {
                c = Character.toCodePoint((char) c, c2);
              }
            } else /* trail surrogate */{
              if (prevSrc < src && Character.isHighSurrogate(c2 = s.charAt(src - 1))) {
                --src;
                c = Character.toCodePoint(c2, (char) c);
              }
            }
          }
          if ((fcd16 = getFCD16FromNormData(c)) <= 0xff) {
            prevFCD16 = fcd16;
            src += Character.charCount(c);
          } else {
            break;
          }
        }
      }
      // copy these code units all at once
      if (src != prevSrc) {
        if (src == limit) {
          if (buffer != null) {
            buffer.flushAndAppendZeroCC(s, prevSrc, src);
          }
          break;
        }
        prevBoundary = src;
        // We know that the previous character's lccc==0.
        if (prevFCD16 < 0) {
          // Fetching the fcd16 value was deferred for this below-U+0300 code point.
          int prev = ~prevFCD16;
          prevFCD16 = prev < 0x180 ? tccc180[prev] : getFCD16FromNormData(prev);
          if (prevFCD16 > 1) {
            --prevBoundary;
          }
        } else {
          int p = src - 1;
          if (Character.isLowSurrogate(s.charAt(p)) && prevSrc < p && Character.isHighSurrogate(s.charAt(p - 1))) {
            --p;
            // Need to fetch the previous character's FCD value because
            // prevFCD16 was just for the trail surrogate code point.
            prevFCD16 = getFCD16FromNormData(Character.toCodePoint(s.charAt(p), s.charAt(p + 1)));
            // Still known to have lccc==0 because its lead surrogate unit had lccc==0.
          }
          if (prevFCD16 > 1) {
            prevBoundary = p;
          }
        }
        if (buffer != null) {
          // The last lccc==0 character is excluded from the
          // flush-and-append call in case it needs to be modified.
          buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary);
          buffer.append(s, prevBoundary, src);
        }
        // The start of the current character (c).
        prevSrc = src;
      } else if (src == limit) {
        break;
      }

      src += Character.charCount(c);
      // The current character (c) at [prevSrc..src[ has a non-zero lead combining class.
      // Check for proper order, and decompose locally if necessary.
      if ((prevFCD16 & 0xff) <= (fcd16 >> 8)) {
        // proper order: prev tccc <= current lccc
        if ((fcd16 & 0xff) <= 1) {
          prevBoundary = src;
        }
        if (buffer != null) {
          buffer.appendZeroCC(c);
        }
        prevFCD16 = fcd16;
        continue;
      } else if (buffer == null) {
        return prevBoundary;  // quick check "no"
      } else {
        /*
         * Back out the part of the source that we copied or appended
         * already but is now going to be decomposed.
         * prevSrc is set to after what was copied/appended.
         */
        buffer.removeSuffix(prevSrc - prevBoundary);
        /*
         * Find the part of the source that needs to be decomposed,
         * up to the next safe boundary.
         */
        src = findNextFCDBoundary(s, src, limit);
        /*
         * The source text does not fulfill the conditions for FCD.
         * Decompose and reorder a limited piece of the text.
         */
        decomposeShort(s, prevBoundary, src, buffer);
        prevBoundary = src;
        prevFCD16 = 0;
      }
    }
    return src;
  }

  public void makeFCDAndAppend(final CharSequence s, final boolean doMakeFCD, final ReorderingBuffer buffer) {
    int src = 0, limit = s.length();
    if (!buffer.isEmpty()) {
      int firstBoundaryInSrc = findNextFCDBoundary(s, 0, limit);
      if (0 != firstBoundaryInSrc) {
        int lastBoundaryInDest = findPreviousFCDBoundary(buffer.getStringBuilder(), buffer.length());
        StringBuilder middle = new StringBuilder((buffer.length() - lastBoundaryInDest) + firstBoundaryInSrc + 16);
        middle.append(buffer.getStringBuilder(), lastBoundaryInDest, buffer.length());
        buffer.removeSuffix(buffer.length() - lastBoundaryInDest);
        middle.append(s, 0, firstBoundaryInSrc);
        makeFCD(middle, 0, middle.length(), buffer);
        src = firstBoundaryInSrc;
      }
    }
    if (doMakeFCD) {
      makeFCD(s, src, limit, buffer);
    } else {
      buffer.append(s, src, limit);
    }
  }

  // Note: hasDecompBoundary() could be implemented as aliases to
  // hasFCDBoundaryBefore() and hasFCDBoundaryAfter()
  // at the cost of building the FCD trie for a decomposition normalizer.
  public boolean hasDecompBoundary(int c, final boolean before) {
    for (;;) {
      if (c < minDecompNoCP) {
        return true;
      }
      int norm16 = getNorm16(c);
      if (isHangul(norm16) || isDecompYesAndZeroCC(norm16)) {
        return true;
      } else if (norm16 > MIN_NORMAL_MAYBE_YES) {
        return false;  // ccc!=0
      } else if (isDecompNoAlgorithmic(norm16)) {
        c = mapAlgorithmic(c, norm16);
      } else {
        // c decomposes, get everything from the variable-length extra data
        int firstUnit = extraData.charAt(norm16);
        if ((firstUnit & MAPPING_LENGTH_MASK) == 0) {
          return false;
        }
        if (!before) {
          // decomp after-boundary: same as hasFCDBoundaryAfter(),
          // fcd16<=1 || trailCC==0
          if (firstUnit > 0x1ff) {
            return false// trailCC>1
          }
          if (firstUnit <= 0xff) {
            return true;  // trailCC==0
          }
          // if(trailCC==1) test leadCC==0, same as checking for before-boundary
        }
        // true if leadCC==0 (hasFCDBoundaryBefore())
        return (firstUnit & MAPPING_HAS_CCC_LCCC_WORD) == 0 || (extraData.charAt(norm16 - 1) & 0xff00) == 0;
      }
    }
  }

  public boolean isDecompInert(final int c) {
    return isDecompYesAndZeroCC(getNorm16(c));
  }

  public boolean hasCompBoundaryBefore(final int c) {
    return c < minCompNoMaybeCP || hasCompBoundaryBefore(c, getNorm16(c));
  }

  public boolean hasCompBoundaryAfter(int c, final boolean onlyContiguous, final boolean testInert) {
    for (;;) {
      int norm16 = getNorm16(c);
      if (isInert(norm16)) {
        return true;
      } else if (norm16 <= minYesNo) {
        // Hangul: norm16==minYesNo
        // Hangul LVT has a boundary after it.
        // Hangul LV and non-inert yesYes characters combine forward.
        return isHangul(norm16) && !Hangul.isHangulWithoutJamoT((char) c);
      } else if (norm16 >= (testInert ? minNoNo : minMaybeYes)) {
        return false;
      } else if (isDecompNoAlgorithmic(norm16)) {
        c = mapAlgorithmic(c, norm16);
      } else {
        // c decomposes, get everything from the variable-length extra data.
        // If testInert, then c must be a yesNo character which has lccc=0,
        // otherwise it could be a noNo.
        int firstUnit = extraData.charAt(norm16);
        // true if
        //   not MAPPING_NO_COMP_BOUNDARY_AFTER
        //     (which is set if
        //       c is not deleted, and
        //       it and its decomposition do not combine forward, and it has a starter)
        //   and if FCC then trailCC<=1
        return (firstUnit & MAPPING_NO_COMP_BOUNDARY_AFTER) == 0 && (!onlyContiguous || firstUnit <= 0x1ff);
      }
    }
  }

  public boolean hasFCDBoundaryBefore(final int c) {
    return c < MIN_CCC_LCCC_CP || getFCD16(c) <= 0xff;
  }

  public boolean hasFCDBoundaryAfter(final int c) {
    int fcd16 = getFCD16(c);
    return fcd16 <= 1 || (fcd16 & 0xff) == 0;
  }

  public boolean isFCDInert(final int c) {
    return getFCD16(c) <= 1;
  }
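
  // Illustrative sketch (not part of the original source): an fcd16 value, as used in makeFCD()
  // and the boundary tests above, packs the lead combining class of a character's decomposition
  // into the high byte and the trail combining class into the low byte. Helper names are hypothetical:
  private static int exampleLeadCC(int fcd16) {
    return fcd16 >> 8;    // lccc: ccc of the first character of the decomposition
  }
  private static int exampleTrailCC(int fcd16) {
    return fcd16 & 0xff;  // tccc: ccc of the last character of the decomposition
  }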

  private boolean isMaybe(final int norm16) {
    return minMaybeYes <= norm16 && norm16 <= JAMO_VT;
  }

  private boolean isMaybeOrNonZeroCC(final int norm16) {
    return norm16 >= minMaybeYes;
  }

  private static boolean isInert(final int norm16) {
    return norm16 == 0;
  }

  private static boolean isJamoL(final int norm16) {
    return norm16 == 1;
  }

  private static boolean isJamoVT(final int norm16) {
    return norm16 == JAMO_VT;
  }

  private boolean isHangul(final int norm16) {
    return norm16 == minYesNo;
  }

  private boolean isCompYesAndZeroCC(final int norm16) {
    return norm16 < minNoNo;
  }

  // UBool isCompYes(uint16_t norm16) const {
  //     return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
  // }
  // UBool isCompYesOrMaybe(uint16_t norm16) const {
  //     return norm16<minNoNo || minMaybeYes<=norm16;
  // }
  // private boolean hasZeroCCFromDecompYes(int norm16) {
  //     return norm16<=MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
  // }
  private boolean isDecompYesAndZeroCC(final int norm16) {
    return norm16 < minYesNo || norm16 == JAMO_VT || (minMaybeYes <= norm16 && norm16 <= MIN_NORMAL_MAYBE_YES);
  }

  /**
   * A little faster and simpler than isDecompYesAndZeroCC() but does not include the MaybeYes which combine-forward and have ccc=0.
   * (Standard Unicode 5.2 normalization does not have such characters.)
   */
  private boolean isMostDecompYesAndZeroCC(final int norm16) {
    return norm16 < minYesNo || norm16 == MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT;
  }

  private boolean isDecompNoAlgorithmic(final int norm16) {
    return norm16 >= limitNoNo;
  }

  // For use with isCompYes().
  // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
  // static uint8_t getCCFromYes(uint16_t norm16) {
  //     return norm16>=MIN_YES_YES_WITH_CC ? (uint8_t)norm16 : 0;
  // }
  private int getCCFromNoNo(final int norm16) {
    if ((extraData.charAt(norm16) & MAPPING_HAS_CCC_LCCC_WORD) != 0) {
      return extraData.charAt(norm16 - 1) & 0xff;
    } else {
      return 0;
    }
  }

  // requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC()
  int getTrailCCFromCompYesAndZeroCC(final CharSequence s, final int cpStart, final int cpLimit) {
    int c;
    if (cpStart == (cpLimit - 1)) {
      c = s.charAt(cpStart);
    } else {
      c = Character.codePointAt(s, cpStart);
    }
    int prevNorm16 = getNorm16(c);
    if (prevNorm16 <= minYesNo) {
      return 0;  // yesYes and Hangul LV/LVT have ccc=tccc=0
    } else {
      return extraData.charAt(prevNorm16) >> 8;  // tccc from yesNo
    }
  }

  // Requires algorithmic-NoNo.
  private int mapAlgorithmic(final int c, final int norm16) {
    return c + norm16 - (minMaybeYes - MAX_DELTA - 1);
  }

  // Requires minYesNo<norm16<limitNoNo.
  // private int getMapping(int norm16) { return /*extraData+*/norm16; }

  /**
   * @return index into maybeYesCompositions, or -1
   */
  private int getCompositionsListForDecompYes(int norm16) {
    if (norm16 == 0 || MIN_NORMAL_MAYBE_YES <= norm16) {
      return -1;
    } else {
      if ((norm16 -= minMaybeYes) < 0) {
        // norm16<minMaybeYes: index into extraData which is a substring at
        //     maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes]
        // same as (MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16
        norm16 += MIN_NORMAL_MAYBE_YES;  // for yesYes; if Jamo L: harmless empty list
      }
      return norm16;
    }
  }

  /**
   * @return index into maybeYesCompositions
   */
  private int getCompositionsListForComposite(final int norm16) {
    // composite has both mapping & compositions list
    int firstUnit = extraData.charAt(norm16);
    return (MIN_NORMAL_MAYBE_YES - minMaybeYes) + norm16 +  // mapping in maybeYesCompositions
        1 +  // +1 to skip the first unit with the mapping length
        (firstUnit & MAPPING_LENGTH_MASK);  // + mapping length
  }

  /**
   * @param norm16
   *            norm16 value of a code point that must have compositions
   * @return index into maybeYesCompositions
   */
  private int getCompositionsList(final int norm16) {
    return isDecompYes(norm16) ? getCompositionsListForDecompYes(norm16) : getCompositionsListForComposite(norm16);
  }

  // Decompose a short piece of text which is likely to contain characters that
  // fail the quick check loop and/or where the quick check loop's overhead
  // is unlikely to be amortized.
  // Called by the compose() and makeFCD() implementations.
  // Public in Java for collation implementation code.
  public void decomposeShort(final CharSequence s, int src, final int limit, final ReorderingBuffer buffer) {
    while (src < limit) {
      int c = Character.codePointAt(s, src);
      src += Character.charCount(c);
      decompose(c, getNorm16(c), buffer);
    }
  }

  private void decompose(int c, int norm16, final ReorderingBuffer buffer) {
    // Only loops for 1:1 algorithmic mappings.
    for (;;) {
      // get the decomposition and the lead and trail cc's
      if (isDecompYes(norm16)) {
        // c does not decompose
        buffer.append(c, getCCFromYesOrMaybe(norm16));
      } else if (isHangul(norm16)) {
        // Hangul syllable: decompose algorithmically
        Hangul.decompose(c, buffer);
      } else if (isDecompNoAlgorithmic(norm16)) {
        c = mapAlgorithmic(c, norm16);
        norm16 = getNorm16(c);
        continue;
      } else {
        // c decomposes, get everything from the variable-length extra data
        int firstUnit = extraData.charAt(norm16);
        int length = firstUnit & MAPPING_LENGTH_MASK;
        int leadCC, trailCC;
        trailCC = firstUnit >> 8;
        if ((firstUnit & MAPPING_HAS_CCC_LCCC_WORD) != 0) {
          leadCC = extraData.charAt(norm16 - 1) >> 8;
        } else {
          leadCC = 0;
        }
        ++norm16;  // skip over the firstUnit
        buffer.append(extraData, norm16, norm16 + length, leadCC, trailCC);
      }
      return;
    }
  }
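
  // Illustrative sketch (not part of the original source): layout of the variable-length extra
  // data read by decompose() above. The first 16-bit unit carries the mapping length in its low
  // bits (MAPPING_LENGTH_MASK) and the trail combining class in its high byte; when
  // MAPPING_HAS_CCC_LCCC_WORD is set, the unit just before it carries lccc in its high byte.
  // The mapping's code units follow the first unit. Names here are hypothetical.
  private void exampleDecodeMapping(int norm16) {
    int firstUnit = extraData.charAt(norm16);
    int length = firstUnit & MAPPING_LENGTH_MASK;  // number of mapping code units
    int trailCC = firstUnit >> 8;                  // ccc of the mapping's last character
    int leadCC = 0;
    if ((firstUnit & MAPPING_HAS_CCC_LCCC_WORD) != 0) {
      leadCC = extraData.charAt(norm16 - 1) >> 8;  // ccc of the mapping's first character
    }
    CharSequence mapping = extraData.subSequence(norm16 + 1, norm16 + 1 + length);
    // decompose() appends 'mapping' with leadCC/trailCC so the ReorderingBuffer can
    // canonically reorder without recomputing combining classes.
  }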

  /**
   * Finds the recomposition result for a forward-combining "lead" character, specified by the start index of its compositions list, and a
   * backward-combining "trail" character.
   *
   * <p>
   * If the lead and trail characters combine, then this function returns the following "compositeAndFwd" value:
   *
   * <pre>
   * Bits 21..1  composite character
   * Bit      0  set if the composite is a forward-combining starter
   * </pre>
   *
   * otherwise it returns -1.
   *
   * <p>
   * The compositions list has (trail, compositeAndFwd) pair entries, encoded as either pairs or triples of 16-bit units. The last entry
   * has the high bit of its first unit set.
   *
   * <p>
   * The list is sorted by ascending trail characters (there are no duplicates). A linear search is used.
   *
   * <p>
   * See normalizer2impl.h for a more detailed description of the compositions list format.
   */
  private static int combine(final String compositions, int list, final int trail) {
    int key1, firstUnit;
    if (trail < COMP_1_TRAIL_LIMIT) {
      // trail character is 0..33FF
      // result entry may have 2 or 3 units
      key1 = (trail << 1);
      while (key1 > (firstUnit = compositions.charAt(list))) {
        list += 2 + (firstUnit & COMP_1_TRIPLE);
      }
      if (key1 == (firstUnit & COMP_1_TRAIL_MASK)) {
        if ((firstUnit & COMP_1_TRIPLE) != 0) {
          return ((int) compositions.charAt(list + 1) << 16) | compositions.charAt(list + 2);
        } else {
          return compositions.charAt(list + 1);
        }
      }
    } else {
      // trail character is 3400..10FFFF
      // result entry has 3 units
      key1 = COMP_1_TRAIL_LIMIT + (((trail >> COMP_1_TRAIL_SHIFT)) & ~COMP_1_TRIPLE);
      int key2 = (trail << COMP_2_TRAIL_SHIFT) & 0xffff;
      int secondUnit;
      for (;;) {
        if (key1 > (firstUnit = compositions.charAt(list))) {
          list += 2 + (firstUnit & COMP_1_TRIPLE);
        } else if (key1 == (firstUnit & COMP_1_TRAIL_MASK)) {
          if (key2 > (secondUnit = compositions.charAt(list + 1))) {
            if ((firstUnit & COMP_1_LAST_TUPLE) != 0) {
              break;
            } else {
              list += 3;
            }
          } else if (key2 == (secondUnit & COMP_2_TRAIL_MASK)) {
            return ((secondUnit & ~COMP_2_TRAIL_MASK) << 16) | compositions.charAt(list + 2);
          } else {
            break;
          }
        } else {
          break;
        }
      }
    }
    return -1;
  }
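
  // Illustrative sketch (not part of the original source): decoding a non-negative return value
  // of combine(), per the Javadoc above. Helper names are hypothetical.
  private static int exampleComposite(int compositeAndFwd) {
    return compositeAndFwd >> 1;        // bits 21..1: the composite code point
  }
  private static boolean exampleCombinesForward(int compositeAndFwd) {
    return (compositeAndFwd & 1) != 0;  // bit 0: the composite is itself a forward-combining starter
  }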

  /**
   * @param list
   *            some character's compositions list
   * @param set
   *            recursively receives the composites from these compositions
   */
  private void addComposites(int list, final UnicodeSet set) {
    int firstUnit, compositeAndFwd;
    do {
      firstUnit = maybeYesCompositions.charAt(list);
      if ((firstUnit & COMP_1_TRIPLE) == 0) {
        compositeAndFwd = maybeYesCompositions.charAt(list + 1);
        list += 2;
      } else {
        compositeAndFwd = (((int) maybeYesCompositions.charAt(list + 1) & ~COMP_2_TRAIL_MASK) << 16)
            | maybeYesCompositions.charAt(list + 2);
        list += 3;
      }
      int composite = compositeAndFwd >> 1;
      if ((compositeAndFwd & 1) != 0) {
        addComposites(getCompositionsListForComposite(getNorm16(composite)), set);
      }
      set.add(composite);
    } while ((firstUnit & COMP_1_LAST_TUPLE) == 0);
  }

  /*
   * Recomposes the buffer text starting at recomposeStartIndex
   * (which is in NFD - decomposed and canonically ordered),
   * and truncates the buffer contents.
   *
   * Note that recomposition never lengthens the text:
   * Any character consists of either one or two code units;
   * a composition may contain at most one more code unit than the original starter,
   * while the combining mark that is removed has at least one code unit.
   */
  private void recompose(final ReorderingBuffer buffer, final int recomposeStartIndex, final boolean onlyContiguous) {
    StringBuilder sb = buffer.getStringBuilder();
    int p = recomposeStartIndex;
    if (p == sb.length()) {
      return;
    }

    int starter, pRemove;
    int compositionsList;
    int c, compositeAndFwd;
    int norm16;
    int cc, prevCC;
    boolean starterIsSupplementary;

    // Some of the following variables are not used until we have a forward-combining starter
    // and are only initialized now to avoid compiler warnings.
    compositionsList = -1;  // used as indicator for whether we have a forward-combining starter
    starter = -1;
    starterIsSupplementary = false;
    prevCC = 0;

    for (;;) {
      c = sb.codePointAt(p);
      p += Character.charCount(c);
      norm16 = getNorm16(c);
      cc = getCCFromYesOrMaybe(norm16);
      if ( // this character combines backward and
      isMaybe(norm16) &&
      // we have seen a starter that combines forward and
          compositionsList >= 0 &&
          // the backward-combining character is not blocked
          (prevCC < cc || prevCC == 0)) {
        if (isJamoVT(norm16)) {
          // c is a Jamo V/T, see if we can compose it with the previous character.
          if (c < Hangul.JAMO_T_BASE) {
            // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
            char prev = (char) (sb.charAt(starter) - Hangul.JAMO_L_BASE);
            if (prev < Hangul.JAMO_L_COUNT) {
              pRemove = p - 1;
              char syllable = (char) (Hangul.HANGUL_BASE + (prev * Hangul.JAMO_V_COUNT + (c - Hangul.JAMO_V_BASE))
                  * Hangul.JAMO_T_COUNT);
              char t;
              if (p != sb.length() && (t = (char) (sb.charAt(p) - Hangul.JAMO_T_BASE)) < Hangul.JAMO_T_COUNT) {
                ++p;
                syllable += t;  // The next character was a Jamo T.
              }
              sb.setCharAt(starter, syllable);
              // remove the Jamo V/T
              sb.delete(pRemove, p);
              p = pRemove;
            }
          }
          /*
           * No "else" for Jamo T:
           * Since the input is in NFD, there are no Hangul LV syllables that
           * a Jamo T could combine with.
           * All Jamo Ts are combined above when handling Jamo Vs.
           */
          if (p == sb.length()) {
            break;
          }
          compositionsList = -1;
          continue;
        } else if ((compositeAndFwd = combine(maybeYesCompositions, compositionsList, c)) >= 0) {
          // The starter and the combining mark (c) do combine.
          int composite = compositeAndFwd >> 1;

          // Remove the combining mark.
          pRemove = p - Character.charCount(c);  // pRemove & p: start & limit of the combining mark
          sb.delete(pRemove, p);
          p = pRemove;
          // Replace the starter with the composite.
          if (starterIsSupplementary) {
            if (composite > 0xffff) {
              // both are supplementary
              sb.setCharAt(starter, UTF16.getLeadSurrogate(composite));
              sb.setCharAt(starter + 1, UTF16.getTrailSurrogate(composite));
            } else {
              sb.setCharAt(starter, (char) composite);
              sb.deleteCharAt(starter + 1);
              // The composite is shorter than the starter,
              // move the intermediate characters forward one.
              starterIsSupplementary = false;
              --p;
            }
          } else if (composite > 0xffff) {
            // The composite is longer than the starter,
            // move the intermediate characters back one.
            starterIsSupplementary = true;
            sb.setCharAt(starter, UTF16.getLeadSurrogate(composite));
            sb.insert(starter + 1, UTF16.getTrailSurrogate(composite));
            ++p;
          } else {
            // both are on the BMP
            sb.setCharAt(starter, (char) composite);
          }

          // Keep prevCC because we removed the combining mark.

          if (p == sb.length()) {
            break;
          }
          // Is the composite a starter that combines forward?
          if ((compositeAndFwd & 1) != 0) {
            compositionsList = getCompositionsListForComposite(getNorm16(composite));
          } else {
            compositionsList = -1;
          }

          // We combined; continue with looking for compositions.
          continue;
        }
      }

      // no combination this time
      prevCC = cc;
      if (p == sb.length()) {
        break;
      }

      // If c did not combine, then check if it is a starter.
      if (cc == 0) {
        // Found a new starter.
        if ((compositionsList = getCompositionsListForDecompYes(norm16)) >= 0) {
          // It may combine with something, prepare for it.
          if (c <= 0xffff) {
            starterIsSupplementary = false;
            starter = p - 1;
          } else {
            starterIsSupplementary = true;
            starter = p - 2;
          }
        }
      } else if (onlyContiguous) {
        // FCC: no discontiguous compositions; any intervening character blocks.
        compositionsList = -1;
      }
    }
    buffer.flush();
  }

  public int composePair(final int a, int b) {
    int norm16 = getNorm16(a);  // maps an out-of-range 'a' to inert norm16=0
    int list;
    if (isInert(norm16)) {
      return -1;
    } else if (norm16 < minYesNoMappingsOnly) {
      if (isJamoL(norm16)) {
        b -= Hangul.JAMO_V_BASE;
        if (0 <= b && b < Hangul.JAMO_V_COUNT) {
          return (Hangul.HANGUL_BASE + ((a - Hangul.JAMO_L_BASE) * Hangul.JAMO_V_COUNT + b) * Hangul.JAMO_T_COUNT);
        } else {
          return -1;
        }
      } else if (isHangul(norm16)) {
        b -= Hangul.JAMO_T_BASE;
        if (Hangul.isHangulWithoutJamoT((char) a) && 0 < b && b < Hangul.JAMO_T_COUNT) {  // not b==0!
          return a + b;
        } else {
          return -1;
        }
      } else {
        // 'a' has a compositions list in extraData
        list = norm16;
        if (norm16 > minYesNo) {  // composite 'a' has both mapping & compositions list
          list +=  // mapping pointer
              1 +  // +1 to skip the first unit with the mapping length
              (extraData.charAt(list) & MAPPING_LENGTH_MASK);  // + mapping length
        }
        // Turn the offset-into-extraData into an offset-into-maybeYesCompositions.
        list += MIN_NORMAL_MAYBE_YES - minMaybeYes;
      }
    } else if (norm16 < minMaybeYes || MIN_NORMAL_MAYBE_YES <= norm16) {
      return -1;
    } else {
      list = norm16 - minMaybeYes;  // offset into maybeYesCompositions
    }
    if (b < 0 || 0x10ffff < b) {  // combine(list, b) requires a valid code point b
      return -1;
    }
    return combine(maybeYesCompositions, list, b) >> 1;
  }
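
  // Illustrative sketch (not part of the original source): the Hangul branches of composePair()
  // implement the standard arithmetic composition. A minimal check of that arithmetic, using the
  // Hangul constants defined above (the asserts are only for illustration):
  private static void exampleHangulComposePair() {
    // Jamo L U+1100 + Jamo V U+1161 compose to the LV syllable U+AC00.
    int lv = Hangul.HANGUL_BASE
        + ((0x1100 - Hangul.JAMO_L_BASE) * Hangul.JAMO_V_COUNT + (0x1161 - Hangul.JAMO_V_BASE))
        * Hangul.JAMO_T_COUNT;
    assert lv == 0xAC00;
    // LV syllable U+AC00 + Jamo T U+11A8 compose to the LVT syllable U+AC01.
    int lvt = lv + (0x11A8 - Hangul.JAMO_T_BASE);
    assert lvt == 0xAC01;
  }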

  /**
   * Does c have a composition boundary before it? True if its decomposition begins with a character that has ccc=0 && NFC_QC=Yes
   * (isCompYesAndZeroCC()). As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()) so we need not
   * decompose.
   */
  private boolean hasCompBoundaryBefore(int c, int norm16) {
    for (;;) {
      if (isCompYesAndZeroCC(norm16)) {
        return true;
      } else if (isMaybeOrNonZeroCC(norm16)) {
        return false;
      } else if (isDecompNoAlgorithmic(norm16)) {
        c = mapAlgorithmic(c, norm16);
        norm16 = getNorm16(c);
      } else {
        // c decomposes, get everything from the variable-length extra data
        int firstUnit = extraData.charAt(norm16);
        if ((firstUnit & MAPPING_LENGTH_MASK) == 0) {
          return false;
        }
        if ((firstUnit & MAPPING_HAS_CCC_LCCC_WORD) != 0 && (extraData.charAt(norm16 - 1) & 0xff00) != 0) {
          return false;  // non-zero leadCC
        }
        return isCompYesAndZeroCC(getNorm16(Character.codePointAt(extraData, norm16 + 1)));
      }
    }
  }

  private int findPreviousCompBoundary(final CharSequence s, int p) {
    while (p > 0) {
      int c = Character.codePointBefore(s, p);
      p -= Character.charCount(c);
      if (hasCompBoundaryBefore(c)) {
        break;
      }
      // We could also test hasCompBoundaryAfter() and return the index just past c,
      // but that's probably not worth the extra cost.
    }
    return p;
  }

  private int findNextCompBoundary(final CharSequence s, int p, final int limit) {
    while (p < limit) {
      int c = Character.codePointAt(s, p);
      int norm16 = normTrie.get(c);
      if (hasCompBoundaryBefore(c, norm16)) {
        break;
      }
      p += Character.charCount(c);
    }
    return p;
  }

  private int findPreviousFCDBoundary(final CharSequence s, int p) {
    while (p > 0) {
      int c = Character.codePointBefore(s, p);
      p -= Character.charCount(c);
      if (c < MIN_CCC_LCCC_CP || getFCD16(c) <= 0xff) {
        break;
      }
    }
    return p;
  }

  private int findNextFCDBoundary(final CharSequence s, int p, final int limit) {
    while (p < limit) {
      int c = Character.codePointAt(s, p);
      if (c < MIN_CCC_LCCC_CP || getFCD16(c) <= 0xff) {
        break;
      }
      p += Character.charCount(c);
    }
    return p;
  }

  private void addToStartSet(final Trie2Writable newData, final int origin, final int decompLead) {
    int canonValue = newData.get(decompLead);
    if ((canonValue & (CANON_HAS_SET | CANON_VALUE_MASK)) == 0 && origin != 0) {
      // origin is the first character whose decomposition starts with
      // the character for which we are setting the value.
      newData.set(decompLead, canonValue | origin);
    } else {
      // origin is not the first character, or it is U+0000.
      UnicodeSet set;
      if ((canonValue & CANON_HAS_SET) == 0) {
        int firstOrigin = canonValue & CANON_VALUE_MASK;
        canonValue = (canonValue & ~CANON_VALUE_MASK) | CANON_HAS_SET | canonStartSets.size();
        newData.set(decompLead, canonValue);
        canonStartSets.add(set = new UnicodeSet());
        if (firstOrigin != 0) {
          set.add(firstOrigin);
        }
      } else {
        set = canonStartSets.get(canonValue & CANON_VALUE_MASK);
      }
      set.add(origin);
    }
  }
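
  // Illustrative sketch (not part of the original source): how a canonIterData value built by
  // addToStartSet() above is meant to be read back, based on the CANON_* bit constants below.
  // Helper and parameter names are hypothetical.
  private void exampleAddCanonStarters(int canonValue, UnicodeSet sink) {
    if ((canonValue & CANON_HAS_SET) != 0) {
      // The low bits index a shared UnicodeSet of origin characters.
      sink.addAll(canonStartSets.get(canonValue & CANON_VALUE_MASK));
    } else if ((canonValue & CANON_VALUE_MASK) != 0) {
      // The low bits hold the single origin character directly.
      sink.add(canonValue & CANON_VALUE_MASK);
    }
  }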

  private VersionInfo dataVersion;

  // Code point thresholds for quick check codes.
  private int minDecompNoCP;
  private int minCompNoMaybeCP;

  // Norm16 value thresholds for quick check combinations and types of extra data.
  private int minYesNo;
  private int minYesNoMappingsOnly;
  private int minNoNo;
  private int limitNoNo;
  private int minMaybeYes;

  private Trie2_16 normTrie;
  private String maybeYesCompositions;
  private String extraData;  // mappings and/or compositions for yesYes, yesNo & noNo characters
  private byte[] smallFCD;  // [0x100] one bit per 32 BMP code points, set if any FCD!=0
  private int[] tccc180;  // [0x180] tccc values for U+0000..U+017F

  private Trie2_32 canonIterData;
  private ArrayList<UnicodeSet> canonStartSets;

  // bits in canonIterData
  private static final int CANON_NOT_SEGMENT_STARTER = 0x80000000;
  private static final int CANON_HAS_COMPOSITIONS = 0x40000000;
  private static final int CANON_HAS_SET = 0x200000;
  private static final int CANON_VALUE_MASK = 0x1fffff;
}