Package com.ibm.icu.impl

Source Code of com.ibm.icu.impl.CharTrie

/*
******************************************************************************
* Copyright (C) 1996-2011, International Business Machines Corporation and   *
* others. All Rights Reserved.                                               *
******************************************************************************
*/

package com.ibm.icu.impl;

import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;

import com.ibm.icu.text.UTF16;

/**
* Trie implementation which stores data in char, 16 bits.
*
* @author synwee
* @see com.ibm.icu.impl.Trie
* @since release 2.1, Jan 01 2002
*/

// Note: the block calculations still need to be handled later, since CharTrie
// in ICU4C uses the same index array.
public class CharTrie extends Trie {
  // public constructors ---------------------------------------------

  /**
   * <p>
   * Creates a new Trie with the settings for the trie data.
   * </p>
   * <p>
   * Unserialize the 32-bit-aligned input stream and use the data for the trie.
   * </p>
   *
   * @param inputStream
   *            file input stream to a ICU data file, containing the trie
   * @param dataManipulate
   *            object which provides methods to parse the char data
   * @throws IOException
   *             thrown when data reading fails
   */
  public CharTrie(InputStream inputStream, DataManipulate dataManipulate) throws IOException {
    super(inputStream, dataManipulate);

    if (!isCharTrie()) {
      throw new IllegalArgumentException("Data given does not belong to a char trie.");
    }
  }

  /**
   * Make a dummy CharTrie. A dummy trie is an empty runtime trie, used when a real data trie cannot be loaded.
   *
   * The trie always returns the initialValue, or the leadUnitValue for lead surrogate code points. The Latin-1 part is always set up to
   * be linear.
   *
   * @param initialValue
   *            the initial value that is set for all code points
   * @param leadUnitValue
   *            the value for lead surrogate code _units_ that do not have associated supplementary data
   * @param dataManipulate
   *            object which provides methods to parse the char data
   */
  @SuppressWarnings("all")
  // No way to ignore dead code warning specifically - see eclipse bug#282770
  public CharTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
    super(new char[BMP_INDEX_LENGTH + SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);

    int dataLength, latin1Length, i, limit;
    char block;

    /* calculate the actual size of the dummy trie data */

    /* max(Latin-1, block 0) */
    dataLength = latin1Length = INDEX_STAGE_1_SHIFT_ <= 8 ? 256 : DATA_BLOCK_LENGTH;
    if (leadUnitValue != initialValue) {
      dataLength += DATA_BLOCK_LENGTH;
    }
    m_data_ = new char[dataLength];
    m_dataLength_ = dataLength;

    m_initialValue_ = (char) initialValue;

    /* fill the index and data arrays */

    /* indexes are preset to 0 (block 0) */

    /* Latin-1 data */
    for (i = 0; i < latin1Length; ++i) {
      m_data_[i] = (char) initialValue;
    }

    if (leadUnitValue != initialValue) {
      /* indexes for lead surrogate code units to the block after Latin-1 */
      block = (char) (latin1Length >> INDEX_STAGE_2_SHIFT_);
      i = 0xd800 >> INDEX_STAGE_1_SHIFT_;
      limit = 0xdc00 >> INDEX_STAGE_1_SHIFT_;
      for (; i < limit; ++i) {
        m_index_[i] = block;
      }

      /* data for lead surrogate code units */
      limit = latin1Length + DATA_BLOCK_LENGTH;
      for (i = latin1Length; i < limit; ++i) {
        m_data_[i] = (char) leadUnitValue;
      }
    }
  }

  // public methods --------------------------------------------------

  /**
   * Gets the value associated with the codepoint. If no value is associated with the codepoint, a default value will be returned.
   *
   * @param ch
   *            codepoint
   * @return offset to data
   */
  public final char getCodePointValue(int ch) {
    int offset;

    // fastpath for U+0000..U+D7FF
    if (0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
      // copy of getRawOffset()
      offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_) + (ch & INDEX_STAGE_3_MASK_);
      return m_data_[offset];
    }

    // handle U+D800..U+10FFFF
    offset = getCodePointOffset(ch);

    // return -1 if there is an error, in this case we return the default
    // value: m_initialValue_
    return (offset >= 0) ? m_data_[offset] : m_initialValue_;
  }

  /**
   * Gets the value to the data which this lead surrogate character points to. Returned data may contain folding offset information for
   * the next trailing surrogate character. This method does not guarantee correct results for trail surrogates.
   *
   * @param ch
   *            lead surrogate character
   * @return data value
   */
  public final char getLeadValue(char ch) {
    return m_data_[getLeadOffset(ch)];
  }

  /**
   * Get the value associated with the BMP code point. Lead surrogate code points are treated as normal code points, with unfolded values
   * that may differ from getLeadValue() results.
   *
   * @param ch
   *            the input BMP code point
   * @return trie data value associated with the BMP codepoint
   */
  public final char getBMPValue(char ch) {
    return m_data_[getBMPOffset(ch)];
  }

  /**
   * Get the value associated with a pair of surrogates.
   *
   * @param lead
   *            a lead surrogate
   * @param trail
   *            a trail surrogate
   */
  public final char getSurrogateValue(char lead, char trail) {
    int offset = getSurrogateOffset(lead, trail);
    if (offset > 0) {
      return m_data_[offset];
    }
    return m_initialValue_;
  }

  /**
   * <p>
   * Get a value from a folding offset (from the value of a lead surrogate) and a trail surrogate.
   * </p>
   * <p>
   * If the
   *
   * @param leadvalue
   *            value associated with the lead surrogate which contains the folding offset
   * @param trail
   *            surrogate
   * @return trie data value associated with the trail character
   */
  public final char getTrailValue(int leadvalue, char trail) {
    if (m_dataManipulate_ == null) {
      throw new NullPointerException("The field DataManipulate in this Trie is null");
    }
    int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
    if (offset > 0) {
      return m_data_[getRawOffset(offset, (char) (trail & SURROGATE_MASK_))];
    }
    return m_initialValue_;
  }

  /**
   * <p>
   * Gets the latin 1 fast path value.
   * </p>
   * <p>
   * Note this only works if latin 1 characters have their own linear array.
   * </p>
   *
   * @param ch
   *            latin 1 characters
   * @return value associated with latin character
   */
  public final char getLatin1LinearValue(char ch) {
    return m_data_[INDEX_STAGE_3_MASK_ + 1 + m_dataOffset_ + ch];
  }

  /**
   * Checks if the argument Trie has the same data as this Trie
   *
   * @param other
   *            Trie to check
   * @return true if the argument Trie has the same data as this Trie, false otherwise
   */
  ///CLOVER:OFF
  @Override
  public boolean equals(Object other) {
    boolean result = super.equals(other);
    if (result && other instanceof CharTrie) {
      CharTrie othertrie = (CharTrie) other;
      return m_initialValue_ == othertrie.m_initialValue_;
    }
    return false;
  }

  @Override
  public int hashCode() {
    assert false : "hashCode not designed";
    return 42;
  }

  ///CLOVER:ON

  // protected methods -----------------------------------------------

  /**
   * <p>
   * Parses the input stream and stores its trie content into a index and data array
   * </p>
   *
   * @param inputStream
   *            data input stream containing trie data
   * @exception IOException
   *                thrown when data reading fails
   */
  @Override
  protected final void unserialize(InputStream inputStream) throws IOException {
    DataInputStream input = new DataInputStream(inputStream);
    int indexDataLength = m_dataOffset_ + m_dataLength_;
    m_index_ = new char[indexDataLength];
    for (int i = 0; i < indexDataLength; i++) {
      m_index_[i] = input.readChar();
    }
    m_data_ = m_index_;
    m_initialValue_ = m_data_[m_dataOffset_];
  }

  /**
   * Gets the offset to the data which the surrogate pair points to.
   *
   * @param lead
   *            lead surrogate
   * @param trail
   *            trailing surrogate
   * @return offset to data
   */
  @Override
  protected final int getSurrogateOffset(char lead, char trail) {
    if (m_dataManipulate_ == null) {
      throw new NullPointerException("The field DataManipulate in this Trie is null");
    }

    // get fold position for the next trail surrogate
    int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));

    // get the real data from the folded lead/trail units
    if (offset > 0) {
      return getRawOffset(offset, (char) (trail & SURROGATE_MASK_));
    }

    // return -1 if there is an error, in this case we return the default
    // value: m_initialValue_
    return -1;
  }

  /**
   * Gets the value at the argument index. For use internally in TrieIterator.
   *
   * @param index
   *            value at index will be retrieved
   * @return 32 bit value
   * @see com.ibm.icu.impl.TrieIterator
   */
  @Override
  protected final int getValue(int index) {
    return m_data_[index];
  }

  /**
   * Gets the default initial value
   *
   * @return 32 bit value
   */
  @Override
  protected final int getInitialValue() {
    return m_initialValue_;
  }

  // private data members --------------------------------------------

  /**
   * Default value
   */
  private char m_initialValue_;
  /**
   * Array of char data
   */
  private char m_data_[];
}
TOP

Related Classes of com.ibm.icu.impl.CharTrie

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.