Package jflex.unicode

Source Code of jflex.unicode.UnicodeProperties$UnsupportedUnicodeVersionException

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* JFlex Unicode Properties                                                *
* Copyright (c) 2008-2013  Steve Rowe <sarowe@gmail.com>                  *
* All rights reserved.                                                    *
*                                                                         *
* License: BSD                                                            *
*                                                                         *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

package jflex.unicode;

import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import jflex.IntCharSet;
import jflex.Interval;
import jflex.unicode.data.*;



/**
* This class was automatically generated by jflex-unicode-maven-plugin based
* on data files downloaded from unicode.org.
*/
public class UnicodeProperties {

  public static final String UNICODE_VERSIONS =
    "1.1, 1.1.5, 2, 2.0, 2.0.14, 2.1, 2.1.9, 3, 3.0, 3.0.1, 3.1, 3.1.0, 3.2, 3.2.0, 4, 4.0, 4.0.1, 4.1, 4.1.0, 5, 5.0, 5.0.0, 5.1, 5.1.0, 5.2, 5.2.0, 6, 6.0, 6.0.0, 6.1, 6.1.0, 6.2, 6.2.0, 6.3, 6.3.0";
  private static final String DEFAULT_UNICODE_VERSION =
    "6.3";
  private static final Pattern WORD_SEP_PATTERN = Pattern.compile("[-_\\s()]");

  private int maximumCodePoint;
  private Map<String,IntCharSet> propertyValueIntervals
    = new HashMap<String,IntCharSet>();
  private String caselessMatchPartitions;
  private int caselessMatchPartitionSize;
  private IntCharSet caselessMatches[];

  /**
   * Unpacks the Unicode data corresponding to the default Unicode version:
   * "{@value #DEFAULT_UNICODE_VERSION}".
   *
   * @throws UnsupportedUnicodeVersionException if the default version is not
   *  supported.
   */
  public UnicodeProperties() throws UnsupportedUnicodeVersionException {
    init(DEFAULT_UNICODE_VERSION);
  }

  /**
   * Unpacks the Unicode data corresponding to the given version.
   *
   * @param version The Unicode version for which to unpack data
   * @throws UnsupportedUnicodeVersionException if the given version is not
   *  supported.
   */
  public UnicodeProperties(String version)
    throws UnsupportedUnicodeVersionException {
    init(version);
  }

  /**
   * Returns the maximum code point for the selected Unicode version.
   *
   * @return the maximum code point for the selected Unicode version.
   */
  public int getMaximumCodePoint() {
    return maximumCodePoint;
  }

  /**
   * Returns the character interval set associated with the given property value
   * for the selected Unicode version.
   *
   * @param propertyValue The Unicode property or property value (or alias for
   *  one of these) for which to return the corresponding character intervals.
   * @return The character interval set corresponding to the given property
   *  value, if a match exists, and null otherwise.
   */
  public IntCharSet getIntCharSet(String propertyValue) {
    return propertyValueIntervals.get(normalize(propertyValue));
  }

  /**
   * Returns the set of all properties, property values, and their aliases
   * supported by the specified Unicode version.
   *
   * @return The set of all properties supported by the specified Unicode
   *  version
   */
  public Set<String> getPropertyValues() {
    return propertyValueIntervals.keySet();
  }

  /**
   * Returns a set of character intervals representing all characters
   * that are case-insensitively equivalent to the given character,
   * including the given character itself.
   * <p/>
   * The first call to this method lazily initializes the backing data.
   *
   * @param c The character for which to return case-insensitive equivalents.
   * @return All case-insensitively equivalent characters, or null
   *  if the given character is case-insensitively equivalent only to itself.
   */
  public IntCharSet getCaselessMatches(char c) {
    if (null == caselessMatches)
      initCaselessMatches();
    return caselessMatches[c];
  }

  /**
   * Unpacks the caseless match data. Called from
   * {@link #getCaselessMatches(char)} to lazily initialize.
   */
  private void initCaselessMatches() {
    caselessMatches = new IntCharSet[maximumCodePoint + 1];
    int[] members = new int[caselessMatchPartitionSize];
    for (int index = 0 ; index < caselessMatchPartitions.length() ; ) {
      IntCharSet partition = new IntCharSet();
      for (int n = 0 ; n < caselessMatchPartitionSize ; ++n) {
        int c = caselessMatchPartitions.codePointAt(index);
        index += (c <= 0xFFFF ? 1 : 2);
        members[n] = c;
        //TODO: Remove BMP boundary condition
        if (c > 0 && c <= 0xFFFF)
          //TODO: Change the character type from char to int
          partition.add((char)c);
      }
      if (partition.containsElements()) {
        for (int n = 0 ; n < caselessMatchPartitionSize ; ++n) {
          if (members[n] > 0)
            caselessMatches[members[n]] = partition;
        }
      }
    }
  }

  /**
   * Based on the given version, selects and binds the corresponding Unicode
   * data to facilitate mappings from property values to character intervals.
   *
   * @param version The Unicode version for which to bind data
   * @throws UnsupportedUnicodeVersionException if the given version is not
   *  supported.
   */
  private void init(String version) throws UnsupportedUnicodeVersionException {

    if (version.equals("1.1") || version.equals("1.1.5")) {
      bind(Unicode_1_1.propertyValues, Unicode_1_1.intervals, Unicode_1_1.propertyValueAliases,
         Unicode_1_1.maximumCodePoint, Unicode_1_1.caselessMatchPartitions, Unicode_1_1.caselessMatchPartitionSize);
    } else if (version.equals("2") || version.equals("2.0") || version.equals("2.0.14")) {
      bind(Unicode_2_0.propertyValues, Unicode_2_0.intervals, Unicode_2_0.propertyValueAliases,
         Unicode_2_0.maximumCodePoint, Unicode_2_0.caselessMatchPartitions, Unicode_2_0.caselessMatchPartitionSize);
    } else if (version.equals("2.1") || version.equals("2.1.9")) {
      bind(Unicode_2_1.propertyValues, Unicode_2_1.intervals, Unicode_2_1.propertyValueAliases,
         Unicode_2_1.maximumCodePoint, Unicode_2_1.caselessMatchPartitions, Unicode_2_1.caselessMatchPartitionSize);
    } else if (version.equals("3") || version.equals("3.0") || version.equals("3.0.1")) {
      bind(Unicode_3_0.propertyValues, Unicode_3_0.intervals, Unicode_3_0.propertyValueAliases,
         Unicode_3_0.maximumCodePoint, Unicode_3_0.caselessMatchPartitions, Unicode_3_0.caselessMatchPartitionSize);
    } else if (version.equals("3.1") || version.equals("3.1.0")) {
      bind(Unicode_3_1.propertyValues, Unicode_3_1.intervals, Unicode_3_1.propertyValueAliases,
         Unicode_3_1.maximumCodePoint, Unicode_3_1.caselessMatchPartitions, Unicode_3_1.caselessMatchPartitionSize);
    } else if (version.equals("3.2") || version.equals("3.2.0")) {
      bind(Unicode_3_2.propertyValues, Unicode_3_2.intervals, Unicode_3_2.propertyValueAliases,
         Unicode_3_2.maximumCodePoint, Unicode_3_2.caselessMatchPartitions, Unicode_3_2.caselessMatchPartitionSize);
    } else if (version.equals("4") || version.equals("4.0") || version.equals("4.0.1")) {
      bind(Unicode_4_0.propertyValues, Unicode_4_0.intervals, Unicode_4_0.propertyValueAliases,
         Unicode_4_0.maximumCodePoint, Unicode_4_0.caselessMatchPartitions, Unicode_4_0.caselessMatchPartitionSize);
    } else if (version.equals("4.1") || version.equals("4.1.0")) {
      bind(Unicode_4_1.propertyValues, Unicode_4_1.intervals, Unicode_4_1.propertyValueAliases,
         Unicode_4_1.maximumCodePoint, Unicode_4_1.caselessMatchPartitions, Unicode_4_1.caselessMatchPartitionSize);
    } else if (version.equals("5") || version.equals("5.0") || version.equals("5.0.0")) {
      bind(Unicode_5_0.propertyValues, Unicode_5_0.intervals, Unicode_5_0.propertyValueAliases,
         Unicode_5_0.maximumCodePoint, Unicode_5_0.caselessMatchPartitions, Unicode_5_0.caselessMatchPartitionSize);
    } else if (version.equals("5.1") || version.equals("5.1.0")) {
      bind(Unicode_5_1.propertyValues, Unicode_5_1.intervals, Unicode_5_1.propertyValueAliases,
         Unicode_5_1.maximumCodePoint, Unicode_5_1.caselessMatchPartitions, Unicode_5_1.caselessMatchPartitionSize);
    } else if (version.equals("5.2") || version.equals("5.2.0")) {
      bind(Unicode_5_2.propertyValues, Unicode_5_2.intervals, Unicode_5_2.propertyValueAliases,
         Unicode_5_2.maximumCodePoint, Unicode_5_2.caselessMatchPartitions, Unicode_5_2.caselessMatchPartitionSize);
    } else if (version.equals("6") || version.equals("6.0") || version.equals("6.0.0")) {
      bind(Unicode_6_0.propertyValues, Unicode_6_0.intervals, Unicode_6_0.propertyValueAliases,
         Unicode_6_0.maximumCodePoint, Unicode_6_0.caselessMatchPartitions, Unicode_6_0.caselessMatchPartitionSize);
    } else if (version.equals("6.1") || version.equals("6.1.0")) {
      bind(Unicode_6_1.propertyValues, Unicode_6_1.intervals, Unicode_6_1.propertyValueAliases,
         Unicode_6_1.maximumCodePoint, Unicode_6_1.caselessMatchPartitions, Unicode_6_1.caselessMatchPartitionSize);
    } else if (version.equals("6.2") || version.equals("6.2.0")) {
      bind(Unicode_6_2.propertyValues, Unicode_6_2.intervals, Unicode_6_2.propertyValueAliases,
         Unicode_6_2.maximumCodePoint, Unicode_6_2.caselessMatchPartitions, Unicode_6_2.caselessMatchPartitionSize);
    } else if (version.equals("6.3") || version.equals("6.3.0")) {
      bind(Unicode_6_3.propertyValues, Unicode_6_3.intervals, Unicode_6_3.propertyValueAliases,
         Unicode_6_3.maximumCodePoint, Unicode_6_3.caselessMatchPartitions, Unicode_6_3.caselessMatchPartitionSize);
    } else {
      throw new UnsupportedUnicodeVersionException();
    }

  }

  /**
   * Unpacks data for the selected Unicode version, populating
   * {@link #propertyValueIntervals}.
   *
   * @param propertyValues The list of property values, in same order as the
   *  packed data corresponding to them, in the given intervals, for the
   *  selected Unicode version.
   * @param intervals The packed character intervals corresponding to and in the
   *  same order as the given propertyValues, for the selected Unicode version.
   * @param propertyValueAliases Key/value pairs mapping property value aliases
   *  to property values, for the selected Unicode version.
   * @param maximumCodePoint The maximum code point for the selected Unicode
   *  version.
   * @param caselessMatchPartitions The packed caseless match partition data for
   *  the selected Unicode version
   * @param caselessMatchPartitionSize The partition data record length (the
   *  maximum number of elements in a caseless match partition) for the selected
   *  Unicode version.
   */
  private void bind(String[] propertyValues, String[] intervals,
                    String[] propertyValueAliases, int maximumCodePoint,
                    String caselessMatchPartitions, int caselessMatchPartitionSize) {
    // IntCharSet caselessMatches[] is lazily initialized - don't unpack here
    this.caselessMatchPartitions = caselessMatchPartitions;
    this.caselessMatchPartitionSize = caselessMatchPartitionSize;
    this.maximumCodePoint = maximumCodePoint;
    for (int n = 0 ; n < propertyValues.length ; ++n) {
      String propertyValue = propertyValues[n];
      String propertyIntervals = intervals[n];
      IntCharSet set = new IntCharSet();
      for (int index = 0 ; index < propertyIntervals.length() ; ) {
        int start = propertyIntervals.codePointAt(index);
        index += (start <= 0xFFFF ? 1 : 2);
        int end = propertyIntervals.codePointAt(index);
        index += (end <= 0xFFFF ? 1 : 2);
        //TODO: Remove BMP boundary condition
        if (start <= 0xFFFF) {
          //TODO: Change the character type from char to int and remove boundary condition
          set.add(new Interval((char)start, (char)Math.min(end, 0xFFFF)));
        }
      }
      propertyValueIntervals.put(propertyValue, set);
      if (2 == propertyValue.length()) {
        String singleLetter = propertyValue.substring(0, 1);
        IntCharSet singleLetterPropValueSet
          = propertyValueIntervals.get(singleLetter);
        if (null == singleLetterPropValueSet) {
          singleLetterPropValueSet = new IntCharSet();
          propertyValueIntervals.put(singleLetter, singleLetterPropValueSet);
        }
        singleLetterPropValueSet.add(set);
      }
    }
    for (int n = 0 ; n < propertyValueAliases.length ; n += 2) {
      String alias = propertyValueAliases[n];
      String propertyValue = propertyValueAliases[n + 1];
      IntCharSet targetSet = propertyValueIntervals.get(propertyValue);
      if (null != targetSet) {
        propertyValueIntervals.put(alias, targetSet);
      }
    }
    bindInvariantIntervals();
  }

  /**
   * Adds intervals for \p{ASCII} and \p{Any} to {@link #propertyValueIntervals}.
   */
  private void bindInvariantIntervals() {
    //TODO: Change the character type from char to int
    IntCharSet asciiSet = new IntCharSet(new Interval('\000', '\u007F'));
    propertyValueIntervals.put(normalize("ASCII"), asciiSet);

    //TODO: Change the character type from char to int
    //TODO: End of interval should be maximumCodePoint instead of '\uFFFF'
    IntCharSet anySet = new IntCharSet(new Interval('\000', '\uFFFF'));
    propertyValueIntervals.put(normalize("Any"), anySet);
  }

  /**
   * Normalizes the given identifier, by: downcasing; removing whitespace,
   * underscores, hyphens, and parentheses; and substituting '=' for every ':'.
   *
   * @param identifier The identifier to normalize
   * @return The normalized identifier
   */
  private String normalize(String identifier) {
    if (null == identifier)
      return identifier;
    Matcher matcher = WORD_SEP_PATTERN.matcher(identifier.toLowerCase(Locale.ENGLISH));
    return matcher.replaceAll("").replace(':', '=');
  }

  public static class UnsupportedUnicodeVersionException extends Exception {
  private static final long serialVersionUID = -1718158223161422981L;

    public UnsupportedUnicodeVersionException() {
      super("Supported versions: " + UNICODE_VERSIONS);
    }
  }
}
TOP

Related Classes of jflex.unicode.UnicodeProperties$UnsupportedUnicodeVersionException

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.