Package org.apache.abdera.i18n.io

Source Code of org.apache.abdera.i18n.io.CharUtils

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  The ASF licenses this file to You
* under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.  For additional information regarding
* copyright in this work, please see the NOTICE file in the top level
* directory of this distribution.
*/
package org.apache.abdera.i18n.io;

import java.util.BitSet;

import org.apache.abdera.i18n.io.CodepointIterator;
import org.apache.abdera.i18n.io.InvalidCharacterException;
import org.apache.abdera.i18n.io.RestrictedCodepointIterator;

/**
* General utilities for dealing with Unicode characters
*/
public final class CharUtils {

  private CharUtils() {}
  public static boolean isValidCodepoint(int d) {
    return d >= 0x000000 && d <= 0x10ffff;
  }
 
  public static int scanNot(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true,true);
    while (rci.hasNext()) rci.next();
    return rci.position;
  }
 
  public static int scanNot(char[] array, BitSet set) throws InvalidCharacterException {
    CodepointIterator ci = CodepointIterator.forCharArray(array);
    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true,true);
    while (rci.hasNext()) rci.next();
    return rci.position;
  }
 
  public static int scan(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true);
    while (rci.hasNext()) rci.next();
    return rci.position();
  }
 
  public static int scan(char[] array, BitSet set) throws InvalidCharacterException {
    CodepointIterator ci = CodepointIterator.forCharArray(array);
    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true);
    while (rci.hasNext()) rci.next();
    return rci.position();
  }
 
  public static int scan(String s, BitSet set) throws InvalidCharacterException {
    CodepointIterator ci = CodepointIterator.forCharSequence(s);
    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true);
    while (rci.hasNext()) rci.next();
    return rci.position;
  }
 
  public static void verifyNot(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false,true);
    while (rci.hasNext()) rci.next();
  }
 
  public static void verifyNot(char[] array, BitSet set) throws InvalidCharacterException {
    CodepointIterator ci = CodepointIterator.forCharArray(array);
    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false,true);
    while (rci.hasNext()) rci.next();
  }
 
  public static void verify(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false);
    while (rci.hasNext()) rci.next();
  }
 
  public static void verify(char[] array, BitSet set) throws InvalidCharacterException {
    CodepointIterator ci = CodepointIterator.forCharArray(array);
    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false);
    while (rci.hasNext()) rci.next();
  }
 
  public static void verify(String s, BitSet set) throws InvalidCharacterException {
    if (s == null) return;
    CodepointIterator ci = CodepointIterator.forCharSequence(s);
    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false);
    while (rci.hasNext()) rci.next();
  }
 
  public static boolean inRange(char[] chars, char low, char high) {
    for (int i = 0; i < chars.length; i++)
      if (chars[i] < low || chars[i] > high) return false;
    return true;
  }

  public static boolean inRange(char[] chars, int low, int high) {
    for (int i = 0; i < chars.length; i++) {
      char n = chars[i];
      int c = (isHighSurrogate(n) &&
               i + 1 < chars.length &&
               isLowSurrogate(chars[i+1])) ? toCodePoint(n,chars[i++]) : n;
      if (c < low || c > high) return false;
    }
    return true;
  }
 
  public static boolean isSet(int n, BitSet... sets) {
    if (n == -1) return false;
    BitSet set = new BitSet();
    for (BitSet s : sets) set.or(s);
    return set.get(n);
  }
 
  public static void append(StringBuffer buf, int c) {
    if (isSupplementary(c)) {
      buf.append(getHighSurrogate(c));
      buf.append(getLowSurrogate(c));
    } else buf.append((char)c);
  }
 
  public static char getHighSurrogate(int c) {
    return (c >= 0x10000) ?
       (char)((0xD800 - (0x10000 >> 10)) + (c >> 10)) : 0;
  }

  public static char getLowSurrogate(int c) {   
    return (c >= 0x10000) ?
        (char)(0xDC00 + (c & 0x3FF)) : (char)c;
  }
 
  public static boolean isHighSurrogate(char c) {
    return c <= '\uDBFF' && c >= '\uD800';
  }

  public static boolean isLowSurrogate(char c) {
    return c <= '\uDFFF' && c >= '\uDC00';
  }
 
  public static boolean isSupplementary(int c) {
    return c <= 0x10ffff && c >= 0x010000;
  }
 
  public static boolean isSurrogatePair(char high, char low) {
    return isHighSurrogate(high) && isLowSurrogate(low);
  }
 
  public static int toCodePoint(char[] chars) {
    return toCodePoint(chars[0],chars[1]);
  }
 
  public static int toCodePoint(char high, char low) {
    return ((high - '\uD800') << 10) + (low - '\uDC00') + 0x010000;   
  }

  public static int charAt(String s, int i) {
    char c = s.charAt(i);
    if (c < 0xD800 || c > 0xDFFF) return c;
    if (isHighSurrogate(c)) {
      if (s.length() != i) {
        char low = s.charAt(i+1);
        if (isLowSurrogate(low)) return toCodePoint(c,low);
      }
    } else if (isLowSurrogate(c)) {
      if (i >= 1) {
        char high = s.charAt(i-1);
        if (isHighSurrogate(high)) return toCodePoint(high,c);
      }
    }
    return c;
  }
 
  public static int charAt(StringBuffer s, int i) {
    char c = s.charAt(i);
    if (c < 0xD800 || c > 0xDFFF) return c;
    if (isHighSurrogate(c)) {
      if (s.length() != i) {
        char low = s.charAt(i+1);
        if (isLowSurrogate(low)) return toCodePoint(c,low);
      }
    } else if (isLowSurrogate(c)) {
      if (i >= 1) {
        char high = s.charAt(i-1);
        if (isHighSurrogate(high)) return toCodePoint(high,c);
      }
    }
    return c;
  }
 
  public static void insert(StringBuffer s, int i, int c) {
    if (i > 0 && i < s.length()) {
      char ch = s.charAt(i);
      boolean low = isLowSurrogate(ch);
      if (low) {
        if (low && isHighSurrogate(s.charAt(i-1))) {
          i--;
        }
      }
    }
    s.insert(i, toString(c));
  }
 
  public static void setChar(StringBuffer s, int i, int c) {
    int l = 1;
    char ch = s.charAt(i);
    boolean high = isHighSurrogate(ch);
    boolean low = isLowSurrogate(ch);
    if (high || low) {
      if (high && (i+1) < s.length() && isLowSurrogate(s.charAt(i+1))) l++;
      else {
        if (low && i > 0 && isHighSurrogate(s.charAt(i-1))) {
          i--; l++;
        }
      }
    }
    s.replace(i, i+l, toString(c));
  }
 
  public static int size(int c) {
    return (isSupplementary(c)) ? 2 : 1;
  }
 
  private static String supplementaryToString(int c) {
    StringBuffer buf = new StringBuffer();
    buf.append((char)getHighSurrogate(c));
    buf.append((char)getLowSurrogate(c));
    return buf.toString();
  }
 
  public static String toString(int c) {
    return (isSupplementary(c)) ?
      supplementaryToString(c) :
      String.valueOf((char)c);
  }
 
 

  private static final char LRE = 0x202A;
  private static final char RLE = 0x202B;
  private static final char LRO = 0x202D;
  private static final char RLO = 0x202E;
  private static final char LRM = 0x200E;
  private static final char RLM = 0x200F;
  private static final char PDF = 0x202C;
 
  /**
   * Removes leading and trailing bidi controls from the string
   */
  public static String stripBidi(String s) {
    if (s == null || s.length() <= 1) return s;
    if (charIsBidiControl(s.charAt(0)))
      s = s.substring(1);
    if (charIsBidiControl(s.charAt(s.length()-1)))
      s = s.substring(0,s.length()-1);
    return s;
  }
 
  /**
   * Returns true if the character is a bidi control
   */
  public static boolean charIsBidiControl(char c) {
    return c == 0x202A ||
    c == LRE ||
    c == RLE ||
    c == LRO ||
    c == RLO ||
    c == RLM ||
    c == LRM ||
    c == PDF;
  }
 
  private static String wrap(String s, char c1, char c2) {
    StringBuffer buf = new StringBuffer(s);
    if (buf.length() > 1) {
      if (buf.charAt(0) != c1) buf.insert(0, c1);
      if (buf.charAt(buf.length()-1) != c2) buf.append(c2);
    }
    return buf.toString();
  }
 
  /**
   * Wrap the string with Bidi Right-to-Left embed
   */
  public static String bidiRLE(String s) {
    return wrap(s,RLE,PDF);
  }
 
  /**
   * Wrap the string with Bidi Right-to-Left override
   */
  public static String bidiRLO(String s) {
    return wrap(s,RLO,PDF);
  }
 
  /**
   * Wrap the string with Bidi Left-to-Right embed
   */
  public static String bidiLRE(String s) {
    return wrap(s,LRE,PDF);
  }
 
  /**
   * Wrap the string with Bidi Left-to-Right override
   */
  public static String bidiLRO(String s) {
    return wrap(s,LRO,PDF);
  }
 
  /**
   * Wrap the string with Bidi RML marks
   */
  public static String bidiRLM(String s) {
    return wrap(s,RLM,RLM);
  }
 
  /**
   * Wrap the string with Bidi LRM marks
   */
  public static String bidiLRM(String s) {
    return wrap(s,LRM,LRM);
  }
}
TOP

Related Classes of org.apache.abdera.i18n.io.CharUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.