Package org.apache.abdera.ext.bidi

Source Code of org.apache.abdera.ext.bidi.BidiHelper

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  The ASF licenses this file to You
* under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.  For additional information regarding
* copyright in this work, please see the NOTICE file in the top level
* directory of this distribution.
*/
package org.apache.abdera.ext.bidi;

import java.text.AttributedString;
import java.text.Bidi;
import java.util.Locale;

import javax.xml.namespace.QName;

import org.apache.abdera.model.Base;
import org.apache.abdera.model.Element;
import org.apache.abdera.i18n.io.CharUtils;
import org.apache.abdera.i18n.lang.Lang;

/**
* <p>This is (hopefully) temporary.  Ideally, this would be wrapped into the
* core model API so that the bidi stuff is handled seamlessly.  There are
* still details being worked out on the Atom WG list and it's likely that
* at least one other impl (mozilla) will do something slightly different.</p>
*
* <p>Based on http://www.ietf.org/internet-drafts/draft-snell-atompub-bidi-04.txt</p>
*
* <p>Example:</p>
* <pre>
*   &lt;feed xmlns="http://www.w3.org/2005/Atom" dir="rtl">
*     ...
*   &lt;/feed>
* </pre>
*
* <p>The getBidi___ elements use the in-scope direction to wrap the text with
* the appropriate Unicode control characters. e.g. if dir="rtl", the text is
* wrapped with the RLE and PDF controls.  If the text already contains the
* control chars, the dir attribute is ignored.</p>
*
* <pre>
*    org.apache.abdera.Abdera abdera = new org.apache.abdera.Abdera();
*    org.apache.abdera.model.Feed feed = abdera.getFactory().newFeed();
*    feed.setAttributeValue("dir", "rtl");
*    feed.setTitle("Testing");
*    feed.addCategory("foo");
*   
*    System.out.println(
*      BidiHelper.getBidiElementText(
*        feed.getTitleElement()));
*    System.out.println(
*      BidiHelper.getBidiAttributeValue(
*        feed.getCategories().get(0),"term"));
* </pre>
*
*/
public final class BidiHelper {

  private static final QName DIR = new QName("dir");
 
  BidiHelper() {}
 
  public enum Direction { UNSPECIFIED, LTR, RTL};
 
  /**
   * Set the value of dir attribute
   */
  public static <T extends Element>void setDirection(
    Direction direction,
    T element) {
      if (direction != Direction.UNSPECIFIED)
        element.setAttributeValue(
          DIR,
          direction.toString().toLowerCase());
      else if (direction == Direction.UNSPECIFIED)
        element.setAttributeValue(DIR,"");
      else if (direction == null)
        element.removeAttribute(DIR);
  }
 
  /**
   * Get the in-scope direction for an element.
   */
  public static <T extends Element>Direction getDirection(T element) {
    Direction direction = Direction.UNSPECIFIED;
    String dir = element.getAttributeValue("dir");
    if (dir != null && dir.length() > 0)
      direction = Direction.valueOf(dir.toUpperCase());
    else if (dir == null) {
      // if the direction is unspecified on this element,
      // let's see if we've inherited it
      Base parent = element.getParentElement();
      if (parent != null &&
          parent instanceof Element)
        direction = getDirection((Element)parent);
    }
    return direction;
  }
 
  /**
   * Return the specified text with appropriate Unicode Control Characters given
   * the specified Direction.
   * @param direction The Directionality of the text
   * @param text The text to wrap within Unicode Control Characters
   * @return The directionally-wrapped text 
   */
  public static String getBidiText(Direction direction, String text) {
    switch (direction) {
      case LTR: return CharUtils.bidiLRE(text);
      case RTL: return CharUtils.bidiRLE(text);
      defaultreturn text;
    }
  }
 
  /**
   * Return the textual content of a child element using the in-scope directionality
   * @param element The parent element
   * @param child The XML QName of the child element
   * @return The directionally-wrapped text of the child element
   */
  public static <T extends Element>String getBidiChildText(T element, QName child) {
    Element el = element.getFirstChild(child);
    return (el != null) ? getBidiText(getDirection(el),el.getText()) : null;
  }
 
  /**
   * Return the textual content of the specified element
   * @param element An element containing directionally-sensitive text
   * @return The directionally-wrapped text of the element
   */
  public static <T extends Element>String getBidiElementText(T element) {
    return getBidiText(getDirection(element),element.getText());
  }
 
  /**
   * Return the text content of the specified attribute using the in-scope directionality
   * @param element The parent element
   * @param name the name of the attribute
   * @return The directionally-wrapped text of the attribute
   */
  public static <T extends Element>String getBidiAttributeValue(T element, String name) {
    return getBidiText(getDirection(element),element.getAttributeValue(name));
  }
 
  /**
   * Return the text content of the specified attribute using the in-scope directionality
   * @param element The parent element
   * @param name the name of the attribute
   * @return The directionally-wrapped text of the attribute
   */
  public static <T extends Element>String getBidiAttributeValue(T element, QName name) {
    return getBidiText(getDirection(element),element.getAttributeValue(name));
  }
 
 
  /**
   * Attempt to guess the base direction using the in-scope language. 
   * Implements the method used by Internet Explorer 7's feed view
   * documented here: http://blogs.msdn.com/rssteam/archive/2007/05/17/reading-feeds-in-right-to-left-order.aspx.
   *
   * This algorithm differs slightly from the method documented in that the
   * primary language tag is case insensitive.
   *
   * If the language tag is not specified, then the default Locale is used to
   * determine the direction.
   *
   * If the dir attribute is specified, the direction will be determine using it's value
   * instead of the language
   */
  public static <T extends Element>Direction guessDirectionFromLanguage(T element) {
    return guessDirectionFromLanguage(element, false);
  }
 
  /**
   * Attempt to guess the base direction using the in-scope language. 
   * Implements the method used by Internet Explorer 7's feed view
   * documented here: http://blogs.msdn.com/rssteam/archive/2007/05/17/reading-feeds-in-right-to-left-order.aspx.
   *
   * This algorithm differs slightly from the method documented in that the
   * primary language tag is case insensitive.
   *
   * If the language tag is not specified, then the default Locale is used to
   * determine the direction.
   *
   * According to the Atom Bidi spec, if the dir attribute is set explicitly, we
   * should not do language guessing.  This restriction can be bypassed by setting
   * ignoredir to true.
   */
  public static <T extends Element>Direction guessDirectionFromLanguage(T element, boolean ignoredir) {
    if (!ignoredir && hasDirection(element)) return getDirection(element);
    Lang lang = element.getLanguageTag();
    if (lang == null) {
      Locale l = Locale.getDefault();
      lang = new Lang(l.getLanguage());
    }
    String primary = lang.getPrimary();
    return (primary.equalsIgnoreCase("ar") ||
            primary.equalsIgnoreCase("fa") ||
            primary.equalsIgnoreCase("ur") ||
            primary.equalsIgnoreCase("ps") ||
            primary.equalsIgnoreCase("syr") ||
            primary.equalsIgnoreCase("dv") ||
            primary.equalsIgnoreCase("he") ||
            primary.equalsIgnoreCase("yi")) ? Direction.RTL : Direction.LTR;
  }

  /**
   * Attempt to guess the base direction of an element using an analysis of
   * the directional properties of the characters used.  This is a brute-force
   * style approach that can achieve fairly reasonable results when the element
   * text consists primarily of characters with the same bidi properties.  This
   * approach is implemented by the Snarfer feed reader as is documented at
   * http://www.xn--8ws00zhy3a.com/blog/2006/12/right-to-left-rss  
   *
   * If the dir attribute is specified, the direction will be determine using it's value
   * instead of the characteristics of the text
   */
  public static <T extends Element>Direction guessDirectionFromTextProperties(T element) {
    return guessDirectionFromTextProperties(element, false);
  }
 
  /**
   * Attempt to guess the base direction of an element using an analysis of
   * the directional properties of the characters used.  This is a brute-force
   * style approach that can achieve fairly reasonable results when the element
   * text consists primarily of characters with the same bidi properties.  This
   * approach is implemented by the Snarfer feed reader as is documented at
   * http://www.xn--8ws00zhy3a.com/blog/2006/12/right-to-left-rss  
   *
   * According to the Atom Bidi spec, if the dir attribute is set explicitly, we
   * should not do language guessing.  This restriction can be bypassed by setting
   * ignoredir to true.
   */
  public static <T extends Element>Direction guessDirectionFromTextProperties(T element, boolean ignoredir) {
    Direction dir = Direction.UNSPECIFIED;
    if (!ignoredir && hasDirection(element)) return getDirection(element);
    String text = element.getText();
    if (text != null) {
      int c = 0;
      for (int n = 0; n < text.length(); n++) {
        char ch = text.charAt(n);
        if (Bidi.requiresBidi(new char[] {ch}, 0, 1)) c++;
        else c--;
      }
      dir = (c > 0) ? Direction.RTL : Direction.LTR;
    }
    return dir;
  }

  /**
   * Use Java's built in support for bidi text to determine the base directionality
   * of the element's text.  The response to this only indicates the *base* directionality,
   * it does not indicate whether or not there are any RTL characters in the text.
   *
   * If the dir attribute is specified, the direction will be determine using it's value
   * instead of the characteristics of the text
   */
  public static <T extends Element>Direction guessDirectionFromJavaBidi(T element) {
    return guessDirectionFromJavaBidi(element, false);
  }
 
  /**
   * Use Java's built in support for bidi text to determine the base directionality
   * of the element's text.  The response to this only indicates the *base* directionality,
   * it does not indicate whether or not there are any RTL characters in the text.
   *
   * According to the Atom Bidi spec, if the dir attribute is set explicitly, we
   * should not do language guessing.  This restriction can be bypassed by setting
   * ignoredir to true.
   */
  public static <T extends Element>Direction guessDirectionFromJavaBidi(T element, boolean ignoredir) {
    Direction dir = Direction.UNSPECIFIED;
    if (!ignoredir && hasDirection(element)) return getDirection(element);
    String text = element.getText();
    if (text != null) {
      AttributedString s = new AttributedString(text);
      Bidi bidi = new Bidi(s.getIterator());
      dir = (bidi.baseIsLeftToRight()) ? Direction.LTR : Direction.RTL;
    }
    return dir;
  }
 
  private static <T extends Element>boolean hasDirection(T element) {
    boolean answer = false;
    String dir = element.getAttributeValue("dir");
    if (dir != null && dir.length() > 0)
      answer = true;
    else if (dir == null) {
      // if the direction is unspecified on this element,
      // let's see if we've inherited it
      Base parent = element.getParentElement();
      if (parent != null &&
          parent instanceof Element)
        answer = hasDirection((Element)parent);
    }
    return answer;
  }
}
TOP

Related Classes of org.apache.abdera.ext.bidi.BidiHelper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.