Source Code of net.yacy.cora.document.UTF8

/**
 *  UTF8
 *  Copyright 2011 by Michael Peter Christen
 *  First released 25.2.2011 at http://yacy.net
 *
 *  $LastChangedDate: 2011-06-19 15:15:01 +0200 (So, 19. Jun 2011) $
 *  $LastChangedRevision: 7784 $
 *  $LastChangedBy: orbiter $
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */


package net.yacy.cora.document;


import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;


import org.apache.http.entity.mime.content.StringBody;


/**
 * convenience class to produce UTF-8 encoding StringBodies and to provide a default
 * UTF-8 Charset object.
 * Reason: if this is not used in StringBody-Class initialization, a default charset name is parsed.
 * This is a synchronized process and all classes using default charsets synchronize at that point
 * Synchronization is omitted if this class is used
 * @author admin
 *
 */
public class UTF8 {


    public final static Charset charset;
    static {
        charset = Charset.forName("UTF-8");
    }


    public final static StringBody StringBody(final byte[] b) {
        return StringBody(UTF8.String(b));
    }


    public final static StringBody StringBody(final String s) {
        try {
            return new StringBody(s, charset);
        } catch (final UnsupportedEncodingException e) {
            e.printStackTrace();
            return null;
        }
    }


    /**
     * using the string method with the default charset given as argument should prevent using the charset cache
     * in FastCharsetProvider.java:118 which locks all concurrent threads using a UTF8.String() method
     * @param bytes
     * @return
     */
    public final static String String(final byte[] bytes) {
        return new String(bytes, 0, bytes.length, charset);
    }


    public final static String String(final byte[] bytes, final int offset, final int length) {
        return new String(bytes, offset, length, charset);
    }


    /**
     * getBytes() as method for String synchronizes during the look-up for the
     * Charset object for the default charset as given with a default charset name.
     * This is the normal process:


    public byte[] getBytes() {
    return StringCoding.encode(value, offset, count);
    }


    static byte[] encode(char[] ca, int off, int len) {
    String csn = Charset.defaultCharset().name();
    try {
        return encode(csn, ca, off, len);
        ...


    static byte[] encode(String charsetName, char[] ca, int off, int len)
    throws UnsupportedEncodingException
    {
    StringEncoder se = (StringEncoder)deref(encoder);
    String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
    if ((se == null) || !(csn.equals(se.requestedCharsetName())
                  || csn.equals(se.charsetName()))) {
        se = null;
        try {
        Charset cs = lookupCharset(csn);
        ....


    private static Charset lookupCharset(String csn) {
    if (Charset.isSupported(csn)) {
        try {
        return Charset.forName(csn);
        ....


    public static Charset forName(String charsetName) {
    Charset cs = lookup(charsetName);
    ....


    private static Charset lookup(String charsetName) {
    if (charsetName == null)
        throw new IllegalArgumentException("Null charset name");


    Object[] a;
    if ((a = cache1) != null && charsetName.equals(a[0]))
        return (Charset)a[1];
    // We expect most programs to use one Charset repeatedly.
    // We convey a hint to this effect to the VM by putting the
    // level 1 cache miss code in a separate method.
    return lookup2(charsetName);
    }


    private static Charset lookup2(String charsetName) {
    Object[] a;
    if ((a = cache2) != null && charsetName.equals(a[0])) {
        cache2 = cache1;
        cache1 = a;
        return (Charset)a[1];
    }


    Charset cs;
    if ((cs = standardProvider.charsetForName(charsetName)) != null ||
        (cs = lookupExtendedCharset(charsetName))           != null ||
        (cs = lookupViaProviders(charsetName))              != null)
    {
        cache(charsetName, cs);
        ....


    At this point the getBytes() call synchronizes at one of the methods
    standardProvider.charsetForName
    lookupExtendedCharset
    lookupViaProviders


     * with our call using a given charset object, the call is much easier to perform
     * and it omits the synchronization for the charset lookup.
     *
     * @param s
     * @return
     */
    public final static byte[] getBytes(final String s) {
        if (s == null) return null;
        return s.getBytes(charset);
    }


}
Source Code of net.yacy.cora.document.UTF8

Related Classes of net.yacy.cora.document.UTF8