Package net.yacy.cora.document

Source Code of net.yacy.cora.document.UTF8

/**
*  UTF8
*  Copyright 2011 by Michael Peter Christen
*  First released 25.2.2011 at http://yacy.net
*
*  $LastChangedDate: 2011-06-19 15:15:01 +0200 (So, 19. Jun 2011) $
*  $LastChangedRevision: 7784 $
*  $LastChangedBy: orbiter $
*
*  This library is free software; you can redistribute it and/or
*  modify it under the terms of the GNU Lesser General Public
*  License as published by the Free Software Foundation; either
*  version 2.1 of the License, or (at your option) any later version.
*
*  This library is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
*  Lesser General Public License for more details.
*
*  You should have received a copy of the GNU Lesser General Public License
*  along with this program in the file lgpl21.txt
*  If not, see <http://www.gnu.org/licenses/>.
*/

package net.yacy.cora.document;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;

import org.apache.http.entity.mime.content.StringBody;

/**
* convenience class to produce UTF-8 encoding StringBodies and to provide a default
* UTF-8 Charset object.
* Reason: if this is not used in StringBody-Class initialization, a default charset name is parsed.
* This is a synchronized process and all classes using default charsets synchronize at that point
* Synchronization is omitted if this class is used
* @author admin
*
*/
public class UTF8 {

    public final static Charset charset;
    static {
        charset = Charset.forName("UTF-8");
    }

    public final static StringBody StringBody(final byte[] b) {
        return StringBody(UTF8.String(b));
    }

    public final static StringBody StringBody(final String s) {
        try {
            return new StringBody(s, charset);
        } catch (final UnsupportedEncodingException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * using the string method with the default charset given as argument should prevent using the charset cache
     * in FastCharsetProvider.java:118 which locks all concurrent threads using a UTF8.String() method
     * @param bytes
     * @return
     */
    public final static String String(final byte[] bytes) {
        return new String(bytes, 0, bytes.length, charset);
    }

    public final static String String(final byte[] bytes, final int offset, final int length) {
        return new String(bytes, offset, length, charset);
    }

    /**
     * getBytes() as method for String synchronizes during the look-up for the
     * Charset object for the default charset as given with a default charset name.
     * This is the normal process:

    public byte[] getBytes() {
    return StringCoding.encode(value, offset, count);
    }

    static byte[] encode(char[] ca, int off, int len) {
    String csn = Charset.defaultCharset().name();
    try {
        return encode(csn, ca, off, len);
        ...

    static byte[] encode(String charsetName, char[] ca, int off, int len)
    throws UnsupportedEncodingException
    {
    StringEncoder se = (StringEncoder)deref(encoder);
    String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
    if ((se == null) || !(csn.equals(se.requestedCharsetName())
                  || csn.equals(se.charsetName()))) {
        se = null;
        try {
        Charset cs = lookupCharset(csn);
        ....

    private static Charset lookupCharset(String csn) {
    if (Charset.isSupported(csn)) {
        try {
        return Charset.forName(csn);
        ....

    public static Charset forName(String charsetName) {
    Charset cs = lookup(charsetName);
    ....

    private static Charset lookup(String charsetName) {
    if (charsetName == null)
        throw new IllegalArgumentException("Null charset name");

    Object[] a;
    if ((a = cache1) != null && charsetName.equals(a[0]))
        return (Charset)a[1];
    // We expect most programs to use one Charset repeatedly.
    // We convey a hint to this effect to the VM by putting the
    // level 1 cache miss code in a separate method.
    return lookup2(charsetName);
    }

    private static Charset lookup2(String charsetName) {
    Object[] a;
    if ((a = cache2) != null && charsetName.equals(a[0])) {
        cache2 = cache1;
        cache1 = a;
        return (Charset)a[1];
    }

    Charset cs;
    if ((cs = standardProvider.charsetForName(charsetName)) != null ||
        (cs = lookupExtendedCharset(charsetName))           != null ||
        (cs = lookupViaProviders(charsetName))              != null)
    {
        cache(charsetName, cs);
        ....

    At this point the getBytes() call synchronizes at one of the methods
    standardProvider.charsetForName
    lookupExtendedCharset
    lookupViaProviders

     * with our call using a given charset object, the call is much easier to perform
     * and it omits the synchronization for the charset lookup.
     *
     * @param s
     * @return
     */
    public final static byte[] getBytes(final String s) {
        if (s == null) return null;
        return s.getBytes(charset);
    }

}
TOP

Related Classes of net.yacy.cora.document.UTF8

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.