/*
* Copyright 2004-2005 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.sf.jml.util;
import java.lang.ref.SoftReference;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.ArrayList;
import java.util.Collection;
/**
* Utils for charset encode and decode.
* <p>
* Be careful when directly use Charset.encode/Charset.decode/CharsetEncoder.encode
* /CharsetDecoder.decode. Sometimes it will amazing you!
* <pre>
* For example:
* String s = "a";
* Charset charset = Charset.forName("UTF-8");
* String utf8 = charset.decode(charset.encode(s));
* charset = Charset.forName("GBK");
* String gbk = charset.decode(charset.encode(s));
*
* Run such exmaple, utf8 is "a", but gbk is ""!!!
* </pre>
* <p>
* That's because of CharsetDecode.decode use averageCharsPerByte to determine result's length.
* String s's length is 1, UTF-8's averageCharsPerByte is 1.0, GBK's averageCharsPerByte
* is 0.5. So int 1*1.0 == 1, but 1 * 0.5 == 0(cast to int)!!! So GBK returns nothing!
* <p>
* I have post this bug on sun's bug database. See http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6211132.
*
* @author Roger Chen
*/
public final class CharsetUtils {
/**
* System charset utils.
*/
public static final CharsetUtils SYSTEM = new CharsetUtils(System
.getProperty("file.encoding"));
/**
* UTF-8 charset utils.
*/
public static final CharsetUtils UTF8 = new CharsetUtils("UTF-8");
private Charset charset;
//Charset is thread-safe, but CharsetEncoder and CharsetDecoder are not!
private ThreadLocal encoderCache = new ThreadLocal();
private ThreadLocal decoderCache = new ThreadLocal();
private static Object getReference(ThreadLocal threadLocal) {
SoftReference reference = (SoftReference) threadLocal.get();
if (reference != null)
return reference.get();
return null;
}
private static void setReference(ThreadLocal threadLocal, Object object) {
threadLocal.set(new SoftReference(object));
}
public CharsetUtils(String charsetName) {
charset = Charset.forName(charsetName);
}
/**
* Get charset name.
*
* @return
* charset name
*/
public String getCharsetName() {
return charset.name();
}
private CharsetEncoder getEncoder() {
CharsetEncoder encoder = (CharsetEncoder) getReference(encoderCache);
if (encoder == null) {
encoder = charset.newEncoder().onMalformedInput(
CodingErrorAction.REPLACE).onUnmappableCharacter(
CodingErrorAction.REPLACE);
setReference(encoderCache, encoder);
}
return encoder;
}
private CharsetDecoder getDecoder() {
CharsetDecoder decoder = (CharsetDecoder) getReference(decoderCache);
if (decoder == null) {
decoder = charset.newDecoder().onMalformedInput(
CodingErrorAction.REPLACE).onUnmappableCharacter(
CodingErrorAction.REPLACE);
setReference(decoderCache, decoder);
}
return decoder;
}
/**
* Encode string to ByteBuffer.
*
* @param s
* the string
* @return
* the encoded ByteBuffer
*/
public ByteBuffer encode(String s) {
return encode(CharBuffer.wrap(s));
}
/**
* Encode CharBuffer to ByteBuffer.
*
* @param buffer
* the CharBuffer
* @return
* the encoded ByteBuffer
*/
public ByteBuffer encode(CharBuffer buffer) {
CharsetEncoder encoder = getEncoder();
int n = 0;
if (buffer.remaining() > 0) {
n = (int) (buffer.remaining() * encoder.averageBytesPerChar());
if (n == 0)
n = (int) (buffer.remaining() * encoder.maxBytesPerChar());
}
ByteBuffer result = ByteBuffer.allocate(n);
if (n == 0)
return result;
encoder.reset();
while (true) {
CoderResult cr = buffer.hasRemaining() ? encoder.encode(buffer,
result, true) : encoder.flush(result);
if (cr.isUnderflow())
break;
if (cr.isOverflow()) {
n *= 2;
result.flip();
result = ByteBuffer.allocate(n).put(result);
continue;
}
}
result.flip();
return result;
}
/**
* Encode CharBuffer to ByteBuffer array. Every ByteBuffer's capacity
* will equals or less than bufferMaxLength, and will not contain
* half of char in CharBuffer.
*
* @param buffer
* the CharBuffer
* @param bufferMaxLength
* the return ByteBuffer's max capacity
* @return
* the encoded ByteBuffers
*/
public ByteBuffer[] encode(CharBuffer buffer, int bufferMaxLength) {
CharsetEncoder encoder = getEncoder();
encoder.reset();
Collection buffers = new ArrayList();
while (true) {
ByteBuffer out = ByteBuffer.allocate(bufferMaxLength);
CoderResult cr = encoder.encode(buffer, out, true);
if (cr.isUnderflow()) {
encoder.flush(out);
out.flip();
buffers.add(out);
break;
}
if (cr.isOverflow()) {
if (out.position() == 0) //can't encode this char, bufferMaxLength too small
break;
out.flip();
buffers.add(out);
continue;
}
}
return (ByteBuffer[]) buffers.toArray(new ByteBuffer[0]);
}
/**
* Decode byte array to CharBuffer.
*
* @param b
* byte array
* @return
* the decoded CharBuffer
*/
public CharBuffer decode(byte[] b) {
return decode(ByteBuffer.wrap(b));
}
/**
* Decode ByteBuffer to CharBuffer.
*
* @param buffer
* the ByteBuffer
* @return
* the decoded CharBuffer
*/
public CharBuffer decode(ByteBuffer buffer) {
CharsetDecoder decoder = getDecoder();
int n = 0;
if (buffer.remaining() > 0) {
n = (int) (buffer.remaining() * decoder.averageCharsPerByte());
if (n == 0)
n = (int) (buffer.remaining() * decoder.maxCharsPerByte());
}
CharBuffer result = CharBuffer.allocate(n);
if (n == 0)
return result;
decoder.reset();
while (true) {
CoderResult cr = buffer.hasRemaining() ? decoder.decode(buffer,
result, true) : decoder.flush(result);
if (cr.isUnderflow())
break;
if (cr.isOverflow()) {
n *= 2;
result.flip();
result = CharBuffer.allocate(n).put(result);
continue;
}
}
result.flip();
return result;
}
private static ThreadLocal charsetCache = new ThreadLocal();
private static CharsetUtils getCharsetUtils(String charsetName) {
CharsetUtils charset = (CharsetUtils) getReference(charsetCache);
if (charset == null || !charset.charset.name().equals(charsetName)) {
charset = new CharsetUtils(charsetName);
setReference(charsetCache, charset);
}
return charset;
}
/**
* Encode CharBuffer to ByteBuffer.
*
* @param charsetName
* charset name
* @param buffer
* the CharBuffer
* @return
* the encoded ByteBuffer
*/
public static ByteBuffer encode(String charsetName, CharBuffer buffer) {
return getCharsetUtils(charsetName).encode(buffer);
}
/**
* Encode String to ByteBuffer.
*
* @param charsetName
* charset name
* @param s
* the string
* @return
* the encoded ByteBuffer
*/
public static ByteBuffer encode(String charsetName, String s) {
return getCharsetUtils(charsetName).encode(s);
}
/**
* Decode ByteBuffer to CharBuffer.
*
* @param charsetName
* charset name
* @param buffer
* the ByteBuffer
* @return
* the decoded CharBuffer
*/
public static CharBuffer decode(String charsetName, ByteBuffer buffer) {
return getCharsetUtils(charsetName).decode(buffer);
}
}