int latin1_num, utf8_num, utf16_num;
out.println("--> MySqlUtilsCharsetMapTest.test()");
// load native library
loadSystemLibrary("ndbclient");
CharsetMap csmap = CharsetMap.create();
out.println(" --> Test that mysql includes UTF-8 and 16-bit Unicode");
utf8_num = csmap.getUTF8CharsetNumber();
utf16_num = csmap.getUTF16CharsetNumber();
out.println(" UTF-8 charset num: " + utf8_num +
" UTF-16 or UCS-2 charset num: " + utf16_num);
assert( ! ((utf8_num == 0) || (utf16_num == 0)));
out.println(" <-- Test that mysql includes UTF-8 and 16-bit Unicode");
out.println(" --> Test CharsetMap::getName()");
String utf8_name = csmap.getName(utf8_num);
String utf16 = csmap.getMysqlName(csmap.getUTF16CharsetNumber());
assert(utf8_name.compareTo("UTF-8") == 0);
/* MySQL 5.1 and earlier will have UCS-2 but later versions may have true
UTF-16. For information, print whether UTF-16 or UCS-2 is being used. */
out.println(" Using mysql \"" + utf16 + "\" for UTF-16.");
out.println(" <-- Test CharsetMap::getName()");
/* Now we're going to recode.
We test with the string "ülker", which begins with the character
LATIN SMALL LETTER U WITH DIAERESIS - Unicode code point U+00FC.
In the latin1 encoding this is a literal 0xFC,
but in the UTF-8 representation it is 0xC3 0xBC.
*/
final char[] cmy_word_latin1 = new char[] { 0xFC, 'l', 'k', 'e', 'r', 0 };
final char[] cmy_word_utf8 = new char[] { 0xC3, 0xBC, 'l', 'k', 'e', 'r', 0 };
final char[] cmy_word_truncated = new char[] { 0xC3, 0xBC, 'l', 'k', 0 };
final char[] cmy_bad_utf8 = new char[] { 'l' , 0xBC, 'a', 'd', 0 };
out.println(" --> CharsetMap::recode() Tests");
{
ByteBuffer my_word_latin1 = char2bb(cmy_word_latin1);
ByteBuffer my_word_utf8 = char2bb(cmy_word_utf8);
out.println(" --> Test that latin1 is available.");
latin1_num = csmap.getCharsetNumber("latin1");
out.println(" latin1 charset number: " + latin1_num +
" standard name: " + csmap.getName(latin1_num));
assert(latin1_num != 0);
assert(csmap.getName(latin1_num).compareTo("windows-1252") == 0);
out.println(" Latin1 source string: " + bbdump(my_word_latin1) + "\n" +
" UTF8 source string: " + bbdump(my_word_utf8));
out.println(" <-- Test that latin1 is available.");
}
{
out.println(" --> RECODE TEST 1: recode from UTF-8 to Latin 1");
ByteBuffer my_word_utf8 = char2bb(cmy_word_utf8);
ByteBuffer result_buff = ByteBuffer.allocateDirect(16);
int[] lengths = new int[] { 7 , 16 };
int rr1 = csmap.recode(lengths, utf8_num, latin1_num,
my_word_utf8, result_buff);
printRecodeResult(rr1, lengths, my_word_utf8, result_buff);
assert(rr1 == CharsetMapConst.RecodeStatus.RECODE_OK);
assert(lengths[0] == 7);
assert(lengths[1] == 6);
assert(bbcmp(char2bb(cmy_word_latin1), result_buff) == 0);
out.println(" <-- RECODE TEST 1");
}
{
out.println(" --> RECODE TEST 2: recode from Latin1 to to UTF-8");
ByteBuffer my_word_latin1 = char2bb(cmy_word_latin1);
ByteBuffer result_buff = ByteBuffer.allocateDirect(16);
int[] lengths = new int[] { 6 , 16 };
int rr2 = csmap.recode(lengths, latin1_num, utf8_num,
my_word_latin1, result_buff);
printRecodeResult(rr2, lengths, my_word_latin1, result_buff);
assert(rr2 == CharsetMapConst.RecodeStatus.RECODE_OK);
assert(lengths[0] == 6);
assert(lengths[1] == 7);
assert(bbcmp(result_buff, char2bb(cmy_word_utf8)) == 0);
out.println(" <-- RECODE TEST 2");
}
{
out.println(" --> RECODE TEST 3: too-small result buffer");
ByteBuffer my_word_latin1 = char2bb(cmy_word_latin1);
ByteBuffer result_buff = ByteBuffer.allocateDirect(16);
ByteBuffer my_word_truncated = char2bb(cmy_word_truncated);
int[] lengths = new int[] { 6 , 4 }; // 4 is too small
int rr3 = csmap.recode(lengths, latin1_num, utf8_num,
my_word_latin1, result_buff);
printRecodeResult(rr3, lengths, my_word_latin1, result_buff);
assert(rr3 == CharsetMapConst.RecodeStatus.RECODE_BUFF_TOO_SMALL);
assert(lengths[0] == 3);
assert(lengths[1] == 4);
/* Confirm that the first four bytes of the result (the UTF-8 encoding of
the first three source characters) were indeed recoded: */
assert(bbncmp(result_buff, char2bb(cmy_word_truncated), 4) == 0);
out.println(" <-- RECODE TEST 3");
}
{
out.println(" --> RECODE TEST 4: invalid character set");
ByteBuffer my_word_latin1 = char2bb(cmy_word_latin1);
ByteBuffer result_buff = ByteBuffer.allocateDirect(16);
int[] lengths = new int[] { 6 , 16 };
int rr4 = csmap.recode(lengths, 0, 999, my_word_latin1, result_buff);
out.println(" Return code: " + rr4);
assert(rr4 == CharsetMapConst.RecodeStatus.RECODE_BAD_CHARSET);
out.println(" <-- RECODE TEST 4");
}
{
out.println(" --> RECODE TEST 5: source string is ill-formed UTF-8");
ByteBuffer my_bad_utf8 = char2bb(cmy_bad_utf8);
ByteBuffer result_buff = ByteBuffer.allocateDirect(16);
int[] lengths = new int[] { 5 , 16 };
int rr5 = csmap.recode(lengths, utf8_num, latin1_num,
my_bad_utf8, result_buff);
out.println(" Return code: " + rr5);
assert(rr5 == CharsetMapConst.RecodeStatus.RECODE_BAD_SRC);
out.println(" <-- RECODE TEST 5");
}
{
out.println(" --> RECODE TEST 6: convert an actual java string to UTF-8");
// Load the string into a ByteBuffer
ByteBuffer str_bb = ByteBuffer.allocateDirect(16);
CharBuffer cb = str_bb.asCharBuffer();
cb.append("\u00FClker");
cb.rewind();
ByteBuffer result_buff = ByteBuffer.allocateDirect(16);
int[] lengths = new int[] { 12 , 16 };
int rr6 = csmap.recode(lengths, utf16_num, utf8_num,
str_bb, result_buff);
printRecodeResult(rr6, lengths, str_bb, result_buff);
assert(lengths[0]) == 12;
assert(lengths[1]) == 7;
assert(bbncmp(result_buff, char2bb(cmy_word_utf8), 6) == 0);
out.println(" <-- RECODE TEST 6");
}
out.println();
{
out.println(" --> IS MULTIBYTE TEST");
boolean[] result = csmap.isMultibyte(latin1_num);
assert(!result[0]);
result = csmap.isMultibyte(utf16_num);
assert(result[0]);
result = csmap.isMultibyte(utf8_num);
assert(result[0]);
int nNull = 0, nSingle = 0, nMulti = 0;
for(int i = 0; i < 256 ; i++) {
result = csmap.isMultibyte(i);
if(result == null) nNull++;
else {
if(result[0]) nMulti++;
else nSingle++;
}