/**
* (c) Copyright 2013 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kiji.schema.util;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Set;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.generic.GenericData.EnumSymbol;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.util.Utf8;
import org.codehaus.jackson.node.IntNode;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.kiji.schema.KijiClientTest;
import org.kiji.schema.KijiSchemaTable;
import org.kiji.schema.avro.AvroSchema;
import org.kiji.schema.util.AvroUtils.SchemaCompatibilityType;
import org.kiji.schema.util.AvroUtils.SchemaPairCompatibility;
public class TestAvroUtils extends KijiClientTest {
private static final Logger LOG = LoggerFactory.getLogger(TestAvroUtils.class);
// -----------------------------------------------------------------------------------------------
// Primitive type schemas used throughout the compatibility test cases.
private static final Schema NULL_SCHEMA = Schema.create(Schema.Type.NULL);
private static final Schema BOOLEAN_SCHEMA = Schema.create(Schema.Type.BOOLEAN);
private static final Schema INT_SCHEMA = Schema.create(Schema.Type.INT);
private static final Schema LONG_SCHEMA = Schema.create(Schema.Type.LONG);
private static final Schema FLOAT_SCHEMA = Schema.create(Schema.Type.FLOAT);
private static final Schema DOUBLE_SCHEMA = Schema.create(Schema.Type.DOUBLE);
private static final Schema STRING_SCHEMA = Schema.create(Schema.Type.STRING);
private static final Schema BYTES_SCHEMA = Schema.create(Schema.Type.BYTES);
// Array schemas with various element types.
private static final Schema INT_ARRAY_SCHEMA = Schema.createArray(INT_SCHEMA);
private static final Schema LONG_ARRAY_SCHEMA = Schema.createArray(LONG_SCHEMA);
private static final Schema STRING_ARRAY_SCHEMA = Schema.createArray(STRING_SCHEMA);
// Map schemas with various value types (Avro map keys are always strings).
private static final Schema INT_MAP_SCHEMA = Schema.createMap(INT_SCHEMA);
private static final Schema LONG_MAP_SCHEMA = Schema.createMap(LONG_SCHEMA);
private static final Schema STRING_MAP_SCHEMA = Schema.createMap(STRING_SCHEMA);
// Enum schemas: the ENUM1_* variants share the name "Enum1" with different symbol
// sets; ENUM2_AB has the same symbols as ENUM1_AB but a different name.
private static final Schema ENUM1_AB_SCHEMA =
Schema.createEnum("Enum1", null, null, ImmutableList.of("A", "B"));
private static final Schema ENUM1_ABC_SCHEMA =
Schema.createEnum("Enum1", null, null, ImmutableList.of("A", "B", "C"));
private static final Schema ENUM1_BC_SCHEMA =
Schema.createEnum("Enum1", null, null, ImmutableList.of("B", "C"));
private static final Schema ENUM2_AB_SCHEMA =
Schema.createEnum("Enum2", null, null, ImmutableList.of("A", "B"));
// Union schemas covering the empty union, singleton unions, and two-branch
// unions in both branch orders.
private static final Schema EMPTY_UNION_SCHEMA =
Schema.createUnion(ImmutableList.<Schema>of());
private static final Schema NULL_UNION_SCHEMA =
Schema.createUnion(ImmutableList.of(NULL_SCHEMA));
private static final Schema INT_UNION_SCHEMA =
Schema.createUnion(ImmutableList.of(INT_SCHEMA));
private static final Schema LONG_UNION_SCHEMA =
Schema.createUnion(ImmutableList.of(LONG_SCHEMA));
private static final Schema STRING_UNION_SCHEMA =
Schema.createUnion(ImmutableList.of(STRING_SCHEMA));
private static final Schema INT_STRING_UNION_SCHEMA =
Schema.createUnion(ImmutableList.of(INT_SCHEMA, STRING_SCHEMA));
private static final Schema STRING_INT_UNION_SCHEMA =
Schema.createUnion(ImmutableList.of(STRING_SCHEMA, INT_SCHEMA));
// Non recursive records:
// Record schemas must be created empty and populated via setFields() in the
// static block below, because Field instances cannot be shared across schemas.
// Naming convention: A_INT_RECORD1 = record "Record1" with int field "a";
// DTYPE means TYPE with default value.
private static final Schema EMPTY_RECORD1 =
Schema.createRecord("Record1", null, null, false);
private static final Schema EMPTY_RECORD2 =
Schema.createRecord("Record2", null, null, false);
private static final Schema A_INT_RECORD1 =
Schema.createRecord("Record1", null, null, false);
private static final Schema A_LONG_RECORD1 =
Schema.createRecord("Record1", null, null, false);
private static final Schema A_INT_B_INT_RECORD1 =
Schema.createRecord("Record1", null, null, false);
private static final Schema A_DINT_RECORD1 = // DTYPE means TYPE with default value
Schema.createRecord("Record1", null, null, false);
private static final Schema A_INT_B_DINT_RECORD1 =
Schema.createRecord("Record1", null, null, false);
private static final Schema A_DINT_B_DINT_RECORD1 =
Schema.createRecord("Record1", null, null, false);
static {
EMPTY_RECORD1.setFields(Collections.<Field>emptyList());
EMPTY_RECORD2.setFields(Collections.<Field>emptyList());
A_INT_RECORD1.setFields(Lists.newArrayList(
new Field("a", INT_SCHEMA, null, null)));
A_LONG_RECORD1.setFields(Lists.newArrayList(
new Field("a", LONG_SCHEMA, null, null)));
A_INT_B_INT_RECORD1.setFields(Lists.newArrayList(
new Field("a", INT_SCHEMA, null, null),
new Field("b", INT_SCHEMA, null, null)));
A_DINT_RECORD1.setFields(Lists.newArrayList(
// IntNode(0) is the field's default value (used when the writer omits it).
new Field("a", INT_SCHEMA, null, new IntNode(0))));
A_INT_B_DINT_RECORD1.setFields(Lists.newArrayList(
new Field("a", INT_SCHEMA, null, null),
new Field("b", INT_SCHEMA, null, new IntNode(0))));
A_DINT_B_DINT_RECORD1.setFields(Lists.newArrayList(
new Field("a", INT_SCHEMA, null, new IntNode(0)),
new Field("b", INT_SCHEMA, null, new IntNode(0))));
}
// Recursive records
// Each "List" record references itself through its "tail" field, exercising
// the compatibility checker's handling of recursive schemas.
private static final Schema INT_LIST_RECORD =
Schema.createRecord("List", null, null, false);
private static final Schema LONG_LIST_RECORD =
Schema.createRecord("List", null, null, false);
static {
INT_LIST_RECORD.setFields(Lists.newArrayList(
new Field("head", INT_SCHEMA, null, null),
new Field("tail", INT_LIST_RECORD, null, null)));
LONG_LIST_RECORD.setFields(Lists.newArrayList(
new Field("head", LONG_SCHEMA, null, null),
new Field("tail", LONG_LIST_RECORD, null, null)));
}
// -----------------------------------------------------------------------------------------------
/** Immutable pair of a reader schema and a writer schema. */
private static final class ReaderWriter {
  private final Schema mReader;
  private final Schema mWriter;

  /**
   * Creates a reader/writer schema pair.
   *
   * @param reader Schema used to decode.
   * @param writer Schema used to encode.
   */
  public ReaderWriter(final Schema reader, final Schema writer) {
    this.mReader = reader;
    this.mWriter = writer;
  }

  /** @return the reader schema of this pair. */
  public Schema getReader() {
    return this.mReader;
  }

  /** @return the writer schema of this pair. */
  public Schema getWriter() {
    return this.mWriter;
  }
}
// -----------------------------------------------------------------------------------------------
// Writer record schema with two fields (oldfield1: int, oldfield2: string),
// shared by the testValidateSchemaPair*/testValidateSchemaNew* cases below.
private static final Schema WRITER_SCHEMA = Schema.createRecord(Lists.newArrayList(
new Schema.Field("oldfield1", INT_SCHEMA, null, null),
new Schema.Field("oldfield2", STRING_SCHEMA, null, null)));
@Test
public void testGetOptionalType() throws Exception {
  // An optional type is a union of NULL and exactly one other schema, in either order.
  final List<Schema> branches = Lists.newArrayList(INT_SCHEMA, NULL_SCHEMA);
  final Schema nullLast = Schema.createUnion(branches);
  final Schema nullFirst = Schema.createUnion(Lists.reverse(branches));

  // Both branch orderings must expose INT as the optional type.
  assertEquals(INT_SCHEMA, AvroUtils.getOptionalType(nullLast));
  assertEquals(INT_SCHEMA, AvroUtils.getOptionalType(nullFirst));
}
@Test
public void testGetNonOptionalType() throws Exception {
  // A union with more than one non-null branch is not an "optional" type.
  final List<Schema> unionSchemas = Lists.newArrayList(
      INT_SCHEMA,
      STRING_SCHEMA,
      NULL_SCHEMA);
  final Schema nonOptionalSchema = Schema.createUnion(unionSchemas);
  // getOptionalType() must return null for a non-optional union.
  // assertNull states the intent directly and yields a clearer failure
  // message than assertEquals(null, ...).
  assertNull(AvroUtils.getOptionalType(nonOptionalSchema));
}
@Test
public void testValidateSchemaPairMissingField() throws Exception {
  // Reader that declares only the first of the writer's two fields.
  final Schema reader = Schema.createRecord(Lists.newArrayList(
      new Schema.Field("oldfield1", INT_SCHEMA, null, null)));
  final AvroUtils.SchemaPairCompatibility expected = new AvroUtils.SchemaPairCompatibility(
      AvroUtils.SchemaCompatibilityType.COMPATIBLE,
      reader,
      WRITER_SCHEMA,
      AvroUtils.READER_WRITER_COMPATIBLE_MESSAGE);
  // Omitting a writer field is compatible: the reader simply skips it.
  assertEquals(expected, AvroUtils.checkReaderWriterCompatibility(reader, WRITER_SCHEMA));
}
@Test
public void testValidateSchemaPairMissingSecondField() throws Exception {
  // Reader that declares only the second of the writer's two fields.
  final Schema reader = Schema.createRecord(Lists.newArrayList(
      new Schema.Field("oldfield2", STRING_SCHEMA, null, null)));
  final AvroUtils.SchemaPairCompatibility expected = new AvroUtils.SchemaPairCompatibility(
      AvroUtils.SchemaCompatibilityType.COMPATIBLE,
      reader,
      WRITER_SCHEMA,
      AvroUtils.READER_WRITER_COMPATIBLE_MESSAGE);
  // Dropping the other writer field must also be compatible.
  assertEquals(expected, AvroUtils.checkReaderWriterCompatibility(reader, WRITER_SCHEMA));
}
@Test
public void testValidateSchemaPairAllFields() throws Exception {
  // Reader declaring both writer fields, in the same order.
  final List<Schema.Field> fields = Lists.newArrayList(
      new Schema.Field("oldfield1", INT_SCHEMA, null, null),
      new Schema.Field("oldfield2", STRING_SCHEMA, null, null));
  final Schema reader = Schema.createRecord(fields);
  final AvroUtils.SchemaPairCompatibility expected = new AvroUtils.SchemaPairCompatibility(
      AvroUtils.SchemaCompatibilityType.COMPATIBLE,
      reader,
      WRITER_SCHEMA,
      AvroUtils.READER_WRITER_COMPATIBLE_MESSAGE);
  // A structurally identical reader is trivially compatible.
  assertEquals(expected, AvroUtils.checkReaderWriterCompatibility(reader, WRITER_SCHEMA));
}
@Test
public void testValidateSchemaNewFieldWithDefault() throws Exception {
  // Reader adding a field the writer does not have, but with a default value.
  final Schema reader = Schema.createRecord(Lists.newArrayList(
      new Schema.Field("oldfield1", INT_SCHEMA, null, null),
      new Schema.Field("newfield1", INT_SCHEMA, null, IntNode.valueOf(42))));
  final AvroUtils.SchemaPairCompatibility expected = new AvroUtils.SchemaPairCompatibility(
      AvroUtils.SchemaCompatibilityType.COMPATIBLE,
      reader,
      WRITER_SCHEMA,
      AvroUtils.READER_WRITER_COMPATIBLE_MESSAGE);
  // The default fills in the missing field, so the pair is compatible.
  assertEquals(expected, AvroUtils.checkReaderWriterCompatibility(reader, WRITER_SCHEMA));
}
@Test
public void testValidateSchemaNewField() throws Exception {
  // Reader adding a field the writer does not have, with no default value.
  final Schema reader = Schema.createRecord(Lists.newArrayList(
      new Schema.Field("oldfield1", INT_SCHEMA, null, null),
      new Schema.Field("newfield1", INT_SCHEMA, null, null)));
  final String incompatibilityMessage = String.format(
      "Data encoded using writer schema:\n%s\n"
      + "will or may fail to decode using reader schema:\n%s\n",
      WRITER_SCHEMA.toString(true),
      reader.toString(true));
  final AvroUtils.SchemaPairCompatibility expected = new AvroUtils.SchemaPairCompatibility(
      AvroUtils.SchemaCompatibilityType.INCOMPATIBLE,
      reader,
      WRITER_SCHEMA,
      incompatibilityMessage);
  // Without a default there is nothing to fill the new field with: incompatible.
  assertEquals(expected, AvroUtils.checkReaderWriterCompatibility(reader, WRITER_SCHEMA));
}
@Test
public void testValidateArrayWriterSchema() throws Exception {
  final Schema arrayReader = Schema.createArray(STRING_SCHEMA);
  final Schema mapReader = Schema.createMap(STRING_SCHEMA);

  // An array reader matches an array writer with a compatible element type.
  assertEquals(
      new AvroUtils.SchemaPairCompatibility(
          AvroUtils.SchemaCompatibilityType.COMPATIBLE,
          arrayReader,
          STRING_ARRAY_SCHEMA,
          AvroUtils.READER_WRITER_COMPATIBLE_MESSAGE),
      AvroUtils.checkReaderWriterCompatibility(arrayReader, STRING_ARRAY_SCHEMA));

  // A map reader cannot decode array-encoded data.
  assertEquals(
      new AvroUtils.SchemaPairCompatibility(
          AvroUtils.SchemaCompatibilityType.INCOMPATIBLE,
          mapReader,
          STRING_ARRAY_SCHEMA,
          String.format(
              "Data encoded using writer schema:\n%s\n"
              + "will or may fail to decode using reader schema:\n%s\n",
              STRING_ARRAY_SCHEMA.toString(true),
              mapReader.toString(true))),
      AvroUtils.checkReaderWriterCompatibility(mapReader, STRING_ARRAY_SCHEMA));
}
@Test
public void testValidatePrimitiveWriterSchema() throws Exception {
  final Schema stringReader = Schema.create(Schema.Type.STRING);

  // A STRING reader decodes STRING-encoded data.
  assertEquals(
      new AvroUtils.SchemaPairCompatibility(
          AvroUtils.SchemaCompatibilityType.COMPATIBLE,
          stringReader,
          STRING_SCHEMA,
          AvroUtils.READER_WRITER_COMPATIBLE_MESSAGE),
      AvroUtils.checkReaderWriterCompatibility(stringReader, STRING_SCHEMA));

  // An INT reader cannot decode STRING-encoded data.
  assertEquals(
      new AvroUtils.SchemaPairCompatibility(
          AvroUtils.SchemaCompatibilityType.INCOMPATIBLE,
          INT_SCHEMA,
          STRING_SCHEMA,
          String.format(
              "Data encoded using writer schema:\n%s\n"
              + "will or may fail to decode using reader schema:\n%s\n",
              STRING_SCHEMA.toString(true),
              INT_SCHEMA.toString(true))),
      AvroUtils.checkReaderWriterCompatibility(INT_SCHEMA, STRING_SCHEMA));
}
/** Reader union schema must contain all writer union branches. */
@Test
public void testUnionReaderWriterSubsetIncompatibility() {
  final Schema writer = Schema.createUnion(Lists.newArrayList(INT_SCHEMA, STRING_SCHEMA));
  final Schema reader = Schema.createUnion(Lists.newArrayList(STRING_SCHEMA));

  // The reader has no branch for the writer's INT data, so the pair is incompatible.
  assertEquals(
      SchemaCompatibilityType.INCOMPATIBLE,
      AvroUtils.checkReaderWriterCompatibility(reader, writer).getType());
}
// -----------------------------------------------------------------------------------------------
/** Collection of reader/writer schema pair that are compatible. */
public static final List<ReaderWriter> COMPATIBLE_READER_WRITER_TEST_CASES = ImmutableList.of(
// Identity and legal primitive promotions (int -> long, etc.).
new ReaderWriter(BOOLEAN_SCHEMA, BOOLEAN_SCHEMA),
new ReaderWriter(INT_SCHEMA, INT_SCHEMA),
new ReaderWriter(LONG_SCHEMA, INT_SCHEMA),
new ReaderWriter(LONG_SCHEMA, LONG_SCHEMA),
// Avro spec says INT/LONG can be promoted to FLOAT/DOUBLE.
// This is arguable as this causes a loss of precision.
new ReaderWriter(FLOAT_SCHEMA, INT_SCHEMA),
new ReaderWriter(FLOAT_SCHEMA, LONG_SCHEMA),
new ReaderWriter(DOUBLE_SCHEMA, LONG_SCHEMA),
new ReaderWriter(DOUBLE_SCHEMA, INT_SCHEMA),
new ReaderWriter(DOUBLE_SCHEMA, FLOAT_SCHEMA),
new ReaderWriter(STRING_SCHEMA, STRING_SCHEMA),
new ReaderWriter(BYTES_SCHEMA, BYTES_SCHEMA),
// Arrays and maps: element/value promotions follow the primitive rules.
new ReaderWriter(INT_ARRAY_SCHEMA, INT_ARRAY_SCHEMA),
new ReaderWriter(LONG_ARRAY_SCHEMA, INT_ARRAY_SCHEMA),
new ReaderWriter(INT_MAP_SCHEMA, INT_MAP_SCHEMA),
new ReaderWriter(LONG_MAP_SCHEMA, INT_MAP_SCHEMA),
// Enums: the reader may declare a superset of the writer's symbols.
new ReaderWriter(ENUM1_AB_SCHEMA, ENUM1_AB_SCHEMA),
new ReaderWriter(ENUM1_ABC_SCHEMA, ENUM1_AB_SCHEMA),
// Tests involving unions:
new ReaderWriter(EMPTY_UNION_SCHEMA, EMPTY_UNION_SCHEMA),
new ReaderWriter(INT_UNION_SCHEMA, INT_UNION_SCHEMA),
new ReaderWriter(INT_STRING_UNION_SCHEMA, STRING_INT_UNION_SCHEMA),
new ReaderWriter(INT_UNION_SCHEMA, EMPTY_UNION_SCHEMA),
new ReaderWriter(LONG_UNION_SCHEMA, INT_UNION_SCHEMA),
// Special case of singleton unions:
new ReaderWriter(INT_UNION_SCHEMA, INT_SCHEMA),
new ReaderWriter(INT_SCHEMA, INT_UNION_SCHEMA),
// Tests involving records:
new ReaderWriter(EMPTY_RECORD1, EMPTY_RECORD1),
new ReaderWriter(EMPTY_RECORD1, A_INT_RECORD1),
new ReaderWriter(A_INT_RECORD1, A_INT_RECORD1),
new ReaderWriter(A_DINT_RECORD1, A_INT_RECORD1),
new ReaderWriter(A_DINT_RECORD1, A_DINT_RECORD1),
new ReaderWriter(A_INT_RECORD1, A_DINT_RECORD1),
new ReaderWriter(A_LONG_RECORD1, A_INT_RECORD1),
new ReaderWriter(A_INT_RECORD1, A_INT_B_INT_RECORD1),
new ReaderWriter(A_DINT_RECORD1, A_INT_B_INT_RECORD1),
new ReaderWriter(A_INT_B_DINT_RECORD1, A_INT_RECORD1),
new ReaderWriter(A_DINT_B_DINT_RECORD1, EMPTY_RECORD1),
new ReaderWriter(A_DINT_B_DINT_RECORD1, A_INT_RECORD1),
new ReaderWriter(A_INT_B_INT_RECORD1, A_DINT_B_DINT_RECORD1),
// Recursive records: compatibility must terminate despite the self-reference.
new ReaderWriter(INT_LIST_RECORD, INT_LIST_RECORD),
new ReaderWriter(LONG_LIST_RECORD, LONG_LIST_RECORD),
new ReaderWriter(LONG_LIST_RECORD, INT_LIST_RECORD),
new ReaderWriter(NULL_SCHEMA, NULL_SCHEMA)
);
// -----------------------------------------------------------------------------------------------
/** Collection of reader/writer schema pair that are incompatible. */
public static final List<ReaderWriter> INCOMPATIBLE_READER_WRITER_TEST_CASES = ImmutableList.of(
// Primitive mismatches and narrowing "demotions" (long -> int, double -> float, ...).
new ReaderWriter(NULL_SCHEMA, INT_SCHEMA),
new ReaderWriter(NULL_SCHEMA, LONG_SCHEMA),
new ReaderWriter(BOOLEAN_SCHEMA, INT_SCHEMA),
new ReaderWriter(INT_SCHEMA, NULL_SCHEMA),
new ReaderWriter(INT_SCHEMA, BOOLEAN_SCHEMA),
new ReaderWriter(INT_SCHEMA, LONG_SCHEMA),
new ReaderWriter(INT_SCHEMA, FLOAT_SCHEMA),
new ReaderWriter(INT_SCHEMA, DOUBLE_SCHEMA),
new ReaderWriter(LONG_SCHEMA, FLOAT_SCHEMA),
new ReaderWriter(LONG_SCHEMA, DOUBLE_SCHEMA),
new ReaderWriter(FLOAT_SCHEMA, DOUBLE_SCHEMA),
new ReaderWriter(STRING_SCHEMA, BOOLEAN_SCHEMA),
new ReaderWriter(STRING_SCHEMA, INT_SCHEMA),
// STRING and BYTES do not resolve to each other.
new ReaderWriter(STRING_SCHEMA, BYTES_SCHEMA),
new ReaderWriter(BYTES_SCHEMA, NULL_SCHEMA),
new ReaderWriter(BYTES_SCHEMA, INT_SCHEMA),
new ReaderWriter(BYTES_SCHEMA, STRING_SCHEMA),
// Container mismatches, including element/value demotions.
new ReaderWriter(INT_ARRAY_SCHEMA, LONG_ARRAY_SCHEMA),
new ReaderWriter(INT_MAP_SCHEMA, INT_ARRAY_SCHEMA),
new ReaderWriter(INT_ARRAY_SCHEMA, INT_MAP_SCHEMA),
new ReaderWriter(INT_MAP_SCHEMA, LONG_MAP_SCHEMA),
// Enums: reader missing writer symbols, or enum names differ.
new ReaderWriter(ENUM1_AB_SCHEMA, ENUM1_ABC_SCHEMA),
new ReaderWriter(ENUM1_BC_SCHEMA, ENUM1_ABC_SCHEMA),
new ReaderWriter(ENUM1_AB_SCHEMA, ENUM2_AB_SCHEMA),
new ReaderWriter(INT_SCHEMA, ENUM2_AB_SCHEMA),
new ReaderWriter(ENUM2_AB_SCHEMA, INT_SCHEMA),
// Tests involving unions:
new ReaderWriter(INT_UNION_SCHEMA, INT_STRING_UNION_SCHEMA),
new ReaderWriter(STRING_UNION_SCHEMA, INT_STRING_UNION_SCHEMA),
// Records: name mismatch, or reader fields without defaults missing from writer.
new ReaderWriter(EMPTY_RECORD2, EMPTY_RECORD1),
new ReaderWriter(A_INT_RECORD1, EMPTY_RECORD1),
new ReaderWriter(A_INT_B_DINT_RECORD1, EMPTY_RECORD1),
new ReaderWriter(INT_LIST_RECORD, LONG_LIST_RECORD),
// Last check:
// NOTE(review): this duplicates the NULL_SCHEMA/INT_SCHEMA pair at the top of
// this list -- possibly an intentional sentinel, but worth confirming.
new ReaderWriter(NULL_SCHEMA, INT_SCHEMA)
);
// -----------------------------------------------------------------------------------------------
/** Tests reader/writer compatibility validation. */
@Test
public void testReaderWriterCompatibility() {
  // Every pair in the table must be reported COMPATIBLE.
  for (final ReaderWriter pair : COMPATIBLE_READER_WRITER_TEST_CASES) {
    final Schema reader = pair.getReader();
    final Schema writer = pair.getWriter();
    LOG.debug("Testing compatibility of reader {} with writer {}.", reader, writer);
    final SchemaPairCompatibility result =
        AvroUtils.checkReaderWriterCompatibility(reader, writer);
    final String message = String.format(
        "Expecting reader %s to be compatible with writer %s, but tested incompatible.",
        reader, writer);
    assertEquals(message, SchemaCompatibilityType.COMPATIBLE, result.getType());
  }
}
/** Tests the reader/writer incompatibility validation. */
@Test
public void testReaderWriterIncompatibility() {
  // Every pair in the table must be reported INCOMPATIBLE.
  for (final ReaderWriter pair : INCOMPATIBLE_READER_WRITER_TEST_CASES) {
    final Schema reader = pair.getReader();
    final Schema writer = pair.getWriter();
    LOG.debug("Testing incompatibility of reader {} with writer {}.", reader, writer);
    final SchemaPairCompatibility result =
        AvroUtils.checkReaderWriterCompatibility(reader, writer);
    final String message = String.format(
        "Expecting reader %s to be incompatible with writer %s, but tested compatible.",
        reader, writer);
    assertEquals(message, SchemaCompatibilityType.INCOMPATIBLE, result.getType());
  }
}
// -----------------------------------------------------------------------------------------------
/**
 * Descriptor for a test case that encodes a datum according to a given writer schema,
 * then decodes it according to reader schema and validates the decoded value.
 */
private static final class DecodingTestCase {
  /** Writer schema used to encode the datum. */
  private final Schema mWriterSchema;

  /** Datum to encode according to the specified writer schema. */
  private final Object mDatum;

  /** Reader schema used to decode the datum encoded using the writer schema. */
  private final Schema mReaderSchema;

  /** Expected datum value when using the reader schema to decode from the writer schema. */
  private final Object mDecodedDatum;

  /**
   * Initializes a decoding test case.
   *
   * @param writerSchema Schema the datum is encoded with.
   * @param datum Value to encode.
   * @param readerSchema Schema the encoded bytes are decoded with.
   * @param decoded Value expected after decoding.
   */
  public DecodingTestCase(
      final Schema writerSchema,
      final Object datum,
      final Schema readerSchema,
      final Object decoded) {
    this.mWriterSchema = writerSchema;
    this.mDatum = datum;
    this.mReaderSchema = readerSchema;
    this.mDecodedDatum = decoded;
  }

  /** @return the writer schema of this test case. */
  public Schema getWriterSchema() {
    return this.mWriterSchema;
  }

  /** @return the reader schema of this test case. */
  public Schema getReaderSchema() {
    return this.mReaderSchema;
  }

  /** @return the datum to encode. */
  public Object getDatum() {
    return this.mDatum;
  }

  /** @return the expected decoded value. */
  public Object getDecodedDatum() {
    return this.mDecodedDatum;
  }
}
// -----------------------------------------------------------------------------------------------
/** Decoding test cases: (writer schema, datum, reader schema, expected decoded value). */
public static final List<DecodingTestCase> DECODING_COMPATIBILITY_TEST_CASES = ImmutableList.of(
// Identity and numeric promotions.
new DecodingTestCase(INT_SCHEMA, 1, INT_SCHEMA, 1),
new DecodingTestCase(INT_SCHEMA, 1, LONG_SCHEMA, 1L),
new DecodingTestCase(INT_SCHEMA, 1, FLOAT_SCHEMA, 1.0f),
new DecodingTestCase(INT_SCHEMA, 1, DOUBLE_SCHEMA, 1.0d),
// This is currently accepted but causes a precision loss:
// IEEE 754 floats have 24 bits signed mantissa
new DecodingTestCase(INT_SCHEMA, (1 << 24) + 1, FLOAT_SCHEMA, (float) ((1 << 24) + 1)),
// new DecodingTestCase(LONG_SCHEMA, 1L, INT_SCHEMA, 1), // should work in best-effort!
// Enum symbols resolve by name across compatible enum schemas.
new DecodingTestCase(
ENUM1_AB_SCHEMA, "A",
ENUM1_ABC_SCHEMA, new EnumSymbol(ENUM1_ABC_SCHEMA, "A")),
new DecodingTestCase(
ENUM1_ABC_SCHEMA, "A",
ENUM1_AB_SCHEMA, new EnumSymbol(ENUM1_AB_SCHEMA, "A")),
new DecodingTestCase(
ENUM1_ABC_SCHEMA, "B",
ENUM1_BC_SCHEMA, new EnumSymbol(ENUM1_BC_SCHEMA, "B")),
// Union branches resolve against plain or union readers; strings decode as Utf8.
new DecodingTestCase(
INT_STRING_UNION_SCHEMA, "the string",
STRING_SCHEMA, new Utf8("the string")),
new DecodingTestCase(
INT_STRING_UNION_SCHEMA, "the string",
STRING_UNION_SCHEMA, new Utf8("the string"))
);
/** Tests the reader/writer compatibility at decoding time. */
@Test
public void testReaderWriterDecodingCompatibility() throws Exception {
  for (DecodingTestCase testCase : DECODING_COMPATIBILITY_TEST_CASES) {
    final Schema readerSchema = testCase.getReaderSchema();
    final Schema writerSchema = testCase.getWriterSchema();
    final Object datum = testCase.getDatum();
    final Object expectedDecodedDatum = testCase.getDecodedDatum();
    // Fixed log message: it previously said "incompatibility", copied from the
    // incompatibility test, which was misleading in this compatibility test.
    LOG.debug(
        "Testing compatibility of reader {} with writer {}.",
        readerSchema, writerSchema);

    // Encode the datum with the writer schema into an in-memory buffer.
    LOG.debug("Encode datum {} with writer {}.", datum, writerSchema);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final Encoder encoder = EncoderFactory.get().binaryEncoder(baos, null);
    final DatumWriter<Object> datumWriter = new GenericDatumWriter<Object>(writerSchema);
    datumWriter.write(datum, encoder);
    encoder.flush();

    // Decode the bytes through a resolving decoder that adapts writer data
    // to the reader schema, then compare against the expected value.
    LOG.debug(
        "Decode datum {} whose writer is {} with reader {}.",
        datum, writerSchema, readerSchema);
    final byte[] bytes = baos.toByteArray();
    final Decoder decoder = DecoderFactory.get().resolvingDecoder(
        writerSchema, readerSchema,
        DecoderFactory.get().binaryDecoder(bytes, null));
    final DatumReader<Object> datumReader = new GenericDatumReader<Object>(readerSchema);
    final Object decodedDatum = datumReader.read(null, decoder);
    assertEquals(String.format(
        "Expecting decoded value %s when decoding value %s whose writer schema is %s "
        + "using reader schema %s, but value was %s.",
        expectedDecodedDatum, datum, writerSchema, readerSchema, decodedDatum),
        expectedDecodedDatum, decodedDatum);
  }
}
/**
 * Tests checkWriterCompatibility(): validates one writer schema against a set
 * of reader schemas, expecting INCOMPATIBLE overall because one reader
 * (reader4) adds a field without a default value.
 */
@Test
public void testCheckWriterCompatibility() throws Exception {
// Setup schema fields.
final List<Schema.Field> writerFields = Lists.newArrayList(
new Schema.Field("oldfield1", INT_SCHEMA, null, null),
new Schema.Field("oldfield2", STRING_SCHEMA, null, null));
// reader1: drops oldfield2, adds newfield1 with a default (compatible).
final List<Schema.Field> readerFields1 = Lists.newArrayList(
new Schema.Field("oldfield1", INT_SCHEMA, null, null),
new Schema.Field("newfield1", INT_SCHEMA, null, IntNode.valueOf(42)));
// reader2: keeps both writer fields, adds newfield1 with a default (compatible).
final List<Schema.Field> readerFields2 = Lists.newArrayList(
new Schema.Field("oldfield1", INT_SCHEMA, null, null),
new Schema.Field("oldfield2", STRING_SCHEMA, null, null),
new Schema.Field("newfield1", INT_SCHEMA, null, IntNode.valueOf(42)));
// reader3: identical to the writer (compatible).
final List<Schema.Field> readerFields3 = Lists.newArrayList(
new Schema.Field("oldfield1", INT_SCHEMA, null, null),
new Schema.Field("oldfield2", STRING_SCHEMA, null, null));
// reader4: adds newfield1 WITHOUT a default (incompatible).
final List<Schema.Field> readerFields4 = Lists.newArrayList(
new Schema.Field("oldfield1", INT_SCHEMA, null, null),
new Schema.Field("newfield1", INT_SCHEMA, null, null));
// Setup schemas.
final Schema writer = Schema.createRecord(writerFields);
final Schema reader1 = Schema.createRecord(readerFields1);
final Schema reader2 = Schema.createRecord(readerFields2);
final Schema reader3 = Schema.createRecord(readerFields3);
final Schema reader4 = Schema.createRecord(readerFields4);
final Set<Schema> readers = Sets.newHashSet(
reader1,
reader2,
reader3,
reader4);
// Setup expectations.
final AvroUtils.SchemaPairCompatibility result1 =
new AvroUtils.SchemaPairCompatibility(
AvroUtils.SchemaCompatibilityType.COMPATIBLE,
reader1,
writer,
AvroUtils.READER_WRITER_COMPATIBLE_MESSAGE);
final AvroUtils.SchemaPairCompatibility result2 =
new AvroUtils.SchemaPairCompatibility(
AvroUtils.SchemaCompatibilityType.COMPATIBLE,
reader2,
writer,
AvroUtils.READER_WRITER_COMPATIBLE_MESSAGE);
final AvroUtils.SchemaPairCompatibility result3 =
new AvroUtils.SchemaPairCompatibility(
AvroUtils.SchemaCompatibilityType.COMPATIBLE,
reader3,
writer,
AvroUtils.READER_WRITER_COMPATIBLE_MESSAGE);
final AvroUtils.SchemaPairCompatibility result4 =
new AvroUtils.SchemaPairCompatibility(
AvroUtils.SchemaCompatibilityType.INCOMPATIBLE,
reader4,
writer,
String.format(
"Data encoded using writer schema:\n%s\n"
+ "will or may fail to decode using reader schema:\n%s\n",
writer.toString(true),
reader4.toString(true)));
// Perform the check.
final AvroUtils.SchemaSetCompatibility results = AvroUtils
.checkWriterCompatibility(readers.iterator(), writer);
// Ensure that the results contain the expected values.
// One incompatible reader makes the whole set INCOMPATIBLE; individual
// pair results are still reported as causes.
assertEquals(AvroUtils.SchemaCompatibilityType.INCOMPATIBLE, results.getType());
assertTrue(results.getCauses().contains(result1));
assertTrue(results.getCauses().contains(result2));
assertTrue(results.getCauses().contains(result3));
assertTrue(results.getCauses().contains(result4));
}
/**
 * Tests checkReaderCompatibility(): validates one reader schema against a set
 * of writer schemas, expecting INCOMPATIBLE overall because one writer
 * (writer3) lacks a field the reader requires without a default.
 */
@Test
public void testCheckReaderCompatibility() throws Exception {
// Setup schema fields.
// writer1: identical to the reader (compatible).
final List<Schema.Field> writerFields1 = Lists.newArrayList(
new Schema.Field("oldfield1", INT_SCHEMA, null, null),
new Schema.Field("oldfield2", STRING_SCHEMA, null, null));
// writer2: superset of the reader's fields; the extra field is skipped (compatible).
final List<Schema.Field> writerFields2 = Lists.newArrayList(
new Schema.Field("oldfield1", INT_SCHEMA, null, null),
new Schema.Field("oldfield2", STRING_SCHEMA, null, null),
new Schema.Field("newfield1", INT_SCHEMA, null, null));
// writer3: missing oldfield2 which the reader requires without a default (incompatible).
final List<Schema.Field> writerFields3 = Lists.newArrayList(
new Schema.Field("oldfield1", INT_SCHEMA, null, null),
new Schema.Field("newfield1", INT_SCHEMA, null, null));
final List<Schema.Field> readerFields = Lists.newArrayList(
new Schema.Field("oldfield1", INT_SCHEMA, null, null),
new Schema.Field("oldfield2", STRING_SCHEMA, null, null));
// Setup schemas.
final Schema writer1 = Schema.createRecord(writerFields1);
final Schema writer2 = Schema.createRecord(writerFields2);
final Schema writer3 = Schema.createRecord(writerFields3);
final Schema reader = Schema.createRecord(readerFields);
final Set<Schema> written = Sets.newHashSet(writer1);
final Set<Schema> writers = Sets.newHashSet(writer2, writer3);
// Setup expectations.
final AvroUtils.SchemaPairCompatibility result1 =
new AvroUtils.SchemaPairCompatibility(
AvroUtils.SchemaCompatibilityType.COMPATIBLE,
reader,
writer1,
AvroUtils.READER_WRITER_COMPATIBLE_MESSAGE);
final AvroUtils.SchemaPairCompatibility result2 =
new AvroUtils.SchemaPairCompatibility(
AvroUtils.SchemaCompatibilityType.COMPATIBLE,
reader,
writer2,
AvroUtils.READER_WRITER_COMPATIBLE_MESSAGE);
final AvroUtils.SchemaPairCompatibility result3 =
new AvroUtils.SchemaPairCompatibility(
AvroUtils.SchemaCompatibilityType.INCOMPATIBLE,
reader,
writer3,
String.format(
"Data encoded using writer schema:\n%s\n"
+ "will or may fail to decode using reader schema:\n%s\n",
writer3.toString(true),
reader.toString(true)));
// Perform the check.
final AvroUtils.SchemaSetCompatibility results = AvroUtils
.checkReaderCompatibility(reader, Iterators.concat(written.iterator(), writers.iterator()));
// Ensure that the results contain the expected values.
// One incompatible writer makes the whole set INCOMPATIBLE; individual
// pair results are still reported as causes.
assertEquals(AvroUtils.SchemaCompatibilityType.INCOMPATIBLE, results.getType());
assertTrue(results.getCauses().contains(result1));
assertTrue(results.getCauses().contains(result2));
assertTrue(results.getCauses().contains(result3));
}
/** AvroSchema equality must hold whether a schema is expressed by UID or by JSON. */
@Test
public void testAvroSchemaEquals() throws IOException {
  final KijiSchemaTable table = getKiji().getSchemaTable();

  // Describe the STRING and INT schemas both by schema-table UID and by JSON text.
  final AvroSchema stringByUid =
      AvroSchema.newBuilder().setUid(table.getOrCreateSchemaId(STRING_SCHEMA)).build();
  final AvroSchema stringByJson =
      AvroSchema.newBuilder().setJson(STRING_SCHEMA.toString()).build();
  final AvroSchema intByUid =
      AvroSchema.newBuilder().setUid(table.getOrCreateSchemaId(INT_SCHEMA)).build();
  final AvroSchema intByJson =
      AvroSchema.newBuilder().setJson(INT_SCHEMA.toString()).build();

  // Same underlying schema compares equal regardless of representation, in both orders.
  assertTrue(AvroUtils.avroSchemaEquals(table, stringByUid, stringByUid));
  assertTrue(AvroUtils.avroSchemaEquals(table, stringByUid, stringByJson));
  assertTrue(AvroUtils.avroSchemaEquals(table, stringByJson, stringByUid));
  assertTrue(AvroUtils.avroSchemaEquals(table, intByUid, intByUid));
  assertTrue(AvroUtils.avroSchemaEquals(table, intByUid, intByJson));
  assertTrue(AvroUtils.avroSchemaEquals(table, intByJson, intByUid));

  // Different underlying schemas never compare equal.
  assertFalse(AvroUtils.avroSchemaEquals(table, stringByUid, intByUid));
  assertFalse(AvroUtils.avroSchemaEquals(table, stringByUid, intByJson));
  assertFalse(AvroUtils.avroSchemaEquals(table, stringByJson, intByJson));
  assertFalse(AvroUtils.avroSchemaEquals(table, stringByJson, intByUid));
}
/** Collection membership must be decided by resolved schema equality, not representation. */
@Test
public void testAvroSchemaListContains() throws IOException {
  final KijiSchemaTable table = getKiji().getSchemaTable();

  // Describe the STRING and INT schemas both by schema-table UID and by JSON text.
  final AvroSchema stringByUid =
      AvroSchema.newBuilder().setUid(table.getOrCreateSchemaId(STRING_SCHEMA)).build();
  final AvroSchema stringByJson =
      AvroSchema.newBuilder().setJson(STRING_SCHEMA.toString()).build();
  final AvroSchema intByUid =
      AvroSchema.newBuilder().setUid(table.getOrCreateSchemaId(INT_SCHEMA)).build();
  final AvroSchema intByJson =
      AvroSchema.newBuilder().setJson(INT_SCHEMA.toString()).build();

  final List<AvroSchema> strings = Lists.newArrayList(stringByJson, stringByUid);
  final List<AvroSchema> ints = Lists.newArrayList(intByJson, intByUid);
  final List<AvroSchema> mixed = Lists.newArrayList(stringByJson, intByUid);

  // A schema is found in a list regardless of how either side is expressed.
  assertTrue(AvroUtils.avroSchemaCollectionContains(table, strings, stringByJson));
  assertTrue(AvroUtils.avroSchemaCollectionContains(table, strings, stringByUid));
  assertFalse(AvroUtils.avroSchemaCollectionContains(table, strings, intByUid));
  assertTrue(AvroUtils.avroSchemaCollectionContains(table, ints, intByJson));
  assertTrue(AvroUtils.avroSchemaCollectionContains(table, ints, intByUid));
  assertFalse(AvroUtils.avroSchemaCollectionContains(table, ints, stringByUid));
  assertTrue(AvroUtils.avroSchemaCollectionContains(table, mixed, stringByJson));
  assertTrue(AvroUtils.avroSchemaCollectionContains(table, mixed, intByUid));
}
}