Source Code of com.datasalt.pangool.BaseTest

/**
* Copyright [2012] [Datasalt Systems S.L.]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.datasalt.pangool;

import com.datasalt.pangool.io.*;
import com.datasalt.pangool.io.Schema.Field;
import com.datasalt.pangool.io.Schema.Field.Type;
import com.datasalt.pangool.serialization.HadoopSerialization;
import com.datasalt.pangool.thrift.test.A;
import com.datasalt.pangool.tuplemr.Criteria.Order;
import com.datasalt.pangool.tuplemr.serialization.*;
import com.datasalt.pangool.utils.test.AbstractHadoopTestLibrary;
import org.apache.avro.generic.GenericData.Record;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.ReflectionUtils;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import static org.junit.Assert.assertEquals;

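/**
 * Base class for Pangool tests. It provides a test schema covering every supported field
 * type, helpers to fill tuples with random or default data, and assertions that a tuple
 * survives a serialization/deserialization round trip.
 * <p>
 * A minimal usage sketch from a subclass, assuming a Tuple built for {@link #SCHEMA}:
 *
 * <pre>
 *   Tuple tuple = new Tuple(SCHEMA);
 *   fillTuple(true, tuple);
 *   assertSerializable(tuple, false);
 * </pre>
 */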
@SuppressWarnings({"rawtypes"})
public abstract class BaseTest extends AbstractHadoopTestLibrary {

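  /**
   * Test schema with one field per Pangool type (primitives, enum, bytes), plus a Thrift
   * object field and an Avro record field described by AVRO_SCHEMA; both schemas are
   * built in the static initializer below.
   */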
  public final static Schema SCHEMA;
  public final static org.apache.avro.Schema AVRO_SCHEMA;

  static {
    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("int_field", Type.INT));
    fields.add(Field.create("string_field", Type.STRING));
    fields.add(Field.create("long_field", Type.LONG));
    fields.add(Field.create("float_field", Type.FLOAT));
    fields.add(Field.create("bytes_field", Type.BYTES));
    fields.add(Field.create("double_field", Type.DOUBLE));
    fields.add(Field.create("boolean_field", Type.BOOLEAN));
    fields.add(Field.createEnum("enum_field", Order.class));
    fields.add(Field.createObject("thrift_field", A.class));

    AVRO_SCHEMA = org.apache.avro.Schema.createRecord("MyRecordSchema", null, null, false);
    List<org.apache.avro.Schema.Field> avroFields = new ArrayList<org.apache.avro.Schema.Field>();
    avroFields.add(new org.apache.avro.Schema.Field
        ("my_int", org.apache.avro.Schema.create(org.apache.avro.Schema.Type.INT), null, null));
    avroFields.add(new org.apache.avro.Schema.Field
        ("my_string", org.apache.avro.Schema.create(org.apache.avro.Schema.Type.STRING), null, null));
    AVRO_SCHEMA.setFields(avroFields);
    Field avroField = Field.createObject("my_avro", Object.class);
    avroField.setObjectSerialization(AvroFieldSerialization.class);
    avroField.addProp("avro.schema", AVRO_SCHEMA.toString());
    fields.add(avroField);
    SCHEMA = new Schema("schema", fields);
  }

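  /** Shared generator with a fixed seed so that the generated test data is reproducible. */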
  static Random random = new Random(1);

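  /**
   * Fills every field of the tuple: random data when requested, default values otherwise.
   */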
  protected static ITuple fillTuple(boolean isRandom, ITuple tuple) {
    return fillTuple(isRandom, tuple, 0, tuple.getSchema().getFields().size() - 1);
  }

  /**
   * Fills the fields in the inclusive index range [minIndex, maxIndex] with random data,
   * or with default values when isRandom is false.
   */
  protected static ITuple fillTuple(boolean isRandom, ITuple tuple, int minIndex, int maxIndex) {
    try {
      for (int i = minIndex; i <= maxIndex; i++) {
        Field field = tuple.getSchema().getField(i);
        if (field.isNullable() && random.nextDouble() < 0.2d) {
          tuple.set(i, null);
          continue;
        }
        switch (field.getType()) {
          case INT:
            tuple.set(i, isRandom ? random.nextInt() : 0);
            break;
          case LONG:
            tuple.set(i, isRandom ? random.nextLong() : 0);
            break;
          case BOOLEAN:
            tuple.set(i, isRandom ? random.nextBoolean() : false);
            break;
          case DOUBLE:
            tuple.set(i, isRandom ? random.nextDouble() : 0.0);
            break;
          case FLOAT:
            tuple.set(i, isRandom ? random.nextFloat() : 0f);
            break;
          case STRING:
            fillString(isRandom, tuple, i);
            break;
          case ENUM:
            fillEnum(isRandom, field, tuple, i);
            break;
          case OBJECT:
            fillObject(isRandom, tuple, field, i);
            break;
          case BYTES:
            fillBytes(isRandom, tuple, i);
            break;
          default:
            throw new IllegalArgumentException("Not supported type " + field.getType());
        }
      }
      return tuple;
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

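  /**
   * Sets a string value at the given index. In random mode it picks between an empty string
   * and String, Utf8 or Text representations of a random long; otherwise it sets "".
   */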
  protected static void fillString(boolean isRandom, ITuple tuple, int index) {
    if (isRandom) {
      switch (random.nextInt(4)) {
        case 0:
          tuple.set(index, "");
          break;
        case 1:
          tuple.set(index, random.nextLong() + "");
          break;
        case 2:
          tuple.set(index, new Utf8(random.nextLong() + ""));
          break;
        case 3:
          tuple.set(index, new Text(random.nextLong() + ""));
          break;
      }
    } else {
      tuple.set(index, "");
    }
  }

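  /** Sets one of the enum constants of the field's object class at the given index. */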
  protected static void fillEnum(boolean isRandom, Field field, ITuple tuple, int index) throws Exception {
    Enum[] values = (Enum[]) field.getObjectClass().getEnumConstants();
    tuple.set(index, values[isRandom ? random.nextInt(values.length) : 0]);
  }

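  /**
   * Sets a ByteBuffer at the given index, allocating an 8-byte buffer unless one is already
   * present, filled with random bytes and a random limit in random mode (limit 0 otherwise).
   */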
  protected static void fillBytes(boolean isRandom, ITuple tuple, int index) throws Exception {
    int BYTES_SIZE = 8;
    Object ob = tuple.get(index);
    if (!(ob instanceof ByteBuffer)) {
      ob = ByteBuffer.allocate(BYTES_SIZE);
      tuple.set(index, ob);
    }
    ByteBuffer buffer = (ByteBuffer) ob;
    if (isRandom) {
      random.nextBytes(buffer.array());
      int newLimit = random.nextInt(Math.max(1, buffer.capacity()));
      buffer.limit(newLimit);
    } else {
      buffer.limit(0);
    }
  }

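  /**
   * Fills an OBJECT field: either a Thrift A instance or an Avro Record matching
   * AVRO_SCHEMA, reusing the existing instance when possible.
   */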
  protected static void fillObject(boolean isRandom, ITuple tuple, Field field, int index) {
    Object instance = tuple.get(index);
    if (field.getObjectClass() == A.class) {
      if (instance == null) {
        instance = ReflectionUtils.newInstance(field.getObjectClass(), null);
      }
      A a = (A) instance;
      a.setId(isRandom ? random.nextInt() + "" : "");
      a.setUrl(isRandom ? random.nextLong() + "" : "");
    } else if (field.getObjectSerialization() == AvroFieldSerialization.class) {
      if (!(instance instanceof Record)) {
        instance = new Record(AVRO_SCHEMA);
      }
      Record record = (Record) instance;
      record.put("my_int", isRandom ? random.nextInt() : 0);
      record.put("my_string", isRandom ? random.nextLong() + "" : "");
    } else {
      throw new PangoolRuntimeException("Unknown field to fill");
    }
    tuple.set(index, instance);
  }


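  /**
   * Serializes the tuple wrapped in a DatumWrapper with the given HadoopSerialization,
   * deserializes it again and asserts that the result equals the original tuple.
   */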
  protected static void assertSerializable(HadoopSerialization ser, ITuple tuple,
                                           boolean debug) throws IOException {
    DataInputBuffer input = new DataInputBuffer();
    DataOutputBuffer output = new DataOutputBuffer();
    DatumWrapper<ITuple> wrapper = new DatumWrapper<ITuple>(tuple);
    ser.ser(wrapper, output);

    input.reset(output.getData(), 0, output.getLength());
    DatumWrapper<ITuple> wrapper2 = new DatumWrapper<ITuple>();

    wrapper2 = ser.deser(wrapper2, input);
    if (debug) {
      System.out.println("D:" + wrapper2.datum());
    }
    assertEquals(tuple, wrapper2.datum());
  }

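  /**
   * Round-trips the wrapped tuple through the serializer/deserializer pair obtained from
   * the given TupleSerialization and asserts equality.
   */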
  protected static void assertSerializable(TupleSerialization serialization,
                                           DatumWrapper<ITuple> tuple, boolean debug) throws IOException {
    TupleSerializer ser = (TupleSerializer) serialization.getSerializer(null);
    TupleDeserializer deser = (TupleDeserializer) serialization.getDeserializer(null);
    assertSerializable(ser, deser, tuple, debug);
  }

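  /**
   * Serializes the wrapped tuple with the given TupleSerializer, reads it back with the
   * given TupleDeserializer and asserts that the deserialized datum equals the original.
   */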
  protected static void assertSerializable(TupleSerializer ser,
                                           TupleDeserializer deser, DatumWrapper<ITuple> tuple, boolean debug) throws IOException {
    DataOutputBuffer output = new DataOutputBuffer();
    ser.open(output);
    ser.serialize(tuple);
    ser.close();

    DataInputBuffer input = new DataInputBuffer();
    input.reset(output.getData(), 0, output.getLength());
    DatumWrapper<ITuple> deserializedTuple = new DatumWrapper<ITuple>();

    deser.open(input);
    deserializedTuple = deser.deserialize(deserializedTuple);
    deser.close();

    if (debug) {
      System.out.println("D:" + deserializedTuple.datum());
    }
    assertEquals(tuple.datum(), deserializedTuple.datum());
  }

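  /** Round-trips the tuple through SimpleTupleSerializer/SimpleTupleDeserializer with no reuse instance. */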
  protected void assertSerializable(Tuple tuple, boolean debug) throws IOException {
    assertSerializable(tuple, null, debug);
  }

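  /**
   * Round-trips the tuple through SimpleTupleSerializer/SimpleTupleDeserializer, passing
   * toReuse as the instance to reuse during deserialization, and asserts equality.
   */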
  protected void assertSerializable(Tuple tuple, Tuple toReuse, boolean debug) throws IOException {
    HadoopSerialization hadoopSer = new HadoopSerialization(getConf());
    SimpleTupleSerializer ser = new SimpleTupleSerializer(hadoopSer);
    SimpleTupleDeserializer deser = new SimpleTupleDeserializer(tuple.getSchema(), hadoopSer, getConf());

    DataOutputBuffer output = new DataOutputBuffer();
    ser.open(output);
    ser.serialize(tuple);
    ser.close();

    DataInputBuffer input = new DataInputBuffer();
    input.reset(output.getData(), 0, output.getLength());
    ITuple deserializedTuple = null;

    deser.open(input);
    deserializedTuple = deser.deserialize(toReuse);
    deser.close();

    if (debug) {
      System.out.println("D:" + deserializedTuple);
    }
    assertEquals(tuple, deserializedTuple);
  }


  /**
   * Generates a new schema with the same name in which each field is made nullable with
   * probability 0.3 (and non-nullable otherwise).
   */
  protected static Schema decorateWithNullables(Schema schema) {
    List<Field> fields = schema.getFields();
    ArrayList<Field> newFields = new ArrayList<Field>();
    for (Field field : fields) {
      if (random.nextDouble() < 0.30d) {
        newFields.add(Field.cloneField(field, field.getName(), true));
      } else {
        newFields.add(Field.cloneField(field, field.getName(), false));
      }
    }
    return new Schema(schema.getName(), newFields);
  }

}