Package com.splout.db.hadoop

Source Code of com.splout.db.hadoop.TestTablespaceGenerator

package com.splout.db.hadoop;

/*
* #%L
* Splout SQL Hadoop library
* %%
* Copyright (C) 2012 Datasalt Systems S.L.
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.codehaus.jackson.map.ObjectMapper;
import org.junit.Test;

import com.datasalt.pangool.io.Fields;
import com.datasalt.pangool.io.ITuple;
import com.datasalt.pangool.io.Schema;
import com.datasalt.pangool.io.Tuple;
import com.datasalt.pangool.io.TupleFile;
import com.datasalt.pangool.io.Utf8;
import com.datasalt.pangool.tuplemr.Criteria;
import com.datasalt.pangool.tuplemr.OrderBy;
import com.datasalt.pangool.tuplemr.mapred.lib.input.TupleInputFormat;
import com.datasalt.pangool.utils.test.AbstractHadoopTestLibrary;
import com.google.common.io.Files;
import com.splout.db.common.PartitionEntry;
import com.splout.db.engine.DefaultEngine;
import com.splout.db.engine.SQLite4JavaClient;
import com.splout.db.hadoop.TupleSampler.SamplingType;

@SuppressWarnings({ "rawtypes", "serial" })
public class TestTablespaceGenerator extends AbstractHadoopTestLibrary implements Serializable  {

  public final static String INPUT  = "in-"  + TestTablespaceGenerator.class.getName();
  public final static String OUTPUT = "out-" + TestTablespaceGenerator.class.getName();
  static Schema theSchema1 = new Schema("schema1", Fields.parse("id:string, value:string"));
  static Schema theSchema2 = new Schema("schema2", Fields.parse("id:string, value:string, intValue:int, doubleValue:double, strValue:string"));


  @Test
  public void simpleTest() throws Exception {
    trash(INPUT, OUTPUT);

    Configuration conf = new Configuration();
    TupleFile.Writer writer = new TupleFile.Writer(FileSystem.get(conf), conf, new Path(INPUT), theSchema1);

    writer.append(getTuple("id1", "value12"));
    writer.append(getTuple("id1", "value11"));
    writer.append(getTuple("id1", "value13"));
    writer.append(getTuple("id1", "value14"));

    writer.append(getTuple("id2", "value21"));
    writer.append(getTuple("id2", "value22"));
    writer.append(getTuple("id3", "value31"));
    writer.append(getTuple("id4", "value41"));

    writer.append(getTuple("id5", "value51"));
    writer.append(getTuple("id5", "value52"));
    writer.append(getTuple("id6", "value53"));
    writer.append(getTuple("id6", "value54"));
   
    writer.append(getTuple("id7", "value55"));
    writer.append(getTuple("id7", "value56"));
    writer.append(getTuple("id8", "value61"));
    writer.append(getTuple("id8", "value62"));
   
    writer.close();
   
    TablespaceSpec tablespace = TablespaceSpec.of(theSchema1, "id", new Path(INPUT), new TupleInputFormat()4);
    TablespaceGenerator viewGenerator = new TablespaceGenerator(tablespace, new Path(OUTPUT), this.getClass());
    viewGenerator.generateView(conf, SamplingType.FULL_SCAN, new TupleSampler.RandomSamplingOptions());
   
    List<PartitionEntry> partitionMap = viewGenerator.getPartitionMap().getPartitionEntries();
    assertEquals(4, partitionMap.size());
   
    assertEquals(null, partitionMap.get(0).getMin());
    assertEquals("id1", partitionMap.get(0).getMax());
    assertEquals(0, (int) partitionMap.get(0).getShard());
   
    assertEquals("id1", partitionMap.get(1).getMin());
    assertEquals("id4", partitionMap.get(1).getMax());
    assertEquals(1, (int) partitionMap.get(1).getShard());

    assertEquals("id4", partitionMap.get(2).getMin());
    assertEquals("id6", partitionMap.get(2).getMax());
    assertEquals(2, (int) partitionMap.get(2).getShard());

    assertEquals("id6", partitionMap.get(3).getMin());
    assertEquals(null, partitionMap.get(3).getMax());
    assertEquals(3, (int) partitionMap.get(3).getShard());

    // assert the engine id has been written too
    File engineIdFile = new File(OUTPUT + "/" + TablespaceGenerator.OUT_ENGINE);
    assertTrue(engineIdFile.exists());
    assertEquals(DefaultEngine.class.getName(), Files.toString(engineIdFile, Charset.defaultCharset()));
   
    trash(INPUT, OUTPUT);
  }
 
  @Test
  public void testAcceptNullValues() throws Exception {
    trash(INPUT, OUTPUT);

    Configuration conf = new Configuration();
    TupleFile.Writer writer = new TupleFile.Writer(FileSystem.get(conf), conf, new Path(INPUT), NullableSchema.nullableSchema(theSchema2));

    writer.append(getTupleWithNulls("id1", "value11", null, -1.0, null));
    writer.append(getTupleWithNulls("id2", "value12", null, null, "Hello"));
    writer.append(getTupleWithNulls("id3", "value13", 100, null, "Hello"));
    writer.append(getTupleWithNulls("id4", "value14", 100, 2.0, ""));
    writer.append(getTupleWithNulls("id5", "value15", 100, 2.0, null));
   
    writer.close();
   
    TablespaceSpec tablespace = TablespaceSpec.of(theSchema2, "id", new Path(INPUT), new TupleInputFormat(), 1);
    TablespaceGenerator viewGenerator = new TablespaceGenerator(tablespace, new Path(OUTPUT), this.getClass());
    viewGenerator.generateView(conf, SamplingType.FULL_SCAN, new TupleSampler.RandomSamplingOptions());
   
    SQLite4JavaClient manager = new SQLite4JavaClient(OUTPUT + "/store/0.db", null);
    String results = manager.query("SELECT * FROM schema2;", 100);
    assertTrue(results.contains("null"));

    assertNull(searchRow(results, "id", "id1").get("intValue"));
    assertEquals(-1.0, searchRow(results, "id", "id1").get("doubleValue"));
    assertNull(searchRow(results, "id", "id1").get("strValue"));

    assertNull(searchRow(results, "id", "id2").get("intValue"));
    assertNull(searchRow(results, "id", "id2").get("doubleValue"));
    assertEquals("Hello", searchRow(results, "id", "id2").get("strValue"));

    assertEquals(100, searchRow(results, "id", "id3").get("intValue"));
    assertNull(searchRow(results, "id", "id3").get("doubleValue"));
    assertEquals("Hello", searchRow(results, "id", "id3").get("strValue"));

    assertEquals(100, searchRow(results, "id", "id4").get("intValue"));
    assertEquals(2.0, searchRow(results, "id", "id4").get("doubleValue"));
    assertEquals("", searchRow(results, "id", "id4").get("strValue"));

    assertEquals(100, searchRow(results, "id", "id5").get("intValue"));
    assertEquals(2.0, searchRow(results, "id", "id5").get("doubleValue"));
    assertNull(searchRow(results, "id", "id5").get("strValue"));

    trash(INPUT, OUTPUT);
  }

  @Test
  public void testRecordProcessor() throws Exception {
    testRecordProcessor(false);
  }

  @Test
  public void testRecordProcessorReplicateAll() throws Exception {
    testRecordProcessor(true);
  }

  public void testRecordProcessor(boolean replicateAll) throws Exception {
    int TUPLES_TO_GENERATE = 10;

    trash(INPUT, INPUT + 2, OUTPUT);

    Configuration conf = new Configuration();
    TupleFile.Writer writer = new TupleFile.Writer(FileSystem.get(conf), conf, new Path(INPUT),
        NullableSchema.nullableSchema(theSchema1));

    for(int i=0; i<TUPLES_TO_GENERATE; i++) {
      writer.append(getTuple("id" + i, "str" + i));
    }

    writer.close();

    // Dummy table.
    writer = new TupleFile.Writer(FileSystem.get(conf), conf, new Path(INPUT+2),
        NullableSchema.nullableSchema(theSchema1));
    writer.append(getTuple("dummy", "dummy"));
    writer.close();

    TablespaceBuilder builder = new TablespaceBuilder();
    TableBuilder tBuilder = new TableBuilder(theSchema1);
    tBuilder.addTupleFile(new Path(INPUT), new RecordProcessor() {
      @Override
      public ITuple process(ITuple record, CounterInterface context) throws Throwable {
        context.getCounter("counter", "counter").increment(1);
        ((Utf8) record.get("id")).set(record.get("id") + "mod");
        ((Utf8) record.get("value")).set(record.get("value")+"mod");
        return record;
      }
    });
    if (replicateAll) {
      tBuilder.replicateToAll();
    } else {
      tBuilder.partitionBy("id");
    }
    tBuilder.insertionSortOrder(new OrderBy().add("id", Criteria.Order.ASC));
    builder.add(tBuilder.build());
    builder.setNPartitions(1);

    // Dummy tabled added only because at least one table with partition must be present
    // in the tablespace.
    tBuilder = new TableBuilder(new Schema("dummy", theSchema1.getFields()));
    tBuilder.addTupleFile(new Path(INPUT + 2));
    tBuilder.partitionBy("id");
    builder.add(tBuilder.build());

    TablespaceGenerator viewGenerator = new TablespaceGenerator(builder.build(), new Path(OUTPUT), this.getClass());
    viewGenerator.generateView(conf, SamplingType.FULL_SCAN, new TupleSampler.RandomSamplingOptions());

    SQLite4JavaClient manager = new SQLite4JavaClient(OUTPUT + "/store/0.db", null);
    String results = manager.query("SELECT * FROM schema1;", TUPLES_TO_GENERATE+1);

    System.out.println(results);
    for(int i=0; i<TUPLES_TO_GENERATE; i++) {
      assertEquals("id" + i + "mod", getVal(results, i, "id"));
      assertEquals("str" + i + "mod", getVal(results, i, "value"));
    }

    trash(INPUT, INPUT + 2, OUTPUT);
  }

  public static Object resultSize(String result) throws IOException {
    ObjectMapper mapper = new ObjectMapper();
    return mapper.readValue(result, List.class).size();
  }

  public static Object getVal(String result, int row, String field) throws IOException {
    ObjectMapper mapper = new ObjectMapper();
    return ((Map)mapper.readValue(result, List.class).get(row)).get(field);
  }

  public static Map searchRow(String result, String field, Object value) throws IOException {
    ObjectMapper mapper = new ObjectMapper();
    for (Object o : mapper.readValue(result, List.class)) {
      Map m = (Map) o;
      if (value.equals(m.get(field))) {
        return m;
      }
    }
    return null;
  }

  public static ITuple getTupleWithNulls(String id, String value, Integer intValue, Double doubleValue, String strValue) {
    ITuple tuple = new Tuple(theSchema2);
    tuple.set("id", id);
    tuple.set("value", value);
    tuple.set("intValue", intValue);
    tuple.set("doubleValue", doubleValue);
    tuple.set("strValue", strValue);
    return tuple;
  }
 
  public static ITuple getTuple(String id, String value) {
    ITuple tuple = new Tuple(theSchema1);
    tuple.set("id", id);
    tuple.set("value", value);
    return tuple;
  }
}
TOP

Related Classes of com.splout.db.hadoop.TestTablespaceGenerator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.