package com.splout.db.hadoop;
/*
* #%L
* Splout SQL Hadoop library
* %%
* Copyright (C) 2012 Datasalt Systems S.L.
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import com.datasalt.pangool.io.ITuple;
import com.datasalt.pangool.io.Schema;
import com.datasalt.pangool.io.Schema.Field;
import com.datasalt.pangool.io.Tuple;
import com.datasalt.pangool.tuplemr.Criteria.Order;
import com.datasalt.pangool.tuplemr.Criteria.SortElement;
import com.datasalt.pangool.tuplemr.*;
import com.splout.db.common.JSONSerDe;
import com.splout.db.common.JSONSerDe.JSONSerDeException;
import com.splout.db.common.PartitionEntry;
import com.splout.db.common.PartitionMap;
import com.splout.db.common.Tablespace;
import com.splout.db.engine.OutputFormatFactory;
import com.splout.db.hadoop.TupleSampler.TupleSamplerException;
import com.splout.db.hadoop.engine.SQLite4JavaOutputFormat;
import com.splout.db.hadoop.engine.SploutSQLOutputFormat;
import com.splout.db.hadoop.engine.SploutSQLOutputFormat.SploutSQLOutputFormatException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.mortbay.log.Log;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
/**
* A process that generates the SQL data stores needed for deploying a tablespace in Splout, given a
* file set and a table specification as input.
* <p/>
* The input to this process will be:
* <ul>
* <li>The {@link Tablespace} specification.</li>
* <li>The Hadoop output Path</li>
* </ul>
* The output of the process is a Splout deployable path with a {@link PartitionMap}. The format of the output is:
* outputPath + / + {@link #OUT_PARTITION_MAP} for the partition map, outputPath + / + {@link #OUT_SAMPLED_INPUT} for
* the list of sampled keys (a temporary file, deleted once the partition map has been computed) and outputPath + / +
* {@link #OUT_STORE} for the folder containing the generated SQL store.
* outputPath + / + {@link #OUT_ENGINE} is a file with the {@link SploutEngine} id used to generate the tablespace.
* <p/>
* For creating the store we first sample the input dataset with {@link TupleSampler} and then execute a Hadoop job that
* distributes the data accordingly. The Hadoop job will use the output format obtained from
* {@link OutputFormatFactory} for the configured engine (for instance, {@link SQLite4JavaOutputFormat}).
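* <p/>
* A minimal usage sketch (a hypothetical driver class; building the {@link TablespaceSpec} and the
* sampling arguments is omitted here, see {@link TupleSampler} for the available options):
* <pre>
* {@code
* Configuration conf = new Configuration();
* TablespaceSpec spec = ...; // tables, number of partitions, engine
* TablespaceGenerator generator =
*     new TablespaceGenerator(spec, new Path("/tmp/splout-output"), MyDriver.class);
* generator.generateView(conf, samplingType, samplingOptions);
* PartitionMap partitionMap = generator.getPartitionMap();
* }
* </pre>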
*/
@SuppressWarnings({"serial", "rawtypes"})
public class TablespaceGenerator implements Serializable {
public static class TablespaceGeneratorException extends Exception {
public TablespaceGeneratorException() {
super();
}
public TablespaceGeneratorException(String message, Throwable cause) {
super(message, cause);
}
public TablespaceGeneratorException(String message) {
super(message);
}
public TablespaceGeneratorException(Throwable cause) {
super(cause);
}
}
// --- Input parameters --- //
private transient final Path outputPath;
protected transient final TablespaceSpec tablespace;
// Number of SQL statements to execute before a COMMIT
private int batchSize = 1000000;
protected PartitionMap partitionMap;
private TupleReducer<ITuple, NullWritable> customReducer = null;
// will be used to set the JarByClass
private Class callingClass;
public TablespaceGenerator(TablespaceSpec tablespace, Path outputPath, Class callingClass) {
this.tablespace = tablespace;
this.outputPath = outputPath;
this.callingClass = callingClass;
}
public final static String OUT_SAMPLED_INPUT = "sampled-input";
public final static String OUT_SAMPLED_INPUT_SORTED = "sampled-input-sorted";
public final static String OUT_PARTITION_MAP = "partition-map";
public final static String OUT_INIT_STATEMENTS = "init-statements";
public final static String OUT_STORE = "store";
public final static String OUT_ENGINE = "engine";
/**
* This is the public method to call when using this class as an API. The business logic has been
* split into several protected methods to ease understanding and to allow subclasses to easily
* extend the functionality.
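* <p/>
* A minimal subclassing sketch (the class name and the overridden hook are illustrative; any of the
* protected methods can be overridden in the same way):
* <pre>
* {@code
* public class MyTablespaceGenerator extends TablespaceGenerator {
*   public MyTablespaceGenerator(TablespaceSpec tablespace, Path outputPath, Class callingClass) {
*     super(tablespace, outputPath, callingClass);
*   }
*   @Override
*   protected void prepareOutput(Configuration conf) throws IOException {
*     super.prepareOutput(conf);
*     // e.g. create extra folders or metadata next to the output
*   }
* }
* }
* </pre>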
*/
public void generateView(Configuration conf, TupleSampler.SamplingType samplingType,
TupleSampler.SamplingOptions samplingOptions) throws Exception {
prepareOutput(conf);
final int nPartitions = tablespace.getnPartitions();
if (nPartitions > 1) {
partitionMap = sample(nPartitions, conf, samplingType, samplingOptions);
} else {
partitionMap = PartitionMap.oneShardOpenedMap();
}
Log.info("Calculated partition map: " + partitionMap);
writeOutputMetadata(conf);
TupleMRBuilder builder = createMRBuilder(nPartitions, conf);
executeViewGeneration(builder);
}
// ------------------------------- //
protected void prepareOutput(Configuration conf) throws IOException {
FileSystem fileSystem = outputPath.getFileSystem(conf);
fileSystem.mkdirs(outputPath);
}
/**
* Writes the partition map and other metadata to the output folder. They will be needed for
* deploying the dataset to Splout.
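* <p/>
* The resulting layout under the output path is:
* <pre>
* outputPath/partition-map    ({@link #OUT_PARTITION_MAP}, JSON-serialized partition map)
* outputPath/init-statements  ({@link #OUT_INIT_STATEMENTS}, JSON, only if init statements were configured)
* outputPath/engine           ({@link #OUT_ENGINE}, class name of the engine used)
* </pre>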
*/
protected void writeOutputMetadata(Configuration conf) throws IOException, JSONSerDeException {
FileSystem fileSystem = outputPath.getFileSystem(conf);
// Write the Partition map
Path partitionMapPath = new Path(outputPath, OUT_PARTITION_MAP);
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fileSystem.create(
partitionMapPath, true)));
writer.write(JSONSerDe.ser(partitionMap));
writer.close();
// Write init statements, if applicable
if (tablespace.getInitStatements() != null) {
Path initStatementsPath = new Path(outputPath, OUT_INIT_STATEMENTS);
writer = new BufferedWriter(new OutputStreamWriter(fileSystem.create(initStatementsPath, true)));
writer.write(JSONSerDe.ser(tablespace.getInitStatements()));
writer.close();
}
// Write the Engine ID so we know what we are deploying exactly afterwards
Path enginePath = new Path(outputPath, OUT_ENGINE);
writer = new BufferedWriter(new OutputStreamWriter(fileSystem.create(enginePath, true)));
writer.write(tablespace.getEngine().getClass().getName());
writer.close();
}
/**
* Returns the partition key, either using partition-by-fields or partition-by-JavaScript, as
* configured in the {@link TableSpec}.
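* <p/>
* For example (with hypothetical fields): given partition fields {@code [country, city]} and a tuple
* where {@code country = "ES"} and {@code city = "Madrid"}, the returned key is {@code "ESMadrid"};
* null values contribute the empty string.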
*/
protected static String getPartitionByKey(ITuple tuple, TableSpec tableSpec, JavascriptEngine jsEngine)
throws Throwable {
String strKey = "";
if (tableSpec.getPartitionFields() != null) {
StringBuilder key = new StringBuilder();
for (Field partitionField : tableSpec.getPartitionFields()) {
Object obj = tuple.get(partitionField.getName());
// Null values contribute the empty string to the key
if (obj != null) {
key.append(obj.toString());
}
}
strKey = key.toString();
} else {
// use JavaScript
strKey = jsEngine.execute("partition", tuple);
if (strKey == null) {
strKey = "";
}
}
return strKey;
}
/**
* Samples the input, sorts the sampled keys and computes the partition map from them. Only
* invoked when more than one partition is required.
*/
protected PartitionMap sample(int nPartitions, Configuration conf,
TupleSampler.SamplingType samplingType, TupleSampler.SamplingOptions samplingOptions)
throws TupleSamplerException, IOException {
FileSystem fileSystem = outputPath.getFileSystem(conf);
// Number of records to sample
long recordsToSample = conf.getLong("splout.sampling.records.to.sample", 100000);
// The sampler will generate a file with samples to use to create the partition map
Path sampledInput = new Path(outputPath, OUT_SAMPLED_INPUT);
Path sampledInputSorted = new Path(outputPath, OUT_SAMPLED_INPUT_SORTED);
TupleSampler sampler = new TupleSampler(samplingType, samplingOptions, callingClass);
long retrievedSamples = sampler.sample(tablespace, conf, recordsToSample, sampledInput);
// 1.1 Sorting sampled keys on disk
fileSystem.delete(sampledInputSorted, true);
SequenceFile.Sorter sorter = new SequenceFile.Sorter(fileSystem, Text.class, NullWritable.class, conf);
sorter.sort(sampledInput, sampledInputSorted);
// Start the reader
final SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, sampledInputSorted, conf);
Log.info(retrievedSamples + " total keys sampled.");
/*
* 2: Calculate partition map
*/
Nextable nextable = new Nextable() {
@Override
public boolean next(Writable writable) throws IOException {
return reader.next(writable);
}
};
List<PartitionEntry> partitionEntries = calculatePartitions(nPartitions, retrievedSamples, nextable);
reader.close();
fileSystem.delete(sampledInput, true);
fileSystem.delete(sampledInputSorted, true);
// 2.2 Create the partition map
return new PartitionMap(partitionEntries);
}
// Iteration abstraction over the sampled keys; useful for unit testing
public interface Nextable {
boolean next(Writable writable) throws IOException;
}
/**
* Calculates the partitions given a sample. The following policy is applied:
* <ul>
* <li>Partitions are made as evenly sized as possible</li>
* <li>No empty partitions are allowed: every partition must contain at least one key</li>
* <li>The number of generated partitions may be smaller than the requested amount</li>
* </ul>
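* <p/>
* A worked sketch with a hypothetical sorted sample ({@code a, a, b, b, c, d}) and
* {@code nPartitions = 3}, so that {@code offset = 2}:
* <pre>
* {@code
* final Iterator<Text> it = Arrays.asList(new Text("a"), new Text("a"), new Text("b"),
*     new Text("b"), new Text("c"), new Text("d")).iterator();
* List<PartitionEntry> entries = calculatePartitions(3, 6, new Nextable() {
*   public boolean next(Writable writable) throws IOException {
*     if (!it.hasNext()) return false;
*     ((Text) writable).set(it.next());
*     return true;
*   }
* });
* // Resulting entries (min, max): (null, "a"), ("a", "b"), ("b", null); ranges are (min, max]
* }
* </pre>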
*/
static List<PartitionEntry> calculatePartitions(int nPartitions, long retrievedSamples, Nextable reader) throws IOException {
// 2.1 Select "n" keys evenly distributed
Text key = new Text();
List<PartitionEntry> partitionEntries = new ArrayList<PartitionEntry>();
int offset = Math.max(1, (int) (retrievedSamples / nPartitions));
String min = null;
int rowPointer = 0;
boolean wereMore = true;
String previousKey = null;
String currentKey = null;
String candidateToLastPartitionMin = null;
boolean foundDistinctKey = false;
for (int i = 1; i <= nPartitions; i++) {
PartitionEntry entry = new PartitionEntry();
if (min != null) {
entry.setMin(min);
}
int keyIndex = i * offset;
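// Absolute row index (within the sorted sample) at which this partition's max will be taken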
foundDistinctKey = false;
do {
wereMore = reader.next(key);
if (wereMore) {
rowPointer++;
// Keep track of the current and previous distinct keys
if (!equalsWithNulls(key.toString(), currentKey)) {
foundDistinctKey = true;
previousKey = currentKey;
currentKey = key.toString();
}
}
// Keep iterating until we have advanced far enough and have found a distinct key.
} while (wereMore && (rowPointer < keyIndex || !foundDistinctKey));
// If at least one more partition is guaranteed to follow,
// store the candidate for the last partition's min.
if (wereMore && i < nPartitions) {
candidateToLastPartitionMin = previousKey;
}
entry.setMax(key.toString());
min = key.toString();
entry.setShard(i - 1); // Shards are 0-indexed
partitionEntries.add(entry);
// No more rows to consume. No more partitions to build.
if (!wereMore) {
break;
}
}
int generatedPartitions = partitionEntries.size();
// The last range must be open-ended
partitionEntries.get(generatedPartitions - 1).setMax(null);
// Special case: we want to ensure that every partition contains at least
// one entry. Given that ranges are (min, max], that is guaranteed for every
// partition except the last one. The last partition is guaranteed non-empty
// if there are more sampled keys after its min, i.e. if foundDistinctKey is
// true. Otherwise we have to adjust: we try to move the last partition's min
// back to an earlier key. If that is not possible, we merge the last two
// partitions, which solves the problem.
if (!foundDistinctKey && partitionEntries.size() > 1) {
PartitionEntry previous = partitionEntries.get(generatedPartitions - 2);
PartitionEntry latest = partitionEntries.get(generatedPartitions - 1);
// If previous.getMin() < candidateToLastPartitionMin,
// the last two partitions can be adjusted
if (compareWithNulls(previous.getMin(), candidateToLastPartitionMin) < 0) {
previous.setMax(candidateToLastPartitionMin);
latest.setMin(candidateToLastPartitionMin);
} else {
// Adjustment was not possible: merge the last two partitions.
previous.setMax(null);
partitionEntries.remove(generatedPartitions - 1);
}
}
return partitionEntries;
}
static int compareWithNulls(String a, String b) {
if (a == null) {
return (b == null ? 0 : -1);
} else if (b == null) {
return 1; // a is known to be non-null here
} else {
return a.compareTo(b);
}
}
/**
* Creates the {@link TupleMRBuilder} used to launch the generation Job.
*/
protected TupleMRBuilder createMRBuilder(final int nPartitions, Configuration conf)
throws TupleMRException, SploutSQLOutputFormatException {
TupleMRBuilder builder = new TupleMRBuilder(conf, "Splout generating " + outputPath);
List<TableSpec> tableSpecs = new ArrayList<TableSpec>();
// For each Table we add an intermediate Pangool schema
int schemaCounter = 0;
for (Table table : tablespace.getPartitionedTables()) {
List<Field> fields = new ArrayList<Field>();
fields.addAll(table.getTableSpec().getSchema().getFields());
fields.add(SploutSQLOutputFormat.getPartitionField());
final Schema tableSchema = new Schema(table.getTableSpec().getSchema().getName(), fields);
final TableSpec tableSpec = table.getTableSpec();
schemaCounter++;
builder.addIntermediateSchema(NullableSchema.nullableSchema(tableSchema));
// For each input file for the Table we add an input and a TupleMapper
for (TableInput inputFile : table.getFiles()) {
final RecordProcessor recordProcessor = inputFile.getRecordProcessor();
for (Path path : inputFile.getPaths()) {
builder.addInput(path, inputFile.getFormat(), new TupleMapper<ITuple, NullWritable>() {
Tuple tableTuple = new Tuple(tableSchema);
JavascriptEngine jsEngine = null;
CounterInterface counterInterface = null;
@Override
public void map(ITuple fileTuple, NullWritable value, TupleMRContext context,
Collector collector) throws IOException, InterruptedException {
if (counterInterface == null) {
counterInterface = new CounterInterface(context.getHadoopContext());
}
// Initialize JavaScript engine if needed
if (jsEngine == null && tableSpec.getPartitionByJavaScript() != null) {
try {
jsEngine = new JavascriptEngine(tableSpec.getPartitionByJavaScript());
} catch (Throwable e) {
throw new RuntimeException(e);
}
}
// For each input Tuple from this File we execute the RecordProcessor.
// The default IdentityRecordProcessor simply passes the Tuple through unchanged.
ITuple processedTuple = null;
try {
processedTuple = recordProcessor.process(fileTuple, counterInterface);
} catch (Throwable e1) {
throw new RuntimeException(e1);
}
if (processedTuple == null) {
// The tuple has been filtered out by the user
return;
}
// Get the partition Id from this record
String strKey = "";
try {
strKey = getPartitionByKey(processedTuple, tableSpec, jsEngine);
} catch (Throwable e) {
throw new RuntimeException(e);
}
int shardId = partitionMap.findPartition(strKey);
if (shardId == -1) {
throw new RuntimeException(
"Shard id = -1: this must be a software bug; it shouldn't happen if the PartitionMap is complete.");
}
// Finally write it to the Hadoop output
for (Field field : processedTuple.getSchema().getFields()) {
tableTuple.set(field.getName(), processedTuple.get(field.getName()));
}
tableTuple.set(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD, shardId);
collector.write(tableTuple);
}
}, inputFile.getSpecificHadoopInputFormatContext());
}
}
tableSpecs.add(table.getTableSpec());
}
// We do the same for the replicated tables, but the Mapper logic will be different:
// the data is sent to all partitions
for (final Table table : tablespace.getReplicateAllTables()) {
List<Field> fields = new ArrayList<Field>();
fields.addAll(table.getTableSpec().getSchema().getFields());
fields.add(SploutSQLOutputFormat.getPartitionField());
final Schema tableSchema = new Schema(table.getTableSpec().getSchema().getName(), fields);
schemaCounter++;
builder.addIntermediateSchema(NullableSchema.nullableSchema(tableSchema));
// For each input file for the Table we add an input and a TupleMapper
for (TableInput inputFile : table.getFiles()) {
final RecordProcessor recordProcessor = inputFile.getRecordProcessor();
for (Path path : inputFile.getPaths()) {
builder.addInput(path, inputFile.getFormat(), new TupleMapper<ITuple, NullWritable>() {
Tuple tableTuple = new Tuple(tableSchema);
CounterInterface counterInterface = null;
@Override
public void map(ITuple key, NullWritable value, TupleMRContext context, Collector collector)
throws IOException, InterruptedException {
if (counterInterface == null) {
counterInterface = new CounterInterface(context.getHadoopContext());
}
// For each input Tuple from this File we execute the RecordProcessor.
// The default IdentityRecordProcessor simply passes the Tuple through unchanged.
ITuple processedTuple = null;
try {
processedTuple = recordProcessor.process(key, counterInterface);
} catch (Throwable e1) {
throw new RuntimeException(e1);
}
if (processedTuple == null) {
// The tuple has been filtered out by the user
return;
}
// Finally write it to the Hadoop output
for (Field field : processedTuple.getSchema().getFields()) {
tableTuple.set(field.getName(), processedTuple.get(field.getName()));
}
// Send the data of the replicated table to all partitions!
for (int i = 0; i < nPartitions; i++) {
tableTuple.set(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD, i);
collector.write(tableTuple);
}
}
}, inputFile.getSpecificHadoopInputFormatContext());
}
}
tableSpecs.add(table.getTableSpec());
}
// Group by partition
builder.setGroupByFields(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD);
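// With a single intermediate schema Pangool expects one global OrderBy; with several schemas we set
// a common OrderBy (partition field, then schema order) plus per-schema specific OrderBys below.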
if (schemaCounter == 1) {
OrderBy orderBy = new OrderBy();
orderBy.add(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD, Order.ASC);
// We have only one table; check whether it has a specific order-by
OrderBy specificOrderBy = tablespace.getPartitionedTables().get(0).getTableSpec()
.getInsertionOrderBy();
if (specificOrderBy != null) {
for (SortElement elem : specificOrderBy.getElements()) {
orderBy.add(elem.getName(), elem.getOrder());
}
}
builder.setOrderBy(orderBy);
} else { // > 1
// More than one schema: set common order by
builder.setOrderBy(OrderBy.parse(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD + ":asc")
.addSchemaOrder(Order.ASC));
// And then as many table-specific order-bys as needed
for (Table partitionedTable : tablespace.getPartitionedTables()) {
if (partitionedTable.getTableSpec().getInsertionOrderBy() != null) {
builder.setSpecificOrderBy(partitionedTable.getTableSpec().getSchema().getName(),
partitionedTable.getTableSpec().getInsertionOrderBy());
}
}
for (Table replicatedTable : tablespace.getReplicateAllTables()) {
if (replicatedTable.getTableSpec().getInsertionOrderBy() != null) {
builder.setSpecificOrderBy(replicatedTable.getTableSpec().getSchema().getName(),
replicatedTable.getTableSpec().getInsertionOrderBy());
}
}
}
if (customReducer == null) {
builder.setTupleReducer(new IdentityTupleReducer());
} else {
builder.setTupleReducer(customReducer);
}
builder.setJarByClass(callingClass);
// Define the output format
TableSpec[] tbls = tableSpecs.toArray(new TableSpec[0]);
OutputFormat outputFormat = null;
try {
outputFormat = OutputFormatFactory.getOutputFormat(tablespace.getEngine(), batchSize, tbls);
} catch (Exception e) {
System.err.println(e);
throw new RuntimeException(e);
}
builder.setOutput(new Path(outputPath, OUT_STORE), outputFormat, ITuple.class, NullWritable.class);
// #reducers = #partitions by default
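// ("mapred.reduce.tasks" is the classic Hadoop property name; Hadoop 2 maps it to "mapreduce.job.reduces")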
builder.getConf().setInt("mapred.reduce.tasks", nPartitions);
return builder;
}
protected void executeViewGeneration(TupleMRBuilder builder) throws IOException, InterruptedException,
ClassNotFoundException, TablespaceGeneratorException, TupleMRException {
try {
Job generationJob = builder.createJob();
long start = System.currentTimeMillis();
generationJob.waitForCompletion(true);
if (!generationJob.isSuccessful()) {
throw new TablespaceGeneratorException("Error executing generation Job");
}
long end = System.currentTimeMillis();
Log.info("Tablespace store generated in " + (end - start) + " ms.");
} finally {
builder.cleanUpInstanceFiles();
}
}
// Package-access, to be used for unit testing
void setCustomReducer(TupleReducer<ITuple, NullWritable> customReducer) {
this.customReducer = customReducer;
}
/**
* Returns the generated {@link PartitionMap}. It is also written to HDFS. This is mainly used for testing.
*/
public PartitionMap getPartitionMap() {
return partitionMap;
}
public int getBatchSize() {
return batchSize;
}
public void setBatchSize(int batchSize) {
this.batchSize = batchSize;
}
private static boolean equalsWithNulls(Object a, Object b) {
if (a == b) return true;
if ((a == null) || (b == null)) return false;
return a.equals(b);
}
}