/**
* Copyright [2012] [Datasalt Systems S.L.]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.datasalt.pangool.utils.test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.ReflectionUtils;
import org.junit.Assert;
import org.junit.Before;
import com.datasalt.pangool.io.ITuple;
import com.datasalt.pangool.tuplemr.mapred.lib.input.TupleInputFormat.TupleInputReader;
import com.datasalt.pangool.utils.HadoopUtils;
import com.datasalt.pangool.utils.Pair;
/**
* Niceties and utilities for making Hadoop unit tests less painfully.
*/
public abstract class AbstractHadoopTestLibrary extends AbstractBaseTest {
protected FileSystem fS;
protected Map<String, List<Pair<Object, Object>>> outputs = new HashMap<String, List<Pair<Object, Object>>>();
protected Map<String, SequenceFile.Writer> inputs = new HashMap<String, SequenceFile.Writer>();
@Before
public void initHadoop() throws IOException {
fS = FileSystem.get(getConf());
}
@SuppressWarnings("rawtypes")
private SequenceFile.Writer openWriter(String path, Class key, Class value) throws IOException {
return new SequenceFile.Writer(fS, getConf(), new Path(path), key, value);
}
public Writable writable(Object obj) {
if(obj instanceof Integer) {
return new IntWritable((Integer) obj);
} else if(obj instanceof Double) {
return new DoubleWritable((Double) obj);
} else if(obj instanceof Long) {
return new LongWritable((Long) obj);
} else if(obj instanceof String) {
return new Text((String) obj);
} else if(obj instanceof Float) {
return new FloatWritable((Float)obj);
} else if(obj instanceof Boolean){
return new BooleanWritable((Boolean)obj);
}
return null;
}
public void assertRun(Job job) throws IOException, InterruptedException, ClassNotFoundException {
FileSystem fs = FileSystem.get(job.getConfiguration());
HadoopUtils.deleteIfExists(fs, FileOutputFormat.getOutputPath(job));
// Close input writers first
for(Map.Entry<String, SequenceFile.Writer> entry : inputs.entrySet()) {
entry.getValue().close();
}
job.waitForCompletion(true);
Assert.assertTrue(job.isSuccessful());
}
public void cleanUp() throws IOException {
for(Map.Entry<String, SequenceFile.Writer> entry : inputs.entrySet()) {
trash(entry.getKey());
}
for(Map.Entry<String, List<Pair<Object, Object>>> entry : outputs.entrySet()) {
Path p = new Path(entry.getKey());
if(p.toString().contains("-0000")) {
p = p.getParent();
}
trash(p.toString());
}
}
protected void trash(String... folders) throws IOException {
for(String folder : folders) {
HadoopUtils.deleteIfExists(fS, new Path(folder));
}
}
protected String firstReducerOutput(String path) {
return path + "/part-r-00000";
}
protected String firstMapOutput(String path) {
return path + "/part-m-00000";
}
protected String firstReducerMultiOutput(String path, String multiOutputName) {
return path + "/" + multiOutputName + "-r-00000";
}
protected String firstMapperMultiOutput(String path, String multiOutputName) {
return path + "/" + multiOutputName + "-m-00000";
}
public AbstractHadoopTestLibrary withInput(String input, Object key, Object value) throws IOException {
SequenceFile.Writer writer = inputs.get(input);
if(writer == null) {
writer = openWriter(input, key.getClass(), value.getClass());
inputs.put(input, writer);
}
writer.append(key, value);
return this;
}
public AbstractHadoopTestLibrary withInput(String input, Object key) throws IOException {
return withInput(input, key, NullWritable.get());
}
public void withOutput(String output, Object key) throws IOException, ClassNotFoundException, InstantiationException,
IllegalAccessException {
withOutput(output, key, NullWritable.get());
}
public abstract static class TupleVisitor {
public abstract void onTuple(ITuple tuple);
}
public static class PrintVisitor extends TupleVisitor {
@Override
public void onTuple(ITuple tuple) {
System.out.println(tuple);
}
}
/*
* Read the Tuples from a TupleOutput using TupleInputReader.
*/
public static void readTuples(Path file, Configuration conf, TupleVisitor iterator) throws IOException, InterruptedException {
TupleInputReader reader = new TupleInputReader(conf);
reader.initialize(new Path(file + ""), conf);
while(reader.nextKeyValueNoSync()) {
ITuple tuple = reader.getCurrentKey();
iterator.onTuple(tuple);
}
reader.close();
}
public void withTupleOutput(String output, final ITuple expectedTuple) throws IOException, InterruptedException {
final AtomicBoolean found = new AtomicBoolean(false);
final AtomicInteger tuples = new AtomicInteger(0);
readTuples(new Path(output), new Configuration(), new TupleVisitor() {
@Override
public void onTuple(ITuple tuple) {
tuples.incrementAndGet();
if(tuple.equals(expectedTuple)) {
found.set(true);
}
}
});
if(found.get()) {
return;
}
/*
* Not found. Let's create some meaningful message
*/
if(tuples.get() == 0) {
throw new AssertionError("Empty output " + output);
}
System.err.println("Not found in output. Tuple: " + expectedTuple);
readTuples(new Path(output), new Configuration(), new TupleVisitor() {
public void onTuple(ITuple tuple) {
System.err.println("Output entry -> Tuple: " + tuple);
};
});
throw new AssertionError("Not found in output -> Tuple: " + expectedTuple);
}
public void withOutput(String output, Object key, Object value) throws IOException {
List<Pair<Object, Object>> outs = ensureOutput(output);
for(Pair<Object, Object> inOutput : outs) {
if(inOutput.getFirst().equals(key) && inOutput.getSecond().equals(value)) {
return;
}
}
/*
* Not found. Let's create some meaningful message
*/
if(outs.size() == 0) {
throw new AssertionError("Empty output " + output);
}
System.err.println("Not found in output. KEY: " + key + ", VALUE: " + value);
for(Pair<Object, Object> inOutput : outs) {
System.err.println("Output entry -> KEY: " + inOutput.getFirst() + ", VALUE: " + inOutput.getSecond());
}
throw new AssertionError("Not found in output -> KEY: " + key + ", VALUE: " + value);
}
public List<Pair<Object, Object>> ensureOutput(String output) throws IOException {
List<Pair<Object, Object>> outs = outputs.get(output);
if(outs == null) {
outs = new ArrayList<Pair<Object, Object>>();
SequenceFile.Reader reader = new SequenceFile.Reader(fS, new Path(output), getConf());
Object keyToRead, valueToRead;
keyToRead = ReflectionUtils.newInstance(reader.getKeyClass(), getConf());
valueToRead = ReflectionUtils.newInstance(reader.getValueClass(), getConf());
while(reader.next(keyToRead) != null) {
valueToRead = reader.getCurrentValue(valueToRead);
Pair<Object, Object> p = new Pair<Object, Object>(keyToRead, valueToRead);
outs.add(p);
keyToRead = ReflectionUtils.newInstance(reader.getKeyClass(), getConf());
valueToRead = ReflectionUtils.newInstance(reader.getValueClass(), getConf());
}
reader.close();
outputs.put(output, outs);
}
return outs;
}
/**
* Dumps to string the given output
*/
public void dumpOutput(String output) throws IOException {
List<Pair<Object, Object>> outs = ensureOutput(output);
for(Pair<Object, Object> inOutput : outs) {
System.out.println("KEY: " + inOutput.getFirst() + ", VALUE: " + inOutput.getSecond());
}
}
}