package org.apache.hadoop.io.simpleseekableformat;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Random;
import junit.framework.Assert;
import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.io.simpleseekableformat.SimpleSeekableFormat;
import org.apache.hadoop.io.simpleseekableformat.SimpleSeekableFormatInputStream;
import org.apache.hadoop.io.simpleseekableformat.SimpleSeekableFormatOutputStream;
/**
* Tests for {@link SimpleSeekableFormatInputStream} and {@link SimpleSeekableFormatOutputStream}:
* plain write/read round trips, forward seeks, and forward seeks over truncated files.
*/
public class TestSimpleSeekableFormatStreams extends TestCase {

  /**
   * Writes records and reads them back, once without a codec and once with gzip.
   */
  public void testNormalWriteAndRead() throws Exception {
    testNormalWriteAndRead(null);
    testNormalWriteAndRead(GzipCodec.class);
  }

  /**
   * Runs the plain write/read round trip for several record-count / record-size
   * combinations.
   *
   * @param codecClass codec to configure on the writer, or null for none
   */
  void testNormalWriteAndRead(final Class<? extends CompressionCodec> codecClass
      ) throws Exception {
    // Not using loops here so we can know the exact parameter values from
    // the stack trace.
    (new NormalWriteAndReadTester(codecClass, 1, 65536)).run();
    (new NormalWriteAndReadTester(codecClass, 200, 16384)).run();
    (new NormalWriteAndReadTester(codecClass, 1000, 4096)).run();
  }

  /**
   * Test the seekForward function.
   */
  public void testNormalWriteAndForwardRead() throws Exception {
    testNormalWriteAndForwardRead(null, false);
    testNormalWriteAndForwardRead(GzipCodec.class, false);
    // useFileSystem = true, for testing seek using Seekable
    testNormalWriteAndForwardRead(null, true);
    testNormalWriteAndForwardRead(GzipCodec.class, true);
  }

  /**
   * Runs the seekForward test for a range of "initially available" byte counts.
   *
   * @param codecClass codec to configure on the writer, or null for none
   * @param useFileSystem true to read through a local file, false to read from memory
   */
  void testNormalWriteAndForwardRead(final Class<? extends CompressionCodec> codecClass,
      boolean useFileSystem) throws Exception {
    (new NormalWriteAndForwardReadTester(codecClass, 1000, 16384, 10, useFileSystem)).run();
    (new NormalWriteAndForwardReadTester(codecClass, 1000, 16384, 1024, useFileSystem)).run();
    (new NormalWriteAndForwardReadTester(codecClass, 1000, 16384, 500 * 1000, useFileSystem)).run();
    (new NormalWriteAndForwardReadTester(codecClass, 1000, 16384, 3 * 1024 * 1024 - 100, useFileSystem)).run();
    (new NormalWriteAndForwardReadTester(codecClass, 1000, 16384, 3 * 1024 * 1024, useFileSystem)).run();
    (new NormalWriteAndForwardReadTester(codecClass, 1000, 16384, 3 * 1024 * 1024 + 100, useFileSystem)).run();
    (new NormalWriteAndForwardReadTester(codecClass, 1000, 16384, 3 * 1024 * 1024 + 1024, useFileSystem)).run();
  }

  /**
   * Test the seekForward function with truncated files.
   */
  public void testTruncatedWriteAndForwardRead() throws Exception {
    testTruncatedWriteAndForwardRead(null, false);
    testTruncatedWriteAndForwardRead(GzipCodec.class, false);
    // useFileSystem = true, for testing seek using Seekable
    testTruncatedWriteAndForwardRead(null, true);
    testTruncatedWriteAndForwardRead(GzipCodec.class, true);
  }

  /**
   * Runs the truncated-file seekForward test for a range of available-byte and
   * truncation-length combinations.
   *
   * @param codecClass codec to configure on the writer, or null for none
   * @param useFileSystem true to read through a local file, false to read from memory
   */
  void testTruncatedWriteAndForwardRead(final Class<? extends CompressionCodec> codecClass,
      boolean useFileSystem) throws Exception {
    (new TruncatedWriteAndForwardReadTester(codecClass, 1000, 16384, 10, 80, useFileSystem)).run();
    (new TruncatedWriteAndForwardReadTester(codecClass, 1000, 16384, 1024, 100 * 1000, useFileSystem)).run();
    (new TruncatedWriteAndForwardReadTester(codecClass, 1000, 16384, 500 * 1000, 1024 * 1024, useFileSystem)).run();
    (new TruncatedWriteAndForwardReadTester(codecClass, 1000, 16384, 3 * 1024 * 1024 - 100, 6 * 1024 * 1024, useFileSystem)).run();
    (new TruncatedWriteAndForwardReadTester(codecClass, 1000, 16384, 3 * 1024 * 1024, 6 * 1024 * 1024, useFileSystem)).run();
    (new TruncatedWriteAndForwardReadTester(codecClass, 1000, 16384, 3 * 1024 * 1024 + 100, 6 * 1024 * 1024, useFileSystem)).run();
    (new TruncatedWriteAndForwardReadTester(codecClass, 1000, 16384, 3 * 1024 * 1024 + 1024, 6 * 1024 * 1024, useFileSystem)).run();
  }

  /**
   * Writes pseudo-random records to an in-memory file and reads them all back,
   * comparing every record against data regenerated from the same seed.
   */
  static class NormalWriteAndReadTester {

    protected final Class<? extends CompressionCodec> codecClass;
    protected final int numRecord;
    protected final int maxRecordSize;

    /**
     * @param codecClass
     *          codec class to set in the writer's configuration; null leaves
     *          the codec setting untouched.
     * @param numRecord
     *          number of records to write and then verify.
     * @param maxRecordSize
     *          exclusive upper bound for the random length of each record.
     */
    NormalWriteAndReadTester(final Class<? extends CompressionCodec> codecClass,
        final int numRecord, final int maxRecordSize) throws Exception {
      this.codecClass = codecClass;
      this.numRecord = numRecord;
      this.maxRecordSize = maxRecordSize;
    }

    /**
     * Writes the data, reads it back for verification, and prints the file size
     * and the elapsed time of both phases.
     */
    void run() throws Exception {
      // Random seed for data to be written; the reader replays the same seed
      // to regenerate the expected bytes.
      final int randSeed = 333;
      // Write
      long startMs = System.currentTimeMillis();
      ByteArrayOutputStream inMemoryFile = write(new Random(randSeed));
      long writeDoneMs = System.currentTimeMillis();
      // Read
      read(new Random(randSeed), inMemoryFile);
      long readDoneMs = System.currentTimeMillis();
      // Output file size and time used for debugging purpose
      System.out.println("File size = " + inMemoryFile.size()
          + " writeMs=" + (writeDoneMs - startMs)
          + " readMs=" + (readDoneMs - writeDoneMs)
          + " numRecord=" + numRecord + " maxRecordSize=" + maxRecordSize
          + " codec=" + codecClass);
    }

    /**
     * Writes {@code numRecord} random-length records through a
     * {@link SimpleSeekableFormatOutputStream}, flushing after every 100th record.
     *
     * @param dataRandom source of record lengths and record contents
     * @return the in-memory file holding everything that was written
     */
    ByteArrayOutputStream write(final Random dataRandom) throws Exception {
      // Create the in-memory file and start to write to it.
      ByteArrayOutputStream inMemoryFile = new ByteArrayOutputStream();
      SimpleSeekableFormatOutputStream out = new SimpleSeekableFormatOutputStream(inMemoryFile);
      // Set compression Codec
      Configuration conf = new Configuration();
      if (codecClass != null) {
        conf.setClass(SimpleSeekableFormat.FILEFORMAT_SSF_CODEC_CONF, codecClass,
            CompressionCodec.class);
      }
      out.setConf(conf);
      // Write some data
      for (int r = 0; r < numRecord; r++) {
        byte[] b = new byte[dataRandom.nextInt(maxRecordSize)];
        // Generate some compressible random data
        UtilsForTests.nextBytes(dataRandom, b, 16);
        out.write(b);
        if (r % 100 == 99) {
          out.flush();
        }
      }
      out.close();
      return inMemoryFile;
    }

    /**
     * Reads every record back from the start of the file and checks it against
     * the regenerated data, then checks that the stream reports EOF.
     *
     * @param dataRandom2 random source seeded identically to the one used by write
     * @param inMemoryFile the file contents produced by write
     */
    void read(final Random dataRandom2, final ByteArrayOutputStream inMemoryFile)
        throws Exception {
      // Open the in-memory file for read
      ByteArrayInputStream fileForRead = new ByteArrayInputStream(inMemoryFile.toByteArray());
      SimpleSeekableFormatInputStream in = new SimpleSeekableFormatInputStream(fileForRead);
      DataInputStream dataIn = new DataInputStream(in);
      // Verify the data
      for (int r = 0; r < numRecord; r++) {
        // Regenerate the same random bytes
        byte[] b = new byte[dataRandom2.nextInt(maxRecordSize)];
        UtilsForTests.nextBytes(dataRandom2, b, 16);
        // Read from the file
        byte[] b2 = new byte[b.length];
        dataIn.readFully(b2);
        UtilsForTests.assertArrayEquals("record " + r + " with length " + b.length,
            b, b2);
      }
      // Verify EOF
      Assert.assertEquals(-1, in.read());
      byte[] temp = new byte[100];
      Assert.assertEquals(-1, in.read(temp));
    }
  }

  /**
   * Same write phase as {@link NormalWriteAndReadTester}, but the read phase
   * first calls seekForward() and then verifies only the records at or after
   * the position it returned.
   */
  static class NormalWriteAndForwardReadTester extends NormalWriteAndReadTester {

    protected final int availableBytes;
    protected final boolean useFileSystem;

    /**
     * @param codecClass
     *          codec class to set in the writer's configuration, or null.
     * @param numRecord
     *          number of records to write.
     * @param maxRecordSize
     *          exclusive upper bound for the random length of each record.
     * @param availableBytes
     *          number of bytes the in-memory stream reports as available until
     *          that many bytes have been consumed.
     * @param useFileSystem
     *          true to copy the data to a local file and read it through the
     *          FileSystem API; false to read from memory.
     */
    NormalWriteAndForwardReadTester(
        Class<? extends CompressionCodec> codecClass, int numRecord,
        int maxRecordSize, int availableBytes,
        boolean useFileSystem) throws Exception {
      super(codecClass, numRecord, maxRecordSize);
      this.availableBytes = availableBytes;
      this.useFileSystem = useFileSystem;
    }

    /**
     * Opens the written data either from memory or from a temporary local file
     * and runs the seekForward verification on it. The temporary file and its
     * streams are always released, even when verification throws.
     */
    @Override
    void read(final Random dataRandom2, final ByteArrayOutputStream inMemoryFile)
        throws Exception {
      byte[] data = inMemoryFile.toByteArray();
      if (useFileSystem) {
        readThroughLocalFile(dataRandom2, data);
      } else {
        verifyForwardRead(dataRandom2, openLimitedInMemoryStream(data));
      }
    }

    /**
     * Writes data to a file and then tests it.
     * This is useful for testing "seek" because FSInputStream is a Seekable.
     */
    private void readThroughLocalFile(final Random dataRandom2, final byte[] data)
        throws Exception {
      Configuration conf = new Configuration();
      FileSystem fs = LocalFileSystem.getLocal(conf);
      Path file = new Path(System.getProperty("user.dir") + "/test_seek.ssf");
      try {
        OutputStream out = fs.create(file);
        try {
          out.write(data);
        } finally {
          out.close();
        }
        InputStream fileForRead = fs.open(file);
        try {
          verifyForwardRead(dataRandom2, fileForRead);
        } finally {
          fileForRead.close();
        }
      } finally {
        // Do not leave the scratch file behind in the working directory.
        fs.delete(file, false);
      }
    }

    /**
     * Returns an in-memory stream over data whose available() is capped by
     * availableBytes.
     */
    private InputStream openLimitedInMemoryStream(final byte[] data) {
      return new ByteArrayInputStream(data) {
        /**
         * Only expose at most availableBytes until those bytes are all read.
         * This is to simulate a growing file.
         */
        @Override
        public int available() {
          if (pos < availableBytes) {
            return Math.min(super.available(), availableBytes - pos);
          } else {
            return super.available();
          }
        }
      };
    }

    /**
     * Calls seekForward() on the stream, checks that the raw offset advanced at
     * least past the initially available complete blocks, then verifies every
     * record that starts at or after the returned position and finally EOF.
     */
    private void verifyForwardRead(final Random dataRandom2, final InputStream fileForRead)
        throws Exception {
      SimpleSeekableFormatInputStream in = new SimpleSeekableFormatInputStream(fileForRead);
      DataInputStream dataIn = new DataInputStream(in);
      long seekedPosition = in.seekForward();
      {
        // We should not be at the beginning of the stream any more.
        InterleavedInputStream interleavedIn = in.getInterleavedIn();
        long blocks = interleavedIn.getRawOffset() / interleavedIn.getCompleteBlockSize();
        long blocksAvailable = (availableBytes - interleavedIn.getMetaDataBlockSize()) / interleavedIn.getCompleteBlockSize();
        blocksAvailable = Math.max(0, blocksAvailable);
        Assert.assertTrue("raw offset after seekForward is in block " + blocks
            + ", expected at least block " + blocksAvailable,
            blocks >= blocksAvailable);
      }
      long currentUncompressedPosition = 0;
      for (int r = 0; r < numRecord; r++) {
        // Regenerate the same random bytes; this must happen for skipped
        // records too so the random sequence stays aligned with the writer.
        byte[] b = new byte[dataRandom2.nextInt(maxRecordSize)];
        UtilsForTests.nextBytes(dataRandom2, b, 16);
        if (currentUncompressedPosition >= seekedPosition) {
          // Read from the file
          byte[] b2 = new byte[b.length];
          dataIn.readFully(b2);
          UtilsForTests.assertArrayEquals("record " + r + " with length " + b.length,
              b, b2);
        }
        currentUncompressedPosition += b.length;
      }
      // Verify EOF
      Assert.assertEquals(-1, in.read());
      byte[] temp = new byte[100];
      Assert.assertEquals(-1, in.read(temp));
    }
  }

  /**
   * Forward-read tester whose written file is cut to at most
   * {@code truncatedBytes} bytes before it is read.
   */
  static class TruncatedWriteAndForwardReadTester extends NormalWriteAndForwardReadTester {

    private final int truncatedBytes;

    /**
     * @param truncatedBytes
     *          maximum number of bytes of the written file that are kept; the
     *          remaining parameters are passed to the superclass unchanged.
     */
    TruncatedWriteAndForwardReadTester(
        Class<? extends CompressionCodec> codecClass, int numRecord,
        int maxRecordSize, int availableBytes, int truncatedBytes,
        boolean useFileSystem) throws Exception {
      super(codecClass, numRecord, maxRecordSize, availableBytes, useFileSystem);
      this.truncatedBytes = truncatedBytes;
    }

    /**
     * Writes the full file via the superclass and returns a copy holding only
     * its first {@code truncatedBytes} bytes (or all of it if it is shorter).
     */
    @Override
    ByteArrayOutputStream write(final Random dataRandom) throws Exception {
      ByteArrayOutputStream out = super.write(dataRandom);
      ByteArrayOutputStream result = new ByteArrayOutputStream();
      byte[] fullArray = out.toByteArray();
      result.write(fullArray, 0, Math.min(fullArray.length, truncatedBytes));
      return result;
    }

    /**
     * Runs the forward-read verification, treating an EOFException as an
     * acceptable outcome.
     */
    @Override
    void read(final Random dataRandom2, final ByteArrayOutputStream inMemoryFile)
        throws Exception {
      try {
        super.read(dataRandom2, inMemoryFile);
      } catch (EOFException e) {
        // Deliberately tolerated: the file was cut short by write(), so the
        // reader running out of data is not a test failure.
        System.out.println("Hit EOF while testing truncated file...");
      }
    }
  }
}