/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tajo.storage;
import com.google.protobuf.CodedInputStream;
import com.google.protobuf.Message;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.tajo.catalog.TableMeta;
import org.apache.tajo.catalog.statistics.TableStat;
import org.apache.tajo.common.TajoDataTypes.DataType;
import org.apache.tajo.datum.DatumFactory;
import org.apache.tajo.datum.NullDatum;
import org.apache.tajo.datum.ProtobufDatumFactory;
import org.apache.tajo.util.BitArray;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.net.URI;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.Arrays;
public class RawFile {
private static final Log LOG = LogFactory.getLog(RawFile.class);
public static class RawFileScanner extends FileScanner implements SeekableScanner {
private FileChannel channel;
private DataType[] columnTypes;
private Path path;
private ByteBuffer buffer;
private Tuple tuple;
private int headerSize = 0;
private BitArray nullFlags;
private static final int RECORD_SIZE = 4;
private boolean eof = false;
private long fileSize;
public RawFileScanner(Configuration conf, TableMeta meta, Path path) throws IOException {
super(conf, meta, null);
this.path = path;
init();
}
@SuppressWarnings("unused")
public RawFileScanner(Configuration conf, TableMeta meta, Fragment fragment) throws IOException {
this(conf, meta, fragment.getPath());
}
public void init() throws IOException {
//Preconditions.checkArgument(FileUtil.isLocalPath(path));
// TODO - to make it unified one.
URI uri = path.toUri();
RandomAccessFile raf = new RandomAccessFile(new File(uri), "r");
channel = raf.getChannel();
fileSize = channel.size();
if (LOG.isDebugEnabled()) {
LOG.debug("RawFileScanner open:" + path + "," + channel.position() + ", size :" + channel.size());
}
buffer = ByteBuffer.allocateDirect(65535 * 4);
columnTypes = new DataType[schema.getColumnNum()];
for (int i = 0; i < schema.getColumnNum(); i++) {
columnTypes[i] = schema.getColumn(i).getDataType();
}
tuple = new VTuple(columnTypes.length);
// initial read
channel.read(buffer);
buffer.flip();
nullFlags = new BitArray(schema.getColumnNum());
headerSize = RECORD_SIZE + 2 + nullFlags.bytesLength();
super.init();
}
@Override
public long getNextOffset() throws IOException {
return channel.position();
}
@Override
public void seek(long offset) throws IOException {
channel.position(offset);
}
private boolean fillBuffer() throws IOException {
buffer.compact();
if (channel.read(buffer) == -1) {
return false;
} else {
buffer.flip();
return true;
}
}
@Override
public Tuple next() throws IOException {
if(eof) return null;
if (buffer.remaining() < headerSize) {
if (!fillBuffer()) {
return null;
}
}
// backup the buffer state
int recordOffset = buffer.position();
int bufferLimit = buffer.limit();
int recordSize = buffer.getInt();
int nullFlagSize = buffer.getShort();
buffer.limit(buffer.position() + nullFlagSize);
nullFlags.fromByteBuffer(buffer);
// restore the start of record contents
buffer.limit(bufferLimit);
buffer.position(recordOffset + headerSize);
if (buffer.remaining() < (recordSize - headerSize)) {
if (!fillBuffer()) {
return null;
}
}
for (int i = 0; i < columnTypes.length; i++) {
// check if the i'th column is null
if (nullFlags.get(i)) {
tuple.put(i, DatumFactory.createNullDatum());
continue;
}
switch (columnTypes[i].getType()) {
case BOOLEAN :
tuple.put(i, DatumFactory.createBool(buffer.get()));
break;
case BIT :
tuple.put(i, DatumFactory.createBit(buffer.get()));
break;
case CHAR :
int realLen = buffer.getInt();
byte[] buf = new byte[columnTypes[i].getLength()];
buffer.get(buf);
byte[] charBuf = Arrays.copyOf(buf, realLen);
tuple.put(i, DatumFactory.createChar(charBuf));
break;
case INT2 :
tuple.put(i, DatumFactory.createInt2(buffer.getShort()));
break;
case INT4 :
tuple.put(i, DatumFactory.createInt4(buffer.getInt()));
break;
case INT8 :
tuple.put(i, DatumFactory.createInt8(buffer.getLong()));
break;
case FLOAT4 :
tuple.put(i, DatumFactory.createFloat4(buffer.getFloat()));
break;
case FLOAT8 :
tuple.put(i, DatumFactory.createFloat8(buffer.getDouble()));
break;
case TEXT :
// TODO - shoud use CharsetEncoder / CharsetDecoder
//byte [] rawBytes = getColumnBytes();
int strSize2 = buffer.getInt();
byte [] strBytes2 = new byte[strSize2];
buffer.get(strBytes2);
tuple.put(i, DatumFactory.createText(new String(strBytes2)));
break;
case BLOB : {
//byte [] rawBytes = getColumnBytes();
int byteSize = buffer.getInt();
byte [] rawBytes = new byte[byteSize];
buffer.get(rawBytes);
tuple.put(i, DatumFactory.createBlob(rawBytes));
break;
}
case PROTOBUF: {
//byte [] rawBytes = getColumnBytes();
int byteSize = buffer.getInt();
byte [] rawBytes = new byte[byteSize];
buffer.get(rawBytes);
ProtobufDatumFactory factory = ProtobufDatumFactory.get(columnTypes[i]);
Message.Builder builder = factory.newBuilder();
builder.mergeFrom(rawBytes);
tuple.put(i, factory.createDatum(builder.build()));
break;
}
case INET4 :
byte [] ipv4Bytes = new byte[4];
buffer.get(ipv4Bytes);
tuple.put(i, DatumFactory.createInet4(ipv4Bytes));
break;
case NULL:
tuple.put(i, NullDatum.get());
break;
default:
}
}
if(!buffer.hasRemaining() && channel.position() == fileSize){
eof = true;
}
return tuple;
}
/**
* It reads a variable byte array whose length is represented as a variable unsigned integer.
*
* @return A byte array read
*/
private byte [] getColumnBytes() throws IOException {
byte [] lenBytesLen = new byte[4];
buffer.mark();
buffer.get(lenBytesLen);
CodedInputStream ins = CodedInputStream.newInstance(lenBytesLen);
int bytesLen = ins.readUInt32(); // get a variable unsigned integer length to be read
int read = ins.getTotalBytesRead();
buffer.reset();
buffer.position(buffer.position() + read);
byte [] rawBytes = new byte[bytesLen];
buffer.get(rawBytes);
return rawBytes;
}
@Override
public void reset() throws IOException {
// clear the buffer
buffer.clear();
// reload initial buffer
channel.position(0);
channel.read(buffer);
buffer.flip();
eof = false;
}
@Override
public void close() throws IOException {
buffer.clear();
channel.close();
}
@Override
public boolean isProjectable() {
return false;
}
@Override
public boolean isSelectable() {
return false;
}
@Override
public boolean isSplittable(){
return false;
}
}
public static class RawFileAppender extends FileAppender {
  private FileChannel channel;
  private RandomAccessFile randomAccessFile;
  private DataType[] columnTypes;

  private ByteBuffer buffer;
  private BitArray nullFlags;
  private int headerSize = 0;
  // width in bytes of the fixed record-length field at the start of each row header
  private static final int RECORD_SIZE = 4;

  // per-column statistics collector; created in init() only when enabledStats is true
  private TableStatistics stats;

  /**
   * Creates an appender that writes tuples to a local file in the RawFile
   * binary row format. Each row is laid out as: 4-byte record length,
   * 2-byte null-flag length, null-flag bytes, then the column values.
   * NOTE(review): init() is not invoked here — presumably the caller or
   * framework calls it before the first addTuple(); confirm.
   */
  public RawFileAppender(Configuration conf, TableMeta meta, Path path) throws IOException {
    super(conf, meta, path);
  }

  public void init() throws IOException {
    // TODO - RawFile only works on Local File System.
    //Preconditions.checkArgument(FileUtil.isLocalPath(path));
    File file = new File(path.toUri());
    randomAccessFile = new RandomAccessFile(file, "rw");
    channel = randomAccessFile.getChannel();

    // cache the column types so addTuple() does not hit the schema per row
    columnTypes = new DataType[schema.getColumnNum()];
    for (int i = 0; i < schema.getColumnNum(); i++) {
      columnTypes[i] = schema.getColumn(i).getDataType();
    }

    buffer = ByteBuffer.allocateDirect(65535);

    // comput the number of bytes, representing the null flags
    nullFlags = new BitArray(schema.getColumnNum());
    // row header = 4-byte record length + 2-byte null-flag length + null-flag bytes
    headerSize = RECORD_SIZE + 2 + nullFlags.bytesLength();

    if (enabledStats) {
      this.stats = new TableStatistics(this.schema);
    }

    super.init();
  }

  /**
   * Current write position in the file.
   * NOTE(review): bytes still sitting in the in-memory buffer are not yet
   * written, so this lags behind the logical end of appended data — confirm
   * callers expect the flushed offset rather than the logical one.
   */
  @Override
  public long getOffset() throws IOException {
    return channel.position();
  }

  // Writes every buffered byte to the channel and empties the buffer.
  private void flushBuffer() throws IOException {
    // NOTE(review): flip() already sets limit to the old position, so the
    // explicit limit() call below is redundant (but harmless).
    buffer.limit(buffer.position());
    buffer.flip();
    channel.write(buffer);
    buffer.clear();
  }

  /**
   * Ensures at least sizeToBeWritten bytes can be appended. If not, flushes
   * every complete record before recordOffset and compacts the bytes of the
   * current (partially written) record to the front of the buffer.
   *
   * @param recordOffset buffer offset where the current record started
   * @param sizeToBeWritten number of bytes the caller is about to write
   * @return true if a flush/compaction happened; the caller must then treat
   *         the current record as starting at buffer offset 0
   */
  private boolean flushBufferAndReplace(int recordOffset, int sizeToBeWritten)
      throws IOException {

    // if the buffer reaches the limit,
    // write the bytes from 0 to the previous record.
    if (buffer.remaining() < sizeToBeWritten) {

      int limit = buffer.position();
      buffer.limit(recordOffset);
      buffer.flip();
      channel.write(buffer);
      // restore a window covering only the in-progress record, then compact
      // moves those bytes to the front of the buffer
      buffer.position(recordOffset);
      buffer.limit(limit);
      buffer.compact();

      return true;
    } else {
      return false;
    }
  }

  @Override
  public void addTuple(Tuple t) throws IOException {

    if (buffer.remaining() < headerSize) {
      flushBuffer();
    }

    // skip the row header; it is back-filled at the end once the record
    // length and null flags are known
    int recordOffset = buffer.position();
    buffer.position(buffer.position() + headerSize);
    // reset the null flags
    nullFlags.clear();
    for (int i = 0; i < schema.getColumnNum(); i++) {
      if (enabledStats) {
        stats.analyzeField(i, t.get(i));
      }

      if (t.isNull(i)) {
        nullFlags.set(i);
        continue;
      }

      // 8 is the maximum bytes size of all types
      if (flushBufferAndReplace(recordOffset, 8)) {
        recordOffset = 0;
      }

      switch(columnTypes[i].getType()) {
        case NULL:
          nullFlags.set(i);
          continue;

        case BOOLEAN:
        case BIT:
          buffer.put(t.get(i).asByte());
          break;

        case CHAR :
          // pad (or truncate) to the declared length; the real length is
          // written first so the scanner can strip the padding
          byte[] src = t.getChar(i).asByteArray();
          byte[] dst = Arrays.copyOf(src, columnTypes[i].getLength());
          buffer.putInt(src.length);
          buffer.put(dst);
          break;

        case INT2 :
          buffer.putShort(t.get(i).asInt2());
          break;

        case INT4 :
          buffer.putInt(t.get(i).asInt4());
          break;

        case INT8 :
          buffer.putLong(t.get(i).asInt8());
          break;

        case FLOAT4 :
          buffer.putFloat(t.get(i).asFloat4());
          break;

        case FLOAT8 :
          buffer.putDouble(t.get(i).asFloat8());
          break;

        case TEXT:
          byte [] strBytes2 = t.get(i).asByteArray();
          // variable-length value: re-check space for length prefix + payload
          if (flushBufferAndReplace(recordOffset, strBytes2.length + 4)) {
            recordOffset = 0;
          }
          buffer.putInt(strBytes2.length);
          buffer.put(strBytes2);
          break;

        case BLOB : {
          byte [] rawBytes = t.get(i).asByteArray();
          if (flushBufferAndReplace(recordOffset, rawBytes.length + 4)) {
            recordOffset = 0;
          }
          buffer.putInt(rawBytes.length);
          buffer.put(rawBytes);
          break;
        }

        case PROTOBUF: {
          // TODO - to be fixed
          // the commented-out code below would use a varint length prefix
          // (as getColumnBytes() on the scanner side expects); currently a
          // fixed 4-byte length is written instead
//          byte [] lengthByte = new byte[4];
//          byte [] byteArray = t.get(i).asByteArray();
//          CodedOutputStream outputStream = CodedOutputStream.newInstance(lengthByte);
//          outputStream.writeUInt32NoTag(byteArray.length);
//          outputStream.flush();
//          int legnthByteLength = CodedOutputStream.computeInt32SizeNoTag(byteArray.length);
//          if (flushBufferAndReplace(recordOffset, byteArray.length + legnthByteLength)) {
//            recordOffset = 0;
//          }
//          buffer.put(lengthByte, 0, legnthByteLength);
          byte [] rawBytes = t.get(i).asByteArray();
          if (flushBufferAndReplace(recordOffset, rawBytes.length + 4)) {
            recordOffset = 0;
          }
          buffer.putInt(rawBytes.length);
          buffer.put(rawBytes);
          break;
        }

        case INET4 :
          // fixed 4 bytes; covered by the 8-byte reserve above
          buffer.put(t.get(i).asByteArray());
          break;

        default:
          throw new IOException("Cannot support data type: " + columnTypes[i].getType());
      }
    }

    // write a record header
    // (back-fill record length and null flags at recordOffset, then restore
    // the write position to the end of the record)
    int pos = buffer.position();
    buffer.position(recordOffset);
    buffer.putInt(pos - recordOffset);

    byte [] flags = nullFlags.toArray();
    buffer.putShort((short) flags.length);
    buffer.put(flags);

    buffer.position(pos);

    if (enabledStats) {
      stats.incrementRow();
    }
  }

  @Override
  public void flush() throws IOException {
    flushBuffer();
    // force metadata and data to the storage device
    channel.force(true);
  }

  @Override
  public void close() throws IOException {
    flush();
    // closing the RandomAccessFile also closes its channel
    randomAccessFile.close();
  }

  /** Returns collected table statistics, or null when stats are disabled. */
  @Override
  public TableStat getStats() {
    if (enabledStats) {
      return stats.getTableStat();
    } else {
      return null;
    }
  }
}
}