Package avrobase.file

Source Code of avrobase.file.FAB

package avrobase.file;

import avrobase.AvroBaseException;
import avrobase.AvroBaseImpl;
import avrobase.AvroFormat;
import avrobase.Creator;
import avrobase.Mutator;
import avrobase.ReversableFunction;
import avrobase.Row;
import com.google.common.base.Supplier;
import jinahya.rfc4648.Base32HEX;
import org.apache.avro.AvroTypeException;
import org.apache.avro.Schema;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificRecord;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Queue;
import java.util.Set;
import java.util.Stack;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

/**
* File based avrobase.
* <p/>
* Every row is kept in its own file below the data directory. A row file starts with
* HASH_LENGTH bytes of schema hash, followed by LONG_LENGTH bytes of version number and
* then the serialized record. Schemas are kept in a separate directory, one file per
* schema, named by the schema hash.
* <p/>
* User: sam
* Date: 10/10/10
* Time: 4:08 PM
*/
public class FAB<T extends SpecificRecord, K> extends AvroBaseImpl<T, K> {

  // Number of bytes of schema hash stored at the start of every row file.
  private static final int HASH_LENGTH = 64;
  // Number of bytes used for the row version that follows the schema hash.
  private static final int LONG_LENGTH = 8;
  // Root directory under which row files are stored.
  private File dir;
  // Directory holding one file per schema, named by the schema hash.
  private File schemaDir;
  // Codec used to turn key bytes into file names and back.
  private final Base32HEX base32hex = new Base32HEX();

  // In-process read/write locks, keyed by the encoded row key.
  private final Map<String, ReadWriteLock> locks = new ConcurrentHashMap<String, ReadWriteLock>();
  // Generates keys for create(); may be null, in which case create() fails.
  private Supplier<K> supplier;
  // Converts keys to and from the byte form that is encoded into file names.
  private ReversableFunction<K, byte[]> transformer;

  public FAB(String directory, String schemaDirectory, Supplier<K> supplier, Schema actualSchema, AvroFormat format, ReversableFunction<K, byte[]> transformer) {
    super(actualSchema, format);
    dir = new File(directory);
    dir.mkdirs();
    schemaDir = new File(schemaDirectory);
    schemaDir.mkdirs();
    this.supplier = supplier;
    this.transformer = transformer == null ? new ReversableFunction<K, byte[]>() {
      @Override
      public byte[] apply(K k) {
        return (byte[]) k;
      }

      @Override
      public K unapply(byte[] bytes) {
        return (K) bytes;
      }
    } : transformer;
  }

  private String toFile(K row) {
    StringBuilder sb = new StringBuilder();
    String s = toString(row);
    if (s.length() > 2) {
      sb.append(s.substring(0, 2));
      sb.append("/");
      if (s.length() > 4) {
        sb.append(s.substring(2, 4));
        sb.append("/").append(s.substring(4));
      } else {
        sb.append(s.substring(2));
      }
      s = sb.toString();
    }
    return s;
  }

  private String toString(K row) {
    if (row == null) return null;
    byte[] bytes = transformer == null ? (byte[]) row : transformer.apply(row);
    try {
      return new String(base32hex.encode(bytes));
    } catch (IOException e) {
      throw new AvroBaseException("Could not encode");
    }
  }


  @Override
  public Row<T, K> get(K row) throws AvroBaseException {
    Lock lock = readLock(row);
    try {
      return _get(row);
    } catch (Exception e) {
      throw new AvroBaseException("Failed to get row: " + row, e);
    } finally {
      lock.unlock();
    }
  }

  private Lock readLock(K row) {
    ReadWriteLock readWriteLock = getLock(toString(row));
    Lock lock = readWriteLock.readLock();
    lock.lock();
    return lock;
  }

  private Row<T, K> _get(K row) throws IOException {
    File file = getFile(row, false);
    if (!file.exists()) return null;
    FileInputStream fis = new FileInputStream(file);
    FileChannel channel = fis.getChannel();
    // Lock the file on disk
    InputStream is = new BufferedInputStream(fis);
    try {
      // Read the hash of the schema
      byte[] bytes = new byte[HASH_LENGTH];
      int total = 0;
      int read;
      while (total != HASH_LENGTH && (read = is.read(bytes, total, HASH_LENGTH - total)) != -1) {
        total += read;
      }
      String hash = new String(bytes);
      // Read the version of the object
      bytes = new byte[8];
      total = 0;
      while (total != LONG_LENGTH && (read = is.read(bytes, total, LONG_LENGTH - total)) != -1) {
        total += read;
      }
      long version = ByteBuffer.wrap(bytes).getLong();
      // Get the schema
      Schema schema = schemaCache.get(hash);
      if (schema == null) {
        File schemaFile = new File(schemaDir, hash);
        try {
          schema = Schema.parse(new FileInputStream(schemaFile));
        } catch (IOException ioe) {
          throw new AvroBaseException("Failed to read schema for hash: " + hash + " row: " + row, ioe);
        }
        schemaCache.put(hash, schema);
        hashCache.put(schema, hash);
      }
      try {
        DecoderFactory decoderFactory = new DecoderFactory();
        Decoder d;
        switch (format) {
          case JSON:
            d = decoderFactory.jsonDecoder(schema, is);
            break;
          case BINARY:
          default:
            d = decoderFactory.binaryDecoder(is, null);
            break;
        }
        // Read the data
        SpecificDatumReader<T> sdr = new SpecificDatumReader<T>(schema);
        sdr.setExpected(actualSchema);
        return new Row<T, K>(sdr.read(null, d), row, version);
      } catch (IOException e) {
        throw new AvroBaseException("Failed to read file: " + schema, e);
      } catch (AvroTypeException e) {
        throw new AvroBaseException("Failed to read value: " + schema, e);
      }
    } finally {
      channel.close();
      is.close();
    }
  }

  private Set<File> madedirs = new ConcurrentSkipListSet<File>();

  private File getFile(K row, boolean mkdirs) {
    File file = new File(dir, toFile(row));
    if (mkdirs) {
      File parentFile = file.getParentFile();
      if (!madedirs.contains(parentFile)) {
        madedirs.add(parentFile);
        parentFile.mkdirs();
      }
    }
    return file;
  }

  private ReadWriteLock getLock(String lockKey) {
    ReadWriteLock readWriteLock;
    synchronized (locks) {
      readWriteLock = locks.get(lockKey);
      if (readWriteLock == null) {
        readWriteLock = new ReentrantReadWriteLock();
        locks.put(lockKey, readWriteLock);
      }
    }
    return readWriteLock;
  }

  @Override
  public K create(T value) throws AvroBaseException {
    if (supplier == null) throw new AvroBaseException("No key generator provided");
    K row = supplier.get();
    put(row, value);
    return row;
  }

  @Override
  public void put(K row, T value) throws AvroBaseException {
    Lock lock = writeLock(row);
    try {
      File file = getFile(row, true);
      RandomAccessFile raf = new RandomAccessFile(file, "rw");
      FileChannel channel = raf.getChannel();
      FileLock fileLock = channel.lock();
      try {
        Schema schema = value.getSchema();
        String hash = hashCache.get(schema);
        if (hash == null) {
          String doc = schema.toString();
          hash = createSchemaKey(schema, doc);
          File schemaFile = new File(schemaDir, hash);
          if (!schemaFile.exists()) {
            FileOutputStream schemaOs = new FileOutputStream(schemaFile);
            schemaOs.write(doc.getBytes());
            schemaOs.close();
          }
        }
        File tmp = new File(file.getCanonicalPath() + ".tmp");
        BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(tmp));
        bos.write(hash.getBytes());
        long version = 1;
        if (file.exists()) {
          byte[] bytes = new byte[8];
          int total = 0;
          int read;
          while (total != LONG_LENGTH && (read = raf.read(bytes, total, LONG_LENGTH - total)) != -1) {
            total += read;
          }
          version = ByteBuffer.wrap(bytes).getLong() + 1;
          raf.seek(HASH_LENGTH);
        }
        bos.write(ByteBuffer.wrap(new byte[8]).putLong(version).array());
        bos.write(serialize(value));
        bos.close();
        tmp.renameTo(file);
      } finally {
        channel.force(false);
        fileLock.release();
        channel.close();
        raf.close();
      }
    } catch (Exception e) {
      throw new AvroBaseException("Failed to get row: " + row, e);
    } finally {
      lock.unlock();
    }
  }

  private Lock writeLock(K row) {
    ReadWriteLock readWriteLock = getLock(toString(row));
    Lock lock = readWriteLock.writeLock();
    lock.lock();
    return lock;
  }

  @Override
  public boolean put(K row, T value, long version) throws AvroBaseException {
    Lock lock = writeLock(row);
    try {
      File file = getFile(row, true);
      if (version != 0 && !file.exists()) return false;
      if (version == 0 && file.exists()) return false;
      RandomAccessFile raf = new RandomAccessFile(file, "rw");
      FileChannel channel = raf.getChannel();
      FileLock fileLock = channel.lock();
      try {
        Schema schema = value.getSchema();
        if (file.exists()) {
          raf.seek(HASH_LENGTH);
          byte[] bytes = new byte[8];
          int total = 0;
          int read;
          while (total != LONG_LENGTH && (read = raf.read(bytes, total, LONG_LENGTH - total)) != -1) {
            total += read;
          }
          long saved = ByteBuffer.wrap(bytes).getLong();
          if (saved != version) {
            return false;
          }
        }
        String hash = hashCache.get(schema);
        if (hash == null) {
          String doc = schema.toString();
          hash = createSchemaKey(schema, doc);
          File schemaFile = new File(schemaDir, hash);
          if (!schemaFile.exists()) {
            FileOutputStream schemaOs = new FileOutputStream(schemaFile);
            schemaOs.write(doc.getBytes());
            schemaOs.close();
          }
        }
        File tmp = new File(file.getCanonicalPath() + ".tmp");
        BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(tmp));
        bos.write(hash.getBytes());
        bos.write(ByteBuffer.wrap(new byte[8]).putLong(version + 1).array());
        bos.write(serialize(value));
        bos.close();
        return tmp.renameTo(file);
      } finally {
        channel.force(false);
        fileLock.release();
        channel.close();
        raf.close();
      }
    } catch (Exception e) {
      throw new AvroBaseException("Failed to get row: " + row, e);
    } finally {
      lock.unlock();
    }
  }

  @Override
  public void delete(K row) throws AvroBaseException {
    Lock writeLock = writeLock(row);
    try {
      File file = getFile(row, false);
      RandomAccessFile raf = new RandomAccessFile(file, "rw");
      FileChannel channel = raf.getChannel();
      FileLock fileLock = channel.lock();
      try {
        file.delete();
      } finally {
        fileLock.release();
        channel.close();
        raf.close();
      }
    } catch (FileNotFoundException e) {
      // Already deleted
    } catch (IOException e) {
      throw new AvroBaseException("Failed to delete: " + row, e);
    } finally {
      writeLock.unlock();
    }
  }

  @Override
  public Iterable<Row<T, K>> scan(final K startRow, final K stopRow) throws AvroBaseException {
    final String start = toString(startRow);
    final String stop = toString(stopRow);
    return new Iterable<Row<T, K>>() {
      @Override
      public Iterator<Row<T, K>> iterator() {
        return new Iterator<Row<T, K>>() {
          Queue<File> queue = createFileQueue(dir);
          Stack<Queue<File>> stack = new Stack<Queue<File>>();
          Stack<String> path = new Stack<String>();
          Row<T, K> current;

          @Override
          public synchronized boolean hasNext() {
            if (current != null) return true;
            do {
              File peek = queue.peek();
              if (peek == null) {
                if (stack.size() > 0) {
                  queue = stack.pop();
                  path.pop();
                  continue;
                }
                return false;
              }
              File file = queue.poll();
              if (start != null || stop != null) {
                String p = getPath(file).toString();
                if (!include(p, start, stop)) continue;
              }
              if (file.isDirectory()) {
                path.push(file.getName());
                stack.push(queue);
                queue = createFileQueue(file);
              } else {
                StringBuilder sb = getPath(file);
                try {
                  byte[] decode = base32hex.decode(sb.toString().toCharArray());
                  current = get(transformer == null ? (K) decode : transformer.unapply(decode));
                  return current != null || hasNext();
                } catch (IOException e) {
                  throw new AvroBaseException("Corrupt file system: " + file, e);
                }
              }
            } while (true);
          }

          private StringBuilder getPath(File file) {
            StringBuilder sb = new StringBuilder();
            for (String s : path) {
              sb.append(s);
            }
            sb.append(file.getName());
            return sb;
          }

          @Override
          public Row<T, K> next() {
            if (current == null) hasNext();
            if (current == null) throw new NoSuchElementException();
            Row<T, K> tmp = current;
            current = null;
            return tmp;
          }

          @Override
          public void remove() {
          }
        };
      }
    };
  }

  private LinkedList<File> createFileQueue(File startdir) {
    return new LinkedList<File>(Arrays.asList(startdir.listFiles()));
  }

  private boolean include(String s, String startRow, String stopRow) {
    return !s.endsWith(".tmp") && (startRow == null || s.compareTo(startRow) >= 0) && (stopRow == null || s.compareTo(stopRow) < 0);
  }

  @Override
  public Row<T, K> mutate(K row, Mutator<T> tMutator) throws AvroBaseException {
    Lock writeLock = writeLock(row);
    try {
      File file = getFile(row, true);
      FileInputStream fis = new FileInputStream(file);
      FileChannel channel = fis.getChannel();
      FileLock fileLock = channel.lock();
      try {
        Row<T, K> tStringRow = _get(row);
        if (tStringRow == null) return null;
        T mutate = tMutator.mutate(tStringRow.value);
        if (mutate != null) {
          put(row, mutate);
          return new Row<T, K>(mutate, row);
        }
        return tStringRow;
      } finally {
        fileLock.release();
        channel.close();
        fis.close();
      }
    } catch (FileNotFoundException e) {
      return null;
    } catch (IOException e) {
      throw new AvroBaseException("Failed to delete: " + row, e);
    } finally {
      writeLock.unlock();
    }
  }

  @Override
  public Row<T, K> mutate(K row, Mutator<T> tMutator, Creator<T> tCreator) throws AvroBaseException {
    Lock writeLock = writeLock(row);
    File file = getFile(row, true);
    try {
      FileInputStream fis = new FileInputStream(file);
      FileChannel channel = fis.getChannel();
      FileLock fileLock = channel.lock();
      try {
        Row<T, K> tStringRow = _get(row);
        if (tStringRow == null) return null;
        T mutate = tMutator.mutate(tStringRow.value);
        if (mutate != null) {
          put(row, mutate);
          return new Row<T, K>(mutate, row);
        }
        return tStringRow;
      } finally {
        fileLock.release();
        channel.close();
        fis.close();
      }
    } catch (FileNotFoundException e) {
      try {
        if (file.createNewFile()) {
          FileInputStream fis = new FileInputStream(file);
          FileChannel channel = fis.getChannel();
          FileLock fileLock = channel.lock();
          try {
            if (file.length() == 0) {
              T created = tCreator.create();
              put(row, created);
              return new Row<T, K>(created, row);
            }
          } finally {
            fileLock.release();
            channel.close();
            fis.close();
          }
        }
      } catch (IOException e1) {
        // Then do a normal mutate
      }
      return mutate(row, tMutator);
    } catch (IOException e) {
      throw new AvroBaseException("Failed to delete: " + row, e);
    } finally {
      writeLock.unlock();
    }
  }

}
TOP

Related Classes of avrobase.file.FAB

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.