Package com.wolfram.hadoop.dfs

Source Code of com.wolfram.hadoop.dfs.SequenceFileImportReader

package com.wolfram.hadoop.dfs;

import java.util.ArrayList;

import com.wolfram.jlink.Expr;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.typedbytes.TypedBytesWritable;

import com.wolfram.hadoop.ExprUtil;

/**
* Helper class to speed up importing SequenceFiles into Mathematica.
*/
public class SequenceFileImportReader {

  SequenceFile.Reader reader;
  Class<?> keyClass;
  Class<?> valueClass;
  Writable key;
  Writable value;

  public SequenceFileImportReader(Configuration conf, Path sequenceFile)
      throws Exception {
    FileSystem fs = FileSystem.get(conf);
    reader = new SequenceFile.Reader(fs, sequenceFile, conf);
    keyClass = reader.getKeyClass();
    valueClass = reader.getValueClass();
    key = (Writable) ReflectionUtils.newInstance(keyClass, conf);
    value = (Writable) ReflectionUtils.newInstance(valueClass, conf);
  }

  private Expr writableToExpr(Writable w) {
    if (w instanceof TypedBytesWritable) {
      return ExprUtil.toExpr(((TypedBytesWritable) w).getValue());
    }
    return null;
  }

  public Expr next() throws Exception {
    Expr record;
    if (reader.next(key, value)) {
      record = new Expr(
          ExprUtil.toSymbol("List"),
          new Expr[] {
            writableToExpr(key),
            writableToExpr(value)
          });
      return record;
    } else {
      return null;
    }
  }

  public Object[] next(int maxRecords) throws Exception {
    ArrayList<Expr> records = new ArrayList<Expr>(maxRecords);
    for (int i = 0; i < maxRecords; i++) {
      Expr record = next();
      if (record == null) {
        if (i == 0) { return null; }
        break;
      }
      records.add(record);
    }
    return records.toArray();
  }

  public void close() throws Exception {
    reader.close();
  }
}
TOP

Related Classes of com.wolfram.hadoop.dfs.SequenceFileImportReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.