Package org.kiji.schema.mapreduce

Source Code of org.kiji.schema.mapreduce.KijiTableInputFormat$KijiTableRecordReader

/**
* (c) Copyright 2012 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.kiji.schema.mapreduce;

import static com.google.common.base.Preconditions.checkNotNull;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;

import com.google.common.collect.Lists;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang.SerializationUtils;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.mapreduce.TableSplit;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import org.kiji.annotations.ApiAudience;
import org.kiji.annotations.ApiStability;
import org.kiji.schema.EntityId;
import org.kiji.schema.HBaseEntityId;
import org.kiji.schema.Kiji;
import org.kiji.schema.KijiDataRequest;
import org.kiji.schema.KijiRegion;
import org.kiji.schema.KijiRowData;
import org.kiji.schema.KijiRowScanner;
import org.kiji.schema.KijiTable;
import org.kiji.schema.KijiTableReader;
import org.kiji.schema.KijiTableReader.KijiScannerOptions;
import org.kiji.schema.KijiURI;
import org.kiji.schema.KijiURIException;
import org.kiji.schema.impl.hbase.HBaseKijiRowData;
import org.kiji.schema.impl.hbase.HBaseKijiTable;
import org.kiji.schema.util.ResourceUtils;

/** InputFormat for Hadoop MapReduce jobs reading from a Kiji table. */
@ApiAudience.Public
@ApiStability.Evolving
@Deprecated
public class KijiTableInputFormat
    extends InputFormat<EntityId, KijiRowData>
    implements Configurable {
  /** Configuration of this input format. */
  private Configuration mConf;

  /** {@inheritDoc} */
  @Override
  public void setConf(Configuration conf) {
    mConf = conf;
  }

  /** {@inheritDoc} */
  @Override
  public Configuration getConf() {
    return mConf;
  }

  /** {@inheritDoc} */
  @Override
  public RecordReader<EntityId, KijiRowData> createRecordReader(
      InputSplit split, TaskAttemptContext context) throws IOException {
    return new KijiTableRecordReader(mConf);
  }

  /** {@inheritDoc} */
  @Override
  public List<InputSplit> getSplits(JobContext context) throws IOException {
    final Configuration conf = context.getConfiguration();
    final KijiURI inputTableURI = getInputTableURI(conf);
    final Kiji kiji = Kiji.Factory.open(inputTableURI, conf);
    final KijiTable table = kiji.openTable(inputTableURI.getTable());

    final HTableInterface htable = HBaseKijiTable.downcast(table).openHTableConnection();
    try {
      final List<InputSplit> splits = Lists.newArrayList();
      for (KijiRegion region : table.getRegions()) {
        final byte[] startKey = region.getStartKey();
        // TODO: a smart way to get which location is most relevant.
        final String location =
            region.getLocations().isEmpty() ? null : region.getLocations().iterator().next();
        final TableSplit tableSplit = new TableSplit(
            htable.getTableName(), startKey, region.getEndKey(), location);
        splits.add(new KijiTableSplit(tableSplit, startKey));
      }
      return splits;

    } finally {
      htable.close();
    }
  }

  /**
   * Configures a Hadoop M/R job to read from a given table.
   *
   * @param job Job to configure.
   * @param tableURI URI of the table to read from.
   * @param dataRequest Data request.
   * @param startRow Minimum row key to process.
   * @param endRow Maximum row Key to process.
   * @throws IOException on I/O error.
   */
  public static void configureJob(
      Job job,
      KijiURI tableURI,
      KijiDataRequest dataRequest,
      String startRow,
      String endRow)
      throws IOException {

    final Configuration conf = job.getConfiguration();
    // As a precaution, be sure the table exists and can be opened.
    final Kiji kiji = Kiji.Factory.open(tableURI, conf);
    final KijiTable table = kiji.openTable(tableURI.getTable());
    ResourceUtils.releaseOrLog(table);
    ResourceUtils.releaseOrLog(kiji);

    // TODO: Check for jars config:
    // GenericTableMapReduceUtil.initTableInput(hbaseTableName, scan, job);

    // TODO: Obey specified start/end rows.

    // Write all the required values to the job's configuration object.
    job.setInputFormatClass(KijiTableInputFormat.class);
    final String serializedRequest =
        Base64.encodeBase64String(SerializationUtils.serialize(dataRequest));
    conf.set(KijiConfKeys.INPUT_DATA_REQUEST, serializedRequest);
    conf.set(KijiConfKeys.INPUT_TABLE_URI, tableURI.toString());
  }

  /** Hadoop record reader for Kiji table rows. */
  public static class KijiTableRecordReader
      extends RecordReader<EntityId, KijiRowData> {

    /** Data request. */
    protected final KijiDataRequest mDataRequest;

    /** Hadoop Configuration object containing settings. */
    protected final Configuration mConf;

    private Kiji mKiji = null;
    private KijiTable mTable = null;
    private KijiTableReader mReader = null;
    private KijiRowScanner mScanner = null;
    private Iterator<KijiRowData> mIterator = null;

    private KijiTableSplit mSplit = null;

    private HBaseKijiRowData mCurrentRow = null;

    /**
     * Creates a new RecordReader for this input format. This RecordReader will perform the actual
     * reads from Kiji.
     *
     * @param conf The configuration object for this Kiji.
     */
    public KijiTableRecordReader(Configuration conf) {
      mConf = conf;

      // Get data request from the job configuration.
      final String dataRequestB64 = checkNotNull(mConf.get(KijiConfKeys.INPUT_DATA_REQUEST),
          "Missing data request in job configuration.");
      final byte[] dataRequestBytes = Base64.decodeBase64(Bytes.toBytes(dataRequestB64));
      mDataRequest = (KijiDataRequest) SerializationUtils.deserialize(dataRequestBytes);
    }

    /** {@inheritDoc} */
    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
      assert split instanceof KijiTableSplit;
      mSplit = (KijiTableSplit) split;

      final Configuration conf = context.getConfiguration();
      final KijiURI inputURI = getInputTableURI(conf);
      mKiji = Kiji.Factory.open(inputURI, conf);
      mTable = mKiji.openTable(inputURI.getTable());
      mReader = mTable.openTableReader();
      final KijiScannerOptions scannerOptions =
          new KijiScannerOptions()
          .setStartRow(HBaseEntityId.fromHBaseRowKey(mSplit.getStartRow()))
          .setStopRow(HBaseEntityId.fromHBaseRowKey(mSplit.getEndRow()));
      mScanner = mReader.getScanner(mDataRequest, scannerOptions);
      mIterator = mScanner.iterator();
      mCurrentRow = null;
    }

    /** {@inheritDoc} */
    @Override
    public EntityId getCurrentKey() throws IOException {
      return mCurrentRow.getEntityId();
    }

    /** {@inheritDoc} */
    @Override
    public KijiRowData getCurrentValue() throws IOException {
      return mCurrentRow;
    }

    /** {@inheritDoc} */
    @Override
    public float getProgress() throws IOException {
      // TODO: Implement
      return 0.0f;
    }

    /** {@inheritDoc} */
    @Override
    public boolean nextKeyValue() throws IOException {
      if (mIterator.hasNext()) {
        mCurrentRow = (HBaseKijiRowData) mIterator.next();
        return true;
      } else {
        mCurrentRow = null;
        return false;
      }
    }

    /** {@inheritDoc} */
    @Override
    public void close() throws IOException {
      ResourceUtils.closeOrLog(mScanner);
      ResourceUtils.closeOrLog(mReader);
      ResourceUtils.releaseOrLog(mTable);
      ResourceUtils.releaseOrLog(mKiji);
      mIterator = null;
      mScanner = null;
      mReader = null;
      mTable = null;
      mKiji = null;

      mSplit = null;
      mCurrentRow = null;
    }
  }

  /**
   * Reports the URI of the configured input table.
   *
   * @param conf Read the input URI from this configuration.
   * @return the configured input URI.
   * @throws IOException on I/O error.
   */
  private static KijiURI getInputTableURI(Configuration conf) throws IOException {
    try {
      return KijiURI.newBuilder(conf.get(KijiConfKeys.INPUT_TABLE_URI)).build();
    } catch (KijiURIException kue) {
      throw new IOException(kue);
    }
  }
}
TOP

Related Classes of org.kiji.schema.mapreduce.KijiTableInputFormat$KijiTableRecordReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.