package org.apache.blur.mapreduce.lib;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.UUID;
import org.apache.blur.analysis.FieldManager;
import org.apache.blur.log.Log;
import org.apache.blur.log.LogFactory;
import org.apache.blur.lucene.LuceneVersionConstant;
import org.apache.blur.manager.writer.TransactionRecorder;
import org.apache.blur.mapreduce.lib.BlurMutate.MUTATE_TYPE;
import org.apache.blur.server.TableContext;
import org.apache.blur.store.hdfs.HdfsDirectory;
import org.apache.blur.thirdparty.thrift_0_9_0.TException;
import org.apache.blur.thirdparty.thrift_0_9_0.protocol.TJSONProtocol;
import org.apache.blur.thirdparty.thrift_0_9_0.transport.TIOStreamTransport;
import org.apache.blur.thrift.BlurClient;
import org.apache.blur.thrift.generated.Blur.Iface;
import org.apache.blur.thrift.generated.Column;
import org.apache.blur.thrift.generated.Record;
import org.apache.blur.thrift.generated.TableDescriptor;
import org.apache.blur.utils.BlurConstants;
import org.apache.blur.utils.BlurUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.util.Progressable;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.NoLockFactory;
/**
* {@link BlurOutputFormat} is used to index data and deliver the indexes to
* the proper Blur table for searching. A typical usage of this class would be
* as follows.<br/>
* <br/>
*
* <br/>
* {@link Iface} client = {@link BlurClient}.getClient("controller1:40010");<br/>
* <br/>
* TableDescriptor tableDescriptor = client.describe(tableName);<br/>
* <br/>
* Job job = new Job(jobConf, "blur index");<br/>
* job.setJarByClass(BlurOutputFormatTest.class);<br/>
* job.setMapperClass(CsvBlurMapper.class);<br/>
* job.setInputFormatClass(TextInputFormat.class);<br/>
* <br/>
* FileInputFormat.addInputPath(job, new Path(input));<br/>
* CsvBlurMapper.addColumns(job, "cf1", "col");<br/>
* <br/>
* BlurOutputFormat.setupJob(job, tableDescriptor);<br/>
* BlurOutputFormat.setIndexLocally(job, true);<br/>
* BlurOutputFormat.setOptimizeInFlight(job, false);<br/>
* <br/>
* job.waitForCompletion(true);<br/>
*
*/
public class BlurOutputFormat extends OutputFormat<Text, BlurMutate> {
public static final String BLUR_OUTPUT_REDUCER_MULTIPLIER = "blur.output.reducer.multiplier";
public static final String BLUR_OUTPUT_OPTIMIZEINFLIGHT = "blur.output.optimizeinflight";
public static final String BLUR_OUTPUT_INDEXLOCALLY = "blur.output.indexlocally";
public static final String BLUR_OUTPUT_MAX_DOCUMENT_BUFFER_SIZE = "blur.output.max.document.buffer.size";
public static final String BLUR_TABLE_DESCRIPTOR = "blur.table.descriptor";
public static final String BLUR_OUTPUT_PATH = "blur.output.path";
private static final String JAVA_IO_TMPDIR = "java.io.tmpdir";
private static final String MAPRED_OUTPUT_COMMITTER_CLASS = "mapred.output.committer.class";
private static ThreadLocal<Progressable> _progressable = new ThreadLocal<Progressable>();
private static ThreadLocal<GetCounter> _getCounter = new ThreadLocal<GetCounter>();
static void setProgressable(Progressable progressable) {
_progressable.set(progressable);
}
static Progressable getProgressable() {
return _progressable.get();
}
static void setGetCounter(GetCounter getCounter) {
_getCounter.set(getCounter);
}
static GetCounter getGetCounter() {
return _getCounter.get();
}
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
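// Verify that the table descriptor has been set, that the table path exists
// with the expected shard count, and that the number of reducers equals the
// reducer multiplier times the shard count.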
Configuration config = context.getConfiguration();
TableDescriptor tableDescriptor = getTableDescriptor(config);
if (tableDescriptor == null) {
throw new IOException("setTableDescriptor needs to be called first.");
}
int shardCount = tableDescriptor.getShardCount();
FileSystem fileSystem = getOutputPath(config).getFileSystem(config);
Path tablePath = new Path(tableDescriptor.getTableUri());
if (fileSystem.exists(tablePath)) {
BlurUtil.validateShardCount(shardCount, fileSystem, tablePath);
} else {
throw new IOException("Table path [ " + tablePath + " ] doesn't exist for table [ " + tableDescriptor.getName()
+ " ].");
}
BlurUtil.validateWritableDirectory(fileSystem, tablePath);
int reducers = context.getNumReduceTasks();
int reducerMultiplier = getReducerMultiplier(config);
int validNumberOfReducers = reducerMultiplier * shardCount;
if (reducers > 0 && reducers != validNumberOfReducers) {
throw new IllegalArgumentException("Invalid number of reducers [ " + reducers + " ]."
+ " Number of Reducers should be [ " + validNumberOfReducers + " ].");
}
}
@Override
public RecordWriter<Text, BlurMutate> getRecordWriter(TaskAttemptContext context) throws IOException,
InterruptedException {
int id = context.getTaskAttemptID().getTaskID().getId();
TaskAttemptID taskAttemptID = context.getTaskAttemptID();
return new BlurRecordWriter(context.getConfiguration(), id, taskAttemptID.toString() + ".tmp");
}
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
return new BlurOutputCommitter(context.getTaskAttemptID().isMap(), context.getNumReduceTasks());
}
public static TableDescriptor getTableDescriptor(Configuration configuration) throws IOException {
String tableDesStr = configuration.get(BLUR_TABLE_DESCRIPTOR);
if (tableDesStr == null) {
return null;
}
ByteArrayInputStream inputStream = new ByteArrayInputStream(tableDesStr.getBytes());
TIOStreamTransport transport = new TIOStreamTransport(inputStream);
TJSONProtocol protocol = new TJSONProtocol(transport);
TableDescriptor descriptor = new TableDescriptor();
try {
descriptor.read(protocol);
} catch (TException e) {
throw new IOException(e);
}
transport.close();
return descriptor;
}
/**
* This will multiply the number of reducers for this job. For example, if the
* table has 256 shards, the normal number of reducers is 256. However, if the
* reducer multiplier is set to 4, then the number of reducers will be 1024 and
* each shard will get 4 new segments instead of the normal 1.
*
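* A minimal sketch (assumes the {@link TableDescriptor} was already set via
* setupJob or setTableDescriptor):<br/>
* <br/>
* BlurOutputFormat.setupJob(job, tableDescriptor);<br/>
* BlurOutputFormat.setReducerMultiplier(job, 4);<br/>
* // a 256 shard table now runs with 1024 reducers<br/>
*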
* @param job
* the job to setup.
* @param multiple
* the reducer multiplier to use.
* @throws IOException
*/
public static void setReducerMultiplier(Job job, int multiple) throws IOException {
TableDescriptor tableDescriptor = getTableDescriptor(job.getConfiguration());
if (tableDescriptor == null) {
throw new IOException("setTableDescriptor needs to be called first.");
}
job.setNumReduceTasks(tableDescriptor.getShardCount() * multiple);
Configuration configuration = job.getConfiguration();
configuration.setInt(BLUR_OUTPUT_REDUCER_MULTIPLIER, multiple);
}
public static int getReducerMultiplier(Configuration configuration) {
return configuration.getInt(BLUR_OUTPUT_REDUCER_MULTIPLIER, 1);
}
/**
* Sets the {@link TableDescriptor} for this job.
*
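* A minimal sketch (the controller host and table name are example values):<br/>
* <br/>
* Iface client = BlurClient.getClient("controller1:40010");<br/>
* TableDescriptor tableDescriptor = client.describe("test_table");<br/>
* BlurOutputFormat.setTableDescriptor(job, tableDescriptor);<br/>
*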
* @param job
* the job to setup.
* @param tableDescriptor
* the {@link TableDescriptor}.
* @throws IOException
*/
public static void setTableDescriptor(Job job, TableDescriptor tableDescriptor) throws IOException {
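// Serialize the descriptor to a Thrift JSON string so that it can be carried
// in the job Configuration.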
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
TIOStreamTransport transport = new TIOStreamTransport(outputStream);
TJSONProtocol protocol = new TJSONProtocol(transport);
try {
tableDescriptor.write(protocol);
} catch (TException e) {
throw new IOException(e);
}
transport.close();
Configuration configuration = job.getConfiguration();
configuration.set(BLUR_TABLE_DESCRIPTOR, new String(outputStream.toByteArray()));
setOutputPath(job, new Path(tableDescriptor.getTableUri()));
}
/**
* Sets the maximum number of documents that the buffer will hold in memory
* before overflowing to disk. By default this is 1000, which will probably be
* too low for most systems.
*
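* Larger values use more task memory but reduce how often a row overflows to
* a local temporary index. A minimal sketch (5000 is an arbitrary example
* value):<br/>
* <br/>
* BlurOutputFormat.setMaxDocumentBufferSize(job, 5000);<br/>
*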
* @param job
* the job to setup.
* @param maxDocumentBufferSize
* the maxDocumentBufferSize.
*/
public static void setMaxDocumentBufferSize(Job job, int maxDocumentBufferSize) {
setMaxDocumentBufferSize(job.getConfiguration(), maxDocumentBufferSize);
}
/**
* Sets the maximum number of documents that the buffer will hold in memory
* before overflowing to disk. By default this is 1000, which will probably be
* too low for most systems.
*
* @param configuration
* the configuration to setup.
* @param maxDocumentBufferSize
* the maxDocumentBufferSize.
*/
public static void setMaxDocumentBufferSize(Configuration configuration, int maxDocumentBufferSize) {
configuration.setInt(BLUR_OUTPUT_MAX_DOCUMENT_BUFFER_SIZE, maxDocumentBufferSize);
}
public static int getMaxDocumentBufferSize(Configuration configuration) {
return configuration.getInt(BLUR_OUTPUT_MAX_DOCUMENT_BUFFER_SIZE, 1000);
}
public static void setOutputPath(Job job, Path path) {
Configuration configuration = job.getConfiguration();
configuration.set(BLUR_OUTPUT_PATH, path.toString());
configuration.set(MAPRED_OUTPUT_COMMITTER_CLASS, BlurOutputCommitter.class.getName());
}
public static Path getOutputPath(Configuration configuration) {
return new Path(configuration.get(BLUR_OUTPUT_PATH));
}
/**
* Enabled by default, this will enable local indexing on the machine where
* the task is running. Then, when the {@link RecordWriter} closes, the index
* is copied to the remote destination in HDFS.
*
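* When enabled, the index is built under the directory named by the
* java.io.tmpdir system property, so the task node needs enough local disk
* space to hold the task's portion of the index.
*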
* @param job
* the job to setup.
* @param b
* true to enable, false to disable.
*/
public static void setIndexLocally(Job job, boolean b) {
setIndexLocally(job.getConfiguration(), b);
}
/**
* Enabled by default, this will enable local indexing on the machine where
* the task is running. Then, when the {@link RecordWriter} closes, the index
* is copied to the remote destination in HDFS.
*
* @param configuration
* the configuration to setup.
* @param b
* true to enable, false to disable.
*/
public static void setIndexLocally(Configuration configuration, boolean b) {
configuration.setBoolean(BLUR_OUTPUT_INDEXLOCALLY, b);
}
public static boolean isIndexLocally(Configuration configuration) {
return configuration.getBoolean(BLUR_OUTPUT_INDEXLOCALLY, true);
}
/**
* Enabled by default, this will optimize the index while copying from the
* local index to the remote destination in HDFS. Used in conjunction with
* setIndexLocally.
*
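* Optimizing produces fewer segments at the destination at the cost of extra
* work during the copy.
*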
* @param job
* the job to setup.
* @param b
* true to enable, false to disable.
*/
public static void setOptimizeInFlight(Job job, boolean b) {
setOptimizeInFlight(job.getConfiguration(), b);
}
/**
* Enabled by default, this will optimize the index while copying from the
* local index to the remote destination in HDFS. Used in conjunction with
* setIndexLocally.
*
* @param configuration
* the configuration to setup.
* @param b
* true to enable, false to disable.
*/
public static void setOptimizeInFlight(Configuration configuration, boolean b) {
configuration.setBoolean(BLUR_OUTPUT_OPTIMIZEINFLIGHT, b);
}
public static boolean isOptimizeInFlight(Configuration configuration) {
return configuration.getBoolean(BLUR_OUTPUT_OPTIMIZEINFLIGHT, true);
}
static class BlurRecordWriter extends RecordWriter<Text, BlurMutate> {
private static final Log LOG = LogFactory.getLog(BlurRecordWriter.class);
private final Text _prevKey = new Text();
private final Map<String, List<Field>> _documents = new TreeMap<String, List<Field>>();
private final IndexWriter _writer;
private final FieldManager _fieldManager;
private final Directory _finalDir;
private final Directory _localDir;
private final File _localPath;
private final int _maxDocumentBufferSize;
private final IndexWriterConfig _conf;
private final IndexWriterConfig _overFlowConf;
private final Path _newIndex;
private final boolean _indexLocally;
private final boolean _optimizeInFlight;
private Counter _columnCount = emptyCounter();
private Counter _fieldCount = emptyCounter();
private Counter _recordCount = emptyCounter();
private Counter _rowCount = emptyCounter();
private Counter _recordDuplicateCount = emptyCounter();
private Counter _rowOverFlowCount = emptyCounter();
private Counter _rowDeleteCount = emptyCounter();
private RateCounter _recordRateCounter = new RateCounter(emptyCounter());
private RateCounter _rowRateCounter = new RateCounter(emptyCounter());
private RateCounter _copyRateCounter = new RateCounter(emptyCounter());
private boolean _countersSetup = false;
private IndexWriter _localTmpWriter;
private boolean _usingLocalTmpindex;
private File _localTmpPath;
private ProgressableDirectory _localTmpDir;
private String _deletedRowId;
public BlurRecordWriter(Configuration configuration, int attemptId, String tmpDirName) throws IOException {
_indexLocally = BlurOutputFormat.isIndexLocally(configuration);
_optimizeInFlight = BlurOutputFormat.isOptimizeInFlight(configuration);
TableDescriptor tableDescriptor = BlurOutputFormat.getTableDescriptor(configuration);
int shardCount = tableDescriptor.getShardCount();
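// Each task writes into a single shard, chosen by the task id modulo the
// shard count. This is why checkOutputSpecs requires the reducer count to be
// the reducer multiplier times the shard count.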
int shardId = attemptId % shardCount;
_maxDocumentBufferSize = BlurOutputFormat.getMaxDocumentBufferSize(configuration);
Path tableOutput = BlurOutputFormat.getOutputPath(configuration);
String shardName = BlurUtil.getShardName(BlurConstants.SHARD_PREFIX, shardId);
Path indexPath = new Path(tableOutput, shardName);
_newIndex = new Path(indexPath, tmpDirName);
_finalDir = new ProgressableDirectory(new HdfsDirectory(configuration, _newIndex),
BlurOutputFormat.getProgressable());
_finalDir.setLockFactory(NoLockFactory.getNoLockFactory());
TableContext tableContext = TableContext.create(tableDescriptor);
_fieldManager = tableContext.getFieldManager();
Analyzer analyzer = _fieldManager.getAnalyzerForIndex();
_conf = new IndexWriterConfig(LuceneVersionConstant.LUCENE_VERSION, analyzer);
TieredMergePolicy mergePolicy = (TieredMergePolicy) _conf.getMergePolicy();
mergePolicy.setUseCompoundFile(false);
_overFlowConf = new IndexWriterConfig(LuceneVersionConstant.LUCENE_VERSION, analyzer);
_overFlowConf.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES);
if (_indexLocally) {
String localDirPath = System.getProperty(JAVA_IO_TMPDIR);
_localPath = new File(localDirPath, UUID.randomUUID().toString() + ".tmp");
_localDir = new ProgressableDirectory(FSDirectory.open(_localPath), BlurOutputFormat.getProgressable());
_writer = new IndexWriter(_localDir, _conf.clone());
} else {
_localPath = null;
_localDir = null;
_writer = new IndexWriter(_finalDir, _conf.clone());
}
}
private Counter emptyCounter() {
return new Counter() {
};
}
@Override
public void write(Text key, BlurMutate value) throws IOException, InterruptedException {
if (!_countersSetup) {
setupCounter();
_countersSetup = true;
}
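// Records are expected to arrive grouped by row id; when the key changes,
// the previous row is complete and can be flushed.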
if (!_prevKey.equals(key)) {
flush();
_prevKey.set(key);
}
add(value);
}
private void setupCounter() {
GetCounter getCounter = BlurOutputFormat.getGetCounter();
_fieldCount = getCounter.getCounter(BlurCounters.LUCENE_FIELD_COUNT);
_columnCount = getCounter.getCounter(BlurCounters.COLUMN_COUNT);
_recordCount = getCounter.getCounter(BlurCounters.RECORD_COUNT);
_recordDuplicateCount = getCounter.getCounter(BlurCounters.RECORD_DUPLICATE_COUNT);
_rowCount = getCounter.getCounter(BlurCounters.ROW_COUNT);
_rowDeleteCount = getCounter.getCounter(BlurCounters.ROW_DELETE_COUNT);
_rowOverFlowCount = getCounter.getCounter(BlurCounters.ROW_OVERFLOW_COUNT);
_recordRateCounter = new RateCounter(getCounter.getCounter(BlurCounters.RECORD_RATE));
_rowRateCounter = new RateCounter(getCounter.getCounter(BlurCounters.ROW_RATE));
_copyRateCounter = new RateCounter(getCounter.getCounter(BlurCounters.COPY_RATE));
}
private void add(BlurMutate value) throws IOException {
BlurRecord blurRecord = value.getRecord();
Record record = getRecord(blurRecord);
String recordId = record.getRecordId();
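// A delete mutate only records the row id; the delete marker document, if
// needed, is written when the row is flushed.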
if (value.getMutateType() == MUTATE_TYPE.DELETE) {
_deletedRowId = blurRecord.getRowId();
return;
}
_columnCount.increment(record.getColumns().size());
List<Field> document = TransactionRecorder.getDoc(_fieldManager, blurRecord.getRowId(), record);
List<Field> dup = _documents.put(recordId, document);
if (dup != null) {
_recordDuplicateCount.increment(1);
} else {
_fieldCount.increment(document.size());
_recordCount.increment(1);
}
flushToTmpIndexIfNeeded();
}
private void flushToTmpIndexIfNeeded() throws IOException {
if (_documents.size() > _maxDocumentBufferSize) {
flushToTmpIndex();
}
}
private void flushToTmpIndex() throws IOException {
if (_documents.isEmpty()) {
return;
}
_usingLocalTmpindex = true;
if (_localTmpWriter == null) {
String localDirPath = System.getProperty(JAVA_IO_TMPDIR);
_localTmpPath = new File(localDirPath, UUID.randomUUID().toString() + ".tmp");
_localTmpDir = new ProgressableDirectory(FSDirectory.open(_localTmpPath), BlurOutputFormat.getProgressable());
_localTmpWriter = new IndexWriter(_localTmpDir, _overFlowConf.clone());
// The local tmp writer has merging disabled, so the first document added is
// going to be doc 0. Therefore the first document added is the prime doc.
List<List<Field>> docs = new ArrayList<List<Field>>(_documents.values());
docs.get(0).add(new StringField(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE, Store.NO));
_localTmpWriter.addDocuments(docs);
} else {
_localTmpWriter.addDocuments(_documents.values());
}
_documents.clear();
}
private void resetLocalTmp() {
_usingLocalTmpindex = false;
_localTmpWriter = null;
_localTmpDir = null;
rm(_localTmpPath);
_localTmpPath = null;
}
private Record getRecord(BlurRecord value) {
Record record = new Record();
record.setRecordId(value.getRecordId());
record.setFamily(value.getFamily());
for (BlurColumn col : value.getColumns()) {
record.addToColumns(new Column(col.getName(), col.getValue()));
}
return record;
}
private void flush() throws CorruptIndexException, IOException {
if (_usingLocalTmpindex) {
// Since we have already flushed this row to disk, we do not need to index
// the delete.
flushToTmpIndex();
_localTmpWriter.close(false);
DirectoryReader reader = DirectoryReader.open(_localTmpDir);
_recordRateCounter.mark(reader.numDocs());
_writer.addIndexes(reader);
reader.close();
resetLocalTmp();
_rowOverFlowCount.increment(1);
} else {
if (_documents.isEmpty()) {
if (_deletedRowId != null) {
_writer.addDocument(getDeleteDoc());
_rowDeleteCount.increment(1);
}
} else {
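// Mark the first document of the row as the prime doc and add the whole row
// as a single block.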
List<List<Field>> docs = new ArrayList<List<Field>>(_documents.values());
docs.get(0).add(new StringField(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE, Store.NO));
_writer.addDocuments(docs);
_recordRateCounter.mark(_documents.size());
_documents.clear();
}
}
_deletedRowId = null;
_rowRateCounter.mark();
_rowCount.increment(1);
}
private Document getDeleteDoc() {
Document document = new Document();
document.add(new StringField(BlurConstants.ROW_ID, _deletedRowId, Store.NO));
document.add(new StringField(BlurConstants.DELETE_MARKER, BlurConstants.DELETE_MARKER_VALUE, Store.NO));
return document;
}
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
flush();
_writer.close();
_recordRateCounter.close();
_rowRateCounter.close();
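// If the index was built locally it still has to be copied up to HDFS,
// either optimized while copying or copied file by file.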
if (_indexLocally) {
if (_optimizeInFlight) {
copyAndOptimizeInFlightDir();
} else {
copyDir();
}
}
_copyRateCounter.close();
}
private void copyAndOptimizeInFlightDir() throws IOException {
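// addIndexes rewrites the local segments while streaming the result into the
// rate-tracked HDFS directory, optimizing and copying in a single pass.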
CopyRateDirectory copyRateDirectory = new CopyRateDirectory(_finalDir, _copyRateCounter);
copyRateDirectory.setLockFactory(NoLockFactory.getNoLockFactory());
DirectoryReader reader = DirectoryReader.open(_localDir);
IndexWriter writer = new IndexWriter(copyRateDirectory, _conf.clone());
writer.addIndexes(reader);
writer.close();
rm(_localPath);
}
private void copyDir() throws IOException {
CopyRateDirectory copyRateDirectory = new CopyRateDirectory(_finalDir, _copyRateCounter);
String[] fileNames = _localDir.listAll();
for (String fileName : fileNames) {
LOG.info("Copying [{0}] to [{1}]", fileName, _newIndex);
_localDir.copy(copyRateDirectory, fileName, fileName, IOContext.DEFAULT);
}
rm(_localPath);
}
private void rm(File file) {
if (!file.exists()) {
return;
}
if (file.isDirectory()) {
for (File f : file.listFiles()) {
rm(f);
}
}
file.delete();
}
}
/**
* Sets up the output portion of the map reduce job. This also affects the map
* side of a map and reduce job.
*
* @param job
* the job to setup.
* @param tableDescriptor
* the table descriptor of the table that will receive the output of
* the indexing job.
* @throws IOException
*/
public static void setupJob(Job job, TableDescriptor tableDescriptor) throws IOException {
job.setReducerClass(DefaultBlurReducer.class);
job.setNumReduceTasks(tableDescriptor.getShardCount());
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(BlurMutate.class);
job.setOutputFormatClass(BlurOutputFormat.class);
setTableDescriptor(job, tableDescriptor);
BlurMapReduceUtil.addDependencyJars(job);
BlurMapReduceUtil.addAllJarsInBlurLib(job.getConfiguration());
}
}