package com.splout.db.hadoop.engine;
/*
* #%L
* Splout SQL Hadoop library
* %%
* Copyright (C) 2012 - 2014 Datasalt Systems S.L.
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import java.io.IOException;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import com.datasalt.pangool.io.ITuple;
import com.splout.db.common.HeartBeater;
/**
 * The actual output format used in Splout SQL database generation. It receives a {@link SploutSQLOutputFormat}
 * through its constructor. This class performs the common tasks around it: heart beating, obtaining a temporary
 * folder to write data to through the Hadoop API, completing the output, and so on.
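 *
 * <p>A minimal usage sketch; {@code MySQLiteOutputFormat} stands for a hypothetical
 * {@link SploutSQLOutputFormat} implementation and is used for illustration only:
 *
 * <pre>{@code
 * SploutSQLOutputFormat format = new MySQLiteOutputFormat();
 * SploutSQLProxyOutputFormat proxy = new SploutSQLProxyOutputFormat(format);
 * // "proxy" is then set as the Hadoop job's output format; every ITuple written to it
 * // must carry its partition id in SploutSQLOutputFormat.PARTITION_TUPLE_FIELD.
 * }</pre>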
*/
@SuppressWarnings("serial")
public class SploutSQLProxyOutputFormat extends FileOutputFormat<ITuple, NullWritable> implements Serializable {
private SploutSQLOutputFormat outputFormat;
public SploutSQLProxyOutputFormat(SploutSQLOutputFormat outputFormat) {
this.outputFormat = outputFormat;
}
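// Sequence appended to local temp folder names so that each opened RecordWriter gets
// a unique folder (the sequence is not reset across tasks when the JVM is reused)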
private static final AtomicLong FILE_SEQUENCE = new AtomicLong(0);
private HeartBeater heartBeater;
private Configuration conf;
private TaskAttemptContext context;
@Override
public RecordWriter<ITuple, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException,
InterruptedException {
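// The HeartBeater reports progress to Hadoop from a background thread so that the
// task is not killed by the task timeout during long phases that produce no output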
long waitTimeHeartBeater = context.getConfiguration().getLong(HeartBeater.WAIT_TIME_CONF, 5000);
heartBeater = new HeartBeater(context, waitTimeHeartBeater);
heartBeater.needHeartBeat();
conf = context.getConfiguration();
this.context = context;
outputFormat.setConf(context.getConfiguration());
return new RecordWriter<ITuple, NullWritable>() {
// Temporary and permanent Paths for properly writing Hadoop output files
private Map<Integer, Path> permPool = new HashMap<Integer, Path>();
private Map<Integer, Path> tempPool = new HashMap<Integer, Path>();
private void initSql(int partition) throws IOException, InterruptedException {
// Final HDFS location of the generated partition file. It is first written to a
// local temporary folder and, on close(), copied to the committer's work path,
// from where the OutputCommitter finally commits it to the proper location.
FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(SploutSQLProxyOutputFormat.this.context);
Path perm = new Path(committer.getWorkPath(), partition + ".db");
FileSystem fs = perm.getFileSystem(conf);
// Make a task-unique name that contains the actual index output name to
// make debugging simpler
// Note: if using JVM reuse, the sequence number will not be reset for a
// new task using the JVM
Path temp = conf.getLocalPath("mapred.local.dir", "splout_task_" + SploutSQLProxyOutputFormat.this.context.getTaskAttemptID()
+ '.' + FILE_SEQUENCE.incrementAndGet());
FileSystem localFileSystem = FileSystem.getLocal(conf);
if(localFileSystem.exists(temp)) {
localFileSystem.delete(temp, true);
}
localFileSystem.mkdirs(temp);
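// startLocalOutput() returns a local Path to write the partition file to; the matching
// completeLocalOutput() call in close() copies the finished file to "perm" in HDFS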
Path local = fs.startLocalOutput(perm, new Path(temp, partition + ".db"));
permPool.put(partition, perm);
tempPool.put(partition, new Path(temp, partition + ".db"));
outputFormat.initPartition(partition, local);
}
@Override
public void close(TaskAttemptContext ctx) throws IOException, InterruptedException {
// Use the Configuration captured in getRecordWriter(): "ctx" may be null here,
// so it must not be dereferenced before the null check below
FileSystem fs = FileSystem.get(conf);
try {
if(ctx != null) {
heartBeater.setProgress(ctx);
}
outputFormat.close();
for(Map.Entry<Integer, Path> entry : permPool.entrySet()) {
// Copy each locally written partition file to its destination in HDFS
fs.completeLocalOutput(entry.getValue(), tempPool.get(entry.getKey()));
}
} finally { // in any case, destroy the HeartBeater
heartBeater.cancelHeartBeat();
}
}
@Override
public void write(ITuple tuple, NullWritable ignore) throws IOException, InterruptedException {
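// Each tuple carries its partition id in PARTITION_TUPLE_FIELD; the partition's
// database is initialized lazily when the first tuple for it arrives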
int partition = (Integer) tuple.get(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD);
if(tempPool.get(partition) == null) {
initSql(partition);
}
outputFormat.write(tuple);
}
};
}
}