Package mrdp.ch7

Source Code of mrdp.ch7.RedisOutputDriver

package mrdp.ch7;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import mrdp.utils.MRDPUtils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.log4j.Logger;

import redis.clients.jedis.Jedis;

public class RedisOutputDriver {

  public static class RedisOutputMapper extends
      Mapper<Object, Text, Text, Text> {

    private Text outkey = new Text();
    private Text outvalue = new Text();

    @Override
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {

      Map<String, String> parsed = MRDPUtils.transformXmlToMap(value
          .toString());

      String userId = parsed.get("Id");
      String reputation = parsed.get("Reputation");

      if (userId == null || reputation == null) {
        return;
      }

      // Set our output key and values
      outkey.set(userId);
      outvalue.set(reputation);

      context.write(outkey, outvalue);
    }
  }

  public static class RedisHashOutputFormat extends OutputFormat<Text, Text> {

    public static final String REDIS_HOSTS_CONF = "mapred.redishashoutputformat.hosts";
    public static final String REDIS_HASH_KEY_CONF = "mapred.redishashinputformat.key";

    /**
     * Sets the CSV string of Redis hosts.
     *
     * @param job
     *            The job conf
     * @param hosts
     *            The CSV string of Redis hosts
     */
    public static void setRedisHosts(Job job, String hosts) {
      job.getConfiguration().set(REDIS_HOSTS_CONF, hosts);
    }

    /**
     * Sets the key of the hash to write to.
     *
     * @param job
     *            The job conf
     * @param hashKey
     *            The name of the hash key
     */
    public static void setRedisHashKey(Job job, String hashKey) {
      job.getConfiguration().set(REDIS_HASH_KEY_CONF, hashKey);
    }

    @Override
    public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
      return new RedisHashRecordWriter(job.getConfiguration().get(
          REDIS_HASH_KEY_CONF), job.getConfiguration().get(
          REDIS_HOSTS_CONF));
    }

    @Override
    public void checkOutputSpecs(JobContext job)
        throws IOException {
      String hosts = job.getConfiguration().get(REDIS_HOSTS_CONF);

      if (hosts == null || hosts.isEmpty()) {
        throw new IOException(REDIS_HOSTS_CONF
            + " is not set in configuration.");
      }

      String hashKey = job.getConfiguration().get(REDIS_HASH_KEY_CONF);

      if (hashKey == null || hashKey.isEmpty()) {
        throw new IOException(REDIS_HASH_KEY_CONF
            + " is not set in configuration.");
      }
    }

    @Override
    public OutputCommitter getOutputCommitter(TaskAttemptContext context)
        throws IOException, InterruptedException {
      return (new NullOutputFormat<Text, Text>())
          .getOutputCommitter(context);
    }

    public static class RedisHashRecordWriter extends
        RecordWriter<Text, Text> {

      private static final Logger LOG = Logger
          .getLogger(RedisHashRecordWriter.class);
      private HashMap<Integer, Jedis> jedisMap = new HashMap<Integer, Jedis>();
      private String hashKey = null;

      public RedisHashRecordWriter(String hashKey, String hosts) {
        LOG.info("Connecting to " + hosts + " and writing to "
            + hashKey);
        this.hashKey = hashKey;
        // Create a connection to Redis for each host
        // Map an integer 0-(numRedisInstances - 1) to the instance
        int i = 0;
        for (String host : hosts.split(",")) {
          Jedis jedis = new Jedis(host);
          jedis.connect();
          jedisMap.put(i, jedis);
          ++i;
        }
      }

      @Override
      public void write(Text key, Text value) throws IOException,
          InterruptedException {
        // Get the Jedis instance that this key/value pair will be
        // written to
        Jedis j = jedisMap.get(Math.abs(key.hashCode())
            % jedisMap.size());

        // Write the key/value pair
        j.hset(hashKey, key.toString(), value.toString());
      }

      @Override
      public void close(TaskAttemptContext context) throws IOException,
          InterruptedException {
        // For each jedis instance, disconnect it
        for (Jedis jedis : jedisMap.values()) {
          jedis.disconnect();
        }
      }
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args)
        .getRemainingArgs();

    if (otherArgs.length != 3) {
      System.err
          .println("Usage: RedisOutput <user data> <redis hosts> <hash name>");
      System.exit(1);
    }

    Path inputPath = new Path(otherArgs[0]);
    String hosts = otherArgs[1];
    String hashName = otherArgs[2];

    Job job = new Job(conf, "Redis Output");
    job.setJarByClass(RedisOutputDriver.class);

    job.setMapperClass(RedisOutputMapper.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, inputPath);

    job.setOutputFormatClass(RedisHashOutputFormat.class);
    RedisHashOutputFormat.setRedisHosts(job, hosts);
    RedisHashOutputFormat.setRedisHashKey(job, hashName);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    int code = job.waitForCompletion(true) ? 0 : 2;

    System.exit(code);
  }
}
TOP

Related Classes of mrdp.ch7.RedisOutputDriver

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.