Package org.apache.gora.goraci

Source Code of org.apache.gora.goraci.Generator$GeneratorMapper

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.gora.goraci;

import org.apache.gora.goraci.generated.CINode;
import org.apache.gora.goraci.generated.Flushed;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.UUID;

import org.apache.avro.util.Utf8;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.gora.store.DataStore;
import org.apache.gora.store.DataStoreFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
* A map-only job that generates random linked lists and stores them using Gora.
*/
public class Generator extends Configured implements Tool {
 
  private static final Log LOG = LogFactory.getLog(Generator.class);
 
  static final int WIDTH = 1000000;
  static final int WRAP = WIDTH * 25;

  static class GeneratorInputFormat extends InputFormat<LongWritable,NullWritable> {
   
    static class GeneratorInputSplit extends InputSplit implements Writable {
     
      @Override
      public long getLength() throws IOException, InterruptedException {
        return 1;
      }
     
      @Override
      public String[] getLocations() throws IOException, InterruptedException {
        return new String[0];
      }
     
      @Override
      public void readFields(DataInput arg0) throws IOException {
        // TODO Auto-generated method stub
       
      }
     
      @Override
      public void write(DataOutput arg0) throws IOException {
        // TODO Auto-generated method stub
       
      }
   }
   
    static class GeneratorRecordReader extends RecordReader<LongWritable,NullWritable> {
     
      private long numNodes;
      private boolean hasNext = true;
     
      @Override
      public void close() throws IOException {
       
      }
     
      @Override
      public LongWritable getCurrentKey() throws IOException, InterruptedException {
        return new LongWritable(numNodes);
      }
     
      @Override
      public NullWritable getCurrentValue() throws IOException, InterruptedException {
        return NullWritable.get();
      }
     
      @Override
      public float getProgress() throws IOException, InterruptedException {
        return 0;
      }
     
      @Override
      public void initialize(InputSplit arg0, TaskAttemptContext context) throws IOException, InterruptedException {
        numNodes = context.getConfiguration().getLong("org.apache.gora.goraci.generator.nodes", 1000000);
      }
     
      @Override
      public boolean nextKeyValue() throws IOException, InterruptedException {
        boolean hasnext = this.hasNext;
        this.hasNext = false;
        return hasnext;
      }
     
    }
   
    @Override
    public RecordReader<LongWritable,NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
      GeneratorRecordReader rr = new GeneratorRecordReader();
      rr.initialize(split, context);
      return rr;
    }
   
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
      int numMappers = job.getConfiguration().getInt("org.apache.gora.goraci.generator.mappers", 1);
     
      ArrayList<InputSplit> splits = new ArrayList<InputSplit>(numMappers);
     
      for (int i = 0; i < numMappers; i++) {
        splits.add(new GeneratorInputSplit());
      }
     
      return splits;
    }
   
  }

  /**
   * Some ASCII art time:
   * [ . . . ] represents one batch of random longs of length WIDTH
   *
   *                _________________________
   *               |                  ______ |
   *               |                 |      ||
   *             __+_________________+_____ ||
   *             v v                 v     |||
   * first   = [ . . . . . . . . . . . ]   |||
   *             ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^     |||
   *             | | | | | | | | | | |     |||
   * prev    = [ . . . . . . . . . . . ]   |||
   *             ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^     |||
   *             | | | | | | | | | | |     |||
   * current = [ . . . . . . . . . . . ]   |||
   *                                       |||
   * ...                                   |||
   *                                       |||
   * last    = [ . . . . . . . . . . . ]   |||
   *             | | | | | | | | | | |-----|||
   *             |                 |--------||
   *             |___________________________|
   */

  static class GeneratorMapper extends Mapper<LongWritable,NullWritable,NullWritable,NullWritable> {
   
    private boolean concurrent;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);
      concurrent = context.getConfiguration().getBoolean("org.apache.gora.goraci.generator.concurrent", false);
    }
   
    @Override
    protected void map(LongWritable key, NullWritable value, Context output) throws IOException {
      long num = key.get();
      System.out.println("num" + num);
     
      Utf8 id = new Utf8(UUID.randomUUID().toString());
     
      Configuration conf = new Configuration();
      DataStore<Long,CINode> store = DataStoreFactory.getDataStore(Long.class, CINode.class, conf);
      DataStore<Utf8,Flushed> flushedTable = null;
     
      if (concurrent) {
        flushedTable = DataStoreFactory.getDataStore(Utf8.class, Flushed.class, conf);
        flushedTable.createSchema();
      }
     
      store.createSchema();
     
      Random rand = new Random();
     
      long[] first = null;
      long[] prev = null;
      long[] current = new long[WIDTH];
     
      long count = 0;
      while (count < num) {
        for (int i = 0; i < current.length; i++)
          current[i] = Math.abs(rand.nextLong());
       
        persist(output, store, count, prev, current, id);
       
        if (first == null)
          first = current;
        prev = current;
        current = new long[WIDTH];
       
        count += current.length;
        output.setStatus("Count " + count);
       
        if (count % WRAP == 0) {
          // this block of code turns the 1 million linked list of length 25 into one giant circular linked list of 25 million
         
          circularLeftShift(first);
         
          updatePrev(store, first, prev);
         
          if (concurrent) {
          // keep track of whats flushed in another table, verify can use this info to run concurrently
            Flushed flushed = flushedTable.newPersistent();
            flushed.setCount(count);
            flushedTable.put(id, flushed);
            flushedTable.flush();
          }

          first = null;
          prev = null;
        }
       
      }
     
      store.close();
      if (concurrent)
        flushedTable.close();
     
    }
   
    private static void circularLeftShift(long[] first) {
      long ez = first[0];
      for (int i = 0; i < first.length - 1; i++)
        first[i] = first[i + 1];
      first[first.length - 1] = ez;
    }
   
    private static void persist(Context output, DataStore<Long,CINode> store, long count, long[] prev, long[] current, Utf8 id) throws IOException {
      for (int i = 0; i < current.length; i++) {
        CINode node = store.newPersistent();
        node.setCount(count + i);
        if (prev != null)
          node.setPrev(prev[i]);
        else
          node.setPrev((long) -1);
        node.setClient(id);
       
        store.put(current[i], node);
        if (i % 1000 == 0) {
          // Tickle progress every so often else maprunner will think us hung
          output.progress();
        }
      }
     
      store.flush();
    }
   
    private static void updatePrev(DataStore<Long,CINode> store, long[] first, long[] current) throws IOException {
      for (int i = 0; i < current.length; i++) {
        CINode node = store.newPersistent();
        node.setPrev(current[i]);
        store.put(first[i], node);
      }
     
      store.flush();
    }
  }
 
 
  @Override
  public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption("c", "concurrent", false, "update secondary table with information that allows verification to run concurrently");
   
    GnuParser parser = new GnuParser();
    CommandLine cmd = null;
    try {
      cmd = parser.parse(options, args);
      if (cmd.getArgs().length != 2) {
        throw new ParseException("Did not see expected # of arguments, saw " + cmd.getArgs().length);
      }
    } catch (ParseException e) {
      System.err.println("Failed to parse command line " + e.getMessage());
      System.err.println();
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp(getClass().getSimpleName() + " <num mappers> <num nodes per map>", options);
      System.exit(-1);
    }

    int numMappers = Integer.parseInt(cmd.getArgs()[0]);
    long numNodes = Long.parseLong(cmd.getArgs()[1]);
    return run(numMappers, numNodes, cmd.hasOption("c"));
  }

  public int run(int numMappers, long numNodes, boolean concurrent) throws Exception {
    LOG.info("Running Generator with numMappers=" + numMappers +", numNodes=" + numNodes);
   
    Job job = new Job(getConf());
   
    job.setJobName("Link Generator");
    job.setNumReduceTasks(0);
    job.setJarByClass(getClass());
   
    job.setInputFormatClass(GeneratorInputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
   
    job.getConfiguration().setInt("org.apache.gora.goraci.generator.mappers", numMappers);
    job.getConfiguration().setLong("org.apache.gora.goraci.generator.nodes", numNodes);
    job.getConfiguration().setBoolean("org.apache.gora.goraci.generator.concurrent", concurrent);
   
    job.setMapperClass(GeneratorMapper.class);
   
    job.setOutputFormatClass(NullOutputFormat.class);

    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);

    boolean success = job.waitForCompletion(true);
   
    return success ? 0 : 1;
  }
 
  public static void main(String[] args) throws Exception {
    int ret = ToolRunner.run(new Generator(), args);
    System.exit(ret);
  }
}
TOP

Related Classes of org.apache.gora.goraci.Generator$GeneratorMapper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.