Package com.amazonaws.services.elasticmapreduce.util

Source Code of com.amazonaws.services.elasticmapreduce.util.StreamingStep

/*
* Copyright 2010-2012 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
*  http://aws.amazon.com/apache2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package com.amazonaws.services.elasticmapreduce.util;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;

/**
* Class that makes it easy to define Hadoop Streaming steps.
* <p>
* See also: <a href="http://hadoop.apache.org/common/docs/r0.20.2/streaming.html">Hadoop Streaming</a>
*
* <pre>
* AWSCredentials credentials = new BasicAWSCredentials(accessKey, secretKey);
* AmazonElasticMapReduce emr = new AmazonElasticMapReduceClient(credentials);
*
* HadoopJarStepConfig config = new StreamingStep()
*     .withInputs("s3://elasticmapreduce/samples/wordcount/input")
*     .withOutput("s3://my-bucket/output/")
*     .withMapper("s3://elasticmapreduce/samples/wordcount/wordSplitter.py")
*     .withReducer("aggregate")
*     .toHadoopJarStepConfig();
*
* StepConfig wordCount = new StepConfig()
*     .withName("Word Count")
*     .withActionOnFailure("TERMINATE_JOB_FLOW")
*     .withHadoopJarStep(config);
*
* RunJobFlowRequest request = new RunJobFlowRequest()
*     .withName("Word Count")
*     .withSteps(wordCount)
*     .withLogUri("s3://log-bucket/")
*     .withInstances(new JobFlowInstancesConfig()
*         .withEc2KeyName("keypair")
*         .withHadoopVersion("0.20")
*         .withInstanceCount(5)
*         .withKeepJobFlowAliveWhenNoSteps(true)
*         .withMasterInstanceType("m1.small")
*         .withSlaveInstanceType("m1.small"));
*
* RunJobFlowResult result = emr.runJobFlow(request);
* </pre>
*/
public class StreamingStep {
  private List<String> inputs = new ArrayList<String>();
  private String output;
  private String mapper;
  private String reducer;
  private Map<String, String> hadoopConfig = new HashMap<String, String>();


  /**
   * Creates a new default StreamingStep.
   */
  public StreamingStep() {
  }


  /**
   * Get list of step input paths.
   * @return List of step inputs
   */
  public List<String> getInputs() {
    return inputs;
  }

  /**
   * Set the list of step input paths.
   * @param inputs List of step inputs.
   */
  public void setInputs(Collection<String> inputs) {
    List<String> newInputs = new ArrayList<String>();
    if (inputs != null) {
      newInputs.addAll(inputs);
    }
    this.inputs = newInputs;
  }

  /**
   * Add more input paths to this step.
   * @param inputs A list of inputs to this step.
   * @return A reference to this updated object so that method calls can be chained
   *         together.
   */
  public StreamingStep withInputs(String ... inputs) {
    for (String input : inputs) {
      this.inputs.add(input);
    }
    return this;
  }


  /**
   * Get output path.
   * @return Output path.
   */
  public String getOutput() {
    return output;
  }

  /**
   * Set the output path for this step.
   * @param output Output path.
   */
  public void setOutput(String output) {
    this.output = output;
  }

  /**
   * Set the output path for this step.
   * @param output Output path
   * @return A reference to this updated object so that method calls can be chained
   *         together.
   */
  public StreamingStep withOutput(String output) {
    this.output = output;
    return this;
  }


  /**
   * Get the mapper.
   * @return Mapper.
   */
  public String getMapper() {
    return mapper;
  }

  /**
   * Set the mapper.
   * @param mapper Mapper
   */
  public void setMapper(String mapper) {
    this.mapper = mapper;
  }

  /**
   * Set the mapper
   * @param mapper Mapper
   * @return A reference to this updated object so that method calls can be chained
   *         together.
   */
  public StreamingStep withMapper(String mapper) {
    this.mapper = mapper;
    return this;
  }


  /**
   * Get the reducer
   * @return Reducer
   */
  public String getReducer() {
    return reducer;
  }

  /**
   * Set the reducer
   * @param reducer Reducer
   */
  public void setReducer(String reducer) {
    this.reducer = reducer;
  }

  /**
   * Set the reducer
   * @param reducer Reducer
   * @return A reference to this updated object so that method calls can be chained
   *         together.
   */
  public StreamingStep withReducer(String reducer) {
    this.reducer = reducer;
    return this;
  }


  /**
   * Get the Hadoop config overrides (-D values).
   * @return Hadoop config.
   */
  public Map<String, String> getHadoopConfig() {
    return hadoopConfig;
  }

  /**
   * Set the Hadoop config overrides (-D values).
   * @param hadoopConfig Hadoop config.
   */
  public void setHadoopConfig(Map<String, String> hadoopConfig) {
    this.hadoopConfig = hadoopConfig;
  }

  /**
   * Add a Hadoop config override (-D value).
   * @param key Hadoop configuration key.
   * @param value Configuration value.
   * @return A reference to this updated object so that method calls can be chained
   *         together.
   */
  public StreamingStep withHadoopConfig(String key, String value) {
    hadoopConfig.put(key, value);
    return this;
  }


  /**
   * Creates the final HadoopJarStepConfig once you are done configuring the step. You can use
   * this as you would any other HadoopJarStepConfig.
   * @return HadoopJarStepConfig representing this streaming step.
   */
  public HadoopJarStepConfig toHadoopJarStepConfig() {
    List<String> args = new ArrayList<String>();

    if (reducer == null) {
      hadoopConfig.put("mapred.reduce.tasks", "0");
    }

    for (Map.Entry<String, String> entry : hadoopConfig.entrySet()) {
      args.add("-D");
      args.add(entry.getKey() + "=" + entry.getValue());
    }

    for (String input : inputs) {
      args.add("-input");
      args.add(input);
    }

    if (output != null) {
      args.add("-output");
      args.add(output);
    }

    if (mapper != null) {
      args.add("-mapper");
      args.add(mapper);
    }

    if (reducer != null) {
      args.add("-reducer");
      args.add(reducer);
    }

    return new HadoopJarStepConfig()
      .withJar("/home/hadoop/contrib/streaming/hadoop-streaming.jar")
      .withArgs(args);
  }
}
TOP

Related Classes of com.amazonaws.services.elasticmapreduce.util.StreamingStep

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.