Package com.google.appengine.tools.mapreduce

Source Code of com.google.appengine.tools.mapreduce.MapReduceSettings$Builder

// Copyright 2011 Google Inc. All Rights Reserved.

package com.google.appengine.tools.mapreduce;

import com.google.appengine.api.appidentity.AppIdentityServiceFactory;
import com.google.appengine.api.appidentity.AppIdentityServiceFailureException;
import com.google.appengine.tools.cloudstorage.GcsFileOptions;
import com.google.appengine.tools.cloudstorage.GcsFilename;
import com.google.appengine.tools.cloudstorage.GcsService;
import com.google.appengine.tools.cloudstorage.GcsServiceFactory;
import com.google.appengine.tools.cloudstorage.RetryParams;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.UUID;
import java.util.logging.Level;
import java.util.logging.Logger;


/**
* Settings that affect how a MapReduce is executed. May affect performance and resource usage, but
* should not affect the result (unless the result is dependent on the performance or resource usage
* of the computation, or if different backends, modules or different base urls have different
* versions of the code).
*
* @author ohler@google.com (Christian Ohler)
*/
public class MapReduceSettings extends MapSettings {

  private static final long serialVersionUID = 610088354289299175L;
  private static final Logger log = Logger.getLogger(MapReduceSettings.class.getName());
  public static final int DEFAULT_MAP_FANOUT = 32;
  public static final int DEFAULT_SORT_BATCH_PER_EMIT_BYTES = 32 * 1024;
  public static final int DEFAULT_SORT_READ_TIME_MILLIS = 180000;
  public static final int DEFAULT_MERGE_FANIN = 32;


  private final String bucketName;
  private final int mapFanout;
  private final Long maxSortMemory;
  private final int sortReadTimeMillis;
  private final int sortBatchPerEmitBytes;
  private final int mergeFanin;

  public static class Builder extends BaseBuilder<Builder> {

    private String bucketName;
    private int mapFanout = DEFAULT_MAP_FANOUT;
    private Long maxSortMemory;
    private int sortReadTimeMillis = DEFAULT_SORT_READ_TIME_MILLIS;
    private int sortBatchPerEmitBytes = DEFAULT_SORT_BATCH_PER_EMIT_BYTES;
    private int mergeFanin = DEFAULT_MERGE_FANIN;

    public Builder() {}

    public Builder(MapReduceSettings settings) {
      super(settings);
      this.bucketName = settings.bucketName;
      this.mapFanout = settings.mapFanout;
      this.maxSortMemory = settings.maxSortMemory;
      this.sortReadTimeMillis = settings.sortReadTimeMillis;
      this.sortBatchPerEmitBytes = settings.sortBatchPerEmitBytes;
      this.mergeFanin = settings.mergeFanin;
    }

    public Builder(MapSettings settings) {
      super(settings);
    }

    @Override
    protected Builder self() {
      return this;
    }

    /**
     * Sets the GCS bucket that will be used for temporary files. If this is not set or {@code null}
     * the app's default bucket will be used.
     */
    public Builder setBucketName(String bucketName) {
      this.bucketName = bucketName;
      return this;
    }
   
    /**
     * The maximum number of files the map stage will write to at the same time. A higher number may
     * increase the speed of the job at the expense of more memory used during the map and sort
     * phases and more intermediate files created.
     *
     * Using the default is recommended.
     */
    public Builder setMapFanout(int mapFanout) {
      Preconditions.checkArgument(mapFanout > 0);
      this.mapFanout = mapFanout;
      return this;
    }
   
    /**
     * The maximum memory the sort stage should allocate (in bytes). This is used to lower the
     * amount of memory it will use. Regardless of this setting it will not exhaust available
     * memory. Null or unset will use the default (no maximum)
     *
     * Using the default is recommended.
     */
    public Builder setMaxSortMemory(Long maxMemory) {
      Preconditions.checkArgument(maxMemory == null || maxMemory >= 0);
      this.maxSortMemory = maxMemory;
      return this;
    }
   
    /**
     * The maximum length of time sort should spend reading input before it starts sorting it and
     * writing it out.
     *
     * Using the default is recommended.
     */
    public Builder setSortReadTimeMillis(int sortReadTimeMillis) {
      Preconditions.checkArgument(sortReadTimeMillis >= 0);
      this.sortReadTimeMillis = sortReadTimeMillis;
      return this;
    }
   
    /**
     * Size (in bytes) of items to batch together in the output of the sort. (A higher value saves
     * storage cost, but needs to be small enough to not impact memory use.)
     *
     * Using the default is recommended.
     */
    public Builder setSortBatchPerEmitBytes(int sortBatchPerEmitBytes) {
      Preconditions.checkArgument(sortBatchPerEmitBytes >= 0);
      this.sortBatchPerEmitBytes = sortBatchPerEmitBytes;
      return this;
    }

    /**
     * Number of files the merge stage will read at the same time. A higher number can increase the
     * speed of the job at the expense of requiring more memory in the merge stage.
     *
     * Using the default is recommended.
     */
    public Builder setMergeFanin(int mergeFanin) {
      this.mergeFanin = mergeFanin;
      return this;
    }

    public MapReduceSettings build() {
      return new MapReduceSettings(this);
    }
  }

  private MapReduceSettings(Builder builder) {
    super(builder);
    bucketName = verifyAndSetBucketName(builder.bucketName);
    mapFanout = builder.mapFanout;
    maxSortMemory = builder.maxSortMemory;
    sortReadTimeMillis = builder.sortReadTimeMillis;
    sortBatchPerEmitBytes = builder.sortBatchPerEmitBytes;
    mergeFanin = builder.mergeFanin;
  }

  String getBucketName() {
    return bucketName;
  }

  Long getMaxSortMemory() {
    return maxSortMemory;
  }

  int getMapFanout() {
    return mapFanout;
  }

  int getSortReadTimeMillis() {
    return sortReadTimeMillis;
  }

  int getSortBatchPerEmitBytes() {
    return sortBatchPerEmitBytes;
  }

  int getMergeFanin() {
    return mergeFanin;
  }

  @Override
  public String toString() {
    return "MapReduceSettings [bucketName=" + bucketName + ", mapFanout=" + mapFanout
        + ", maxSortMemory=" + maxSortMemory + ", sortReadTimeMillis=" + sortReadTimeMillis
        + ", sortBatchPerEmitBytes=" + sortBatchPerEmitBytes + ", mergeFanin=" + mergeFanin + "]";
  }

  private static String verifyAndSetBucketName(String bucket) {
    if (Strings.isNullOrEmpty(bucket)) {
      try {
        bucket = AppIdentityServiceFactory.getAppIdentityService().getDefaultGcsBucketName();
        if (Strings.isNullOrEmpty(bucket)) {
          String message = "The BucketName property was not set in the MapReduceSettings object, "
              + "and this application does not have a default bucket configured to fall back on.";
          log.log(Level.SEVERE, message);
          throw new IllegalArgumentException(message);
        }
      } catch (AppIdentityServiceFailureException e) {
        throw new RuntimeException(
            "The BucketName property was not set in the MapReduceSettings object, "
            + "and could not get the default bucket.", e);
      }
    }
    try {
      verifyBucketIsWritable(bucket);
    } catch (Exception e) {
      throw new RuntimeException("Writeable Bucket '" + bucket + "' test failed. See "
          + "http://developers.google.com/appengine/docs/java/googlecloudstorageclient/activate"
          + " for more information on how to setup Google Cloude storage.", e);
    }
    return bucket;
  }

  private static void verifyBucketIsWritable(String bucket) throws IOException {
    GcsService gcsService = GcsServiceFactory.createGcsService(new RetryParams.Builder()
        .retryMinAttempts(2)
        .retryMaxAttempts(3)
        .totalRetryPeriodMillis(20000)
        .requestTimeoutMillis(10000)
        .build());
    GcsFilename filename = new GcsFilename(bucket, UUID.randomUUID() + ".tmp");
    if (gcsService.getMetadata(filename) != null) {
      log.warning("File '" + filename.getObjectName() + "' exists. Skipping bucket write test.");
      return;
    }
    try {
      gcsService.createOrReplace(filename, GcsFileOptions.getDefaultInstance(),
          ByteBuffer.wrap("Delete me!".getBytes(StandardCharsets.UTF_8)));
    } finally {
      gcsService.delete(filename);
    }
  }
}
TOP

Related Classes of com.google.appengine.tools.mapreduce.MapReduceSettings$Builder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.