Package com.google.appengine.tools.mapreduce.inputs

Source Code of com.google.appengine.tools.mapreduce.inputs.GoogleCloudStorageLineInput

package com.google.appengine.tools.mapreduce.inputs;

import static com.google.common.base.Preconditions.checkNotNull;

import com.google.appengine.tools.cloudstorage.GcsFileMetadata;
import com.google.appengine.tools.cloudstorage.GcsFilename;
import com.google.appengine.tools.cloudstorage.GcsService;
import com.google.appengine.tools.cloudstorage.GcsServiceFactory;
import com.google.appengine.tools.mapreduce.Input;
import com.google.appengine.tools.mapreduce.InputReader;
import com.google.appengine.tools.mapreduce.impl.MapReduceConstants;
import com.google.common.base.Preconditions;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
* CloudStorageLineInput shards files in Cloud Storage on separator boundries.
*/
public class GoogleCloudStorageLineInput extends Input<byte[]> {

  private static final long MIN_SHARD_SIZE = 1024L;

  private static final long serialVersionUID = 5501931160319682453L;

  private final GcsFilename file;
  private final byte separator;
  private final int shardCount;
  private final int bufferSize;

  public GoogleCloudStorageLineInput(GcsFilename file, byte separator, int shardCount) {
    this(file, separator, shardCount, 0);
  }

  public GoogleCloudStorageLineInput(
      GcsFilename file, byte separator, int shardCount, int bufferSize) {
    this.file = checkNotNull(file, "Null file");
    this.separator = separator;
    this.shardCount = shardCount;
    this.bufferSize = bufferSize;
  }

  @Override
  public List<? extends InputReader<byte[]>> createReaders() {
    GcsService gcsService =
        GcsServiceFactory.createGcsService(MapReduceConstants.GCS_RETRY_PARAMETERS);
    GcsFileMetadata metadata;
    try {
      metadata = gcsService.getMetadata(file);
      if (metadata == null) {
        throw new RuntimeException("File does not exist: " + file);
      }
    } catch (IOException e) {
      throw new RuntimeException("Unable to read file metadata: " + file, e);
    }
    long blobSize = metadata.getLength();
    return split(file, blobSize, shardCount);
  }


  private List<? extends InputReader<byte[]>> split(GcsFilename file,
      long blobSize, int shardCount) {
    Preconditions.checkNotNull(file);
    Preconditions.checkArgument(shardCount > 0);
    Preconditions.checkArgument(blobSize >= 0);

    // Sanity check
    if (shardCount * MIN_SHARD_SIZE > blobSize) {
      shardCount = (int) (blobSize / MIN_SHARD_SIZE) + 1;
    }

    List<GoogleCloudStorageLineInputReader> result = new ArrayList<>();
    long startOffset = 0L;
    for (int i = 1; i < shardCount; i++) {
      long endOffset = (i * blobSize) / shardCount;
      result.add(new GoogleCloudStorageLineInputReader(file, startOffset, endOffset, separator,
          bufferSize));
      startOffset = endOffset;
    }
    result.add(new GoogleCloudStorageLineInputReader(file, startOffset, blobSize, separator));
    return result;
  }
}
TOP

Related Classes of com.google.appengine.tools.mapreduce.inputs.GoogleCloudStorageLineInput

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.