/*
* Copyright 2012 Amazon Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://aws.amazon.com/apache2.0
*
* This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
* OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and
* limitations under the License.
*/
package com.amazonaws.services.glacier.transfer;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.security.NoSuchAlgorithmException;
import java.util.LinkedList;
import java.util.List;
import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.internal.StaticCredentialsProvider;
import com.amazonaws.services.glacier.AmazonGlacier;
import com.amazonaws.services.glacier.AmazonGlacierClient;
import com.amazonaws.services.glacier.TreeHashGenerator;
import com.amazonaws.services.glacier.internal.TreeHashInputStream;
import com.amazonaws.services.glacier.model.AbortMultipartUploadRequest;
import com.amazonaws.services.glacier.model.CompleteMultipartUploadRequest;
import com.amazonaws.services.glacier.model.CompleteMultipartUploadResult;
import com.amazonaws.services.glacier.model.GetJobOutputRequest;
import com.amazonaws.services.glacier.model.GetJobOutputResult;
import com.amazonaws.services.glacier.model.InitiateJobRequest;
import com.amazonaws.services.glacier.model.InitiateJobResult;
import com.amazonaws.services.glacier.model.InitiateMultipartUploadRequest;
import com.amazonaws.services.glacier.model.InitiateMultipartUploadResult;
import com.amazonaws.services.glacier.model.JobParameters;
import com.amazonaws.services.glacier.model.UploadArchiveRequest;
import com.amazonaws.services.glacier.model.UploadArchiveResult;
import com.amazonaws.services.glacier.model.UploadMultipartPartRequest;
import com.amazonaws.services.s3.internal.InputSubstream;
import com.amazonaws.services.s3.internal.RepeatableFileInputStream;
import com.amazonaws.services.sns.AmazonSNSClient;
import com.amazonaws.services.sqs.AmazonSQSClient;
import com.amazonaws.util.BinaryUtils;
/**
 * Utilities for uploading and downloading data to and from AWS Glacier.
 */
public class ArchiveTransferManager {

    /** The maximum part size, in bytes, for a Glacier multipart upload. */
    private static final long MAXIMUM_PART_SIZE = 1024L * 1024 * 1024 * 4;

    /** The minimum part size, in bytes, for a Glacier multipart upload. */
    private static final long MINIMUM_PART_SIZE = 1024L * 1024;

    /** Threshold, in bytes, for when to use the multipart upload operations */
    private static final long MULTIPART_UPLOAD_SIZE_THRESHOLD = 1024L * 1024L * 100;

    /** Buffer size, in bytes, used when streaming downloaded archive data to disk. */
    private static final int DOWNLOAD_BUFFER_SIZE = 1024 * 1024;

    /** Glacier client used for making all requests. */
    private final AmazonGlacier glacier;

    /**
     * Credentials/configuration used to build SQS and SNS clients for polling
     * job status. Either these two are non-null, or {@link #sqs}/{@link #sns}
     * are non-null, depending on which constructor was used.
     */
    private final AWSCredentialsProvider credentialsProvider;
    private final ClientConfiguration clientConfiguration;

    private final AmazonSQSClient sqs;
    private final AmazonSNSClient sns;

    /**
     * Constructs a new ArchiveTransferManager, using the specified AWS
     * credentials to authenticate requests.
     *
     * @param credentials
     *            The AWS credentials used to authenticate requests.
     */
    public ArchiveTransferManager(AWSCredentials credentials) {
        this(new StaticCredentialsProvider(credentials), new ClientConfiguration());
    }

    /**
     * Constructs a new ArchiveTransferManager, using the specified AWS credentials provider
     * and client configuration.
     *
     * @param credentialsProvider
     *            The AWS credentials provider used to authenticate requests.
     * @param clientConfiguration
     *            Client specific options, such as proxy settings, retries, and timeouts.
     */
    public ArchiveTransferManager(AWSCredentialsProvider credentialsProvider, ClientConfiguration clientConfiguration) {
        this(new AmazonGlacierClient(credentialsProvider, clientConfiguration), credentialsProvider, clientConfiguration);
    }

    /**
     * Constructs a new ArchiveTransferManager, using the specified Amazon
     * Glacier client and AWS credentials provider.
     *
     * @param glacier
     *            The client for working with Amazon Glacier.
     * @param credentialsProvider
     *            The AWS credentials provider used to authenticate requests.
     */
    public ArchiveTransferManager(AmazonGlacierClient glacier, AWSCredentialsProvider credentialsProvider) {
        this(glacier, credentialsProvider, new ClientConfiguration());
    }

    /**
     * Constructs a new ArchiveTransferManager, using the specified Amazon
     * Glacier client and AWS credentials.
     *
     * @param glacier
     *            The client for working with Amazon Glacier.
     * @param credentials
     *            The AWS credentials used to authenticate requests.
     */
    public ArchiveTransferManager(AmazonGlacierClient glacier, AWSCredentials credentials) {
        this(glacier, new StaticCredentialsProvider(credentials), new ClientConfiguration());
    }

    /**
     * Constructs a new ArchiveTransferManager, using the specified Amazon
     * Glacier client, AWS credentials provider and client configuration.
     *
     * @param glacier
     *            The client for working with Amazon Glacier.
     * @param credentialsProvider
     *            The AWS credentials provider used to authenticate requests.
     * @param clientConfiguration
     *            Client specific options, such as proxy settings, retries, and
     *            timeouts.
     */
    public ArchiveTransferManager(AmazonGlacierClient glacier, AWSCredentialsProvider credentialsProvider, ClientConfiguration clientConfiguration) {
        this.credentialsProvider = credentialsProvider;
        this.clientConfiguration = clientConfiguration;
        this.glacier = glacier;
        // SQS/SNS clients will be created on demand from the credentials
        // provider when polling job status.
        this.sns = null;
        this.sqs = null;
    }

    /**
     * Constructs a new ArchiveTransferManager, using the specified Amazon
     * Glacier client, and the specified Amazon SQS and Amazon SNS clients for
     * polling download job status.
     * <p>
     * This constructor form can be used to work with ArchiveTransferManager in
     * any AWS region where Amazon Glacier is supported. Just make sure to set
     * the correct endpoint on each individual client object so that they all
     * operate in the same region.
     *
     * @param glacier
     *            The client for working with Amazon Glacier.
     * @param sqs
     *            The client for working with Amazon SQS when polling archive
     *            retrieval job status.
     * @param sns
     *            The client for working with Amazon SNS when polling archive
     *            retrieval job status.
     */
    public ArchiveTransferManager(AmazonGlacierClient glacier, AmazonSQSClient sqs, AmazonSNSClient sns) {
        // Pre-built SQS/SNS clients supplied directly, so no credentials
        // provider or client configuration is needed.
        this.credentialsProvider = null;
        this.clientConfiguration = null;
        this.glacier = glacier;
        this.sqs = sqs;
        this.sns = sns;
    }

    /**
     * Uploads the specified file to Amazon Glacier for archival storage in the
     * specified vault for the user's current account. For small archives, this
     * method will upload the archive directly to Glacier. For larger archives,
     * this method will use Glacier's multipart upload API to split the upload
     * into multiple parts for better error recovery if any errors are
     * encountered while streaming the data to Amazon Glacier.
     *
     * @param vaultName
     *            The name of the vault to upload to.
     * @param archiveDescription
     *            The description of the new archive being uploaded.
     * @param file
     *            The file to upload to Amazon Glacier.
     *
     * @return The result of the upload, including the archive ID needed to
     *         access the upload later.
     *
     * @throws AmazonServiceException
     *             If any problems were encountered while communicating with
     *             AWS.
     * @throws AmazonClientException
     *             If any problems were encountered inside the AWS SDK for Java
     *             client code in making requests or processing responses from
     *             AWS.
     * @throws FileNotFoundException
     *             If the specified file to upload doesn't exist.
     */
    public UploadResult upload(final String vaultName, final String archiveDescription, final File file)
            throws AmazonServiceException, AmazonClientException, FileNotFoundException {
        return upload(null, vaultName, archiveDescription, file);
    }

    /**
     * Uploads the specified file to Amazon Glacier for archival storage in the
     * specified vault in the specified user's account. For small archives, this
     * method will upload the archive directly to Glacier. For larger archives,
     * this method will use Glacier's multipart upload API to split the upload
     * into multiple parts for better error recovery if any errors are
     * encountered while streaming the data to Amazon Glacier.
     *
     * @param accountId
     *            The ID for the account which owns the Glacier vault being
     *            uploaded to. To use the same account the developer is using to
     *            make requests to AWS, the value <code>"-"</code> can be used
     *            instead of the full account ID.
     * @param vaultName
     *            The name of the vault to upload to.
     * @param archiveDescription
     *            The description of the new archive being uploaded.
     * @param file
     *            The file to upload to Amazon Glacier.
     *
     * @return The result of the upload, including the archive ID needed to
     *         access the upload later.
     *
     * @throws AmazonServiceException
     *             If any problems were encountered while communicating with
     *             AWS.
     * @throws AmazonClientException
     *             If any problems were encountered inside the AWS SDK for Java
     *             client code in making requests or processing responses from
     *             AWS.
     * @throws FileNotFoundException
     *             If the specified file to upload doesn't exist.
     */
    public UploadResult upload(final String accountId, final String vaultName, final String archiveDescription, final File file)
            throws AmazonServiceException, AmazonClientException, FileNotFoundException {
        if (file.length() > MULTIPART_UPLOAD_SIZE_THRESHOLD) {
            return uploadInMultipleParts(accountId, vaultName, archiveDescription, file);
        } else {
            return uploadInSinglePart(accountId, vaultName, archiveDescription, file);
        }
    }

    /**
     * Downloads an archive from Amazon Glacier in the specified vault for the
     * current user's account, and saves it to the specified file. Amazon
     * Glacier is optimized for long term storage of data that isn't needed
     * quickly. This method will first make a request to Amazon Glacier to
     * prepare the archive to be downloaded. Once Glacier has finished preparing
     * the archive to be downloaded, this method will start downloading the data
     * and storing it in the specified file.
     *
     * @param vaultName
     *            The name of the vault to download the archive from.
     * @param archiveId
     *            The unique ID of the archive to download.
     * @param file
     *            The file save the archive to.
     *
     * @throws AmazonServiceException
     *             If any problems were encountered while communicating with
     *             AWS.
     * @throws AmazonClientException
     *             If any problems were encountered inside the AWS SDK for Java
     *             client code in making requests or processing responses from
     *             AWS.
     */
    public void download(final String vaultName, final String archiveId, final File file)
            throws AmazonServiceException, AmazonClientException {
        download(null, vaultName, archiveId, file);
    }

    /**
     * Downloads an archive from Amazon Glacier in the specified vault in the
     * specified user's account, and saves it to the specified file. Amazon
     * Glacier is optimized for long term storage of data that isn't needed
     * quickly. This method will first make a request to Amazon Glacier to
     * prepare the archive to be downloaded. Once Glacier has finished preparing
     * the archive to be downloaded, this method will start downloading the data
     * and storing it in the specified file.
     *
     * @param accountId
     *            The ID for the account which owns the Glacier vault where the
     *            archive is being downloaded from. To use the same account the
     *            developer is using to make requests to AWS, the value
     *            <code>"-"</code> can be used instead of the full account ID.
     * @param vaultName
     *            The name of the vault to download the archive from.
     * @param archiveId
     *            The unique ID of the archive to download.
     * @param file
     *            The file save the archive to.
     *
     * @throws AmazonServiceException
     *             If any problems were encountered while communicating with
     *             AWS.
     * @throws AmazonClientException
     *             If any problems were encountered inside the AWS SDK for Java
     *             client code in making requests or processing responses from
     *             AWS.
     */
    public void download(final String accountId, final String vaultName, final String archiveId, final File file)
            throws AmazonServiceException, AmazonClientException {
        JobStatusMonitor jobStatusMonitor = null;
        GetJobOutputResult jobOutputResult = null;
        try {
            // Build the status monitor either from credentials (creating SQS/SNS
            // clients internally) or from the pre-built clients, depending on
            // which constructor form was used.
            if (credentialsProvider != null && clientConfiguration != null) {
                jobStatusMonitor = new JobStatusMonitor(credentialsProvider, clientConfiguration);
            } else {
                jobStatusMonitor = new JobStatusMonitor(sqs, sns);
            }

            // Kick off an archive-retrieval job, with completion notifications
            // published to the monitor's SNS topic.
            JobParameters jobParameters = new JobParameters()
                .withArchiveId(archiveId)
                .withType("archive-retrieval")
                .withSNSTopic(jobStatusMonitor.getTopicArn());
            InitiateJobResult archiveRetrievalResult =
                glacier.initiateJob(new InitiateJobRequest()
                    .withAccountId(accountId)
                    .withVaultName(vaultName)
                    .withJobParameters(jobParameters));
            String jobId = archiveRetrievalResult.getJobId();

            // Blocks until Glacier has finished preparing the archive.
            jobStatusMonitor.waitForJobToComplete(jobId);

            jobOutputResult = glacier.getJobOutput(new GetJobOutputRequest()
                .withAccountId(accountId)
                .withVaultName(vaultName)
                .withJobId(jobId));
        } finally {
            // The monitor is still null if its construction threw; guard so we
            // don't mask the original failure with a NullPointerException.
            if (jobStatusMonitor != null) {
                jobStatusMonitor.shutdown();
            }
        }

        downloadJobOutput(jobOutputResult, file);
    }

    /**
     * Streams the body of a completed archive-retrieval job to the specified
     * file, then verifies data integrity by comparing the tree hash computed
     * over the downloaded bytes with the checksum reported by Glacier.
     *
     * @param jobOutputResult
     *            The result of the GetJobOutput call, carrying the archive data
     *            stream and the server-side tree hash.
     * @param file
     *            The file to save the archive data to.
     *
     * @throws AmazonClientException
     *             If the archive data can't be saved to disk, or if the
     *             client-side tree hash doesn't match the server-side tree
     *             hash.
     */
    private void downloadJobOutput(GetJobOutputResult jobOutputResult, File file) {
        TreeHashInputStream input;
        try {
            // Wrap the response body so the tree hash is computed as we read.
            input = new TreeHashInputStream(new BufferedInputStream(jobOutputResult.getBody()));
        } catch (NoSuchAlgorithmException e) {
            throw new AmazonClientException("Unable to compute hash for data integrity", e);
        }

        OutputStream output = null;
        byte[] buffer = new byte[DOWNLOAD_BUFFER_SIZE];
        try {
            output = new BufferedOutputStream(new FileOutputStream(file));
            int bytesRead;
            while ((bytesRead = input.read(buffer)) > 0) {
                output.write(buffer, 0, bytesRead);
            }
        } catch (IOException e) {
            throw new AmazonClientException("Unable to save archive to disk", e);
        } finally {
            // Best-effort close: a failure here shouldn't hide a real error.
            try {input.close();} catch (Exception ignored) {}
            if (output != null) {
                try {output.close();} catch (Exception ignored) {}
            }
        }

        // Verify integrity only after the copy completed successfully and the
        // streams are closed, so a write failure above isn't replaced by a
        // misleading hash-mismatch exception from a truncated file.
        try {
            String clientSideTreeHash = input.getTreeHash();
            String serverSideTreeHash = jobOutputResult.getChecksum();
            if (!clientSideTreeHash.equalsIgnoreCase(serverSideTreeHash)) {
                throw new AmazonClientException("Client side computed hash doesn't match server side hash; possible data corruption");
            }
        } catch (IOException e) {
            throw new AmazonClientException("Error while trying to confirm data integrity for archive download", e);
        }
    }

    /**
     * Calculates the part size to use when uploading an archive of the
     * specified size using Glacier's multipart upload APIs. Because of the tree
     * hashing algorithm, part sizes must be aligned on 2^n MB boundaries (ex:
     * 1MB, 2MB, 4MB, 8MB, etc). All parts must be the same size, except for the
     * last part.
     *
     * @param fileSize
     *            The size of the file being uploaded.
     *
     * @return The part size to use in the multipart upload.
     */
    private long calculatePartSize(long fileSize) {
        long partSize = MINIMUM_PART_SIZE;
        int approxNumParts = 1;
        // Double the part size until the file fits, or we'd exceed the
        // maximum allowed part size.
        while (partSize * approxNumParts < fileSize && partSize * 2 <= MAXIMUM_PART_SIZE) {
            partSize *= 2;
            approxNumParts *= 2;
        }
        return partSize;
    }

    /**
     * Creates a stream over a single part of the file, i.e. {@code length}
     * bytes starting at {@code startingPosition}.
     *
     * @param startingPosition
     *            Offset, in bytes, of the first byte of the part.
     * @param length
     *            Length of the part in bytes.
     *
     * @throws AmazonClientException
     *             If the file can't be opened.
     */
    private InputSubstream newInputSubstream(File file, long startingPosition, long length) {
        try {
            return new InputSubstream(new RepeatableFileInputStream(file), startingPosition, length, true);
        } catch (FileNotFoundException e) {
            throw new AmazonClientException("Unable to find file '" + file.getAbsolutePath() + "'", e);
        }
    }

    /**
     * Uploads the file using Glacier's multipart upload APIs: initiates an
     * upload, streams the file part by part (each with its own tree hash),
     * then completes the upload with the combined checksum. If any part fails,
     * the multipart upload is aborted before the exception is rethrown.
     */
    private UploadResult uploadInMultipleParts(final String accountId, final String vaultName, final String archiveDescription, final File file) {
        long partSize = calculatePartSize(file.length());
        String partSizeString = Long.toString(partSize);

        InitiateMultipartUploadResult initiateResult = glacier.initiateMultipartUpload(new InitiateMultipartUploadRequest()
            .withAccountId(accountId)
            .withArchiveDescription(archiveDescription)
            .withVaultName(vaultName)
            .withPartSize(partSizeString));
        String uploadId = initiateResult.getUploadId();

        try {
            // Per-part binary checksums, in order, for the final combined
            // tree hash sent with CompleteMultipartUpload.
            List<byte[]> binaryChecksums = new LinkedList<byte[]>();

            long currentPosition = 0;
            while (currentPosition < file.length()) {
                // All parts are partSize bytes, except possibly the last.
                long length = partSize;
                if (currentPosition + partSize > file.length()) {
                    length = file.length() - currentPosition;
                }

                // The substream is repeatable, so it can be consumed once to
                // compute the part's tree hash and again as the request body.
                InputStream inputSubStream = newInputSubstream(file, currentPosition, length);
                String checksum = TreeHashGenerator.calculateTreeHash(inputSubStream);
                byte[] binaryChecksum = BinaryUtils.fromHex(checksum);
                binaryChecksums.add(binaryChecksum);

                try {
                    glacier.uploadMultipartPart(new UploadMultipartPartRequest()
                        .withAccountId(accountId)
                        .withChecksum(checksum)
                        .withBody(inputSubStream)
                        .withRange("bytes " + currentPosition + "-" + (currentPosition + length - 1) + "/*")
                        .withUploadId(uploadId)
                        .withVaultName(vaultName));
                } finally {
                    // Best-effort close of the part's stream.
                    try {inputSubStream.close();} catch (Exception ignored) {}
                }

                currentPosition += partSize;
            }

            String checksum = TreeHashGenerator.calculateTreeHash(binaryChecksums);
            String archiveSize = Long.toString(file.length());
            CompleteMultipartUploadResult completeMultipartUploadResult =
                glacier.completeMultipartUpload(new CompleteMultipartUploadRequest()
                    .withAccountId(accountId)
                    .withArchiveSize(archiveSize)
                    .withVaultName(vaultName)
                    .withChecksum(checksum)
                    .withUploadId(uploadId));

            String artifactId = completeMultipartUploadResult.getArchiveId();
            return new UploadResult(artifactId);
        } catch (AmazonClientException e) {
            // Clean up the partial upload so it doesn't accrue storage charges.
            glacier.abortMultipartUpload(new AbortMultipartUploadRequest(accountId, vaultName, uploadId));
            throw e;
        }
    }

    /**
     * Uploads the file to Glacier in a single UploadArchive request, sending
     * the file's tree hash so the service can verify data integrity.
     *
     * @throws FileNotFoundException
     *             If the specified file to upload doesn't exist.
     */
    private UploadResult uploadInSinglePart(final String accountId, final String vaultName, final String archiveDescription, final File file)
            throws AmazonServiceException, AmazonClientException, FileNotFoundException {
        String checksum = TreeHashGenerator.calculateTreeHash(file);

        RepeatableFileInputStream input = new RepeatableFileInputStream(file);
        try {
            UploadArchiveResult uploadArchiveResult =
                glacier.uploadArchive(new UploadArchiveRequest()
                    .withAccountId(accountId)
                    .withArchiveDescription(archiveDescription)
                    .withVaultName(vaultName)
                    .withChecksum(checksum)
                    .withBody(input)
                    .withContentLength(file.length())
                );
            String artifactId = uploadArchiveResult.getArchiveId();
            return new UploadResult(artifactId);
        } finally {
            // Best-effort close; the result (or exception) is already decided.
            try {input.close();} catch (Exception ignored) {}
        }
    }
}