Package com.ikanow.infinit.e.harvest.extraction.document.file

Source Code of com.ikanow.infinit.e.harvest.extraction.document.file.AwsInfiniteFile

/*******************************************************************************
* Copyright 2012, The Infinit.e Open Source Project.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
package com.ikanow.infinit.e.harvest.extraction.document.file;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.AmazonS3Exception;
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectSummary;

import jcifs.smb.NtlmPasswordAuthentication;

public class AwsInfiniteFile extends InfiniteFile {

  // Constructors
 
  public AwsInfiniteFile(String url, NtlmPasswordAuthentication auth) throws IOException {
    BasicAWSCredentials awsAuth = new BasicAWSCredentials(auth.getUsername(), auth.getPassword());
    AmazonS3Client client = new AmazonS3Client(awsAuth);
    _awsClient = (Object)client;
   
    getBucketAndObjectName(url, false);
  }//TESTED
 
  private void getBucketAndObjectName(String url, boolean newFile) {
    // 3 cases .. it can be a bucket (s3://X.Y.com[/]?)
    // or a "directory" (these are handled slightly oddly in S3) .. eg s3://X.Y.com(/blah)+/
    // or an object s3://X.Y.com(/blah)+
       
    // In all cases let's get the bucket name first...
    url = url.substring(5); // ie step over s3://
    int index = url.indexOf('/');
    if (-1 != index) { // Else it's the entire bucket
      _awsBucketName = url.substring(0, index);
    }
    else {
      _awsBucketName = url;
    }//TESTED
   
    //two cases ... might be a bucket, or might be an object
    if (!url.endsWith("/")) { // Going to assume this is a file
     
      _awsObjectName = url.substring(_awsBucketName.length() + 1); // (+1 to step over the /)
      if (!newFile) {
        _awsFileMeta_lastDate = ((AmazonS3Client)_awsClient).getObjectMetadata(_awsBucketName, _awsObjectName).getLastModified();
          // (will fire off an exception if the file doesn't exist)
      }
    }//TESTED
    else if (-1 != index) { // This is a directory
      _awsObjectName = url.substring(_awsBucketName.length() + 1); // (+1 to step over the /)
    }//TESTED
    // (else already done, leave _awsObjectName as null)   
  }//TESTED
 
  public AwsInfiniteFile(String bucketName, String objectName, Date lastModified, Object client) {
    _awsBucketName = bucketName;
    _awsObjectName = objectName;
    _awsFileMeta_lastDate = lastModified;
    _awsClient = client;
  }//TESTED
 
  //////////////////////////////////////////////////////////////////
 
  // Accessors:
 
  @Override
  public InputStream getInputStream() throws IOException {
    S3Object s3Obj = ((AmazonS3Client)_awsClient).getObject(_awsBucketName, _awsObjectName);
    return s3Obj.getObjectContent();
  }
 
  @Override
  public InfiniteFile[] listFiles()  {
    return listFiles(null);
  }
  @Override
  public InfiniteFile[] listFiles(Date optionalFilterDate)  {
    InfiniteFile[] fileList = null;
    ObjectListing list = null;
    _overwriteTime = 0L;
    ListObjectsRequest listRequest = new ListObjectsRequest().withBucketName(_awsBucketName);
    if (null != _awsObjectName) {
      listRequest.withPrefix(_awsObjectName);
    }
    listRequest.withDelimiter("/");
    list = ((AmazonS3Client)_awsClient).listObjects(listRequest);
    fileList = new InfiniteFile[list.getObjectSummaries().size() + list.getCommonPrefixes().size()];
    //TESTED (3.2)
    int nAdded = 0;
    // Get the sub-directories
    for (String subDir: list.getCommonPrefixes()) {
      // Create directories:
      fileList[nAdded] = new AwsInfiniteFile(_awsBucketName, subDir, null, _awsClient);
      nAdded++;
    }//TESTED (3b.3)
    // Get the files:
    for (S3ObjectSummary s3Obj: list.getObjectSummaries()) {
      if (!s3Obj.getKey().endsWith("/")) {
        fileList[nAdded] = new AwsInfiniteFile(s3Obj.getBucketName(), s3Obj.getKey(), s3Obj.getLastModified(), _awsClient);
        long fileTime = fileList[nAdded].getDate();
        if (fileTime > _overwriteTime) {
          _overwriteTime = fileTime;
        }//TESTED (3.2)
        nAdded++;
      }
    }
    return fileList;
  }//TESTED (with and without prefixes)
 
  @Override
  public void delete() throws IOException {
    ((AmazonS3Client)_awsClient).deleteObject(_awsBucketName, _awsObjectName);
  }//TESTED (3.4 and 3b.4)
 
  @Override
  public void rename(String newPathName) throws IOException {
    try {
      String oldBucket = _awsBucketName;
      String oldName = _awsObjectName;
      getBucketAndObjectName(newPathName, true); // (renames self)
      _awsObjectName = new URI("").resolve(_awsObjectName).getPath(); // (resolve relative paths)
     
      // Check parent directory exists:
      int index = _awsObjectName.lastIndexOf('/');
      if (index > 0) {
        String oldParentDir = _awsObjectName.substring(0, 1+index); // (don't include the "/" so will try to create)
       
        GetObjectMetadataRequest objMetaRequest = new GetObjectMetadataRequest(_awsBucketName, oldParentDir);
        ((AmazonS3Client)_awsClient).getObjectMetadata(objMetaRequest);
      }   
      // Create actual file:
     
      ((AmazonS3Client)_awsClient).copyObject(oldBucket, oldName, _awsBucketName, _awsObjectName);
     
      _awsBucketName = oldBucket;
      _awsObjectName = oldName; // (copy back again before deleting)
      delete(); // (original, only gets this far if the copyObject succeeds, else it exceptions out)
    }
    catch (AmazonS3Exception e) {
      throw new IOException(e.getMessage());
    } catch (URISyntaxException e) {
      throw new IOException(e.getMessage());
    }
  }//TESTED (3.4, 3.5)
 
  @Override
  public boolean isDirectory() throws IOException {
    return (null == _awsFileMeta_lastDate);
 
 
  @Override
  public String getUrlString() throws MalformedURLException, URISyntaxException
  {
    return new StringBuffer("s3://").append(_awsBucketName).append('/').append(_awsObjectName).toString();
  }//TESTED
  @Override
  public String getUrlPath() throws MalformedURLException, URISyntaxException, UnsupportedEncodingException
  {
    return URLDecoder.decode(getURI().getPath(), "UTF-8");
  }//TESTED
  @Override
  public URI getURI() throws MalformedURLException, URISyntaxException {
    URI uri = new URI("s3", _awsBucketName, "/" + _awsObjectName, null);
    return uri;
  }//TESTED
 
  @Override
  public String getName() {
    return _awsObjectName.replaceAll(".*/", ""); // remove the leading path
  }//TESTED

  @Override
  public long getDate() {
    if (null != _overwriteTime) {
      return _overwriteTime;
    }//TESTED (3.2)
    else if (null == _awsFileMeta_lastDate) {
      return 0;
    }//TESTED (3.1)
    else {
      return (_overwriteTime = _awsFileMeta_lastDate.getTime());
    }//TEST (3.3)
  }//TESTED
 
  //////////////////////////////////////////////////////////////////
 
  // STATE
 
  // AWS stuff, which is more complicated
  private Object _awsClient;
  private Date _awsFileMeta_lastDate;
  private String _awsBucketName; // use these so we don't have to download the entire thing to look at metadata
  private String _awsObjectName; //
}
TOP

Related Classes of com.ikanow.infinit.e.harvest.extraction.document.file.AwsInfiniteFile

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.