Package org.archive.wayback.resourcestore

Source Code of org.archive.wayback.resourcestore.SimpleResourceStore

/*
*  This file is part of the Wayback archival access software
*   (http://archive-access.sourceforge.net/projects/wayback/).
*
*  Licensed to the Internet Archive (IA) by one or more individual
*  contributors.
*
*  The IA licenses this file to You under the Apache License, Version 2.0
*  (the "License"); you may not use this file except in compliance with
*  the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*/
package org.archive.wayback.resourcestore;

import java.io.IOException;
import java.util.logging.Logger;

import org.archive.wayback.ResourceStore;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.core.Resource;
import org.archive.wayback.exception.ResourceNotAvailableException;
import org.archive.wayback.resourcestore.resourcefile.ArcWarcFilenameFilter;
import org.archive.wayback.resourcestore.resourcefile.ResourceFactory;


/**
* Implements ResourceStore where ARC/WARCs are accessed via a local file or an
* HTTP 1.1 range request. All files are assumed to be "rooted" at a particular
* HTTP URL, or within a single local directory. The HTTP version may imply a
* file reverse-proxy to connect through to actual HTTP ARC/WARC locations.
*
* @author brad
* @version $Date$, $Revision$
*/
public class SimpleResourceStore implements ResourceStore {

  private final static Logger LOGGER = Logger.getLogger(
      SimpleResourceStore.class.getName());
  private static String HTTP_ERROR = "HTTP";
  private static String HTTP_502 = "502";
  private String prefix = null;
  private String regex;
  private String replace;
  private String includeFilter;
 
  private int retries = 2;

  public Resource retrieveResource(CaptureSearchResult result)
    throws ResourceNotAvailableException {

    // extract ARC filename
    String fileName = result.getFile();
    if(fileName == null || fileName.length() < 1) {
      throw new ResourceNotAvailableException("No ARC/WARC name in search result...", fileName);
    }
   
        // If includeFilter is provided, filter out paths that don't contain the include filter
        if (includeFilter != null) {
          if (!fileName.contains(includeFilter)) {
            throw new ResourceNotAvailableException("Resource " + fileName + " not found in this store", fileName);
          }
        }   

    final long offset = result.getOffset();
    if(!fileName.endsWith(ArcWarcFilenameFilter.ARC_SUFFIX)
        && !fileName.endsWith(ArcWarcFilenameFilter.ARC_GZ_SUFFIX)
        && !fileName.endsWith(ArcWarcFilenameFilter.WARC_SUFFIX)
        && !fileName.endsWith(ArcWarcFilenameFilter.WARC_GZ_SUFFIX)) {
      fileName = fileName + ArcWarcFilenameFilter.ARC_GZ_SUFFIX;
    }
       
        String fileUrl;
        if ( regex != null && replace != null )
          {
            fileUrl = fileName.replaceAll( regex, replace );
          }
        else
          {
            fileUrl = prefix + fileName;
          }
       
    Resource r = null;
    try {
      int attempts = retries;
          while(attempts-- > 0) {
            try {
              r = ResourceFactory.getResource(fileUrl, offset);
              break;
            } catch (IOException e) {
              String message = e.getMessage();
              if(attempts > 0
                  && message.contains(HTTP_ERROR)
                  && message.contains(HTTP_502)) {
               
                LOGGER.info(String.format(
                    "Failed attempt for (%s) retrying with" +
                    " (%d) attempts left",fileUrl,attempts));
              } else {
                throw e;
              }
            }
          }

    } catch (IOException e) {
      String msg = fileUrl + " - " + e;
      LOGGER.info("Unable to retrieve:" + msg);
     
      throw new ResourceNotAvailableException(msg, fileUrl, e);
    }
    return r;
  }

  /**
   * @return the prefix
   */
  public String getPrefix() {
    return prefix;
  }

  /**
   * @param prefix the prefix to set
   */
  public void setPrefix(String prefix) {
    this.prefix = prefix;
  }

  public String getRegex() {
    return regex;
  }

  public void setRegex(String regex) {
    this.regex = regex;
        }

  public String getReplace() {
    return replace;
  }

  public void setReplace(String replace) {
    this.replace = replace;
        }

  public void shutdown() throws IOException {
    // no-op
  }

  /**
   * @return the number of attempts to fetch resources with an HTTP 502
   * failure.
   */
  public int getRetries() {
    return retries;
  }

  /**
   * @param retries the number of attempts to fetch resources with an HTTP 502
   * failure.
   */
  public void setRetries(int retries) {
    this.retries = retries;
  }

 
  public String getIncludeFilter() {
    return includeFilter;
  }

  public void setIncludeFilter(String includeFilter) {
    this.includeFilter = includeFilter;
  }
}
TOP

Related Classes of org.archive.wayback.resourcestore.SimpleResourceStore

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.