Package org.archive.wayback.resourceindex.cdx

Source Code of org.archive.wayback.resourceindex.cdx.CDXLineToSearchResultAdapter

/* CDXLineToSearchResultAdapter
*
* $Id: CDXLineToSearchResultAdapter.java 2173 2008-02-04 22:58:24Z bradtofel $
*
* Created on 2:27:16 PM Aug 17, 2006.
*
* Copyright (C) 2006 Internet Archive.
*
* This file is part of Wayback.
*
* Wayback is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* any later version.
*
* Wayback is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU Lesser Public License for more details.
*
* You should have received a copy of the GNU Lesser Public License
* along with Wayback; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
package org.archive.wayback.resourceindex.cdx;


import org.apache.commons.httpclient.URIException;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;
import org.archive.wayback.WaybackConstants;
import org.archive.wayback.core.SearchResult;
import org.archive.wayback.util.Adapter;

/**
* Adapter that converts a CDX record String into a SearchResult
*
* @author brad
* @version $Date: 2008-02-04 22:58:24 +0000 (Mon, 04 Feb 2008) $, $Revision: 2173 $
*/
public class CDXLineToSearchResultAdapter implements Adapter<String,SearchResult> {

  public SearchResult adapt(String line) {
    return doAdapt(line);
  }
  /**
   * @param line
   * @return SearchResult representation of input line
   */
  public static SearchResult doAdapt(String line) {
    SearchResult result = new SearchResult();
    String[] tokens = line.split(" ");
    if (tokens.length != 9) {
      return null;
      //throw new IllegalArgumentException("Need 9 columns("+line+")");
    }
    String url = tokens[0];
    String captureDate = tokens[1];
    String origHost = tokens[2];
    String mimeType = tokens[3];
    String httpResponseCode = tokens[4];
    String md5Fragment = tokens[5];
    String redirectUrl = tokens[6];
    long compressedOffset = -1;
    if(!tokens[7].equals("-")) {
      compressedOffset = Long.parseLong(tokens[7]);
    }
    String arcFileName = tokens[8];

    String origUrl = url;
    if(!url.startsWith(WaybackConstants.DNS_URL_PREFIX)) {
      try {
        UURI uri = UURIFactory.getInstance(
            WaybackConstants.HTTP_URL_PREFIX + url);
        if(uri.getPort() != -1) {
          origHost += ":" + uri.getPort();
        }
        origUrl = origHost + uri.getEscapedPathQuery();
      } catch (URIException e) {
        // TODO Stifle? throw an error?
        e.printStackTrace();
        return null;
      }
    }
   
    result.put(WaybackConstants.RESULT_URL, origUrl);
    result.put(WaybackConstants.RESULT_URL_KEY, url);
    result.put(WaybackConstants.RESULT_CAPTURE_DATE, captureDate);
    result.put(WaybackConstants.RESULT_ORIG_HOST, origHost);
    result.put(WaybackConstants.RESULT_MIME_TYPE, mimeType);
    result.put(WaybackConstants.RESULT_HTTP_CODE, httpResponseCode);
    result.put(WaybackConstants.RESULT_MD5_DIGEST, md5Fragment);
    result.put(WaybackConstants.RESULT_REDIRECT_URL, redirectUrl);
    // HACKHACK:
    result.put(WaybackConstants.RESULT_OFFSET, String.valueOf(compressedOffset));
    result.put(WaybackConstants.RESULT_ARC_FILE, arcFileName);

    return result;
  }
}
TOP

Related Classes of org.archive.wayback.resourceindex.cdx.CDXLineToSearchResultAdapter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.