Package org.archive.wayback.core

Examples of org.archive.wayback.core.SearchResult


    memory = new HashMap<String,SearchResult>();
  }

  private SearchResult annotate(SearchResult o) {
    String thisDigest = o.get(WaybackConstants.RESULT_MD5_DIGEST);
    SearchResult last = memory.get(thisDigest);
    if(last == null) {
      return null;
    }
    for(String field : FIELDS) {
      o.put(field, last.get(field));
    }
    o.put(WaybackConstants.RESULT_DUPLICATE_ANNOTATION,
        WaybackConstants.RESULT_DUPLICATE_DIGEST);
    o.put(WaybackConstants.RESULT_DUPLICATE_STORED_DATE,
        last.get(WaybackConstants.RESULT_CAPTURE_DATE));
    return o;
  }
View Full Code Here


    if(dedupeRecords) {
      itr = new AdaptedIterator<SearchResult, SearchResult>(itr,
          new DeduplicationSearchResultAnnotationAdapter());
    }
    while (itr.hasNext()) {
      SearchResult result = itr.next();
      int ruling = filter.filterObject(result);
      if (ruling == ObjectFilter.FILTER_ABORT) {
        break;
      } else if (ruling == ObjectFilter.FILTER_INCLUDE) {
        results.addSearchResult(result, forwards);
View Full Code Here

    String arcFile = result.get(WaybackConstants.RESULT_ARC_FILE);
    return arcFile.equals("-");
  }
 
  private SearchResult forgeFailedSearchResult(URL url) {
    SearchResult result = new SearchResult();

    result.put(WaybackConstants.RESULT_ARC_FILE, "-");
    result.put(WaybackConstants.RESULT_OFFSET, "0");

    result.put(WaybackConstants.RESULT_HTTP_CODE, "0");

    result.put(WaybackConstants.RESULT_MD5_DIGEST, "-");
    result.put(WaybackConstants.RESULT_MIME_TYPE, "-");
    result.put(WaybackConstants.RESULT_CAPTURE_DATE,
        Timestamp.currentTimestamp().getDateStr());

    result.put(WaybackConstants.RESULT_ORIG_HOST, url.getHost());
    result.put(WaybackConstants.RESULT_REDIRECT_URL, "-");
    result.put(WaybackConstants.RESULT_URL, url.toString());

    String indexUrl;
    try {
      indexUrl = canonicalizer.urlStringToKey(url.toString());
    } catch (URIException e) {
      // not gonna happen...
      e.printStackTrace();
      indexUrl = url.toString();
    }
    result.put(WaybackConstants.RESULT_URL_KEY, indexUrl);
   
    return result;
  }
View Full Code Here

      throw e;
    } catch (WaybackException e) {
      e.printStackTrace();
      throw new IOException(e.getMessage());
    }
    SearchResult result = results.getClosest(wbRequest);
    if(result != null) {
      if(isForgedFailedSearchResult(result)) {
        if(isForgedFailRecentEnough(result)) {
          LOGGER.info(url.toString() + " has failed recently");
          throw new LiveDocumentNotAvailableException("failed prev");
        } else {
          LOGGER.info(url.toString() + " failed a while ago");
          throw new ResourceNotInArchiveException("Nope");
        }
      }
      String name = (String) result.get(WaybackConstants.RESULT_ARC_FILE);
      long offset = Long.parseLong(
          (String) result.get(WaybackConstants.RESULT_OFFSET));
      resource = arcCacheDir.getResource(name, offset);
    }
    return resource;
  }
View Full Code Here

    }
   
    NodeList xresults = getSearchResults(document);
    for(int i = 0; i < xresults.getLength(); i++) {
      Node xresult = xresults.item(i);
      SearchResult result = searchElementToSearchResult(xresult);
     
      int ruling = ObjectFilter.FILTER_INCLUDE;
      if (filter != null) {
        ruling = filter.filterObject(result);
      }
View Full Code Here

    return results;
  }

  private SearchResult searchElementToSearchResult(Node e) {

    SearchResult result = new SearchResult();

    NodeList chitlens = e.getChildNodes();
    for(int i = 0; i < chitlens.getLength(); i++) {
      String key = chitlens.item(i).getNodeName();
      String value = chitlens.item(i).getTextContent();
      if(!key.equals("#text")) {
        result.put(key,value);
      }
    }
    return result;
  }
View Full Code Here

    ARCLocation location = null;
    try {
      location = cacher.cache(arcCacheDir, url.toString());
    } catch(LiveDocumentNotAvailableException e) {
      // record the failure, so we can fail early next time:
      SearchResult result = forgeFailedSearchResult(url);
      index.addSearchResult(result);
      LOGGER.info("Added FAIL-URL(" + url.toString() + ") to LiveIndex");
      throw e;
    }
    if(location != null) {
      String name = location.getName();
      long offset = location.getOffset();
      LOGGER.info("Cached URL(" + url.toString() + ") in " +
          "ARC(" + name + ") at (" + offset + ")");
      resource = arcCacheDir.getResource(name, offset);
      // add the result to the index:
      if(resource instanceof ArcResource) {
        ArcResource aResource = (ArcResource) resource;
        ARCRecord record = (ARCRecord) aResource.getArcRecord();
       
        SearchResult result = adapter.adapt(record);
        index.addSearchResult(result);
        LOGGER.info("Added URL(" + url.toString() + ") in " +
            "ARC(" + name + ") at (" + offset + ") to LiveIndex");
       
        // we just read thru the doc in order to index it. Reset:
View Full Code Here

  /**
   * @param line
   * @return SearchResult representation of input line
   */
  public static SearchResult doAdapt(String line) {
    SearchResult result = new SearchResult();
    String[] tokens = line.split(" ");
    if (tokens.length != 9) {
      return null;
      //throw new IllegalArgumentException("Need 9 columns("+line+")");
    }
    String url = tokens[0];
    String captureDate = tokens[1];
    String origHost = tokens[2];
    String mimeType = tokens[3];
    String httpResponseCode = tokens[4];
    String md5Fragment = tokens[5];
    String redirectUrl = tokens[6];
    long compressedOffset = -1;
    if(!tokens[7].equals("-")) {
      compressedOffset = Long.parseLong(tokens[7]);
    }
    String arcFileName = tokens[8];

    String origUrl = url;
    if(!url.startsWith(WaybackConstants.DNS_URL_PREFIX)) {
      try {
        UURI uri = UURIFactory.getInstance(
            WaybackConstants.HTTP_URL_PREFIX + url);
        if(uri.getPort() != -1) {
          origHost += ":" + uri.getPort();
        }
        origUrl = origHost + uri.getEscapedPathQuery();
      } catch (URIException e) {
        // TODO Stifle? throw an error?
        e.printStackTrace();
        return null;
      }
    }
   
    result.put(WaybackConstants.RESULT_URL, origUrl);
    result.put(WaybackConstants.RESULT_URL_KEY, url);
    result.put(WaybackConstants.RESULT_CAPTURE_DATE, captureDate);
    result.put(WaybackConstants.RESULT_ORIG_HOST, origHost);
    result.put(WaybackConstants.RESULT_MIME_TYPE, mimeType);
    result.put(WaybackConstants.RESULT_HTTP_CODE, httpResponseCode);
    result.put(WaybackConstants.RESULT_MD5_DIGEST, md5Fragment);
    result.put(WaybackConstants.RESULT_REDIRECT_URL, redirectUrl);
    // HACKHACK:
    result.put(WaybackConstants.RESULT_OFFSET, String.valueOf(compressedOffset));
    result.put(WaybackConstants.RESULT_ARC_FILE, arcFileName);

    return result;
  }
View Full Code Here

    public void map(WritableComparable key, Writable value,
        OutputCollector output, Reporter reporter) throws IOException {
      ObjectWritable ow = (ObjectWritable) value;
      ARCRecord rec = (ARCRecord) ow.get();
      String line;
      SearchResult result = ARtoSR.adapt(rec);
      if(result != null) {
        line = SRtoCDX.adapt(result);
        if(line != null) {

          outKey.set(line);
View Full Code Here

    }
    return orig;
  }

  private SearchResult getBlankSearchResult() {
    SearchResult result = new SearchResult();
    for(String field : SEARCH_FIELDS) {
      result.put(field, DEFAULT_VALUE);
    }
    return result;
  }
View Full Code Here

TOP

Related Classes of org.archive.wayback.core.SearchResult

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.