Package org.archive.wayback.core

Examples of org.archive.wayback.core.SearchResults


  /**
   * Runs a query against the index and returns the matching, windowed
   * results.
   *
   * The request is validated first (page size, page number, required URL
   * and query type), the requested URL is canonicalized into an index key,
   * and then one of three filter chains is built depending on the query
   * type: replay/closest, exact-URL, or URL-prefix. Records read from
   * {@code source.getPrefixIterator(...)} are pushed through the chain by
   * {@code filterRecords(...)}. Finally the request parameters and result
   * counts are recorded on the returned object via {@code putFilter(...)}.
   *
   * @param wbRequest the request holding URL, query type, optional dates,
   *                  paging information and an optional exclusion filter
   * @return a CaptureSearchResults for replay, closest and URL queries, or
   *         a UrlSearchResults for URL-prefix queries
   * @throws ResourceIndexNotAvailableException if reading from the index
   *         source raises an IOException
   * @throws ResourceNotInArchiveException if no record reached the final
   *         counter and none was rejected by the exclusion filter
   * @throws BadQueryException if paging values are out of range, the URL
   *         cannot be canonicalized, or the query type is unknown
   * @throws AccessControlException if records matched but every one of
   *         them was rejected by the exclusion filter
   */
  public SearchResults query(WaybackRequest wbRequest)
      throws ResourceIndexNotAvailableException,
      ResourceNotInArchiveException, BadQueryException,
      AccessControlException {

    SearchResults results = null; // return value placeholder

    String startKey; // actual key where search will begin
    String keyUrl; // "purified" URL request
    int startResult; // calculated based on hits/page * pagenum

    // first grab all the info from the WaybackRequest, and validate it:

    int resultsPerPage = wbRequest.getResultsPerPage();
    int pageNum = wbRequest.getPageNum();
    // computed before validation, but only used after the checks below
    // have rejected out-of-range values:
    startResult = (pageNum - 1) * resultsPerPage;

    if (resultsPerPage < 1) {
      throw new BadQueryException("resultsPerPage cannot be < 1");
    }
    if (resultsPerPage > maxRecords) {
      throw new BadQueryException("resultsPerPage cannot be > "
          + maxRecords);
    }
    if (pageNum < 1) {
      throw new BadQueryException("pageNum must be > 0");
    }

    // URL and type have no fallback; the three dates pass a third
    // argument to getRequired (presumably a default used when the request
    // does not specify one -- confirm against getRequired).
    String searchUrl = getRequired(wbRequest, WaybackConstants.REQUEST_URL);
    String searchType = getRequired(wbRequest,
        WaybackConstants.REQUEST_TYPE);
    String startDate = getRequired(wbRequest,
        WaybackConstants.REQUEST_START_DATE, Timestamp
            .earliestTimestamp().getDateStr());
    String endDate = getRequired(wbRequest,
        WaybackConstants.REQUEST_END_DATE, Timestamp.latestTimestamp()
            .getDateStr());
    String exactDate = getRequired(wbRequest,
        WaybackConstants.REQUEST_EXACT_DATE, Timestamp
            .latestTimestamp().getDateStr());

    // convert the requested URL into the key form used by the index:
    try {
      keyUrl = canonicalizer.urlStringToKey(searchUrl);
    } catch (URIException e) {
      throw new BadQueryException("invalid "
          + WaybackConstants.REQUEST_URL + " " + searchUrl);
    }

    // set up the common Filters:

    // makes sure we don't inspect too many records: prevents DOS
    GuardRailFilter guardrail = new GuardRailFilter(maxRecords);

    // checks an exclusion service for every matching record
    // (may be null, in which case no exclusion filtering is applied)
    ObjectFilter<SearchResult> exclusion = wbRequest.getExclusionFilter();

    // count how many results got to the ExclusionFilter:
    CounterFilter preExCounter = new CounterFilter();
    // count how many results got past the ExclusionFilter, or how
    // many total matched, if there was no ExclusionFilter:
    CounterFilter finalCounter = new CounterFilter();

    // has the user asked for only results on the exact host specified?
    // (null-checked before use below)
    HostMatchFilter hostMatchFilter = getExactHostFilter(wbRequest);

    if (searchType.equals(WaybackConstants.REQUEST_REPLAY_QUERY)
        || searchType.equals(WaybackConstants.REQUEST_CLOSEST_QUERY)) {

      results = new CaptureSearchResults();

      ObjectFilterChain<SearchResult> forwardFilters =
        new ObjectFilterChain<SearchResult>();

//      ObjectFilterChain<SearchResult> reverseFilters =
//        new ObjectFilterChain<SearchResult>();

      // use the same guardrail for both:
      forwardFilters.addFilter(guardrail);
//      reverseFilters.addFilter(guardrail);

      forwardFilters.addFilter(new DuplicateRecordFilter());

      // match URL key:
      forwardFilters.addFilter(new UrlMatchFilter(keyUrl));
//      reverseFilters.addFilter(new UrlMatchFilter(keyUrl));

      if(hostMatchFilter != null) {
        forwardFilters.addFilter(hostMatchFilter);
//        reverseFilters.addFilter(hostMatchFilter);
      }

      // be sure to only include records within the date range we want:
      // The bin search may start the forward filters at a record older
      // than we want. Since the forwardFilters only include an abort
      // endDateFilter, we might otherwise include a record before the
      // requested range.
      DateRangeFilter drFilter = new DateRangeFilter(startDate,endDate);
      forwardFilters.addFilter(drFilter);
//      reverseFilters.addFilter(drFilter);

      // abort processing if we hit a date outside the search range:
      forwardFilters.addFilter(new EndDateFilter(endDate));
//      reverseFilters.addFilter(new StartDateFilter(startDate));

      // for replay, do not include records that redirect to
      // themselves.. We'll leave this for both closest and replays,
      // because the only application of closest at the moment is
      // timeline in which case, we don't want to show captures that
      // redirect to themselves in the timeline if they are not viewable.
      SelfRedirectFilter selfRedirectFilter = new SelfRedirectFilter();
      selfRedirectFilter.setCanonicalizer(canonicalizer);
      forwardFilters.addFilter(selfRedirectFilter);
//      reverseFilters.addFilter(selfRedirectFilter);

      // possibly filter via exclusions:
      if(exclusion != null) {
        forwardFilters.addFilter(preExCounter);
        forwardFilters.addFilter(exclusion);

//        reverseFilters.addFilter(preExCounter);
//        reverseFilters.addFilter(exclusion);
      }
      forwardFilters.addFilter(finalCounter);
//      reverseFilters.addFilter(finalCounter);

      // NOTE(review): only a WindowEndFilter is added in this branch, no
      // WindowStartFilter, so startResult is not applied to the records
      // here even though it is reported in RESULTS_FIRST_RETURNED below.
      forwardFilters.addFilter(new WindowEndFilter(resultsPerPage));
//      int resultsPerDirection = (int) Math.floor(resultsPerPage / 2);
//      reverseFilters.addFilter(new WindowEndFilter(resultsPerDirection));

      startKey = keyUrl;

      try {
//        CloseableIterator<SearchResult> reverse =
//          new AdaptedObjectFilterIterator<SearchResult>(
//          source.getPrefixReverseIterator(startKey),
//          reverseFilters);

//        // reverse the reverseResults:
//        ArrayList<SearchResult> reverseResults =
//          new ArrayList<SearchResult>();
//        while(reverse.hasNext()) {
//          reverseResults.add(0, reverse.next());
//        }

        // the reverse/forward composite is currently disabled (see the
        // commented-out code); only the forward iterator is used:

        CloseableIterator<SearchResult> forward =
          source.getPrefixIterator(startKey);
//
//        CompositeIterator<SearchResult> resultsItr =
//          new CompositeIterator<SearchResult>();
//        resultsItr.addComponent(reverseResults.iterator());
//        resultsItr.addComponent(forward);

        // and filter:
//        filterRecords(resultsItr, forwardFilters, results, true);
        filterRecords(forward, forwardFilters, results, true);

      } catch (IOException e) {
        throw new ResourceIndexNotAvailableException(
            e.getLocalizedMessage());
      }

    } else if (searchType.equals(WaybackConstants.REQUEST_URL_QUERY)) {

      results = new CaptureSearchResults();
      // build up the FilterChain(s):
      ObjectFilterChain<SearchResult> filters =
        new ObjectFilterChain<SearchResult>();
      filters.addFilter(guardrail);
      filters.addFilter(new DuplicateRecordFilter());

      filters.addFilter(new UrlMatchFilter(keyUrl));
      if(hostMatchFilter != null) {
        filters.addFilter(hostMatchFilter);
      }
      // NOTE(review): only endDate is applied in this branch; startDate
      // is neither filtered on nor used in the start key (see OPTIMIZ
      // below), though it is still reported on the results.
      filters.addFilter(new EndDateFilter(endDate));
      // possibly filter via exclusions:
      if (exclusion != null) {
        filters.addFilter(preExCounter);
        filters.addFilter(exclusion);
      }
      filters.addFilter(finalCounter);
      // OPTIMIZ: beginning the search at the startDate causes problems
      // with deduplicated results. We need to be smarter about rolling
      // backwards a ways if we start on a deduped record.
//      startKey = keyUrl + " " + startDate;
      startKey = keyUrl + " ";

      // add the start and end windowing filters (after finalCounter, so
      // the counter sees records outside the window too):
      filters.addFilter(new WindowStartFilter(startResult));
      filters.addFilter(new WindowEndFilter(resultsPerPage));
      try {
        filterRecords(source.getPrefixIterator(startKey), filters, results,
            true);
      } catch (IOException e) {
        throw new ResourceIndexNotAvailableException(
            e.getLocalizedMessage());
      }


    } else if (searchType.equals(WaybackConstants.REQUEST_URL_PREFIX_QUERY)) {

      results = new UrlSearchResults();
      // build up the FilterChain(s):
      ObjectFilterChain<SearchResult> filters =
        new ObjectFilterChain<SearchResult>();
      filters.addFilter(guardrail);
      filters.addFilter(new DuplicateRecordFilter());

      filters.addFilter(new UrlPrefixMatchFilter(keyUrl));
      if(hostMatchFilter != null) {
        filters.addFilter(hostMatchFilter);
      }
      filters.addFilter(new DateRangeFilter(startDate, endDate));
      // possibly filter via exclusions:
      if (exclusion != null) {
        filters.addFilter(preExCounter);
        filters.addFilter(exclusion);
      }
      // placed ahead of finalCounter, so the counter only sees records
      // that this filter lets through:
      filters.addFilter(new CaptureToUrlResultFilter());
      filters.addFilter(finalCounter);
      startKey = keyUrl;

      // add the start and end windowing filters:
      filters.addFilter(new WindowStartFilter(startResult));
      filters.addFilter(new WindowEndFilter(resultsPerPage));
      try {
        filterRecords(source.getPrefixIterator(startKey), filters, results,
            true);
      } catch (IOException e) {
        throw new ResourceIndexNotAvailableException(
            e.getLocalizedMessage());
      }

    } else {
      throw new BadQueryException("Unknown query type(" + searchType
          + "), must be " + WaybackConstants.REQUEST_REPLAY_QUERY
          + ", " + WaybackConstants.REQUEST_CLOSEST_QUERY + ", "
          + WaybackConstants.REQUEST_URL_QUERY + ", or "
          + WaybackConstants.REQUEST_URL_PREFIX_QUERY);
    }

    // distinguish "everything was excluded" from "nothing matched":
    int matched = finalCounter.getNumMatched();
    if (matched == 0) {
      if (exclusion != null) {
        if(preExCounter.getNumMatched() > 0) {
          throw new AccessControlException("All results Excluded");
        }
      }
      throw new ResourceNotInArchiveException("the URL " + keyUrl
          + " is not in the archive.");
    }

    // now we need to set some filter properties on the results:
    // (note that REQUEST_URL is reported as the canonicalized key, not
    // the URL string originally supplied)
    results.putFilter(WaybackConstants.REQUEST_URL, keyUrl);
    results.putFilter(WaybackConstants.REQUEST_TYPE, searchType);
    results.putFilter(WaybackConstants.REQUEST_START_DATE, startDate);
    results.putFilter(WaybackConstants.REQUEST_EXACT_DATE, exactDate);
    results.putFilter(WaybackConstants.REQUEST_END_DATE, endDate);

    // window info
    results.putFilter(WaybackConstants.RESULTS_FIRST_RETURNED, String
        .valueOf(startResult));
    results.putFilter(WaybackConstants.RESULTS_REQUESTED, String
        .valueOf(resultsPerPage));

    // how many reached the final counter (the counter sits ahead of the
    // window filters, so this includes those outside the window):
    results.putFilter(WaybackConstants.RESULTS_NUM_RESULTS, String
        .valueOf(matched));

    // how many are actually in the returned results object:
    results.putFilter(WaybackConstants.RESULTS_NUM_RETURNED, String
        .valueOf(results.getResultCount()));

    return results;
  }
View Full Code Here


    return filters;
  }
 
  protected SearchResults documentToSearchResults(Document document,
      ObjectFilter<SearchResult> filter) {
    SearchResults results = null;
    NodeList filters = getRequestFilters(document);
    String resultsType = getResultsType(document);
    if(resultsType.equals(WaybackConstants.RESULTS_TYPE_CAPTURE)) {
      results = new CaptureSearchResults();
    } else {
      results = new UrlSearchResults();
    }
    for(int i = 0; i < filters.getLength(); i++) {
      String key = filters.item(i).getNodeName();
      String value = filters.item(i).getTextContent();
      if(!key.equals("#text")) {
        results.putFilter(key,value);
      }
    }
   
    NodeList xresults = getSearchResults(document);
    for(int i = 0; i < xresults.getLength(); i++) {
      Node xresult = xresults.item(i);
      SearchResult result = searchElementToSearchResult(xresult);
     
      int ruling = ObjectFilter.FILTER_INCLUDE;
      if (filter != null) {
        ruling = filter.filterObject(result);
      }
     
      if (ruling == ObjectFilter.FILTER_ABORT) {
        break;
      } else if (ruling == ObjectFilter.FILTER_INCLUDE) {
        results.addSearchResult(result, true);
      }
    }
    return results;
  }
View Full Code Here

    Resource resource = null;
    WaybackRequest wbRequest = makeCacheWBRequest(url,maxCacheMS,bUseOlder);
   
    CaptureSearchResults results = null;
    try {
      SearchResults gresults = index.query(wbRequest);
      if(!(gresults instanceof CaptureSearchResults)) {
        throw new IOException("bad result type...");
      }
      results = (CaptureSearchResults) gresults;
    } catch (ResourceNotInArchiveException e) {
View Full Code Here

      RangeMember best = findBestMember();
      if(best == null) {
        throw new ResourceIndexNotAvailableException("Unable to find active range for request.");
      }
      best.noteConnectionStart();
      SearchResults results;
      try {

        results = best.query(wbRequest);
        best.noteConnectionSuccess();
        return results;
View Full Code Here

  private void handleReplay(WaybackRequest wbRequest,
      HttpServletRequest httpRequest, HttpServletResponse httpResponse)
  throws IOException, ServletException {
    Resource resource = null;
    try {
      SearchResults results = collection.getResourceIndex().query(wbRequest);
      if(!(results instanceof CaptureSearchResults)) {
        throw new ResourceNotAvailableException("Bad results...");
      }
      CaptureSearchResults captureResults = (CaptureSearchResults) results;
 
View Full Code Here

  private void handleQuery(WaybackRequest wbRequest,
      HttpServletRequest httpRequest, HttpServletResponse httpResponse)
  throws ServletException, IOException {

    try {
      SearchResults results = collection.getResourceIndex().query(wbRequest);
      if(results.getResultsType().equals(
          WaybackConstants.RESULTS_TYPE_CAPTURE)) {
        CaptureSearchResults cResults = (CaptureSearchResults) results;
        SearchResult closest = cResults.getClosest(wbRequest);
        closest.put(WaybackConstants.RESULT_CLOSEST_INDICATOR,
            WaybackConstants.RESULT_CLOSEST_VALUE);
View Full Code Here

      e.printStackTrace();
      throw new ResourceIndexNotAvailableException("Unexpected SAX: " +
          e.getMessage());
    }

    SearchResults results;
    String type = wbRequest.get(WaybackConstants.REQUEST_TYPE);
    if(type.equals(WaybackConstants.REQUEST_REPLAY_QUERY) ||
        type.equals(WaybackConstants.REQUEST_URL_QUERY)) {
      results = new CaptureSearchResults();     
    } else {
      // TODO: this is wrong, but needs exploration into what NutchWax can actually do.
      throw new BadQueryException("Unable to perform path prefix requests with this index type");
    }
    NodeList channel = getSearchChannel(document);
    NodeList nodes = getSearchItems(document);

    if (channel == null || channel.getLength() != 1) {
      // TODO: better error for user:
         throw new ResourceNotInArchiveException("No results for " +
             requestUrl);
       }

       if (nodes == null) {
      // TODO: better error for user:
         throw new ResourceNotInArchiveException("No results for " +
             requestUrl);
       }

       for (int i = 0; i < nodes.getLength(); i++) {
        
           Element e = (Element) nodes.item(i);

           SearchResult result = elementToSearchResult(e);
           results.addSearchResult(result);
       }
       Element channelElement = (Element) channel.item(0);
      
       results.putFilter(WaybackConstants.RESULTS_FIRST_RETURNED,
           getNodeContent(channelElement,NUTCH_FIRST_RESULT));
      
       results.putFilter(WaybackConstants.RESULTS_NUM_RESULTS,
           getNodeContent(channelElement,NUTCH_NUM_RESULTS));
      
       results.putFilter(WaybackConstants.RESULTS_NUM_RETURNED,
           getNodeContent(channelElement,NUTCH_NUM_RETURNED));
      
       results.putFilter(WaybackConstants.RESULTS_REQUESTED,
           String.valueOf(wbRequest.getResultsPerPage()));
      
    results.putFilter(WaybackConstants.REQUEST_START_DATE,
        Timestamp.earliestTimestamp().getDateStr());
   
       results.putFilter(WaybackConstants.REQUEST_END_DATE,
           Timestamp.latestTimestamp().getDateStr());
    return results;
  }
View Full Code Here

  @Override
    public boolean renderMementoTimemap(WaybackRequest wbRequest,
            HttpServletRequest request, HttpServletResponse response)
            throws WaybackException, IOException {
   
    SearchResults cResults = wbRequest.getAccessPoint().queryIndex(wbRequest);
   
    MementoUtils.printTimemapResponse((CaptureSearchResults)cResults, wbRequest, response);
    return true;
    }
View Full Code Here

   */
  public SearchResults query(WaybackRequest wbRequest)
      throws ResourceIndexNotAvailableException,
      ResourceNotInArchiveException, BadQueryException,
      AccessControlException {
    SearchResults results = null; // return value placeholder

    if (wbRequest.isReplayRequest()) {

      results = doCaptureQuery(wbRequest, TYPE_REPLAY);
      results.putFilter(WaybackRequest.REQUEST_TYPE,
          WaybackRequest.REQUEST_REPLAY_QUERY);

    } else if (wbRequest.isCaptureQueryRequest()) {

      results = doCaptureQuery(wbRequest, TYPE_CAPTURE);
      results.putFilter(WaybackRequest.REQUEST_TYPE,
          WaybackRequest.REQUEST_CAPTURE_QUERY);

    } else if (wbRequest.isUrlQueryRequest()) {

      results = doUrlQuery(wbRequest);
      results.putFilter(WaybackRequest.REQUEST_TYPE,
          WaybackRequest.REQUEST_URL_QUERY);

    } else {

      throw new BadQueryException("Unknown query type, must be "
View Full Code Here

  }
 
  protected SearchResults documentToSearchResults(Document document,
      ObjectFilter<CaptureSearchResult> filter)
  throws ResourceNotInArchiveException {
    SearchResults results = null;
    NodeList filters = getRequestFilters(document);
    String resultsType = getResultsType(document);
    if(resultsType.equals(SearchResults.RESULTS_TYPE_CAPTURE)) {
      results = documentToCaptureSearchResults(document,filter);
    } else {
      results = documentToUrlSearchResults(document);
    }
    for(int i = 0; i < filters.getLength(); i++) {
      String key = filters.item(i).getNodeName();
      String value = getNodeTextValue(filters.item(i));
      if(!key.equals("#text")) {
        results.putFilter(key,value);
      }
    }
    return results;
  }
View Full Code Here

TOP

Related Classes of org.archive.wayback.core.SearchResults

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.