Package org.archive.cdxserver.processor

Examples of org.archive.cdxserver.processor.BaseProcessor


     
      CDXQuery query,     
      AuthToken authToken,
      CDXAccessFilter accessChecker) {
   
    BaseProcessor outputProcessor = responseWriter;
   
    if (query.limit < 0) {
      query.limit = Math.min(-query.limit, readLimit);
      outputProcessor = new LastNLineProcessor(outputProcessor, query.limit);
    } else if (query.limit == 0) {
      query.limit = readLimit;
    } else {
      query.limit = Math.min(query.limit, readLimit);
    }
   
        if (!query.closest.isEmpty() && query.isSortClosest()) {
            outputProcessor = new ClosestTimestampSorted(outputProcessor, query.closest, query.limit);
        }
       
        // Experimental
        if (query.resolveRevisits) {
          if (query.isReverse()) {
            outputProcessor = new ReverseRevisitResolver(outputProcessor, query.showDupeCount);
          } else {
            outputProcessor = new ForwardRevisitResolver(outputProcessor, query.showDupeCount);           
          }
        } else if (query.showDupeCount) {
          outputProcessor = new DupeCountProcessor(outputProcessor, true);
        }
     
    if (query.showGroupCount || query.showUniqCount) {
      outputProcessor = new GroupCountProcessor(outputProcessor, query.lastSkipTimestamp, query.showUniqCount);
    }
   
    if (query.collapseTime > 0) {
      outputProcessor = new DupeTimestampBestStatusFilter(outputProcessor, query.collapseTime, noCollapsePrefix);
    }
   
    FieldSplitFormat parseFormat = outputProcessor.modifyOutputFormat(cdxLineFactory.getParseFormat());

    FieldRegexFilter filterMatcher = null;

    if (query.filter != null && (query.filter.length > 0)) {
      filterMatcher = new FieldRegexFilter(query.filter, parseFormat);
    }

    CollapseFieldFilter collapser = null;

    if (query.collapse != null && (query.collapse.length > 0)) {
      collapser = new CollapseFieldFilter(query.collapse, parseFormat);
    }

    //CDXLine prev = null;
    CDXLine line = null;

    //boolean prevUrlAllowed = true;
   
    FieldSplitFormat outputFields = null;
   
    if (!authChecker.isAllCdxFieldAccessAllowed(authToken)) {
      outputFields = this.authChecker.getPublicCdxFormat();
    }
   
    if (!query.fl.isEmpty()) {
      if (outputFields == null) {
        outputFields = parseFormat;
      }
      try {
        outputFields = outputFields.createSubset(URLDecoder.decode(query.fl, "UTF-8"));
      } catch (UnsupportedEncodingException e) {

      }
    } else if (outputFields != null) {
      outputFields = parseFormat.createSubset(outputFields);
    }

    outputProcessor.begin();

    int writeCount = 0;
    long allCount = 0;
   
    int writeLimit = query.limit;

    while (cdx.hasNext() && ((writeLimit == 0) || (writeCount < writeLimit)) && (allCount < readLimit) && !responseWriter.isAborted()) {
     
      String rawLine = cdx.next();
      allCount++;

      if (query.offset > 0) {
        --query.offset;
        continue;
      }

//      prev = line;
     
      //line = new CDXLine(rawLine, parseFormat);
      line = this.cdxLineFactory.createStandardCDXLine(rawLine, parseFormat);
     
      //TODO: better way to handle this special case?
      if (line.getMimeType().equals("alexa/dat")) {
        continue;
      }
     
      // Additional access check, per capture
      if (accessChecker != null) {
        if (!accessChecker.includeCapture(line)) {
          continue;
        }
      }

//      if (!authChecker.isAllUrlAccessAllowed(authToken)) {
//        if ((query.matchType != MatchType.exact) && ((prev == null) || !line.getUrlKey().equals(prev.getUrlKey()))) {
//          prevUrlAllowed = authChecker.isUrlAllowed(line.getOriginalUrl(), authToken);
//        }
//
//        if (!prevUrlAllowed) {
//          continue;
//        }
//      }
//     
//      if (!authChecker.isCaptureAllowed(line, authToken)) {
//        continue;
//      }
//     
      outputProcessor.trackLine(line);

      // Timestamp Range Filtering
      String timestamp = line.getTimestamp();

      if (!query.from.isEmpty() && (timestamp.compareTo(query.from) < 0)) {
        continue;
      }

      if (!query.to.isEmpty() && (timestamp.compareTo(query.to) > 0) && !timestamp.startsWith(query.to)) {
        if (query.matchType == MatchType.exact) {
          break;
        } else {
          continue;
        }
      }

      // Check regex matcher if it exists
      if ((filterMatcher != null) && !filterMatcher.include(line)) {
        continue;
      }

      // Check collapser
      if ((collapser != null) && !collapser.include(line)) {
        continue;
      }

      // Filter to only include output fields
      if (outputFields != null) {
        line = new CDXLine(line, outputFields);
      }
     
      writeCount += outputProcessor.writeLine(line);

      if (Thread.interrupted()) {
        break;
      }
    }

    if (query.showResumeKey && (line != null) && (writeLimit > 0) && (writeCount >= writeLimit)) {
      StringBuilder sb = new StringBuilder();
      sb.append(line.getUrlKey());
      sb.append(' ');
      sb.append(UrlSurtRangeComputer.incLastChar(line.getTimestamp()));
      String resumeKey;
      try {
        resumeKey = URLEncoder.encode(sb.toString(), "UTF-8");
        outputProcessor.writeResumeKey(resumeKey);
      } catch (UnsupportedEncodingException e) {

      }
    }

    outputProcessor.end();
  }
View Full Code Here

TOP

Related Classes of org.archive.cdxserver.processor.BaseProcessor

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.