Package org.archive.wayback.webapp

Source Code of org.archive.wayback.webapp.FileRegexParamFilterAndFactory

package org.archive.wayback.webapp;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.archive.util.iterator.CloseableIterator;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.resourceindex.filters.FileRegexFilter;
import org.archive.wayback.util.ObjectFilter;
import org.archive.wayback.util.flatfile.FlatFile;

public class FileRegexParamFilterAndFactory extends FileRegexFilter implements CustomResultFilterFactory {
 
  private static final Logger LOGGER =
        Logger.getLogger(FileRegexParamFilterAndFactory.class.getName());
 
  protected String paramFile;
  protected int paramIndex = 1;
  protected boolean isExclusion = true;
  protected char delim = '\t';
 
  protected String prefixMatch;
 
  protected Set<String> paramSet = null;
 
  // This method should be set as the init-method in the spring config
  // init-method="loadParamFile" when using this Filter
  public void loadParamFile()
  {
    FlatFile ff = new FlatFile(paramFile);
   
       
    CloseableIterator<String> itr = null;
   
    try {
      itr = ff.getSequentialIterator();
    } catch (IOException io) {
      LOGGER.warning(io.toString());
    }
   
    paramSet = new HashSet<String>();

    while (itr.hasNext()) {
      String param = itr.next();
      param = param.trim();
     
      if (param.isEmpty() || param.startsWith("#")) {
        continue;
      }
     
      // Use only the first word, ignore the rest
      int wordEnd = param.indexOf(delim);
      if (wordEnd > 0) {
        param = param.substring(0, wordEnd);
      }
     
      paramSet.add(param);
    }
  }

  // Filter and Factory are the same object to avoid creating a new object that is unmodified during
  // the filtering process
 
  @Override
  public ObjectFilter<CaptureSearchResult> get(AccessPoint ap) {
    return this;
  }
 
  @Override
  public int filterObject(CaptureSearchResult o) {
    final String file = o.getFile();
    boolean matched = false;
   
    if (prefixMatch != null) {
      if (!file.startsWith(prefixMatch)) {
        return (isExclusion ? FILTER_INCLUDE : FILTER_EXCLUDE);
      }
    }
   
    for (Pattern pattern : patterns) {
      Matcher matcher = pattern.matcher(file);
      if (matcher.find()) {
        String param = matcher.group(paramIndex);
        if (paramSet.contains(param)) {
          if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("Excluding (w)arc: " + file);
          }
          matched = true;
          break;
        }
      }
    }
   
    if (isExclusion) {
      return (matched ? FILTER_EXCLUDE : FILTER_INCLUDE)
    } else {
      return (matched ? FILTER_INCLUDE : FILTER_EXCLUDE);
    }
  }
 
  //Getters/Setters
 
  public String getParamFile() {
    return paramFile;
  }

  public void setParamFile(String paramFile) {
    this.paramFile = paramFile;
  }
 
  public int getParamIndex() {
    return paramIndex;
  }

  public void setParamIndex(int paramIndex) {
    this.paramIndex = paramIndex;
  }
 
  public boolean isExclusion() {
    return isExclusion;
  }

  public void setExclusion(boolean isExclusion) {
    this.isExclusion = isExclusion;
  }

  public char getDelim() {
    return delim;
  }

  public void setDelim(char delim) {
    this.delim = delim;
  }

  public String getPrefixMatch() {
    return prefixMatch;
  }

  public void setPrefixMatch(String prefixMatch) {
    this.prefixMatch = prefixMatch;
  }
}
TOP

Related Classes of org.archive.wayback.webapp.FileRegexParamFilterAndFactory

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.