Package org.archive.cdxserver.filter

Source Code of org.archive.cdxserver.filter.FilenamePrefixFilter

package org.archive.cdxserver.filter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.archive.format.cdx.CDXLine;
import org.archive.util.GeneralURIStreamFactory;
import org.archive.util.binsearch.SeekableLineReaderFactory;
import org.archive.util.binsearch.SeekableLineReaderIterator;

public class FilenamePrefixFilter implements CDXFilter {
 
  private static final Logger LOGGER =
        Logger.getLogger(FilenamePrefixFilter.class.getName());
 
  protected String paramFile;
  protected int paramIndex = 1;
  protected boolean isExclusion = true;
  protected char delim = '\t';
 
  protected String containsMatch;
 
  protected Set<String> paramSet = null;
  protected Pattern patterns[] = null;
 
  protected List<String> prefixList = null;
 
  // This method should be set as the init-method in the spring config
  // init-method="loadParamFile" when using this Filter
  public void loadParamFile() throws IOException
  {
    SeekableLineReaderFactory fact = null;
    SeekableLineReaderIterator iter = null;
   
    try
      fact = GeneralURIStreamFactory.createSeekableStreamFactory(paramFile, false);
      iter = new SeekableLineReaderIterator(fact.get());
     
      paramSet = new HashSet<String>();
 
      while (iter.hasNext()) {
        String param = iter.next();
        param = param.trim();
       
        if (param.isEmpty() || param.startsWith("#")) {
          continue;
        }
       
        // Use only the first word, ignore the rest
        int wordEnd = param.indexOf(delim);
       
        if (wordEnd > 0) {
          param = param.substring(0, wordEnd);
        }
       
        paramSet.add(param);
      }
    } finally {
      if (iter != null) {
        iter.close();
      }
     
      if (fact != null) {
        fact.close();
      }
    }
  }
 
  public boolean include(CDXLine line) {
    final String file = line.getFilename();
    boolean matched = false;
   
    if (containsMatch != null) {
      if (!file.contains(containsMatch)) {
        return (isExclusion ? true : false);
      }
    }
   
    if (prefixList != null) { 
      for (String prefix : prefixList) {
        if (file.startsWith(prefix)) {
          return (isExclusion ? false : true);
        }
      }
    }
   
    if (patterns != null) {
      for (Pattern pattern : patterns) {
        Matcher matcher = pattern.matcher(file);
        if (matcher.find()) {
          String param = matcher.group(paramIndex);
          if (paramSet.contains(param)) {
            if (LOGGER.isLoggable(Level.FINE)) {
              LOGGER.fine("Excluding (w)arc: " + file);
            }
            matched = true;
            break;
          }
        }
      }
    }
   
    if (isExclusion) {
      return (matched ? false : true)
    } else {
      return (matched ? true : false);
    }
  }
 
  //Getters/Setters
 
  public String getParamFile() {
    return paramFile;
  }

  public void setParamFile(String paramFile) {
    this.paramFile = paramFile;
  }
 
  public int getParamIndex() {
    return paramIndex;
  }

  public void setParamIndex(int paramIndex) {
    this.paramIndex = paramIndex;
  }
 
  public boolean isExclusion() {
    return isExclusion;
  }

  public void setExclusion(boolean isExclusion) {
    this.isExclusion = isExclusion;
  }

  public char getDelim() {
    return delim;
  }

  public void setDelim(char delim) {
    this.delim = delim;
  }

  public String getContainsMatch() {
    return containsMatch;
  }

  public void setContainsMatch(String containsMatch) {
    this.containsMatch = containsMatch;
  }
 
  public List<String> getPatterns() {
    ArrayList<String> s = new ArrayList<String>();
    for(Pattern p : patterns) {
      s.add(p.pattern());
    }
    return s;
  }

  public void setPatterns(List<String> patternStrings) {
    int size = patternStrings.size();
    patterns = new Pattern[size];
    for(int i = 0; i < size; i++) {
      patterns[i] = Pattern.compile(patternStrings.get(i));
    }
  }

  public List<String> getPrefixList() {
    return prefixList;
  }

  public void setPrefixList(List<String> prefixList) {
    this.prefixList = prefixList;
  }
}
TOP

Related Classes of org.archive.cdxserver.filter.FilenamePrefixFilter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.