Package ivory.bloomir.util

Examples of ivory.bloomir.util.OptionManager


*/
public class GenerateDocumentVectors {
  private static final Logger LOGGER = Logger.getLogger(GenerateDocumentVectors.class);

  public static void main(String[] args) throws Exception {
    OptionManager options = new OptionManager(GenerateDocumentVectors.class.getName());
    options.addOption(OptionManager.INDEX_ROOT_PATH, "path", "index root", true);
    options.addOption(OptionManager.DOCUMENT_VECTOR_CLASS, "class_name", "documentVector class", true);
    options.addOption(OptionManager.OUTPUT_PATH, "path", "output", true);
    options.addOption(OptionManager.JUDGMENT_PATH, "path", "Tab-Delimited judgments", true);

    try {
      options.parse(args);
    } catch(Exception exp) {
      return;
    }

    String indexPath = options.getOptionValue(OptionManager.INDEX_ROOT_PATH);
    String documentVectorClass = options.getOptionValue(OptionManager.DOCUMENT_VECTOR_CLASS);
    String outputPath = options.getOptionValue(OptionManager.OUTPUT_PATH);
    String qrelPath = options.getOptionValue(OptionManager.JUDGMENT_PATH);

    FileSystem fs = FileSystem.get(new Configuration());
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    env.initialize(true);

View Full Code Here


      fvalues[i] = features[i].computeScoreWithSlidingWindow(doc, qterms, hashedTerms, stats);
    }
  }

  public static void main(String[] args) throws Exception {
    OptionManager options = new OptionManager(DocumentVectorSlidingWindow.class.getName());
    options.addOption(OptionManager.INDEX_ROOT_PATH, "path", "index root", true);
    options.addOption(OptionManager.DOCUMENT_VECTOR_CLASS, "class_name", "DocumentVector class", true);
    options.addOption(OptionManager.DOCUMENT_PATH, "path", "documents", true);
    options.addOption(OptionManager.QUERY_PATH, "path", "XML query", true);
    options.addOption(OptionManager.JUDGMENT_PATH, "path", "Tab-Delimited judgments", true);
    options.addOption(OptionManager.FEATURE_PATH, "path", "XML features", true);
    options.addOption(OptionManager.OUTPUT_PATH, "", "Print feature values", false);

    try {
      options.parse(args);
    } catch(Exception exp) {
      return;
    }

    String indexPath = options.getOptionValue(OptionManager.INDEX_ROOT_PATH);
    String documentVectorClass = options.getOptionValue(OptionManager.DOCUMENT_VECTOR_CLASS);
    String documentsPath = options.getOptionValue(OptionManager.DOCUMENT_PATH);
    String queryPath = options.getOptionValue(OptionManager.QUERY_PATH);
    String qrelPath = options.getOptionValue(OptionManager.JUDGMENT_PATH);
    String featurePath = options.getOptionValue(OptionManager.FEATURE_PATH);
    boolean writeOutput = options.foundOption(OptionManager.OUTPUT_PATH);

    FileSystem fs = FileSystem.get(new Configuration());
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    env.initialize(true);

    DocumentVectorSlidingWindow generator = new DocumentVectorSlidingWindow(env, fs);

    //Parse queries, judgments and features
    HMapIV<String> parsedQueries = QueryUtility.loadQueries(queryPath);
    HMapIV<int[]> queries = QueryUtility.queryToIntegerCode(env, parsedQueries);
    HMapIF idfs = QueryUtility.loadIdf(env, parsedQueries);
    HMapIF cfs = QueryUtility.loadCf(env, parsedQueries);
    HMapIV<int[]> qrels = QrelUtility.parseQrelsFromTabDelimited(qrelPath);
    Map<String, Feature> featuresMap = FeatureUtility.parseFeatures(featurePath);
    Feature[] features = new Feature[featuresMap.size()];
    int index = 0;
    for(String key: featuresMap.keySet()) {
      features[index++] = featuresMap.get(key);
    }

    //Prepare stats
    generator.prepareStats(idfs, cfs);
    generator.prepareDocuments(documentVectorClass, documentsPath);

    FSDataOutputStream output = null;
    if(writeOutput) {
      output = fs.create(new Path(options.getOptionValue(OptionManager.OUTPUT_PATH)));
    }

    System.gc();
    Thread.currentThread().sleep(20000);
    long cnt = 0;
View Full Code Here

      fvalues[i] = features[i].computeScoreWithMiniIndexes(positions, qterms, dl, stats);
    }
  }

  public static void main(String[] args) throws Exception {
    OptionManager options = new OptionManager(DocumentVectorOnTheFlyIndexing.class.getName());
    options.addOption(OptionManager.INDEX_ROOT_PATH, "path", "index root", true);
    options.addOption(OptionManager.DOCUMENT_VECTOR_CLASS, "class_name", "DocumentVector class", true);
    options.addOption(OptionManager.DOCUMENT_PATH, "path", "documents", true);
    options.addOption(OptionManager.QUERY_PATH, "path", "XML query", true);
    options.addOption(OptionManager.JUDGMENT_PATH, "path", "Tab-Delimited judgments", true);
    options.addOption(OptionManager.FEATURE_PATH, "path", "XML features", true);
    options.addOption(OptionManager.OUTPUT_PATH, "", "Print feature values", false);

    try {
      options.parse(args);
    } catch(Exception exp) {
      return;
    }

    String indexPath = options.getOptionValue(OptionManager.INDEX_ROOT_PATH);
    String documentVectorClass = options.getOptionValue(OptionManager.DOCUMENT_VECTOR_CLASS);
    String documentsPath = options.getOptionValue(OptionManager.DOCUMENT_PATH);
    String queryPath = options.getOptionValue(OptionManager.QUERY_PATH);
    String qrelPath = options.getOptionValue(OptionManager.JUDGMENT_PATH);
    String featurePath = options.getOptionValue(OptionManager.FEATURE_PATH);
    boolean writeOutput = options.foundOption(OptionManager.OUTPUT_PATH);

    FileSystem fs = FileSystem.get(new Configuration());
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    env.initialize(true);

    DocumentVectorOnTheFlyIndexing generator = new DocumentVectorOnTheFlyIndexing(env, fs);

    //Parse queries, judgments and features
    HMapIV<String> parsedQueries = QueryUtility.loadQueries(queryPath);
    HMapIV<int[]> queries = QueryUtility.queryToIntegerCode(env, parsedQueries);
    HMapIF idfs = QueryUtility.loadIdf(env, parsedQueries);
    HMapIF cfs = QueryUtility.loadCf(env, parsedQueries);
    HMapIV<int[]> qrels = QrelUtility.parseQrelsFromTabDelimited(qrelPath);
    Map<String, Feature> featuresMap = FeatureUtility.parseFeatures(featurePath);
    Feature[] features = new Feature[featuresMap.size()];
    int index = 0;
    for(String key: featuresMap.keySet()) {
      features[index++] = featuresMap.get(key);
    }

    //Prepare stats
    generator.prepareStats(idfs, cfs);
    generator.prepareDocuments(documentVectorClass, documentsPath);

    FSDataOutputStream output = null;
    if(writeOutput) {
      output = fs.create(new Path(options.getOptionValue(OptionManager.OUTPUT_PATH)));
    }

    System.gc();
    Thread.currentThread().sleep(20000);
    long cnt = 0;
View Full Code Here

    }
    return fvalues;
  }

  public static void main(String[] args) throws Exception {
    OptionManager options = new OptionManager(RankAndFeaturesSmallAdaptive.class.getName());
    options.addOption(OptionManager.INDEX_ROOT_PATH, "path", "index root", true);
    options.addOption(OptionManager.POSTINGS_ROOT_PATH, "path", "Positional postings root", true);
    options.addOption(OptionManager.QUERY_PATH, "path", "XML query", true);
    options.addOption(OptionManager.JUDGMENT_PATH, "path", "Tab-Delimited judgments", true);
    options.addOption(OptionManager.FEATURE_PATH, "path", "XML features", true);
    options.addOption(OptionManager.HITS, "integer", "number of hits (default: 10,000)", false);
    options.addOption(OptionManager.SPAM_PATH, "path", "spam percentile score", false);
    options.addOption(OptionManager.OUTPUT_PATH, "", "Print feature values", false);
    options.addDependency(OptionManager.OUTPUT_PATH, OptionManager.SPAM_PATH);

    try {
      options.parse(args);
    } catch(Exception exp) {
      return;
    }

    String indexPath = options.getOptionValue(OptionManager.INDEX_ROOT_PATH);
    String postingsPath = options.getOptionValue(OptionManager.POSTINGS_ROOT_PATH);
    String queryPath = options.getOptionValue(OptionManager.QUERY_PATH);
    String qrelPath = options.getOptionValue(OptionManager.JUDGMENT_PATH);
    String featurePath = options.getOptionValue(OptionManager.FEATURE_PATH);
    boolean writeOutput = options.foundOption(OptionManager.OUTPUT_PATH);
    int hits = 10000;
    if(options.foundOption(OptionManager.HITS)) {
      hits = Integer.parseInt(options.getOptionValue(OptionManager.HITS));
    }

    FileSystem fs = FileSystem.get(new Configuration());
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    env.initialize(true);

    RankAndFeaturesSmallAdaptive generator = new RankAndFeaturesSmallAdaptive(env, fs);

    //Parse queries and find integer codes for the query terms.
    HMapIV<String> parsedQueries = QueryUtility.loadQueries(queryPath);
    HMapIV<int[]> queries = QueryUtility.queryToIntegerCode(env, parsedQueries);
    HMapIF idfs = QueryUtility.loadIdf(env, parsedQueries);
    HMapIF cfs = QueryUtility.loadCf(env, parsedQueries);
    HMapIV<int[]> qrels = QrelUtility.parseQrelsFromTabDelimited(qrelPath);
    Map<String, Feature> featuresMap = FeatureUtility.parseFeatures(featurePath);
    Feature[] features = new Feature[featuresMap.size()];
    int index = 0;
    for(String key: featuresMap.keySet()) {
      features[index++] = featuresMap.get(key);
    }

    generator.prepareStats(idfs, cfs);
    generator.preparePostings(postingsPath);

    int[] newDocidsLookup = null;
    FSDataOutputStream output = null;
    if(writeOutput) {
      final SpamPercentileScore spamScores = new SpamPercentileScore();
      spamScores.initialize(options.getOptionValue(OptionManager.SPAM_PATH), fs);
      newDocidsLookup = DocumentUtility.reverseLookupSpamSortedDocids(DocumentUtility.spamSortDocids(spamScores));

      output = fs.create(new Path(options.getOptionValue(OptionManager.OUTPUT_PATH)));
    }

    System.gc();
    Thread.currentThread().sleep(20000);
    long cnt = 0;
View Full Code Here

    }
    return results;
  }

  public static void main(String[] args) throws Exception {
    OptionManager options = new OptionManager(BloomRanker.class.getName());
    options.addOption(OptionManager.INDEX_ROOT_PATH, "path", "index root", true);
    options.addOption(OptionManager.POSTINGS_ROOT_PATH, "path", "Non-positional postings root", true);
    options.addOption(OptionManager.BLOOM_ROOT_PATH, "path", "Bloom filters root", true);
    options.addOption(OptionManager.QUERY_PATH, "path", "XML query", true);
    options.addOption(OptionManager.OUTPUT_PATH, "path", "output path (Optional)", false);
    options.addOption(OptionManager.SPAM_PATH, "path", "spam percentile score (Optional)", false);
    options.addOption(OptionManager.HITS, "integer", "number of hits (default: 10,000)", false);
    options.addDependency(OptionManager.OUTPUT_PATH, OptionManager.SPAM_PATH);

    try {
      options.parse(args);
    } catch(Exception exp) {
      return;
    }

    String indexPath = options.getOptionValue(OptionManager.INDEX_ROOT_PATH);
    String postingsIndexPath = options.getOptionValue(OptionManager.POSTINGS_ROOT_PATH);
    String bloomIndexPath = options.getOptionValue(OptionManager.BLOOM_ROOT_PATH);
    String queryPath = options.getOptionValue(OptionManager.QUERY_PATH);
    boolean writeOutput = options.foundOption(OptionManager.OUTPUT_PATH);
    int hits = 10000;
    if(options.foundOption(OptionManager.HITS)) {
      hits = Integer.parseInt(options.getOptionValue(OptionManager.HITS));
    }

    FileSystem fs = FileSystem.get(new Configuration());
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    env.initialize(true);
    DocnoMapping docnoMapping = env.getDocnoMapping();

    BloomRanker ranker = new BloomRanker(postingsIndexPath, bloomIndexPath, fs);

    //Parse queries and find integer codes for the query terms.
    HMapIV<int[]> queries = QueryUtility.queryToIntegerCode(env, queryPath);

    //Evaluate queries and/or write the results to an output file
    int[] newDocidsLookup = null;
    FSDataOutputStream output =null;
    if(writeOutput) {
      final SpamPercentileScore spamScores = new SpamPercentileScore();
      spamScores.initialize(options.getOptionValue(OptionManager.SPAM_PATH), fs);
      newDocidsLookup = DocumentUtility.reverseLookupSpamSortedDocids(DocumentUtility.spamSortDocids(spamScores));

      output = fs.create(new Path(options.getOptionValue(OptionManager.OUTPUT_PATH)));
      output.write(("<parameters>\n").getBytes());
    }

    for (int qid: queries.keySet()) {
      int[] qterms = queries.get(qid);
View Full Code Here

    }
    return results;
  }

  public static void main(String[] args) throws Exception {
    OptionManager options = new OptionManager(LinearMergingRanker.class.getName());
    options.addOption(OptionManager.INDEX_ROOT_PATH, "path", "index root", true);
    options.addOption(OptionManager.POSTINGS_ROOT_PATH, "path", "Non-positional postings root", true);
    options.addOption(OptionManager.QUERY_PATH, "path", "XML query", true);
    options.addOption(OptionManager.OUTPUT_PATH, "path", "output path (Optional)", false);
    options.addOption(OptionManager.SPAM_PATH, "path", "spam percentile score (Optional)", false);
    options.addOption(OptionManager.HITS, "integer", "number of hits (default: 10,000)", false);
    options.addDependency(OptionManager.OUTPUT_PATH, OptionManager.SPAM_PATH);

    try {
      options.parse(args);
    } catch(Exception exp) {
      return;
    }

    String indexPath = options.getOptionValue(OptionManager.INDEX_ROOT_PATH);
    String postingsIndexPath = options.getOptionValue(OptionManager.POSTINGS_ROOT_PATH);
    String queryPath = options.getOptionValue(OptionManager.QUERY_PATH);
    boolean writeOutput = options.foundOption(OptionManager.OUTPUT_PATH);
    int hits = 10000;
    if(options.foundOption(OptionManager.HITS)){
      hits = Integer.parseInt(options.getOptionValue(OptionManager.HITS));
    }

    FileSystem fs = FileSystem.get(new Configuration());
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    env.initialize(true);
    DocnoMapping docnoMapping = env.getDocnoMapping();

    LinearMergingRanker ranker = new LinearMergingRanker(postingsIndexPath, fs);

    //Parse queries and find integer codes for the query terms.
    HMapIV<int[]> queries = QueryUtility.queryToIntegerCode(env, queryPath);

    //Evaluate queries and/or write the results to an output file
    int[] newDocidsLookup = null;
    FSDataOutputStream output =null;
    if(writeOutput) {
      final SpamPercentileScore spamScores = new SpamPercentileScore();
      spamScores.initialize(options.getOptionValue(OptionManager.SPAM_PATH), fs);
      newDocidsLookup = DocumentUtility.reverseLookupSpamSortedDocids(DocumentUtility.spamSortDocids(spamScores));

      output = fs.create(new Path(options.getOptionValue(OptionManager.OUTPUT_PATH)));
      output.write(("<parameters>\n").getBytes());
    }

    for (int qid: queries.keySet()) {
      int[] qterms = queries.get(qid);
View Full Code Here

      index++;
    }
  }

  public static void main(String[] args) throws Exception {
    OptionManager options = new OptionManager(SmallAdaptiveRanker.class.getName());
    options.addOption(OptionManager.INDEX_ROOT_PATH, "path", "index root", true);
    options.addOption(OptionManager.POSTINGS_ROOT_PATH, "path", "Non-positional postings root", true);
    options.addOption(OptionManager.QUERY_PATH, "path", "XML query", true);
    options.addOption(OptionManager.OUTPUT_PATH, "path", "output path (Optional)", false);
    options.addOption(OptionManager.SPAM_PATH, "path", "spam percentile score (Optional)", false);
    options.addOption(OptionManager.HITS, "integer", "number of hits (default: 10,000)", false);
    options.addDependency(OptionManager.OUTPUT_PATH, OptionManager.SPAM_PATH);

    try {
      options.parse(args);
    } catch(Exception exp) {
      return;
    }

    String indexPath = options.getOptionValue(OptionManager.INDEX_ROOT_PATH);
    String postingsIndexPath = options.getOptionValue(OptionManager.POSTINGS_ROOT_PATH);
    String queryPath = options.getOptionValue(OptionManager.QUERY_PATH);
    boolean writeOutput = options.foundOption(OptionManager.OUTPUT_PATH);
    int hits = 10000;
    if(options.foundOption(OptionManager.HITS)){
      hits = Integer.parseInt(options.getOptionValue(OptionManager.HITS));
    }

    FileSystem fs = FileSystem.get(new Configuration());
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    env.initialize(true);
    DocnoMapping docnoMapping = env.getDocnoMapping();

    SmallAdaptiveRanker ranker = new SmallAdaptiveRanker(postingsIndexPath, fs);

    //Parse queries and find integer codes for the query terms.
    HMapIV<int[]> queries = QueryUtility.queryToIntegerCode(env, queryPath);

    //Evaluate queries and/or write the results to an output file
    int[] newDocidsLookup = null;
    FSDataOutputStream output = null;
    if(writeOutput) {
      final SpamPercentileScore spamScores = new SpamPercentileScore();
      spamScores.initialize(options.getOptionValue(OptionManager.SPAM_PATH), fs);
      newDocidsLookup = DocumentUtility.reverseLookupSpamSortedDocids(DocumentUtility.spamSortDocids(spamScores));

      output = fs.create(new Path(options.getOptionValue(OptionManager.OUTPUT_PATH)));
      output.write(("<parameters>\n").getBytes());
    }

    //    DocnoMapping docnoMapping = env.getDocnoMapping();
    for (int qid: queries.keySet()) {
View Full Code Here

*
* @author Nima Asadi
*/
public class GenerateBloomFilters {
  public static void main(String[] args) throws Exception {
    OptionManager options = new OptionManager(GenerateBloomFilters.class.getName());
    options.addOption(OptionManager.INDEX_ROOT_PATH, "path", "index root", true);
    options.addOption(OptionManager.OUTPUT_PATH, "path", "output root", true);
    options.addOption(OptionManager.SPAM_PATH, "path", "spam percentile score", true);
    options.addOption(OptionManager.BITS_PER_ELEMENT, "integer", "number of bits per element", true);
    options.addOption(OptionManager.NUMBER_OF_HASH, "integer", "number of hash functions", true);

    try {
      options.parse(args);
    } catch(Exception exp) {
      return;
    }

    final String input = options.getOptionValue(OptionManager.INDEX_ROOT_PATH);
    final String output = options.getOptionValue(OptionManager.OUTPUT_PATH);
    final String spamPath = options.getOptionValue(OptionManager.SPAM_PATH);
    final int bitsPerElement = Integer.parseInt(options.getOptionValue(OptionManager.BITS_PER_ELEMENT));
    final int nbHash = Integer.parseInt(options.getOptionValue(OptionManager.NUMBER_OF_HASH));

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    RetrievalEnvironment env = new RetrievalEnvironment(input, fs);
    env.initialize(false);
View Full Code Here

*/
public class GenerateDocumentVectors {
  private static final Logger LOGGER = Logger.getLogger(GenerateDocumentVectors.class);

  public static void main(String[] args) throws Exception {
    OptionManager options = new OptionManager(GenerateDocumentVectors.class.getName());
    options.addOption(OptionManager.INDEX_ROOT_PATH, "path", "index root", true);
    options.addOption(OptionManager.DOCUMENT_VECTOR_CLASS, "class_name", "documentVector class", true);
    options.addOption(OptionManager.OUTPUT_PATH, "path", "output", true);
    options.addOption(OptionManager.JUDGMENT_PATH, "path", "Tab-Delimited documents", true);

    try {
      options.parse(args);
    } catch(Exception exp) {
      return;
    }

    String indexPath = options.getOptionValue(OptionManager.INDEX_ROOT_PATH);
    String documentVectorClass = options.getOptionValue(OptionManager.DOCUMENT_VECTOR_CLASS);
    String outputPath = options.getOptionValue(OptionManager.OUTPUT_PATH);
    String qrelPath = options.getOptionValue(OptionManager.JUDGMENT_PATH);

    FileSystem fs = FileSystem.get(new Configuration());
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    env.initialize(true);

View Full Code Here

public class GenerateCompressedPositionalPostings {
  private static final Logger LOGGER = Logger.getLogger(GenerateCompressedPositionalPostings.class);

  public static void main(String[] args) throws Exception {
    OptionManager options = new OptionManager(GenerateCompressedPositionalPostings.class.getName());
    options.addOption(OptionManager.INDEX_ROOT_PATH, "path", "index root", true);
    options.addOption(OptionManager.OUTPUT_PATH, "path", "output", true);
    options.addOption(OptionManager.QUERY_PATH, "path", "XML query", true);
    options.addOption(OptionManager.SPAM_PATH, "path", "spam percentile scores", true);

    try {
      options.parse(args);
    } catch(Exception exp) {
      return;
    }

    String indexPath = options.getOptionValue(OptionManager.INDEX_ROOT_PATH);
    String outputPath = options.getOptionValue(OptionManager.OUTPUT_PATH);
    String spamPath = options.getOptionValue(OptionManager.SPAM_PATH);
    String queryPath = options.getOptionValue(OptionManager.QUERY_PATH);

    FileSystem fs = FileSystem.get(new Configuration());
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    env.initialize(true);
View Full Code Here

TOP

Related Classes of ivory.bloomir.util.OptionManager

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.