Package ivory.ffg.preprocessing

Source Code of ivory.ffg.preprocessing.GenerateDocumentVectors

package ivory.ffg.preprocessing;

import java.util.Set;

import com.google.common.collect.Sets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;

import edu.umd.cloud9.util.map.HMapIV;

import ivory.bloomir.util.OptionManager;
import ivory.core.RetrievalEnvironment;
import ivory.core.data.document.IntDocVector;
import ivory.ffg.data.DocumentVectorUtility;
import ivory.ffg.util.QrelUtility;

/**
* @author Nima Asadi
*/
public class GenerateDocumentVectors {
  private static final Logger LOGGER = Logger.getLogger(GenerateDocumentVectors.class);

  public static void main(String[] args) throws Exception {
    OptionManager options = new OptionManager(GenerateDocumentVectors.class.getName());
    options.addOption(OptionManager.INDEX_ROOT_PATH, "path", "index root", true);
    options.addOption(OptionManager.DOCUMENT_VECTOR_CLASS, "class_name", "documentVector class", true);
    options.addOption(OptionManager.OUTPUT_PATH, "path", "output", true);
    options.addOption(OptionManager.JUDGMENT_PATH, "path", "Tab-Delimited judgments", true);

    try {
      options.parse(args);
    } catch(Exception exp) {
      return;
    }

    String indexPath = options.getOptionValue(OptionManager.INDEX_ROOT_PATH);
    String documentVectorClass = options.getOptionValue(OptionManager.DOCUMENT_VECTOR_CLASS);
    String outputPath = options.getOptionValue(OptionManager.OUTPUT_PATH);
    String qrelPath = options.getOptionValue(OptionManager.JUDGMENT_PATH);

    FileSystem fs = FileSystem.get(new Configuration());
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    env.initialize(true);

    //Parse queries, judgemnts and features
    HMapIV<int[]> qrels = QrelUtility.parseQrelsFromTabDelimited(qrelPath);

    FSDataOutputStream output = fs.create(new Path(outputPath));
    Set<Integer> docidHistory = Sets.newHashSet();

    //Evaluate queries and/or write the results to an output file
    for(int qid: qrels.keySet()) {
      for(int docid: qrels.get(qid)) {
        if(!docidHistory.contains(docid)) {
          docidHistory.add(docid);

          IntDocVector vector = env.documentVectors(new int[]{docid})[0];
          output.writeInt(docid);
          DocumentVectorUtility.newInstance(documentVectorClass, vector).write(output);
        }
      }
      LOGGER.info("Compressed query " + qid);
    }

    output.writeInt(-1);
    output.close();
  }
}
TOP

Related Classes of ivory.ffg.preprocessing.GenerateDocumentVectors

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.