Package edu.illinois.lis.utils

Source Code of edu.illinois.lis.utils.ExtractGqueriesFromTrecFormat

package edu.illinois.lis.utils;




import java.io.File;

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;

import edu.illinois.lis.query.TrecTemporalTopicSet;


/**
* creates a simple set of gQueries from the official TREC MB topic file
*
* @author Miles Efron
*
*/
public class ExtractGqueriesFromTrecFormat {

  private JsonObject outputObjects = null;
  private String pathToTrecTopics;
 
  public ExtractGqueriesFromTrecFormat(String pathToTrecTopics) {
    this.pathToTrecTopics = pathToTrecTopics;
    outputObjects = new JsonObject();
  }

  public void harvest() { 
    TrecTemporalTopicSet topicsFile = null;
    try {
      topicsFile = TrecTemporalTopicSet.fromFile(new File(pathToTrecTopics));
    } catch (Exception e) {
      e.printStackTrace();
    }
   
    JsonArray outputJsonArray = new JsonArray();
    for(edu.illinois.lis.query.TrecTemporalTopic query : topicsFile) {
     

      JsonObject outputQueryObject = new JsonObject();
      outputQueryObject.addProperty("title", query.getId());
      outputQueryObject.addProperty("text", query.getQuery());
      outputQueryObject.addProperty("epoch", Double.toString(query.getEpoch()));
      outputQueryObject.addProperty("querytweettime", Long.toString(query.getQueryTweetTime()));
     
      String text = query.getQuery();
      String[] toks = text.split(" ");
     
      JsonArray modelArray = new JsonArray();
      for(String tok : toks) {
        JsonObject tupleObject = new JsonObject();
        tupleObject.addProperty("weight", 1.0);
        tupleObject.addProperty("feature", tok);
        modelArray.add(tupleObject);
      }
      outputQueryObject.add("model", modelArray);


      outputJsonArray.add(outputQueryObject);
   
    outputObjects.add("queries", outputJsonArray);
  }


  public String toString() {
    Gson gson = new GsonBuilder().setPrettyPrinting().create();
    String json = gson.toJson(outputObjects);
    return json;
  }




  public static void main(String[] args) throws Exception {
    String trecQueryPath = args[0];

    ExtractGqueriesFromTrecFormat harvester = new ExtractGqueriesFromTrecFormat(trecQueryPath);
    harvester.harvest();

    System.out.println(harvester);
  }



}
TOP

Related Classes of edu.illinois.lis.utils.ExtractGqueriesFromTrecFormat

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.