Package edu.gslis.ttg.clusters

Examples of edu.gslis.ttg.clusters.Cluster


        clusterMembership.put(topic, new Clusters());
        JSONArray clusters = (JSONArray) ((JSONObject) topicObj.get(topic)).get("clusters");
        Iterator<JSONArray> clusterIt = clusters.iterator();
        while (clusterIt.hasNext()) { // for each cluster in the topic
          JSONArray cluster = (JSONArray) clusterIt.next();
          Cluster c = new Cluster();
          Iterator<String> clusterMemberIt = cluster.iterator();
          while (clusterMemberIt.hasNext()) { // for each docId in the cluster
            String member = clusterMemberIt.next();
            long memberId = Long.parseLong(member);
            c.add(memberId);
          }
          clusterMembership.get(topic).add(c);
        }
      }
    } catch (Exception e) {
      err.println("Error reading training data.");
      e.printStackTrace();
      System.exit(-1);
    }
   
    // instantiate search client
    TrecSearchThriftClient client = new TrecSearchThriftClient(params.getParamValue(HOST_OPTION),
        trainingPort, group, token);

    SimpleSearcher searcher = new SimpleSearcher(client, numResults);
   
    err.println("=== Train Queries ===");
   
    List<Double> thresholds = new ArrayList<Double>();
    double averageThreshold = 0;
    Iterator<GQuery> queryIterator = trainingQueries.iterator();
    while(queryIterator.hasNext()) {
      GQuery query = queryIterator.next();
     
      Map<Long, TResult> seenResults = searcher.search(query);
     
      SimpleJaccardClusterer clusterer = new SimpleJaccardClusterer(new ArrayList<TResult>(seenResults.values()));
     
      // sweep through jaccard steps, calculating F1
      double maxF1 = 0;
      double maxF1Threshold = 1;
      for (double j = 1.0; j >= 0.0; j -= stepSize) { // for each jaccard threshold step
        Clusters clusters = clusterer.cluster(j);
       
        // all clusters are created now, get a finalized set of results
        Set<Long> allResults = new HashSet<Long>(seenResults.keySet());
        allResults.removeAll(clusters.getAllClusteredResults()); // allResults includes unclustered plus one representative from each cluster
        for (Cluster c : clusters) {
          allResults.add(c.getFirstMember());
        }
       
        // calculate f1 on the finalized set
        Clusters seenClusters = new Clusters();
        Clusters trueClusters = clusterMembership.get(query.getTitle());
        Iterator<Long> resultIt = allResults.iterator();
        while (resultIt.hasNext()) {
          long result = resultIt.next();
          Cluster trueCluster = trueClusters.findCluster(result);
          if (trueCluster != null) { // if it is relevant, it will have a true cluster; if this is null, it's non-relevant
            seenClusters.add(trueCluster);
          }
        }
       
View Full Code Here

TOP

Related Classes of edu.gslis.ttg.clusters.Cluster

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.