Examples of uk.ac.cam.ha293.tweetlabel.types.Category

Package uk.ac.cam.ha293.tweetlabel.types

Examples of uk.ac.cam.ha293.tweetlabel.types.Category

uk.ac.cam.ha293.tweetlabel.topics.LLDATopicModel

          classifications.add(classification);
        }
      } 
    } else {
      for(long id : Tools.getCSVUserIDs()) {
        FullLLDAClassification c = new FullLLDAClassification(topicType,alpha,id);
        Map<String,Double> classification = new HashMap<String,Double>();
        int topicCount = 0;
        for(String topic : c.getCategorySet()) {
          if(topicCount == topTopics) break;
          if(topic.equals("Other")) continue;
          classification.put(topic, c.getScore(topic));
          topicCount++;
        }
        classifications.add(classification);
      }
    }

View Full Code Here

      FullLDAClassification c = new FullLDAClassification(uid,1000,100,0,alpha);
      for(String cat : c.getCategorySet()) {
        valueSet.add(c.getScore(cat));
      }
    } else if(topicType.equals("alchemy")) {
      FullLLDAClassification c = new FullLLDAClassification("alchemy",alpha,uid);
      for(String cat : c.getCategorySet()) {
        valueSet.add(c.getScore(cat));
      }
    } else if(topicType.equals("calais")) {
      FullLLDAClassification c = new FullLLDAClassification("calais",alpha,uid);
      for(String cat : c.getCategorySet()) {
        valueSet.add(c.getScore(cat));
      }
    } else if(topicType.equals("textwise")) {
      FullLLDAClassification c = new FullLLDAClassification("textwise",alpha,uid);
      for(String cat : c.getCategorySet()) {
        valueSet.add(c.getScore(cat));
      }
    }
    return valueSet;
  }

View Full Code Here

          double cosineSum = 0.0;
          int cosineCount = 0;
          for(Long uid : uids) {
            if(topicType.equals("alchemy")) {
              FullAlchemyClassification baseline = new FullAlchemyClassification(uid);
              FullLLDAClassification llda = new FullLLDAClassification(topicType,alpha,false,reduction,uid);
              double sim = llda.cosineSimilarity(baseline);
              cosineSum += sim;
              cosineCount++;
            } else if(topicType.equals("calais")) {
              FullCalaisClassification baseline = new FullCalaisClassification(uid);
              FullLLDAClassification llda = new FullLLDAClassification(topicType,alpha,false,reduction,uid);
              double sim = llda.cosineSimilarity(baseline);
              cosineSum += sim;
              cosineCount++;
            } else if(topicType.equals("textwiseproper")) {
              FullTextwiseClassification baseline = new FullTextwiseClassification(uid,true);
              FullLLDAClassification llda = new FullLLDAClassification(topicType,alpha,false,reduction,uid);
              double sim = llda.cosineSimilarity(baseline);
              cosineSum += sim;
              cosineCount++;
            }
          }
          double avgCosine = cosineSum/cosineCount;

View Full Code Here

        totalCount++;
        Set<String> lldaTopicSet = new HashSet<String>();
        Set<String> baselineTopicSet = new HashSet<String>();
        String modTopic = topicType;
        if(modTopic.equals("textwise")) modTopic = "textwiseproper";
        FullLLDAClassification llda = new FullLLDAClassification(modTopic,alpha,uid);
        int kCount=0;
        for(String topic : llda.getCategorySet()) {
          if(kCount == k) break;
          kCount++;
          lldaTopicSet.add(topic);
        }
        if(topicType.equals("alchemy")) {

View Full Code Here

    Map<String,Double> proportions = new HashMap<String,Double>();
    for(String topic : Tools.getTopics(topicType)) {
      proportions.put(topic, 0.0);
    }
    for(Long uid : Tools.getCSVUserIDs()) {
      FullLLDAClassification c = new FullLLDAClassification(topicType,alpha,uid);
      if(c.getCategorySet().size()==0) continue;
      String topTopic = c.getCategorySet().toArray(new String[1])[0];
      proportions.put(topTopic,proportions.get(topTopic)+1.0);
    }
    double sum = 0.0;
    for(String topic : proportions.keySet()) {
      sum += proportions.get(topic);

View Full Code Here

      //System.out.println(totalCount);
      Set<String> lldaTopicSet = new HashSet<String>();
      Set<String> baselineTopicSet = new HashSet<String>();
      String modTopic = topicType;
      if(modTopic.equals("textwise")) modTopic = "textwiseproper";
      FullLLDAClassification llda = new FullLLDAClassification(modTopic,alpha,fewerProfiles,reduction,uid);
      if(llda.getCategorySet().isEmpty()) continue;
      totalCount++;
      int kCount=0;
      for(String topic : llda.getCategorySet()) {
        if(kCount == k) break;
        kCount++;
        lldaTopicSet.add(topic);
      }
      if(topicType.equals("alchemy")) {

View Full Code Here

  
  public void fillSVM(String topicType) {
    System.out.println("Filling from SVM "+topicType+" classifications");
    FullSVMClassification[] classifications = new FullSVMClassification[d];
    for(long id : Tools.getCSVUserIDs()) {
      classifications[indexLookup.get(id)] = new FullSVMClassification(topicType,id);
    }
    
    //cosine similarities!
    for(int m=0; m<d; m++) {
      System.out.println("On row "+m);

View Full Code Here

      for(Long uid : Tools.getCSVUserIDs()) {
        //System.out.println(totalCount);
        totalCount++;
        Set<String> svmTopicSet = new HashSet<String>();
        Set<String> baselineTopicSet = new HashSet<String>();
        FullSVMClassification svm = new FullSVMClassification(topicType,uid);
        int kCount=0;
        for(String topic : svm.getCategorySet()) {
          if(kCount == k) break;
          kCount++;
          svmTopicSet.add(topic);
          System.out.println("Adding topic "+topic+" "+svm.getScore(topic));
        }
        if(topicType.equals("alchemy")) {
          FullAlchemyClassification baseline = new FullAlchemyClassification(uid);
          kCount=0;
          for(String topic : baseline.getCategorySet()) {

View Full Code Here

    Corpus corpus = null;
    if(stem) corpus = Corpus.load("allprofiles-stemmed");
    else corpus = Corpus.load("allprofiles-unstemmed");
    
    //Check for model existence
    LDATopicModel lda = null;
    if(new File("models/lda/"+description+".model").exists()) {
      System.out.println("Found LDA model "+description);
      lda = LDATopicModel.load(description);
    } else {
      System.out.println("Couldn't find LDA model "+description+", creating new one");
      lda = new LDATopicModel(corpus,numTopics,burn,sample,0,alpha,0.01);
      lda.runGibbsSampling();
      lda.save(description);
    }
    
    try {
      //Get the document topic distributions and store these
      List<List<WordScore>> docTopics = lda.getDocuments();
      int docID = 0;
      for(List<WordScore> document : docTopics) {
        Long userID = lda.getDocIDFromIndex(docID);
        FileOutputStream fileOut = new FileOutputStream(dirName+"/"+userID+".csv");
        PrintWriter writeOut = new PrintWriter(fileOut);
        writeOut.println("\"topic\",\"probability\"");
        for(WordScore topic : document) {
          writeOut.println(topic.getWord()+","+topic.getScore());
        }
        writeOut.close();
        docID++;
      }
      
      
      //NOTE: We are saving these for now. However, we always have a saved model
      //and we can get these attributes from the model
      
      //should also save the topic-word distributions
      //okay, so we should definitely serialize topics and vocab
      Map<String,Integer> vocab = lda.getVocab();
      double[][] topics = lda.getTopicsUnsorted();
      
      //Save topics
      FileOutputStream topicsFileOut = new FileOutputStream(dirName+"/TOPICS.obj");
      ObjectOutputStream topicsObjectOut = new ObjectOutputStream(topicsFileOut);
      topicsObjectOut.writeObject(topics);

View Full Code Here

    Corpus corpus = null;
    if(stem) corpus = Corpus.loadLabelled(topicType, "allprofiles-stemmed");
    else corpus = Corpus.loadLabelled(topicType, "allprofiles-unstemmed");
    
    //Check for model existence
    LLDATopicModel llda = null;
    if(new File("models/llda/"+topicType+"/"+description+".model").exists()) {
      System.out.println("Found LLDA model "+description);
      llda = LLDATopicModel.load(topicType,description);
    } else {
      System.out.println("Couldn't find LLDA model "+description+", creating new one");
      llda = new LLDATopicModel(corpus,burn,sample,lag,1,0.01);
      llda.runGibbsSampling();
      llda.save(description);
    }
    
    try {
      //Get the document topic distributions and store these
      List<List<WordScore>> docTopics = llda.getDocuments();
      int docID = 0;
      for(List<WordScore> document : docTopics) {
        Long userID = llda.getDocIDFromIndex(docID);
        FileOutputStream fileOut = new FileOutputStream(dirName+"/"+userID+".csv");
        PrintWriter writeOut = new PrintWriter(fileOut);
        writeOut.println("\"topic\",\"probability\"");
        for(WordScore topic : document) {
          writeOut.println(topic.getWord()+","+topic.getScore());
        }
        writeOut.close();
        docID++;
      }
      
      
      //NOTE: We are saving these for now. However, we always have a saved model
      //and we can get these attributes from the model
      
      //should also save the topic-word distributions
      //okay, so we should definitely serialize topics and vocab
      Map<String,Integer> vocab = llda.getVocab();
      double[][] topics = llda.getTopicsUnsorted();
      ArrayList<String> topicIDs = llda.getTopicsIDList();
      
      //Save topics
      FileOutputStream topicsFileOut = new FileOutputStream(dirName+"/TOPICS.obj");
      ObjectOutputStream topicsObjectOut = new ObjectOutputStream(topicsFileOut);
      topicsObjectOut.writeObject(topics);

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of uk.ac.cam.ha293.tweetlabel.types.Category

cc.mallet.pipe.CharSequence2TokenSequence

cc.mallet.pipe.Pipe

cc.mallet.pipe.PrintInputAndTarget

cc.mallet.pipe.SerialPipes

cc.mallet.pipe.TokenSequence2FeatureSequence

cc.mallet.pipe.TokenSequenceLowercase

cc.mallet.pipe.TokenSequenceRemoveStopwords

cc.mallet.topics.ParallelTopicModel

cc.mallet.topics.SimpleLDA

cc.mallet.types.Instance

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by ORACLE Inc. Contact coftware#gmail.com.