Examples of LDATopicModel


Examples of uk.ac.cam.ha293.tweetlabel.topics.LDATopicModel

    Corpus corpus = null;
    if(stem) corpus = Corpus.load("allprofiles-stemmed");
    else corpus = Corpus.load("allprofiles-unstemmed");
   
    //Check for model existence
    LDATopicModel lda = null;
    if(new File("models/lda/"+description+".model").exists()) {
      System.out.println("Found LDA model "+description);
      lda = LDATopicModel.load(description);
    } else {
      System.out.println("Couldn't find LDA model "+description+", creating new one");
      lda = new LDATopicModel(corpus,numTopics,burn,sample,0,alpha,0.01);
      lda.runGibbsSampling();
      lda.save(description);
    }
   
    try {
      //Get the document topic distributions and store these:
      //one CSV file per document, written into dirName
      List<List<WordScore>> docTopics = lda.getDocuments();
      //Running position in docTopics, handed to getDocIDFromIndex to obtain the ID used as file name
      //(presumably a user ID, going by the variable name - confirm against LDATopicModel)
      int docID = 0;
      for(List<WordScore> document : docTopics) {
        Long userID = lda.getDocIDFromIndex(docID);
        //NOTE(review): nothing visible here creates dirName; FileOutputStream fails if it is missing
        FileOutputStream fileOut = new FileOutputStream(dirName+"/"+userID+".csv");
        //NOTE(review): this PrintWriter constructor encodes with the platform default charset
        PrintWriter writeOut = new PrintWriter(fileOut);
        //Quoted header row, then one "word,score" row per WordScore entry
        writeOut.println("\"topic\",\"probability\"");
        for(WordScore topic : document) {
          //Values are written as-is, without the quoting used in the header
          writeOut.println(topic.getWord()+","+topic.getScore());
        }
        //NOTE(review): not in a finally block, so the file stays open if anything above throws
        writeOut.close();
        docID++;
      }
     
     
      //NOTE: We are saving these for now. However, we always have a saved model
      //and we can get these attributes from the model
     
      //should also save the topic-word distributions
      //okay, so we should definitely serialize topics and vocab
      //NOTE(review): vocab is fetched here but not used in the visible part of this block
      Map<String,Integer> vocab = lda.getVocab();
      double[][] topics = lda.getTopicsUnsorted();
     
      //Save topics: the double[][] is written with Java object serialization to TOPICS.obj in dirName
      FileOutputStream topicsFileOut = new FileOutputStream(dirName+"/TOPICS.obj");
      ObjectOutputStream topicsObjectOut = new ObjectOutputStream(topicsFileOut);
      //NOTE(review): the matching close() is not visible in this excerpt - confirm the stream is closed
      topicsObjectOut.writeObject(topics);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.