Package cc.mallet.types

Examples of cc.mallet.types.FeatureSequence


     
      // Resample the concentration parameter `alpha` from per-document draws.
      // NOTE(review): this looks like an auxiliary-variable update for a
      // document-level concentration parameter -- confirm against the enclosing method.
      //alpha: document level
      // qs accumulates the Bernoulli draws; qw accumulates the logs of the Beta draws.
      double qs = 0.0;
      double qw = 0.0;
     
      FeatureSequence fs;
      int seqLen;
      for(int m=0 ; m< numDocuments ; m++){
        // Length of document m's feature sequence drives both draws below.
        fs = (FeatureSequence) instances.get(m).getData();
        seqLen = fs.getLength();
        // Bernoulli draw with parameter seqLen / (seqLen + alpha).
        // assumes alpha is a floating-point field so the division is not integer division -- TODO confirm
        qs += Samplers.randBernoulli(seqLen/(seqLen+alpha));
        // Log of a Beta(alpha + 1, seqLen) draw.
        qw += Math.log( Samplers.randBeta(alpha+1, seqLen));
      }
      // New alpha is a Gamma draw with first argument (a_alpha + tables - qs) and
      // second argument 1 / (b_alpha - qw).
      // presumably shape and scale respectively; verify against Samplers.randGamma
      alpha = Samplers.randGamma(a_alpha+ tables -qs, 1.0/(b_alpha-qw));
    }
View Full Code Here


     // Rewrite every stored index through the old-to-new lookup table k2knew.
     // First pass: replace each of the first K entries of kactive with its remapped value.
     for(int i=0; i<K; i++){
       kactive.set(i, k2knew[kactive.get(i)]);
     }
    
     // Second pass: apply the same remapping to every per-token entry z[m][n],
     // walking each document's feature sequence to get its length.
     for(int m=0; m<numDocuments ; m++){
       FeatureSequence fs = (FeatureSequence) instances.get(m).getData();
      
       for(int n=0 ; n<fs.getLength() ; n++){
         z[m][n]=k2knew[z[m][n]];
       }
     }
    
   }
View Full Code Here

   private void init(){
    
     nmk = new int[numDocs][K];
     z = new int[numDocs][];
     int type, token, seqLen;
     FeatureSequence fs;
     for(int m=0; m < numDocs; m++){
           
       fs =(FeatureSequence) testInstances.get(m).getData();
       seqLen=fs.getLength();
       effectiveDocLength[m]=seqLen;
       totalTokens+=seqLen;
       z[m]= new int[seqLen];
      
       for(token=0 ; token < seqLen ; token++){
        
         type = fs.getIndexAtPosition(token);
        
         //set unseen word to "-1"
           if(type >= numTypes){
             z[m][token]=(-1);
             effectiveDocLength[m]--;
View Full Code Here

     }
   }
  
   private void updateDocs(int m){
    
     FeatureSequence fs = (FeatureSequence) testInstances.get(m).getData();
     int seqLen = fs.getLength();
     int type, token;
     double sum;
     double[] pp = new double[K];
    
     for(token=0 ; token < seqLen ; token++){
      
       //skip unseen word
       if(z[m][token]<0){continue;}
      
       type = fs.getIndexAtPosition(token);
      
       //decrement
       int k = z[m][token];
       nmk[m][k]--;
      
View Full Code Here

     }
    
     //calculate LL
     for(int m=0 ; m<numDocs ; m++){
      
       FeatureSequence fs = (FeatureSequence) testInstances.get(m).getData();
       int seqLen = fs.getLength();
       int type, token;
      
       for(token=0 ; token < seqLen ; token++){
        
         type = fs.getIndexAtPosition(token);
         //only consider existed type
         if(type < numTypes){
           double sum =0.0;
           for(int k=0 ; k<K ; k++){
             sum += (theta[m][k]*phi[k][type]);
 
View Full Code Here

  {
    // Build a numLabels x numLabels boolean matrix in which connections[a][b] is true
    // when label b directly follows label a in the target sequence of at least one
    // training instance.
    int numLabels = outputAlphabet.size();
    boolean[][] connections = new boolean[numLabels][numLabels];
    for (int i = 0; i < trainingSet.size(); i++) {
      Instance instance = trainingSet.get(i);
      // The instance's target is read as the label sequence.
      FeatureSequence output = (FeatureSequence) instance.getTarget();
      // Start at 1 so each position j is paired with its predecessor j-1.
      for (int j = 1; j < output.size(); j++) {
        int sourceIndex = outputAlphabet.lookupIndex (output.get(j-1));
        int destIndex = outputAlphabet.lookupIndex (output.get(j));
        // Both labels are expected to resolve to non-negative alphabet indices
        // (only checked when assertions are enabled).
        assert (sourceIndex >= 0 && destIndex >= 0);
        connections[sourceIndex][destIndex] = true;
      }
    }
View Full Code Here

        weightsPresent[i].set (parameters.weights[i].indexAtLocation(j));
    // Put in the weights in the training set
    for (int i = 0; i < trainingData.size(); i++) {
      Instance instance = trainingData.get(i);
      FeatureVectorSequence input = (FeatureVectorSequence) instance.getData();
      FeatureSequence output = (FeatureSequence) instance.getTarget();
      // gsc: trainingData can have unlabeled instances as well
      if (output != null && output.size() > 0) {
        // Do it for the paths consistent with the labels...
        sumLatticeFactory.newSumLattice (this, input, output, new Transducer.Incrementor() {
          public void incrementTransition (Transducer.TransitionIterator ti, double count) {
            State source = (CRF.State)ti.getSourceState();
            FeatureVector input = (FeatureVector)ti.getInput();
View Full Code Here

TOP

Related Classes of cc.mallet.types.FeatureSequence

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.