Package cc.mallet.types

Examples of cc.mallet.types.Sequence


   
    for (int i = 0; i < instances.size(); i++) {
      if (viterbiOutputStream != null)
        viterbiOutputStream.println ("Viterbi path for "+description+" instance #"+i);
      Instance instance = instances.get(i);
      Sequence input = (Sequence) instance.getData();
      TokenSequence sourceTokenSequence = null;
      if (instance.getSource() instanceof TokenSequence)
        sourceTokenSequence = (TokenSequence) instance.getSource();

      Sequence trueOutput = (Sequence) instance.getTarget();
      assert (input.size() == trueOutput.size());
      Sequence predOutput = transducerTrainer.getTransducer().transduce (input);
      assert (predOutput.size() == trueOutput.size());
     
      for (int j = 0; j < trueOutput.size(); j++) {
        FeatureVector fv = (FeatureVector) input.get(j);
        //viterbiOutputStream.println (tokens.charAt(j)+" "+trueOutput.get(j).toString()+
        //'/'+predOutput.get(j).toString()+"  "+ fv.toString(true));
        if (sourceTokenSequence != null)
          viterbiOutputStream.print (sourceTokenSequence.get(j).getText()+": ");
        viterbiOutputStream.println (trueOutput.get(j).toString()+
            '/'+predOutput.get(j).toString()+"  "+ fv.toString(true));
      }
    }
  }
View Full Code Here


      klfi.induceFeaturesFor (testing, false, false);
    }
    Sequence[] ret = new Sequence[testing.size()];
    for (int i = 0; i < testing.size(); i++) {
      Instance instance = testing.get(i);
      Sequence input = (Sequence) instance.getData();
      Sequence trueOutput = (Sequence) instance.getTarget();
      assert (input.size() == trueOutput.size());
      Sequence predOutput = new MaxLatticeDefault(this, input).bestOutputSequence();
      assert (predOutput.size() == trueOutput.size());
      ret[i] = predOutput;
    }
    return ret;
  }
View Full Code Here

    for (int i = 0; i < pred.size(); i++) {
      Instance aPred = pred.get (i);
      Instance aTarget = targets.get (i);
      Tokenization input = (Tokenization) aPred.getData ();
      Sequence predSeq = (Sequence) aPred.getTarget ();
      Sequence targetSeq = (Sequence) aTarget.getTarget ();
      DocumentExtraction docextr = new DocumentExtraction ("TEST"+i, dict, input, predSeq, targetSeq, "O");
      extraction.addDocumentExtraction (docextr);
    }

    return extraction;
View Full Code Here

    public Instance pipe(Instance carrier)
    {
      StringBuffer sb = new StringBuffer();
      String source = (String) carrier.getSource();
      Sequence as = (Sequence) carrier.getTarget();
      //int startLabelIndex = as.getAlphabet().lookupIndex("start");
      for (int i = 0; i < source.length(); i++) {
        System.out.println("target[" + i + "]=" + as.get(i).toString());
        if (as.get(i).toString().equals("start") && i != 0)
          sb.append(' ');
        sb.append(source.charAt(i));
      }
      carrier.setSource(sb.toString());
      System.out.println("carrier.getSource() = " + carrier.getSource());
View Full Code Here

   
    public Double call() throws Exception {
      double value = 0;
      for (int ii = start; ii < end; ii++) {
        Instance inst = trainingSet.get(ii);
        Sequence input = (Sequence) inst.getData();
        // logZ     
        value -= new SumLatticePR(crf, ii, input, null, modelCopy, cachedDots[ii], true, null, null, false).getTotalWeight();
      }
      return value;
    }
View Full Code Here

        }

      for (int i = 0; i < theTrainingData.size(); i++) {
        logger.info ("instance="+i);
        Instance instance = theTrainingData.get(i);
        Sequence input = (Sequence) instance.getData();
        Sequence trueOutput = (Sequence) instance.getTarget();
        assert (input.size() == trueOutput.size());
        SumLattice lattice =
          crf.sumLatticeFactory.newSumLattice (crf, input, (Sequence)null, (Transducer.Incrementor)null, 
              (LabelAlphabet)theTrainingData.getTargetAlphabet());
        int prevLabelIndex = 0;          // This will put extra error instances in this cluster
        for (int j = 0; j < trueOutput.size(); j++) {
          Label label = (Label) ((LabelSequence)trueOutput).getLabelAtPosition(j);
          assert (label != null);
          //System.out.println ("Instance="+i+" position="+j+" fv="+lattice.getLabelingAtPosition(j).toString(true));
          LabelVector latticeLabeling = lattice.getLabelingAtPosition(j);
          double trueLabelProb = latticeLabeling.value(label.getIndex());
View Full Code Here

      System.out.println("Training Accuracy after training = " + memm.averageTokenAccuracy(lists[0]));
      System.out.println("Testing  Accuracy after training = " + memm.averageTokenAccuracy(lists[1]));
      System.out.println("Training results:");
      for (int i = 0; i < lists[0].size(); i++) {
        Instance inst = lists[0].get(i);
        Sequence input = (Sequence) inst.getData ();
        Sequence output = memm.transduce (input);
        System.out.println (output);
      }
      System.out.println ("Testing results:");
      for (int i = 0; i < lists[1].size(); i++) {
        Instance inst = lists[1].get(i);
        Sequence input = (Sequence) inst.getData ();
        Sequence output = memm.transduce (input);
        System.out.println (output);
      }
    }
  }
View Full Code Here

    numTrueSegments = numPredictedSegments = numCorrectSegments = 0;
    numCorrectSegmentsInAlphabet = numCorrectSegmentsOOV = 0;
    numIncorrectSegmentsInAlphabet = numIncorrectSegmentsOOV = 0;
    for (int i = 0; i < data.size(); i++) {
      Instance instance = data.get(i);
      Sequence input = (Sequence) instance.getData();
      //String tokens = null;
      //if (instance.getSource() != null)
      //tokens = (String) instance.getSource().toString();
      Sequence trueOutput = (Sequence) instance.getTarget();
      assert (input.size() == trueOutput.size());
      Sequence predOutput = model.transduce (input);
      assert (predOutput.size() == trueOutput.size());
      boolean trueStart, predStart;
      for (int j = 0; j < trueOutput.size(); j++) {
        totalTokens++;
        trueStart = predStart = false;
        if (segmentStartTag.equals(trueOutput.get(j))) {
          numTrueSegments++;
          trueStart = true;
        }
        if (segmentStartTag.equals(predOutput.get(j))) {
          predStart = true;
          numPredictedSegments++;
        }
        if (trueStart && predStart) {
          int m;
          //StringBuffer sb = new StringBuffer();
          //sb.append (tokens.charAt(j));
          for (m = j+1; m < trueOutput.size(); m++) {
            trueStart = predStart = false; // Here, these actually mean "end", not "start"
            if (segmentEndTag.equals(trueOutput.get(m)))
              trueStart = true;
            if (segmentEndTag.equals(predOutput.get(m)))
              predStart = true;
            if (trueStart || predStart) {
              if (trueStart && predStart) {
                // It is a correct segment
                numCorrectSegments++;
                //if (HashFile.allLexicons.contains(sb.toString()))
                //numCorrectSegmentsInAlphabet++;
                //else
                //numCorrectSegmentsOOV++;
              } else {
                // It is an incorrect segment; let's find out if it was in the lexicon
                //for (int mm = m; mm < trueOutput.size(); mm++) {
                //if (segmentEndTag.equals(predOutput.get(mm)))
                //break;
                //sb.append (tokens.charAt(mm));
                //}
                //if (HashFile.allLexicons.contains(sb.toString()))
                //numIncorrectSegmentsInAlphabet++;
                //else
                //numIncorrectSegmentsOOV++;
              }
              break;
            }
            //sb.append (tokens.charAt(m));
          }
          // for the case of the end of the sequence
          if(m==trueOutput.size()) {
            if (trueStart==predStart) {
              numCorrectSegments++;
              //if (HashFile.allLexicons.contains(sb.toString()))
              //numCorrectSegmentsInAlphabet++;
              //else
              //numCorrectSegmentsOOV++;
            } else {
              //if (HashFile.allLexicons.contains(sb.toString()))
              //numIncorrectSegmentsInAlphabet++;
              //else
              //numIncorrectSegmentsOOV++;
            }
          }
        } else if (predStart) {
          // Here is an incorrect predicted start, find out if the word is in the lexicon
          //StringBuffer sb = new StringBuffer();
          //sb.append (tokens.charAt(j));
          //for (int mm = j+1; mm < trueOutput.size(); mm++) {
          //if (segmentEndTag.equals(predOutput.get(mm)))
          //break;
          //sb.append (tokens.charAt(mm));
          //}
          //if (HashFile.allLexicons.contains(sb.toString()))
          //numIncorrectSegmentsInAlphabet++;
          //else
          //numIncorrectSegmentsOOV++;
        }
        if (trueOutput.get(j).equals(predOutput.get(j)))
          numCorrectTokens++;
      }
    }
    logger.info (description +" accuracy="+((double)numCorrectTokens)/totalTokens);
    double precision = numPredictedSegments == 0 ? 1 : ((double)numCorrectSegments) / numPredictedSegments;
View Full Code Here

      else
      {
        boolean includeInput = includeInputOption.value();
        for (int i = 0; i < testData.size(); i++)
        {
          Sequence input = (Sequence)testData.get(i).getData();
          Sequence[] outputs = apply(crf, input, nBestOption.value);
          int k = outputs.length;
          boolean error = false;
          for (int a = 0; a < k; a++) {
            if (outputs[a].size() != input.size()) {
              logger.info("Failed to decode input sequence " + i + ", answer " + a);
              error = true;
            }
          }
          if (!error) {
            for (int j = 0; j < input.size(); j++)
            {
               StringBuffer buf = new StringBuffer();
              for (int a = 0; a < k; a++)
                 buf.append(outputs[a].get(j).toString()).append(" ");
              if (includeInput) {
                FeatureVector fv = (FeatureVector)input.get(j);
                buf.append(fv.toString(true));               
              }
              System.out.println(buf.toString());
            }
            System.out.println();
View Full Code Here

    expectations = new CRF.Factors(crf.getParameters());

    constraints.zero();
    for (int ii = 0; ii < trainingSet.size(); ii++) {
      Instance inst = trainingSet.get(ii);
      Sequence input = (Sequence) inst.getData();

      SumLatticePR geLatt =
        new SumLatticePR(crf, ii, input, null, auxModel, cachedDots[ii], false, null, null, true);
      double gammas[][] = geLatt.getGammas();
View Full Code Here

TOP

Related Classes of cc.mallet.types.Sequence

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by ORACLE Inc. Contact coftware#gmail.com.