Package cc.mallet.types

Examples of cc.mallet.types.Label


                                 Label backgroundTag)
   {
     int i = 0;
     int docidx = 0;
     while (i < tags.size()) {
       Label thisTag = dict.lookupLabel (tags.get(i).toString());
       int startTokenIdx = i;
       while (i < tags.size()) {
         Label nextTag = dict.lookupLabel (tags.get(i).toString ());
         if (thisTag != nextTag) break;
         i++;
       }
       int endTokenIdx = i;
       Span span = input.subspan(startTokenIdx, endTokenIdx);
View Full Code Here


  {
    double rsum = 0;
    Labeling tmpL;
    Classification tmpC;
    Instance tmpI;
    Label tmpLbl, tmpLbl2;
    int tmpInt;
    for(int i = 0; i < this.size(); i++) {
      tmpC = this.get(i);
      tmpI = tmpC.getInstance();
      tmpL = tmpC.getLabeling();
View Full Code Here

        Instance instance = iter.next();
        double instanceWeight = trainingList.getInstanceWeight(instance);
        FeatureVectorSequence fvs = (FeatureVectorSequence) instance.getData();
        // label of best instance in subList
        Object target = instance.getTarget();
        Label label = null;
        if (target instanceof Labels)
          label = ((Labels)target).get(0);
        else label = (Label)target;
        int positiveIndex =
          Integer.valueOf(label.getBestLabel().getEntry().toString()).intValue();
        if (positiveIndex == -1) { // invalid instance
          logger.warning("True label is -1. Skipping...");
           continue;
        }
        FeatureVector fv = (FeatureVector)fvs.get(positiveIndex);
View Full Code Here

        SumLattice lattice =
          crf.sumLatticeFactory.newSumLattice (crf, input, (Sequence)null, (Transducer.Incrementor)null, 
              (LabelAlphabet)theTrainingData.getTargetAlphabet());
        int prevLabelIndex = 0;          // This will put extra error instances in this cluster
        for (int j = 0; j < trueOutput.size(); j++) {
          Label label = (Label) ((LabelSequence)trueOutput).getLabelAtPosition(j);
          assert (label != null);
          //System.out.println ("Instance="+i+" position="+j+" fv="+lattice.getLabelingAtPosition(j).toString(true));
          LabelVector latticeLabeling = lattice.getLabelingAtPosition(j);
          double trueLabelProb = latticeLabeling.value(label.getIndex());
          int labelIndex = latticeLabeling.getBestIndex();
          //System.out.println ("position="+j+" trueLabelProb="+trueLabelProb);
          if (trueLabelProb < trueLabelProbThreshold) {
            logger.info ("Adding error: instance="+i+" position="+j+" prtrue="+trueLabelProb+
                (label == latticeLabeling.getBestLabel() ? "  " : " *")+
 
View Full Code Here

    {
      StringTokenization ts =  (StringTokenization) carrier.getData();
      StringTokenization newTs = new StringTokenization((CharSequence) ts.getDocument ());
      final LabelAlphabet dict = (LabelAlphabet) getTargetAlphabet();
      LabelSequence labelSeq = new LabelSequence(dict);
      Label start = dict.lookupLabel ("start");
      Label notstart = dict.lookupLabel ("notstart");

      boolean lastWasSpace = true;
      StringBuffer sb = new StringBuffer();
      for (int i = 0; i < ts.size(); i++) {
        StringSpan t = (StringSpan) ts.getSpan(i);
View Full Code Here

    // In SVMLight +1 and 1 are the same label. 
    // Adding a special case to normalize...
    if (classStr.equals("+1")) {
      classStr = "1";
    }
    Label label = ((LabelAlphabet)getTargetAlphabet()).lookupLabel(classStr, true);
    carrier.setTarget(label);
   
    // the rest are feature-value pairs
    ArrayList<Integer> indices = new ArrayList<Integer>();
    ArrayList<Double> values = new ArrayList<Double>();
View Full Code Here

  public void testToXml () {
    LabelAlphabet dict = new LabelAlphabet ();
    String document = "the quick brown fox leapt over the lazy dog";
    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());

    Label O = dict.lookupLabel ("O");
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label VB = dict.lookupLabel ("VERB");
    LabelSequence tags = new LabelSequence (new Label[] { O, ANML, ANML, ANML, VB, O, O, ANML, ANML });

    DocumentExtraction extr = new DocumentExtraction ("Test", dict, toks, tags, "O");
    String actualXml = extr.toXmlString();
    String expectedXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
View Full Code Here

   public void testToXmlBIO () {
    LabelAlphabet dict = new LabelAlphabet ();
    String document = "the quick brown fox leapt over the lazy dog";
    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());

    Label O = dict.lookupLabel ("O");
    Label BANML = dict.lookupLabel ("B-ANIMAL");
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label BVB = dict.lookupLabel ("B-VERB");
    Label VB = dict.lookupLabel ("I-VERB");
    LabelSequence tags = new LabelSequence (new Label[] { O, BANML, ANML, BANML, BVB, VB, O, ANML, ANML });

    DocumentExtraction extr = new DocumentExtraction ("Test", dict, toks, tags, null, "O", new BIOTokenizationFilter());
    String actualXml = extr.toXmlString();
    String expectedXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
View Full Code Here

  {
    LabelAlphabet dict = new LabelAlphabet ();
    String document = "the quick brown fox leapt over the lazy dog";
    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());

    Label O = dict.lookupLabel ("O");
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label VB = dict.lookupLabel ("VERB");
    Label JJ = dict.lookupLabel ("ADJ");
    Label MAMMAL = dict.lookupLabel ("MAMMAL");

    LabelSequence tags = new LabelSequence (new Label[] { O, ANML, ANML, ANML, VB, O, ANML, ANML, ANML });

    LabeledSpans spans = new DefaultTokenizationFilter ().constructLabeledSpans (dict, document, O, toks, tags);
View Full Code Here

  {
    LabelAlphabet dict = new LabelAlphabet ();
    String document = "the quick brown fox leapt over the lazy dog";
    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());

    Label O = dict.lookupLabel ("O");
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label ANML_MAMM = dict.lookupLabel ("ANIMAL|MAMMAL");
    Label VB = dict.lookupLabel ("VERB");
    Label ANML_JJ = dict.lookupLabel ("ANIMAL|ADJ");
    Label ANML_JJ_MAMM = dict.lookupLabel ("ANIMAL|ADJ|MAMMAL");

    LabelSequence tags = new LabelSequence (new Label[] { O, ANML, ANML, ANML_MAMM, VB, O, ANML, ANML_JJ, ANML_JJ_MAMM });
    DocumentExtraction extr = new DocumentExtraction ("Test", dict, toks, tags, null, "O", new HierarchicalTokenizationFilter ());

    String actualXml = extr.toXmlString();
View Full Code Here

TOP

Related Classes of cc.mallet.types.Label

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.