Examples of cc.mallet.types.LabelSequence

cc.mallet.types.LabelSequence

    // Excerpt: tokenizes `document` (defined outside this excerpt) using a CharSequenceLexer.
    // The resulting tokenization is passed to DocumentExtraction together with the tags below.
    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());


    // Look up the three labels used by this example in the label dictionary `dict`.
    Label O = dict.lookupLabel ("O");
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label VB = dict.lookupLabel ("VERB");
    // Nine labels in sequence; presumably one per token of `toks` -- TODO confirm against the full test.
    LabelSequence tags = new LabelSequence (new Label[] { O, ANML, ANML, ANML, VB, O, O, ANML, ANML });


    // Build the extraction from the tokens and tags; the final argument is the string "O"
    // (presumably the name of the background label -- verify against DocumentExtraction).
    DocumentExtraction extr = new DocumentExtraction ("Test", dict, toks, tags, "O");
    String actualXml = extr.toXmlString();
    // Expected rendering: each run of identically-labelled tokens is wrapped in an element named
    // after its label, while the words tagged "O" appear unwrapped. Note the CRLF line endings.
    String expectedXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
            "<doc>the <ANIMAL>quick brown fox </ANIMAL><VERB>leapt </VERB>over the <ANIMAL>lazy dog</ANIMAL></doc>\r\n";

View Full Code Here

    // Excerpt: labels for a begin/inside style tagging. Note that the variable VB is bound to
    // the label "I-VERB" and ANML to the un-prefixed label "ANIMAL".
    Label O = dict.lookupLabel ("O");
    Label BANML = dict.lookupLabel ("B-ANIMAL");
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label BVB = dict.lookupLabel ("B-VERB");
    Label VB = dict.lookupLabel ("I-VERB");
    // Nine labels in sequence; the second B-ANIMAL (4th position) follows directly after an ANIMAL run.
    LabelSequence tags = new LabelSequence (new Label[] { O, BANML, ANML, BANML, BVB, VB, O, ANML, ANML });


    // Same extraction as a plain label run, but with an explicit BIOTokenizationFilter;
    // `toks` and `dict` come from outside this excerpt. The meaning of the null fifth
    // argument is not visible here -- verify against the DocumentExtraction constructor.
    DocumentExtraction extr = new DocumentExtraction ("Test", dict, toks, tags, null, "O", new BIOTokenizationFilter());
    String actualXml = extr.toXmlString();
    // Expected rendering: the second B-ANIMAL opens a new <ANIMAL> element instead of extending
    // the previous one, and B-VERB followed by I-VERB yields a single <VERB> element.
    String expectedXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
            "<doc>the <ANIMAL>quick brown </ANIMAL><ANIMAL>fox </ANIMAL><VERB>leapt over </VERB>the <ANIMAL>lazy dog</ANIMAL></doc>\r\n";

View Full Code Here

    // Excerpt: label lookups for this example. `O`, `dict`, `document` and `toks` are defined
    // outside the lines shown here.
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label VB = dict.lookupLabel ("VERB");
    Label JJ = dict.lookupLabel ("ADJ");
    Label MAMMAL = dict.lookupLabel ("MAMMAL");


    // Nine labels in sequence, passed to the tokenization filter below alongside `toks`.
    LabelSequence tags = new LabelSequence (new Label[] { O, ANML, ANML, ANML, VB, O, ANML, ANML, ANML });


    // Let the default filter build the labeled spans from the tokens and tags, with `O`
    // passed as the third argument (presumably the background label -- TODO confirm).
    LabeledSpans spans = new DefaultTokenizationFilter ().constructLabeledSpans (dict, document, O, toks, tags);


    // Manually add one more span, labelled MAMMAL, over toks.subspan (3, 4).
    // NOTE(review): the variable name suggests this covers the single token "fox"; the meaning
    // of the `false` flag is not visible in this excerpt -- check the LabeledSpan constructor.
    Span foxToken = toks.subspan (3, 4);
    spans.add (new LabeledSpan (foxToken, MAMMAL, false));

View Full Code Here

    // Excerpt: labels whose names join several tag names with '|'. `O`, `ANML`, `dict` and
    // `toks` are defined outside the lines shown here.
    Label ANML_MAMM = dict.lookupLabel ("ANIMAL|MAMMAL");
    Label VB = dict.lookupLabel ("VERB");
    Label ANML_JJ = dict.lookupLabel ("ANIMAL|ADJ");
    Label ANML_JJ_MAMM = dict.lookupLabel ("ANIMAL|ADJ|MAMMAL");


    // Nine labels in sequence, mixing plain and '|'-joined labels, rendered through a
    // HierarchicalTokenizationFilter.
    LabelSequence tags = new LabelSequence (new Label[] { O, ANML, ANML, ANML_MAMM, VB, O, ANML, ANML_JJ, ANML_JJ_MAMM });
    DocumentExtraction extr = new DocumentExtraction ("Test", dict, toks, tags, null, "O", new HierarchicalTokenizationFilter ());


    String actualXml = extr.toXmlString();
    // Expected rendering: each '|'-separated part of a label becomes one nesting level, e.g.
    // "ANIMAL|ADJ|MAMMAL" appears as <MAMMAL> inside <ADJ> inside <ANIMAL>.
    String expectedXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
            "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the <ADJ>lazy <MAMMAL>dog</MAMMAL></ADJ></ANIMAL></doc>\r\n";

View Full Code Here

    public Instance pipe (Instance carrier)
    {
      Object inputData = carrier.getData();
      Alphabet features = getDataAlphabet();
      LabelAlphabet labels;
      LabelSequence target = null;
      String [][] tokens;
      if (inputData instanceof String)
        tokens = parseSentence((String)inputData);
      else if (inputData instanceof String[][])
        tokens = (String[][])inputData;
      else
        throw new IllegalArgumentException("Not a String or String[][]; got "+inputData);
      FeatureVector[] fvs = new FeatureVector[tokens.length];
      if (isTargetProcessing())
      {
        labels = (LabelAlphabet)getTargetAlphabet();
        target = new LabelSequence (labels, tokens.length);
      }
      for (int l = 0; l < tokens.length; l++) {
        int nFeatures;
        if (isTargetProcessing())
        {
          if (tokens[l].length < 1)
            throw new IllegalStateException ("Missing label at line " + l + " instance "+carrier.getName ());
          nFeatures = tokens[l].length - 1;
          target.add(tokens[l][nFeatures]);
        }
        else nFeatures = tokens[l].length;
        ArrayList<Integer> featureIndices = new ArrayList<Integer>();
        for (int f = 0; f < nFeatures; f++) {
          int featureIndex = features.lookupIndex(tokens[l][f]);
          // gdruck
          // If the data alphabet's growth is stopped, featureIndex
          // will be -1.  Ignore these features.
          if (featureIndex >= 0) {
            featureIndices.add(featureIndex);
          }
        }
        int[] featureIndicesArr = new int[featureIndices.size()];
        for (int index = 0; index < featureIndices.size(); index++) {
          featureIndicesArr[index] = featureIndices.get(index);
        }
         fvs[l] = featureInductionOption.value ? new AugmentableFeatureVector(features, featureIndicesArr, null, featureIndicesArr.length) : 
          new FeatureVector(features, featureIndicesArr);
      }
      carrier.setData(new FeatureVectorSequence(fvs));
      if (isTargetProcessing())
        carrier.setTarget(target);
      else
        carrier.setTarget(new LabelSequence(getTargetAlphabet()));
      return carrier;
    }

View Full Code Here

    public Instance pipe (Instance carrier)
    {
      Object inputData = carrier.getData();
      Alphabet features = getDataAlphabet();
      LabelAlphabet labels;
      LabelSequence target = null;
      String [][] tokens;
      if (inputData instanceof String)
        tokens = parseSentence((String)inputData);
      else if (inputData instanceof String[][])
        tokens = (String[][])inputData;
      else
        throw new IllegalArgumentException("Not a String or String[][]; got "+inputData);
      FeatureVector[] fvs = new FeatureVector[tokens.length];
      if (isTargetProcessing())
      {
        labels = (LabelAlphabet)getTargetAlphabet();
        target = new LabelSequence (labels, tokens.length);
      }
      for (int l = 0; l < tokens.length; l++) {
        int nFeatures;
        if (isTargetProcessing())
        {
          if (tokens[l].length < 1)
            throw new IllegalStateException ("Missing label at line " + l + " instance "+carrier.getName ());
          nFeatures = tokens[l].length - 1;
          target.add(tokens[l][nFeatures]);
        }
        else nFeatures = tokens[l].length;
        ArrayList<Integer> featureIndices = new ArrayList<Integer>();
        for (int f = 0; f < nFeatures; f++) {
          int featureIndex = features.lookupIndex(tokens[l][f]);
          // gdruck
          // If the data alphabet's growth is stopped, featureIndex
          // will be -1.  Ignore these features.
          if (featureIndex >= 0) {
            featureIndices.add(featureIndex);
          }
        }
        int[] featureIndicesArr = new int[featureIndices.size()];
        for (int index = 0; index < featureIndices.size(); index++) {
          featureIndicesArr[index] = featureIndices.get(index);
        }
         fvs[l] = featureInductionOption.value ? new AugmentableFeatureVector(features, featureIndicesArr, null, featureIndicesArr.length) : 
          new FeatureVector(features, featureIndicesArr);
      }
      carrier.setData(new FeatureVectorSequence(fvs));
      if (isTargetProcessing())
        carrier.setTarget(target);
      else
        carrier.setTarget(new LabelSequence(getTargetAlphabet()));
      return carrier;
    }

View Full Code Here

    public Instance pipe (Instance carrier)
    {
      Object inputData = carrier.getData();
      Alphabet features = getDataAlphabet();
      LabelAlphabet labels;
      LabelSequence target = null;
      String [][] tokens;
      if (inputData instanceof String)
        tokens = parseSentence((String)inputData);
      else if (inputData instanceof String[][])
        tokens = (String[][])inputData;
      else
        throw new IllegalArgumentException("Not a String or String[][]; got "+inputData);
      FeatureVector[] fvs = new FeatureVector[tokens.length];
      if (isTargetProcessing())
      {
        labels = (LabelAlphabet)getTargetAlphabet();
        target = new LabelSequence (labels, tokens.length);
      }
      for (int l = 0; l < tokens.length; l++) {
        int nFeatures;
        if (isTargetProcessing())
        {
          if (tokens[l].length < 1)
            throw new IllegalStateException ("Missing label at line " + l + " instance "+carrier.getName ());
          nFeatures = tokens[l].length - 1;
          target.add(tokens[l][nFeatures]);
        }
        else nFeatures = tokens[l].length;
        ArrayList<Integer> featureIndices = new ArrayList<Integer>();
        for (int f = 0; f < nFeatures; f++) {
          int featureIndex = features.lookupIndex(tokens[l][f]);
          // gdruck
          // If the data alphabet's growth is stopped, featureIndex
          // will be -1.  Ignore these features.
          if (featureIndex >= 0) {
            featureIndices.add(featureIndex);
          }
        }
        int[] featureIndicesArr = new int[featureIndices.size()];
        for (int index = 0; index < featureIndices.size(); index++) {
          featureIndicesArr[index] = featureIndices.get(index);
        }
         fvs[l] = featureInductionOption.value ? new AugmentableFeatureVector(features, featureIndicesArr, null, featureIndicesArr.length) : 
          new FeatureVector(features, featureIndicesArr);
      }
      carrier.setData(new FeatureVectorSequence(fvs));
      if (isTargetProcessing())
        carrier.setTarget(target);
      else
        carrier.setTarget(new LabelSequence(getTargetAlphabet()));
      return carrier;
    }

View Full Code Here

0 1

TOP

Related Classes of cc.mallet.types.LabelSequence

cc.mallet.extract.test.TestDocumentExtraction

cc.mallet.fst.SimpleTagger$SimpleTaggerSentence2FeatureVectorSequence

cc.mallet.fst.SimpleTaggerStdin$SimpleTaggerSentence2FeatureVectorSequence

cc.mallet.fst.tests.TestMEMM$TestMEMMTokenSequenceRemoveSpaces

cc.mallet.pipe.AddClassifierTokenPredictions

cc.mallet.topics.LDAStream

cc.mallet.types.FeatureVectorSequence.Iterator

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.