Package cc.mallet.util

Examples of cc.mallet.util.CharSequenceLexer


    try {
      for (int i = 0; i < args.length; i++) {
        Instance carrier = new Instance (new File(args[i]), null, null, null);
        SerialPipes p = new SerialPipes (new Pipe[] {
          new Input2CharSequence (),
          new CharSequence2TokenSequence(new CharSequenceLexer())});
        carrier = p.newIteratorFrom (new SingleInstanceIterator(carrier)).next();
        TokenSequence ts = (TokenSequence) carrier.getData();
        System.out.println ("===");
        System.out.println (args[i]);
        System.out.println (ts.toString());
View Full Code Here



  public void testToXml () {
    LabelAlphabet dict = new LabelAlphabet ();
    String document = "the quick brown fox leapt over the lazy dog";
    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());

    Label O = dict.lookupLabel ("O");
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label VB = dict.lookupLabel ("VERB");
    LabelSequence tags = new LabelSequence (new Label[] { O, ANML, ANML, ANML, VB, O, O, ANML, ANML });
View Full Code Here

  }

   public void testToXmlBIO () {
    LabelAlphabet dict = new LabelAlphabet ();
    String document = "the quick brown fox leapt over the lazy dog";
    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());

    Label O = dict.lookupLabel ("O");
    Label BANML = dict.lookupLabel ("B-ANIMAL");
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label BVB = dict.lookupLabel ("B-VERB");
View Full Code Here

  public void testNestedToXML ()
  {
    LabelAlphabet dict = new LabelAlphabet ();
    String document = "the quick brown fox leapt over the lazy dog";
    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());

    Label O = dict.lookupLabel ("O");
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label VB = dict.lookupLabel ("VERB");
    Label JJ = dict.lookupLabel ("ADJ");
View Full Code Here

  public void testNestedXMLTokenizationFilter ()
  {
    LabelAlphabet dict = new LabelAlphabet ();
    String document = "the quick brown fox leapt over the lazy dog";
    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());

    Label O = dict.lookupLabel ("O");
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label ANML_MAMM = dict.lookupLabel ("ANIMAL|MAMMAL");
    Label VB = dict.lookupLabel ("VERB");
View Full Code Here

    this.backgroundTag = backgroundTag;
  }

  public SGML2TokenSequence (String regex, String backgroundTag)
  {
    this.lexer = new CharSequenceLexer (regex);
    this.backgroundTag = backgroundTag;
  }
View Full Code Here

    this.backgroundTag = backgroundTag;
  }

  /**
   * Constructs an instance with default settings by delegating to the
   * (lexer, backgroundTag) constructor with a no-argument
   * {@code CharSequenceLexer} and the background tag {@code "O"}.
   */
  public SGML2TokenSequence ()
  {
    this (new CharSequenceLexer(), "O");
  }
View Full Code Here

TOP

Related Classes of cc.mallet.util.CharSequenceLexer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.