Package cc.mallet.extract

Examples of cc.mallet.extract.StringSpan


      int start = buf.length ();
      buf.append (text);
      int end = buf.length ();
      buf.append (" ");

      StringSpan span = new StringSpan (buf, start, end);

      while (j < maxFeatureIdx) {
        span.setFeatureValue (toks[j].intern (), 1.0);
        j++;
      }

      if (includeTokenText) {
        span.setFeatureValue ((textFeaturePrefix+text).intern(), 1.0);
      }

      if (labelsAtEnd) {
        int firstLblIdx = j;
        while (j < toks.length) {
View Full Code Here


        lexer.setCharSequence (string.subSequence (textStart, textEnd));
        while (lexer.hasNext()) {
          lexer.next ();
          int tokStart = textStart + lexer.getStartOffset ();
          int tokEnd = textStart + lexer.getEndOffset ();
          dataTokens.add (new StringSpan (string, tokStart, tokEnd));
          targetTokens.add (new Token (tag));
        }
      }
      textStart = nextStart;
      tag = nextTag;
View Full Code Here

        int start = buf.length ();
        buf.append (token.getText());
        int end = buf.length();

        StringSpan span = new StringSpan (buf, start, end);
        span.setFeatures (token.getFeatures ());
        span.setProperties (token.getProperties ());

        spans.add (span);
        buf.append (" ");
      }
View Full Code Here

      } else
        nFeatures = tokens[l].length;
      int start = source.length();
      String word = makeText(tokens[l]);
      source.append(word + " ");
      Token tok = new StringSpan(source, start, source.length() - 1);
      if (setTokensAsFeatures) {
        for (int f = 0; f < nFeatures; f++)
          tok.setFeatureValue(tokens[l][f], 1.0);
      } else {
        for (int f = 1; f < nFeatures; f++)
          tok.setFeatureValue(tokens[l][f], 1.0);
      }
      ts.add(tok);
    }
    carrier.setData(ts);
    if (isTargetProcessing())
View Full Code Here

      if (saveSource) {
        if (word.equals ("-<S>-")) source.append ("\n\n");
        source.append (word); source.append (" ");
      }

      Token token = new StringSpan (source, start, source.length () - 1);

      // Word and tag unigram at current time
      if (doSpelling) {
        for (int j = 0; j < endings.length; j++) {
          ending[2][j] = ending[1][j];
          ending[1][j] = ending[0][j];
          ending[0][j] = endingPatterns[j].matcher(word).matches();
          if (ending[0][j]) token.setFeatureValue (endingNames[0][0][j], 1);
        }
      }

      if (doTags) {
        token.setFeatureValue ("T="+tag, 1);
      }

      if (doPhrases) {
        token.setFeatureValue ("P="+phrase, 1);
      }

      data.add (token);

      if (isTargetProcessing ()) {
View Full Code Here

      Label notstart = dict.lookupLabel ("notstart");

      boolean lastWasSpace = true;
      StringBuffer sb = new StringBuffer();
      for (int i = 0; i < ts.size(); i++) {
        StringSpan t = (StringSpan) ts.getSpan(i);
        if (t.getText().equals(" "))
          lastWasSpace = true;
        else {
          sb.append(t.getText());
          newTs.add(t);
          labelSeq.add(lastWasSpace ? "start" : "notstart");
          lastWasSpace = false;
        }
      }
View Full Code Here

    CharSequence string = (CharSequence) carrier.getData();
    lexer.setCharSequence (string);
    TokenSequence ts = new StringTokenization (string);
    while (lexer.hasNext()) {
      lexer.next();
      ts.add (new StringSpan (string, lexer.getStartOffset (), lexer.getEndOffset ()));
    }
    carrier.setData(ts);
    return carrier;
  }
View Full Code Here

        lexer.setCharSequence (string.subSequence (textStart, textEnd));
        while (lexer.hasNext()) {
          lexer.next ();
          int tokStart = textStart + lexer.getStartOffset ();
          int tokEnd = textStart + lexer.getEndOffset ();
          dataTokens.add (new StringSpan (string, tokStart, tokEnd));
          targetTokens.add (new Token (tag));
        }
      }
      textStart = nextStart;
      tag = nextTag;
View Full Code Here

      int start = buf.length ();
      buf.append (text);
      int end = buf.length ();
      buf.append (" ");

      StringSpan span = new StringSpan (buf, start, end);

      while (j < maxFeatureIdx) {
        span.setFeatureValue (toks[j].intern (), 1.0);
        j++;
      }

      if (includeTokenText) {
        span.setFeatureValue ((textFeaturePrefix+text).intern(), 1.0);
      }

      if (labelsAtEnd) {
        int firstLblIdx = j;
        while (j < toks.length) {
View Full Code Here

TOP

Related Classes of cc.mallet.extract.StringSpan

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.