Package edu.stanford.nlp.tagger.util

Source Code of edu.stanford.nlp.tagger.util.MakePrefixFile

package edu.stanford.nlp.tagger.util;

import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.tagger.maxent.TaggerConfig;
import edu.stanford.nlp.tagger.io.TaggedFileRecord;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;

import java.util.List;
import java.util.Properties;
import java.util.Random;

/**
* Takes a tagger data file of any format readable by the tagger and
* outputs a new file containing tagged sentences which are prefixes
* of the original data.  The prefixes are of random length.  If the
* -fullSentence parameter is true, the original sentence is output
* after each prefix.
* <br>
* Input is taken from the tagger file described in "input".  Output
* goes to stdout.
*
* @author John Bauer
*/
public class MakePrefixFile {

  public static void main(String[] args) {
    Properties config = StringUtils.argsToProperties(args);
    System.err.println(config);

    boolean fullSentence = PropertiesUtils.getBool(config, "fullSentence", false);

    Random random = new Random();
    String tagSeparator = config.getProperty("tagSeparator", TaggerConfig.TAG_SEPARATOR);

    TaggedFileRecord record = TaggedFileRecord.createRecord(config, config.getProperty("input"));
    for (List<TaggedWord> sentence : record.reader()) {
      int len = random.nextInt(sentence.size()) + 1;
      System.out.println(Sentence.listToString(sentence.subList(0, len), false, tagSeparator));
      if (fullSentence) {
        System.out.println(Sentence.listToString(sentence, false, tagSeparator));       
      }
    }
  }

 
}
TOP

Related Classes of edu.stanford.nlp.tagger.util.MakePrefixFile

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.