Package opennlp.tools.util

Examples of opennlp.tools.util.PlainTextByLineStream


   *          if true will output the sentences marked as news headlines
   */
  public ADSentenceSampleStream(FileInputStream in, String charsetName,
      boolean includeHeadlines) {
    try {
      this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(
          in, charsetName));
    } catch (UnsupportedEncodingException e) {
      // UTF-8 is available on all JVMs, will never happen
      throw new IllegalStateException(e);
    }
View Full Code Here


    Parameters params = ArgumentParser.parse(args, Parameters.class);

    CmdLineUtil.checkInputFile("Data", params.getData());
    FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());

    ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn
        .getChannel(), params.getEncoding());

    return new ChunkSampleStream(lineStream);
  }
View Full Code Here

  public ObjectStream<POSSample> create(String[] args) {
    Parameters params = ArgumentParser.parse(args, Parameters.class);

    ObjectStream<String> lineStream;
    try {
      lineStream = new PlainTextByLineStream(new InputStreamReader(
          CmdLineUtil.openInFile(params.getData()), "UTF-8"));
      System.setOut(new PrintStream(System.out, true, "UTF-8"));

      return new ConllXPOSSampleStream(lineStream);
    } catch (UnsupportedEncodingException e) {
View Full Code Here

      opennlp.tools.parser.Parser parser =
          ParserFactory.create(model, beamSize, advancePercentage);

      ObjectStream<String> lineStream =
        new PlainTextByLineStream(new InputStreamReader(System.in));

      PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
      perfMon.start();

      try {
        String line;
        while ((line = lineStream.read()) != null) {
          if (line.length() == 0) {
            System.out.println();
          }
          else {
            Parse[] parses = parseLine(line, parser, numParses);
View Full Code Here

     
      System.out.println("Performing evaluation ...");
      TokenNameFinderEvaluator evaluator = new TokenNameFinderEvaluator(nameFinder);
     
      final NameSampleDataStream sampleStream = new NameSampleDataStream(
          new PlainTextByLineStream(new InputStreamReader(new FileInputStream(args[2]), args[1])));
     
      final PerformanceMonitor monitor = new PerformanceMonitor("sent");
     
      monitor.startAndPrintThroughput();
     
View Full Code Here

    InputStream in = getClass().getResourceAsStream(
        "/opennlp/tools/sentdetect/Sentences.txt");

    SentenceModel sentdetectModel = SentenceDetectorME.train(
        "en", new SentenceSampleStream(new PlainTextByLineStream(new InputStreamReader(in))), true, null, 100, 0);
   
    assertEquals("en", sentdetectModel.getLanguage());
   
    SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel);
View Full Code Here

   
    InputStream trainDataIn = TokenizerTestUtil.class.getResourceAsStream(
        "/opennlp/tools/tokenize/token.train");
   
    ObjectStream<TokenSample> samples = new TokenSampleStream(
        new PlainTextByLineStream(new InputStreamReader(trainDataIn, "UTF-8")));
   
    return TokenizerME.train("en", samples, true, 5, 100);
  }
View Full Code Here

  private static ObjectStream<SentenceSample> createSampleStream()
      throws IOException {
    InputStream in = SentenceDetectorFactoryTest.class.getClassLoader()
        .getResourceAsStream("opennlp/tools/sentdetect/Sentences.txt");

    return new SentenceSampleStream(new PlainTextByLineStream(
        new InputStreamReader(in)));
  }
View Full Code Here

      POSModel model = new POSModelLoader().load(new File(args[0]));

      POSTaggerME tagger = new POSTaggerME(model);

      ObjectStream<String> lineStream =
        new PlainTextByLineStream(new InputStreamReader(System.in));

      PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
      perfMon.start();

      try {
        String line;
        while ((line = lineStream.read()) != null) {

          String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
          String[] tags = tagger.tag(whitespaceTokenizerLine);

          POSSample sample = new POSSample(whitespaceTokenizerLine, tags);
View Full Code Here

    FileInputStream sampleDataIn = new FileInputStream(new File(
        DictionaryNameFinderEvaluatorTest.class.getClassLoader()
            .getResource("opennlp/tools/namefind/AnnotatedSentences.txt")
            .toURI()));

    return new NameSampleDataStream(new PlainTextByLineStream(
        sampleDataIn.getChannel(), "ISO-8859-1"));
  }
View Full Code Here

TOP

Related Classes of opennlp.tools.util.PlainTextByLineStream

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.