Package opennlp.tools.doccat

Examples of opennlp.tools.doccat.DocumentCategorizerME
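
Before the excerpts, here is a minimal, self-contained sketch of the usual load-and-categorize flow. It is not taken from any of the snippets below, and the model file name is a placeholder:

    import java.io.FileInputStream;
    import java.io.InputStream;

    import opennlp.tools.doccat.DoccatModel;
    import opennlp.tools.doccat.DocumentCategorizerME;
    import opennlp.tools.tokenize.SimpleTokenizer;

    public class DoccatQuickStart {
      public static void main(String[] args) throws Exception {
        // load a previously trained document categorizer model (placeholder file name)
        InputStream modelIn = new FileInputStream("en-doccat.bin");
        DoccatModel model = new DoccatModel(modelIn);
        modelIn.close();

        DocumentCategorizerME categorizer = new DocumentCategorizerME(model);

        // tokenize the input and score it against every category known to the model
        String[] tokens = SimpleTokenizer.INSTANCE.tokenize("The team won the match last night");
        double[] outcomes = categorizer.categorize(tokens);
        System.out.println(categorizer.getBestCategory(outcomes));
      }
    }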


    InputStream modelStream =                        // open the serialized categorizer model
        new FileInputStream(modelFile);
    DoccatModel model = new DoccatModel(modelStream);
    DocumentCategorizer categorizer                  // maxent categorizer over the model and feature generators
      = new DocumentCategorizerME(model, nffg, bowfg);
    Tokenizer tokenizer = SimpleTokenizer.INSTANCE;

    int catCount = categorizer.getNumberOfCategories();
    Collection<String> categories
      = new ArrayList<String>(catCount);
    for (int i = 0; i < catCount; i++) {
      categories.add(categorizer.getCategory(i));
    }
    ResultAnalyzer resultAnalyzer =                  // accumulates categorization results for the test run
        new ResultAnalyzer(categories, "unknown");
    runTest(inputFiles, categorizer, tokenizer, resultAnalyzer);
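
A note on the constructor arguments: nffg and bowfg are FeatureGenerator instances defined elsewhere in the full source and are not shown in this excerpt. As an illustration only, the same varargs constructor can be called with the stock bag-of-words generator that ships with opennlp.tools.doccat:

    // Illustration, not the excerpt's actual generators: the varargs constructor
    // accepts any FeatureGenerator implementations.
    DocumentCategorizer categorizer =
        new DocumentCategorizerME(model, new BagOfWordsFeatureGenerator());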
 
View Full Code Here


      model = modelResource.getModel();
    } catch (ResourceAccessException e) {
      throw new ResourceInitializationException(e);
    }

    mCategorizer = new DocumentCategorizerME(model);
  }
View Full Code Here


      throw new TerminateToolException(1);
    }
   
    DoccatModel model = new DoccatModelLoader().load(new File(args[0]));
   
    DocumentCategorizerME doccat = new DocumentCategorizerME(model);
   
    ObjectStream<String> documentStream = new ParagraphStream(
        new PlainTextByLineStream(new InputStreamReader(System.in)));
   
    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "doc");
    perfMon.start();
   
    try {
      String document;
      while ((document = documentStream.read()) != null) {
        double[] prob = doccat.categorize(document);   // older API: categorize the raw paragraph text
        String category = doccat.getBestCategory(prob);
       
        DocumentSample sample = new DocumentSample(category, document);
        System.out.println(sample.toString());
       
        perfMon.incrementCounter();
View Full Code Here

      System.out.println(getHelp());
    } else {

      DoccatModel model = new DoccatModelLoader().load(new File(args[0]));

      DocumentCategorizerME doccat = new DocumentCategorizerME(model);

      ObjectStream<String> documentStream = new ParagraphStream(
          new PlainTextByLineStream(new InputStreamReader(System.in)));

      PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "doc");
      perfMon.start();

      try {
        String document;
        while ((document = documentStream.read()) != null) {
          double[] prob = doccat.categorize(WhitespaceTokenizer.INSTANCE.tokenize(document)); // whitespace-tokenize, then categorize
          String category = doccat.getBestCategory(prob);

          DocumentSample sample = new DocumentSample(category, document);
          System.out.println(sample.toString());

          perfMon.incrementCounter();
View Full Code Here
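
Beyond the single best label, the probability array returned by categorize can be read per category. A small sketch reusing the doccat and document variables from the excerpt above:

      // print the score of every category, not just the best one
      double[] prob = doccat.categorize(WhitespaceTokenizer.INSTANCE.tokenize(document));
      for (int i = 0; i < doccat.getNumberOfCategories(); i++) {
        System.out.println(doccat.getCategory(i) + " = " + prob[i]);
      }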

                + e.getMessage());
      }
    }

    DocumentCategorizerEvaluator evaluator = new DocumentCategorizerEvaluator(
        new DocumentCategorizerME(model),
        listeners.toArray(new DoccatEvaluationMonitor[listeners.size()]));

    final PerformanceMonitor monitor = new PerformanceMonitor("doc");

    ObjectStream<DocumentSample> measuredSampleStream = new ObjectStream<DocumentSample>() {
View Full Code Here
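
Once the measured sample stream is wired up, the evaluator is typically run over it and its accuracy read back. A minimal sketch, assuming the stream above has been fully constructed (IOException handling omitted):

      // run the evaluation over the DocumentSample stream and report overall accuracy
      evaluator.evaluate(measuredSampleStream);
      System.out.println("Accuracy: " + evaluator.getAccuracy());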

      System.out.println(getHelp());
    } else {

      DoccatModel model = new DoccatModelLoader().load(new File(args[0]));

      DocumentCategorizerME doccat = new DocumentCategorizerME(model);

      // stream initialization moved into the try block so the IOException from stream creation is caught there
      ObjectStream<String> documentStream;

      PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "doc");
      perfMon.start();

      try {
        documentStream = new ParagraphStream(
                new PlainTextByLineStream(new SystemInputStreamFactory(), SystemInputStreamFactory.encoding()));
        String document;
        while ((document = documentStream.read()) != null) {
          String[] tokens = model.getFactory().getTokenizer().tokenize(document);

          double[] prob = doccat.categorize(tokens);   // tokens come from the model's own tokenizer
          String category = doccat.getBestCategory(prob);

          DocumentSample sample = new DocumentSample(category, tokens);
          System.out.println(sample.toString());

          perfMon.incrementCounter();
View Full Code Here
