Package opennlp.tools.doccat

Examples of opennlp.tools.doccat.DoccatModel


    CmdLineUtil.checkOutputFile("document categorizer model", modelOutFile);
    ObjectStream<DocumentSample> sampleStream =
        openSampleData("Training", trainingDataInFile, params.getEncoding());
   
    DoccatModel model;
    try {
      if (mlParams == null) {
       model = DocumentCategorizerME.train(params.getLang(), sampleStream,
           params.getCutoff(), params.getIterations());
      }
View Full Code Here


    if (args.length != 1) {
      System.out.println(getHelp());
      throw new TerminateToolException(1);
    }
   
    DoccatModel model = new DoccatModelLoader().load(new File(args[0]));
   
    DocumentCategorizerME doccat = new DocumentCategorizerME(model);
   
    ObjectStream<String> documentStream = new ParagraphStream(
        new PlainTextByLineStream(new InputStreamReader(System.in)));
View Full Code Here

  }
 
  @Override
  protected DoccatModel loadModel(InputStream modelIn) throws IOException,
      InvalidFormatException {
    return new DoccatModel(modelIn);
  }
View Full Code Here

    File modelOutFile = params.getModel();

    CmdLineUtil.checkOutputFile("document categorizer model", modelOutFile);

    DoccatModel model;
    try {
      model = DocumentCategorizerME.train(params.getLang(), sampleStream, mlParams);
    } catch (IOException e) {
      throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " +
          e.getMessage(), e);
View Full Code Here

   
    if (0 == args.length) {
      System.out.println(getHelp());
    } else {

      DoccatModel model = new DoccatModelLoader().load(new File(args[0]));

      DocumentCategorizerME doccat = new DocumentCategorizerME(model);

      ObjectStream<String> documentStream = new ParagraphStream(
          new PlainTextByLineStream(new InputStreamReader(System.in)));
View Full Code Here

    super("Document Categorizer");
  }
 
  @Override
  protected DoccatModel loadModel(InputStream modelIn) throws IOException {
    return new DoccatModel(modelIn);
  }
View Full Code Here

  }

  public void run(String format, String[] args) {
    super.run(format, args);

    DoccatModel model = new DoccatModelLoader().load(params.getModel());

    List<EvaluationMonitor<DocumentSample>> listeners = new LinkedList<EvaluationMonitor<DocumentSample>>();
    if (params.getMisclassified()) {
      listeners.add(new DoccatEvaluationErrorListener());
    }
View Full Code Here

    FeatureGenerator[] featureGenerators = createFeatureGenerators(params
        .getFeatureGenerators());

    Tokenizer tokenizer = createTokenizer(params.getTokenizer());

    DoccatModel model;
    try {
      DoccatFactory factory = DoccatFactory.create(params.getFactory(),
          tokenizer, featureGenerators);
      model = DocumentCategorizerME.train(params.getLang(), sampleStream,
          mlParams, factory);
View Full Code Here

    if (0 == args.length) {
      System.out.println(getHelp());
    } else {

      DoccatModel model = new DoccatModelLoader().load(new File(args[0]));

      DocumentCategorizerME doccat = new DocumentCategorizerME(model);

      /*
       * Declared here without an initializer: the stream is created inside the
       * try block below so that an IOException raised while opening it is caught.
       */
      ObjectStream<String> documentStream;

      PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "doc");
      perfMon.start();

      try {
        documentStream = new ParagraphStream(
                new PlainTextByLineStream(new SystemInputStreamFactory(), SystemInputStreamFactory.encoding()));
        String document;
        while ((document = documentStream.read()) != null) {
          String[] tokens = model.getFactory().getTokenizer().tokenize(document);

          double prob[] = doccat.categorize(tokens);
          String category = doccat.getBestCategory(prob);

          DocumentSample sample = new DocumentSample(category, tokens);
View Full Code Here

    super("Document Categorizer");
  }

  @Override
  protected DoccatModel loadModel(InputStream modelIn) throws IOException {
    return new DoccatModel(modelIn);
  }
View Full Code Here

TOP

Related Classes of opennlp.tools.doccat.DoccatModel

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.