Package opennlp.tools.doccat

Examples of opennlp.tools.doccat.DocumentSample


          + inputFiles[inputFilesIndex]);
    String category = line.substring(0,split);
    String document = line.substring(split+1);
    line = null; // mark line as consumed
    String[] tokens = tokenizer.tokenize(document); //<co id="mee.train.tok"/>
    return new DocumentSample(category, tokens); //<co id="mee.train.sample"/>
  }
View Full Code Here


      AnnotationFS categoryAnnotation  =
          (AnnotationFS) categoryIndex.iterator().next();
     
      // add to event collection
     
      DocumentSample sample = new DocumentSample(
        categoryAnnotation.getStringValue(mCategoryFeature),
        cas.getDocumentText());
     
      documentSamples.add(sample);
    }
View Full Code Here

      count++;
    }

   
    if (sampleText.length() > 0) {
      return new DocumentSample(language, sampleText.toString());
    }
 
    return null;
  }
View Full Code Here

        "/opennlp/tools/formats/leipzig-en.sample");
   
    ObjectStream<DocumentSample> sampleStream =
        new LeipzigDoccatSampleStream("en", 2, in);
   
    DocumentSample doc1 = sampleStream.read();
    assertEquals("en", doc1.getCategory());
   
    DocumentSample doc2 = sampleStream.read();
    assertEquals("en", doc2.getCategory());
   
    DocumentSample doc3 = sampleStream.read();
    assertEquals("en", doc3.getCategory());

    DocumentSample doc4 = sampleStream.read();
    assertEquals("en", doc4.getCategory());
   
    assertNull(sampleStream.read());
  }
View Full Code Here

      AnnotationFS categoryAnnotation  =
          (AnnotationFS) categoryIndex.iterator().next();
     
      // add to event collection
     
      DocumentSample sample = new DocumentSample(
        categoryAnnotation.getStringValue(mCategoryFeature),
        cas.getDocumentText());
     
      documentSamples.add(sample);
    }
View Full Code Here

      String document;
      while ((document = documentStream.read()) != null) {
        double prob[] = doccat.categorize(document);
        String category = doccat.getBestCategory(prob);
       
        DocumentSample sample = new DocumentSample(category, document);
        System.out.println(sample.toString());
       
        perfMon.incrementCounter();
      }
    }
    catch (IOException e) {
View Full Code Here

        String document;
        while ((document = documentStream.read()) != null) {
          double prob[] = doccat.categorize(WhitespaceTokenizer.INSTANCE.tokenize(document));
          String category = doccat.getBestCategory(prob);

          DocumentSample sample = new DocumentSample(category, document);
          System.out.println(sample.toString());

          perfMon.incrementCounter();
        }
      }
      catch (IOException e) {
View Full Code Here

      count++;
    }

   
    if (sampleText.length() > 0) {
      return new DocumentSample(language, sampleText.toString());
    }
 
    return null;
  }
View Full Code Here

        "/opennlp/tools/formats/leipzig-en.sample");

    ObjectStream<DocumentSample> sampleStream =
        new LeipzigDoccatSampleStream("en", 2, in);

    DocumentSample doc1 = sampleStream.read();
    assertEquals("en", doc1.getCategory());

    DocumentSample doc2 = sampleStream.read();
    assertEquals("en", doc2.getCategory());

    DocumentSample doc3 = sampleStream.read();
    assertEquals("en", doc3.getCategory());

    DocumentSample doc4 = sampleStream.read();
    assertEquals("en", doc4.getCategory());

    assertNull(sampleStream.read());
  }
View Full Code Here

          String[] tokens = model.getFactory().getTokenizer().tokenize(document);

          double prob[] = doccat.categorize(tokens);
          String category = doccat.getBestCategory(prob);

          DocumentSample sample = new DocumentSample(category, tokens);
          System.out.println(sample.toString());

          perfMon.incrementCounter();
        }
      } catch (IOException e) {
        CmdLineUtil.handleStdinIoError(e);
View Full Code Here

TOP

Related Classes of opennlp.tools.doccat.DocumentSample

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.