Package opennlp.tools.sentdetect

Examples of opennlp.tools.sentdetect.SentenceDetectorME.sentPosDetect()


        // iterate over the paragraphs and extract sentence locations
        int order = 0;
        for (Tuple p : paragraphBag) {
            Integer beginParagraph = (Integer) p.get(1);
            Integer endParagraph = (Integer) p.get(2);
            Span[] spans = sentenceDetector.sentPosDetect(text.substring(
                    beginParagraph, endParagraph));
            for (Span sentenceRelative : spans) {
                // for each sentence found in that paragraph, compute the
                // absolute span of the text
                order++;
View Full Code Here


    /* Configure the sentence detector with preloaded model */
    SentenceDetectorME sentenceDetector = new SentenceDetectorME(model)
   
    /* sentenceSpans is an array of Span objects, one per detected sentence;
     * each Span object holds two integers: the start and end offsets of that sentence */
    sentenceSpans = sentenceDetector.sentPosDetect(text.getText());
  }
 
  /**
   * this method gets the sentence that contains the caret position
   */
 
View Full Code Here

    protected List<SurfaceFormOccurrence> extractNameOccurrences(BaseModel nameFinderModel, Text text, URI oType) {
        String intext = text.text();
        SentenceDetectorME sentenceDetector = new SentenceDetectorME((SentenceModel)sentenceModel);
        String[] sentences = sentenceDetector.sentDetect(intext);
        Span[] sentenceEndings = sentenceDetector.sentPosDetect(intext);
        int[] sentencePositions = new int[sentences.length + 1];
        for (int k=0; k<sentenceEndings.length; k++) {
            sentencePositions[k] = sentenceEndings[k].getStart();
        }
View Full Code Here

    SentenceDetectorME  sentenceDetector = new SentenceDetectorME((SentenceModel)sentenceModel);
    TokenizerME tokenizer = new TokenizerME((TokenizerModel)tokenModel);
    POSTaggerME posTagger = new POSTaggerME((POSModel)posModel);
    ChunkerME chunker = new ChunkerME((ChunkerModel)chunkModel);

    Span[] sentSpans = sentenceDetector.sentPosDetect(intext);
    for (Span sentSpan : sentSpans) {
      String sentence = sentSpan.getCoveredText(intext).toString();
      int start = sentSpan.getStart();
      Span[] tokSpans = tokenizer.tokenizePos(sentence);
      String[] tokens = new String[tokSpans.length];
View Full Code Here

        String textWithDots = text.replaceAll("\\n\\n", ".\n");
        text = removeNonUtf8CompliantCharacters(text);

        SentenceDetectorME sentenceDetector = new SentenceDetectorME(getSentenceModel("en"));

        Span[] sentenceSpans = sentenceDetector.sentPosDetect(textWithDots);

        NameFinderME finder = new NameFinderME(nameFinderModel);
        Tokenizer tokenizer = openNLP.getTokenizer(language);
        Map<String,List<NameOccurrence>> nameOccurrences = new LinkedHashMap<String,List<NameOccurrence>>();
        for (int i = 0; i < sentenceSpans.length; i++) {
View Full Code Here

        String textWithDots = text.replaceAll("\\n\\n", ".\n");
        text = removeNonUtf8CompliantCharacters(text);

        SentenceDetectorME sentenceDetector = new SentenceDetectorME(getSentenceModel("en"));

        Span[] sentenceSpans = sentenceDetector.sentPosDetect(textWithDots);

        NameFinderME finder = new NameFinderME(nameFinderModel);
        Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
        Map<String,List<NameOccurrence>> nameOccurrences = new LinkedHashMap<String,List<NameOccurrence>>();
        for (int i = 0; i < sentenceSpans.length; i++) {
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.