Package org.apache.lucene.analysis.ja.tokenattributes

Examples of org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute


          tokenStream.reset(); //required with Solr 4
            while (tokenStream.incrementToken()){
                offset = tokenStream.addAttribute(OffsetAttribute.class);
                Token token = at.addToken(offset.startOffset(), offset.endOffset());
                //Get the POS attribute and init the PosTag
                PartOfSpeechAttribute posAttr = tokenStream.addAttribute(PartOfSpeechAttribute.class);
                PosTag posTag = POS_TAG_SET.getTag(posAttr.getPartOfSpeech());
                if(posTag == null){
                    posTag = adhocTags.get(posAttr.getPartOfSpeech());
                    if(posTag == null){
                        posTag = new PosTag(posAttr.getPartOfSpeech());
                        adhocTags.put(posAttr.getPartOfSpeech(), posTag);
                        log.warn(" ... missing PosTag mapping for {}",posAttr.getPartOfSpeech());
                    }
                }
                //Sentence detection by POS tag
                if(sentStartOffset < 0){ //the last token was a sentence ending
                  sentStartOffset = offset.startOffset();
                }
                if(posTag.hasPos(Pos.Point)) {
                    Sentence sent = at.addSentence(sentStartOffset, offset.startOffset());
                    //add the sentence as context to the NerData instances
                    while(nerSentIndex < nerList.size()){
                        nerList.get(nerSentIndex).context = sent.getSpan();
                        nerSentIndex++;
                    }
                    sentStartOffset = -1;
                }
                //POS
                token.addAnnotation(POS_ANNOTATION, Value.value(posTag));
                //NER
                NerTag nerTag = NER_TAG_SET.getTag(posAttr.getPartOfSpeech());
                if(ner != null && (nerTag == null || !ner.tag.getType().equals(nerTag.getType()))){
                    //write NER annotation
                    Chunk chunk = at.addChunk(ner.start, ner.end);
                    chunk.addAnnotation(NlpAnnotations.NER_ANNOTATION, Value.value(ner.tag));
                    //NOTE that the fise:TextAnnotation are written later based on the nerList
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.