Examples of PartOfSpeechAttribute


Examples of com.github.bibreen.mecab_ko_lucene_analyzer.tokenattributes.PartOfSpeechAttribute

        (CharTermAttribute)tokenizer.addAttribute(CharTermAttribute.class);
    TypeAttribute type =
        (TypeAttribute)tokenizer.addAttribute(TypeAttribute.class);
    SemanticClassAttribute semanticClass =
        (SemanticClassAttribute)tokenizer.addAttribute(SemanticClassAttribute.class);
    PartOfSpeechAttribute pos =
        (PartOfSpeechAttribute)tokenizer.addAttribute(PartOfSpeechAttribute.class);
       

    StringBuilder result = new StringBuilder();
    while (tokenizer.incrementToken() == true) {
      result.append(new String(term.buffer(), 0, term.length())).append(":");
      result.append(type.type()).append(":");
      result.append(pos.partOfSpeech()).append(":");
      result.append(semanticClass.semanticClass()).append(":");
      result.append(String.valueOf(posIncrAtt.getPositionIncrement())).append(":");
      result.append(String.valueOf(posLengthAtt.getPositionLength())).append(":");
      result.append(String.valueOf(extOffset.startOffset())).append(":");
      result.append(String.valueOf(extOffset.endOffset()));
View Full Code Here

Examples of org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute

          tokenStream.reset(); //required with Solr 4
            while (tokenStream.incrementToken()){
                offset = tokenStream.addAttribute(OffsetAttribute.class);
                Token token = at.addToken(offset.startOffset(), offset.endOffset());
                //Get the POS attribute and init the PosTag
                PartOfSpeechAttribute posAttr = tokenStream.addAttribute(PartOfSpeechAttribute.class);
                PosTag posTag = POS_TAG_SET.getTag(posAttr.getPartOfSpeech());
                if(posTag == null){
                    posTag = adhocTags.get(posAttr.getPartOfSpeech());
                    if(posTag == null){
                        posTag = new PosTag(posAttr.getPartOfSpeech());
                        adhocTags.put(posAttr.getPartOfSpeech(), posTag);
                        log.warn(" ... missing PosTag mapping for {}",posAttr.getPartOfSpeech());
                    }
                }
                //Sentence detection by POS tag
                if(sentStartOffset < 0){ //the last token was a sentence ending
                  sentStartOffset = offset.startOffset();
                }
                if(posTag.hasPos(Pos.Point)) {
                    Sentence sent = at.addSentence(sentStartOffset, offset.startOffset());
                    //add the sentence as context to the NerData instances
                    while(nerSentIndex < nerList.size()){
                        nerList.get(nerSentIndex).context = sent.getSpan();
                        nerSentIndex++;
                    }
                    sentStartOffset = -1;
                }
                //POS
                token.addAnnotation(POS_ANNOTATION, Value.value(posTag));
                //NER
                NerTag nerTag = NER_TAG_SET.getTag(posAttr.getPartOfSpeech());
                if(ner != null && (nerTag == null || !ner.tag.getType().equals(nerTag.getType()))){
                    //write NER annotation
                    Chunk chunk = at.addChunk(ner.start, ner.end);
                    chunk.addAnnotation(NlpAnnotations.NER_ANNOTATION, Value.value(ner.tag));
                    //NOTE that the fise:TextAnnotation are written later based on the nerList
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.