Package org.apache.stanbol.enhancer.nlp.model

Examples of org.apache.stanbol.enhancer.nlp.model.Span
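Span represents a typed slice of an analysed text (Text, TextSection, Sentence, Chunk or Token, see SpanTypeEnum). It exposes its absolute character offsets via getStart()/getEnd(), the covered text via getSpan(), and NLP annotations via the inherited Annotated interface (getAnnotation(..)/getAnnotations(..)). The examples below iterate, create, serialize and map such spans to RDF.

As orientation, a minimal sketch of how spans are typically registered on an AnalysedText. This is assumed usage inferred from the snippets below; atFactory and textBlob are placeholders:

    AnalysedText at = atFactory.createAnalysedText(textBlob.getValue());
    //spans are created with absolute character offsets into the text
    Sentence sentence = at.addSentence(0, 11); //e.g. covering "Hello world"
    Token hello = at.addToken(0, 5);
    //spans are Annotated: attach e.g. a POS tag with a probability
    hello.addAnnotation(POS_ANNOTATION, Value.value(new PosTag("UH"), 0.95));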


    /** Logs all Sentence and Chunk spans together with their phrase annotations. */
    private void logChunks(AnalysedText at){
        Iterator<Span> it = at.getEnclosed(EnumSet.of(SpanTypeEnum.Sentence, SpanTypeEnum.Chunk));
        while(it.hasNext()){
            Span span = it.next();
            if(span.getType() == SpanTypeEnum.Chunk){
                log.trace(" > {} {}",span,span.getSpan());
            } else {
                log.trace(" > {}",span);
            }
            for(Value<PhraseTag> value : span.getAnnotations(PHRASE_ANNOTATION)){
                log.trace("   - {}",value);
            }
        }
    }
View Full Code Here
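The traversal above can usually also be written with the type-specific accessors of AnalysedText; a hedged sketch (getChunks() is assumed to exist alongside getSentences() and getTokens()):

    Iterator<Chunk> chunks = at.getChunks();
    while(chunks.hasNext()){
        Chunk chunk = chunks.next();
        log.trace(" > {} {}", chunk, chunk.getSpan());
        for(Value<PhraseTag> value : chunk.getAnnotations(PHRASE_ANNOTATION)){
            log.trace("   - {}", value);
        }
    }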


            atFactory.createAnalysedText(textBlob.getValue()));
        Assert.assertEquals(analysedTextWithData, parsedAt);
        Iterator<Span> origSpanIt = analysedTextWithData.getEnclosed(EnumSet.allOf(SpanTypeEnum.class));
        Iterator<Span> parsedSpanIt = parsedAt.getEnclosed(EnumSet.allOf(SpanTypeEnum.class));
        while(origSpanIt.hasNext() && parsedSpanIt.hasNext()){
            Span orig = origSpanIt.next();
            Span parsed = parsedSpanIt.next();
            Assert.assertEquals(orig, parsed);
            Set<String> origKeys = orig.getKeys();
            Set<String> parsedKeys = parsed.getKeys();
            Assert.assertEquals(origKeys, parsedKeys);
            for(String key : origKeys){
                List<Value<?>> origValues = orig.getValues(key);
                List<Value<?>> parsedValues = parsed.getValues(key);
                Assert.assertEquals(origValues, parsedValues);
            }
        }
        Assert.assertFalse("Original AnalyzedText MUST NOT have additional Spans",origSpanIt.hasNext());
        Assert.assertFalse("Parsed AnalyzedText MUST NOT have additional Spans",parsedSpanIt.hasNext());
View Full Code Here
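For context, the round trip this test exercises looks roughly as follows. The serializer/parser signatures are assumptions based on the snippet above, not a definitive API reference:

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    serializer.serialize(analysedTextWithData, out, Charset.forName("UTF-8"));
    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
    AnalysedText parsedAt = parser.parse(in, Charset.forName("UTF-8"),
        atFactory.createAnalysedText(textBlob.getValue()));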

            if(spanType == null || spanPos[0] < 0 || spanPos[1] < 0){
                log.warn("Illegal or missing span type, start and/or end position (ignored, json: "+jSpan);
                return;
            }
            //now create the Span
            Span span;
            switch (spanType) {
                case Text:
                    log.warn("Encounterd 'Text' span that is not the first span in the "
                        + "'spans' array (ignored, json: "+node+")");
                    return;
View Full Code Here
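After the validation shown above, the parser maps the parsed type and offsets to the matching AnalysedText factory method. A hedged sketch of that non-error path (method names assumed from the other examples):

    switch (spanType) {
        case Sentence: span = at.addSentence(spanPos[0], spanPos[1]); break;
        case Chunk:    span = at.addChunk(spanPos[0], spanPos[1]);    break;
        case Token:    span = at.addToken(spanPos[0], spanPos[1]);    break;
        default: return; //the 'Text' span is handled before, see the warning above
    }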

        Language lang = new Language(language);
        LiteralFactory lf = LiteralFactory.getInstance();
        ci.getLock().writeLock().lock();
        try { //write TextAnnotations for Named Entities
            while(spans.hasNext()){
                Span span = spans.next();
                switch (span.getType()) {
                    case Sentence:
                        context = (Sentence)span; //remember the last Sentence as selection context
                        break;
                    default:
                        Value<NerTag> nerAnno = span.getAnnotation(NER_ANNOTATION);
                        if(nerAnno != null){
                            UriRef ta = EnhancementEngineHelper.createTextEnhancement(ci, this);
                            //add span related data
                            metadata.add(new TripleImpl(ta, ENHANCER_SELECTED_TEXT,
                                new PlainLiteralImpl(span.getSpan(), lang)));
                            metadata.add(new TripleImpl(ta, ENHANCER_START,
                                lf.createTypedLiteral(span.getStart())));
                            metadata.add(new TripleImpl(ta, ENHANCER_END,
                                lf.createTypedLiteral(span.getEnd())));
                            metadata.add(new TripleImpl(ta, ENHANCER_SELECTION_CONTEXT,
                                new PlainLiteralImpl(context == null ?
                                        getDefaultSelectionContext(at.getSpan(), span.getSpan(), span.getStart()) :
                                            context.getSpan(), lang)));
                            //add the NER type
                            if(nerAnno.value().getType() != null){
                                metadata.add(new TripleImpl(ta,DC_TYPE,nerAnno.value().getType()));
                            }
View Full Code Here
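In short: every span carrying a NER_ANNOTATION becomes a fise:TextAnnotation with the selected text, start/end offsets and a selection context (the text of the enclosing Sentence, or a computed default context when no sentence is available), plus a dc:type taken from the NerTag when one is defined.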

        NavigableMap<Span,SentimentInfo> activeSpans = new TreeMap<Span,SentimentInfo>();
        if(spanTypes.contains(SpanTypeEnum.Text)){
            activeSpans.put(at, new SentimentInfo(at));
        }
        while(tokenIt.hasNext()){
            Span span = tokenIt.next();
            switch (span.getType()) {
                case Token:
                    Value<Double> sentiment = span.getAnnotation(SENTIMENT_ANNOTATION);
                    Iterator<Entry<Span,SentimentInfo>> entries = activeSpans.entrySet().iterator();
                    if(sentiment != null){
                        while(entries.hasNext()){
                            Entry<Span,SentimentInfo> entry = entries.next();
                            //if(span.getEnd() > entry.getKey().getEnd()){ //fully enclosed
                            if(entry.getKey().getEnd() > span.getStart()){ //partly enclosed
                                entry.getValue().addSentiment(sentiment.value());
                            } else { // span has completed
                                if(entry.getValue().hasSentiment()){ //if a sentiment was found
                                    //add it to the list
                                    sentimentInfos.add(entry.getValue());
                                }
                                entries.remove(); // remove completed
                            }
                        }
                    }
                    break;
                case Chunk:
                    Value<PhraseTag> phraseTag = span.getAnnotation(PHRASE_ANNOTATION);
                    if(phraseTag != null && phraseTag.value().getCategory() == LexicalCategory.Noun){
                        //noun phrase
                        activeSpans.put(span, new SentimentInfo((Section)span));
                    }
                    break;
View Full Code Here
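The pattern used here: the Text section and every noun-phrase Chunk (the full method likely handles Sentences the same way) are kept as "active" SentimentInfos while the token iterator advances. Each token carrying a SENTIMENT_ANNOTATION is added to every active span that still overlaps it; active spans that end before the current token are flushed to sentimentInfos (if they collected any sentiment) and removed.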

            section = sections.next();
            tokens.clear(); //clear tokens for each section (STANBOL-818)
            Iterator<Span> enclosed = section.getEnclosed(enclosedSpanTypes);
            ChunkData activeChunk = null;
            while(enclosed.hasNext()){
                Span span = enclosed.next();
                if(span.getStart() >= span.getEnd()){ //safeguard against empty spans
                    log.warn("Detected Empty Span {} in section {} of Blob {}",
                        new Object[]{span,section, at.getBlob()});
                }
                if(span.getType() == SpanTypeEnum.Chunk){
                    ChunkData chunkData = new ChunkData((Chunk)span);
                    if(chunkData.isProcessable){
                        if(activeChunk != null){ //current Chunk not yet closed -> overlapping chunks!
                            if(activeChunk.getEndChar() < span.getEnd()){ //merge partly overlapping chunks
                                log.info("   - merge overlapping and processable Chunks {} <-> {}",
                                    activeChunk.merged == null? activeChunk.chunk : activeChunk.merged,span);
                                activeChunk.merged = (Chunk)span; //set this one as last merged
                            } //ignore completely covered chunks
                        } else { // a new Chunk starts
                            activeChunk = chunkData;
                            activeChunk.startToken = tokens.size();
                            if(log.isDebugEnabled()){
                                log.debug(">> Chunk: (type:{}, startPos: {}) text: '{}'",
                                    new Object []{
                                        activeChunk.chunk.getType(),
                                        activeChunk.startToken,
                                        activeChunk.chunk.getSpan()
                                    });
                            }
                        }
                    } //else ignore chunks that are not processable
                } else if(span.getType() == SpanTypeEnum.Token){
                    TokenData tokenData = new TokenData(tokens.size(),(Token)span,activeChunk);
                    if(log.isDebugEnabled()){
                        log.debug("  > Token {}: {} (pos:{}) chunk: '{}' | morpho: {}",
                            new Object[]{tokenData.index,tokenData.token,
                                         tokenData.token.getAnnotations(POS_ANNOTATION),
                                         tokenData.inChunk != null ? tokenData.inChunk.chunk.getSpan() : "none",
                                         tokenData.morpho != null ? tokenData.morpho : "none"});
                    }
                    tokens.add(tokenData);
                    if(!foundProcessable){
                        foundProcessable = tokenData.isProcessable;
                    }
                    if(activeChunk != null){
                        if(tokenData.isMatchable ){
                            activeChunk.matchableCount++;
                        }
                        if (span.getEnd() >= activeChunk.getEndChar()){
                            //this is the last token in the current chunk
                            activeChunk.endToken = tokens.size()-1;
                            log.debug("   - end Chunk@pos: {}", activeChunk.endToken);
                            if(tpc.isLinkMultiMatchableTokensInChunk() &&
                                    activeChunk.matchableCount > 1 ){
View Full Code Here
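ChunkData and TokenData are engine-internal helpers of the entity-linking engine: per chunk they track the index of its first and last token and the number of matchable tokens, so that multi-token labels can later be matched within a single processable chunk. Note that the token list is reset per section (STANBOL-818) and that partly overlapping processable chunks are merged rather than processed twice.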

            UriRef sentence = null;
            UriRef phrase = null;
            UriRef word = null;
            boolean firstWordInSentence = true;
            while(spans.hasNext()){
                Span span = spans.next();
                //TODO: filter Spans based on additional requirements
                //(1) write generic information about the span
                UriRef current = writeSpan(metadata, base, at, language, span);
                //(2) add the relations between the different spans
                switch (span.getType()) {
                    case Sentence:
                        if(sentence != null){
                            metadata.add(new TripleImpl(sentence, SsoOntology.nextSentence.getUri(), current));
                        }
                        sentence = current;
                        firstWordInSentence = true;
                        break;
                    case Chunk:
                        if(sentence != null){
                            metadata.add(new TripleImpl(current, StringOntology.superString.getUri(), sentence));
                            if(word != null){
                                metadata.add(new TripleImpl(word, SsoOntology.lastWord.getUri(), sentence));
                            }
                        }
                        phrase = current;
                        break;
                    case Token:
                        if(sentence != null){
                            metadata.add(new TripleImpl(current, SsoOntology.sentence.getUri(), sentence));
                            if(firstWordInSentence){
                                metadata.add(new TripleImpl(current, SsoOntology.firstWord.getUri(), sentence));
                                firstWordInSentence = false;
                            }
                        }
                        if(phrase != null){
                            metadata.add(new TripleImpl(current, SsoOntology.parent.getUri(), phrase));
                        }
                        if(word != null){
                            metadata.add(new TripleImpl(word, SsoOntology.nextWord.getUri(), current));
                            metadata.add(new TripleImpl(current, SsoOntology.previousWord.getUri(), word));
                        }
                        word = current;
                        break;
                    default:
                        break;
                }
                //(3) add specific information such as POS, chunk type ...
                writePos(metadata, span, current);
                writePhrase(metadata, span, current);
                //OLiA does not include Sentiments
               
                Value<Double> sentiment = span.getAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION);
                if(sentiment != null && sentiment.value() != null){
                    metadata.add(new TripleImpl(current, SENTIMENT_PROPERTY,
                        lf.createTypedLiteral(sentiment.value())));
                }
            }
View Full Code Here
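The switch wires the resources created by writeSpan(..) into the STRING/SSO graph: consecutive sentences are chained via sso:nextSentence, chunks reference their sentence via str:superString, and tokens receive sso:sentence, sso:firstWord/sso:lastWord, sso:parent (their phrase) and sso:previousWord/sso:nextWord links before POS, phrase and sentiment annotations are attached.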

   
   
    /** Logs all Sentence and Token spans together with their POS annotations. */
    private void logAnnotations(AnalysedText at){
        Iterator<Span> it = at.getEnclosed(EnumSet.of(SpanTypeEnum.Sentence, SpanTypeEnum.Token));
        while(it.hasNext()){
            Span span = it.next();
            log.trace(" > {}",span);
            for(Value<PosTag> value : span.getAnnotations(POS_ANNOTATION)){
                log.trace("   - {}",value);
            }
        }
    }
View Full Code Here
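When only the single highest-ranked tag is of interest, getAnnotation(..) (singular) returns the best Value, as already used in the NER and sentiment examples above. A minimal sketch (Value.probability() is assumed to return the tagger confidence):

    Value<PosTag> best = span.getAnnotation(POS_ANNOTATION);
    if(best != null){
        log.trace("   best: {} (prob: {})", best.value().getTag(), best.probability());
    }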
