Examples of AnalysedText

org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText
org.apache.stanbol.enhancer.nlp.model.AnalysedText
Provides access to NLP processing results of the text/plain {@link Blob} of an ContentItem. Intended to be{@link ContentItem#addPart(org.apache.clerezza.rdf.core.UriRef,Object) addedas ContentPart} by using {@link #ANALYSED_TEXT_URI}. @see ContentItem#addPart(UriRef,Object)

Examples of org.apache.stanbol.enhancer.nlp.model.AnalysedText

    
    @Test
    public void testEngine() throws IOException, EngineException {
        ContentItem ci = ciFactory.createContentItem(new StringSource(text));
        Assert.assertNotNull(ci);
        AnalysedText at = atFactory.createAnalysedText(ci, ci.getBlob());
        Assert.assertNotNull(at);
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
        Assert.assertEquals("it", EnhancementEngineHelper.getLanguage(ci));
        
        Assert.assertEquals("Can not enhance Test ContentItem", EnhancementEngine.ENHANCE_ASYNC,engine.canEnhance(ci));
        //compute the enhancements
        try {
            engine.computeEnhancements(ci);
        } catch (EngineException e) {
            RemoteServiceHelper.checkServiceUnavailable(e);
            return; //deactivate test
        }
        //now validate the enhancements
        int sentimentExpressionCnt=0;
        for(Iterator<Token> tokens = at.getTokens(); tokens.hasNext();){
            Token token = tokens.next();
            log.info("Token: {}",token);
            List<Value<Double>> sentimentExpressionsList = token.getAnnotations(NlpAnnotations.SENTIMENT_ANNOTATION);
            if(sentimentExpressionsList!=null && sentimentExpressionsList.size()>0)
              sentimentExpressionCnt++;

View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.model.AnalysedText

     *          if the underlying process failed to work as
     *          expected
     */
    @Override
    public void computeEnhancements(ContentItem ci) throws EngineException {
        AnalysedText analysedText = getAnalysedText(this,ci, true);
        String language = getLanguage(this, ci, true);
        SentimentClassifier classifier = classifiers.get(language);
        if(classifier == null){
            throw new IllegalStateException("Sentiment Classifier for language '"
                + language +"' not available. As this is also checked in "
                + " canEnhance this may indicate an Bug in the used "
                + "EnhancementJobManager!");
        }
        //TODO: locking for AnalysedText not yet defined
//        ci.getLock().writeLock().lock();
//        try {
        Iterator<Token> tokens = analysedText.getTokens();
        while(tokens.hasNext()){
            Token token = tokens.next();
            Set<LexicalCategory> cats = null;
            boolean process = false;
            if(!adjectivesOnly){

View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.model.AnalysedText

    
    @Test
    public void testEngineDe() throws IOException, EngineException {
        ContentItem ci = ciFactory.createContentItem(new StringSource(de_text));
        Assert.assertNotNull(ci);
        AnalysedText at = atFactory.createAnalysedText(ci, ci.getBlob());
        Assert.assertNotNull(at);
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("de")));
        Assert.assertEquals("de", EnhancementEngineHelper.getLanguage(ci));
        
        //Add some Tokens with POS annotations to test the usage of
        //existing POS annotations by the lemmatizer
        Token verbrachten = at.addToken(de_verbStart,de_verbStart+de_verb.length());
        verbrachten.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("V",LexicalCategory.Verb), de_verbProb));
        
        Token schonen = at.addToken(de_adjectiveStart,de_adjectiveStart+de_adjective.length()); 
        schonen.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("ADJ",LexicalCategory.Adjective), de_adjectiveProb));
        
        Token urlaub = at.addToken(de_nounStart,de_nounStart+de_noun.length()); 
        urlaub.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("NC",LexicalCategory.Noun), de_nounProb));
        
        Assert.assertEquals("Can not enhance Test ContentItem",
            EnhancementEngine.ENHANCE_ASYNC,engine.canEnhance(ci));
        //compute the enhancements
        try {
            engine.computeEnhancements(ci);
        } catch (EngineException e) {
            RemoteServiceHelper.checkServiceUnavailable(e);
            return; //deactivate test
        }
        //now validate the enhancements
        boolean foundVerb = false;
        boolean foundAdjective = false;
        boolean foundNoun = false;
        for(Iterator<Token> tokens = at.getTokens(); tokens.hasNext();){
            Token token = tokens.next();
            log.info("Token: {}",token);
            List<Value<MorphoFeatures>> mfs = token.getAnnotations(NlpAnnotations.MORPHO_ANNOTATION);
            if(de_verb.equals(token.getSpan())){
                foundVerb = !mfs.isEmpty();

View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.model.AnalysedText

        return ENHANCE_ASYNC;
    }
    
    @Override
    public void computeEnhancements(ContentItem ci) throws EngineException {
        AnalysedText at = getAnalysedText(this, ci, true);
        String language = getLanguage(this, ci, true);
        isLangaugeConfigured(this, languageConfig, language, true);
        List<SentimentExpression> seList;
        try {
          seList = this.client.extractSentimentExpressions(at.getSpan(), language);
        } catch (IOException e) {
            throw new EngineException("Error while calling the CELI Sentiment Analysis service (configured URL: " + serviceURL + ")!", e);
        } catch (SOAPException e) {
            throw new EngineException("Error wile encoding/decoding the request/response to the CELI Sentiment Analysis service!", e);
        }
        
        for(SentimentExpression se : seList){
            //Add the Sentiment Expression as Token to the Text. NOTE that if a Token with the same start/end positions already exist this
            //Method returns the existing instance
            Token token = at.addToken(se.getStartSnippet(),se.getEndSnippet());
            token.addAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION, new Value<Double>(se.getSentimentPolarityAsDoubleValue()) );
        }
    }

View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.model.AnalysedText

    public void setupTest() throws IOException {
        //create a contentItem for the plain text used for testing
        InputStream is = FstLinkingEngineTest.class.getClassLoader().getResourceAsStream(TEST_TEXT_FILE);
        Assert.assertNotNull("Unable to load '"+TEST_TEXT_FILE+"' via classpath",is);
        ContentItem ci = cif.createContentItem(new StreamSource(is,"text/plain"));
        AnalysedText at = atf.createAnalysedText(ci, ci.getBlob());
        is.close();
        //parse the prepared NLP results and add it to the ContentItem
        is = FstLinkingEngineTest.class.getClassLoader().getResourceAsStream(TEST_TEXT_NLP_FILE);
        Assert.assertNotNull("Unable to load '"+TEST_TEXT_NLP_FILE+"' via classpath",is);
        AnalyzedTextParser.getDefaultInstance().parse(is, Charset.forName("UTF-8"), at);
        is.close();
        //set the language of the contentItem
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, 
            EN_LANGUAGE));
        //set the contentItem and also the content
        this.ci = ci;
        this.content = at.getText().toString();
    }

View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.model.AnalysedText

     *          if the underlying process failed to work as
     *          expected
     */
    @Override
    public void computeEnhancements(ContentItem ci) throws EngineException {
        AnalysedText at = initAnalysedText(this,analysedTextFactory,ci);
        String language = getLanguage(this, ci, true);
        
        Tokenizer tokenizer = getTokenizer(language);
        if(tokenizer == null){
            log.warn("Tokenizer for language {} is no longer available. "
                    + "This might happen if the model becomes unavailable during enhancement. "
                    + "If this happens more often it might also indicate an bug in the used "
                    + "EnhancementJobManager implementation as the availability is also checked "
                    + "in the canEnhance(..) method of this Enhancement Engine.");
            return;
        }
        //Try to use sentences for tokenizing
        Iterator<? extends Section> sections = at.getSentences();
        if(!sections.hasNext()){
            //if no sentences are annotated
            sections = Collections.singleton(at).iterator();
        }

View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.model.AnalysedText

     *          if the underlying process failed to work as
     *          expected
     */
    @Override
    public void computeEnhancements(ContentItem ci) throws EngineException {
        AnalysedText at = getAnalysedText(this, ci, true);
        String language = getLanguage(this, ci, true);
        isLangaugeConfigured(this, languageConfiguration, language, true);
        //init the PhraseBuilder
        ChunkFactory chunkFactory = new ChunkFactoryImpl(at, ci.getLock());
        List<PhraseBuilder> phraseBuilders = new ArrayList<PhraseBuilder>(phraseTypeDefinitions.size());
        for(PhraseTypeDefinition ptd : phraseTypeDefinitions){
            phraseBuilders.add(new PhraseBuilder(ptd, chunkFactory, minPosScore));
        }
        Iterator<? extends Section> sentences = at.getSentences();
        if(!sentences.hasNext()){ //no sentences ... iterate over the whole text
            sentences = Collections.singleton(at).iterator();
        }
        while(sentences.hasNext()){
            // (1) get Tokens and POS information for the sentence

View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.model.AnalysedText

     *          if the underlying process failed to work as
     *          expected
     */
    @Override
    public void computeEnhancements(ContentItem ci) throws EngineException {
        AnalysedText at = initAnalysedText(this,analysedTextFactory,ci);
        String language = getLanguage(this, ci, true);
        SentenceDetector sentenceDetector = getSentenceDetector(language);
        if(sentenceDetector != null){
            for(opennlp.tools.util.Span sentSpan : sentenceDetector.sentPosDetect(at.getSpan())) {
                //detect sentences and add it to the AnalyzedText.
                Sentence sentence = at.addSentence(sentSpan.getStart(), sentSpan.getEnd());
                log.trace(" > add {}",sentence);
            }
        } else {
            log.warn("SentenceDetector model for language {} is no longer available. "
                + "This might happen if the model becomes unavailable during enhancement. "

View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.model.AnalysedText

        Map<UriRef,Resource> expected = new HashMap<UriRef,Resource>();
        expected.put(Properties.DC_CREATOR, lf.createTypedLiteral(engine.getClass().getName()));
        expected.put(Properties.ENHANCER_EXTRACTED_FROM,contentItem.getUri());
        Assert.assertEquals(16, EnhancementStructureHelper.validateAllTextAnnotations(
            contentItem.getMetadata(), text, expected));
        AnalysedText at = AnalysedTextUtils.getAnalysedText(contentItem);
        Assert.assertNotNull(at);
        List<Sentence> sentences = AnalysedTextUtils.asList(at.getSentences());
        Assert.assertNotNull(sentences);
        Assert.assertEquals(7, sentences.size());
        //TODO: values in the following arrays are based on the first run of the
        // engine. So this is only to detect changes in results. It can not validate
        // that the tokenization and NER detections are correct - sorry I do not

View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.model.AnalysedText

     */
    @Override
    public void computeEnhancements(final ContentItem ci) throws EngineException {
        checkRESTfulNlpAnalysisService(); //validate that the service is active
        //get/create the AnalysedText
        final AnalysedText at = NlpEngineHelper.initAnalysedText(this, analysedTextFactory, ci);
        final Blob blob = at.getBlob();
        //send the text to the server
        final String language = getLanguage(this, ci, true);
        final HttpPost request = new HttpPost(analysisServiceUrl);
        request.addHeader(HttpHeaders.CONTENT_LANGUAGE, language);
        request.setEntity(new InputStreamEntity(
            blob.getStream(), blob.getContentLength(),
            ContentType.create(blob.getMimeType(), 
                blob.getParameter().get("charset"))));
        //execute the request
        try {
            AccessController.doPrivileged(new PrivilegedExceptionAction<AnalysedText>() {
                public AnalysedText run() throws ClientProtocolException, IOException {
                    return httpClient.execute(request, new AnalysisResponseHandler(at));
                }
            });
        } catch (PrivilegedActionException pae) {
            Exception e = pae.getException();
            if(e instanceof ClientProtocolException) {
                //force re-initialisation upon error
                serviceInitialised = false;
                throw new EngineException(this, ci, "Exception while executing Request "
                    + "on RESTful NLP Analysis Service at "+analysisServiceUrl, e);
            } else if(e instanceof IOException) {
                //force re-initialisation upon error
                serviceInitialised = false;
                throw new EngineException(this, ci, "Exception while executing Request "
                        + "on RESTful NLP Analysis Service at "+analysisServiceUrl, e);
            } else {
                throw RuntimeException.class.cast(e);
            }
        }
        if(writeTextAnnotations){
            Iterator<Span> spans = at.getEnclosed(EnumSet.of(SpanTypeEnum.Sentence,SpanTypeEnum.Chunk));
            Sentence context = null;
            MGraph metadata = ci.getMetadata();
            Language lang = new Language(language);
            LiteralFactory lf = LiteralFactory.getInstance();
            ci.getLock().writeLock().lock();
            try { //write TextAnnotations for Named Entities
                while(spans.hasNext()){
                    Span span = spans.next();
                    switch (span.getType()) {
                        case Sentence:
                            context = (Sentence)context;
                            break;
                        default:
                            Value<NerTag> nerAnno = span.getAnnotation(NER_ANNOTATION);
                            if(nerAnno != null){
                                UriRef ta = EnhancementEngineHelper.createTextEnhancement(ci, this);
                                //add span related data
                                metadata.add(new TripleImpl(ta, ENHANCER_SELECTED_TEXT, 
                                    new PlainLiteralImpl(span.getSpan(), lang)));
                                metadata.add(new TripleImpl(ta, ENHANCER_START, 
                                    lf.createTypedLiteral(span.getStart())));
                                metadata.add(new TripleImpl(ta, ENHANCER_END, 
                                    lf.createTypedLiteral(span.getEnd())));
                                metadata.add(new TripleImpl(ta, ENHANCER_SELECTION_CONTEXT, 
                                    new PlainLiteralImpl(context == null ?
                                            getDefaultSelectionContext(at.getSpan(), span.getSpan(), span.getStart()) :
                                                context.getSpan(), lang)));
                                //add the NER type
                                if(nerAnno.value().getType() != null){
                                    metadata.add(new TripleImpl(ta,DC_TYPE,nerAnno.value().getType()));
                                }

View Full Code Here

0 1 2 3 4 5

TOP

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.