Package org.apache.stanbol.enhancer.engines.uimalocal

Source Code of org.apache.stanbol.enhancer.engines.uimalocal.UIMALocal

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.uimalocal;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Dictionary;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.UUID;

import org.apache.clerezza.rdf.core.UriRef;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Properties;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Service;
import org.apache.lucene.analysis.uima.ae.AEProvider;
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
import org.apache.stanbol.commons.caslight.Feature;
import org.apache.stanbol.commons.caslight.FeatureStructure;
import org.apache.stanbol.commons.caslight.FeatureStructureListHolder;
import org.apache.stanbol.enhancer.servicesapi.Blob;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
import org.apache.stanbol.enhancer.servicesapi.NoSuchPartException;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.metadata.TypeDescription;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* @author Mihaly Heder
*/
@Component(immediate = true, metatype = true, inherit = true, label = "UIMA Local Enhancement Engine",
        description = "Runs UIMA Analysis Engine and retuns values.")
@Service
@Properties(value = {
        @Property(name = EnhancementEngine.PROPERTY_NAME, value = "uimalocal")
})
public class UIMALocal extends AbstractEnhancementEngine<RuntimeException, RuntimeException>
        implements EnhancementEngine, ServiceProperties {


    private final Logger logger = LoggerFactory.getLogger(getClass());

    @Property(value = "sourceName", label = "UIMA source name",
            description = "The name of this UIMA source which will be used for referring internally to the UIMA endpoint")
    public static final String UIMA_SOURCENAME = "stanbol.engine.uimalocal.sourcename";

    @Property(value = "/path/to/descriptor",
            label = "UIMA descriptor file path",
            description = "The file path to the UIMA descriptor XML to load")
    public static final String UIMA_DESCRIPTOR_PATH = "stanbol.engine.uimalocal.descriptorpath";

    @Property(value = "uima.apache.org", label = "Content Part URI reference",
            description = "The URI Reference of the UIMA content part to be created. This content part will "
                    + "contain Annotations from all the resources above.")
    public static final String UIMA_CONTENTPART_URIREF = "stanbol.engine.uimalocal.contentpart.uriref";

    @Property(cardinality = 1000, value = "text/plain", label = "Supported Mime Types",
            description = "Mime Types supported by this client. This should be aligned to the capabilities of the UIMA Endpoints.")
    public static final String UIMA_SUPPORTED_MIMETYPES = "stanbol.engine.uimalocal.contentpart.mimetypes";

    public static final Integer defaultOrder = ServiceProperties.ORDERING_PRE_PROCESSING;

    private AEProvider aeProvider;
    private Set<String> SUPPORTED_MIMETYPES;
    private String uimaUri;
    private String uimaSourceName;
    private String uimaDescriptorPath;
    private List<String> uimaTypeNames;


    @Override
    protected void activate(ComponentContext ctx) throws ConfigurationException {
        super.activate(ctx);
        Dictionary<String, Object> props = ctx.getProperties();

        this.uimaUri = (String) props.get(UIMA_CONTENTPART_URIREF);
        this.uimaSourceName = (String) props.get(UIMA_SOURCENAME);
        this.uimaDescriptorPath = (String) props.get(UIMA_DESCRIPTOR_PATH);

        SUPPORTED_MIMETYPES = Collections.unmodifiableSet(new HashSet<String>(
                Arrays.asList((String[]) props.get(UIMA_SUPPORTED_MIMETYPES))));

        aeProvider = AEProviderFactory.getInstance().getAEProvider(uimaSourceName,
                uimaDescriptorPath, new HashMap<String, Object>());
        try {
            AnalysisEngine ae = aeProvider.getAE();
            TypeDescription[] aeTypes = ae.getAnalysisEngineMetaData().getTypeSystem().getTypes();
            uimaTypeNames = new ArrayList<String>();
            for (TypeDescription aeType : aeTypes) {
                String aeTypeName = aeType.getName();
                logger.info("Configuring Analysis Engine Type:" + aeTypeName);
                uimaTypeNames.add(aeTypeName);
            }
        } catch (ResourceInitializationException ex) {
            logger.error("Cannot retrieve AE from AEProvider. ", ex);
            throw new ConfigurationException(uimaDescriptorPath, "Cannot retreive AE from AEProvider", ex);
        }
    }

    @Override
    protected void deactivate(ComponentContext ctx) {

        super.deactivate(ctx);
    }

    @Override
    public int canEnhance(ContentItem ci) throws EngineException {
        if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES) != null) {
            return ENHANCE_ASYNC;
        }
        return CANNOT_ENHANCE;
    }

    @Override
    public void computeEnhancements(ContentItem ci) throws EngineException {
        Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
        if (contentPart == null) {
            throw new IllegalStateException("No ContentPart with an supported Mimetype '"
                    + SUPPORTED_MIMETYPES + "' found for ContentItem " + ci.getUri()
                    + ": This is also checked in the canEnhance method! -> This "
                    + "indicated an Bug in the implementation of the "
                    + "EnhancementJobManager!");
        }
        String text;
        try {
            text = ContentItemHelper.getText(contentPart.getValue());
        } catch (IOException e) {
            throw new InvalidContentException(this, ci, e);
        }
        JCas jcas;
        try {
            logger.info("Processing text with UIMA AE...");
            jcas = processText(text);
        } catch (ResourceInitializationException ex) {
            logger.error("Error initializing UIMA AE", ex);
            throw new EngineException("Error initializing UIMA AE", ex);
        } catch (AnalysisEngineProcessException ex) {
            logger.error("Error running UIMA AE", ex);
            throw new EngineException("Error running UIMA AE", ex);
        }

        //just for being sure
        if (jcas == null) {
            return;
        }

        for (String typeName : uimaTypeNames) {
            List<FeatureStructure> featureSetList = concertToCasLight(jcas, typeName);
            UriRef uimaUriRef = new UriRef(uimaUri);

            FeatureStructureListHolder holder;
            ci.getLock().writeLock().lock();
            try {
                holder = ci.getPart(uimaUriRef, FeatureStructureListHolder.class);
            } catch (NoSuchPartException e) {
                holder = new FeatureStructureListHolder();
                logger.info("Adding FeatureSet List Holder content part with uri:" + uimaUri);
                ci.addPart(uimaUriRef, holder);
                logger.info(uimaUri + " content part added.");
            } finally {
                ci.getLock().writeLock().unlock();
            }

            ci.getLock().writeLock().lock();
            try {
                holder.addFeatureStructureList(uimaSourceName, featureSetList);
            } finally {
                ci.getLock().writeLock().unlock();
            }
        }

    }

    @Override
    public Map<String, Object> getServiceProperties() {
        return Collections.unmodifiableMap(Collections.singletonMap(
                ENHANCEMENT_ENGINE_ORDERING,
                (Object) defaultOrder));
    }

    /*
     * process a field value executing UIMA the CAS containing it as document
     * text - From SOLR.
     */
    private JCas processText(String textFieldValue) throws ResourceInitializationException,
            AnalysisEngineProcessException {
        logger.info(new StringBuffer("Analazying text").toString());
        /*
         * get the UIMA analysis engine
         */
        AnalysisEngine ae = aeProvider.getAE();

        /*
         * create a JCas which contain the text to analyze
         */
        JCas jcas = ae.newJCas();
        jcas.setDocumentText(textFieldValue);

        /*
         * perform analysis on text field
         */
        ae.process(jcas);
        logger.info(new StringBuilder("Text processing completed").toString());
        return jcas;
    }

    private List<FeatureStructure> concertToCasLight(JCas jcas, String typeName) {
        List<FeatureStructure> ret = new ArrayList<FeatureStructure>();

        Type type = jcas.getTypeSystem().getType(typeName);
        List<org.apache.uima.cas.Feature> featList = type.getFeatures();

        for (FSIterator<org.apache.uima.cas.FeatureStructure> iterator = jcas.getFSIndexRepository().getAllIndexedFS(type); iterator.hasNext(); ) {
            org.apache.uima.cas.FeatureStructure casFs = iterator.next();
            logger.debug("Processing UIMA CAS FeatureSet:" + casFs.toString());
            FeatureStructure newFs = new FeatureStructure(UUID.randomUUID().toString(), type.getShortName());
            for (org.apache.uima.cas.Feature casF : featList) {
                String fName = casF.getShortName();
                logger.debug("Feature Name:" + fName);

                if (casF.getRange().getName().equals("uima.cas.Sofa")) {
                    continue;
                }
                if (casF.getRange().isPrimitive()) {
                    logger.debug("Getting primitive value...");
                    if (casF.getRange().getName().equals("uima.cas.String")) {
                        String fVal = casFs.getStringValue(casF);
                        newFs.addFeature(new Feature<String>(fName, fVal));
                    } else if (casF.getRange().getName().equals("uima.cas.Integer")) {
                        int fVal = casFs.getIntValue(casF);
                        newFs.addFeature(new Feature<Integer>(fName, fVal));
                    } else if (casF.getRange().getName().equals("uima.cas.Short")) {
                        short fVal = casFs.getShortValue(casF);
                        newFs.addFeature(new Feature<Integer>(fName, (int) fVal));
                    } else if (casF.getRange().getName().equals("uima.cas.Byte")) {
                        byte fVal = casFs.getByteValue(casF);
                        newFs.addFeature(new Feature<Integer>(fName, (int) fVal));
                    } else if (casF.getRange().getName().equals("uima.cas.Double")) {
                        double fVal = casFs.getDoubleValue(casF);
                        newFs.addFeature(new Feature<Double>(fName, fVal));
                    } else if (casF.getRange().getName().equals("uima.cas.Float")) {
                        float fVal = casFs.getFloatValue(casF);
                        newFs.addFeature(new Feature<Double>(fName, (double) fVal));
                    } else {
                        Object fVal = casFs.clone();
                        newFs.addFeature(new Feature<Object>(fName, fVal));
                    }

                } else {
                    logger.debug("Getting FeatureStructure value...");
                    throw new UnsupportedOperationException("This client cannot handle FeatureStructure features");
                }
                if (casFs instanceof Annotation && "coveredText".equals(fName)) {
                    newFs.setCoveredText(((Annotation) casFs).getCoveredText());
                }
            }
            logger.debug("FeatureStructure:" + newFs);
            ret.add(newFs);
        }
        return ret;
    }
}
TOP

Related Classes of org.apache.stanbol.enhancer.engines.uimalocal.UIMALocal

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.