Package org.apache.stanbol.entityhub.jersey.resource.reconcile

Source Code of org.apache.stanbol.entityhub.jersey.resource.reconcile.BaseGoogleRefineReconcileResource

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.entityhub.jersey.resource.reconcile;

import static javax.ws.rs.core.MediaType.TEXT_HTML;
import static org.apache.stanbol.commons.web.base.CorsHelper.addCORSOrigin;
import static org.apache.stanbol.commons.web.base.CorsHelper.enableCORS;
import static org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum.resultScore;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import javax.servlet.ServletContext;
import javax.ws.rs.FormParam;
import javax.ws.rs.GET;
import javax.ws.rs.OPTIONS;
import javax.ws.rs.POST;
import javax.ws.rs.QueryParam;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.HttpHeaders;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.ResponseBuilder;

import org.apache.stanbol.commons.namespaceprefix.NamespaceMappingUtils;
import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
import org.apache.stanbol.commons.viewable.Viewable;
import org.apache.stanbol.commons.web.base.ContextHelper;
import org.apache.stanbol.commons.web.base.CorsHelper;
import org.apache.stanbol.commons.web.base.resource.BaseStanbolResource;
import org.apache.stanbol.commons.web.base.utils.MediaTypeUtil;
import org.apache.stanbol.entityhub.jersey.grefine.ReconcileProperty;
import org.apache.stanbol.entityhub.jersey.grefine.ReconcileQuery;
import org.apache.stanbol.entityhub.jersey.grefine.ReconcileValue;
import org.apache.stanbol.entityhub.jersey.grefine.Utils;
import org.apache.stanbol.entityhub.servicesapi.EntityhubException;
import org.apache.stanbol.entityhub.servicesapi.defaults.SpecialFieldEnum;
import org.apache.stanbol.entityhub.servicesapi.model.Reference;
import org.apache.stanbol.entityhub.servicesapi.model.Representation;
import org.apache.stanbol.entityhub.servicesapi.model.Text;
import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
import org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint.MODE;
import org.apache.stanbol.entityhub.servicesapi.site.SiteException;
import org.apache.stanbol.entityhub.servicesapi.util.ModelUtils;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
* Implementation of the <a href="http://code.google.com/p/google-refine/wiki/ReconciliationServiceApi">
* Google Refine Reconciliation API</a>.
* This base class is used to support this API for the Entityhub, ReferencedSites
* and the ReferencedSiteManager.
*
* @author Rupert Westenthaler
*
*/
public abstract class BaseGoogleRefineReconcileResource extends BaseStanbolResource {

    private final Logger log = LoggerFactory.getLogger(BaseGoogleRefineReconcileResource.class);

    private static final String NAME_FIELD = "http://www.w3.org/2000/01/rdf-schema#label";
    private static final String TYPE_FIELD = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
    private static final Collection<String> SELECTED_FIELDS = Collections.unmodifiableList(
        Arrays.asList(NAME_FIELD,TYPE_FIELD));

    private static final Comparator<JSONObject> resultScoreComparator = new Comparator<JSONObject>() {

        @Override
        public int compare(JSONObject o1, JSONObject o2) {
            try {
                return Double.compare(o2.getDouble("score"),o1.getDouble("score"));
            } catch (JSONException e) {
                throw new IllegalStateException(e);
            }
        }
       
    };
   
    protected final NamespacePrefixService nsPrefixService;

    protected BaseGoogleRefineReconcileResource(ServletContext context){
        super();
        nsPrefixService = ContextHelper.getServiceFromContext(
            NamespacePrefixService.class, context);
    }
   
    @OPTIONS
    public final Response handleCorsPreflight(@Context HttpHeaders headers){
        ResponseBuilder res = Response.ok();
        enableCORS(servletContext, res, headers);
        return res.build();
    }
   
    @POST
    public final Response queryPOST(@FormParam(value="query") String query,
                          @FormParam(value="queries")String queries,
                          @FormParam(value="callback")String callback,
                          @Context HttpHeaders header) throws WebApplicationException {
        return query(query,queries,callback,header);
    }
    @GET
    public final Response query(@QueryParam(value="query") String query,
                          @QueryParam(value="queries")String queries,
                          @QueryParam(value="callback")String callback,
                          @Context HttpHeaders header) throws WebApplicationException {
        if(callback != null){
            log.info("callback: {}",callback);
            try {
                return sendMetadata(callback,header);
            } catch (JSONException e) {
                throw new WebApplicationException(e);
            }
        }
        JSONObject jResult;
        if(query != null){
            log.debug("query: {}",query);
            try {
                jResult = reconcile(ReconcileQuery.parseQuery(query,nsPrefixService));
            } catch (JSONException e) {
                throw new WebApplicationException(
                    Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(
                        String.format("Error while writing Reconcilation results (%s: %s)",
                            JSONException.class.getSimpleName(),e.getMessage())).build());
            } catch (EntityhubException e) {
                throw new WebApplicationException(
                    Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(
                        String.format("Error while searching on %s (%s: %s)",
                            getSiteName(),SiteException.class.getSimpleName(),e.getMessage())).build());
            }
        } else if(queries != null){
            log.debug("multi-query: {}",queries);
            try {
                jResult = reoncile(ReconcileQuery.parseQueries(queries,nsPrefixService));
            } catch (JSONException e) {
                throw new WebApplicationException(
                    Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(
                        String.format("Error while writing Reconcilation results (%s: %s)",
                            JSONException.class.getSimpleName(),e.getMessage())).build());
            } catch (EntityhubException e) {
                throw new WebApplicationException(
                    Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(
                        String.format("Error while searching on %s (%s: %s)",
                            getSiteName(),SiteException.class.getSimpleName(),e.getMessage())).build());
            }
        } else {
            if(MediaTypeUtil.isAcceptableMediaType(header,MediaType.TEXT_HTML_TYPE)){
                ResponseBuilder rb = Response.ok(new Viewable("index", this));
                rb.header(HttpHeaders.CONTENT_TYPE, TEXT_HTML+"; charset=utf-8");
                addCORSOrigin(servletContext, rb, header);
                return rb.build();
            }
            throw new WebApplicationException(
                Response.status(Response.Status.BAD_REQUEST).entity(
                    "One of the 'query' or 'querues' or 'callback=jsonp' parameter MUST BE present!").build());
        }
        //return the results and enable Cors
        ResponseBuilder rb = Response.ok(jResult.toString()).type(MediaType.APPLICATION_JSON_TYPE);
        CorsHelper.addCORSOrigin(servletContext, rb, header);
        return rb.build();

    }


    private JSONObject reoncile(Map<String,ReconcileQuery> parsedQueries) throws JSONException, EntityhubException {
        JSONObject container = new JSONObject();
        for(Entry<String,ReconcileQuery> query : parsedQueries.entrySet()){
            container.put(query.getKey(), reconcile(query.getValue()));
        }
        return container;
    }

    private JSONObject reconcile(ReconcileQuery rQuery) throws JSONException, EntityhubException {
        FieldQuery query = createFieldQuery();
        query.addSelectedFields(SELECTED_FIELDS);
        addNameConstraint(rQuery, query);
        addTypeConstraint(rQuery, query);
        addPropertyConstraints(rQuery, query);
        query.setLimit(query.getLimit());
        QueryResultList<Representation> results = performQuery(query);
        List<JSONObject> jResultList = new ArrayList<JSONObject>(results.size());
        //we need to know the highest score to normalise between [0..1]
        double maxQueryScore = -1;
        if(!results.isEmpty()){
            for(Representation r : results){
                if(maxQueryScore < 0){
                    maxQueryScore = r.getFirst(resultScore.getUri(),Number.class).doubleValue();
                }
                JSONObject jResult = new JSONObject();
                jResult.put("id", r.getId());
                double similarity = 0.0;
                String name = null; //the name returned for the entity
                for(Iterator<Text> labels = r.getText(NAME_FIELD);labels.hasNext();){
                    Text label = labels.next();
                    if(label.getText().equalsIgnoreCase(rQuery.getQuery())){
                        name = label.getText();
                        similarity = 1.0;
                        break;
                    }
                    double curSimilarity = Utils.levenshtein(rQuery.getQuery(), label.getText());
                    if(similarity < curSimilarity){
                        name = label.getText();
                        similarity = curSimilarity;
                    }
                }
                //set the selected name
                jResult.put("name", name);
                Iterator<Reference> types = r.getReferences(TYPE_FIELD);
                if(types != null && types.hasNext()) {
                    jResult.put("type", new JSONArray(ModelUtils.asCollection(types)));
                }
                double normalisedScore = r.getFirst(resultScore.getUri(),Number.class).doubleValue();
                normalisedScore = normalisedScore*similarity/maxQueryScore;
                jResult.put("score", normalisedScore);
                jResult.put("match", similarity >= 0);
                jResultList.add(jResult);
            }
        } //else no results ... nothing todo
        //sort results based on score
        Collections.sort(jResultList, resultScoreComparator);
        JSONObject jResultContainer = new JSONObject();
        jResultContainer.put("result", new JSONArray(jResultList));
        return jResultContainer;
    }
    /**
     * Executes the parsed query. Called for every reconcile query after the
     * name, type and property constraints were added to the query.
     * @param query the query to execute
     * @return the results of the query. For every result the result score,
     * the labels and the types are read when creating the reconciliation results.
     * @throws EntityhubException on any error while executing the query
     */
    protected abstract QueryResultList<Representation> performQuery(FieldQuery query) throws EntityhubException;
   
    /**
     * Getter for the name of the Site. The name is used in the message of
     * errors reported while searching and in the name provided by the
     * metadata of the reconciliation service.
     * @return the name of the site
     */
    protected abstract String getSiteName();
   
    /**
     * Creates a new FieldQuery. Called once for every reconcile query. The
     * selected fields, the constraints and the limit are set on the returned
     * instance before it is executed.
     * @return the new query
     */
    protected abstract FieldQuery createFieldQuery();
   
    /**
     * @param rQuery
     * @param query
     */
    private void addPropertyConstraints(ReconcileQuery rQuery, FieldQuery query) {
        Collection<String> ids = new HashSet<String>();
        List<String> texts = new ArrayList<String>(); // keep order for texts
        Collection<Object> values = new HashSet<Object>();
       
        //hold all references for @references special property
        HashSet<String> references = new HashSet<String>();
        //holds all texts for @fullText special property
        List<String> fullText = new ArrayList<String>();
        //holds the context for the @similarity special property
        StringBuilder similarityContext = new StringBuilder();
        //the field used for the @similarity special property
        HashSet<String> similarityFields = new LinkedHashSet<String>();
       
        for (Entry<ReconcileProperty,Collection<ReconcileValue>> propertyEntry : rQuery.getProperties()) {
            ReconcileProperty property = propertyEntry.getKey();
            // collect the properties
            for (ReconcileValue value : propertyEntry.getValue()) {
                if (value.getId() != null) {
                    ids.add(value.getId());
                }
                if (value.getValue() instanceof String) {
                    texts.add((String) value.getValue());
                } else {
                    values.add(value.getValue());
                }
            }
            //handle supported special properties
            if(property.isSpecial()){
                if(property.getName().equalsIgnoreCase("references")){
                    //Note that multiple "references" properties might be present
                    //if Users do parse parameters - so we need to collect all values
                    if(property.getParameter() != null){
                        log.warn("parameters are not supported for @references -> ignore '{}'",property.getParameter());
                    }
                    if(ids.isEmpty()){
                        log.warn("No URI values present for parsed @references property! (values: "
                            +propertyEntry.getValue());
                    }
                    for(String id : ids){
                        references.add(id);
                    }
                } else if(property.getName().equalsIgnoreCase("fulltext")){
                    //Note that multiple "fullText" properties might be present
                    //if Users do parse parameters - so we need to collect all values
                    if(property.getParameter() != null){
                        log.warn("parameters are not supported for @fullText -> ignore '{}'",property.getParameter());
                    }
                    for(String text : texts){ //add the values
                        fullText.add(text);
                    }
                } else if(property.getName().equalsIgnoreCase("similarity")){
                    String propUri = property.getParameter() != null ?
                            nsPrefixService.getFullName(property.getParameter()) :
                                SpecialFieldEnum.fullText.getUri();
                    if(propUri != null){
                        similarityFields.add(propUri);
                    } else {
                        //TODO: maybe throw an Exception instead
                        log.warn("Unknown prefix '{}' used by Google Refine query parameter of property '{}'! "
                            + "Will use the full text field as fallback",
                            NamespaceMappingUtils.getPrefix(property.getParameter()),property);
                        similarityFields.add(SpecialFieldEnum.fullText.getUri());
                    }
                    for(String text : texts){ //Append the text values to the context
                        similarityContext.append(text).append(' ');
                    }
                } else {
                    //TODO: implement LDPATH support
                    log.warn("ignore unsupported special property {}",property);
                }
            } else { //no special property
                // add the Constraint to the FieldQuery
                // TODO: how to deal with values of different types
                //  * currently References > Text > Datatype. First present value
                //    is used
                //  * non Reference | Text | Datatype values are ignored
                if (!ids.isEmpty()) {
                    // only references -> create reference constraint
                    query.setConstraint(property.getName(), new ReferenceConstraint(ids));
                    if (ids.size() != propertyEntry.getValue().size()) {
                        log.info("Only some of the parsed values of the field {} contain"
                                 + "references -> will ignore values with missing references");
                    }
                } else if (!texts.isEmpty()) {
                    // NOTE: This will use OR over all texts. To enforce AND one
                    // would need to parse a single string with all values e.g. by
                    // using StringUtils.join(texts," ")
                    query.setConstraint(property.getName(), new TextConstraint(texts));
                    if (ids.size() != propertyEntry.getValue().size()) {
                        log.info("Only some of the parsed values of the field {} are"
                                 + "of type String -> will ignore non-string values");
                    }
                } else if(!values.isEmpty()){
                    query.setConstraint(property.getName(), new ValueConstraint(values));
                } //else no values ... ignore property
            }
            //clean up
            ids.clear();
            texts.clear();
            values.clear();
        }
        //now add constraints for the collected special properties
        if(!references.isEmpty()){
            //add references constraint
            ReferenceConstraint refConstraint = new ReferenceConstraint(references, MODE.all);
            query.setConstraint(SpecialFieldEnum.references.getUri(), refConstraint);
        }
        if(!fullText.isEmpty()){
            TextConstraint textConstraint = new TextConstraint(fullText);
            query.setConstraint(SpecialFieldEnum.fullText.getUri(), textConstraint);
            //add full text constraint
        }
        if(similarityContext.length() > 0 && !similarityFields.isEmpty()){
            //add similarity constraint
            Iterator<String> fieldIt = similarityFields.iterator();
            String field = fieldIt.next();
            SimilarityConstraint simConstraint;
            if(fieldIt.hasNext()){
                List<String> addFields = new ArrayList<String>(similarityFields.size()-1);
                while(fieldIt.hasNext()){
                    addFields.add(fieldIt.next());
                }
                simConstraint = new SimilarityConstraint(similarityContext.toString(),addFields);
            } else {
                simConstraint = new SimilarityConstraint(similarityContext.toString());
            }
            query.setConstraint(field, simConstraint);
        }
    }
   
   
    /**
     * @param rQuery
     * @param query
     */
    private void addTypeConstraint(ReconcileQuery rQuery, FieldQuery query) {
        //maybe an other column was also mapped to the TYPE_FIELD property
        Collection<ReconcileValue> additionalTypes = rQuery.removeProperty(TYPE_FIELD);
        Set<String> queryTypes = rQuery.getTypes();
        Set<String> types = null;
        if(additionalTypes == null){
            if(queryTypes != null){
                types = queryTypes;
            }
        } else {
            types = new HashSet<String>();
            if(queryTypes != null){
                types.add(rQuery.getQuery());
            }
            for(ReconcileValue value : additionalTypes){
                if(value != null){
                    if(value.getId() != null){
                        types.add(value.getId());
                    } else if (value.getValue() instanceof String){
                        //TODO: check if the assumption that String values are
                        //good for types is valid
                        types.add((String)value.getValue());
                    }
                } //else null -> ignore
            }
        }
        if (!types.isEmpty()) {
            query.setConstraint(TYPE_FIELD, new ReferenceConstraint(types));
        }
    }
    /**
     * @param rQuery
     * @param query
     */
    private void addNameConstraint(ReconcileQuery rQuery, FieldQuery query) {
        //maybe an other column was also mapped to the NAME_FIELD property
        Collection<ReconcileValue> additionalValues = rQuery.removeProperty(NAME_FIELD);
        List<String> values;
        if(additionalValues == null){
            values = Collections.singletonList(rQuery.getQuery());
        } else {
            values = new ArrayList<String>(additionalValues.size()+1);
            values.add(rQuery.getQuery());
            for(ReconcileValue value : additionalValues){
                if(value != null && value.getValue() instanceof String){
                    values.add((String)value.getValue());
                }
            }
        }
        query.setConstraint(NAME_FIELD, new TextConstraint(values));
    }
    /**
     * Called on requests for the Metadata for the Reconciliation service
     * @param callback
     * @param header
     * @return
     * @throws JSONException
     */
    protected Response sendMetadata(String callback, HttpHeaders header) throws JSONException {
        //TODO: implement!!
        JSONObject jMetadata = new JSONObject();
        jMetadata.put("name", "Stanbol Entityhub: "+getSiteName());
        StringBuilder callbackString = new StringBuilder(callback);
        callbackString.append('(');
        callbackString.append(jMetadata.toString());
        callbackString.append(')');
        ResponseBuilder rb = Response.ok(callbackString.toString()).type(MediaType.APPLICATION_JSON_TYPE);
        CorsHelper.addCORSOrigin(servletContext, rb, header);
        return rb.build();
    }
}
TOP

Related Classes of org.apache.stanbol.entityhub.jersey.resource.reconcile.BaseGoogleRefineReconcileResource

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.