Package org.deri.grefine.reconcile.rdf.factories

Source Code of org.deri.grefine.reconcile.rdf.factories.JenaTextSparqlQueryFactory

package org.deri.grefine.reconcile.rdf.factories;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.queryparser.classic.QueryParser;
import org.deri.grefine.reconcile.model.ReconciliationRequest;
import org.deri.grefine.reconcile.model.SearchResultItem;
import org.deri.grefine.reconcile.model.ReconciliationRequestContext.PropertyContext;
import org.deri.grefine.reconcile.util.StringUtils;
import org.json.JSONException;
import org.json.JSONWriter;

import com.google.common.collect.ImmutableList;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Literal;

/**
* factories for queries understood by <a href="http://jena.sourceforge.net/ARQ/lucene-arq.html">LARQ</a>
* notice that the queries use SPARQL 1.1 IN function so make sure you have a recent version of ARQ if you are using this class
* This class is not thread-safe. meant to be used once and die, do not try to save or reuse it is cheap to create
* @author fadmaa
*
*/
public class JenaTextSparqlQueryFactory extends AbstractSparqlQueryFactory{

  @Override
  public String getTypeSuggestSparqlQuery(String prefix, int limit) {
    return SUGGEST_TYPE_QUERY_TEMPLATE.replace("[[QUERY]]", escapeQuery(prefix)).replaceAll("\\[\\[LIMIT\\]\\]", String.valueOf(limit));
  }
 
  /**
   * @param request
   * @param searchPropertyUris
   * @return sparql query according to the syntax expected by LARQ as described in their <a href="http://jena.sourceforge.net/ARQ/lucene-arq.html">documentation</a>.
   *   It is a standard SPARQL query apart form pf:textMatch used for full text search. <b>Note that this query uses <a href="http://www.w3.org/TR/2010/WD-sparql11-query-20101014/#func-in">IN function</a> which is only available in SPARQL 1.1</b>
   */
  @Override
  public String getReconciliationSparqlQuery(ReconciliationRequest request, ImmutableList<String> searchPropertyUris) {
    //prepare type filter
    String typesFilter = "";
    if(request.getTypes().length>0){
      typesFilter = StringUtils.join(request.getTypes(), ">. } UNION ", "{?entity rdf:type <", " {", ">. }}");
    }
    //prepare context filter
    StringBuilder contextFilter = new StringBuilder();
    for(PropertyContext prop : request.getContext().getProperties()){
      contextFilter.append(PROPERTY_FILTER.replace("[[PROPERTY_URI]]", prop.getPid()).replace("[[VALUE]]", prop.getV().asSparqlValue()));
    }
    if(searchPropertyUris.size()==1){
      return getReconciliationSparqlQuery(SINGLE_LABEL_PROPERTY_RECONCILE_QUERY_TEMPLATE, searchPropertyUris, request.getQueryString(),typesFilter, contextFilter.toString(), "[[LABEL_PROPERTY_URI]]", searchPropertyUris.get(0), request.getLimit());
    }
    //prepare property URIs list (with || as separator)
    String labelFilter = StringUtils.join(searchPropertyUris, "> || ", "?p=<", "FILTER (", ">)");

    return getReconciliationSparqlQuery(RECONCILE_QUERY_TEMPLATE, searchPropertyUris, request.getQueryString(),typesFilter, contextFilter.toString(), "[[LABEL_PROPERTY_FILTER]]", labelFilter, request.getLimit());
 
 
  @Override
  public void write(JSONWriter writer) throws JSONException {
    writer.object();
    writer.key("type");  writer.value("larq");
    writer.endObject();
  }
 
  private String getReconciliationSparqlQuery(String queryTemplate, ImmutableList<String> searchPropertyUris, String query, String typesFilter, String contextFilter, String labelPlaceHolder, String labelFilter, int limit){
    String escapedQuery = escapeQuery(query);
    //the query returns a unique answer per (entity,label) pair. the *maximum* number of results is searchPropertyUris.size() * request.getLimit()
    //the answers are ordered according to their scores descendingly. thus we need to pick only the *first* request.getLimit() *unique* entity answer
    int calculatedLimit = Math.max(searchPropertyUris.size(),1) * limit;
    return queryTemplate.replace("[[QUERY]]", escapedQuery)
            .replace(labelPlaceHolder, labelFilter)
            .replace(labelPlaceHolder, labelFilter)
            .replace("[[TYPE_FILTER]]", typesFilter)
            .replace("[[CONTEXT_FILTER]]", contextFilter)
            .replace("[[LIMIT]]", String.valueOf(calculatedLimit))
            .replace("[[LIMIT]]", String.valueOf(calculatedLimit));
   
  }
 
  @Override
  public String getPropertySuggestSparqlQuery(String prefix, String typeUri, int limit) {
    return SUGGEST_PROPERTY_WITH_SPECIFIC_SUBJECT_TYPE_QUERY_TEMPLATE.replaceAll("\\[\\[QUERY\\]\\]", escapeQuery(prefix)).
                    replaceAll("\\[\\[LIMIT\\]\\]", String.valueOf(limit))
                    .replace("[[TYPE_URI]]", typeUri);
  }
 
  @Override
  public String getPropertySuggestSparqlQuery(String prefix, int limit) {
    return SUGGEST_PROPERTY_QUERY_TEMPLATE.replaceAll("\\[\\[QUERY\\]\\]", prefix).replaceAll("\\[\\[LIMIT\\]\\]", String.valueOf(limit));
  }
 
  @Override
  public String getSampleInstancesSparqlQuery(String typeUri, ImmutableList<String> searchPropertyUris, int limit) {
    return SAMPLE_INSTANCES_OF_TYPE_QUERY_TEMPLATE.replace("[[TYPE_URI]]", typeUri)
                          .replace("[[PROPERTY_URI]]", searchPropertyUris.get(0))
                          .replace("[[LIMIT]]", String.valueOf(limit));
  }

 
  @Override
  public ImmutableList<SearchResultItem> wrapTypeSuggestResultSet(ResultSet resultSet, String prefix, int limit) {
    List<SearchResultItem> result = new ArrayList<SearchResultItem>();
    while(resultSet.hasNext()){
      QuerySolution sol = resultSet.nextSolution();
      String pUri = sol.getResource("type").getURI();
      String label = getPreferredLabel(sol);
      result.add(new SearchResultItem(pUri, label));
    }
    return ImmutableList.copyOf(result);
  }

  @Override
  public ImmutableList<SearchResultItem> wrapPropertySuggestResultSet(ResultSet resultSet, String prefix, int limit) {
    List<SearchResultItem> result = new ArrayList<SearchResultItem>();
    while(resultSet.hasNext()){
      QuerySolution sol = resultSet.nextSolution();
      String pUri = sol.getResource("p").getURI();
      String label = getPreferredLabel(sol);
      result.add(new SearchResultItem(pUri, label));
    }
    return ImmutableList.copyOf(result);
  }

  @Override
  public String getEntitySearchSparqlQuery(String prefix, ImmutableList<String> searchPropertyUris, int limit) {
    //prepare property URIs list (with || as separator)
    String labelFilter = StringUtils.join(searchPropertyUris, "> || ", "?label_prop=<", "FILTER (", ">)");
    int calculatedLimit = searchPropertyUris.size() * limit;//because we want the maximum possible number
    return SEARCH_ENTITY_QUERY_TEMPLATE.replace("[[QUERY]]", escapeQuery(prefix))
                      .replace("[[LABEL_PROPERTY_FILTER]]", labelFilter)
                      .replace("[[LIMIT]]",String.valueOf(calculatedLimit))
                      .replace("[[LIMIT]]",String.valueOf(calculatedLimit));
  }
 
  private String getPreferredLabel(QuerySolution sol){
    Literal s1 = sol.getLiteral("score1");
    Literal s2 = sol.getLiteral("score2");
    if(s1!=null){
      if(s2==null){
        return sol.getLiteral("label1").getString();
      }else{
        if(s1.getDouble()>s2.getDouble()){
          return sol.getLiteral("label1").getString();
        }else{
          return sol.getLiteral("label2").getString();
        }
      }
    }else if(s2!=null){
      return sol.getLiteral("label2").getString();
    }else{
      return "";
    }
  }

  private String escapeQuery(String q){
    String s = QueryParser.escape(q);
    return s.replaceAll("\\\\","\\\\\\\\").replaceAll("'", "\\\\'");
  }
 
  /**
   * A (String, double) pair
   * @author fadmaa
   *
   */
  protected static class ScoredLabel{
    final String label;
    final double score;
    public ScoredLabel(String label, double score) {
      this.label = label;
      this.score = score;
    }
    public double getScore() {
      return score;
    }
   
  }
 
  private static final String SUGGEST_TYPE_QUERY_TEMPLATE =
      "PREFIX text:<http://jena.apache.org/text#> " +
        "SELECT DISTINCT ?type ?label1 ?label2  " +
        "WHERE{" +
        "[] a ?type. " +
        "{" +
        "OPTIONAL {?type <http://www.w3.org/2000/01/rdf-schema#label> (?label1  '[[QUERY]]*' [[LIMIT]] ) . " +
        "?type <http://www.w3.org/2000/01/rdf-schema#label>  ?label1 . }" +
        "OPTIONAL {?type <http://www.w3.org/2004/02/skos/core#prefLabel> (?label2 '[[QUERY]]*' [[LIMIT]] )." +
        "?type <http://www.w3.org/2004/02/skos/core#prefLabel> ?label2.} " +
        "FILTER (bound(?label1) || bound(?label2))" +
        "}" +
        "} LIMIT [[LIMIT]]";
 
  private static final String SUGGEST_PROPERTY_WITH_SPECIFIC_SUBJECT_TYPE_QUERY_TEMPLATE =
        "PREFIX text:<http://jena.apache.org/text#> " +
        "SELECT DISTINCT ?p ?label1  ?label2 " +
        "WHERE{" +
        "[] a <[[TYPE_URI]]>; " +
        "?p ?v. " +
        "{" +
        "OPTIONAL {?p <http://www.w3.org/2000/01/rdf-schema#label> (?label1 '[[QUERY]]*' [[LIMIT]]). " +
        "?p <http://www.w3.org/2000/01/rdf-schema#label> ?label1. }" +
        "OPTIONAL {?p <http://www.w3.org/2004/02/skos/core#prefLabel> (?label2 '[[QUERY]]*' [[LIMIT]]). " +
        "?p <http://www.w3.org/2004/02/skos/core#prefLabel> ?label2. }" +
        "FILTER (bound(?label1) || bound(?label2))" +
        "}" +
        "} LIMIT [[LIMIT]]";
 
  private static final String SUGGEST_PROPERTY_QUERY_TEMPLATE =
    "PREFIX text:<http://jena.apache.org/text#> " +
    "SELECT DISTINCT ?p ?label1 ?label2 " +
    "WHERE{" +
    "[] ?p ?v. " +
    "{" +
    "OPTIONAL {?p <http://www.w3.org/2000/01/rdf-schema#label> (?label1 '[[QUERY]]*'  [[LIMIT]]). " +
    "?p <http://www.w3.org/2000/01/rdf-schema#label> ?label1. }" +
    "OPTIONAL {?p <http://www.w3.org/2004/02/skos/core#prefLabel> (?label2 '[[QUERY]]*' [[LIMIT]]). " +
    "?p <http://www.w3.org/2004/02/skos/core#prefLabel> ?label2. }" +
    "FILTER (bound(?label1) || bound(?label2))" +
    "}" +
    "} LIMIT [[LIMIT]]";
 
  private static final String RECONCILE_QUERY_TEMPLATE =
        "PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> " +
        "PREFIX text:<http://jena.apache.org/text#> " +
        "PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
        "SELECT ?entity ?label " +
        "WHERE" +
        "{" +
          "?entity ?p (?label '[[QUERY]]' [[LIMIT]])." +
          "?entity ?p ?label." +
          "[[LABEL_PROPERTY_FILTER]]" +
          "[[TYPE_FILTER]]" +
          "[[CONTEXT_FILTER]]" +
        " FILTER (isIRI(?entity))}GROUP BY ?entity ?label " +
        "LIMIT [[LIMIT]]";
  private static final String SINGLE_LABEL_PROPERTY_RECONCILE_QUERY_TEMPLATE =
    "PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> " +
    "PREFIX text:<http://jena.apache.org/text#> " +
    "PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
    "SELECT ?entity ?label " +
    "WHERE " +
    "{ " +
      "?entity text:query (<[[LABEL_PROPERTY_URI]]> '[[QUERY]]' [[LIMIT]]) . " +
      "?entity <[[LABEL_PROPERTY_URI]]> ?label ." +
      "[[TYPE_FILTER]]" +
      "[[CONTEXT_FILTER]]" +
    "}GROUP BY ?entity ?label " +
    "ORDER BY DESC(?score1) LIMIT [[LIMIT]]";
  private static final String PROPERTY_FILTER = "?entity <[[PROPERTY_URI]]> [[VALUE]]. ";
 
  private static final String SAMPLE_INSTANCES_OF_TYPE_QUERY_TEMPLATE =
      "SELECT ?entity (SAMPLE(?label) AS ?label1) " +
      "WHERE{" +
      "?entity a <[[TYPE_URI]]>. " +
      "?entity <[[PROPERTY_URI]]> ?label." +
      "}GROUP BY ?entity LIMIT [[LIMIT]]";
 
  private static final String SEARCH_ENTITY_QUERY_TEMPLATE =
      "PREFIX text:<http://jena.apache.org/text#> " +
      "SELECT ?entity ?label " +
      "WHERE{" +
      "?entity ?label_prop (?label '[[QUERY]]*' [[LIMIT]]) . " +
      "?entity ?label_prop ?label . " +
      "[[LABEL_PROPERTY_FILTER]]. " +
      "} LIMIT [[LIMIT]]";
}
TOP

Related Classes of org.deri.grefine.reconcile.rdf.factories.JenaTextSparqlQueryFactory

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.