package org.deri.grefine.reconcile.rdf.factories;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.deri.grefine.reconcile.model.ReconciliationRequest;
import org.deri.grefine.reconcile.model.SearchResultItem;
import org.deri.grefine.reconcile.model.ReconciliationRequestContext.PropertyContext;
import org.deri.grefine.reconcile.util.StringUtils;
import org.json.JSONException;
import org.json.JSONWriter;
import com.google.common.collect.ImmutableList;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Literal;
/**
* factories for queries understood by <a href="http://jena.sourceforge.net/ARQ/lucene-arq.html">LARQ</a>
* notice that the queries use SPARQL 1.1 IN function so make sure you have a recent version of ARQ if you are using this class
* This class is not thread-safe. meant to be used once and die, do not try to save or reuse it is cheap to create
* @author fadmaa
*
*/
public class JenaTextSparqlQueryFactory extends AbstractSparqlQueryFactory{
@Override
public String getTypeSuggestSparqlQuery(String prefix, int limit) {
return SUGGEST_TYPE_QUERY_TEMPLATE.replace("[[QUERY]]", escapeQuery(prefix)).replaceAll("\\[\\[LIMIT\\]\\]", String.valueOf(limit));
}
/**
* @param request
* @param searchPropertyUris
* @return sparql query according to the syntax expected by LARQ as described in their <a href="http://jena.sourceforge.net/ARQ/lucene-arq.html">documentation</a>.
* It is a standard SPARQL query apart form pf:textMatch used for full text search. <b>Note that this query uses <a href="http://www.w3.org/TR/2010/WD-sparql11-query-20101014/#func-in">IN function</a> which is only available in SPARQL 1.1</b>
*/
@Override
public String getReconciliationSparqlQuery(ReconciliationRequest request, ImmutableList<String> searchPropertyUris) {
//prepare type filter
String typesFilter = "";
if(request.getTypes().length>0){
typesFilter = StringUtils.join(request.getTypes(), ">. } UNION ", "{?entity rdf:type <", " {", ">. }}");
}
//prepare context filter
StringBuilder contextFilter = new StringBuilder();
for(PropertyContext prop : request.getContext().getProperties()){
contextFilter.append(PROPERTY_FILTER.replace("[[PROPERTY_URI]]", prop.getPid()).replace("[[VALUE]]", prop.getV().asSparqlValue()));
}
if(searchPropertyUris.size()==1){
return getReconciliationSparqlQuery(SINGLE_LABEL_PROPERTY_RECONCILE_QUERY_TEMPLATE, searchPropertyUris, request.getQueryString(),typesFilter, contextFilter.toString(), "[[LABEL_PROPERTY_URI]]", searchPropertyUris.get(0), request.getLimit());
}
//prepare property URIs list (with || as separator)
String labelFilter = StringUtils.join(searchPropertyUris, "> || ", "?p=<", "FILTER (", ">)");
return getReconciliationSparqlQuery(RECONCILE_QUERY_TEMPLATE, searchPropertyUris, request.getQueryString(),typesFilter, contextFilter.toString(), "[[LABEL_PROPERTY_FILTER]]", labelFilter, request.getLimit());
}
@Override
public void write(JSONWriter writer) throws JSONException {
writer.object();
writer.key("type"); writer.value("larq");
writer.endObject();
}
private String getReconciliationSparqlQuery(String queryTemplate, ImmutableList<String> searchPropertyUris, String query, String typesFilter, String contextFilter, String labelPlaceHolder, String labelFilter, int limit){
String escapedQuery = escapeQuery(query);
//the query returns a unique answer per (entity,label) pair. the *maximum* number of results is searchPropertyUris.size() * request.getLimit()
//the answers are ordered according to their scores descendingly. thus we need to pick only the *first* request.getLimit() *unique* entity answer
int calculatedLimit = Math.max(searchPropertyUris.size(),1) * limit;
return queryTemplate.replace("[[QUERY]]", escapedQuery)
.replace(labelPlaceHolder, labelFilter)
.replace(labelPlaceHolder, labelFilter)
.replace("[[TYPE_FILTER]]", typesFilter)
.replace("[[CONTEXT_FILTER]]", contextFilter)
.replace("[[LIMIT]]", String.valueOf(calculatedLimit))
.replace("[[LIMIT]]", String.valueOf(calculatedLimit));
}
@Override
public String getPropertySuggestSparqlQuery(String prefix, String typeUri, int limit) {
return SUGGEST_PROPERTY_WITH_SPECIFIC_SUBJECT_TYPE_QUERY_TEMPLATE.replaceAll("\\[\\[QUERY\\]\\]", escapeQuery(prefix)).
replaceAll("\\[\\[LIMIT\\]\\]", String.valueOf(limit))
.replace("[[TYPE_URI]]", typeUri);
}
@Override
public String getPropertySuggestSparqlQuery(String prefix, int limit) {
return SUGGEST_PROPERTY_QUERY_TEMPLATE.replaceAll("\\[\\[QUERY\\]\\]", prefix).replaceAll("\\[\\[LIMIT\\]\\]", String.valueOf(limit));
}
@Override
public String getSampleInstancesSparqlQuery(String typeUri, ImmutableList<String> searchPropertyUris, int limit) {
return SAMPLE_INSTANCES_OF_TYPE_QUERY_TEMPLATE.replace("[[TYPE_URI]]", typeUri)
.replace("[[PROPERTY_URI]]", searchPropertyUris.get(0))
.replace("[[LIMIT]]", String.valueOf(limit));
}
@Override
public ImmutableList<SearchResultItem> wrapTypeSuggestResultSet(ResultSet resultSet, String prefix, int limit) {
List<SearchResultItem> result = new ArrayList<SearchResultItem>();
while(resultSet.hasNext()){
QuerySolution sol = resultSet.nextSolution();
String pUri = sol.getResource("type").getURI();
String label = getPreferredLabel(sol);
result.add(new SearchResultItem(pUri, label));
}
return ImmutableList.copyOf(result);
}
@Override
public ImmutableList<SearchResultItem> wrapPropertySuggestResultSet(ResultSet resultSet, String prefix, int limit) {
List<SearchResultItem> result = new ArrayList<SearchResultItem>();
while(resultSet.hasNext()){
QuerySolution sol = resultSet.nextSolution();
String pUri = sol.getResource("p").getURI();
String label = getPreferredLabel(sol);
result.add(new SearchResultItem(pUri, label));
}
return ImmutableList.copyOf(result);
}
@Override
public String getEntitySearchSparqlQuery(String prefix, ImmutableList<String> searchPropertyUris, int limit) {
//prepare property URIs list (with || as separator)
String labelFilter = StringUtils.join(searchPropertyUris, "> || ", "?label_prop=<", "FILTER (", ">)");
int calculatedLimit = searchPropertyUris.size() * limit;//because we want the maximum possible number
return SEARCH_ENTITY_QUERY_TEMPLATE.replace("[[QUERY]]", escapeQuery(prefix))
.replace("[[LABEL_PROPERTY_FILTER]]", labelFilter)
.replace("[[LIMIT]]",String.valueOf(calculatedLimit))
.replace("[[LIMIT]]",String.valueOf(calculatedLimit));
}
private String getPreferredLabel(QuerySolution sol){
Literal s1 = sol.getLiteral("score1");
Literal s2 = sol.getLiteral("score2");
if(s1!=null){
if(s2==null){
return sol.getLiteral("label1").getString();
}else{
if(s1.getDouble()>s2.getDouble()){
return sol.getLiteral("label1").getString();
}else{
return sol.getLiteral("label2").getString();
}
}
}else if(s2!=null){
return sol.getLiteral("label2").getString();
}else{
return "";
}
}
private String escapeQuery(String q){
String s = QueryParser.escape(q);
return s.replaceAll("\\\\","\\\\\\\\").replaceAll("'", "\\\\'");
}
/**
* A (String, double) pair
* @author fadmaa
*
*/
protected static class ScoredLabel{
final String label;
final double score;
public ScoredLabel(String label, double score) {
this.label = label;
this.score = score;
}
public double getScore() {
return score;
}
}
private static final String SUGGEST_TYPE_QUERY_TEMPLATE =
"PREFIX text:<http://jena.apache.org/text#> " +
"SELECT DISTINCT ?type ?label1 ?label2 " +
"WHERE{" +
"[] a ?type. " +
"{" +
"OPTIONAL {?type <http://www.w3.org/2000/01/rdf-schema#label> (?label1 '[[QUERY]]*' [[LIMIT]] ) . " +
"?type <http://www.w3.org/2000/01/rdf-schema#label> ?label1 . }" +
"OPTIONAL {?type <http://www.w3.org/2004/02/skos/core#prefLabel> (?label2 '[[QUERY]]*' [[LIMIT]] )." +
"?type <http://www.w3.org/2004/02/skos/core#prefLabel> ?label2.} " +
"FILTER (bound(?label1) || bound(?label2))" +
"}" +
"} LIMIT [[LIMIT]]";
private static final String SUGGEST_PROPERTY_WITH_SPECIFIC_SUBJECT_TYPE_QUERY_TEMPLATE =
"PREFIX text:<http://jena.apache.org/text#> " +
"SELECT DISTINCT ?p ?label1 ?label2 " +
"WHERE{" +
"[] a <[[TYPE_URI]]>; " +
"?p ?v. " +
"{" +
"OPTIONAL {?p <http://www.w3.org/2000/01/rdf-schema#label> (?label1 '[[QUERY]]*' [[LIMIT]]). " +
"?p <http://www.w3.org/2000/01/rdf-schema#label> ?label1. }" +
"OPTIONAL {?p <http://www.w3.org/2004/02/skos/core#prefLabel> (?label2 '[[QUERY]]*' [[LIMIT]]). " +
"?p <http://www.w3.org/2004/02/skos/core#prefLabel> ?label2. }" +
"FILTER (bound(?label1) || bound(?label2))" +
"}" +
"} LIMIT [[LIMIT]]";
private static final String SUGGEST_PROPERTY_QUERY_TEMPLATE =
"PREFIX text:<http://jena.apache.org/text#> " +
"SELECT DISTINCT ?p ?label1 ?label2 " +
"WHERE{" +
"[] ?p ?v. " +
"{" +
"OPTIONAL {?p <http://www.w3.org/2000/01/rdf-schema#label> (?label1 '[[QUERY]]*' [[LIMIT]]). " +
"?p <http://www.w3.org/2000/01/rdf-schema#label> ?label1. }" +
"OPTIONAL {?p <http://www.w3.org/2004/02/skos/core#prefLabel> (?label2 '[[QUERY]]*' [[LIMIT]]). " +
"?p <http://www.w3.org/2004/02/skos/core#prefLabel> ?label2. }" +
"FILTER (bound(?label1) || bound(?label2))" +
"}" +
"} LIMIT [[LIMIT]]";
private static final String RECONCILE_QUERY_TEMPLATE =
"PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> " +
"PREFIX text:<http://jena.apache.org/text#> " +
"PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> "+
"SELECT ?entity ?label " +
"WHERE" +
"{" +
"?entity ?p (?label '[[QUERY]]' [[LIMIT]])." +
"?entity ?p ?label." +
"[[LABEL_PROPERTY_FILTER]]" +
"[[TYPE_FILTER]]" +
"[[CONTEXT_FILTER]]" +
" FILTER (isIRI(?entity))}GROUP BY ?entity ?label " +
"LIMIT [[LIMIT]]";
private static final String SINGLE_LABEL_PROPERTY_RECONCILE_QUERY_TEMPLATE =
"PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> " +
"PREFIX text:<http://jena.apache.org/text#> " +
"PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> "+
"SELECT ?entity ?label " +
"WHERE " +
"{ " +
"?entity text:query (<[[LABEL_PROPERTY_URI]]> '[[QUERY]]' [[LIMIT]]) . " +
"?entity <[[LABEL_PROPERTY_URI]]> ?label ." +
"[[TYPE_FILTER]]" +
"[[CONTEXT_FILTER]]" +
"}GROUP BY ?entity ?label " +
"ORDER BY DESC(?score1) LIMIT [[LIMIT]]";
private static final String PROPERTY_FILTER = "?entity <[[PROPERTY_URI]]> [[VALUE]]. ";
private static final String SAMPLE_INSTANCES_OF_TYPE_QUERY_TEMPLATE =
"SELECT ?entity (SAMPLE(?label) AS ?label1) " +
"WHERE{" +
"?entity a <[[TYPE_URI]]>. " +
"?entity <[[PROPERTY_URI]]> ?label." +
"}GROUP BY ?entity LIMIT [[LIMIT]]";
private static final String SEARCH_ENTITY_QUERY_TEMPLATE =
"PREFIX text:<http://jena.apache.org/text#> " +
"SELECT ?entity ?label " +
"WHERE{" +
"?entity ?label_prop (?label '[[QUERY]]*' [[LIMIT]]) . " +
"?entity ?label_prop ?label . " +
"[[LABEL_PROPERTY_FILTER]]. " +
"} LIMIT [[LIMIT]]";
}