Package org.deri.grefine.reconcile.rdf.factories

Source Code of org.deri.grefine.reconcile.rdf.factories.PlainSparqlQueryFactory

package org.deri.grefine.reconcile.rdf.factories;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.deri.grefine.reconcile.model.ReconciliationCandidate;
import org.deri.grefine.reconcile.model.ReconciliationRequest;
import org.deri.grefine.reconcile.model.SearchResultItem;
import org.deri.grefine.reconcile.model.ReconciliationRequestContext.PropertyContext;
import org.deri.grefine.reconcile.rdf.factories.JenaTextSparqlQueryFactory.ScoredLabel;
import org.deri.grefine.reconcile.util.StringUtils;
import org.json.JSONException;
import org.json.JSONWriter;

import com.google.common.collect.ImmutableList;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.Resource;

public class PlainSparqlQueryFactory extends AbstractSparqlQueryFactory{

  @Override
  public String getReconciliationSparqlQuery(ReconciliationRequest request, ImmutableList<String> searchPropertyUris) {
    String query = prepareQueryRE(request.getQueryString());
    String typesFilter = getTypesFilter(request.getTypes());
    String contextFilter = getContextFilter(request.getContext().getProperties());
    String fulltextFilter = getFulltextFilter(query, searchPropertyUris);
    String labelClause = getLabelClause(searchPropertyUris.size());
   
    return RECONCILE_QUERY_TEMPLATE.replace("[[LABEL_CLAUSE]]", labelClause)
                    .replace("[[FULLTEXT_SEARCH_FILTER]]",fulltextFilter)
                    .replace("[[TYPE_FILTER]]", typesFilter)
                    .replace("[[CONTEXT_FILTER]]", contextFilter)
                    .replace("[[LIMIT]]", String.valueOf(request.getLimit()));
  }

  @Override
  public List<ReconciliationCandidate> wrapReconciliationResultset(ResultSet resultSet, String queryString, ImmutableList<String> searchPropertyUris, int limit, double matchThreshold) {
    List<ReconciliationCandidate> candidates = new ArrayList<ReconciliationCandidate>();
    boolean match = false;
    boolean moreThanOneMatch = false;
    Set<String> seen = new HashSet<String>();
    while(resultSet.hasNext()){
      QuerySolution solution = resultSet.nextSolution();
      Resource entity = solution.getResource("entity");
      String entityUri = entity.getURI();
      if(seen.contains(entityUri)){
        //already seen
        continue;
      }
      seen.add(entityUri);
      ScoredLabel scoredLabel = getPreferredLabel(solution, queryString, searchPropertyUris);
      if(scoredLabel.score >= matchThreshold){
        if(match){
          moreThanOneMatch = true;
        }else{
          match = true;
        }
      }
      ReconciliationCandidate candidate = new ReconciliationCandidate(entity.getURI(), scoredLabel.label, new String[] {}, scoredLabel.score,match);
     
      candidates.add(candidate);
    }
    if(moreThanOneMatch){
      //set all candidates as match =false
      for(ReconciliationCandidate candidate:candidates){
        candidate.setMatch(false);
      }
    }
    //sort according to score
    Collections.sort(candidates, new Comparator<ReconciliationCandidate>() {

      @Override
      public int compare(ReconciliationCandidate o1,
          ReconciliationCandidate o2) {
        //descending order
        return Double.compare(o2.getScore(), o1.getScore());
      }
      });
    return candidates;
  }

  public String getExactMatchReconciliationSparqlQuery(ReconciliationRequest request, ImmutableList<String> searchPropertyUris){
    String query = prepareQuery(request.getQueryString());
    String typesFilter = getTypesFilter(request.getTypes());
    String contextFilter = getContextFilter(request.getContext().getProperties());
    String fulltextFilter = getExactMatchFulltextFilter(query, searchPropertyUris);
    String labelClause = "";
   
    return RECONCILE_QUERY_TEMPLATE.replace("[[LABEL_CLAUSE]]", labelClause)
                    .replace("[[FULLTEXT_SEARCH_FILTER]]",fulltextFilter)
                    .replace("[[TYPE_FILTER]]", typesFilter)
                    .replace("[[CONTEXT_FILTER]]", contextFilter)
                    .replace("[[LIMIT]]", String.valueOf(request.getLimit()));
  }
 
  @Override
  public String getTypeSuggestSparqlQuery(String prefix, int limit) {
    return SUGGEST_TYPE_QUERY_TEMPLATE.replace("[[QUERY]]", prepareQueryRE(prefix)).replace("[[LIMIT]]", String.valueOf(limit));
  }

 

  @Override
  public ImmutableList<SearchResultItem> wrapTypeSuggestResultSet(ResultSet resultSet, String prefix, int limit) {
    List<SearchResultItem> items = new ArrayList<SearchResultItem>();
    while(resultSet.hasNext()){
      QuerySolution solution = resultSet.nextSolution();
      String type = solution.getResource("type").getURI();
      String label = solution.getLiteral("label").getString();
      double score = StringUtils.getLevenshteinScore(label, prefix);
      items.add(new SearchResultItem(type, label, score));
    }
    Collections.sort(items, new Comparator<SearchResultItem>() {

      @Override
      public int compare(SearchResultItem o1, SearchResultItem o2) {
        //descending order
        return Double.compare(o2.getScore(), o1.getScore());
      }
    });
   
    return ImmutableList.copyOf(items);
  }

  public List<ReconciliationCandidate> wrapResultset(ResultSet resultSet,String queryString, double matchThreshold){
    List<ReconciliationCandidate> candidates = new ArrayList<ReconciliationCandidate>();
    while(resultSet.hasNext()){
      QuerySolution solution = resultSet.nextSolution();
      Resource entity = solution.getResource("entity");
      ReconciliationCandidate candidate = new ReconciliationCandidate(entity.getURI(), queryString, new String[] {}, 1.0d,true);
     
      candidates.add(candidate);
    }
    if(candidates.size() > 1 || matchThreshold > 1 ){
      //set all match =false
      for(ReconciliationCandidate candidate : candidates){
        candidate.setMatch(false);
      }
    }
    return candidates;
  }

  @Override
  public String getPropertySuggestSparqlQuery(String prefix, String typeUri, int limit) {
    return SUGGEST_PROPERTY_WITH_SPECIFIC_SUBJECT_TYPE_QUERY_TEMPLATE.replace("[[QUERY]]", prefix)
                                    .replace("[[TYPE_URI]]", typeUri)
                                    .replace("[[LIMIT]]", String.valueOf(limit));
  }
 
  @Override
  public String getPropertySuggestSparqlQuery(String prefix, int limit) {
    return SUGGEST_PROPERTY_QUERY_TEMPLATE.replace("[[QUERY]]", prefix)
                        .replace("[[LIMIT]]", String.valueOf(limit));
  }
 
  @Override
  public String getEntitySearchSparqlQuery(String prefix,  ImmutableList<String> searchPropertyUris, int limit) {
    String labelClause = getLabelClause(searchPropertyUris.size());
    StringBuilder regexPatternClause = new StringBuilder();
    StringBuilder boundFilterClause = new StringBuilder();
    for(int i =1;i<=searchPropertyUris.size();i++){
      String propUri = searchPropertyUris.get(i-1);
      regexPatternClause.append(REGEX_SEARCH_PATTERN.replaceAll("\\[\\[INDEX\\]\\]", String.valueOf(i))
                        .replace("[[QUERY]]", prefix)
                        .replace("[[PROPERTY_URI]]", propUri));
      boundFilterClause.append(BOUND_LABEL_FILTER.replace("[[INDEX]]", String.valueOf(i)));
    }
    //remove the last additional " ||"
    boundFilterClause.setLength(boundFilterClause.length()-3);
   
    return SEARCH_ENTITY_QUERY_TEMPLATE.replace("[[LABEL_CLAUSE]]", labelClause)
                      .replace("[[REGEX_SEARCH_PATTERN]]", regexPatternClause.toString())
                      .replace("[[BOUND_LABEL_FILTER]]", boundFilterClause.toString())
                      .replace("[[LIMIT]]", String.valueOf(limit));
                 
  }

 
  @Override
  public void write(JSONWriter writer) throws JSONException {
    writer.object();
    writer.key("type"); writer.value("plain");
    writer.endObject();
  }

  /**
   * @param solution
   * @param queryString
   * @param searchPropertyUris
   * @return the best label... which is the one with the best score
   */
  private ScoredLabel getPreferredLabel(QuerySolution solution, String queryString, ImmutableList<String> searchPropertyUris){
    double maxScore = -1d;
    String bestLabel = "";
    for(int i=1; i<=searchPropertyUris.size(); i++){
      Literal label = solution.getLiteral("label" + i);
      double score;
      if(label!=null){
        score = StringUtils.getLevenshteinScore(label.getString(), queryString);
        if(score>maxScore){
          maxScore = score;
          bestLabel = label.getString();
        }
      }
    }
    if(bestLabel.isEmpty()){
      //fail... should never get here as bound restrictions are added to the SPARQL query used
      throw new RuntimeException("could not find label in the resultset for " + queryString);
    }
    return new ScoredLabel(bestLabel, maxScore);
  }
 
  private String getTypesFilter(String[] types){
    if(types==null || types.length==0 || (types.length==1 && types[0].isEmpty())){
      return "";
    }
    String typesFilter = "";
    if(types.length>0){
      if(types.length==1){
        typesFilter = getSimpleTypeFilter(types[0]);
      }else{
        StringBuilder typesFilterBuilder = new StringBuilder();
        typesFilterBuilder.append("{");
        for(int i=0; i<types.length; i++){
          typesFilterBuilder.append(TYPE_FILTER.replace("[[TYPE_URI]]", types[i]));
        }
        //  remove the last additional UNION
        typesFilterBuilder.setLength(typesFilterBuilder.length()-6);
        typesFilterBuilder.append("}");
     
        typesFilter = typesFilterBuilder.toString();
      }
    }
    return typesFilter;
  }
 
  private String getFulltextFilter(String queryString, ImmutableList<String> searchPropertyUris){
    if(searchPropertyUris.size()==1){
      //simple optimization
      return SIMPLE_REGEX_SEARCH_PATTERN.replace("[[QUERY]]", queryString)
                    .replace("[[PROPERTY_URI]]", searchPropertyUris.get(0));
    }
   
    StringBuilder regexPatternClause = new StringBuilder();
    StringBuilder boundFilterClause = new StringBuilder();
    for(int i =1;i<=searchPropertyUris.size();i++){
      String propUri = searchPropertyUris.get(i-1);
      regexPatternClause.append(REGEX_SEARCH_PATTERN.replaceAll("\\[\\[INDEX\\]\\]", String.valueOf(i))
                        .replace("[[QUERY]]", queryString)
                        .replace("[[PROPERTY_URI]]", propUri));
      boundFilterClause.append(BOUND_LABEL_FILTER.replace("[[INDEX]]", String.valueOf(i)));
    }
    //remove the last additional " ||"
    boundFilterClause.setLength(boundFilterClause.length()-3);
   
    return FULLTEXT_SEARCH_FILTER.replace("[[REGEX_SEARCH_PATTERN]]", regexPatternClause.toString())
                          .replace("[[BOUND_LABEL_FILTER]]", boundFilterClause.toString());
  }
 
  private String getExactMatchFulltextFilter(String queryString, ImmutableList<String> searchPropertyUris){
    if(searchPropertyUris.size()==1){
      //simple optimization
      return SIMPLE_EXACT_MATCH_SEARCH_PATTERN.replace("[[QUERY]]", queryString)
                    .replace("[[PROPERTY_URI]]", searchPropertyUris.get(0));
    }
    StringBuilder exactMatchPatternClause = new StringBuilder();
    for(int i =1;i<=searchPropertyUris.size();i++){
      String propUri = searchPropertyUris.get(i-1);
      exactMatchPatternClause.append(EXACT_MATCH_SEARCH_PATTERN.replace("[[QUERY]]", queryString)
                        .replace("[[PROPERTY_URI]]", propUri)
                        .replaceAll("\\[\\[INDEX\\]\\]", String.valueOf(i)));
    }
    //remove the last additional "UNION "
    exactMatchPatternClause.setLength(exactMatchPatternClause.length()-6);
   
    return EXACT_MATCH_FULLTEXT_SEARCH_FILTER.replace("[[EXACT_MATCH_SEARCH_PATTERN]]", exactMatchPatternClause.toString());
  }
 
  private String getLabelClause(int num){
    StringBuilder labelClause = new StringBuilder();
    for(int i=1;i<=num;i++){
      labelClause.append(LABEL).append(i);
    }
    return labelClause.toString();
  }
 
  private String getContextFilter(Set<PropertyContext> properties){
    StringBuilder contextFilter = new StringBuilder();
    for(PropertyContext prop:properties){
      contextFilter.append(
          PROPERTY_FILTER.replace("[[PROPERTY_URI]]", prop.getPid())
                  .replace("[[VALUE]]", prop.getV().asSparqlValue())
          );
    }
    return contextFilter.toString();
  }
 
  private String getSimpleTypeFilter(String typeUri){
    return SIMPLE_TYPE_FILTER.replace("[[TYPE_URI]]", typeUri);
  }
 
  private String prepareQuery(String query){
    return query.replaceAll("'", "\\\\'");
   
  }
 
  private String prepareQueryRE(String query){
    return query.replaceAll("'", "\\\\'").replaceAll("\\?", "\\\\\\\\?").replaceAll("\\.", "\\\\\\\\.").replaceAll("\\(","\\\\\\\\(")
    .replaceAll("\\)","\\\\\\\\)");
   
//    return Pattern.quote(query.replaceAll("'", "\\\\'"));
  }
 
  // ---------------------------------------------------------------------------------
  // SPARQL templates. Tokens of the form [[NAME]] are placeholders that the methods of
  // this class substitute with String.replace before the query is returned.
  // ---------------------------------------------------------------------------------

  // Reconciliation query, shared by getReconciliationSparqlQuery and
  // getExactMatchReconciliationSparqlQuery. Selects ?entity plus the label variables and
  // keeps only IRIs.
  private static final String RECONCILE_QUERY_TEMPLATE =
    "PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> " +
    "PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> " +
    "SELECT ?entity[[LABEL_CLAUSE]] " +
    "WHERE" +
    "{" +
    "[[FULLTEXT_SEARCH_FILTER]]" +
    "[[TYPE_FILTER]]" +
    "[[CONTEXT_FILTER]]" +
    "FILTER isIRI(?entity). }LIMIT [[LIMIT]]";
  // Prefix of a projected label variable; getLabelClause appends the 1-based index.
  private static final String LABEL = " ?label";
  // One OPTIONAL regex match per search property (multi-property case); [[INDEX]] occurs twice.
  private static final String REGEX_SEARCH_PATTERN =
    "OPTIONAL{ " +
    "?entity <[[PROPERTY_URI]]> ?label[[INDEX]]. " +
    "FILTER regex(str(?label[[INDEX]]),'[[QUERY]]','i')" +
    "}";
  // Wraps the OPTIONAL patterns and requires at least one label variable to be bound.
  private static final String FULLTEXT_SEARCH_FILTER =
    "{" +
    "[[REGEX_SEARCH_PATTERN]]" +
    "FILTER ([[BOUND_LABEL_FILTER]])" +
    "}" ;
  // Single-property shortcut for the regex search; always binds ?label1.
  private static final String SIMPLE_REGEX_SEARCH_PATTERN =
    "?entity <[[PROPERTY_URI]]> ?label1. " +
    "FILTER regex(str(?label1),'[[QUERY]]','i'). " ;
  // Single-property shortcut for the exact-match search.
  private static final String SIMPLE_EXACT_MATCH_SEARCH_PATTERN =
    "?entity <[[PROPERTY_URI]]> ?label. FILTER (str(?label) = '[[QUERY]]'). ";
  // One bound() test per label variable; callers strip the trailing " ||" after the last one.
  private static final String BOUND_LABEL_FILTER = " bound(?label[[INDEX]]) ||";
  // One alternative of a multi-type restriction; getTypesFilter strips the trailing "UNION ".
  private static final String TYPE_FILTER = "{?entity rdf:type <[[TYPE_URI]]>. } UNION ";
  // Single-type restriction.
  private static final String SIMPLE_TYPE_FILTER = "?entity rdf:type <[[TYPE_URI]]>. ";
 
  // Braces around the UNION of exact-match alternatives (multi-property case).
  private static final String EXACT_MATCH_FULLTEXT_SEARCH_FILTER =
    "{" +
    "[[EXACT_MATCH_SEARCH_PATTERN]]" +
    "}" ;
  // One exact-match alternative per search property; getExactMatchFulltextFilter strips
  // the trailing "UNION " after the last one.
  private static final String EXACT_MATCH_SEARCH_PATTERN =
    "{ " +
    "?entity <[[PROPERTY_URI]]> ?label[[INDEX]]. FILTER (str(?label[[INDEX]]) = '[[QUERY]]'). " +
    "}UNION ";
  // One triple pattern per context property; [[VALUE]] is already in SPARQL syntax.
  private static final String PROPERTY_FILTER = "?entity <[[PROPERTY_URI]]> [[VALUE]]. ";
 
  // Type suggestion: types that have at least one instance and whose rdfs:label or
  // skos:prefLabel starts with the query (case-insensitive).
  private static final String SUGGEST_TYPE_QUERY_TEMPLATE =
      "SELECT DISTINCT ?type ?label " +
      "WHERE{" +
      "[] a ?type. " +
      "?type ?p ?label. " +
      "FILTER (?p=<http://www.w3.org/2000/01/rdf-schema#label> || ?p=<http://www.w3.org/2004/02/skos/core#prefLabel>). " +
      "FILTER regex(str(?label),'^[[QUERY]]','i')" +
      "} LIMIT [[LIMIT]]";
 
  // Property suggestion restricted to properties used on instances of [[TYPE_URI]].
  private static final String SUGGEST_PROPERTY_WITH_SPECIFIC_SUBJECT_TYPE_QUERY_TEMPLATE =
       "SELECT DISTINCT ?p ?label " +
       "WHERE{" +
       "[] a <[[TYPE_URI]]>;" +
       "?p ?v." +
       "?p ?label_prop ?label. " +
       "FILTER (?label_prop=<http://www.w3.org/2000/01/rdf-schema#label> || ?label_prop=<http://www.w3.org/2004/02/skos/core#prefLabel>). " +
       "FILTER regex(str(?label),'^[[QUERY]]','i')" +
       "} LIMIT [[LIMIT]]";
  // Property suggestion over every property used in the data.
  private static final String SUGGEST_PROPERTY_QUERY_TEMPLATE =
     "SELECT DISTINCT ?p ?label " +
     "WHERE{" +
     "[] ?p ?v." +
     "?p ?label_prop ?label. " +
     "FILTER (?label_prop=<http://www.w3.org/2000/01/rdf-schema#label> || ?label_prop=<http://www.w3.org/2004/02/skos/core#prefLabel>). " +
     "FILTER regex(str(?label),'^[[QUERY]]','i')" +
     "} LIMIT [[LIMIT]]";
 
  // Entity search used by getEntitySearchSparqlQuery: OPTIONAL regex patterns plus a
  // filter requiring at least one label variable to be bound.
  private static final String SEARCH_ENTITY_QUERY_TEMPLATE =
    "SELECT ?entity[[LABEL_CLAUSE]] " +
    "WHERE" +
    "{" +
    "[[REGEX_SEARCH_PATTERN]]" +
    "FILTER ([[BOUND_LABEL_FILTER]]). " +
    "FILTER isIRI(?entity). }LIMIT [[LIMIT]]";
}
TOP

Related Classes of org.deri.grefine.reconcile.rdf.factories.PlainSparqlQueryFactory

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.