Package org.apache.stanbol.entityhub.indexing.source.jenatdb

Source Code of org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource$RdfEntityIterator

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.entityhub.indexing.source.jenatdb;

import static org.apache.stanbol.entityhub.indexing.source.jenatdb.Constants.DEFAULT_MODEL_DIRECTORY;
import static org.apache.stanbol.entityhub.indexing.source.jenatdb.Constants.PARAM_MODEL_DIRECTORY;
import static org.apache.stanbol.entityhub.indexing.source.jenatdb.Utils.initTDBDataset;

import java.io.File;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;

import org.apache.commons.io.FilenameUtils;
import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
import org.apache.stanbol.entityhub.indexing.core.EntityDataIterable;
import org.apache.stanbol.entityhub.indexing.core.EntityDataIterator;
import org.apache.stanbol.entityhub.indexing.core.EntityDataProvider;
import org.apache.stanbol.entityhub.indexing.core.IndexingComponent;
import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
import org.apache.stanbol.entityhub.indexing.core.source.ResourceLoader;
import org.apache.stanbol.entityhub.indexing.core.source.ResourceState;
import org.apache.stanbol.entityhub.servicesapi.model.Reference;
import org.apache.stanbol.entityhub.servicesapi.model.Representation;
import org.apache.stanbol.entityhub.servicesapi.model.Text;
import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
import org.apache.stanbol.entityhub.servicesapi.util.ModelUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import at.newmedialab.ldpath.api.backend.RDFBackend;

import com.hp.hpl.jena.datatypes.BaseDatatype;
import com.hp.hpl.jena.datatypes.DatatypeFormatException;
import com.hp.hpl.jena.datatypes.RDFDatatype;
import com.hp.hpl.jena.datatypes.xsd.XSDDateTime;
import com.hp.hpl.jena.datatypes.xsd.XSDDuration;
import com.hp.hpl.jena.graph.Node;
import com.hp.hpl.jena.graph.Triple;
import com.hp.hpl.jena.graph.impl.LiteralLabel;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.Syntax;
import com.hp.hpl.jena.rdf.model.AnonId;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.sparql.core.Var;
import com.hp.hpl.jena.sparql.engine.binding.Binding;
import com.hp.hpl.jena.tdb.TDBFactory;
import com.hp.hpl.jena.tdb.base.file.Location;
import com.hp.hpl.jena.tdb.store.DatasetGraphTDB;
import com.hp.hpl.jena.util.iterator.ExtendedIterator;
/**
* Implementation of an {@link IndexingComponent} for Entity data that provides
* the possibility to both:<ol>
* <li>randomly access entity data via the {@link EntityDataProvider} interface
* <li>iterate over all entities in this store via the {@link EntityDataIterator}
* interface.
* </ol>
*
* @author Rupert Westenthaler
*
*/
public class RdfIndexingSource extends AbstractTdbBackend implements EntityDataIterable,EntityDataProvider, RDFBackend<Node> {
    /**
     * The Parameter used to configure the source file(s) or folder(s) relative
     * to the {@link IndexingConfig#getSourceFolder()}. The ',' (comma) is used
     * as separator to parse multiple sources.
     */
    public static final String PARAM_SOURCE_FILE_OR_FOLDER = "source";
    /**
     * The directory where successfully imported files are copied to
     */
    public static final String PARAM_IMPORTED_FOLDER = "imported";
    /**
     * Allows to enable/disable the indexing of Bnodes (see
     * <a href="https://issues.apache.org/jira/browse/STANBOL-765">STANBOL-765</a>
     * for details).
     */
    private static final String PARAM_BNODE_STATE = "bnode";
    /**
     * If present, this Parameter allows to convert RDF BNodes to dereferable
     * URIs by using {bnode-prefix}{bnode-id} (see
     * <a href="https://issues.apache.org/jira/browse/STANBOL-765">STANBOL-765</a>
     * for details)
     */
    public static final String PARAM_BNODE_PREFIX = "bnode-prefix";
    /**
     * The Parameter that can be used to deactivate the importing of sources.
     * If this parameter is set to <code>false</code> the values configured for
     * {@link #PARAM_SOURCE_FILE_OR_FOLDER} are ignored. The default value is
     * <code>true</code>
     */
    public static final String PARAM_IMPORT_SOURCE = "import";
    /**
     * The default directory name used to search for RDF files to be imported
     */
    public static final String DEFAULT_SOURCE_FOLDER_NAME = "rdfdata";
   
    public static final String DEFAULT_IMPORTED_FOLDER_NAME = "imported";
    //protected to allow internal classes direct access (without hidden getter/
    //setter added by the compiler that decrease performance)
    protected final static Logger log = LoggerFactory.getLogger(RdfIndexingSource.class);
   
    /**
     * The RDF data
     */
    private DatasetGraphTDB indexingDataset;
    /**
     * The valueFactory used to create {@link Representation}s, {@link Reference}s
     * and {@link Text} instances.
     */
    private ValueFactory vf;
   
    private ResourceLoader loader;

    protected String bnodePrefix; //protected to allow direct access in inner classes
    /**
     * used for logging a single WARN level entry on the first ignored BNode
     */
    private boolean bnodeIgnored = false;
   
    /**
     * Default Constructor relaying on that {@link #setConfiguration(Map)} is
     * called afterwards to provide the configuration!
     */
    public RdfIndexingSource(){
        this(null);
    }
    /**
     * Internally used to initialise a {@link ValueFactory}
     * @param valueFactory
     */
    private RdfIndexingSource(ValueFactory valueFactory){
        if(valueFactory == null){
            this.vf = InMemoryValueFactory.getInstance();
        } else {
            this.vf = valueFactory;
        }
    }
    /**
     * Constructs an instance based on the provided parameter
     * @param modelLocation the directory for the RDF model. MUST NOT be NULL
     * however the parsed {@link File} needs not to exist.
     * @param sourceFileOrDirectory the source file or directory containing the
     * file(s) to import. Parse <code>null</code> if no RDF files need to be
     * imported
     * @param valueFactory The {@link ValueFactory} used to create instances
     * or <code>null</code> to use the default implementation.
     */
    public RdfIndexingSource(File modelLocation,
                               File sourceFileOrDirectory,
                               ValueFactory valueFactory){
        if(modelLocation == null){
            throw new IllegalArgumentException("The parsed model location MUST NOT be NULL!");
        }
        //init the store
        this.indexingDataset = initTDBDataset(modelLocation);
        //use a ResourceLoader that fails on the first invalid RDF file (STANBOL-328)
        this.loader =  new ResourceLoader(new RdfResourceImporter(indexingDataset), true,true);
        loader.addResource(sourceFileOrDirectory);
    }
    @Override
    public void setConfiguration(Map<String,Object> config) {
        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
        //first init the RDF Model
        this.indexingDataset = Utils.getTDBDataset(config);
        //second we need to check if we need to import RDF files to the RDF model
        //create the ResourceLoader
        this.loader =  new ResourceLoader(new RdfResourceImporter(indexingDataset), true);
       
        Object value = config.get(PARAM_IMPORTED_FOLDER);
        String importedFolderName;
        if(value != null && !value.toString().isEmpty()){
            importedFolderName = value.toString();
        } else {
            importedFolderName = DEFAULT_IMPORTED_FOLDER_NAME;
        }
        File importedFolder = new File(indexingConfig.getSourceFolder(),importedFolderName);
        log.info("Imported RDF File Folder: {}",importedFolder);
        this.loader.setImportedDir(importedFolder);
        //check if importing is deactivated
        boolean importSource = true; //default is true
        value = config.get(PARAM_IMPORT_SOURCE);
        if(value != null){
            importSource = Boolean.parseBoolean(value.toString());
        }
        if(importSource){ // if we need to import ... check the source config
            log.info("Importing RDF data from:");
            value = config.get(PARAM_SOURCE_FILE_OR_FOLDER);
            if(value == null){ //if not set use the default
                value = DEFAULT_SOURCE_FOLDER_NAME;
            }
            for(String source : value.toString().split(",")){
                File sourceFileOrDirectory = indexingConfig.getSourceFile(source);
                if(sourceFileOrDirectory.exists()){
                    //register the configured source with the ResourceLoader
                    this.loader.addResource(sourceFileOrDirectory);
                } else {
                    if(FilenameUtils.getExtension(source).isEmpty()){
                        //non existent directory -> create
                        //This is typically the case if this method is called to
                        //initialise the default configuration. So we will try
                        //to create the directory users need to copy the source
                        //RDF files.
                        if(!sourceFileOrDirectory.mkdirs()){
                            log.warn("Unable to create directory {} configured to improt RDF data from. " +
                                "You will need to create this directory manually before copying the" +
                                "RDF files into it.",sourceFileOrDirectory);
                            //this would not be necessary because the directory will
                            //be empty - however I like to be consistent and have
                            //all configured and existent files & dirs added the the
                            //resource loader
                            this.loader.addResource(sourceFileOrDirectory);
                        }
                    } else {
                        log.warn("Unable to find RDF source {} within the indexing Source folder ",source,indexingConfig.getSourceFolder());
                    }
                }
            }
            if(log.isInfoEnabled()){
                for(String registeredSource : loader.getResources(ResourceState.REGISTERED)){
                    log.info(" > "+registeredSource);
                }
            }
        } else {
            log.info("Importing RDF data deactivated by parameer {}={}"+PARAM_IMPORT_SOURCE,value);
        }
        //STANBOL-765: parsed bnode-prefix from parsed configuration.
        value = config.get(PARAM_BNODE_STATE);
        final Boolean bnodeState;
        if(value != null){
            bnodeState = value instanceof Boolean ? (Boolean) value :
                Boolean.parseBoolean(value.toString());
        } else if(config.containsKey(PARAM_BNODE_STATE)){ //support key without value
            bnodeState = true;
        } else {
            bnodeState = null; //undefined
        }
        if(bnodeState == null || bnodeState){ //null or enabled -> consider prefix
            value = config.get(PARAM_BNODE_PREFIX);
            if(value != null){
                try {
                    new URI(value.toString());
                } catch (URISyntaxException e) {
                    throw new IllegalArgumentException("The configured "+PARAM_BNODE_PREFIX+"='"
                        + value.toString() + "' MUST BE a valid URI!");
                }
                bnodePrefix = value.toString();
            } else if(bnodeState != null) { //use default prefix if bnodeState is true
                bnodePrefix = String.format("urn:bnode:%s:",indexingConfig.getName());
            } // else bnodeState == null and no custom prefix -> disable by default
        }
        if(bnodePrefix != null){
            log.info("Indexing of Bnodes enabled (prefix: {}",bnodePrefix);
        } else {
            log.info("Indexing of Bnodes disabled");
           
        }
    }
    @Override
    public boolean needsInitialisation() {
        //if there are resources with the state REGISTERED we need an initialisation
        return !loader.getResources(ResourceState.REGISTERED).isEmpty();
    }
    @Override
    public void initialise(){
        loader.loadResources();
    }
    @Override
    public void close() {
        loader = null;
        indexingDataset.close();
    }
    public void debug(){
        String entityVar = "s";
        String fieldVar = "p";
        String valueVar = "o";
        StringBuilder qb = new StringBuilder();
        qb.append(String.format("SELECT ?%s ?%s ?%s \n",
            entityVar,fieldVar,valueVar)); //for the select
        qb.append("{ \n");
        qb.append(String.format("    ?%s ?%s ?%s . \n",
            entityVar,fieldVar,valueVar)); //for the where
        qb.append("} \n");
        log.debug("EntityDataIterator Query: \n"+qb.toString());
        Query q = QueryFactory.create(qb.toString(), Syntax.syntaxARQ);
        ResultSet rs = QueryExecutionFactory.create(q, indexingDataset.toDataset()).execSelect();
        Var s = Var.alloc(entityVar);
        Var p = Var.alloc(fieldVar);
        Var o = Var.alloc(valueVar);
        while (rs.hasNext()){
            Binding b = rs.nextBinding();
            log.info("{} {} {}",new Object[]{b.get(s),b.get(p),b.get(o)});
        }
    }
   
    @Override
    public EntityDataIterator entityDataIterator() {
        String entityVar = "s";
        String fieldVar = "p";
        String valueVar = "o";
        StringBuilder qb = new StringBuilder();
        qb.append(String.format("SELECT ?%s ?%s ?%s \n",
            entityVar,fieldVar,valueVar)); //for the select
        qb.append("{ \n");
        qb.append(String.format("    ?%s ?%s ?%s . \n",
            entityVar,fieldVar,valueVar)); //for the where
        qb.append("} \n");
        log.debug("EntityDataIterator Query: \n"+qb.toString());
        Query q = QueryFactory.create(qb.toString(), Syntax.syntaxARQ);
        return new RdfEntityIterator(
            QueryExecutionFactory.create(q, indexingDataset.toDataset()).execSelect(),
            entityVar,fieldVar,valueVar);
    }

    @Override
    public Representation getEntityData(String id) {
        final Node resource;
        //STANBOL-765: check if the parsed id represents an bnode
        if(bnodePrefix != null && id.startsWith(bnodePrefix)){
            resource = Node.createAnon(AnonId.create(id.substring(bnodePrefix.length())));
        } else {
            resource = Node.createURI(id);
        }
        Representation source = vf.createRepresentation(id);
        ExtendedIterator<Triple> outgoing = indexingDataset.getDefaultGraph().find(resource, null, null);
        boolean found = outgoing.hasNext();
        while(outgoing.hasNext()){ //iterate over the statements for that resource
            Triple statement = outgoing.next();
            Node predicate = statement.getPredicate();
            if(predicate == null || !predicate.isURI()){
                log.warn("Ignore field {} for resource {} because it is null or not an URI!",
                    predicate,resource);
            } else {
                String field = predicate.getURI();
                Node value = statement.getObject();
                processValue(value, source, field);
            } //end else predicate != null
        } //end iteration over resource triple
        if(found) {
            if(log.isTraceEnabled()){
                log.info("Resource: \n{}", ModelUtils.getRepresentationInfo(source));
            }
            return source;
        } else {
            log.debug("No Statements found for id {} (Node: {})!",id,resource);
            return null;
        }
    }

    /**
     * Processes a {@link Node} and adds the according value to the parsed
     * Representation.
     * @param value The node to convert to an value for the Representation
     * @param source the representation (MUST NOT be <code>null</code>
     * @param field the field (MUST NOT be <code>null</code>)
     */
    private void processValue(Node value, Representation source, String field) {
        if(value == null){
            log.warn("Encountered NULL value for field {} and entity {}",
                    field,source.getId());
        } else if(value.isURI()){ //add a reference
            source.addReference(field, value.getURI());
        } else if(value.isLiteral()){ //add a value or a text depending on the dataType
            LiteralLabel ll = value.getLiteral();
//            log.debug("LL: lexical {} | value {} | dataType {} | language {}",
//                new Object[]{ll.getLexicalForm(),ll.getValue(),ll.getDatatype(),ll.language()});
            //if the dataType == null , than we can expect a plain literal
            RDFDatatype dataType = ll.getDatatype();
            if(dataType != null){ //add a value
                Object literalValue;
                try {
                    literalValue = ll.getValue();
                    if(literalValue instanceof BaseDatatype.TypedValue){
                        //used for unknown data types
                        // -> in such cases just use the lexical type
                        String lexicalValue = ((BaseDatatype.TypedValue)literalValue).lexicalValue;
                        if(lexicalValue != null && !lexicalValue.isEmpty()){
                            source.add(field,lexicalValue);
                        }
                    } else if(literalValue instanceof XSDDateTime) {
                        source.add(field, ((XSDDateTime)literalValue).asCalendar().getTime()); //Entityhub uses the time
                    } else if(literalValue instanceof XSDDuration) {
                        String duration = literalValue.toString();
                        if(duration != null && !duration.isEmpty()) {
                            source.add(field, literalValue.toString());
                        }
                    } else {
                        source.add(field, literalValue);
                    }
                } catch (DatatypeFormatException e) {
                    log.warn(" Unable to convert {} to {} -> use lecicalForm",
                        ll.getLexicalForm(),ll.getDatatype());
                    literalValue = ll.getLexicalForm();
                }
            } else { //add a text
                String lexicalForm = ll.getLexicalForm();
                if(lexicalForm != null && !lexicalForm.isEmpty()){
                    String language = ll.language();
                    if(language!=null && language.length()<1){
                        language = null;
                    }
                    source.addNaturalText(field, lexicalForm, language);
                } //else ignore empty literals
            }
            // "" is parsed if there is no language
        } else if(value.isBlank()) {
            if(bnodePrefix != null) { //STANBOL-765: convert Bnodes to URIs
                StringBuilder sb = new StringBuilder(bnodePrefix);
                sb.append(value.getBlankNodeId().getLabelString());
                source.addReference(field, sb.toString());
            } else {
                logIgnoredBnode(log, source, field, value);
            }
        }  else {
            log.warn("ignoreing value {} for field {} and Resource {} because it is of an unsupported type!",
                    new Object[]{value,field,source.getId()});
        } //end different value node type
    }
    /**
     * Logs that a BNode was ignored (only the first time). Also debugs the
     * ignored triple.
     * @param log the logger to use
     * @param s subject
     * @param p predicate
     * @param o object
     */
    protected void logIgnoredBnode(Logger log, Object s, Object p, Object o) {
        if(!bnodeIgnored){
            bnodeIgnored = true;
            log.warn("The Indexed RDF Data do contain Blank Nodes. Those are "
                + "ignored unless the '{}' parameter is set to valid URI. "
                + "If this parameter is set Bnodes are converted to URIs by "
                + "using {bnode-prefix}{bnodeId} (see STANBOL-765)",
                PARAM_BNODE_PREFIX);
        }
        log.debug("ignoreing blank node value(s) for Triple {},{},{}!",
            new Object[]{s,p,o});
    }
    /**
     * Implementation of the iterator over the entities stored in a
     * {@link RdfIndexingSource}. This Iterator is based on query
     * {@link ResultSet}. It uses the low level SPARQL API because this allows
     * to use the same code to create values for Representations
     * @author Rupert Westenthaler
     *
     */
    public final class RdfEntityIterator implements EntityDataIterator {
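        /**
         * Variables used to read the entity, the field and the value
         * {@link Node} from the {@link Binding}s of the {@link #resultSet}
         */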
        final Var entityVar;
        final Var fieldVar;
        final Var valueVar;
        /**
         * The result set containing all triples in the form of <code>
         * "entity -&gt; field -&gt; value"</code>
         */
        private final ResultSet resultSet;
        /**
         * The {@link Node} representing the current entity or <code>null</code>
         * if the iterator is newly created.<p>
         * {@link Node#isURI()} is guaranteed to return <code>true</code> and
         * {@link Node#getURI()} is guaranteed to return the id for the entity
         */
        private Node currentEntity = null;
        /**
         * The {@link Node} for the next Entity in the iteration or <code>null</code>
         * in case there are no further or the iterator is newly created (in that
         * case {@link #currentEntity} will be also <code>null</code>)<p>
         * {@link Node#isURI()} is guaranteed to return <code>true</code> and
         * {@link Node#getURI()} is guaranteed to return the id for the entity
         */
        private Node nextEntity = null;
        /**
         * The Representation of the current Element. Only available after a
         * call to {@link #getRepresentation()}
         */
        private Representation currentRepresentation = null;
        /**
         * Holds all <code>field,value</code> pairs of the current Entity.
         * Elements at even positions represent <code>fields</code> and elements
         * at odd positions represent <code>values</code>.
         */
        private List<Node> data = new ArrayList<Node>();
        /**
         * The next (not consumed) solution of the query.
         */
        private Binding nextBinding = null;
       
        protected RdfEntityIterator(ResultSet resultSet, String entityVar,String fieldVar, String valueVar){
            if(resultSet == null){
                throw new IllegalArgumentException("The parsed ResultSet MUST NOT be NULL!");
            }
            //check if the ResultSet provides the required variables to perform the query
            List<String> vars = resultSet.getResultVars();
            if(!vars.contains(entityVar)){
                throw new IllegalArgumentException("The parsed ResultSet is missing the required" +
                    "Variable \""+entityVar+"\" representing the Entity!");
            } else {
                this.entityVar = Var.alloc(entityVar);
            }
            if(!vars.contains(fieldVar)){
                throw new IllegalArgumentException("The parsed ResultSet is missing the required" +
                        "Variable \""+fieldVar+"\" representing the Field of an Entity!");
            } else {
                this.fieldVar = Var.alloc(fieldVar);
            }
            if(!vars.contains(valueVar)){
                throw new IllegalArgumentException("The parsed ResultSet is missing the required" +
                        "Variable \""+valueVar+"\" representing the Value of a Field of an Entity!");
            } else {
                this.valueVar = Var.alloc(valueVar);
            }
            this.resultSet = resultSet;
            //this will read until the first binding of the first Entity is found
            initFirst();
        }
        private void initFirst(){
            if(currentEntity == null && nextEntity == null){ //only for the first call
                //consume binding until the first valid entity starts
                while(nextEntity == null && resultSet.hasNext()){
                    Binding firstValid = resultSet.nextBinding();
                    Node entityNode = firstValid.get(entityVar);
                    if((entityNode.isURI() && !entityNode.toString().isEmpty()) ||
                            entityNode.isBlank() && bnodePrefix != null){
                      //store it temporarily in nextBinding
                        nextBinding = firstValid;
                        //store it as next (first) entity
                        nextEntity = entityNode;
                    } else {
                        logIgnoredBnode(log,entityNode,firstValid.get(fieldVar),firstValid.get(valueVar));
                    }
                }
            } else {
                throw new IllegalStateException("This Mehtod MUST be only used for Initialisation!");
            }
        }
        @Override
        public void close() {
            data.clear();
            data = null;
            currentEntity = null;
            currentRepresentation = null;
            //Looks like it is not possible to close a resultSet
        }

        @Override
        public Representation getRepresentation() {
            //current Entity will be null if
            //  - next() was never called
            //  - the end of the iteration was reached
            if(currentEntity == null){
                return null;
            } else if(currentRepresentation == null){
                currentRepresentation = createRepresentation();
            }
            return currentRepresentation;
        }

        @Override
        public boolean hasNext() {
            return resultSet.hasNext();
        }

        @Override
        public String next() {
            return getNext();
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException(
                "Removal of Entities is not supported by this Implementation!");
        }
        /**
         * Iterates over all {@link QuerySolution} of the {@link #resultSet}
         * that do have {@link #currentEntity} as
         * {@link RdfIndexingSource#VARIABLE_NAME_ENTITY VARIABLE_NAME_ENTITY}.
         * NOTES: <ul>
         * <li>This method also initialises the {@link #data} and sets the
         * {@link #nextBinding} to the first solution of the next entity.<br>
         * <li>That means also, that it would iterate over additional
         * {@link RdfIndexingSource#VARIABLE_NAME_ENTITY VARIABLE_NAME_ENTITY}
         * values that are not URIResources ( in cases
         * {@link RDFNode#isURIResource()} returns <code>false</code>)
         * <li>This method is also used to initialise the first Entity
         * @return the URI of the current entity
         */
        private String getNext(){
            //check for more elements
            if(!resultSet.hasNext()){
                throw new NoSuchElementException("No more Entities available");
            }
            //clean up data of the previous entity
            this.data.clear(); //remove data of the previous entity
            this.currentRepresentation = null; //and the representation
            this.currentEntity = nextEntity; //set the nextEntity to the current

            //and process the first binding already consumed from the resultSet
            //by calling this method for the previous Entity
            if(nextBinding != null){ //will be null for the first Entity
                processSolution(nextBinding);
            }
            //now get all the other Solutions for the current entity
            boolean next = false;
            while(!next && resultSet.hasNext()){
                Binding binding = resultSet.nextBinding();
                Node entityNode = binding.get(entityVar);
                //NOTES:
                // * for URIs we need to check for empty URIs!
                // * STANBOL-765: added support for BNodes
                if((entityNode.isURI() && !entityNode.toString().isEmpty()) ||
                        entityNode.isBlank() && bnodePrefix != null){
                    if(!entityNode.equals(currentEntity)){
                        //start of next Entity
                        this.nextEntity = entityNode; //store the node for the next entity
                        this.nextBinding = binding; //store the first binding of the next entity
                        //we are done for this entity -> exit the loop
                        next = true;
                    } else {
                        processSolution(binding);
                    }
                } else {
                    logIgnoredBnode(log,entityNode,binding.get(fieldVar),binding.get(valueVar));
                }
            }
            if(!next){ // exit the loop but still no new entity ... that means
                nextEntity = null; //there are no more entities
                nextBinding = null; // and there are also no more solutions
            }
            //STANBOL-765: if current is a Bnode add the bnode-prefix
            return currentEntity.isBlank() ?
                new StringBuilder(bnodePrefix).append(currentEntity.getBlankNodeId().getLabelString()).toString() :
                    currentEntity.getURI();
        }
        /**
         * Processes a {@link Binding} by storing the {@link Node}s for the
         * variables {@link #fieldVar} and {@link #valueVar} to {@link #data}.
         * This method ensures that both values are not <code>null</code> and
         * that the {@link Node} representing the field is an URI (
         * returns <code>true</code> for {@link Node#isURI()}).
         * @param binding the binding to process
         */
        private void processSolution(Binding binding) {
            Node field = binding.get(fieldVar);
            if(field != null && field.isURI()){ //property MUST BE an URI
                Node value = binding.get(valueVar);
                if(value != null){
                    //add the pair
                    data.add(field);
                    data.add(value);
                }
            } else {
                //This may only happen if the Query used to create the ResultSet
                //containing this Solution does not link the variable
                //VARIABLE_NAME_FIELD to properties.
                log.error("Found Field {} for Entity {} that is not an URIResource",field,currentEntity);
            }
        }
        /**
         * Used to create the Representation the first time
         * {@link #getRepresentation()} is called for the current entity. The
         * information for the Representation are already stored in {@link #data}
         */
        private Representation createRepresentation() {
            final String uri;
            if(currentEntity.isBlank()){ //STANBOL-765: support bNodes
                StringBuilder sb = new StringBuilder(bnodePrefix);
                sb.append(currentEntity.getBlankNodeId().getLabelString());
                uri = sb.toString();
            } else {
                uri = currentEntity.getURI();
            }
            Representation representation = vf.createRepresentation(uri);
            Iterator<Node> it = data.iterator();
            while(it.hasNext()){
                //data contains field,value pairs
                //because of that we call two times next for
                String field = it.next().getURI(); //the field
                Node value = it.next();//and the value
                processValue(value, representation, field);
            }
            return representation;
        }
    }
   
    /* ----------------------------------------------------------------------
     *     RDF Backend implementation
     * ----------------------------------------------------------------------
     */
    @Override
    public Collection<Node> listObjects(Node subject, Node property) {
        Collection<Node> nodes = new ArrayList<Node>();
        ExtendedIterator<Triple> it = indexingDataset.getDefaultGraph().find(subject, property, null);
        while(it.hasNext()){
            //STANBOL-765: we need also to transform bnodes to URIs for the
            //RDFBackend implementation
            Node object = it.next().getObject();
            if(bnodePrefix != null && object.isBlank()){
                StringBuilder sb = new StringBuilder(bnodePrefix);
                sb.append(object.getBlankNodeId().getLabelString());
                object = Node.createURI(sb.toString());
            }
            nodes.add(object);
        }
        it.close();
        return nodes;
    }
    @Override
    public Collection<Node> listSubjects(Node property, Node object) {
        Collection<Node> nodes = new ArrayList<Node>();
        ExtendedIterator<Triple> it = indexingDataset.getDefaultGraph().find(null, property, object);
        while(it.hasNext()){
            Node subject = it.next().getSubject();
            //STANBOL-765: we need also to transform bnodes to URIs for the
            //RDFBackend implementation
            if(bnodePrefix != null && subject.isBlank()){
                StringBuilder sb = new StringBuilder(bnodePrefix);
                sb.append(subject.getBlankNodeId().getLabelString());
                subject = Node.createURI(sb.toString());
            }
            nodes.add(subject);
        }
        it.close();
        return nodes;
    }
    /**
     * Since STANBOL-765 BNodes are converted to URIs if a {@link #bnodePrefix}
     * is configured. This also means that one needs to expect calls to the
     * {@link RDFBackend} interface with transformed Nodes. <p>
     * This method ensures that if someone requests an uri {@link Node} for a
     * URI that represents a transformed Bnode (when the URI starts with
     * {@link #bnodePrefix}) that the according bnode {@link Node} is created
     * @param node the node
     * @return
     */
    @Override
    public Node createURI(String uri) {
        if(bnodePrefix != null && uri.startsWith(bnodePrefix)){
            return Node.createAnon(AnonId.create(uri.substring(bnodePrefix.length())));
        } else {
            return super.createURI(uri);
        }
    }
   
}
TOP

Related Classes of org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource$RdfEntityIterator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.