Package org.apache.clerezza.rdf.cris

Source Code of org.apache.clerezza.rdf.cris.GraphIndexer$OptimizationTask

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.clerezza.rdf.cris;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.clerezza.rdf.core.NonLiteral;
import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.TripleCollection;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.event.FilterTriple;
import org.apache.clerezza.rdf.core.event.GraphEvent;
import org.apache.clerezza.rdf.core.event.GraphListener;
import org.apache.clerezza.rdf.cris.ontologies.CRIS;
import org.apache.clerezza.rdf.ontologies.RDF;
import org.apache.clerezza.rdf.utils.GraphNode;
import org.apache.clerezza.rdf.utils.RdfList;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Creates an index of RDF resources and provides an interface to
* search for indexed resources.
*
* @author reto, tio, daniel
*/
public class GraphIndexer extends ResourceFinder {
 
  /**
   * Default value for {@code maxhits}.
   */
  public static final int DEFAULT_MAXHITS = 100000;
 
  private final Logger logger = LoggerFactory.getLogger(getClass());
 
  /**
   * Prefix for stored Lucene fields.
   */
  static final String SORT_PREFIX = "_STORED_";
 
  /**
   * Field name for the resource field in Lucene.
   */
  static final String URI_FIELD_NAME = "resource-uri";
 
  /**
   * Handler for asynchronous indexing.
   */
  ReindexThread reindexer;
 
  // Graph in which the index definitions are looked up (see processDefinitions()).
  private TripleCollection definitionGraph;
  // Graph whose resources are indexed; both graph listeners are registered on it.
  private TripleCollection baseGraph;
  // Value exposed through getMaxHits()/setMaxHits().
  private int maxHits;
  // Analyzer handed to LuceneTools; a StandardAnalyzer is created in the constructor.
  private Analyzer analyzer;
  // Supplies the IndexWriter/IndexSearcher and the commit/optimize/close operations used here.
  private LuceneTools luceneTools;
  // Indexed type -> virtual properties indexed for it; replaced by each processDefinitions() run.
  private Map<UriRef, Set<VirtualProperty>> type2IndexedProperties = null;
  // Virtual property -> types it is indexed for; no code shown here ever removes entries.
  private Map<VirtualProperty, Set<UriRef>> property2TypeMap = new HashMap<VirtualProperty, Set<UriRef>>();
  // Base property -> virtual properties that include it; no code shown here ever removes entries.
  private Map<UriRef, Set<VirtualProperty>> property2IncludingVProperty = new HashMap<UriRef, Set<VirtualProperty>>();
  // Listener for rdf:type triples of the base graph.
  private final GraphListener typeChangeListener;
  // Listener for triples whose predicate takes part in an indexed virtual property.
  private final GraphListener indexedPropertyChangeListener;
  // Sort objects reused across searches, keyed by the wrapped sort-field array.
  private Map<SortFieldArrayWrapper, Sort> sortCache = new HashMap<SortFieldArrayWrapper, Sort>();
  // Timer for scheduled optimizations; set to null by terminateIndexOptimizationSchedule().
  private Timer timer = new Timer();
  // Task whose run() triggers optimizeIndex().
  private final OptimizationTask optimizationTask = new OptimizationTask();
 
  /**
   * Allows to schedule optimizations using a Timer.
   *
   * NOTE: not for public access as this functionality is likely to be moved
   * into a stand-alone service.
   */
  private class OptimizationTask extends TimerTask {
    @Override
    public void run() {
      optimizeIndex();
    }
  }
 
  /**
   * When resources are (re)-indexed,
   * this thread updates the Lucene index asynchronously.
   */
  class ReindexThread extends Thread {

    private final long resourceCacheCapacity;
    private final long stableThreshold;
    private final String name;
    private final Set<Resource> resourcesToProcess;
    private final Lock lock = new ReentrantLock(true);
    private final java.util.concurrent.locks.Condition indexResources =
        lock.newCondition();
    private long counter;
    private boolean stop;
    private boolean resourcesClean;

    /**
     * Constructs a new thread with specified name and indexing threshold.
     * Setting the name may be useful for distinguishing logging output when
     * multiple instances of GraphIndexer are running.
     *
     * {@code stableThreshold} specifies a waiting period before the
     * indexing starts. The timer is restarted if more resources are added
     * within {@code stableThreshold} nanoseconds. A high value means the
     * thread will wait a long time before indexing resources added using
     * {@link addResource(Resource resource)}. A short value means new
     * resources are added to the index quickly. Configure this value such
     * that when adding many new resources in a short time these are
     * gathered and indexed at once.
     *
     * @param name  the thread name (used in logging output).
     * @param stableThreshold
     *    If no new resource has been added for {@code stableThreshold}
     *    nanoseconds and there are cached unindexed resources, then
     *    indexing starts.
     * @param resourceCacheCapacity 
     *    How many resources will be cached maximally before indexing.
     *    A negative number means infinite.
     */
    ReindexThread(String name, long stableThreshold, long resourceCacheCapacity) {
      this.resourceCacheCapacity = resourceCacheCapacity;
      this.stableThreshold = stableThreshold;
      this.name = name;
      this.resourcesToProcess = new HashSet<Resource>();
      this.resourcesClean = true;
    }
   
    /**
     * Constructs a new thread with specified indexing threshold.
     *
     * @code stableThreshold} specifies a waiting period before the
     * indexing starts. The timer is restarted if more resources are added
     * within {@code stableThreshold} nanoseconds. A high value means the
     * thread will wait a long time before indexing resources added using
     * {@link addResource(Resource resource)}. A short value means new
     * resources are added to the index quickly. Configure this value such
     * that when adding many new resources in a short time these are
     * gathered and indexed at once.
     *
     * @param stableThreshold
     *    If no new resource has been added for {@code stableThreshold}
     *    nanoseconds and there are cached unindexed resources, then
     *    indexing starts.
     * @param resourceCacheCapacity 
     *    How many resources will be cached maximally before indexing.
     *    A negative number means infinite.
     */
    ReindexThread(long stableThreshold, long resourceCacheCapacity) {
      this(null, stableThreshold, resourceCacheCapacity);
    }
   

    /**
     * Request the termination of this thread. The thread will finish its
     * current operations before it terminates.
     */
    void stopThread() {
      stop = true;
      lock.lock();
      try {
        indexResources.signal();
      } finally {
        lock.unlock();
      }
    }

    @Override
    public void run() {
      if(name == null) {
        setName("CRIS Reindex Thread[" + getId() + "]");
      } else {
        setName(name);
      }
      stop = false;
      counter = 0;
      Set<Resource> set;
      logger.info("{} started.", getName());
      while (true) {
        try {
          lock.lock();
          try {
            waitForDirty();
            if (stop) {
              break;
            }
            logger.debug("{}: registered write - waiting for more writes to follow.", getName());
            waitUntilStable();
            set = new HashSet<Resource>(resourcesToProcess);
            resourcesToProcess.clear();
            counter = 0;
            resourcesClean = true;
          } finally {
            lock.unlock();
          }
          logger.info("{}: cache full or writes have ceased. Indexing...", getName());
          for (Resource resource : set) {
            indexResource(resource, luceneTools.getIndexWriter());
          }
          luceneTools.commitChanges();
        } catch (InterruptedException ex) {
          logger.warn("{}: interrupted: {}.", getName(), ex);
        }
      }
      logger.info("{} stopped.", getName());
    }
   
    private void waitUntilStable() throws InterruptedException {
      while (!resourcesClean) {
        resourcesClean = true;
        indexResources.awaitNanos(stableThreshold);
        if(resourceCacheCapacity >= 0 && ++counter > resourceCacheCapacity) {
          break;
        }
      }
    }

    private void waitForDirty() throws InterruptedException {
      while (resourcesClean && !stop) {
        indexResources.await();
      }
    }

    /**
     * Add a new resource for indexing.
     *
     * @param resource the resource.
     */
    public void addResource(Resource resource) {
      lock.lock();
      try {
        resourcesToProcess.add(resource);
        resourcesClean = false;
        indexResources.signal();
      } finally {
        lock.unlock();
      }
    }
  }

  /**
   * Creates a new index.
   *
   * The {@code GraphIndexer} looks for specifications of what properties on
   * what resources to index in the {@code definitionGraph}.
   *
   * The {@code baseGraph} specifies the graph on which the index is built.
   *
   * <p>Notes:
   *
   * <p>
   * This is an expensive operation and it is advisable to call
   * {@link #closeLuceneIndex()} when this instance is no longer needed.
   * </p><p>
   * The GraphIndexer must have write-access to the index directory specified.
   * </p>
   *
   * @param definitionGraph
   *    where index definitions are stored
   * @param baseGraph
   *    where the resources to index are stored
   * @param indexDirectory
   *    The directory where the index is stored.
   * @param createNewIndex
   *    Whether to create a new index or reuse an existing one.
   *    The constructor does not check if there is a valid existing index.
   *    The user is responsible for setting this value correctly.
   * @param maxHits
   *    How many results the indexer returns. All entries in the index are
   *    searched, but only {@code maxHits} resources are resolved and
   *    returned in the result.
   *
   * @see IndexDefinitionManager
   */
  public GraphIndexer(TripleCollection definitionGraph,
      TripleCollection baseGraph, Directory indexDirectory,
      boolean createNewIndex, int maxHits) {
    this.definitionGraph = definitionGraph;
    this.baseGraph = baseGraph;
    this.maxHits = maxHits;
    analyzer = new StandardAnalyzer(Version.LUCENE_30);

    luceneTools = new LuceneTools(indexDirectory, analyzer);
    processDefinitions();

    this.reindexer = new ReindexThread(100000000L, 500000L);

    typeChangeListener = new GraphListener() {

      @Override
      public void graphChanged(List<GraphEvent> events) {
        for (GraphEvent e : events) {
          Triple triple = e.getTriple();
          logger.debug("processing addition of type " + triple.getObject());
          if (type2IndexedProperties.containsKey((UriRef) triple.getObject())) {
            scheduleForReindex(triple.getSubject());
          }

        }
      }
    };

    baseGraph.addGraphListener(typeChangeListener, new FilterTriple(null, RDF.type, null));

    indexedPropertyChangeListener = new GraphListener() {

      @Override
      public void graphChanged(List<GraphEvent> events) {
        for (GraphEvent e : events) {
          logger.debug("Triple: " + e.getTriple());
          Triple triple = e.getTriple();

          UriRef predicate = triple.getPredicate();
          Set<VirtualProperty> vProperties = property2IncludingVProperty.get(predicate);

          List<Resource> indexedResources = new ArrayList<Resource>();
          logger.debug("Predicate: " + predicate);
          for (VirtualProperty vProperty : vProperties) {
            logger.debug("Subject: " + " " + triple.getSubject());
            followInversePaths(triple.getSubject(),
                vProperty.pathToIndexedResource(predicate), indexedResources);

          }
          for (Resource resource : indexedResources) {
            GraphNode node = new GraphNode(resource, e.getGraph());
            List<UriRef> types = new ArrayList<UriRef>();
            Lock lock = node.readLock();
            lock.lock();
            try {
              Iterator<Resource> resources = node.getObjects(RDF.type);
              while (resources.hasNext()) {
                Resource res = resources.next();
                if (res instanceof UriRef) {
                  types.add((UriRef) res);
                }
              }
            } finally {
              lock.unlock();
            }
            for (UriRef type : types) {
              if (type2IndexedProperties.containsKey(type)) {
                scheduleForReindex(resource);
              }
            }
          }

        }
      }
    };

    baseGraph.addGraphListener(indexedPropertyChangeListener,
        new FilterTriple(null, null, null) {

      @Override
      public boolean match(Triple triple) {
        UriRef predicate = triple.getPredicate();
        //check indirectly involved properties
        Set<VirtualProperty> vProperties = property2IncludingVProperty.get(predicate);
        if (vProperties != null) {
          for (VirtualProperty vProperty : vProperties) {
            if (property2TypeMap.containsKey(vProperty)) {
              return true;
            }
          }
        }
        return false;
      }
    });

    reindexer.start();

    if (createNewIndex) {
      reCreateIndex();
    }
  }
 
  /**
   * Creates a new index with default {@code maxHits}.
   *
   * The {@code GraphIndexer} looks for specifications of what properties on
   * what resources to index in the {@code definitionGraph}.
   *
   * The {@code baseGraph} specifies the graph on which the index is built.
   *
   * <p>Notes:
   *
   * <p>
   * This is an expensive operation and it is advisable to call
   * {@link #closeLuceneIndex()} when this instance is no longer needed.
   * </p><p>
   * The GraphIndexer must have write-access to the index directory specified.
   * </p>
   *
   * @param definitionGraph
   *    where index definitions are stored
   * @param baseGraph
   *    where the resources to index are stored
   * @param indexDirectory
   *    The directory where the index is stored.
   * @param createNewIndex
   *    Whether to create a new index or reuse an existing one.
   *    The constructor does not check if there is a valid existing index.
   *    The user is responsible for setting this value correctly.
   */
  public GraphIndexer(TripleCollection definitionGraph,
      TripleCollection baseGraph, Directory indexDirectory,
      boolean createNewIndex) {
    this(definitionGraph, baseGraph, indexDirectory, createNewIndex,
        DEFAULT_MAXHITS);
  }

  /**
   * Creates a new in-memory index with default {@code maxHits}.
   *
   * The {@code GraphIndexer} looks for specifications of what properties on
   * what resources to index in the {@code definitionGraph}.
   *
   * The {@code baseGraph} specifies the graph on which the index is built.
   *
   * <p>Notes:
   *
   * <p>
   * This is an expensive operation and it is advisable to call
   * {@link #closeLuceneIndex()} when this instance is no longer needed.
   * </p><p>
   * The GraphIndexer must have write-access to the index directory specified.
   * </p>
   *
   * @param definitionGraph
   *    where index definitions are stored
   * @param baseGraph
   *    where the resources to index are stored
   */
  public GraphIndexer(TripleCollection definitionGraph, TripleCollection baseGraph) {
    this(definitionGraph, baseGraph, new RAMDirectory(), true);
  }

  /**
   * Releases resources held by GraphIndexer. After the call to this method,
   * this GraphIndexer instance must not be used anymore.
   */
  public void closeLuceneIndex() {
    this.baseGraph.removeGraphListener(typeChangeListener);
    this.baseGraph.removeGraphListener(indexedPropertyChangeListener);
    this.reindexer.stopThread();
    this.luceneTools.closeIndexSearcher();
    this.luceneTools.closeIndexWriter();
    this.sortCache.clear();
  }
 
  /**
   * Returns the Analyzer used by this GraphIndexer instance.
   *
   * @return the Analyzer
   */
  public Analyzer getAnalyzer() {
    return analyzer;
  }

  /**
   * Returns the graph that this GraphIndexer builds an index on.
   *
   * @return  The graph containing the indexed resources.
   */
  public TripleCollection getBaseGraph() {
    return baseGraph;
  }
 
  /**
   * Returns the graph where the index definitions are stored.
   *
   * @return The graph with the index definitions.
   */
  public TripleCollection getDefinitionGraph() {
    return definitionGraph;
  }
 
  /**
   * How many results a search on the index returns maximally.
   *
   * @return  the maximum number of results.
   */
  public int getMaxHits() {
    return maxHits;
  }

  /**
   * Set how many results a search on the index returns maximally.
   *
   * @param maxHits  the maximum number of results.
   */
  public void setMaxHits(int maxHits) {
    this.maxHits = maxHits;
  }

  @Override
  public void optimizeIndex() {
    luceneTools.optimizeIndex();
  }
 
  /**
   * Schedule optimizations for repeated executions.
   *
   * @param delay
   *    The delay before the first execution in milliseconds.
   * @param period
   *    Time between successive executions (execution rate) in milliseconds.
   */
  public void scheduleIndexOptimizations(long delay, long period) {
    if(timer != null) {
      timer.cancel();
    }
    timer = new Timer();
    timer.scheduleAtFixedRate(optimizationTask, delay, period);
  }
 
  /**
   * Cancel scheduled optimizations. This call does not have any effect on
   * optimizations that are being executed while the method is called.
   */
  public void terminateIndexOptimizationSchedule() {
    timer.cancel();
    timer = null;
  }

  @Override
  public void reCreateIndex() {
    processDefinitions();

    List<NonLiteral> instances = new ArrayList<NonLiteral>();

    for (UriRef indexedType : type2IndexedProperties.keySet()) {
      //lock necessary?
      Lock lock =  new GraphNode(indexedType, this.baseGraph).readLock();
      lock.lock();
      try {
        Iterator<Triple> iter  = this.baseGraph.filter(null, RDF.type, indexedType);
        while (iter.hasNext()) {
          instances.add(iter.next().getSubject());
        }
      } finally {
        lock.unlock();
      }
    }
    logger.debug("instances " + instances.size());
    IndexWriter writer = luceneTools.getIndexWriter(true);
    for (NonLiteral instance : instances) {
      indexResource(instance, writer);
    }
    luceneTools.commitChanges();
  }

  @Override
  public List<NonLiteral> findResources(List<? extends Condition> conditions,
      SortSpecification sortSpecification,
      FacetCollector... facetCollectors) throws ParseException {
    return findResources(conditions, sortSpecification,
        Arrays.asList(facetCollectors), 0, maxHits + 1);
  }
 
  /**
   * Find resources using conditions and collect facets and specify a sort order.
   *
   * This method allows to specify the indices of the query results to return
   * (e.g. for pagination).
   *
   * @param conditions
   *    a list of conditions to construct a query from.
   * @param facetCollectors
   *    Facet collectors to apply to the query result.
   *    Can be {@link Collections#EMPTY_LIST}, if not used.
   * @param sortSpecification
   *    Specifies the sort order. Can be null, if not used.
   * @param from
   *    return results starting from this index (inclusive).
   * @param to
   *    return results until this index (exclusive).
   * @return 
   *    a list of resources that match the query.
   *
   * @throws ParseException when the resulting query is illegal.
   */
  public List<NonLiteral> findResources(List<? extends Condition> conditions,
      SortSpecification sortSpecification,
      List<FacetCollector> facetCollectors, int from, int to)
      throws ParseException {

    if(from < 0) {
      from = 0;
    }
   
    if(to < from) {
      to = from + 1;
    }
   
    if(facetCollectors == null) {
      facetCollectors = Collections.EMPTY_LIST;
    }
   
    BooleanQuery booleanQuery = new BooleanQuery();
    for (Condition c : conditions) {
      booleanQuery.add(c.query(), BooleanClause.Occur.MUST);
    }
   
    IndexSearcher searcher = luceneTools.getIndexSearcher();
    ScoreDoc[] hits = null;
    try {
      if(sortSpecification != null) {
        SortFieldArrayWrapper fieldKey =
            new SortFieldArrayWrapper(sortSpecification.getSortFields());
        Sort sort = sortCache.get(fieldKey);
        if(sort == null) {
          sort = new Sort(sortSpecification.getSortFields());
          sortCache.put(fieldKey, sort);
        }
        searcher.setDefaultFieldSortScoring(true, true);
        TopFieldDocs topFieldDocs = searcher.search(booleanQuery, null, to, sort);
        hits = topFieldDocs.scoreDocs;
      } else {
        TopScoreDocCollector collector = TopScoreDocCollector.create(to, true);
        searcher.search(booleanQuery, collector);
        hits = collector.topDocs().scoreDocs;
      }
    } catch (IOException ex) {
      throw new RuntimeException(ex);
    }

    List<NonLiteral> result = new ArrayList<NonLiteral>();

    for (int i = from; i < hits.length; ++i) {
      int docId = hits[i].doc;
      Document d;
      try {
        d = searcher.doc(docId);
        collectFacets(facetCollectors, d);
        result.add(getResource(d));
      } catch (IOException ex) {
        logger.error("CRIS Error: ", ex);
      }
    }
   
    for(FacetCollector facetCollector : facetCollectors) {
      facetCollector.postProcess();
    }
   
    return result;
  }
 
  @Override
  public void finalize()
      throws Throwable {
    super.finalize();
    closeLuceneIndex();

  }
 
  /**
   * Schedule an update or creation of an index for a resource.
   *
   * @param resource  the resource to index.
   */
  protected void scheduleForReindex(Resource resource) {
    logger.debug("Scheduling for reindex: " + resource);
    reindexer.addResource(resource);
  }

  /**
   * Read the index definitions and initialize the GraphIndexer with them.
   */
  protected void processDefinitions() {

    Iterator<Triple> indexDefinitionResources =
        this.definitionGraph.filter(null, RDF.type, CRIS.IndexDefinition);

    Map<UriRef, Set<VirtualProperty>> type2IndexedPropertiesTuples =
        new HashMap<UriRef, Set<VirtualProperty>>();

    while (indexDefinitionResources.hasNext()) {
      GraphNode node = new GraphNode(indexDefinitionResources.next().getSubject(),
          this.definitionGraph);
      Iterator<GraphNode> types = node.getObjectNodes(CRIS.indexedType);
      while (types.hasNext()) {
        UriRef tUri = (UriRef) types.next().getNode();
        Iterator<GraphNode> properties = node.getObjectNodes(CRIS.indexedProperty);
        Set<VirtualProperty> props = new HashSet<VirtualProperty>();
        while (properties.hasNext()) {
          VirtualProperty vProp = asVirtualProperty(properties.next(), null);
          if (property2TypeMap.containsKey(vProp)) {
            property2TypeMap.get(vProp).add(tUri);
          } else {
            Set<UriRef> set = new HashSet<UriRef>();
            set.add(tUri);
            property2TypeMap.put(vProp, set);
          }

          for (UriRef baseProperty : vProp.baseProperties) {
            if (property2IncludingVProperty.containsKey(baseProperty)) {
              property2IncludingVProperty.get(baseProperty).add(vProp);
            } else {
              Set<VirtualProperty> set = new HashSet<VirtualProperty>();
              set.add(vProp);
              property2IncludingVProperty.put(baseProperty, set);
            }
          }
          props.add(vProp);

        }
        type2IndexedPropertiesTuples.put(tUri, props);

      }
    }
    type2IndexedProperties = new HashMap(type2IndexedPropertiesTuples);

  }
 
  /**
   * Index a resource.
   *
   * @param resource  the resource to index.
   * @param writer  the index writer.
   */
  protected void indexResource(Resource resource, IndexWriter writer) {
    if (resource instanceof UriRef) {
      try {
        indexNamedResource((UriRef) resource, writer);
      } catch (IOException ex) {
        throw new RuntimeException(ex);
      }
    } else {
      indexAnonymousResource(resource);
    }
  }
 
  private NonLiteral getResource(Document d) {
    return new UriRef(d.get(URI_FIELD_NAME));
  }

  private void followInversePaths(Resource resource, List<UriRef> pathToIndexedResource, List<Resource> list) {
    if (pathToIndexedResource.isEmpty()) {
      list.add(resource);
    } else {
      GraphNode node = new GraphNode(resource, this.baseGraph);
      Lock lock = node.readLock();
      lock.lock();
      try {
        Iterator<NonLiteral> predecessors = node.getSubjects(pathToIndexedResource.get(0));

        List<UriRef> tail = pathToIndexedResource.subList(1, pathToIndexedResource.size());
        while (predecessors.hasNext()) {
          followInversePaths(predecessors.next(), tail, list);
        }
      } finally {
        lock.unlock();
      }
      //throw new RuntimeException("modification of indirect properties not yet supported")
    }
  }

  private VirtualProperty asVirtualProperty(GraphNode r, List<VirtualProperty> vPropertyList) {
    if (r.hasProperty(RDF.type, CRIS.JoinVirtualProperty)) {
      if (vPropertyList == null) {
        vPropertyList = new ArrayList<VirtualProperty>();
      }
      return new JoinVirtualProperty(getVirtualPropertyList(r, vPropertyList));
    } else {
      if (r.hasProperty(RDF.type, CRIS.PathVirtualProperty)) {
        return new PathVirtualProperty(getUriPropertyList(r));
      } else {
        if ((r.getNode()) instanceof UriRef) {
          return new PropertyHolder((UriRef) r.getNode());
        } else {
          throw new RuntimeException(r + " is not of a knows VirtualProperty type and its not a UriRef  (it's a " + (r.getNode()).getClass() + ")");
        }
      }
    }
  }

  private List<VirtualProperty> getVirtualPropertyList(GraphNode r, List<VirtualProperty> vPropertyList) {
    List<Resource> rdfList = getPropertyList(r);
    for (Resource childPropertyResource : rdfList) {
      vPropertyList.add(asVirtualProperty(new GraphNode(childPropertyResource, r.getGraph()), vPropertyList));
    }

    return vPropertyList;
  }

  private List<UriRef> getUriPropertyList(GraphNode r) {
    List<UriRef> uriPropertyList = new ArrayList<UriRef>();

    List<Resource> rdfList = getPropertyList(r);
    for (Resource childPropertyResource : rdfList) {
      uriPropertyList.add((UriRef) childPropertyResource);
    }
    return uriPropertyList;
  }

  private List<Resource> getPropertyList(GraphNode r) {
    Iterator<GraphNode> propertyLists = r.getObjectNodes(CRIS.propertyList);
    if (propertyLists.hasNext()) {
      List<Resource> rdfList = new RdfList(propertyLists.next());
      return rdfList;
    }
    throw new RuntimeException("There is no propertyList on this definition.");
  }
 
  private void collectFacets(List<FacetCollector> facetCollectors, Document d) {
    if(facetCollectors.size() > 0) {
      for(FacetCollector facetCollector : facetCollectors) {
        Map<VirtualProperty, Map<String, Object>> facetMap =
            facetCollector.getFacetMap();
        for(VirtualProperty property : facetMap.keySet()) {
          String[] values = d.getValues(SORT_PREFIX + property.getStringKey());
          if(values != null) {
            for(String value : values) {
              facetCollector.addFacetValue(property, value);
            }
          }
        }
      }
    }
  }

  private void indexNamedResource(UriRef uriRef, IndexWriter writer)
      throws IOException {

    Term term = new Term(URI_FIELD_NAME, uriRef.getUnicodeString());
    writer.deleteDocuments(term);
    //the reindexing might be caused by the removal of a type statement

    GraphNode node = new GraphNode(uriRef, this.baseGraph);
    List<UriRef> types = new ArrayList<UriRef>();
    Lock lock = node.readLock();
    lock.lock();
    try {
      Iterator<Resource> resources = node.getObjects(RDF.type);
      while (resources.hasNext()) {
        Resource res = resources.next();
        if (res instanceof UriRef) {
          types.add((UriRef) res);
        }
      }
    } finally {
      lock.unlock();
    }
    for (UriRef type : types) {
      if (type2IndexedProperties.containsKey(type)) {
        Document doc = resourceToDocument(uriRef, type);
        doc.add(new Field(URI_FIELD_NAME, uriRef.getUnicodeString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(doc);
      }
    }
  }

  private Document resourceToDocument(UriRef resource, UriRef resourceType) {
    Document doc = new Document();
    Set<VirtualProperty> indexedProperties = type2IndexedProperties.get(resourceType);
    logger.info("indexing " + resource + " considering " + indexedProperties.size() + " properties (" + indexedProperties + ")");
    for (VirtualProperty vProperty : indexedProperties) {
      logger.info("indexing " + vProperty + " with values " + (vProperty.value(
          new GraphNode(resource, this.baseGraph))).size());
      for (String propertyValue : vProperty.value(new GraphNode(resource, this.baseGraph))) {
        logger.info("indexing " + vProperty + "(" + vProperty.stringKey + ") with value " + (propertyValue));
        //for sorting
        doc.add(new Field(SORT_PREFIX + vProperty.stringKey,
            propertyValue,
            Field.Store.YES,
            Field.Index.NOT_ANALYZED_NO_NORMS));
        //for searching (the extra field doesn't cost much time)
        doc.add(new Field(vProperty.stringKey,
            propertyValue,
            Field.Store.NO,
            Field.Index.NOT_ANALYZED));
        doc.add(new Field(vProperty.stringKey,
            propertyValue,
            Field.Store.NO,
            Field.Index.ANALYZED));
      }
    }
    return doc;
  }

  private void indexAnonymousResource(Resource resource) {
    logger.warn("Currently only indexing named resources is supported");
    /*val doc = resourceToDocument(resource)
    doc.add(new Field(URI_FIELD_NAME, getIdentifier(resource), Field.Store.YES, Field.Index.ANALYZED))
    writer.addDocument(doc)*/
  }
TOP

Related Classes of org.apache.clerezza.rdf.cris.GraphIndexer$OptimizationTask

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.