
Source Code of

/*
 * JBoss DNA (http://www.jboss.org/dna)
 * See the COPYRIGHT.txt file distributed with this work for information
 * regarding copyright ownership.  Some portions may be licensed
 * to Red Hat, Inc. under one or more contributor license agreements.
 * See the AUTHORS.txt file in the distribution for a full listing of
 * individual contributors.
 *
 * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
 * is licensed to you under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * JBoss DNA is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */

import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import net.jcip.annotations.NotThreadSafe;
import org.jboss.dna.common.i18n.I18n;
import org.jboss.dna.common.util.CheckArg;
import org.jboss.dna.common.util.Logger;
import org.jboss.dna.common.util.NamedThreadFactory;
import org.jboss.dna.graph.DnaLexicon;
import org.jboss.dna.graph.ExecutionContext;
import org.jboss.dna.graph.GraphI18n;
import org.jboss.dna.graph.JcrLexicon;
import org.jboss.dna.graph.Location;
import org.jboss.dna.graph.connector.RepositoryConnectionFactory;
import org.jboss.dna.graph.connector.RepositorySourceException;
import org.jboss.dna.graph.request.ChangeRequest;
import org.jboss.dna.graph.request.CompositeRequestChannel;
import org.jboss.dna.graph.request.CreateNodeRequest;
import org.jboss.dna.graph.request.DeleteBranchRequest;
import org.jboss.dna.graph.request.DeleteChildrenRequest;
import org.jboss.dna.graph.request.GetWorkspacesRequest;
import org.jboss.dna.graph.request.InvalidWorkspaceException;
import org.jboss.dna.graph.request.ReadAllPropertiesRequest;
import org.jboss.dna.graph.request.ReadBranchRequest;
import org.jboss.dna.graph.request.Request;
import org.jboss.dna.graph.request.UpdatePropertiesRequest;

* A utility class that can be used to update the indexes of a search engine by crawling parts of the source. Each instance is
* created to make a series of atomic updates to the search engine using a single connection to the source.
public class SearchEngineIndexer {
     * The default maximum depth of each subgraph read operation is {@value} .
    protected static final int DEFAULT_MAX_DEPTH_PER_READ = 100;

    private final ExecutionContext context;
    private final RepositoryConnectionFactory connectionFactory;
    private final String sourceName;
    private final SearchEngine searchEngine;
    private final int maxDepthPerRead = DEFAULT_MAX_DEPTH_PER_READ;
    private final ExecutorService service;
    private final CompositeRequestChannel channel;
    private final SearchEngineProcessor processor;
    private boolean closed = false;

     * Create an indexer that will update the indexes in the supplied search engine by crawling content, using the supplied
     * connection factory to obtain connections.
     * <p>
     * As soon as this indexer is created, it establishes a connection to the underlying source and is ready to being retrieving
     * content from the source and using it to update the indexes. Therefore, <i>the instance <strong>must</strong> be
     * {@link #close() closed} when completed.</i>
     * </p>
     * @param context the context in which the indexing operations are to be performed
     * @param searchEngine the search engine that is to be updated
     * @param connectionFactory the factory for creating connections to the repository containing the content
     * @throws IllegalArgumentException if the search engine or connection factory references are null
    public SearchEngineIndexer( ExecutionContext context,
                                SearchEngine searchEngine,
                                RepositoryConnectionFactory connectionFactory ) {
        CheckArg.isNotNull(context, "context");
        CheckArg.isNotNull(searchEngine, "searchEngine");
        CheckArg.isNotNull(connectionFactory, "connectionFactory");
        this.context = context;
        this.searchEngine = searchEngine;
        this.sourceName = searchEngine.getSourceName();
        this.connectionFactory = connectionFactory; = new CompositeRequestChannel(this.sourceName);
        this.service = Executors.newSingleThreadExecutor(new NamedThreadFactory("search-" + sourceName));
        // Start the channel and search engine processor right away (this is why this object must be closed), this.context, this.connectionFactory);
        this.processor = this.searchEngine.createProcessor(this.context, null, false);

     * Get the name of the source containing the content.
     * @return the source name; never null
    public String getSourceName() {
        return sourceName;

     * Index all of the content in the named workspace within the {@link #getSourceName() source}. This method operates
     * synchronously and returns when the requested indexing is completed.
     * @param workspaceName the name of the workspace
     * @return this object for convenience in method chaining; never null
     * @throws IllegalArgumentException if the context or workspace name is null, or if the depth per read is not positive
     * @throws RepositorySourceException if there is a problem accessing the content
     * @throws SearchEngineException if there is a problem updating the indexes
     * @throws InvalidWorkspaceException if the workspace does not exist
    public SearchEngineIndexer index( String workspaceName ) throws RepositorySourceException, SearchEngineException {
        Path rootPath = context.getValueFactories().getPathFactory().createRootPath();
        index(workspaceName, Location.create(rootPath));
        return this;

     * Index (or re-index) all of the content in all of the workspaces within the source. This method operates synchronously and
     * returns when the requested indexing is completed.
     * @return this object for convenience in method chaining; never null
     * @throws RepositorySourceException if there is a problem accessing the content
     * @throws SearchEngineException if there is a problem updating the indexes
     * @throws IllegalArgumentException if the context is null, or if depth per read is not positive
    public SearchEngineIndexer indexAllWorkspaces() throws RepositorySourceException, SearchEngineException {
        // Get the names of all the workspaces ...
        GetWorkspacesRequest getWorkspaces = new GetWorkspacesRequest();
        try {
        } catch (InterruptedException e) {
            // Clear the interrupted status of the thread and continue ...
            return this;
        // Index all of the workspaces ...
        Path rootPath = context.getValueFactories().getPathFactory().createRootPath();
        Location rootLocation = Location.create(rootPath);
        for (String workspaceName : getWorkspaces.getAvailableWorkspaceNames()) {
            index(workspaceName, rootLocation);
        return this;

     * Crawl and index the full subgraph content starting at the supplied path in the named workspace.
     * @param workspaceName the name of the workspace
     * @param path the path of the content to be indexed
     * @return this object for convenience in method chaining; never null
     * @throws IllegalArgumentException if the workspace name or location are null, or if the depth is less than 1
     * @throws IllegalStateException if this object has already been {@link #close() closed}
     * @throws InvalidWorkspaceException if there is no workspace with the supplied name
    public SearchEngineIndexer index( String workspaceName,
                                      Path path ) {
        CheckArg.isNotNull(workspaceName, "workspaceName");
        CheckArg.isNotNull(path, "path");
        indexSubgraph(workspaceName, Location.create(path), Integer.MAX_VALUE);
        return this;

     * Crawl and index the content starting at the supplied path in the named workspace, to the designated depth.
     * @param workspaceName the name of the workspace
     * @param path the path of the content to be indexed
     * @param depth the depth of the content to be indexed
     * @return this object for convenience in method chaining; never null
     * @throws IllegalArgumentException if the workspace name or location are null, or if the depth is less than 1
     * @throws IllegalStateException if this object has already been {@link #close() closed}
     * @throws InvalidWorkspaceException if there is no workspace with the supplied name
    public SearchEngineIndexer index( String workspaceName,
                                      Path path,
                                      int depth ) {
        CheckArg.isNotNull(workspaceName, "workspaceName");
        CheckArg.isNotNull(path, "path");
        CheckArg.isPositive(depth, "depth");
        if (depth == 1) {
            indexProperties(workspaceName, Location.create(path));
        } else {
            indexSubgraph(workspaceName, Location.create(path), depth);
        return this;

     * Crawl and index the full subgraph content starting at the supplied location in the named workspace.
     * @param workspaceName the name of the workspace
     * @param location the location of the content to be indexed
     * @return this object for convenience in method chaining; never null
     * @throws IllegalArgumentException if the workspace name or location are null, or if the depth is less than 1
     * @throws InvalidWorkspaceException if there is no workspace with the supplied name
    public SearchEngineIndexer index( String workspaceName,
                                      Location location ) {
        CheckArg.isNotNull(workspaceName, "workspaceName");
        CheckArg.isNotNull(location, "location");
        indexSubgraph(workspaceName, location, Integer.MAX_VALUE);
        return this;

     * Crawl and index the content starting at the supplied location in the named workspace, to the designated depth.
     * @param workspaceName the name of the workspace
     * @param location the location of the content to be indexed
     * @param depth the depth of the content to be indexed
     * @return this object for convenience in method chaining; never null
     * @throws IllegalArgumentException if the workspace name or location are null, or if the depth is less than 1
     * @throws IllegalStateException if this object has already been {@link #close() closed}
     * @throws InvalidWorkspaceException if there is no workspace with the supplied name
    public SearchEngineIndexer index( String workspaceName,
                                      Location location,
                                      int depth ) {
        CheckArg.isNotNull(workspaceName, "workspaceName");
        CheckArg.isNotNull(location, "location");
        CheckArg.isPositive(depth, "depth");
        if (depth == 1) {
            indexProperties(workspaceName, location);
        } else {
            indexSubgraph(workspaceName, location, depth);
        return this;

    protected void indexSubgraph( String workspaceName,
                                  Location startingLocation,
                                  int depth ) {
        int depthPerRead = Math.min(maxDepthPerRead, depth);
        // Read the first subgraph ...
        ReadBranchRequest readSubgraph = new ReadBranchRequest(startingLocation, workspaceName, depthPerRead);
        try {
        } catch (InterruptedException e) {
            // Clear the interrupted status of the thread and continue ...
        } catch (InvalidPathException e) {
            // The node must no longer exist, so delete it from the indexes ...
            process(new DeleteBranchRequest(startingLocation, workspaceName));
        Iterator<Location> locationIter = readSubgraph.iterator();
        assert locationIter.hasNext();

        // Destroy the nodes at the supplied location ...
        if (startingLocation.getPath().isRoot()) {
            // Just delete the whole content ...
            process(new DeleteBranchRequest(startingLocation, workspaceName));
        } else {
            // We can't delete the node, since later same-name-siblings might be changed. So delete the children ...
            process(new DeleteChildrenRequest(startingLocation, workspaceName));

        // Now update all of the properties, removing any that are no longer needed ...
        Location topNode =;
        assert topNode.equals(startingLocation);
        Map<Name, Property> properties = readSubgraph.getPropertiesFor(topNode);
        if (properties == null) return;
        if (startingLocation.getPath().isRoot()) {
            // The properties of the root node generally don't include the primary type, but we need to add it here ...
            Property rootPrimaryType = context.getPropertyFactory().create(JcrLexicon.PRIMARY_TYPE, DnaLexicon.ROOT);
            properties.put(JcrLexicon.PRIMARY_TYPE, rootPrimaryType);
        UpdatePropertiesRequest request = new UpdatePropertiesRequest(topNode, workspaceName, properties, true);

        // Create a queue that we'll use to walk the content ...
        LinkedList<Location> locationsToRead = new LinkedList<Location>();

        // Now walk the remaining nodes in the subgraph ...
        while (true) {
            while (locationIter.hasNext()) {

                // Index the node ...
                Location location =;
                Path path = location.getPath();
                Location parent = readSubgraph.getLocationFor(path.getParent());
                Name childName = path.getLastSegment().getName();
                Collection<Property> nodePoperties = readSubgraph.getPropertiesFor(location).values();
                CreateNodeRequest create = new CreateNodeRequest(parent, workspaceName, childName, nodePoperties);
                create.setActualLocationOfNode(location); // set this so we don't have to figure it out
                if (create.isCancelled() || create.hasError()) return;

                // Process the children ...
                for (Location child : readSubgraph.getChildren(location)) {
                    if (!readSubgraph.includes(child)) {
                        // Record this location as needing to be read ...

            if (locationsToRead.isEmpty()) break;
            Location location = locationsToRead.poll();
            assert location != null;

            // Recompute the depth per read ...
            depthPerRead = depth - location.getPath().size();
            if (depthPerRead < 1) continue;
            readSubgraph = new ReadBranchRequest(location, workspaceName, depthPerRead);
            try {
            } catch (InterruptedException e) {
                // Clear the interrupted status of the thread and continue ...

    protected void indexProperties( String workspaceName,
                                    Location location ) {
        ReadAllPropertiesRequest readProps = new ReadAllPropertiesRequest(location, workspaceName);
        try {
        } catch (InterruptedException e) {
            // Clear the interrupted status of the thread and continue ...

        // Now update the properties in the search engine ...
        location = readProps.getActualLocationOfNode();
        Map<Name, Property> properties = readProps.getPropertiesByName();
        UpdatePropertiesRequest request = new UpdatePropertiesRequest(location, workspaceName, properties, true);

     * Send the supplied change request directly to the search engine's processor.
     * @param searchEngineRequest
    public final void process( ChangeRequest searchEngineRequest ) {

    protected final void checkRequestForErrors( Request request ) throws RepositorySourceException, RuntimeException {
        if (request.hasError()) {
            Throwable t = request.getError();
            if (t instanceof RuntimeException) throw (RuntimeException)t;
            throw new RepositorySourceException(sourceName, t);

    protected final void checkNotClosed() throws IllegalStateException {
        if (closed) {
            throw new IllegalStateException(GraphI18n.searchEngineIndexerForSourceHasAlreadyBeenClosed.text(sourceName));

     * Return whether this indexer has already been {@link #close() closed}.
     * @return true if this has been closed, or false if it is still usable
     * @see #close()
    public boolean isClosed() {
        return closed;

     * Close this indexer and release all resources. This method has no effect if it is called when this indexer is alread closed.
     * @see #isClosed()
    public void close() {
        if (closed) return;
        closed = true;
        // Close the channel ...
        try {
        } finally {
            // And shut down the executor service ...
            try {
                service.awaitTermination(5, TimeUnit.SECONDS);
            } catch (InterruptedException e) {
                // Log this ...
                I18n msg = GraphI18n.errorShuttingDownExecutorServiceInSearchEngineIndexer;
                Logger.getLogger(getClass()).error(msg, sourceName);
                // Clear the interrupted status of the thread ...
            } finally {
                // Close the search engine processor ...

Related Classes of

Copyright © 2018 All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact