Source Code of org.jboss.dna.graph.search.SearchEngineIndexer

/*
 * JBoss DNA (http://www.jboss.org/dna)
 * See the COPYRIGHT.txt file distributed with this work for information
 * regarding copyright ownership.  Some portions may be licensed
 * to Red Hat, Inc. under one or more contributor license agreements.
 * See the AUTHORS.txt file in the distribution for a full listing of 
 * individual contributors.
 *
 * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
 * is licensed to you under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 * 
 * JBoss DNA is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package org.jboss.dna.graph.search;


import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import net.jcip.annotations.NotThreadSafe;
import org.jboss.dna.common.i18n.I18n;
import org.jboss.dna.common.util.CheckArg;
import org.jboss.dna.common.util.Logger;
import org.jboss.dna.common.util.NamedThreadFactory;
import org.jboss.dna.graph.DnaLexicon;
import org.jboss.dna.graph.ExecutionContext;
import org.jboss.dna.graph.GraphI18n;
import org.jboss.dna.graph.JcrLexicon;
import org.jboss.dna.graph.Location;
import org.jboss.dna.graph.connector.RepositoryConnectionFactory;
import org.jboss.dna.graph.connector.RepositorySourceException;
import org.jboss.dna.graph.property.InvalidPathException;
import org.jboss.dna.graph.property.Name;
import org.jboss.dna.graph.property.Path;
import org.jboss.dna.graph.property.Property;
import org.jboss.dna.graph.request.ChangeRequest;
import org.jboss.dna.graph.request.CompositeRequestChannel;
import org.jboss.dna.graph.request.CreateNodeRequest;
import org.jboss.dna.graph.request.DeleteBranchRequest;
import org.jboss.dna.graph.request.DeleteChildrenRequest;
import org.jboss.dna.graph.request.GetWorkspacesRequest;
import org.jboss.dna.graph.request.InvalidWorkspaceException;
import org.jboss.dna.graph.request.ReadAllPropertiesRequest;
import org.jboss.dna.graph.request.ReadBranchRequest;
import org.jboss.dna.graph.request.Request;
import org.jboss.dna.graph.request.UpdatePropertiesRequest;


/**
 * A utility class that can be used to update the indexes of a search engine by crawling parts of the source. Each instance is
 * created to make a series of atomic updates to the search engine using a single connection to the source.
 */
@NotThreadSafe
public class SearchEngineIndexer {
    /**
     * The default maximum depth of each subgraph read operation is {@value} .
     */
    protected static final int DEFAULT_MAX_DEPTH_PER_READ = 100;


    // The context supplied to the constructor; used to build paths and properties and handed to the channel and processor.
    private final ExecutionContext context;
    // The factory supplied to the constructor; passed to the channel when it is started.
    private final RepositoryConnectionFactory connectionFactory;
    // The name of the source, as reported by the search engine at construction time.
    private final String sourceName;
    // The search engine whose indexes are updated; used to create the processor.
    private final SearchEngine searchEngine;
    // Fixed to the default; used by indexSubgraph(...) when sizing the depth of a read request.
    private final int maxDepthPerRead = DEFAULT_MAX_DEPTH_PER_READ;
    // The single-thread executor the channel is started with; shut down in close().
    private final ExecutorService service;
    // The channel through which read requests are submitted to the source and awaited.
    private final CompositeRequestChannel channel;
    // The search engine processor that receives the change requests; closed in close().
    private final SearchEngineProcessor processor;
    // Set to true by close(); consulted by checkNotClosed() and isClosed().
    private boolean closed = false;


    /**
     * Create an indexer that will update the indexes in the supplied search engine by crawling content, using the supplied
     * connection factory to obtain connections.
     * <p>
     * As soon as this indexer is created, it establishes a connection to the underlying source and is ready to being retrieving
     * content from the source and using it to update the indexes. Therefore, <i>the instance <strong>must</strong> be
     * {@link #close() closed} when completed.</i>
     * </p>
     * 
     * @param context the context in which the indexing operations are to be performed
     * @param searchEngine the search engine that is to be updated
     * @param connectionFactory the factory for creating connections to the repository containing the content
     * @throws IllegalArgumentException if the search engine or connection factory references are null
     */
    public SearchEngineIndexer( ExecutionContext context,
                                SearchEngine searchEngine,
                                RepositoryConnectionFactory connectionFactory ) {
        CheckArg.isNotNull(context, "context");
        CheckArg.isNotNull(searchEngine, "searchEngine");
        CheckArg.isNotNull(connectionFactory, "connectionFactory");
        this.context = context;
        this.searchEngine = searchEngine;
        this.sourceName = searchEngine.getSourceName();
        this.connectionFactory = connectionFactory;
        this.channel = new CompositeRequestChannel(this.sourceName);
        this.service = Executors.newSingleThreadExecutor(new NamedThreadFactory("search-" + sourceName));
        // Start the channel and search engine processor right away (this is why this object must be closed)
        this.channel.start(service, this.context, this.connectionFactory);
        this.processor = this.searchEngine.createProcessor(this.context, null, false);
    }


    /**
     * Get the name of the source containing the content.
     * 
     * @return the source name; never null
     */
    public String getSourceName() {
        return sourceName;
    }


    /**
     * Index all of the content in the named workspace within the {@link #getSourceName() source}. This method operates
     * synchronously and returns when the requested indexing is completed.
     * 
     * @param workspaceName the name of the workspace
     * @return this object for convenience in method chaining; never null
     * @throws IllegalArgumentException if the context or workspace name is null, or if the depth per read is not positive
     * @throws RepositorySourceException if there is a problem accessing the content
     * @throws SearchEngineException if there is a problem updating the indexes
     * @throws InvalidWorkspaceException if the workspace does not exist
     */
    public SearchEngineIndexer index( String workspaceName ) throws RepositorySourceException, SearchEngineException {
        Path rootPath = context.getValueFactories().getPathFactory().createRootPath();
        index(workspaceName, Location.create(rootPath));
        return this;
    }


    /**
     * Index (or re-index) all of the content in all of the workspaces within the source. This method operates synchronously and
     * returns when the requested indexing is completed.
     * 
     * @return this object for convenience in method chaining; never null
     * @throws RepositorySourceException if there is a problem accessing the content
     * @throws SearchEngineException if there is a problem updating the indexes
     * @throws IllegalArgumentException if the context is null, or if depth per read is not positive
     */
    public SearchEngineIndexer indexAllWorkspaces() throws RepositorySourceException, SearchEngineException {
        // Get the names of all the workspaces ...
        GetWorkspacesRequest getWorkspaces = new GetWorkspacesRequest();
        try {
            channel.addAndAwait(getWorkspaces);
            checkRequestForErrors(getWorkspaces);
        } catch (InterruptedException e) {
            // Clear the interrupted status of the thread and continue ...
            Thread.interrupted();
            return this;
        }
        // Index all of the workspaces ...
        Path rootPath = context.getValueFactories().getPathFactory().createRootPath();
        Location rootLocation = Location.create(rootPath);
        for (String workspaceName : getWorkspaces.getAvailableWorkspaceNames()) {
            index(workspaceName, rootLocation);
        }
        return this;
    }


    /**
     * Crawl and index the full subgraph content starting at the supplied path in the named workspace.
     * 
     * @param workspaceName the name of the workspace
     * @param path the path of the content to be indexed
     * @return this object for convenience in method chaining; never null
     * @throws IllegalArgumentException if the workspace name or location are null, or if the depth is less than 1
     * @throws IllegalStateException if this object has already been {@link #close() closed}
     * @throws InvalidWorkspaceException if there is no workspace with the supplied name
     */
    public SearchEngineIndexer index( String workspaceName,
                                      Path path ) {
        checkNotClosed();
        CheckArg.isNotNull(workspaceName, "workspaceName");
        CheckArg.isNotNull(path, "path");
        indexSubgraph(workspaceName, Location.create(path), Integer.MAX_VALUE);
        return this;
    }


    /**
     * Crawl and index the content starting at the supplied path in the named workspace, to the designated depth.
     * 
     * @param workspaceName the name of the workspace
     * @param path the path of the content to be indexed
     * @param depth the depth of the content to be indexed
     * @return this object for convenience in method chaining; never null
     * @throws IllegalArgumentException if the workspace name or location are null, or if the depth is less than 1
     * @throws IllegalStateException if this object has already been {@link #close() closed}
     * @throws InvalidWorkspaceException if there is no workspace with the supplied name
     */
    public SearchEngineIndexer index( String workspaceName,
                                      Path path,
                                      int depth ) {
        checkNotClosed();
        CheckArg.isNotNull(workspaceName, "workspaceName");
        CheckArg.isNotNull(path, "path");
        CheckArg.isPositive(depth, "depth");
        if (depth == 1) {
            indexProperties(workspaceName, Location.create(path));
        } else {
            indexSubgraph(workspaceName, Location.create(path), depth);
        }
        return this;
    }


    /**
     * Crawl and index the full subgraph content starting at the supplied location in the named workspace.
     * 
     * @param workspaceName the name of the workspace
     * @param location the location of the content to be indexed
     * @return this object for convenience in method chaining; never null
     * @throws IllegalArgumentException if the workspace name or location are null, or if the depth is less than 1
     * @throws InvalidWorkspaceException if there is no workspace with the supplied name
     */
    public SearchEngineIndexer index( String workspaceName,
                                      Location location ) {
        checkNotClosed();
        CheckArg.isNotNull(workspaceName, "workspaceName");
        CheckArg.isNotNull(location, "location");
        indexSubgraph(workspaceName, location, Integer.MAX_VALUE);
        return this;
    }


    /**
     * Crawl and index the content starting at the supplied location in the named workspace, to the designated depth.
     * 
     * @param workspaceName the name of the workspace
     * @param location the location of the content to be indexed
     * @param depth the depth of the content to be indexed
     * @return this object for convenience in method chaining; never null
     * @throws IllegalArgumentException if the workspace name or location are null, or if the depth is less than 1
     * @throws IllegalStateException if this object has already been {@link #close() closed}
     * @throws InvalidWorkspaceException if there is no workspace with the supplied name
     */
    public SearchEngineIndexer index( String workspaceName,
                                      Location location,
                                      int depth ) {
        checkNotClosed();
        CheckArg.isNotNull(workspaceName, "workspaceName");
        CheckArg.isNotNull(location, "location");
        CheckArg.isPositive(depth, "depth");
        if (depth == 1) {
            indexProperties(workspaceName, location);
        } else {
            indexSubgraph(workspaceName, location, depth);
        }
        return this;
    }


    protected void indexSubgraph( String workspaceName,
                                  Location startingLocation,
                                  int depth ) {
        int depthPerRead = Math.min(maxDepthPerRead, depth);
        // Read the first subgraph ...
        ReadBranchRequest readSubgraph = new ReadBranchRequest(startingLocation, workspaceName, depthPerRead);
        try {
            channel.addAndAwait(readSubgraph);
            checkRequestForErrors(readSubgraph);
        } catch (InterruptedException e) {
            // Clear the interrupted status of the thread and continue ...
            Thread.interrupted();
            return;
        } catch (InvalidPathException e) {
            // The node must no longer exist, so delete it from the indexes ...
            process(new DeleteBranchRequest(startingLocation, workspaceName));
            return;
        }
        Iterator<Location> locationIter = readSubgraph.iterator();
        assert locationIter.hasNext();


        // Destroy the nodes at the supplied location ...
        if (startingLocation.getPath().isRoot()) {
            // Just delete the whole content ...
            process(new DeleteBranchRequest(startingLocation, workspaceName));
        } else {
            // We can't delete the node, since later same-name-siblings might be changed. So delete the children ...
            process(new DeleteChildrenRequest(startingLocation, workspaceName));
        }


        // Now update all of the properties, removing any that are no longer needed ...
        Location topNode = locationIter.next();
        assert topNode.equals(startingLocation);
        Map<Name, Property> properties = readSubgraph.getPropertiesFor(topNode);
        if (properties == null) return;
        if (startingLocation.getPath().isRoot()) {
            // The properties of the root node generally don't include the primary type, but we need to add it here ...
            Property rootPrimaryType = context.getPropertyFactory().create(JcrLexicon.PRIMARY_TYPE, DnaLexicon.ROOT);
            properties.put(JcrLexicon.PRIMARY_TYPE, rootPrimaryType);
        }
        UpdatePropertiesRequest request = new UpdatePropertiesRequest(topNode, workspaceName, properties, true);
        request.setActualLocationOfNode(topNode);
        process(request);
        checkRequestForErrors(request);


        // Create a queue that we'll use to walk the content ...
        LinkedList<Location> locationsToRead = new LinkedList<Location>();


        // Now walk the remaining nodes in the subgraph ...
        while (true) {
            while (locationIter.hasNext()) {


                // Index the node ...
                Location location = locationIter.next();
                Path path = location.getPath();
                Location parent = readSubgraph.getLocationFor(path.getParent());
                Name childName = path.getLastSegment().getName();
                Collection<Property> nodePoperties = readSubgraph.getPropertiesFor(location).values();
                CreateNodeRequest create = new CreateNodeRequest(parent, workspaceName, childName, nodePoperties);
                create.setActualLocationOfNode(location); // set this so we don't have to figure it out
                process(create);
                if (create.isCancelled() || create.hasError()) return;


                // Process the children ...
                for (Location child : readSubgraph.getChildren(location)) {
                    if (!readSubgraph.includes(child)) {
                        // Record this location as needing to be read ...
                        locationsToRead.add(child);
                    }
                }
            }


            if (locationsToRead.isEmpty()) break;
            Location location = locationsToRead.poll();
            assert location != null;


            // Recompute the depth per read ...
            depthPerRead = depth - location.getPath().size();
            if (depthPerRead < 1) continue;
            readSubgraph = new ReadBranchRequest(location, workspaceName, depthPerRead);
            try {
                channel.addAndAwait(readSubgraph);
            } catch (InterruptedException e) {
                // Clear the interrupted status of the thread and continue ...
                Thread.interrupted();
                return;
            }
            checkRequestForErrors(readSubgraph);
        }
    }


    protected void indexProperties( String workspaceName,
                                    Location location ) {
        ReadAllPropertiesRequest readProps = new ReadAllPropertiesRequest(location, workspaceName);
        try {
            channel.addAndAwait(readProps);
        } catch (InterruptedException e) {
            // Clear the interrupted status of the thread and continue ...
            Thread.interrupted();
        }
        checkRequestForErrors(readProps);


        // Now update the properties in the search engine ...
        location = readProps.getActualLocationOfNode();
        Map<Name, Property> properties = readProps.getPropertiesByName();
        UpdatePropertiesRequest request = new UpdatePropertiesRequest(location, workspaceName, properties, true);
        request.setActualLocationOfNode(location);
        process(request);
        checkRequestForErrors(readProps);
    }


    /**
     * Send the supplied change request directly to the search engine's processor.
     * 
     * @param searchEngineRequest
     */
    public final void process( ChangeRequest searchEngineRequest ) {
        processor.process(searchEngineRequest);
    }


    protected final void checkRequestForErrors( Request request ) throws RepositorySourceException, RuntimeException {
        if (request.hasError()) {
            Throwable t = request.getError();
            if (t instanceof RuntimeException) throw (RuntimeException)t;
            throw new RepositorySourceException(sourceName, t);
        }
    }


    protected final void checkNotClosed() throws IllegalStateException {
        if (closed) {
            throw new IllegalStateException(GraphI18n.searchEngineIndexerForSourceHasAlreadyBeenClosed.text(sourceName));
        }
    }


    /**
     * Return whether this indexer has already been {@link #close() closed}.
     * 
     * @return true if this has been closed, or false if it is still usable
     * @see #close()
     */
    public boolean isClosed() {
        return closed;
    }


    /**
     * Close this indexer and release all resources. This method has no effect if it is called when this indexer is alread closed.
     * 
     * @see #isClosed()
     */
    public void close() {
        if (closed) return;
        closed = true;
        // Close the channel ...
        try {
            channel.close();
        } finally {
            // And shut down the executor service ...
            service.shutdown();
            try {
                service.awaitTermination(5, TimeUnit.SECONDS);
            } catch (InterruptedException e) {
                // Log this ...
                I18n msg = GraphI18n.errorShuttingDownExecutorServiceInSearchEngineIndexer;
                Logger.getLogger(getClass()).error(msg, sourceName);
                // Clear the interrupted status of the thread ...
                Thread.interrupted();
            } finally {
                // Close the search engine processor ...
                processor.close();
            }
        }
    }
}
Source Code of org.jboss.dna.graph.search.SearchEngineIndexer

Related Classes of org.jboss.dna.graph.search.SearchEngineIndexer