Package org.apache.jackrabbit.core.query.lucene

Source Code of org.apache.jackrabbit.core.query.lucene.ConsistencyCheck$MultipleEntries

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.core.query.lucene;

import org.apache.commons.io.IOExceptionWithCause;
import org.apache.jackrabbit.core.HierarchyManager;
import org.apache.jackrabbit.core.RepositoryImpl;
import org.apache.jackrabbit.core.cluster.ClusterException;
import org.apache.jackrabbit.core.cluster.ClusterNode;
import org.apache.jackrabbit.core.persistence.IterablePersistenceManager;
import org.apache.jackrabbit.core.persistence.PersistenceManager;
import org.apache.jackrabbit.core.state.ItemState;
import org.apache.jackrabbit.core.state.ItemStateManager;
import org.apache.jackrabbit.core.state.NoSuchItemStateException;
import org.apache.jackrabbit.core.state.NodeState;
import org.apache.jackrabbit.core.state.ItemStateException;
import org.apache.jackrabbit.core.state.ChildNodeEntry;
import org.apache.jackrabbit.core.id.NodeId;
import org.apache.jackrabbit.spi.Path;
import org.apache.jackrabbit.spi.commons.conversion.MalformedPathException;
import org.apache.jackrabbit.spi.commons.name.NameConstants;
import org.apache.jackrabbit.spi.commons.name.PathBuilder;
import org.apache.lucene.document.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.jcr.ItemNotFoundException;
import javax.jcr.RepositoryException;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.Set;
import java.util.HashSet;

/**
* Implements a consistency check on the search index. Currently the following
* checks are implemented:
* <ul>
* <li>Does the node exist in the ItemStateManager? If it does not exist
* anymore the node is deleted from the index.</li>
* <li>Is the parent of a node also present in the index? If it is not present it
* will be indexed.</li>
* <li>Is a node indexed multiple times? If that is the case, all occurrences
* in the index for such a node are removed, and the node is re-indexed.</li>
* <li>Is a node missing from the index? If so, it is added.</li>
* </ul>
*/
public class ConsistencyCheck {

    /**
     * Logger instance for this class
     */
    private static final Logger log = LoggerFactory.getLogger(ConsistencyCheck.class);

    /**
     * The number of node ids to fetch at once from the persistence manager.
     * Defaults to 8192 (1024 * 8).
     */
    private static final int NODESATONCE = Integer.getInteger("org.apache.jackrabbit.checker.nodesatonce", 1024 * 8);

    private final SearchIndex handler;

    /**
     * The ItemStateManager of the workspace.
     */
    private final ItemStateManager stateMgr;

    /**
     * The PersistenceManager of the workspace.
     */
    private IterablePersistenceManager pm;

    /**
     * The index to check.
     */
    private final MultiIndex index;

    /**
     * All the node ids and whether they were found in the index.
     */
    private Map<NodeId, Boolean> nodeIds;

    /**
     * Paths of nodes that are not to be indexed
     */
    private Set<Path> excludedPaths;

    /**
     * Paths of nodes that will be excluded from consistency check
     */
    private final Set<Path> ignoredPaths = new HashSet<Path>();

    /**
     * List of all errors.
     */
    private final List<ConsistencyCheckError> errors =
        new ArrayList<ConsistencyCheckError>();

    /**
     * Private constructor.
     */
    private ConsistencyCheck(MultiIndex index, SearchIndex handler, Set<NodeId> excludedIds) {
        this.index = index;
        this.handler = handler;
        final HierarchyManager hierarchyManager = handler.getContext().getHierarchyManager();
        excludedPaths = new HashSet<Path>(excludedIds.size());
        for (NodeId excludedId : excludedIds) {
            try {
                final Path path = hierarchyManager.getPath(excludedId);
                excludedPaths.add(path);
            } catch (ItemNotFoundException e) {
                log.warn("Excluded node does not exist");
            } catch (RepositoryException e) {
                log.error("Failed to get excluded path", e);
            }
        }

        //JCR-3773: ignore the tree jcr:nodeTypes
        PathBuilder pathBuilder = new PathBuilder();
        pathBuilder.addRoot();
        pathBuilder.addLast(NameConstants.JCR_NODETYPES);
        try {
            Path path = pathBuilder.getPath();
            log.info("consistency check will skip " + path);
            ignoredPaths.add(path);
        } catch (MalformedPathException e) {
            //will never happen
            log.error("Malformed path", e);
        }

        this.stateMgr = handler.getContext().getItemStateManager();
        final PersistenceManager pm = handler.getContext().getPersistenceManager();
        if (pm instanceof IterablePersistenceManager) {
            this.pm = (IterablePersistenceManager) pm;
        }
    }

    /**
     * Runs the consistency check on <code>index</code>.
     *
     *
     *
     * @param index the index to check.
     * @param handler the QueryHandler to use.
     * @param excludedIds the set of node ids that are not indexed
     * @return the consistency check with the results.
     * @throws IOException if an error occurs while checking.
     */
    static ConsistencyCheck run(MultiIndex index, SearchIndex handler, final Set<NodeId> excludedIds)
            throws IOException {
        ConsistencyCheck check = new ConsistencyCheck(index, handler, excludedIds);
        check.run();
        return check;
    }

    /**
     * Repairs detected errors during the consistency check.
     * @param ignoreFailure if <code>true</code> repair failures are ignored,
     *   the repair continues without throwing an exception. If
     *   <code>false</code> the repair procedure is aborted on the first
     *   repair failure.
     * @throws IOException if a repair failure occurs.
     */
    public void repair(boolean ignoreFailure) throws IOException {
        if (errors.size() == 0) {
            log.info("No errors found.");
            return;
        }
        int notRepairable = 0;
        for (ConsistencyCheckError error : errors) {
            try {
                if (error.repairable()) {
                    error.repair();
                } else {
                    log.warn("Not repairable: " + error);
                    notRepairable++;
                }
            } catch (Exception e) {
                if (ignoreFailure) {
                    log.warn("Exception while repairing: " + error, e);
                } else if (e instanceof IOException) {
                    throw (IOException) e;
                } else {
                    throw new IOExceptionWithCause(e);
                }
            }
        }
        log.info("Repaired " + (errors.size() - notRepairable) + " errors.");
        if (notRepairable > 0) {
            log.warn("" + notRepairable + " error(s) not repairable.");
        }
    }

    /**
     * Returns the errors detected by the consistency check.
     * @return the errors detected by the consistency check.
     */
    public List<ConsistencyCheckError> getErrors() {
        return new ArrayList<ConsistencyCheckError>(errors);
    }

    /**
     * Runs the consistency check.
     * @throws IOException if an error occurs while running the check.
     */
    private void run() throws IOException {
        log.info("Checking index of workspace " + handler.getContext().getWorkspace());
        loadNodes();
        if (nodeIds != null) {
            checkIndexConsistency();
            checkIndexCompleteness();
        }
    }

    public void doubleCheckErrors() {
        if (!errors.isEmpty()) {
            log.info("Double checking errors");
            final ClusterNode clusterNode = handler.getContext().getClusterNode();
            if (clusterNode != null) {
                try {
                    clusterNode.sync();
                } catch (ClusterException e) {
                    log.error("Could not sync cluster node for double checking errors");
                }
            }
            final Iterator<ConsistencyCheckError> iterator = errors.iterator();
            while (iterator.hasNext()) {
                try {
                    final ConsistencyCheckError error = iterator.next();
                    if (!error.doubleCheck(handler, stateMgr)) {
                        log.info("False positive: " + error.toString());
                        iterator.remove();
                    }
                } catch (RepositoryException e) {
                    log.error("Failed to double check consistency error", e);
                } catch (IOException e) {
                    log.error("Failed to double check consistency error", e);
                }
            }
        }
    }

    private void loadNodes() {
        log.info("Loading nodes");
        try {
            int count = 0;
            Map<NodeId, Boolean> nodeIds = new HashMap<NodeId, Boolean>();
            List<NodeId> batch = pm.getAllNodeIds(null, NODESATONCE);
            NodeId lastId = null;
            while (!batch.isEmpty()) {
                for (NodeId nodeId : batch) {
                    lastId = nodeId;

                    count++;
                    if (count % 1000 == 0) {
                        log.info(pm + ": loaded " + count + " node ids...");
                    }

                    nodeIds.put(nodeId, Boolean.FALSE);

                }
                batch = pm.getAllNodeIds(lastId, NODESATONCE);
            }
            if (pm.exists(lastId)) {
                this.nodeIds = nodeIds;
            } else {
                log.info("Failed to read all nodes, starting over");
                loadNodes();
            }
        } catch (ItemStateException e) {
            log.error("Exception while loading items to check", e);
        } catch (RepositoryException e) {
            log.error("Exception while loading items to check", e);
        }
    }

    private void checkIndexConsistency() throws IOException {
        log.info("Checking index consistency");
        // Ids of multiple nodes in the index
        Set<NodeId> multipleEntries = new HashSet<NodeId>();
        CachingMultiIndexReader reader = index.getIndexReader();
        try {
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (i > 10 && i % (reader.maxDoc() / 5) == 0) {
                    long progress = Math.round((100.0 * (float) i) / ((float) reader.maxDoc() * 2f));
                    log.info("progress: " + progress + "%");
                }
                if (reader.isDeleted(i)) {
                    continue;
                }
                Document d = reader.document(i, FieldSelectors.UUID);
                NodeId id = new NodeId(d.get(FieldNames.UUID));
                if (!isIgnored(id)) {
                    boolean nodeExists = nodeIds.containsKey(id);
                    if (nodeExists) {
                        Boolean alreadyIndexed = nodeIds.put(id, Boolean.TRUE);
                        if (alreadyIndexed) {
                            multipleEntries.add(id);
                        }
                    } else {
                        errors.add(new NodeDeleted(id));
                    }
                }
            }
        } finally {
            reader.release();
        }

        // create multiple entries errors
        for (NodeId id : multipleEntries) {
            errors.add(new MultipleEntries(id));
        }

        reader = index.getIndexReader();
        try {
            // run through documents again and check parent
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (i > 10 && i % (reader.maxDoc() / 5) == 0) {
                    long progress = Math.round((100.0 * (float) i) / ((float) reader.maxDoc() * 2f));
                    log.info("progress: " + (progress + 50) + "%");
                }
                if (reader.isDeleted(i)) {
                    continue;
                }
                Document d = reader.document(i, FieldSelectors.UUID_AND_PARENT);
                NodeId id = new NodeId(d.get(FieldNames.UUID));
                if (!nodeIds.containsKey(id) || isIgnored(id)) {
                    // this node is ignored or was already marked for deletion
                    continue;
                }
                String parent = d.get(FieldNames.PARENT);
                if (parent == null || parent.isEmpty()) {
                    continue;
                }
                final NodeId parentId = new NodeId(parent);

                boolean parentExists = nodeIds.containsKey(parentId);
                boolean parentIndexed = parentExists && nodeIds.get(parentId);
                if (parentIndexed) {
                    continue;
                } else if (id.equals(RepositoryImpl.SYSTEM_ROOT_NODE_ID)
                        && parentId.equals(RepositoryImpl.ROOT_NODE_ID)) {
                    continue; // special case for the /jcr:system node
                }

                // parent is missing from index
                if (parentExists) {
                    errors.add(new MissingAncestor(id, parentId));
                } else {
                    try {
                        final ItemState itemState = stateMgr.getItemState(id);
                        if (parentId.equals(itemState.getParentId())) {
                            // orphaned node
                            errors.add(new UnknownParent(id, parentId));
                        } else {
                            errors.add(new WrongParent(id, parentId, itemState.getParentId()));
                        }
                    } catch (ItemStateException ignored) {
                    }
                }
            }
        } finally {
            reader.release();
        }

    }

    private void checkIndexCompleteness() {
        log.info("Checking index completeness");
        int i = 0;
        int size = nodeIds.size();
        for (Map.Entry<NodeId, Boolean> entry : nodeIds.entrySet()) {
            // check whether all nodes in the repository are indexed
            NodeId nodeId = entry.getKey();
            boolean indexed = entry.getValue();
            try {
                if (++i > 10 && i % (size / 10) == 0) {
                    long progress = Math.round((100.0 * (float) i) / (float) size);
                    log.info("progress: " + progress + "%");
                }
                if (!indexed && !isIgnored(nodeId) && !isExcluded(nodeId)) {
                    NodeState nodeState = getNodeState(nodeId);
                    if (nodeState != null && !isBrokenNode(nodeId, nodeState)) {
                        errors.add(new NodeAdded(nodeId));
                    }
                }
            } catch (ItemStateException e) {
                log.error("Failed to check node: " + nodeId, e);
            }
        }
    }

    private boolean isExcluded(NodeId id) {
        try {
            final HierarchyManager hierarchyManager = handler.getContext().getHierarchyManager();
            final Path path = hierarchyManager.getPath(id);
            for (Path excludedPath : excludedPaths) {
                if (excludedPath.isEquivalentTo(path) || excludedPath.isAncestorOf(path)) {
                    return true;
                }
            }
        } catch (RepositoryException ignored) {
        }
        return false;
    }

    private boolean isIgnored(NodeId id) {
        try {
            final HierarchyManager hierarchyManager = handler.getContext().getHierarchyManager();
            final Path path = hierarchyManager.getPath(id);
            for (Path excludedPath : ignoredPaths) {
                if (excludedPath.isEquivalentTo(path) || excludedPath.isAncestorOf(path)) {
                    return true;
                }
            }
        } catch (RepositoryException ignored) {
        }
        return false;
    }

    private NodeState getNodeState(NodeId nodeId) throws ItemStateException {
        try {
            return (NodeState) stateMgr.getItemState(nodeId);
        } catch (NoSuchItemStateException e) {
            return null;
        }
    }

    private boolean isBrokenNode(final NodeId nodeId, final NodeState nodeState) throws ItemStateException {
        final NodeId parentId = nodeState.getParentId();
        if (parentId != null) {
            final NodeState parentState = getNodeState(parentId);
            if (parentState == null) {
                log.warn("Node missing from index is orphaned node: " + nodeId);
                return true;
            }
            if (!parentState.hasChildNodeEntry(nodeId)) {
                log.warn("Node missing from index is abandoned node: " + nodeId);
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the path for <code>node</code>. If an error occurs this method
     * returns the uuid of the node.
     *
     * @param node the node to retrieve the path from
     * @return the path of the node or its uuid.
     */
    private String getPath(NodeState node) {
        // remember as fallback
        String uuid = node.getNodeId().toString();
        StringBuilder path = new StringBuilder();
        List<ChildNodeEntry> elements = new ArrayList<ChildNodeEntry>();
        try {
            while (node.getParentId() != null) {
                NodeId parentId = node.getParentId();
                NodeState parent = (NodeState) stateMgr.getItemState(parentId);
                ChildNodeEntry entry = parent.getChildNodeEntry(node.getNodeId());
                if (entry == null) {
                    log.warn("Failed to build path: abandoned child {} of node {}. " +
                            "Please run a repository consistency check", node.getNodeId(), parentId);
                    return uuid;
                }
                elements.add(entry);
                node = parent;
            }
            for (int i = elements.size() - 1; i > -1; i--) {
                ChildNodeEntry entry = elements.get(i);
                path.append('/').append(entry.getName().getLocalName());
                if (entry.getIndex() > 1) {
                    path.append('[').append(entry.getIndex()).append(']');
                }
            }
            if (path.length() == 0) {
                path.append('/');
            }
            return path.toString();
        } catch (ItemStateException e) {
            return uuid;
        }
    }

    //-------------------< ConsistencyCheckError classes >----------------------

    /**
     * One or more ancestors of an indexed node are not available in the index.
     */
    private class MissingAncestor extends ConsistencyCheckError {

        private final NodeId parentId;

        private MissingAncestor(NodeId id, NodeId parentId) {
            super("Parent of " + id + " missing in index. Parent: " + parentId, id);
            this.parentId = parentId;
        }

        /**
         * Returns <code>true</code>.
         * @return <code>true</code>.
         */
        public boolean repairable() {
            return true;
        }

        /**
         * Repairs the missing node by indexing the missing ancestors.
         * @throws Exception if an error occurs while repairing.
         */
        public void repair() throws Exception {
            NodeId ancestorId = parentId;
            while (ancestorId != null && nodeIds.containsKey(ancestorId) && nodeIds.get(ancestorId)) {
                NodeState n = (NodeState) stateMgr.getItemState(ancestorId);
                log.info("Repairing missing node " + getPath(n) + " (" + ancestorId + ")");
                Document d = index.createDocument(n);
                index.addDocument(d);
                nodeIds.put(n.getNodeId(), Boolean.TRUE);
                ancestorId = n.getParentId();
            }
        }

        @Override
        boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager)
                throws RepositoryException, IOException {
            final List<Document> documents = handler.getNodeDocuments(id);
            for (Document document : documents) {
                final String parent = document.get(FieldNames.PARENT);
                if (parent != null && !parent.isEmpty()) {
                    final NodeId parentId = new NodeId(parent);
                    if (handler.getNodeDocuments(parentId).isEmpty()) {
                        return true;
                    }
                }
            }
            return false;

        }
    }

    /**
     * The parent of a node is not in the repository
     */
    private static class UnknownParent extends ConsistencyCheckError {

        private NodeId parentId;

        private UnknownParent(NodeId id, NodeId parentId) {
            super("Node " + id + " has unknown parent: " + parentId, id);
            this.parentId = parentId;
        }

        /**
         * Not reparable (yet).
         * @return <code>false</code>.
         */
        public boolean repairable() {
            return false;
        }

        /**
         * No operation.
         */
        public void repair() {
            log.warn("Unknown parent for " + id + " cannot be repaired");
        }

        @Override
        boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager)
                throws IOException, RepositoryException {
            final List<Document> documents = handler.getNodeDocuments(id);
            for (Document document : documents) {
                final String parent = document.get(FieldNames.PARENT);
                if (parent != null && !parent.isEmpty()) {
                    final NodeId parentId = new NodeId(parent);
                    if (parentId.equals(this.parentId) && !stateManager.hasItemState(parentId)) {
                        return true;
                    }
                }
            }
            return false;
        }
    }

    /**
     * The parent as indexed does not correspond with the actual parent in the repository
     */
    private class WrongParent extends ConsistencyCheckError {

        private NodeId indexedParentId;

        private WrongParent(NodeId id, NodeId indexedParentId, NodeId actualParentId) {
            super("Node " + id + " has wrong parent: " + indexedParentId + ", should be : " + actualParentId, id);
            this.indexedParentId = indexedParentId;
        }

        @Override
        public boolean repairable() {
            return true;
        }

        /**
         * Reindex node.
         */
        @Override
        void repair() throws Exception {
            index.removeAllDocuments(id);
            try {
                NodeState node = (NodeState) stateMgr.getItemState(id);
                log.info("Re-indexing node with wrong parent in index: " + getPath(node));
                Document d = index.createDocument(node);
                index.addDocument(d);
                nodeIds.put(node.getNodeId(), Boolean.TRUE);
            } catch (NoSuchItemStateException e) {
                log.info("Not re-indexing node with wrong parent because node no longer exists");
            }
        }

        @Override
        boolean doubleCheck(final SearchIndex handler, final ItemStateManager stateManager)
                throws RepositoryException, IOException {
            final List<Document> documents = handler.getNodeDocuments(id);
            for (Document document : documents) {
                final String parent = document.get(FieldNames.PARENT);
                if (parent != null && !parent.isEmpty()) {
                    final NodeId parentId = new NodeId(parent);
                    if (parentId.equals(indexedParentId) && !stateManager.hasItemState(parentId)) {
                        return true;
                    }
                }
            }
            return false;
        }

    }

    /**
     * A node is present multiple times in the index.
     */
    private class MultipleEntries extends ConsistencyCheckError {

        MultipleEntries(NodeId id) {
            super("Multiple entries found for node " + id, id);
        }

        /**
         * Returns <code>true</code>.
         * @return <code>true</code>.
         */
        public boolean repairable() {
            return true;
        }

        /**
         * Removes the nodes with the identical uuids from the index and
         * re-index the node.
         * @throws IOException if an error occurs while repairing.
         */
        public void repair() throws Exception {
            // first remove all occurrences
            index.removeAllDocuments(id);
            // then re-index the node
            try {
                NodeState node = (NodeState) stateMgr.getItemState(id);
                log.info("Re-indexing duplicate node occurrences in index: " + getPath(node));
                Document d = index.createDocument(node);
                index.addDocument(d);
                nodeIds.put(node.getNodeId(), Boolean.TRUE);
            } catch (NoSuchItemStateException e) {
                log.info("Not re-indexing node with multiple occurrences because node no longer exists");
            }
        }

        @Override
        boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager)
                throws RepositoryException, IOException {
            return handler.getNodeDocuments(id).size() > 1;
        }
    }

    /**
     * Indicates that a node has been deleted but is still in the index.
     */
    private class NodeDeleted extends ConsistencyCheckError {

        NodeDeleted(NodeId id) {
            super("Node " + id + " no longer exists.", id);
        }

        /**
         * Returns <code>true</code>.
         * @return <code>true</code>.
         */
        public boolean repairable() {
            return true;
        }

        /**
         * Deletes the nodes from the index.
         * @throws IOException if an error occurs while repairing.
         */
        public void repair() throws IOException {
            log.info("Removing deleted node from index: " + id);
            index.removeDocument(id);
        }

        @Override
        boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager)
                throws RepositoryException, IOException {
            final List<Document> documents = handler.getNodeDocuments(id);
            if (!documents.isEmpty()) {
                if (!stateManager.hasItemState(id)) {
                    return true;
                }
            }
            return false;
        }
    }

    private class NodeAdded extends ConsistencyCheckError {

        NodeAdded(final NodeId id) {
            super("Node " + id + " is missing.", id);
        }

        @Override
        public boolean repairable() {
            return true;
        }

        @Override
        void repair() throws Exception {
            try {
                NodeState nodeState = (NodeState) stateMgr.getItemState(id);
                log.info("Adding missing node to index: " + getPath(nodeState));
                final Iterator<NodeId> remove = Collections.<NodeId>emptyList().iterator();
                final Iterator<NodeState> add = Collections.singletonList(nodeState).iterator();
                handler.updateNodes(remove, add);
            } catch (NoSuchItemStateException e) {
                log.info("Not adding missing node because node no longer exists");
            }
        }

        @Override
        boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager)
                throws RepositoryException, IOException {
            final List<Document> documents = handler.getNodeDocuments(id);
            if (documents.isEmpty()) {
                if (stateManager.hasItemState(id)) {
                    return true;
                }
            }
            return false;
        }

    }
}
TOP

Related Classes of org.apache.jackrabbit.core.query.lucene.ConsistencyCheck$MultipleEntries

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.