/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.core.query.lucene;
import org.apache.commons.io.IOExceptionWithCause;
import org.apache.jackrabbit.core.HierarchyManager;
import org.apache.jackrabbit.core.RepositoryImpl;
import org.apache.jackrabbit.core.cluster.ClusterException;
import org.apache.jackrabbit.core.cluster.ClusterNode;
import org.apache.jackrabbit.core.persistence.IterablePersistenceManager;
import org.apache.jackrabbit.core.persistence.PersistenceManager;
import org.apache.jackrabbit.core.state.ItemState;
import org.apache.jackrabbit.core.state.ItemStateManager;
import org.apache.jackrabbit.core.state.NoSuchItemStateException;
import org.apache.jackrabbit.core.state.NodeState;
import org.apache.jackrabbit.core.state.ItemStateException;
import org.apache.jackrabbit.core.state.ChildNodeEntry;
import org.apache.jackrabbit.core.id.NodeId;
import org.apache.jackrabbit.spi.Path;
import org.apache.jackrabbit.spi.commons.conversion.MalformedPathException;
import org.apache.jackrabbit.spi.commons.name.NameConstants;
import org.apache.jackrabbit.spi.commons.name.PathBuilder;
import org.apache.lucene.document.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.jcr.ItemNotFoundException;
import javax.jcr.RepositoryException;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.Set;
import java.util.HashSet;
/**
* Implements a consistency check on the search index. Currently the following
* checks are implemented:
* <ul>
* <li>Does the node exist in the ItemStateManager? If it does not exist
* anymore the node is deleted from the index.</li>
* <li>Is the parent of a node also present in the index? If it is not present it
* will be indexed.</li>
* <li>Is a node indexed multiple times? If that is the case, all occurrences
* in the index for such a node are removed, and the node is re-indexed.</li>
* <li>Is a node missing from the index? If so, it is added.</li>
* </ul>
*/
public class ConsistencyCheck {
/**
* Logger instance for this class
*/
private static final Logger log = LoggerFactory.getLogger(ConsistencyCheck.class);
/**
* The number of node ids to fetch at once from the persistence manager. Defaults to 8192 (8 * 1024).
*/
private static final int NODESATONCE = Integer.getInteger("org.apache.jackrabbit.checker.nodesatonce", 1024 * 8);
private final SearchIndex handler;
/**
* The ItemStateManager of the workspace.
*/
private final ItemStateManager stateMgr;
/**
* The PersistenceManager of the workspace.
*/
private IterablePersistenceManager pm;
/**
* The index to check.
*/
private final MultiIndex index;
/**
* All the node ids and whether they were found in the index.
*/
private Map<NodeId, Boolean> nodeIds;
/**
* Paths of nodes that are not indexed
*/
private Set<Path> excludedPaths;
/**
* Paths of nodes that will be excluded from consistency check
*/
private final Set<Path> ignoredPaths = new HashSet<Path>();
/**
* List of all errors.
*/
private final List<ConsistencyCheckError> errors =
new ArrayList<ConsistencyCheckError>();
/**
 * Creates a consistency check for the given index. Instances are obtained
 * through the static <code>run</code> factory method.
 *
 * @param index the index to check.
 * @param handler the search index whose context supplies the hierarchy
 *                manager, item state manager and persistence manager.
 * @param excludedIds the ids of nodes that are not indexed; they are
 *                    resolved to paths here.
 */
private ConsistencyCheck(MultiIndex index, SearchIndex handler, Set<NodeId> excludedIds) {
    this.index = index;
    this.handler = handler;

    // Resolve the excluded ids to paths; ids that cannot be resolved are
    // logged and left out.
    final HierarchyManager hmgr = handler.getContext().getHierarchyManager();
    excludedPaths = new HashSet<Path>(excludedIds.size());
    for (NodeId nodeId : excludedIds) {
        try {
            excludedPaths.add(hmgr.getPath(nodeId));
        } catch (ItemNotFoundException e) {
            log.warn("Excluded node does not exist");
        } catch (RepositoryException e) {
            log.error("Failed to get excluded path", e);
        }
    }

    // JCR-3773: ignore the tree jcr:nodeTypes
    final PathBuilder builder = new PathBuilder();
    builder.addRoot();
    builder.addLast(NameConstants.JCR_NODETYPES);
    try {
        final Path nodeTypesPath = builder.getPath();
        log.info("consistency check will skip " + nodeTypesPath);
        ignoredPaths.add(nodeTypesPath);
    } catch (MalformedPathException e) {
        // will never happen
        log.error("Malformed path", e);
    }

    this.stateMgr = handler.getContext().getItemStateManager();

    // The pm field is only set when the persistence manager can enumerate
    // node ids; otherwise it keeps its default value.
    final PersistenceManager persistenceManager = handler.getContext().getPersistenceManager();
    if (persistenceManager instanceof IterablePersistenceManager) {
        this.pm = (IterablePersistenceManager) persistenceManager;
    }
}
/**
 * Creates a consistency check for <code>index</code>, runs it and returns
 * it so that the caller can inspect or repair the detected errors.
 *
 * @param index the index to check.
 * @param handler the QueryHandler to use.
 * @param excludedIds the set of node ids that are not indexed
 * @return the consistency check with the results.
 * @throws IOException if an error occurs while checking.
 */
static ConsistencyCheck run(MultiIndex index, SearchIndex handler, final Set<NodeId> excludedIds)
        throws IOException {
    final ConsistencyCheck result = new ConsistencyCheck(index, handler, excludedIds);
    result.run();
    return result;
}
/**
 * Repairs detected errors during the consistency check.
 * <p>
 * Errors that report themselves as not repairable are logged and skipped.
 * The summary that is logged at the end counts only repairs that actually
 * completed; repairs that failed and were ignored are reported separately.
 *
 * @param ignoreFailure if <code>true</code> repair failures are ignored,
 *      the repair continues without throwing an exception. If
 *      <code>false</code> the repair procedure is aborted on the first
 *      repair failure.
 * @throws IOException if a repair failure occurs. Failures that are not
 *      already an <code>IOException</code> are wrapped, keeping the
 *      original exception as the cause.
 */
public void repair(boolean ignoreFailure) throws IOException {
    if (errors.isEmpty()) {
        log.info("No errors found.");
        return;
    }
    int repaired = 0;
    int notRepairable = 0;
    int failed = 0;
    for (ConsistencyCheckError error : errors) {
        try {
            if (error.repairable()) {
                error.repair();
                // only count the error once the repair returned normally
                repaired++;
            } else {
                log.warn("Not repairable: " + error);
                notRepairable++;
            }
        } catch (Exception e) {
            if (ignoreFailure) {
                failed++;
                log.warn("Exception while repairing: " + error, e);
            } else if (e instanceof IOException) {
                throw (IOException) e;
            } else {
                throw new IOExceptionWithCause(e);
            }
        }
    }
    log.info("Repaired " + repaired + " errors.");
    if (notRepairable > 0) {
        log.warn("" + notRepairable + " error(s) not repairable.");
    }
    if (failed > 0) {
        log.warn("" + failed + " repair(s) failed.");
    }
}
/**
 * Returns the errors detected by the consistency check. The returned list
 * is a copy; changes to it do not affect this consistency check.
 *
 * @return a new list holding the errors detected by the consistency check.
 */
public List<ConsistencyCheckError> getErrors() {
    final List<ConsistencyCheckError> snapshot = new ArrayList<ConsistencyCheckError>(errors.size());
    snapshot.addAll(errors);
    return snapshot;
}
/**
 * Runs the consistency check: loads the node ids and, if that produced a
 * result, checks the index for consistency and for completeness.
 *
 * @throws IOException if an error occurs while running the check.
 */
private void run() throws IOException {
    log.info("Checking index of workspace " + handler.getContext().getWorkspace());
    loadNodes();
    if (nodeIds == null) {
        // no node ids were loaded, so there is nothing to compare the index with
        return;
    }
    checkIndexConsistency();
    checkIndexCompleteness();
}
/**
 * Re-evaluates every detected error and drops those that turn out to be
 * false positives.
 * <p>
 * If a cluster node is available it is synced first. Each error is then
 * asked to double check itself; errors that no longer hold are removed
 * from the list of errors. An error whose double check fails with an
 * exception is kept and the failure is logged.
 */
public void doubleCheckErrors() {
    if (errors.isEmpty()) {
        return;
    }
    log.info("Double checking errors");
    final ClusterNode clusterNode = handler.getContext().getClusterNode();
    if (clusterNode != null) {
        try {
            clusterNode.sync();
        } catch (ClusterException e) {
            // keep going with the double check, but do not lose the cause
            log.error("Could not sync cluster node for double checking errors", e);
        }
    }
    final Iterator<ConsistencyCheckError> iterator = errors.iterator();
    while (iterator.hasNext()) {
        try {
            final ConsistencyCheckError error = iterator.next();
            if (!error.doubleCheck(handler, stateMgr)) {
                log.info("False positive: " + error.toString());
                iterator.remove();
            }
        } catch (RepositoryException e) {
            log.error("Failed to double check consistency error", e);
        } catch (IOException e) {
            log.error("Failed to double check consistency error", e);
        }
    }
}
/**
 * Loads the ids of all nodes from the persistence manager in batches of
 * {@link #NODESATONCE} and stores them in {@link #nodeIds}, each mapped to
 * <code>false</code> (not yet found in the index).
 * <p>
 * {@link #nodeIds} is left untouched (<code>null</code> on a fresh
 * instance) when no iterable persistence manager is available, when no
 * node ids are returned at all, or when loading fails with an exception.
 * If the last node id that was read no longer exists once reading is
 * finished, the whole load is started over.
 */
private void loadNodes() {
    if (pm == null) {
        // the constructor only sets pm for an IterablePersistenceManager
        log.warn("Persistence manager does not support iterating over node ids, "
                + "skipping index consistency check");
        return;
    }
    try {
        // iterate instead of recursing so that repeated restarts cannot
        // exhaust the stack
        while (true) {
            log.info("Loading nodes");
            int count = 0;
            final Map<NodeId, Boolean> loaded = new HashMap<NodeId, Boolean>();
            NodeId lastId = null;
            List<NodeId> batch = pm.getAllNodeIds(null, NODESATONCE);
            while (!batch.isEmpty()) {
                for (NodeId nodeId : batch) {
                    lastId = nodeId;
                    count++;
                    if (count % 1000 == 0) {
                        log.info(pm + ": loaded " + count + " node ids...");
                    }
                    loaded.put(nodeId, Boolean.FALSE);
                }
                // continue after the last id of the previous batch
                batch = pm.getAllNodeIds(lastId, NODESATONCE);
            }
            if (lastId == null) {
                // Nothing was returned. Checking against an empty set would
                // flag every indexed document as deleted, so skip the check.
                log.warn("Persistence manager returned no node ids, "
                        + "skipping index consistency check");
                return;
            }
            // NOTE(review): presumably a vanished last id means the paging
            // above may have ended early - confirm against the
            // IterablePersistenceManager contract.
            if (pm.exists(lastId)) {
                this.nodeIds = loaded;
                return;
            }
            log.info("Failed to read all nodes, starting over");
        }
    } catch (ItemStateException e) {
        log.error("Exception while loading items to check", e);
    } catch (RepositoryException e) {
        log.error("Exception while loading items to check", e);
    }
}
/**
 * Checks the documents in the index against the loaded node ids. This is
 * done in two passes over the index: the first one detects documents of
 * nodes that no longer exist and nodes that are indexed more than once,
 * the second one verifies the parent recorded in each document.
 *
 * @throws IOException if an error occurs while reading from the index.
 */
private void checkIndexConsistency() throws IOException {
    log.info("Checking index consistency");
    checkIndexedNodes();
    checkIndexedParents();
}

/**
 * First pass: marks every node found in the index as indexed and records
 * a {@link NodeDeleted} error for documents whose node is unknown and a
 * {@link MultipleEntries} error for nodes with more than one document.
 * Ignored nodes are skipped.
 *
 * @throws IOException if an error occurs while reading from the index.
 */
private void checkIndexedNodes() throws IOException {
    // ids of nodes that occur more than once in the index
    final Set<NodeId> multipleEntries = new HashSet<NodeId>();
    final CachingMultiIndexReader reader = index.getIndexReader();
    try {
        final int maxDoc = reader.maxDoc();
        for (int i = 0; i < maxDoc; i++) {
            // i > 10 implies maxDoc > 11, so maxDoc / 5 is never zero here
            if (i > 10 && i % (maxDoc / 5) == 0) {
                // this pass accounts for the first half of the progress
                long progress = Math.round((100.0 * (float) i) / ((float) maxDoc * 2f));
                log.info("progress: " + progress + "%");
            }
            if (reader.isDeleted(i)) {
                continue;
            }
            final Document d = reader.document(i, FieldSelectors.UUID);
            final NodeId id = new NodeId(d.get(FieldNames.UUID));
            if (isIgnored(id)) {
                continue;
            }
            if (nodeIds.containsKey(id)) {
                // put returns the previous flag: true means the node was
                // already seen in an earlier document
                final Boolean alreadyIndexed = nodeIds.put(id, Boolean.TRUE);
                if (alreadyIndexed) {
                    multipleEntries.add(id);
                }
            } else {
                errors.add(new NodeDeleted(id));
            }
        }
    } finally {
        reader.release();
    }
    // create multiple entries errors
    for (NodeId id : multipleEntries) {
        errors.add(new MultipleEntries(id));
    }
}

/**
 * Second pass: for every document of a known, non-ignored node, checks
 * that the parent recorded in the document is indexed as well. Records a
 * {@link MissingAncestor} error when the parent exists but is not indexed,
 * an {@link UnknownParent} error when the parent does not exist but is
 * the parent according to the item state, and a {@link WrongParent} error
 * when the item state names a different parent.
 *
 * @throws IOException if an error occurs while reading from the index.
 */
private void checkIndexedParents() throws IOException {
    final CachingMultiIndexReader reader = index.getIndexReader();
    try {
        final int maxDoc = reader.maxDoc();
        for (int i = 0; i < maxDoc; i++) {
            if (i > 10 && i % (maxDoc / 5) == 0) {
                // this pass accounts for the second half of the progress
                long progress = Math.round((100.0 * (float) i) / ((float) maxDoc * 2f));
                log.info("progress: " + (progress + 50) + "%");
            }
            if (reader.isDeleted(i)) {
                continue;
            }
            final Document d = reader.document(i, FieldSelectors.UUID_AND_PARENT);
            final NodeId id = new NodeId(d.get(FieldNames.UUID));
            if (!nodeIds.containsKey(id) || isIgnored(id)) {
                // this node is ignored or was already marked for deletion
                continue;
            }
            final String parent = d.get(FieldNames.PARENT);
            if (parent == null || parent.isEmpty()) {
                continue;
            }
            final NodeId parentId = new NodeId(parent);
            final boolean parentExists = nodeIds.containsKey(parentId);
            final boolean parentIndexed = parentExists && nodeIds.get(parentId);
            if (parentIndexed) {
                continue;
            }
            if (id.equals(RepositoryImpl.SYSTEM_ROOT_NODE_ID)
                    && parentId.equals(RepositoryImpl.ROOT_NODE_ID)) {
                continue; // special case for the /jcr:system node
            }
            // parent is missing from index
            if (parentExists) {
                errors.add(new MissingAncestor(id, parentId));
                continue;
            }
            try {
                final ItemState itemState = stateMgr.getItemState(id);
                if (parentId.equals(itemState.getParentId())) {
                    // orphaned node
                    errors.add(new UnknownParent(id, parentId));
                } else {
                    errors.add(new WrongParent(id, parentId, itemState.getParentId()));
                }
            } catch (ItemStateException e) {
                // best effort: without the item state the kind of error
                // cannot be determined, so none is recorded
                log.debug("Unable to read item state to verify indexed parent of node " + id, e);
            }
        }
    } finally {
        reader.release();
    }
}
/**
 * Checks whether all loaded nodes are present in the index. A
 * {@link NodeAdded} error is recorded for every node that was not found in
 * the index, is neither ignored nor excluded, still has an item state and
 * is not a broken node. Failures to check a single node are logged and do
 * not stop the check.
 */
private void checkIndexCompleteness() {
    log.info("Checking index completeness");
    final int total = nodeIds.size();
    int processed = 0;
    for (Map.Entry<NodeId, Boolean> candidate : nodeIds.entrySet()) {
        final NodeId nodeId = candidate.getKey();
        final boolean indexed = candidate.getValue();
        try {
            processed++;
            // processed > 10 implies total > 10, so total / 10 is never zero here
            if (processed > 10 && processed % (total / 10) == 0) {
                long progress = Math.round((100.0 * (float) processed) / (float) total);
                log.info("progress: " + progress + "%");
            }
            if (indexed || isIgnored(nodeId) || isExcluded(nodeId)) {
                continue;
            }
            final NodeState state = getNodeState(nodeId);
            if (state != null && !isBrokenNode(nodeId, state)) {
                errors.add(new NodeAdded(nodeId));
            }
        } catch (ItemStateException e) {
            log.error("Failed to check node: " + nodeId, e);
        }
    }
}
/**
 * Tells whether the node is located at or below one of the excluded paths.
 *
 * @param id the id of the node to test.
 * @return <code>true</code> if the node is excluded; <code>false</code>
 *         otherwise or if its path cannot be determined.
 */
private boolean isExcluded(NodeId id) {
    return isAtOrBelowAny(id, excludedPaths);
}

/**
 * Tells whether the node is located at or below one of the ignored paths.
 *
 * @param id the id of the node to test.
 * @return <code>true</code> if the node is ignored; <code>false</code>
 *         otherwise or if its path cannot be determined.
 */
private boolean isIgnored(NodeId id) {
    return isAtOrBelowAny(id, ignoredPaths);
}

/**
 * Resolves the path of the node and tests whether one of the given paths
 * is equivalent to it or an ancestor of it.
 *
 * @param id the id of the node to test.
 * @param roots the paths to test against.
 * @return <code>true</code> if a matching path was found;
 *         <code>false</code> if there is none or the test failed with a
 *         <code>RepositoryException</code>.
 */
private boolean isAtOrBelowAny(NodeId id, Set<Path> roots) {
    if (roots.isEmpty()) {
        // nothing can match, so skip resolving the path
        return false;
    }
    try {
        final Path path = handler.getContext().getHierarchyManager().getPath(id);
        for (Path root : roots) {
            if (root.isEquivalentTo(path) || root.isAncestorOf(path)) {
                return true;
            }
        }
    } catch (RepositoryException ignored) {
        // best effort: a node whose path cannot be resolved or compared
        // is treated as not matching
    }
    return false;
}
/**
 * Looks up the state of a node in the item state manager.
 *
 * @param nodeId the id of the node.
 * @return the node state, or <code>null</code> if there is no such item state.
 * @throws ItemStateException if the lookup fails for another reason.
 */
private NodeState getNodeState(NodeId nodeId) throws ItemStateException {
    NodeState state;
    try {
        state = (NodeState) stateMgr.getItemState(nodeId);
    } catch (NoSuchItemStateException e) {
        state = null;
    }
    return state;
}
/**
 * Tells whether a node that is missing from the index is broken, i.e. its
 * parent has no item state (orphaned) or the parent has no child node
 * entry for it (abandoned). A node without a parent id is never broken.
 *
 * @param nodeId the id of the node.
 * @param nodeState the state of the node.
 * @return <code>true</code> if the node is orphaned or abandoned.
 * @throws ItemStateException if the parent state cannot be read.
 */
private boolean isBrokenNode(final NodeId nodeId, final NodeState nodeState) throws ItemStateException {
    final NodeId parentId = nodeState.getParentId();
    if (parentId == null) {
        return false;
    }
    final NodeState parent = getNodeState(parentId);
    if (parent == null) {
        log.warn("Node missing from index is orphaned node: " + nodeId);
        return true;
    }
    if (parent.hasChildNodeEntry(nodeId)) {
        return false;
    }
    log.warn("Node missing from index is abandoned node: " + nodeId);
    return true;
}
/**
 * Builds a readable path for <code>node</code> from the local names (and
 * same-name-sibling indexes greater than one) of the child node entries
 * found while walking up to the root. If the path cannot be built this
 * method returns the uuid of the node instead.
 *
 * @param node the node to retrieve the path from
 * @return the path of the node or its uuid.
 */
private String getPath(NodeState node) {
    // remember as fallback
    final String fallback = node.getNodeId().toString();
    // entries from the node itself up to the child of the root
    final List<ChildNodeEntry> entries = new ArrayList<ChildNodeEntry>();
    try {
        NodeState current = node;
        NodeId parentId = current.getParentId();
        while (parentId != null) {
            final NodeState parent = (NodeState) stateMgr.getItemState(parentId);
            final ChildNodeEntry entry = parent.getChildNodeEntry(current.getNodeId());
            if (entry == null) {
                log.warn("Failed to build path: abandoned child {} of node {}. " +
                        "Please run a repository consistency check", current.getNodeId(), parentId);
                return fallback;
            }
            entries.add(entry);
            current = parent;
            parentId = current.getParentId();
        }
    } catch (ItemStateException e) {
        return fallback;
    }
    if (entries.isEmpty()) {
        // the node has no parent
        return "/";
    }
    final StringBuilder path = new StringBuilder();
    // entries were collected bottom-up, so emit them in reverse order
    for (int i = entries.size(); i-- > 0; ) {
        final ChildNodeEntry entry = entries.get(i);
        path.append('/').append(entry.getName().getLocalName());
        final int sameNameIndex = entry.getIndex();
        if (sameNameIndex > 1) {
            path.append('[').append(sameNameIndex).append(']');
        }
    }
    return path.toString();
}
//-------------------< ConsistencyCheckError classes >----------------------
/**
 * One or more ancestors of an indexed node are not available in the index.
 */
private class MissingAncestor extends ConsistencyCheckError {

    /** The parent recorded in the index document of the node. */
    private final NodeId parentId;

    private MissingAncestor(NodeId id, NodeId parentId) {
        super("Parent of " + id + " missing in index. Parent: " + parentId, id);
        this.parentId = parentId;
    }

    /**
     * Returns <code>true</code>.
     * @return <code>true</code>.
     */
    @Override
    public boolean repairable() {
        return true;
    }

    /**
     * Repairs the missing node by indexing the missing ancestors. Walks up
     * from the parent and indexes every ancestor that is a known node but
     * is not yet marked as indexed, stopping at the first ancestor that is
     * unknown, already indexed or excluded from indexing. Every ancestor
     * that gets indexed is marked as indexed so that no other repair adds
     * it a second time.
     *
     * @throws Exception if an error occurs while repairing.
     */
    @Override
    public void repair() throws Exception {
        NodeId ancestorId = parentId;
        // Boolean.FALSE.equals(..) is true only for known nodes that are
        // not indexed yet; unknown nodes map to null.
        while (ancestorId != null
                && Boolean.FALSE.equals(nodeIds.get(ancestorId))
                && !isExcluded(ancestorId)) {
            final NodeState n = (NodeState) stateMgr.getItemState(ancestorId);
            log.info("Repairing missing node " + getPath(n) + " (" + ancestorId + ")");
            final Document d = index.createDocument(n);
            index.addDocument(d);
            nodeIds.put(n.getNodeId(), Boolean.TRUE);
            ancestorId = n.getParentId();
        }
    }

    /**
     * Confirms the error if any index document of the node names a parent
     * for which the index holds no document.
     */
    @Override
    boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager)
            throws RepositoryException, IOException {
        for (Document document : handler.getNodeDocuments(id)) {
            final String parent = document.get(FieldNames.PARENT);
            if (parent == null || parent.isEmpty()) {
                continue;
            }
            if (handler.getNodeDocuments(new NodeId(parent)).isEmpty()) {
                return true;
            }
        }
        return false;
    }
}

/**
 * The parent of a node is not in the repository
 */
private static class UnknownParent extends ConsistencyCheckError {

    /** The parent recorded in the index document of the node. */
    private final NodeId parentId;

    private UnknownParent(NodeId id, NodeId parentId) {
        super("Node " + id + " has unknown parent: " + parentId, id);
        this.parentId = parentId;
    }

    /**
     * Not repairable (yet).
     * @return <code>false</code>.
     */
    @Override
    public boolean repairable() {
        return false;
    }

    /**
     * No operation apart from logging a warning.
     */
    @Override
    public void repair() {
        log.warn("Unknown parent for " + id + " cannot be repaired");
    }

    /**
     * Confirms the error if an index document of the node still names the
     * same parent and that parent has no item state.
     */
    @Override
    boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager)
            throws IOException, RepositoryException {
        for (Document document : handler.getNodeDocuments(id)) {
            final String parent = document.get(FieldNames.PARENT);
            if (parent == null || parent.isEmpty()) {
                continue;
            }
            final NodeId indexedParent = new NodeId(parent);
            if (indexedParent.equals(this.parentId) && !stateManager.hasItemState(indexedParent)) {
                return true;
            }
        }
        return false;
    }
}

/**
 * The parent as indexed does not correspond with the actual parent in the repository
 */
private class WrongParent extends ConsistencyCheckError {

    /** The parent recorded in the index document of the node. */
    private final NodeId indexedParentId;

    private WrongParent(NodeId id, NodeId indexedParentId, NodeId actualParentId) {
        super("Node " + id + " has wrong parent: " + indexedParentId + ", should be : " + actualParentId, id);
        this.indexedParentId = indexedParentId;
    }

    /**
     * Returns <code>true</code>.
     * @return <code>true</code>.
     */
    @Override
    public boolean repairable() {
        return true;
    }

    /**
     * Removes all documents of the node from the index and re-indexes the
     * node. If the node no longer exists it is only removed.
     *
     * @throws Exception if an error occurs while repairing.
     */
    @Override
    void repair() throws Exception {
        index.removeAllDocuments(id);
        try {
            final NodeState node = (NodeState) stateMgr.getItemState(id);
            log.info("Re-indexing node with wrong parent in index: " + getPath(node));
            final Document d = index.createDocument(node);
            index.addDocument(d);
            nodeIds.put(node.getNodeId(), Boolean.TRUE);
        } catch (NoSuchItemStateException e) {
            log.info("Not re-indexing node with wrong parent because node no longer exists");
        }
    }

    /**
     * Confirms the error if an index document of the node still names the
     * indexed parent and that parent has no item state.
     */
    @Override
    boolean doubleCheck(final SearchIndex handler, final ItemStateManager stateManager)
            throws RepositoryException, IOException {
        for (Document document : handler.getNodeDocuments(id)) {
            final String parent = document.get(FieldNames.PARENT);
            if (parent == null || parent.isEmpty()) {
                continue;
            }
            final NodeId indexedParent = new NodeId(parent);
            if (indexedParent.equals(indexedParentId) && !stateManager.hasItemState(indexedParent)) {
                return true;
            }
        }
        return false;
    }
}

/**
 * A node is present multiple times in the index.
 */
private class MultipleEntries extends ConsistencyCheckError {

    MultipleEntries(NodeId id) {
        super("Multiple entries found for node " + id, id);
    }

    /**
     * Returns <code>true</code>.
     * @return <code>true</code>.
     */
    @Override
    public boolean repairable() {
        return true;
    }

    /**
     * Removes the nodes with the identical uuids from the index and
     * re-index the node. If the node no longer exists it is only removed.
     *
     * @throws Exception if an error occurs while repairing.
     */
    @Override
    public void repair() throws Exception {
        // first remove all occurrences
        index.removeAllDocuments(id);
        // then re-index the node
        try {
            final NodeState node = (NodeState) stateMgr.getItemState(id);
            log.info("Re-indexing duplicate node occurrences in index: " + getPath(node));
            final Document d = index.createDocument(node);
            index.addDocument(d);
            nodeIds.put(node.getNodeId(), Boolean.TRUE);
        } catch (NoSuchItemStateException e) {
            log.info("Not re-indexing node with multiple occurrences because node no longer exists");
        }
    }

    /**
     * Confirms the error if the index still holds more than one document
     * for the node.
     */
    @Override
    boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager)
            throws RepositoryException, IOException {
        return handler.getNodeDocuments(id).size() > 1;
    }
}

/**
 * Indicates that a node has been deleted but is still in the index.
 */
private class NodeDeleted extends ConsistencyCheckError {

    NodeDeleted(NodeId id) {
        super("Node " + id + " no longer exists.", id);
    }

    /**
     * Returns <code>true</code>.
     * @return <code>true</code>.
     */
    @Override
    public boolean repairable() {
        return true;
    }

    /**
     * Deletes the nodes from the index.
     * @throws IOException if an error occurs while repairing.
     */
    @Override
    public void repair() throws IOException {
        log.info("Removing deleted node from index: " + id);
        index.removeDocument(id);
    }

    /**
     * Confirms the error if the index still holds a document for the node
     * and the node has no item state.
     */
    @Override
    boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager)
            throws RepositoryException, IOException {
        final List<Document> documents = handler.getNodeDocuments(id);
        return !documents.isEmpty() && !stateManager.hasItemState(id);
    }
}

/**
 * Indicates that a node is known to the persistence manager but was not
 * found in the index.
 */
private class NodeAdded extends ConsistencyCheckError {

    NodeAdded(final NodeId id) {
        super("Node " + id + " is missing.", id);
    }

    /**
     * Returns <code>true</code>.
     * @return <code>true</code>.
     */
    @Override
    public boolean repairable() {
        return true;
    }

    /**
     * Adds the node to the index, unless an earlier repair of this check
     * has already indexed it or the node no longer exists.
     *
     * @throws Exception if an error occurs while repairing.
     */
    @Override
    void repair() throws Exception {
        if (Boolean.TRUE.equals(nodeIds.get(id))) {
            // e.g. indexed as the missing ancestor of another node; adding
            // it again would create a duplicate document
            log.info("Not adding missing node because it was already indexed by another repair: " + id);
            return;
        }
        try {
            final NodeState nodeState = (NodeState) stateMgr.getItemState(id);
            log.info("Adding missing node to index: " + getPath(nodeState));
            final Iterator<NodeId> remove = Collections.<NodeId>emptyList().iterator();
            final Iterator<NodeState> add = Collections.singletonList(nodeState).iterator();
            handler.updateNodes(remove, add);
            nodeIds.put(id, Boolean.TRUE);
        } catch (NoSuchItemStateException e) {
            log.info("Not adding missing node because node no longer exists");
        }
    }

    /**
     * Confirms the error if the index still holds no document for the node
     * and the node has an item state.
     */
    @Override
    boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager)
            throws RepositoryException, IOException {
        final List<Document> documents = handler.getNodeDocuments(id);
        return documents.isEmpty() && stateManager.hasItemState(id);
    }
}
}