/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.oak.plugins.index.lucene;

import static org.apache.jackrabbit.JcrConstants.JCR_DATA;
import static org.apache.jackrabbit.oak.commons.PathUtils.concat;
import static org.apache.jackrabbit.oak.commons.PathUtils.getName;
import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newFulltextField;
import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPathField;
import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPropertyField;
import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newPathTerm;
import static org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper.skipTokenization;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.jackrabbit.JcrConstants;
import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.CommitFailedException;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.plugins.index.IndexEditor;
import org.apache.jackrabbit.oak.plugins.index.IndexUpdateCallback;
import org.apache.jackrabbit.oak.spi.commit.Editor;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.PrefixQuery;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.WriteOutContentHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link IndexEditor} implementation that keeps the {@link LuceneIndex}
 * up to date with the content changes it observes.
*
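 * <p>
 * A minimal sketch, from within this package, of how the editor might be
 * driven over a content diff; the definition builder, analyzer, callback
 * and node states are placeholders for this example:
 * <pre>{@code
 * NodeBuilder definition = ...;       // builder of the index definition node
 * Analyzer analyzer = ...;            // analyzer configured for this index
 * IndexUpdateCallback callback = ...; // notified on every index update
 * NodeState before = ...;             // state before the commit
 * NodeState after = ...;              // state after the commit
 *
 * Editor editor = new LuceneIndexEditor(definition, analyzer, callback);
 * CommitFailedException e = EditorDiff.process(editor, before, after);
 * }</pre>
 *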
* @see LuceneIndex
*/
public class LuceneIndexEditor implements IndexEditor {

    private static final Logger log =
            LoggerFactory.getLogger(LuceneIndexEditor.class);

    private final LuceneIndexEditorContext context;

    /** Name of this node, or {@code null} for the root node. */
    private final String name;

    /** Parent editor or {@code null} if this is the root editor. */
    private final LuceneIndexEditor parent;

    /** Path of this editor, built lazily in {@link #getPath()}. */
    private String path;
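
    /** Set when a property of the current node was added, changed or removed. */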
private boolean propertiesChanged = false;
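
    /**
     * Creates the root editor for the given index definition.
     */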
LuceneIndexEditor(NodeBuilder definition, Analyzer analyzer,
IndexUpdateCallback updateCallback) throws CommitFailedException {
this.parent = null;
this.name = null;
this.path = "/";
this.context = new LuceneIndexEditorContext(definition, analyzer,
updateCallback);
}
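
    /**
     * Creates a child editor for the node with the given name.
     */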
private LuceneIndexEditor(LuceneIndexEditor parent, String name) {
this.parent = parent;
this.name = name;
this.path = null;
this.context = parent.context;
}
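
    /**
     * Returns the path of the node this editor acts on, building and
     * caching it on first access.
     */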
public String getPath() {
if (path == null) { // => parent != null
path = concat(parent.getPath(), name);
}
return path;
}

    @Override
public void enter(NodeState before, NodeState after)
throws CommitFailedException {
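        // nothing to do here; the Lucene document is created in leave()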
}
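
    /**
     * Indexes this node if it is new or if any of its properties changed.
     * When the root editor is left, the index writer is closed so that all
     * changes of this commit are persisted.
     */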
@Override
public void leave(NodeState before, NodeState after)
throws CommitFailedException {
if (propertiesChanged || !before.exists()) {
String path = getPath();
if (addOrUpdate(path, after, before.exists())) {
long indexed = context.incIndexedNodes();
if (indexed % 1000 == 0) {
log.debug("Indexed {} nodes...", indexed);
}
}
}
        if (parent == null) {
            // leaving the root node: close the writer to persist the changes
try {
context.closeWriter();
} catch (IOException e) {
throw new CommitFailedException("Lucene", 4,
"Failed to close the Lucene index", e);
}
if (context.getIndexedNodes() > 0) {
log.debug("Indexed {} nodes, done.", context.getIndexedNodes());
}
}
}

    // Any property-level change simply marks this node as dirty; its Lucene
    // document is then rebuilt when the editor leaves the node.

    @Override
    public void propertyAdded(PropertyState after) {
        propertiesChanged = true;
    }

    @Override
    public void propertyChanged(PropertyState before, PropertyState after) {
        propertiesChanged = true;
    }

    @Override
    public void propertyDeleted(PropertyState before) {
        propertiesChanged = true;
    }

    @Override
    public Editor childNodeAdded(String name, NodeState after) {
        return new LuceneIndexEditor(this, name);
    }

    @Override
    public Editor childNodeChanged(
            String name, NodeState before, NodeState after) {
        return new LuceneIndexEditor(this, name);
    }
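
    /**
     * Removes the index documents of the deleted node and of its entire
     * subtree, and returns {@code null} since there is nothing left to
     * recurse into.
     */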
@Override
public Editor childNodeDeleted(String name, NodeState before)
throws CommitFailedException {
String path = concat(getPath(), name);
try {
IndexWriter writer = context.getWriter();
            // remove the document of the deleted node itself...
            writer.deleteDocuments(newPathTerm(path));
            // ...and the documents of all its descendants; the trailing "/"
            // ensures that only true descendants match, not siblings that
            // merely share a name prefix (e.g. /foo vs. /foobar)
            writer.deleteDocuments(new PrefixQuery(newPathTerm(path + "/")));
this.context.indexUpdate();
} catch (IOException e) {
throw new CommitFailedException(
"Lucene", 5, "Failed to remove the index entries of"
+ " the removed subtree " + path, e);
}
return null; // no need to recurse down the removed subtree
}
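
    /**
     * Creates or replaces the index document for the given path. Returns
     * {@code true} if a document was written, {@code false} if the change
     * turned out to be irrelevant for this index.
     */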
private boolean addOrUpdate(String path, NodeState state, boolean isUpdate)
throws CommitFailedException {
try {
Document d = makeDocument(path, state, isUpdate);
if (d != null) {
context.getWriter().updateDocument(newPathTerm(path), d);
return true;
}
} catch (IOException e) {
throw new CommitFailedException("Lucene", 3,
"Failed to index the node " + path, e);
}
return false;
}
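
    /**
     * Builds the Lucene document for the given node: one field per indexed
     * property, a path field, and full text fields for property values and
     * the node name. Returns {@code null} when an update contains no changes
     * relevant to this index.
     */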
    private Document makeDocument(String path, NodeState state, boolean isUpdate)
            throws CommitFailedException {
List<Field> fields = new ArrayList<Field>();
boolean dirty = false;
for (PropertyState property : state.getProperties()) {
String pname = property.getName();
            // index only visible properties whose type is enabled in the
            // property type bit mask of the index definition
            if (isVisible(pname)
                    && (context.getPropertyTypes()
                            & (1 << property.getType().tag())) != 0
                    && context.includeProperty(pname)) {
if (Type.BINARY.tag() == property.getType().tag()) {
this.context.indexUpdate();
fields.addAll(newBinary(property, state));
dirty = true;
} else {
for (String value : property.getValue(Type.STRINGS)) {
this.context.indexUpdate();
fields.add(newPropertyField(pname, value,
!skipTokenization(pname),
context.isStored(pname)));
fields.add(newFulltextField(value));
dirty = true;
}
}
}
}
if (isUpdate && !dirty) {
            // the node was updated, but none of the changes are relevant
            // to this index, so the existing document can stay as it is
return null;
}
Document document = new Document();
document.add(newPathField(path));
String name = getName(path);
if (name != null) {
document.add(newFulltextField(name));
}
for (Field f : fields) {
document.add(f);
}
return document;
}
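
    /** A property is visible (and thus indexable) unless its name starts with a colon. */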
private static boolean isVisible(String name) {
return name.charAt(0) != ':';
}
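
    /**
     * Creates full text fields for a binary property by extracting its text
     * content with the configured Tika parser. For jcr:data properties the
     * optional jcr:mimeType and jcr:encoding properties of the same node
     * are passed to the parser as extraction hints.
     */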
    private List<Field> newBinary(PropertyState property, NodeState state) {
List<Field> fields = new ArrayList<Field>();
Metadata metadata = new Metadata();
if (JCR_DATA.equals(property.getName())) {
String type = state.getString(JcrConstants.JCR_MIMETYPE);
if (type != null) { // not mandatory
metadata.set(Metadata.CONTENT_TYPE, type);
}
String encoding = state.getString(JcrConstants.JCR_ENCODING);
if (encoding != null) { // not mandatory
metadata.set(Metadata.CONTENT_ENCODING, encoding);
}
}
for (Blob v : property.getValue(Type.BINARIES)) {
fields.add(newFulltextField(parseStringValue(v, metadata)));
}
return fields;
}
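
    /**
     * Extracts the text content of the given blob with Tika. Errors caused
     * by missing extraction libraries are silently ignored; other failures
     * are logged and replaced by the marker token
     * {@code "TextExtractionError"}, unless the parser merely hit its write
     * limit, in which case the text extracted so far is returned.
     */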
private String parseStringValue(Blob v, Metadata metadata) {
WriteOutContentHandler handler = new WriteOutContentHandler();
try {
InputStream stream = v.getNewStream();
try {
context.getParser().parse(stream, handler, metadata, new ParseContext());
} finally {
stream.close();
}
} catch (LinkageError e) {
// Capture and ignore errors caused by extraction libraries
// not being present. This is equivalent to disabling
// selected media types in configuration, so we can simply
// ignore these errors.
} catch (Throwable t) {
// Capture and report any other full text extraction problems.
// The special STOP exception is used for normal termination.
if (!handler.isWriteLimitReached(t)) {
log.debug("Failed to extract text from a binary property."
+ " This is a fairly common case, and nothing to"
+ " worry about. The stack trace is included to"
+ " help improve the text extraction feature.", t);
return "TextExtractionError";
}
}
return handler.toString();
}
}