Package org.apache.jackrabbit.core.query.lucene

Source Code of org.apache.jackrabbit.core.query.lucene.TextExtractorFilter

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.core.query.lucene;

import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;

import javax.jcr.RepositoryException;

import org.apache.jackrabbit.core.query.TextFilter;
import org.apache.jackrabbit.core.state.PropertyState;
import org.apache.jackrabbit.core.value.BLOBFileValue;
import org.apache.jackrabbit.core.value.InternalValue;
import org.apache.jackrabbit.extractor.TextExtractor;

/**
* Utility base class for migrating functionality from existing implementations
* of the deprecated {@link TextFilter} interface to the new
* {@link TextExtractor} interface. Once the functionality of an existing
* TextFilter has been copied to a new TextExtractor, the original class can
* be replaced with the following template to keep backwards compatibility
* while avoiding the burden of maintaining duplicate code:
* <pre>
* <b>public class</b> SomeTextFilter <b>extends</b> TextExtractorFilter {
*     <b>public</b> SomeTextFilter() {
*         <b>super</b>(<b>new</b> SomeTextExtractor());
*     }
* }
* </pre>
*/
public class TextExtractorFilter implements TextFilter {

    /**
     * The adapted text extractor.
     */
    private final TextExtractor extractor;

    /**
     * Creates a text filter adapter for the given text extractor.
     *
     * @param extractor adapted text extractor
     */
    public TextExtractorFilter(TextExtractor extractor) {
        this.extractor = extractor;
    }

    /**
     * Returns true if the adapted text extractor supports the given
     * content type.
     *
     * @param mimeType content type
     * @return <code>true</code> if the content type is supported,
     *         <code>false</code> otherwise
     */
    public boolean canFilter(String mimeType) {
        mimeType = mimeType.toLowerCase();
        String[] types = extractor.getContentTypes();
        for (int i = 0; i < types.length; i++) {
            if (types[i].equals(mimeType)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Extracts text content of the given binary property using the adapted
     * text extractor.
     *
     * @param data binary property
     * @param encoding character encoding, or <code>null</code>
     * @return map that contains a reader for the extracted text as
     *         the {@link FieldNames#FULLTEXT} entry
     * @throws RepositoryException if the binary property can not be read
     */
    public Map doFilter(PropertyState data, String encoding)
            throws RepositoryException {
        InternalValue[] values = data.getValues();
        if (values.length == 1) {
            try {
                String type = "application/octet-stream";
                String[] types = extractor.getContentTypes();
                if (types.length > 0) {
                    type = types[0];
                }

                BLOBFileValue blob = values[0].getBLOBFileValue();
                Reader reader =
                    extractor.extractText(blob.getStream(), type, encoding);

                Map result = new HashMap();
                result.put(FieldNames.FULLTEXT, reader);
                return result;
            } catch (IOException e) {
                throw new RepositoryException("Text extraction error", e);
            }
        } else {
            // multi value not supported
            throw new RepositoryException(
                    "Multi-valued binary properties not supported.");
        }
    }

}
TOP

Related Classes of org.apache.jackrabbit.core.query.lucene.TextExtractorFilter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.