Source Code of org.apache.jackrabbit.core.query.lucene.TextExtractorFilter

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.core.query.lucene;


import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;


import javax.jcr.RepositoryException;


import org.apache.jackrabbit.core.query.TextFilter;
import org.apache.jackrabbit.core.state.PropertyState;
import org.apache.jackrabbit.core.value.BLOBFileValue;
import org.apache.jackrabbit.core.value.InternalValue;
import org.apache.jackrabbit.extractor.TextExtractor;


/**
 * Utility base class for migrating functionality from existing implementations
 * of the deprecated {@link TextFilter} interface to the new
 * {@link TextExtractor} interface. Once the functionality of an existing
 * TextFilter has been copied to a new TextExtractor, the original class can
 * be replaced with the following template to keep backwards compatibility
 * while avoiding the burden of maintaining duplicate code:
 * <pre>
 * <b>public class</b> SomeTextFilter <b>extends</b> TextExtractorFilter {
 *     <b>public</b> SomeTextFilter() {
 *         <b>super</b>(<b>new</b> SomeTextExtractor());
 *     }
 * }
 * </pre>
 */
public class TextExtractorFilter implements TextFilter {


    /**
     * The adapted text extractor.
     */
    private final TextExtractor extractor;


    /**
     * Creates a text filter adapter for the given text extractor.
     *
     * @param extractor adapted text extractor
     */
    public TextExtractorFilter(TextExtractor extractor) {
        this.extractor = extractor;
    }


    /**
     * Returns true if the adapted text extractor supports the given
     * content type.
     *
     * @param mimeType content type
     * @return <code>true</code> if the content type is supported,
     *         <code>false</code> otherwise
     */
    public boolean canFilter(String mimeType) {
        mimeType = mimeType.toLowerCase();
        String[] types = extractor.getContentTypes();
        for (int i = 0; i < types.length; i++) {
            if (types[i].equals(mimeType)) {
                return true;
            }
        }
        return false;
    }


    /**
     * Extracts text content of the given binary property using the adapted
     * text extractor.
     *
     * @param data binary property
     * @param encoding character encoding, or <code>null</code>
     * @return map that contains a reader for the extracted text as
     *         the {@link FieldNames#FULLTEXT} entry
     * @throws RepositoryException if the binary property can not be read
     */
    public Map doFilter(PropertyState data, String encoding)
            throws RepositoryException {
        InternalValue[] values = data.getValues();
        if (values.length == 1) {
            try {
                String type = "application/octet-stream";
                String[] types = extractor.getContentTypes();
                if (types.length > 0) {
                    type = types[0];
                }


                BLOBFileValue blob = values[0].getBLOBFileValue();
                Reader reader =
                    extractor.extractText(blob.getStream(), type, encoding);


                Map result = new HashMap();
                result.put(FieldNames.FULLTEXT, reader);
                return result;
            } catch (IOException e) {
                throw new RepositoryException("Text extraction error", e);
            }
        } else {
            // multi value not supported
            throw new RepositoryException(
                    "Multi-valued binary properties not supported.");
        }
    }


}
Source Code of org.apache.jackrabbit.core.query.lucene.TextExtractorFilter

Related Classes of org.apache.jackrabbit.core.query.lucene.TextExtractorFilter