Package org.elasticsearch.common.lucene.search

Source Code of org.elasticsearch.common.lucene.search.XBooleanFilter

package org.elasticsearch.common.lucene.search;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.FilterClause;
import org.apache.lucene.search.BitsFilteredDocIdSet;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.common.lucene.docset.AllDocIdSet;
import org.elasticsearch.common.lucene.docset.AndDocIdSet;
import org.elasticsearch.common.lucene.docset.DocIdSets;
import org.elasticsearch.common.lucene.docset.NotDocIdSet;
import org.elasticsearch.common.lucene.docset.OrDocIdSet.OrBits;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;

/**
* Similar to {@link org.apache.lucene.queries.BooleanFilter}.
* <p/>
* Our own variance mainly differs by the fact that we pass the acceptDocs down to the filters
* and don't filter based on them at the end. Our logic is a bit different, and we filter based on that
* at the top level filter chain.
*/
public class XBooleanFilter extends Filter implements Iterable<FilterClause> {

    private static final Comparator<DocIdSetIterator> COST_DESCENDING = new Comparator<DocIdSetIterator>() {
        @Override
        public int compare(DocIdSetIterator o1, DocIdSetIterator o2) {
            return Long.compare(o2.cost(), o1.cost());
        }
    };
    private static final Comparator<DocIdSetIterator> COST_ASCENDING = new Comparator<DocIdSetIterator>() {
        @Override
        public int compare(DocIdSetIterator o1, DocIdSetIterator o2) {
            return Long.compare(o1.cost(), o2.cost());
        }
    };

    final List<FilterClause> clauses = new ArrayList<>();

    /**
     * Returns the a DocIdSetIterator representing the Boolean composition
     * of the filters that have been added.
     */
    @Override
    public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
        final int maxDoc = context.reader().maxDoc();

        // the 0-clauses case is ambiguous because an empty OR filter should return nothing
        // while an empty AND filter should return all docs, so we handle this case explicitely
        if (clauses.isEmpty()) {
            return null;
        }

        // optimize single case...
        if (clauses.size() == 1) {
            FilterClause clause = clauses.get(0);
            DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs);
            if (clause.getOccur() == Occur.MUST_NOT) {
                if (DocIdSets.isEmpty(set)) {
                    return new AllDocIdSet(maxDoc);
                } else {
                    return new NotDocIdSet(set, maxDoc);
                }
            }
            // SHOULD or MUST, just return the set...
            if (DocIdSets.isEmpty(set)) {
                return null;
            }
            return set;
        }

        // We have several clauses, try to organize things to make it easier to process
        List<DocIdSetIterator> shouldIterators = new ArrayList<>();
        List<Bits> shouldBits = new ArrayList<>();
        boolean hasShouldClauses = false;

        List<DocIdSetIterator> requiredIterators = new ArrayList<>();
        List<DocIdSetIterator> excludedIterators = new ArrayList<>();

        List<Bits> requiredBits = new ArrayList<>();
        List<Bits> excludedBits = new ArrayList<>();

        for (FilterClause clause : clauses) {
            DocIdSet set = clause.getFilter().getDocIdSet(context, null);
            DocIdSetIterator it = null;
            Bits bits = null;
            if (DocIdSets.isEmpty(set) == false) {
                it = set.iterator();
                if (it != null) {
                    bits = set.bits();
                }
            }

            switch (clause.getOccur()) {
            case SHOULD:
                hasShouldClauses = true;
                if (it == null) {
                    // continue, but we recorded that there is at least one should clause
                    // so that if all iterators are null we know that nothing matches this
                    // filter since at least one SHOULD clause needs to match
                } else if (bits != null && DocIdSets.isBroken(it)) {
                    shouldBits.add(bits);
                } else {
                    shouldIterators.add(it);
                }
                break;
            case MUST:
                if (it == null) {
                    // no documents matched a clause that is compulsory, then nothing matches at all
                    return null;
                } else if (bits != null && DocIdSets.isBroken(it)) {
                    requiredBits.add(bits);
                } else {
                    requiredIterators.add(it);
                }
                break;
            case MUST_NOT:
                if (it == null) {
                    // ignore
                } else if (bits != null && DocIdSets.isBroken(it)) {
                    excludedBits.add(bits);
                } else {
                    excludedIterators.add(it);
                }
                break;
            default:
                throw new AssertionError();
            }
        }

        // Since BooleanFilter requires that at least one SHOULD clause matches,
        // transform the SHOULD clauses into a MUST clause

        if (hasShouldClauses) {
            if (shouldIterators.isEmpty() && shouldBits.isEmpty()) {
                // we had should clauses, but they all produced empty sets
                // yet BooleanFilter requires that at least one clause matches
                // so it means we do not match anything
                return null;
            } else if (shouldIterators.size() == 1 && shouldBits.isEmpty()) {
                requiredIterators.add(shouldIterators.get(0));
            } else {
                // apply high-cardinality should clauses first
                CollectionUtil.timSort(shouldIterators, COST_DESCENDING);

                BitDocIdSet.Builder shouldBuilder = null;
                for (DocIdSetIterator it : shouldIterators) {
                    if (shouldBuilder == null) {
                        shouldBuilder = new BitDocIdSet.Builder(maxDoc);
                    }
                    shouldBuilder.or(it);
                }

                if (shouldBuilder != null && shouldBits.isEmpty() == false) {
                    // we have both iterators and bits, there is no way to compute
                    // the union efficiently, so we just transform the iterators into
                    // bits
                    // add first since these are fast bits
                    shouldBits.add(0, shouldBuilder.build().bits());
                    shouldBuilder = null;
                }

                if (shouldBuilder == null) {
                    // only bits
                    assert shouldBits.size() >= 1;
                    if (shouldBits.size() == 1) {
                        requiredBits.add(shouldBits.get(0));
                    } else {
                        requiredBits.add(new OrBits(shouldBits.toArray(new Bits[shouldBits.size()])));
                    }
                } else {
                    assert shouldBits.isEmpty();
                    // only iterators, we can add the merged iterator to the list of required iterators
                    requiredIterators.add(shouldBuilder.build().iterator());
                }
            }
        } else {
            assert shouldIterators.isEmpty();
            assert shouldBits.isEmpty();
        }

        // From now on, we don't have to care about SHOULD clauses anymore since we upgraded
        // them to required clauses (if necessary)

        // cheap iterators first to make intersection faster
        CollectionUtil.timSort(requiredIterators, COST_ASCENDING);
        CollectionUtil.timSort(excludedIterators, COST_ASCENDING);

        // Intersect iterators
        BitDocIdSet.Builder res = null;
        for (DocIdSetIterator iterator : requiredIterators) {
            if (res == null) {
                res = new BitDocIdSet.Builder(maxDoc);
                res.or(iterator);
            } else {
                res.and(iterator);
            }
        }
        for (DocIdSetIterator iterator : excludedIterators) {
            if (res == null) {
                res = new BitDocIdSet.Builder(maxDoc, true);
            }
            res.andNot(iterator);
        }

        // Transform the excluded bits into required bits
        if (excludedBits.isEmpty() == false) {
            Bits excluded;
            if (excludedBits.size() == 1) {
                excluded = excludedBits.get(0);
            } else {
                excluded = new OrBits(excludedBits.toArray(new Bits[excludedBits.size()]));
            }
            requiredBits.add(new NotDocIdSet.NotBits(excluded));
        }

        // The only thing left to do is to intersect 'res' with 'requiredBits'

        // the main doc id set that will drive iteration
        DocIdSet main;
        if (res == null) {
            main = new AllDocIdSet(maxDoc);
        } else {
            main = res.build();
        }

        // apply accepted docs and compute the bits to filter with
        // accepted docs are added first since they are fast and will help not computing anything on deleted docs
        if (acceptDocs != null) {
            requiredBits.add(0, acceptDocs);
        }
        // the random-access filter that we will apply to 'main'
        Bits filter;
        if (requiredBits.isEmpty()) {
            filter = null;
        } else if (requiredBits.size() == 1) {
            filter = requiredBits.get(0);
        } else {
            filter = new AndDocIdSet.AndBits(requiredBits.toArray(new Bits[requiredBits.size()]));
        }

        return BitsFilteredDocIdSet.wrap(main, filter);
    }

    /**
     * Adds a new FilterClause to the Boolean Filter container
     *
     * @param filterClause A FilterClause object containing a Filter and an Occur parameter
     */
    public void add(FilterClause filterClause) {
        clauses.add(filterClause);
    }

    public final void add(Filter filter, Occur occur) {
        add(new FilterClause(filter, occur));
    }

    /**
     * Returns the list of clauses
     */
    public List<FilterClause> clauses() {
        return clauses;
    }

    /**
     * Returns an iterator on the clauses in this query. It implements the {@link Iterable} interface to
     * make it possible to do:
     * <pre class="prettyprint">for (FilterClause clause : booleanFilter) {}</pre>
     */
    public final Iterator<FilterClause> iterator() {
        return clauses().iterator();
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }

        if ((obj == null) || (obj.getClass() != this.getClass())) {
            return false;
        }

        final XBooleanFilter other = (XBooleanFilter) obj;
        return clauses.equals(other.clauses);
    }

    @Override
    public int hashCode() {
        return 657153718 ^ clauses.hashCode();
    }

    /**
     * Prints a user-readable version of this Filter.
     */
    @Override
    public String toString() {
        final StringBuilder buffer = new StringBuilder("BooleanFilter(");
        final int minLen = buffer.length();
        for (final FilterClause c : clauses) {
            if (buffer.length() > minLen) {
                buffer.append(' ');
            }
            buffer.append(c);
        }
        return buffer.append(')').toString();
    }

    static class ResultClause {

        public final DocIdSet docIdSet;
        public final Bits bits;
        public final FilterClause clause;

        DocIdSetIterator docIdSetIterator;

        ResultClause(DocIdSet docIdSet, Bits bits, FilterClause clause) {
            this.docIdSet = docIdSet;
            this.bits = bits;
            this.clause = clause;
        }

        /**
         * @return An iterator, but caches it for subsequent usage. Don't use if iterator is consumed in one invocation.
         */
        DocIdSetIterator iterator() throws IOException {
            if (docIdSetIterator != null) {
                return docIdSetIterator;
            } else {
                return docIdSetIterator = docIdSet.iterator();
            }
        }

    }

    static boolean iteratorMatch(DocIdSetIterator docIdSetIterator, int target) throws IOException {
        assert docIdSetIterator != null;
        int current = docIdSetIterator.docID();
        if (current == DocIdSetIterator.NO_MORE_DOCS || target < current) {
            return false;
        } else {
            if (current == target) {
                return true;
            } else {
                return docIdSetIterator.advance(target) == target;
            }
        }
    }

}
TOP

Related Classes of org.elasticsearch.common.lucene.search.XBooleanFilter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.