Package it.unimi.dsi.mg4j.search

Source Code of it.unimi.dsi.mg4j.search.AndDocumentIterator$AndIndexIntervalIterator

package it.unimi.dsi.mg4j.search;

/*    
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2003-2010 Paolo Boldi and Sebastiano Vigna
*
*  This library is free software; you can redistribute it and/or modify it
*  under the terms of the GNU Lesser General Public License as published by the Free
*  Software Foundation; either version 3 of the License, or (at your option)
*  any later version.
*
*  This library is distributed in the hope that it will be useful, but
*  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
*  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
*  for more details.
*
*  You should have received a copy of the GNU Lesser General Public License
*  along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/

import it.unimi.dsi.fastutil.ints.IntHeapSemiIndirectPriorityQueue;
import it.unimi.dsi.fastutil.ints.IntSet;
import it.unimi.dsi.fastutil.objects.ObjectArrays;
import it.unimi.dsi.fastutil.objects.ObjectHeapSemiIndirectPriorityQueue;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.util.Interval;
import it.unimi.dsi.util.Intervals;

import java.io.IOException;


/** A document iterator that returns the AND of a number of document iterators.
*
* <P>This class adds to {@link it.unimi.dsi.mg4j.search.AbstractIntersectionDocumentIterator}
* an interval iterator generating the AND of the intervals returned for each of the documents involved.
*/

public class AndDocumentIterator extends AbstractIntersectionDocumentIterator {

  private final static boolean DEBUG = false;
 
  /** Returns a document iterator that computes the AND of the given array of iterators.
   *
   * <P>Note that the special case of the empty and of the singleton arrays
   * are handled efficiently.
   *
    * @param index the default index; relevant only if <code>it</code> has zero length.
   * @param documentIterator the iterators to be joined.
   * @return a document iterator that computes the AND of <code>it</code>.
   * @throws IOException
   */
  public static DocumentIterator getInstance( final Index index, final DocumentIterator... documentIterator ) throws IOException {
    if ( documentIterator.length == 0 ) return TrueDocumentIterator.getInstance( index );
    if ( documentIterator.length == 1 ) return documentIterator[ 0 ];
    return new AndDocumentIterator( documentIterator );
  }

  /** Returns a document iterator that computes the AND of the given nonzero-length array of iterators.
   *
   * <P>Note that the special case of the singleton array is handled efficiently.
   *
   * @param documentIterator the iterators to be joined (at least one).
   * @return a document iterator that computes the AND of <code>it</code>.
   * @throws IOException
   */
  public static DocumentIterator getInstance( final DocumentIterator... documentIterator ) throws IOException {
    if ( documentIterator.length == 0 ) throw new IllegalArgumentException();
    return getInstance( null, documentIterator );
  }

  protected AndDocumentIterator( final DocumentIterator[] documentIterator ) throws IOException {
    super( documentIterator );
  }

  protected IntervalIterator getComposedIntervalIterator( final Index index ) {
    return indexIterator == null ? new AndIntervalIterator( index ) : new AndIndexIntervalIterator( index )
  }

  /** An interval iterator returning the AND (in the Clarke&minus;Cormack&minus;Burkowski lattice) of the component interval iterators. */
 
  private class AndIntervalIterator extends AbstractCompositeIntervalIterator implements IntervalIterator {
    /** The index of this iterator. */
    final private Index index;
    /** A heap-based indirect priority queue used to keep track of the currently scanned intervals. */
    final private ObjectHeapSemiIndirectPriorityQueue<Interval> queue;
    /** Whether the scan is over. */
    private boolean endOfProcess;
    /** The left extreme of the last returned interval, or {@link Integer#MIN_VALUE} after a {@link #reset()}. */
    private int lastLeft;
    /** The maximum right extreme currently in the queue. */
    private int maxRight;

    /** Creates a new AND interval iterator.
     *
     * @param index the index of the iterator.
     */

    public AndIntervalIterator( final Index index ) {
      super( n );
      // We just set up some internal data, but we perform no initialisation.
      this.index = index;
      queue = new ObjectHeapSemiIndirectPriorityQueue<Interval>( curr, Intervals.STARTS_BEFORE_OR_PROLONGS );
    }


    public void reset() throws IOException {
      ObjectArrays.fill( curr, null );
      queue.clear();
      next = null;
      maxRight = Integer.MIN_VALUE;
      lastLeft = Integer.MIN_VALUE;
      endOfProcess = false;
     
      for ( int i = 0; i < n; i++ ) {
        intervalIterator[ i ] = documentIterator[ i ].intervalIterator( index );
        if ( ! intervalIterator[ i ].hasNext() ) {
          // If by any chance we meet an empty (false) iterator we are just false
          endOfProcess = true;
          return;
        }
        // TRUE iterators need a special treatment
        if ( intervalIterator[ i ] != IntervalIterators.TRUE ) {
          curr[ i ] = intervalIterator[ i ].nextInterval();
          queue.enqueue( i );
          maxRight = Math.max( maxRight, curr[ i ].right );
        }
      }

      /* At this point the queue could not be full because of TRUE iterators, but it's OK,
       * because TRUE is the neutral element of conjunction. However, we cannot handle here
       * the case in which all conjuncts are TRUE, as this case must be handled at a higher level. */
      if ( queue.isEmpty() ) throw new IllegalArgumentException();
    }

    public void intervalTerms( final IntSet terms ) {
      for( int i = n; i-- != 0; ) intervalIterator[ i ].intervalTerms( terms );
    }


    public Interval nextInterval() throws IOException {
      if ( next != null ) {
        final Interval result = next;
        next = null;
        return result;
      }

      if ( endOfProcess ) return null;

      int first;
      while ( curr[ first = queue.first() ].left == lastLeft ) {
        if ( ( curr[ first ] = intervalIterator[ first ].nextInterval() ) == null ) {
          endOfProcess = true;
          return null;
        }
        maxRight = Math.max( maxRight, curr[ first ].right );
        queue.changed();
      }

      int nextLeft, nextRight;
     
      do {
        nextLeft = curr[ first = queue.first() ].left;
        nextRight = maxRight;
        if ( DEBUG ) System.err.println( this + " is saving interval " + Interval.valueOf( nextLeft, nextRight ) );

        /* We check whether the current top is equal to the span of the queue, and
         * whether the top interval iterator is exhausted. In both cases, the
         * current span is guaranteed to be a minimal interval. */
        if ( curr[ first ].right == maxRight || ( endOfProcess = ( curr[ first ] = intervalIterator[ first ].nextInterval() ) == null ) ) break;
        maxRight = Math.max( maxRight, curr[ first ].right );
        queue.changed();
      } while ( maxRight == nextRight );
     
      return Interval.valueOf( lastLeft = nextLeft, nextRight );
    }
   
    public int extent() {
      int s = 0;
      for ( int i = n; i-- != 0; ) s += intervalIterator[ i ].extent();
      return s;
    }
  }


  /** An interval iterator returning the AND (in the Clarke&minus;Cormack&minus;Burkowski lattice) of the component interval iterators. */
 
  private class AndIndexIntervalIterator extends AbstractCompositeIndexIntervalIterator implements IntervalIterator {
    /** The index of this iterator. */
    final private Index index;
    /** A heap-based indirect priority queue used to keep track of the currently scanned positions. */
    final private IntHeapSemiIndirectPriorityQueue queue;
    /** Whether the scan is over. */
    private boolean endOfProcess;
    /** The left extreme of the last returned interval, or {@link Integer#MIN_VALUE} after a {@link #reset()}. */
    private int lastLeft;
    /** The maximum right extreme currently in the queue. */
    private int maxRight;

    /** Creates a new AND interval iterator.
     *
     * @param index the index of the iterator.
     */

    public AndIndexIntervalIterator( final Index index ) {
      super( n );
      // We just set up some internal data, but we perform no initialisation.
      this.index = index;
      queue = new IntHeapSemiIndirectPriorityQueue( curr );
    }


    public void reset() throws IOException {
      queue.clear();
      maxRight = Integer.MIN_VALUE;
      lastLeft = Integer.MIN_VALUE;
      next = null;
      endOfProcess = false;
     
      for ( int i = 0; i < n; i++ ) {
        // The case != index is identical to the TRUE case in AndIntervalIterator.
        if ( indexIterator[ i ].index() == index ) {
          position[ i ] = indexIterator[ i ].positionArray();
          count[ i ] = indexIterator[ i ].count();
          curr[ i ] = position[ i ][ currPos[ i ] = 0 ];
          queue.enqueue( i );
          maxRight = Math.max( maxRight, curr[ i ] );
        }
      }

      /* At this point the queue could not be full because of TRUE iterators, but it's OK,
       * because TRUE is the neutral element of conjunction. However, we cannot handle here
       * the case in which all conjuncts are TRUE, as this case must be handled at a higher level. */
      if ( queue.isEmpty() ) throw new IllegalArgumentException();
    }
   
    public void intervalTerms( final IntSet terms ) {
      for( int i = n; i-- != 0; ) terms.add( indexIterator[ i ].termNumber() );
    }


    public Interval nextInterval() {
      if ( next != null ) {
        final Interval result = next;
        next = null;
        return result;
      }

      if ( endOfProcess ) return null;

      int first;
      while ( curr[ first = queue.first() ] == lastLeft ) {
        if ( ++currPos[ first ] == count[ first ] ) {
          endOfProcess = true;
          return null;
        }
        curr[ first ] = position[ first ][ currPos[ first ] ];
        maxRight = Math.max( maxRight, curr[ first ] );
        queue.changed();
      }
   
      int nextLeft, nextRight;
     
      do {
        nextLeft = curr[ first = queue.first() ];
        nextRight = maxRight;
        if ( DEBUG ) System.err.println( this + " is saving interval " + Interval.valueOf( nextLeft, nextRight ) );

        /* We check whether all iterators are on the same position, and
         * whether the top interval iterator is exhausted. In both cases, the
         * current span is guaranteed to be a minimal interval. */
        if ( curr[ first ] == maxRight || ( endOfProcess = ++currPos[ first ] == count[ first ] ) ) break;

        curr[ first ] = position[ first ][ currPos[ first ] ];
        if ( maxRight < curr[ first ] ) maxRight = curr[ first ];
        queue.changed();
      } while ( maxRight == nextRight );
     
      return Interval.valueOf( lastLeft = nextLeft, nextRight );
    }
   
    public int extent() {
      return n;
    }
  }
}
TOP

Related Classes of it.unimi.dsi.mg4j.search.AndDocumentIterator$AndIndexIntervalIterator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.