Package it.unimi.dsi.mg4j.search

Source Code of it.unimi.dsi.mg4j.search.OrDocumentIterator

package it.unimi.dsi.mg4j.search;

/*    
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2003-2010 Paolo Boldi and Sebastiano Vigna
*
*  This library is free software; you can redistribute it and/or modify it
*  under the terms of the GNU Lesser General Public License as published by the Free
*  Software Foundation; either version 3 of the License, or (at your option)
*  any later version.
*
*  This library is distributed in the hope that it will be useful, but
*  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
*  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
*  for more details.
*
*  You should have received a copy of the GNU Lesser General Public License
*  along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/

import it.unimi.dsi.fastutil.ints.IntHeapSemiIndirectPriorityQueue;
import it.unimi.dsi.fastutil.ints.IntSet;
import it.unimi.dsi.fastutil.objects.ObjectHeapSemiIndirectPriorityQueue;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.index.IndexIterator;
import it.unimi.dsi.util.Interval;
import it.unimi.dsi.util.Intervals;

import java.io.IOException;

/** An iterator on documents that returns the OR of a number of document iterators.
*
* <P>This class adds to {@link it.unimi.dsi.mg4j.search.AbstractUnionDocumentIterator}
* an interval iterator generating the OR of the intervals returned for each of the documents involved.
*/

public class OrDocumentIterator extends AbstractUnionDocumentIterator implements DocumentIterator {
  @SuppressWarnings("unused")
  private final static boolean DEBUG = false;

  /** Returns a document iterator that computes the OR of the given array of iterators.
   *
   * <P>Note that the special case of the empty and of the singleton arrays
   * are handled efficiently.
   *
    * @param index the default index; relevant only if <code>it</code> has zero length.
   * @param documentIterator the iterators to be joined.
   * @return a document iterator that computes the OR of <code>it</code>.
   * @throws IOException
   */
  public static DocumentIterator getInstance( final Index index, DocumentIterator... documentIterator  ) throws IOException {
    if ( documentIterator.length == 0 ) return FalseDocumentIterator.getInstance( index );
    if ( documentIterator.length == 1 ) return documentIterator[ 0 ];
    return new OrDocumentIterator( documentIterator );
  }

  /** Returns a document iterator that computes the OR of the given nonzero-length array of iterators.
   *
   * <P>Note that the special case of the singleton array is handled efficiently.
   *
   * @param documentIterator the iterators to be joined.
   * @return a document iterator that computes the OR of <code>it</code>.
   * @throws IOException
   */
  public static DocumentIterator getInstance( DocumentIterator... documentIterator  ) throws IOException {
    if ( documentIterator.length == 0 ) throw new IllegalArgumentException();
    return getInstance( null, documentIterator );
  }

  /** Creates a new document iterator that computes the OR of the given array of iterators.
   *  @param documentIterator the iterators to be joined.
   * @throws IOException
   */
  protected OrDocumentIterator( final DocumentIterator... documentIterator ) throws IOException {
    super( documentIterator );
  }

  protected IntervalIterator getComposedIntervalIterator( final Index index ) {
    return indexIterator == null ? new OrIntervalIterator( index : new OrIndexIntervalIterator( index );
  }

  /** An interval iterator of nondecreasing disjoint intervals
   *  obtained ORing the output of a set of iterators with
   *  the same property.
   */

  private class OrIntervalIterator extends AbstractCompositeIntervalIterator {
    /** The index of this iterator. */
    final Index index;
    /** A heap-based indirect priority queue used to keep track of the currently scanned intervals. */
    private ObjectHeapSemiIndirectPriorityQueue<Interval> intervalQueue;
    /** The left extreme of the last returned interval, or {@link Integer#MIN_VALUE} after a {@link #reset()}. */
    private int lastLeft;
    /** An array to hold the front of the interval queue. */
    private final int[] intervalFront;

    /** Creates a new OR interval iterator.
     *
     * @param index the index of the iterator.
     */
    public OrIntervalIterator( final Index index ) {
      super( n );
      // We just set up some internal data, but we perform no initialisation.
      this.index = index;
      intervalQueue = new ObjectHeapSemiIndirectPriorityQueue<Interval>( curr, Intervals.ENDS_BEFORE_OR_IS_SUFFIX );
      intervalFront = new int[ n ];
    }

    public void reset() throws IOException {
      lastLeft = Integer.MIN_VALUE;
      next = null;
      intervalQueue.clear();

      for ( int i = computeFront(), k; i-- != 0; ) {
        k = front[ i ];
        intervalIterator[ k ] = documentIterator[ k ].intervalIterator( index );
        if ( intervalIterator[ k ] != IntervalIterators.TRUE && ( curr[ k ] = intervalIterator[ k ].nextInterval() ) != null ) intervalQueue.enqueue( k );
      }
    }

    public void intervalTerms( final IntSet terms ) {
      final int frontSize = intervalQueue.front( intervalFront );
      final int[] intervalFront = this.intervalFront;
      for( int i = frontSize; i-- != 0; ) intervalIterator[ intervalFront[ i ] ].intervalTerms( terms );
    }
   
    public Interval nextInterval () throws IOException {
      if ( next != null ) {
        final Interval result = next;
        next = null;
        return result;
      }

      if ( intervalQueue.isEmpty() ) return null;

      int first;

      while ( curr[ first = intervalQueue.first() ].left <= lastLeft ) {
        if ( ( curr[ first ] = intervalIterator[ first ].nextInterval() ) != null ) intervalQueue.changed();
        else {
          intervalQueue.dequeue();
          if ( intervalQueue.isEmpty() ) return null;
        }
      }

      lastLeft = curr[ first ].left;
      return curr[ first ];
    }

    public int extent() {
      int e, s;

      s = Integer.MAX_VALUE;
      for ( int i = computeFront(), k; i-- != 0; ) {
        k = front[ i ];
        e = curr[ k ] != null ? intervalIterator[ k ].extent() : Integer.MAX_VALUE;
        if ( e < s ) s = e;
      }
      return s;
    }
  }
 
  /** An optimised interval iterator with the same semantics as that implemented
   *  by {@link OrDocumentIterator}, but using just {@link IndexIterator#positionArray()}.
   */
  protected class OrIndexIntervalIterator extends AbstractCompositeIndexIntervalIterator implements IntervalIterator {
    @SuppressWarnings({ "unused", "hiding" })
    private final static boolean DEBUG = false;
    private final static boolean ASSERTS = false;

    /** The index of this iterator. */
    final Index index;
    /** A heap-based semi-indirect priority queue used to keep track of the currently scanned positions. */
    private final IntHeapSemiIndirectPriorityQueue positionQueue;
    /** An array to hold the front of the position queue. */
    private final int[] positionFront;
   
    public OrIndexIntervalIterator( final Index index ) {
      super( n );
      this.index = index;
      positionQueue = new IntHeapSemiIndirectPriorityQueue( curr );
      positionFront = new int[ n ];
    }

    public void reset() throws IOException {
      positionQueue.clear();

      for ( int i = computeFront(), k; i-- != 0; ) {
        k = front[ i ];
        if ( indexIterator[ k ].index() == index ) {
          position[ k ] = indexIterator[ k ].positionArray();
          count[ k ] = indexIterator[ k ].count();
          curr[ k ] = position[ k ][ currPos[ k ] = 0 ];
          positionQueue.enqueue( k );
        }
      }
     
      if ( ASSERTS ) assert ! positionQueue.isEmpty();
      next = Interval.valueOf( curr[ positionQueue.first() ] );
    }

    public void intervalTerms( final IntSet terms ) {
      final int frontSize = positionQueue.front( positionFront );
      final int[] positionFront = this.positionFront;
      for( int i = frontSize; i-- != 0; ) terms.add( indexIterator[ positionFront[ i ] ].termNumber() );
    }
   
    public Interval nextInterval() {
      if ( next != null ) {
        final Interval result = next;
        next = null;
        return result;
      }
     
      if ( positionQueue.isEmpty() ) return null;
     
      int first = positionQueue.first();
      final int previous = curr[ first ];
     
      do
        if ( ++currPos[ first ] == count[ first ] ) {
          positionQueue.dequeue();
          if ( positionQueue.isEmpty() ) return null;
        }
        else {
          curr[ first ] = position[ first ][ currPos[ first ] ];
          positionQueue.changed();
        }
      while ( curr[ first = positionQueue.first() ] == previous );

      return Interval.valueOf( curr[ first ] );
    }

    public int extent() {
      return 1;
    }
   }
}
TOP

Related Classes of it.unimi.dsi.mg4j.search.OrDocumentIterator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.