// Package it.unimi.dsi.mg4j.index
//
// Source code of it.unimi.dsi.mg4j.index.MultiTermIndexIteratorTest

package it.unimi.dsi.mg4j.index;

import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.mg4j.index.BitStreamIndex;
import it.unimi.dsi.mg4j.index.DiskBasedIndex;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.index.IndexIterator;
import it.unimi.dsi.mg4j.index.MultiTermIndexIterator;
import it.unimi.dsi.mg4j.query.nodes.Query;
import it.unimi.dsi.mg4j.query.nodes.QueryBuilderVisitorException;
import it.unimi.dsi.mg4j.query.parser.QueryParserException;
import it.unimi.dsi.mg4j.query.parser.SimpleParser;
import it.unimi.dsi.mg4j.search.DocumentIterator;
import it.unimi.dsi.mg4j.search.DocumentIteratorBuilderVisitor;
import it.unimi.dsi.mg4j.search.OrDocumentIterator;
import it.unimi.dsi.mg4j.search.visitor.AbstractDocumentIteratorVisitor;
import it.unimi.dsi.mg4j.tool.IndexBuilder;
import it.unimi.dsi.util.Interval;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.net.URISyntaxException;

import junit.framework.TestCase;

import org.apache.commons.configuration.ConfigurationException;

import it.unimi.dsi.mg4j.document.StringArrayDocumentCollection;
import it.unimi.dsi.mg4j.search.IntArrayIndexIterator;

public class MultiTermIndexIteratorTest extends TestCase {
  private BitStreamIndex index;
  private SimpleParser simpleParser;
  public void setUp() throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {

    String basename = File.createTempFile( getClass().getSimpleName(), "test" ).getCanonicalPath();
    new IndexBuilder( basename, new StringArrayDocumentCollection( "a", "b", "c" ) ).run();
    index = DiskBasedIndex.getInstance( basename + "-text", true, true );
    simpleParser = new SimpleParser( index.termProcessor );
  }


  public void testSkipBug() throws QueryParserException, QueryBuilderVisitorException, IOException {
    Query query = simpleParser.parse( "a + b + c" );
    DocumentIteratorBuilderVisitor documentIteratorBuilderVisitor = new DocumentIteratorBuilderVisitor( null, index, Integer.MAX_VALUE );
    DocumentIterator documentIterator = query.accept( documentIteratorBuilderVisitor );
    assertEquals( 2, documentIterator.skipTo( 2 ) );
    documentIterator.dispose();
  }
 

  public void test() throws IOException {
    IndexIterator i0 = new IntArrayIndexIterator( new int[] { 0, 1, 2 },
        new int[][] {
        { 0, 3 },
        { 0 },
        { 0 },
        } );
    IndexIterator i1 = new IntArrayIndexIterator( new int[] { 0, 2 },
        new int[][] {
        { 1 },
        { 1 },
        } );
    IndexIterator i2 = new IntArrayIndexIterator( new int[] { 0, 1, 3 },
        new int[][] {
        { 2 },
        { 2 },
        { 0 },
        } );
    MultiTermIndexIterator multiTermIndexIterator = (MultiTermIndexIterator)MultiTermIndexIterator.getInstance( i0, i1, i2 );
    assertEquals( 3, multiTermIndexIterator.frequency() );
   
    assertTrue( multiTermIndexIterator.hasNext() );
    assertTrue( multiTermIndexIterator.hasNext() ); // To increase coverage
   
    assertEquals( 0, multiTermIndexIterator.nextDocument() );
    assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );
    assertTrue( multiTermIndexIterator.intervalIterator().hasNext() ); // To increase coverage
    assertEquals( Interval.valueOf( 0 ), multiTermIndexIterator.intervalIterator().nextInterval() );
    assertEquals( Interval.valueOf( 1 ), multiTermIndexIterator.intervalIterator().nextInterval() );
    assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );

    assertEquals( 4, multiTermIndexIterator.count() );
    int[] position = multiTermIndexIterator.positionArray();
    assertEquals( 0, position[ 0 ] );
    assertEquals( 1, position[ 1 ] );
    assertEquals( 2, position[ 2 ] );
    assertEquals( 3, position[ 3 ] );

    assertEquals( Interval.valueOf( 2 ), multiTermIndexIterator.intervalIterator().nextInterval() );
   
    position = new int[ 4 ];
    multiTermIndexIterator.positions( position );
    assertEquals( 0, position[ 0 ] );
    assertEquals( 1, position[ 1 ] );
    assertEquals( 2, position[ 2 ] );
    assertEquals( 3, position[ 3 ] );

   
    assertEquals( Interval.valueOf( 3 ), multiTermIndexIterator.intervalIterator().nextInterval() );

    IntIterator positions = multiTermIndexIterator.positions();
    assertEquals( 0, positions.nextInt() );
    assertEquals( 1, positions.nextInt() );
    assertEquals( 2, positions.nextInt() );
    assertEquals( 3, positions.nextInt() );
    assertFalse( positions.hasNext() );

   
    assertFalse( multiTermIndexIterator.intervalIterator().hasNext() );
    assertFalse( multiTermIndexIterator.intervalIterator().hasNext() ); // To increase coverage

    assertEquals( 1, multiTermIndexIterator.nextDocument() );
    assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );
    assertTrue( multiTermIndexIterator.intervalIterator().hasNext() ); // To increase coverage
    assertEquals( Interval.valueOf( 0 ), multiTermIndexIterator.intervalIterator().nextInterval() );
    assertEquals( Interval.valueOf( 2 ), multiTermIndexIterator.intervalIterator().nextInterval() );

    assertEquals( 2, multiTermIndexIterator.count() );
    position = multiTermIndexIterator.positionArray();
    assertEquals( 0, position[ 0 ] );
    assertEquals( 2, position[ 1 ] );
    positions = multiTermIndexIterator.positions();
    assertEquals( 0, positions.nextInt() );
    assertEquals( 2, positions.nextInt() );
    assertFalse( positions.hasNext() );
   
    assertFalse( multiTermIndexIterator.intervalIterator().hasNext() );

    assertEquals( 2, multiTermIndexIterator.nextDocument() );
    assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );
    assertTrue( multiTermIndexIterator.intervalIterator().hasNext() ); // To increase coverage
    assertEquals( Interval.valueOf( 0 ), multiTermIndexIterator.intervalIterator().nextInterval() );
    assertEquals( Interval.valueOf( 1 ), multiTermIndexIterator.intervalIterator().nextInterval() );

    assertEquals( 2, multiTermIndexIterator.count() );
    position = multiTermIndexIterator.positionArray();
    assertEquals( 0, position[ 0 ] );
    assertEquals( 1, position[ 1 ] );
    positions = multiTermIndexIterator.positions();
    assertEquals( 0, positions.nextInt() );
    assertEquals( 1, positions.nextInt() );
    assertFalse( positions.hasNext() );
   
    assertFalse( multiTermIndexIterator.intervalIterator().hasNext() );
   
    // Here we get the iterator of the underlying IndexIterator
    assertEquals( 3, multiTermIndexIterator.nextDocument() );
    assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );
    assertEquals( Interval.valueOf( 0 ), multiTermIndexIterator.intervalIterator().nextInterval() );

    assertEquals( 1, multiTermIndexIterator.count() );
    position = multiTermIndexIterator.positionArray();
    assertEquals( 0, position[ 0 ] );
    positions = multiTermIndexIterator.positions();
    assertEquals( 0, positions.nextInt() );
    assertFalse( positions.hasNext() );
   
    assertFalse( multiTermIndexIterator.intervalIterator().hasNext() );
   
    // The end
    assertFalse( multiTermIndexIterator.hasNext() );
    assertFalse( multiTermIndexIterator.hasNext() ); // To increase coverage
  }
 
  // Contributed by Fabien Campagne
  public void testMG4JMultiTermPositionIssue() throws IllegalAccessException, NoSuchMethodException, ConfigurationException, IOException, InvocationTargetException, InstantiationException, ClassNotFoundException, URISyntaxException {
    String basename = File.createTempFile( getClass().getSimpleName(), "test" ).getCanonicalPath();
    new IndexBuilder( basename, new StringArrayDocumentCollection(
                "A B C D E F F G G",
                "G A T H S K L J W L",
                "E S K D L J F K L S J D L S J D",
                "E B"
    ) ).run();
    Index index = DiskBasedIndex.getInstance( basename + "-text", true, true );

        /// String query = "A| B+C+G|W|S+J";
        DocumentIterator iterator = OrDocumentIterator.getInstance(
                index.documents("A"),
                MultiTermIndexIterator.getInstance(
                        index.documents("B"),
                        index.documents("C"),
                        index.documents("G")
                ),
                index.documents("W"),
                MultiTermIndexIterator.getInstance(
                        index.documents("S"),
                        index.documents("J")
                ));


        final int[] currDoc = new int[ 1 ];
        // A visitor invoking positionArray() on IndexIterators positioned on the current document.
        AbstractDocumentIteratorVisitor visitor = new AbstractDocumentIteratorVisitor() {
          public Boolean visit(IndexIterator indexIterator) throws IOException {
            if (indexIterator.count() > 0 && indexIterator.document() == currDoc[ 0 ] ) indexIterator.positionArray();
              return Boolean.TRUE;
          }
        };


        for (int document = 0; document < index.numberOfDocuments; document++) {
            currDoc[ 0 ] = iterator.skipTo(document);

            if (document == currDoc[ 0 ]) {
               iterator.accept(visitor); // see method visit below.
            }
        }

       
        while( iterator.hasNext() ) {
          currDoc[ 0 ] = iterator.nextDocument();
          iterator.acceptvisitor );
        }
    }

}
// TOP
//
// Related classes of it.unimi.dsi.mg4j.index.MultiTermIndexIteratorTest
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.