Package proj.zoie.test

Source Code of proj.zoie.test.ZoieTest

package proj.zoie.test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;

import junit.framework.TestCase;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.junit.Test;

import proj.zoie.api.DataConsumer.DataEvent;
import proj.zoie.api.DataDoc;
import proj.zoie.api.DefaultDirectoryManager;
import proj.zoie.api.DirectoryManager;
import proj.zoie.api.UIDDocIdSet;
import proj.zoie.api.ZoieException;
import proj.zoie.api.ZoieMultiReader;
import proj.zoie.api.ZoieSegmentReader;
import proj.zoie.api.impl.DocIDMapperImpl;
import proj.zoie.api.indexing.IndexingEventListener;
import proj.zoie.api.indexing.ZoieIndexable;
import proj.zoie.api.indexing.ZoieIndexableInterpreter;
import proj.zoie.impl.indexing.AsyncDataConsumer;
import proj.zoie.impl.indexing.MemoryStreamDataProvider;
import proj.zoie.impl.indexing.ZoieConfig;
import proj.zoie.impl.indexing.ZoieSystem;
import proj.zoie.test.data.DataForTests;
import proj.zoie.test.mock.MockDataLoader;

public class ZoieTest extends ZoieTestCaseBase {
  /** Shared log4j logger used by the tests in this class. */
  static Logger log = Logger.getLogger(ZoieTest.class);

  /** No-argument constructor; it performs no setup of its own. */
  public ZoieTest() {
  }

  private static int countHits(ZoieSystem<IndexReader, String> idxSystem, Query q)
      throws IOException {
    IndexSearcher searcher = null;
    MultiReader reader = null;
    List<ZoieMultiReader<IndexReader>> readers = null;
    try {
      readers = idxSystem.getIndexReaders();
      reader = new MultiReader(readers.toArray(new IndexReader[readers.size()]), false);
      searcher = new IndexSearcher(reader);
      TopDocs hits = searcher.search(q, 10);
      return hits.totalHits;
    } finally {
      try {
        if (reader != null) {
          reader.close();
          reader = null;
        }
      } finally {
        idxSystem.returnIndexReaders(readers);
      }
    }
  }

  @Test
  public void testIndexWithAnalyzer() throws ZoieException, IOException {
    File idxDir = getIdxDir();
    ZoieSystem<IndexReader, String> idxSystem = createZoie(idxDir, true, 20,
      new WhitespaceAnalyzer(Version.LUCENE_43), null, ZoieConfig.DEFAULT_VERSION_COMPARATOR, false);
    idxSystem.start();

    MemoryStreamDataProvider<String> memoryProvider = new MemoryStreamDataProvider<String>(
        ZoieConfig.DEFAULT_VERSION_COMPARATOR);
    memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE);
    memoryProvider.setDataConsumer(idxSystem);
    memoryProvider.start();

    List<DataEvent<String>> list = new ArrayList<DataEvent<String>>(2);
    list.add(new DataEvent<String>("hao,yan 0", "0"));
    list.add(new DataEvent<String>("hao,yan 1", "1"));
    memoryProvider.addEvents(list);
    memoryProvider.flush();

    idxSystem.syncWithVersion(10000, "1");
    List<ZoieMultiReader<IndexReader>> readers = null;
    IndexSearcher searcher = null;
    MultiReader reader = null;

    try {
      readers = idxSystem.getIndexReaders();
      reader = new MultiReader(readers.toArray(new IndexReader[readers.size()]), false);
      searcher = new IndexSearcher(reader);

      TopDocs hits = searcher.search(new TermQuery(new Term("contents", "hao,yan")), 10);
      assertEquals(1, hits.totalHits);
      assertEquals(String.valueOf((Integer.MAX_VALUE * 2L + 1L)),
        searcher.doc(hits.scoreDocs[0].doc).get("id"));

      hits = searcher.search(new TermQuery(new Term("contents", "hao")), 10);
      assertEquals(1, hits.totalHits);
      assertEquals(String.valueOf((Integer.MAX_VALUE * 2L)), searcher.doc(hits.scoreDocs[0].doc)
          .get("id"));
      idxSystem.returnIndexReaders(readers);
    } finally {
      try {
        if (reader != null) {
          reader.close();
          reader = null;
        }
      } finally {
        memoryProvider.stop();
        idxSystem.shutdown();
        deleteDirectory(idxDir);
      }
    }
  }

  @Test
  public void testRealtime2() throws ZoieException {
    File idxDir = getIdxDir();
    ZoieSystem<IndexReader, String> idxSystem = createZoie(idxDir, true,
      ZoieConfig.DEFAULT_VERSION_COMPARATOR);
    idxSystem.start();

    MemoryStreamDataProvider<String> memoryProvider = new MemoryStreamDataProvider<String>(
        ZoieConfig.DEFAULT_VERSION_COMPARATOR);
    memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE);
    memoryProvider.setDataConsumer(idxSystem);
    memoryProvider.start();

    try {
      int count = DataForTests.testdata.length;
      List<DataEvent<String>> list = new ArrayList<DataEvent<String>>(count);
      for (int i = 0; i < count; ++i) {
        list.add(new DataEvent<String>(DataForTests.testdata[i], "" + i));
      }
      memoryProvider.addEvents(list);
      memoryProvider.flush();

      idxSystem.flushEvents(10000);

      List<ZoieMultiReader<IndexReader>> readers = idxSystem.getIndexReaders();

      int numDocs = 0;
      for (ZoieMultiReader<IndexReader> r : readers) {
        numDocs += r.numDocs();
      }
      idxSystem.returnIndexReaders(readers);

      assertEquals(count, numDocs);
    } catch (IOException ioe) {
      throw new ZoieException(ioe.getMessage());
    } finally {
      memoryProvider.stop();
      idxSystem.shutdown();
      deleteDirectory(idxDir);
    }
  }

  private static class EvenUidPurgeFilter extends Filter {
    @Override
    public DocIdSet getDocIdSet(final AtomicReaderContext ctx, Bits bits) throws IOException {

      return new DocIdSet() {
        IndexReader reader = ctx.reader();

        @Override
        public DocIdSetIterator iterator() throws IOException {
          return new DocIdSetIterator() {

            int doc = -1;
            int maxdoc = reader.maxDoc();

            @Override
            public int advance(int target) throws IOException {
              doc = target - 1;
              return nextDoc();
            }

            @Override
            public int docID() {
              return doc;
            }

            @Override
            public int nextDoc() throws IOException {
              while (++doc < maxdoc) {
                long uid = Long.parseLong(reader.document(doc).get("id"));
                if (uid % 2 == 0) {
                  return doc;
                }
              }
              return DocIdSetIterator.NO_MORE_DOCS;
            }

            @Override
            public long cost() {
              // TODO Auto-generated method stub
              return 0;
            }

          };
        }

      };
    }
  }

  @Test
  public void testIndexEventListener() throws Exception {
    File idxDir = getIdxDir();
    final int[] flushNum = { 0 };
    final String[] flushVersion = { null };

    ZoieSystem<IndexReader, String> idxSystem = createZoie(idxDir, true,
      ZoieConfig.DEFAULT_VERSION_COMPARATOR, true);

    idxSystem.start();

    idxSystem.addIndexingEventListener(new IndexingEventListener() {

      @Override
      public void handleUpdatedDiskVersion(String version) {
        flushVersion[0] = version;
      }

      @Override
      public void handleIndexingEvent(IndexingEvent evt) {
        flushNum[0]++;
      }
    });

    MemoryStreamDataProvider<String> memoryProvider = new MemoryStreamDataProvider<String>(
        ZoieConfig.DEFAULT_VERSION_COMPARATOR);
    memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE);
    memoryProvider.setDataConsumer(idxSystem);
    memoryProvider.start();

    try {
      int count = DataForTests.testdata.length;
      List<DataEvent<String>> list = new ArrayList<DataEvent<String>>(count);
      for (int i = 0; i < count; ++i) {
        list.add(new DataEvent<String>(DataForTests.testdata[i], "" + i));
      }
      memoryProvider.addEvents(list);
      memoryProvider.flush();

      idxSystem.flushEvents(10000);
      String diskVersion = null;
      int wait = 0;
      while (!"9".equals(diskVersion)) {
        diskVersion = idxSystem.getCurrentDiskVersion();
        Thread.sleep(500);
        if (++wait >= 10) {
          break;
        }
      }
      assertTrue(wait < 10);
    } finally {
      memoryProvider.stop();
      idxSystem.shutdown();
      deleteDirectory(idxDir);
    }
    assertTrue(flushNum[0] > 0);
    assertEquals("9", flushVersion[0]);
  }

  @SuppressWarnings({ "unchecked", "rawtypes" })
  @Test
  public void testSegmentTermDocs() throws Exception {
    class DefaultInterpreter implements ZoieIndexableInterpreter<DataDoc> {

      @Override
      public ZoieIndexable convertAndInterpret(DataDoc src) {
        return src;
      }

    }

    File idxDir = getIdxDir();

    ZoieConfig zConfig = new ZoieConfig();

    ZoieSystem<?, DataDoc> zoie = ZoieSystem.buildDefaultInstance(idxDir, new DefaultInterpreter(),
      zConfig);
    zoie.start();

    Document d1 = new Document();
    StringField f1 = new StringField("num", "abcdef", Store.YES);
    d1.add(f1);

    Document d2 = new Document();
    StringField f2 = new StringField("num", "abcd", Store.YES);
    d2.add(f2);

    Document d3 = new Document();
    StringField f3 = new StringField("num", "abcde", Store.YES);
    d3.add(f3);

    DataEvent<DataDoc> de1 = new DataEvent<DataDoc>(new DataDoc(1, d1), "1");
    DataEvent<DataDoc> de2 = new DataEvent<DataDoc>(new DataDoc(2, d2), "1");
    DataEvent<DataDoc> de3 = new DataEvent<DataDoc>(new DataDoc(3, d3), "1");

    try {
      zoie.consume(Arrays.asList(de1, de2, de3));
      zoie.flushEvents(10000);

      List<?> readerList = zoie.getIndexReaders();
      // combine the readers
      MultiReader reader = new MultiReader(readerList.toArray(new IndexReader[readerList.size()]),
          false);
      // do search
      IndexSearcher searcher = new IndexSearcher(reader);
      QueryParser parser = new QueryParser(Version.LUCENE_43, "num", new StandardAnalyzer(
          Version.LUCENE_43));
      Query q = parser.parse("num:abc*");
      TopDocs ret = searcher.search(q, 100);
      TestCase.assertEquals(3, ret.totalHits);

      zoie.returnIndexReaders((List) readerList);

      de1 = new DataEvent<DataDoc>(new DataDoc(1), "2");
      de2 = new DataEvent<DataDoc>(new DataDoc(2), "2");
      de3 = new DataEvent<DataDoc>(new DataDoc(3), "2");
      zoie.consume(Arrays.asList(de1, de2, de3));

      zoie.flushEventsToMemoryIndex(10000);

      readerList = zoie.getIndexReaders();
      // combine the readers
      reader = new MultiReader(readerList.toArray(new IndexReader[readerList.size()]), false);
      // do search
      searcher = new IndexSearcher(reader);
      ret = searcher.search(q, 100);

      TestCase.assertEquals(0, ret.totalHits);
      zoie.returnIndexReaders((List) readerList);
    } catch (IOException ioe) {
      throw new ZoieException(ioe.getMessage());
    } finally {
      zoie.shutdown();
      deleteDirectory(idxDir);
    }
  }

  @Test
  public void testPurgeFilter() throws Exception {
    File idxDir = getIdxDir();
    ZoieSystem<IndexReader, String> idxSystem = createZoie(idxDir, true,
      ZoieConfig.DEFAULT_VERSION_COMPARATOR, true);

    idxSystem.setPurgeFilter(new EvenUidPurgeFilter());
    idxSystem.start();

    MemoryStreamDataProvider<String> memoryProvider = new MemoryStreamDataProvider<String>(
        ZoieConfig.DEFAULT_VERSION_COMPARATOR);
    memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE);
    memoryProvider.setDataConsumer(idxSystem);
    memoryProvider.start();

    try {
      int count = DataForTests.testdata.length;
      List<DataEvent<String>> list = new ArrayList<DataEvent<String>>(count);
      for (int i = 0; i < count; ++i) {
        list.add(new DataEvent<String>(DataForTests.testdata[i], "" + i));
      }
      memoryProvider.addEvents(list);
      memoryProvider.flush();

      idxSystem.flushEvents(10000);

      List<ZoieMultiReader<IndexReader>> readers = idxSystem.getIndexReaders();

      MultiReader multiReader = new MultiReader(readers.toArray(new IndexReader[0]), false);

      IndexSearcher searcher = new IndexSearcher(multiReader);

      int numDocs = searcher.search(new MatchAllDocsQuery(), 10).totalHits;

      log.info("numdocs: " + numDocs);
      TestCase.assertEquals(10, numDocs);

      idxSystem.returnIndexReaders(readers);

      idxSystem.getAdminMBean().flushToDiskIndex();

      idxSystem.refreshDiskReader();
      readers = idxSystem.getIndexReaders();

      multiReader = new MultiReader(readers.toArray(new IndexReader[0]), false);

      searcher = new IndexSearcher(multiReader);

      numDocs = searcher.search(new MatchAllDocsQuery(), 10).totalHits;

      numDocs = multiReader.numDocs();

      log.info("new numdocs: " + numDocs);
      TestCase.assertTrue("numdDocs should be 5, but it is " + numDocs, numDocs == 5);

      idxSystem.returnIndexReaders(readers);

    } catch (IOException ioe) {
      throw new ZoieException(ioe.getMessage());
    } finally {
      memoryProvider.stop();
      idxSystem.shutdown();
      deleteDirectory(idxDir);
    }
  }

  @Test
  public void testStore() throws ZoieException {
    File idxDir = getIdxDir();
    ZoieSystem<IndexReader, String> idxSystem = createZoie(idxDir, true,
      ZoieConfig.DEFAULT_VERSION_COMPARATOR);
    idxSystem.start();

    MemoryStreamDataProvider<String> memoryProvider = new MemoryStreamDataProvider<String>(
        ZoieConfig.DEFAULT_VERSION_COMPARATOR);
    memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE);
    memoryProvider.setDataConsumer(idxSystem);
    memoryProvider.start();

    try {
      int count = DataForTests.testdata.length;
      List<DataEvent<String>> list = new ArrayList<DataEvent<String>>(count);
      for (int i = 0; i < count; ++i) {
        list.add(new DataEvent<String>(DataForTests.testdata[i], "" + i));
      }
      memoryProvider.addEvents(list);
      memoryProvider.flush();

      idxSystem.flushEvents(5000);

      List<ZoieMultiReader<IndexReader>> readers = idxSystem.getIndexReaders();

      BytesRef data = null;
      for (ZoieMultiReader<IndexReader> r : readers) {
        data = r.getStoredValue(((Integer.MAX_VALUE) * 2L));
        if (data != null) break;
      }

      TestCase.assertNotNull(data);
      String val = data.utf8ToString();
      String[] parts = val.split(" ");
      long id = Long.parseLong(parts[parts.length - 1]);
      TestCase.assertEquals(0L, id);

      data = null;
      for (ZoieMultiReader<IndexReader> r : readers) {
        data = r.getStoredValue(((Integer.MAX_VALUE) * 2L) + 1L);
        if (data != null) break;
      }
      TestCase.assertNull(data);
      idxSystem.returnIndexReaders(readers);

    } catch (IOException ioe) {
      throw new ZoieException(ioe.getMessage());
    } finally {
      memoryProvider.stop();
      idxSystem.shutdown();
      deleteDirectory(idxDir);
    }
  }

  // hao: test for new zoieVersion
  @Test
  public void testRealtime() throws ZoieException {
    File idxDir = getIdxDir();
    ZoieSystem<IndexReader, String> idxSystem = createZoie(idxDir, true,
      ZoieConfig.DEFAULT_VERSION_COMPARATOR);
    idxSystem.start();
    String query = "zoie";
    QueryParser parser = new QueryParser(Version.LUCENE_43, "contents", idxSystem.getAnalyzer());
    Query q = null;
    try {
      q = parser.parse(query);
    } catch (Exception e) {
      throw new ZoieException(e.getMessage(), e);
    }
    MemoryStreamDataProvider<String> memoryProvider = new MemoryStreamDataProvider<String>(
        ZoieConfig.DEFAULT_VERSION_COMPARATOR);
    memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE);
    memoryProvider.setDataConsumer(idxSystem);
    memoryProvider.start();
    try {
      int count = DataForTests.testdata.length;
      List<DataEvent<String>> list = new ArrayList<DataEvent<String>>(count);

      for (int i = 0; i < count; ++i) {
        list.add(new DataEvent<String>(DataForTests.testdata[i], "" + i));
      }
      memoryProvider.addEvents(list);

      idxSystem.syncWithVersion(10000, "" + (count - 1));

      int repeat = 20;
      int idx = 0;
      int[] results = new int[repeat];
      int[] expected = new int[repeat];
      Arrays.fill(expected, count);

      // should be consumed by the idxing system
      IndexSearcher searcher = null;
      MultiReader reader = null;
      List<ZoieMultiReader<IndexReader>> readers = null;
      for (int i = 0; i < repeat; ++i) {
        try {
          readers = idxSystem.getIndexReaders();
          reader = new MultiReader(readers.toArray(new IndexReader[readers.size()]), false);
          searcher = new IndexSearcher(reader);

          TopDocs hits = searcher.search(q, 10);
          results[idx++] = hits.totalHits;

        } finally {
          try {
            if (searcher != null) {
              searcher = null;
              reader.close();
              reader = null;
            }
          } finally {
            idxSystem.returnIndexReaders(readers);
          }
        }
        try {
          Thread.sleep(30);
        } catch (InterruptedException e) {
          e.printStackTrace();
        }
      }

      assertEquals("maybe race condition in disk flush", Arrays.toString(expected),
        Arrays.toString(results));
    } catch (IOException ioe) {
      throw new ZoieException(ioe.getMessage());
    } finally {
      memoryProvider.stop();
      idxSystem.shutdown();
      deleteDirectory(idxDir);
    }
  }

  @Test
  public void testStreamDataProvider() throws ZoieException {
    MockDataLoader<Integer> consumer = new MockDataLoader<Integer>();
    MemoryStreamDataProvider<Integer> memoryProvider = new MemoryStreamDataProvider<Integer>(
        ZoieConfig.DEFAULT_VERSION_COMPARATOR);
    memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE);
    memoryProvider.setDataConsumer(consumer);
    memoryProvider.start();
    try {
      int count = 10;

      List<DataEvent<Integer>> list = new ArrayList<DataEvent<Integer>>(count);
      for (int i = 0; i < count; ++i) {
        list.add(new DataEvent<Integer>(i, "" + i));
      }
      memoryProvider.addEvents(list);

      memoryProvider.syncWithVersion(10000, "" + (count - 1));
      int num = consumer.getCount();
      assertEquals(num, count);
    } finally {
      memoryProvider.stop();
    }
  }

  @Test
  public void testStreamDataProviderFlush() throws ZoieException {
    MockDataLoader<Integer> consumer = new MockDataLoader<Integer>();
    MemoryStreamDataProvider<Integer> memoryProvider = new MemoryStreamDataProvider<Integer>(
        ZoieConfig.DEFAULT_VERSION_COMPARATOR);
    memoryProvider.setBatchSize(100);
    memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE);
    memoryProvider.setDataConsumer(consumer);
    memoryProvider.start();
    try {
      int count = 10;

      List<DataEvent<Integer>> list = new ArrayList<DataEvent<Integer>>(count);
      for (int i = 0; i < count; ++i) {
        list.add(new DataEvent<Integer>(i, "" + i));
      }
      memoryProvider.addEvents(list);

      memoryProvider.flush();

      int num = consumer.getCount();
      assertEquals(num, count);
    } finally {
      memoryProvider.stop();
    }
  }

  @Test
  public void testAsyncDataConsumer() throws ZoieException {
    final long[] delays = { 0L, 10L, 100L, 1000L };
    // final long[] delays = {100L, 1000L };
    final int[] batchSizes = { 1, 10, 100, 1000, 1000 };
    final int count = 1000;
    // final int count=TestData.testdata.length;
    final long timeout = 10000L;

    for (long delay : delays) {
      for (int batchSize : batchSizes) {

        if (delay * (count / batchSize + 1) > timeout) {
          continue; // skip this combination. it will take too long.
        }

        MockDataLoader<Integer> mockLoader = new MockDataLoader<Integer>();
        mockLoader.setDelay(delay);

        AsyncDataConsumer<Integer> asyncConsumer = new AsyncDataConsumer<Integer>(
            ZoieConfig.DEFAULT_VERSION_COMPARATOR);
        asyncConsumer.setDataConsumer(mockLoader);
        asyncConsumer.setBatchSize(batchSize);
        asyncConsumer.start();

        MemoryStreamDataProvider<Integer> memoryProvider = new MemoryStreamDataProvider<Integer>(
            ZoieConfig.DEFAULT_VERSION_COMPARATOR);
        memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE);

        memoryProvider.setDataConsumer(asyncConsumer);
        memoryProvider.start();
        memoryProvider.setBatchSize(batchSize);
        try {
          List<DataEvent<Integer>> list = new ArrayList<DataEvent<Integer>>(count);
          for (int i = 0; i < count; ++i) {
            list.add(new DataEvent<Integer>(i, "" + i));
          }
          memoryProvider.addEvents(list);

          boolean done = false;
          long start = System.currentTimeMillis();
          while (!done) {
            try {
              asyncConsumer.syncWithVersion(timeout, "" + (count - 1));
              done = true;
            } catch (ZoieException e) {
              if (!e.getMessage().contains("sync timed out")) throw e;
              else System.out
                  .println("sync time out could be legit for slow systems. Elapsed time: "
                      + (System.currentTimeMillis() - start) + "ms");
              if (System.currentTimeMillis() - start > 600000L) throw e;
            }
          }
          int num = mockLoader.getCount();
          assertEquals("batchSize=" + batchSize, num, count);
          assertTrue("batch not working",
            (mockLoader.getMaxBatch() > 1 || mockLoader.getMaxBatch() == batchSize));
        } finally {
          memoryProvider.stop();
          asyncConsumer.stop();
        }
      }
    }
  }

  @Test
  public void testDelSet() throws ZoieException {
    for (int i = 0; i < 2; i++) {
      testDelSetImpl();
    }
  }

  /**
   * One round of the concurrent update/search check: five daemon query threads repeatedly search
   * for "zoie" while this thread re-sends every entry of {@code DataForTests.testdata} up to
   * three more times under increasing versions. A query thread that sees a hit count different
   * from {@code DataForTests.testdata.length} records a mismatch and stops.
   *
   * @throws ZoieException if indexing fails or any query thread recorded an exception
   */
  private void testDelSetImpl() throws ZoieException {
    File idxDir = getIdxDir();
    final ZoieSystem<IndexReader, String> idxSystem = createZoie(idxDir, true, 100,
      ZoieConfig.DEFAULT_VERSION_COMPARATOR);
    idxSystem.getAdminMBean().setFreshness(50);
    idxSystem.start();
    int numThreads = 5;
    // NOTE(review): QueryThread is declared outside this file's visible source; this method
    // relies on its stop, mismatch, message and exception fields.
    QueryThread[] queryThreads = new QueryThread[numThreads];
    for (int i = 0; i < queryThreads.length; i++) {
      queryThreads[i] = new QueryThread() {
        @Override
        public void run() {
          final String query = "zoie";
          QueryParser parser = new QueryParser(Version.LUCENE_43, "contents",
              idxSystem.getAnalyzer());
          Query q;
          try {
            q = parser.parse(query);
          } catch (Exception e) {
            // Record the failure for the main thread and give up on this thread.
            exception = e;
            return;
          }

          int expected = DataForTests.testdata.length;
          while (!stop) {
            IndexSearcher searcher = null;
            List<ZoieMultiReader<IndexReader>> readers = null;
            MultiReader reader = null;
            try {
              readers = idxSystem.getIndexReaders();
              IndexReader[] subReaders = new IndexReader[readers.size()];
              for (int i = 0; i < subReaders.length; ++i) {
                subReaders[i] = readers.get(i);
              }
              reader = new MultiReader(subReaders, false);

              searcher = new IndexSearcher(reader);

              TopDocs hits = searcher.search(q, 10);
              int count = hits.totalHits;

              if (count != expected) {
                // Flag the mismatch, stop this thread, and dump per-reader and combined state
                // to stdout and the log.
                mismatch = true;
                message = "hit count: " + count + " / expected: " + expected;
                stop = true;
                StringBuffer sb = new StringBuffer();
                sb.append(message + "\n");
                sb.append("each\n");
                sb.append(groupDump(readers, q));
                sb.append("main\n");
                sb.append(dump(reader, hits));
                System.out.println(sb.toString());
                log.info(sb.toString());
              }
              Thread.sleep(2);
            } catch (Exception ex) {
              ex.printStackTrace();
              exception = ex;
              stop = true;
            } finally {
              // Close the combined reader (if a searcher was created) and always hand the
              // borrowed readers back.
              try {
                if (searcher != null) {
                  reader.close();
                  reader = null;
                  searcher = null;
                }
              } catch (IOException ioe) {
                log.error(ioe.getMessage(), ioe);
              } finally {
                idxSystem.returnIndexReaders(readers);
              }
            }
          }
        }

        // Searches each reader on its own (top 20) and concatenates the dumps.
        private String groupDump(List<ZoieMultiReader<IndexReader>> readers, Query q)
            throws IOException {
          StringBuffer sb = new StringBuffer();
          for (ZoieMultiReader<IndexReader> reader : readers) {
            sb.append(reader).append("\n");
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs hits = searcher.search(q, 20);
            sb.append(dump(reader, hits));
            searcher = null;
          }
          return sb.toString();
        }

        // Lists the stored "id" of every hit, then the stored "id" of every doc id below
        // maxDoc() of the given reader.
        private String dump(IndexReader reader, TopDocs hits) throws CorruptIndexException,
            IOException {
          StringBuffer sb = new StringBuffer();
          ScoreDoc[] sd = hits.scoreDocs;
          long[] uids = new long[sd.length];
          for (int i = 0; i < sd.length; i++) {
            Document doc = reader.document(sd[i].doc);
            uids[i] = Long.parseLong(doc.get("id"));
          }
          sb.append(Thread.currentThread() + Arrays.toString(uids)).append("\n");
          int max = reader.maxDoc();
          uids = new long[max];
          for (int i = 0; i < max; i++) {
            Document doc = reader.document(i);
            uids[i] = Long.parseLong(doc.get("id"));
          }
          sb.append("uids: " + Arrays.toString(uids)).append("\n");
          return sb.toString();
        }
      };
      queryThreads[i].setDaemon(true);
    }

    MemoryStreamDataProvider<String> memoryProvider = new MemoryStreamDataProvider<String>(
        ZoieConfig.DEFAULT_VERSION_COMPARATOR);
    memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE);
    memoryProvider.setDataConsumer(idxSystem);
    memoryProvider.start();
    try {
      idxSystem.setBatchSize(10);

      // Initial load: versions "0" .. "count - 1"; wait until the last one is visible.
      final int count = DataForTests.testdata.length;
      List<DataEvent<String>> list = new ArrayList<DataEvent<String>>(count);
      for (int i = 0; i < count; i++) {
        list.add(new DataEvent<String>(DataForTests.testdata[i], "" + i));
      }
      memoryProvider.addEvents(list);
      idxSystem.syncWithVersion(100000, "" + (count - 1));

      for (QueryThread queryThread : queryThreads)
        queryThread.start();

      // Re-send every document one at a time under version n * count + i, syncing after each,
      // while the query threads keep searching.
      for (int n = 1; n <= 3; n++) {
        for (int i = 0; i < count; i++) {
          long version = n * count + i;
          list = new ArrayList<DataEvent<String>>(1);
          list.add(new DataEvent<String>(DataForTests.testdata[i], "" + version));
          memoryProvider.addEvents(list);

          idxSystem.syncWithVersion(100000, "" + version);
        }
        // Abort the remaining passes as soon as any query thread has stopped itself.
        boolean stopNow = false;
        for (QueryThread queryThread : queryThreads)
          stopNow |= queryThread.stop;
        if (stopNow) break;
      }
      for (QueryThread queryThread : queryThreads)
        queryThread.stop = true; // stop all query threads
      for (QueryThread queryThread : queryThreads) {
        queryThread.join();
        assertTrue("count mismatch[" + queryThread.message + "]", !queryThread.mismatch);
      }
    } catch (Exception e) {
      // Rethrown (wrapped) as soon as a query thread without a recorded exception is found;
      // if every thread recorded one, e is dropped here and the loop after the finally block
      // reports a query-thread exception instead. An assertTrue failure above is an
      // AssertionError, not an Exception, so it is not caught here.
      for (QueryThread queryThread : queryThreads) {
        if (queryThread.exception == null) throw new ZoieException(e);
      }
    } finally {
      memoryProvider.stop();
      idxSystem.shutdown();
      deleteDirectory(idxDir);
    }
    System.out.println(" done round");
    log.info(" done round");
    // Surface the first exception recorded by a query thread, if any.
    for (QueryThread queryThread : queryThreads) {
      if (queryThread.exception != null) throw new ZoieException(queryThread.exception);
    }
  }

  /**
   * Checks {@link DocIDMapperImpl#getDocID} against a precomputed answer list over 10 rounds of
   * randomly generated uids. Each round builds 500000 uid slots and 100000 queries: the first
   * 10000 queries reuse uids from the uid list, the rest are uids absent from it.
   */
  @Test
  public void testDocIDMapper() {
    final long[] uidList = new long[500000];
    long[] qryList = new long[100000];
    int intersection = 10000;
    int del = 5;
    // ansList1 holds the expected doc ids, ansList2 what the mapper actually returns.
    int[] ansList1 = new int[qryList.length];
    int[] ansList2 = new int[qryList.length];
    java.util.Random rand = new java.util.Random(System.currentTimeMillis());
    DocIDMapperImpl mapper = null;

    for (int k = 0; k < 10; k++) {
      java.util.HashSet<Long> uidset = new java.util.HashSet<Long>();
      java.util.HashSet<Long> qryset = new java.util.HashSet<Long>();
      long id;
      // Slots [0, intersection): uids that are also queried. Every index divisible by del is
      // stored as DELETED_UID, so querying its uid is expected to yield -1; all other
      // indices are expected to map to their own position.
      for (int i = 0; i < intersection; i++) {
        do {
          id = rand.nextInt() + (Integer.MAX_VALUE) * 2L;
        } while (id == ZoieSegmentReader.DELETED_UID || uidset.contains(id));

        uidset.add(id);
        uidList[i] = (i % del) > 0 ? id : ZoieSegmentReader.DELETED_UID;
        qryList[i] = id;
        ansList1[i] = (i % del) > 0 ? i : -1;
      }
      // Remaining uid slots: unique uids that are never queried, with the same deletion
      // pattern.
      for (int i = intersection; i < uidList.length; i++) {
        do {
          id = rand.nextInt() + (Integer.MAX_VALUE) * 2L;
        } while (id == ZoieSegmentReader.DELETED_UID || uidset.contains(id));

        uidset.add(id);
        uidList[i] = (i % del) > 0 ? id : ZoieSegmentReader.DELETED_UID;
      }
      // Remaining queries: unique uids that are not in the uid set, so -1 is expected.
      for (int i = intersection; i < qryList.length; i++) {
        do {
          id = rand.nextInt() + (Integer.MAX_VALUE) * 2L;
        } while (id == ZoieSegmentReader.DELETED_UID || uidset.contains(id) || qryset.contains(id));

        qryset.add(id);
        qryList[i] = id;
        ansList1[i] = -1;
      }

      mapper = new DocIDMapperImpl(uidList);

      for (int i = 0; i < qryList.length; i++) {
        ansList2[i] = mapper.getDocID(qryList[i]);
      }
      assertTrue("wrong result", Arrays.equals(ansList1, ansList2));
    }
  }

  @Test
  public void testExportImport() throws ZoieException, IOException {
    File idxDir = getIdxDir();
    final ZoieSystem<IndexReader, String> idxSystem = createZoie(idxDir, true,
      ZoieConfig.DEFAULT_VERSION_COMPARATOR);
    idxSystem.start();

    DirectoryManager dirMgr = new DefaultDirectoryManager(idxDir);

    String query = "zoie";
    QueryParser parser = new QueryParser(Version.LUCENE_43, "contents", idxSystem.getAnalyzer());
    Query q = null;
    try {
      q = parser.parse(query);
    } catch (Exception e) {
      throw new ZoieException(e.getMessage(), e);
    }

    try {
      List<DataEvent<String>> list;

      list = new ArrayList<DataEvent<String>>(DataForTests.testdata.length);
      for (int i = 0; i < DataForTests.testdata.length; ++i) {
        list.add(new DataEvent<String>(DataForTests.testdata[i], "" + i));
      }
      idxSystem.consume(list);
      idxSystem.flushEvents(100000);

      assertEquals("index version mismatch after first flush", DataForTests.testdata.length - 1,
        DataForTests.testdata.length - 1);

      int hits = countHits(idxSystem, q);

      RandomAccessFile exportFile;
      FileChannel channel;

      exportFile = new RandomAccessFile(new File(getTmpDir(), "zoie_export.dat"), "rw");
      channel = exportFile.getChannel();
      idxSystem.exportSnapshot(channel);
      channel.close();
      exportFile.close();
      exportFile = null;
      channel = null;

      list = new ArrayList<DataEvent<String>>(DataForTests.testdata2.length);
      for (int i = 0; i < DataForTests.testdata2.length; ++i) {

        list.add(new DataEvent<String>(DataForTests.testdata.length + DataForTests.testdata2[i], ""
            + (DataForTests.testdata.length + i)));
      }
      idxSystem.consume(list);
      idxSystem.flushEvents(100000);
      String zvt = dirMgr.getVersion();
      assertEquals("index version mismatch after second flush", DataForTests.testdata.length
          + DataForTests.testdata2.length - 1, (long) Long.valueOf(zvt));
      assertEquals("should have no hits", 0, countHits(idxSystem, q));

      exportFile = new RandomAccessFile(new File(getTmpDir(), "zoie_export.dat"), "r");
      channel = exportFile.getChannel();
      idxSystem.importSnapshot(channel);
      idxSystem.flushEvents(10000);
      channel.close();
      exportFile.close();

      assertEquals("count is wrong", hits, countHits(idxSystem, q));
    } catch (ZoieException e) {
      throw e;
    } finally {
      idxSystem.shutdown();
      deleteDirectory(idxDir);
    }
  }

  @Test
  public void testUIDDocIdSet() throws IOException {
    LongOpenHashSet uidset = new LongOpenHashSet();
    int count = 100;
    Random rand = new Random();
    int id;
    for (int i = 0; i < count; ++i) {
      do {
        id = rand.nextInt();
      } while (id == ZoieSegmentReader.DELETED_UID || uidset.contains(id));
      uidset.add(id);
    }

    long[] uidArray = uidset.toLongArray();

    final long[] even = new long[uidArray.length / 2];
    int[] ans = new int[even.length];
    for (int i = 0; i < even.length; ++i) {
      even[i] = uidArray[i * 2];
      ans[i] = i;
    }

    DocIDMapperImpl mapper = new DocIDMapperImpl(even);
    UIDDocIdSet uidSet = new UIDDocIdSet(even, mapper);
    DocIdSetIterator docidIter = uidSet.iterator();
    IntArrayList intList = new IntArrayList();
    int docid;
    while ((docid = docidIter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      intList.add(docid);
    }
    assertTrue("wrong result from iter", Arrays.equals(ans, intList.toIntArray()));

    long[] newidArray = new long[count];
    for (int i = 0; i < count; ++i) {
      newidArray[i] = i;
    }

    mapper = new DocIDMapperImpl(newidArray);
    uidSet = new UIDDocIdSet(newidArray, mapper);
    docidIter = uidSet.iterator();
    intList = new IntArrayList();
    for (int i = 0; i < newidArray.length; ++i) {
      docid = docidIter.advance(i * 10);
      if (docid == DocIdSetIterator.NO_MORE_DOCS) break;
      intList.add(docid);
      docid = docidIter.nextDoc();
      if (docid == DocIdSetIterator.NO_MORE_DOCS) break;
      intList.add(docid);
    }

    int[] answer = new int[] { 0, 1, 10, 11, 20, 21, 30, 31, 40, 41, 50, 51, 60, 61, 70, 71, 80,
        81, 90, 91 };
    assertTrue("wrong result from mix of next and skip",
      Arrays.equals(answer, intList.toIntArray()));
  }
}
TOP

Related Classes of proj.zoie.test.ZoieTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.