/**
* Copyright 2014 National University of Ireland, Galway.
*
* This file is part of the SIREn project. Project and contact information:
*
* https://github.com/rdelbru/SIREn
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sindice.siren.index.codecs.block;
import java.io.IOException;
import java.util.Set;
import java.util.TreeSet;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.junit.Test;
import org.sindice.siren.index.codecs.CodecTestCase;
import org.sindice.siren.index.codecs.siren10.DocsFreqBlockIndexInput;
import org.sindice.siren.index.codecs.siren10.DocsFreqBlockIndexInput.DocsFreqBlockReader;
import org.sindice.siren.index.codecs.siren10.DocsFreqBlockIndexOutput;
import org.sindice.siren.index.codecs.siren10.DocsFreqBlockIndexOutput.DocsFreqBlockWriter;
import org.sindice.siren.index.codecs.siren10.Siren10BlockStreamFactory;
public class TestAForCodec extends CodecTestCase {
private DocsFreqBlockIndexOutput getIndexOutput(final int blockSize) throws IOException {
final Siren10BlockStreamFactory factory = new Siren10BlockStreamFactory(blockSize);
factory.setDocsBlockCompressor(new AForBlockCompressor());
factory.setFreqBlockCompressor(new AForBlockCompressor());
return factory.createDocsFreqOutput(directory, "test", newIOContext(random()));
}
private DocsFreqBlockIndexInput getIndexInput() throws IOException {
final Siren10BlockStreamFactory factory = new Siren10BlockStreamFactory(0);
factory.setDocsBlockDecompressor(new AForBlockDecompressor());
factory.setFreqBlockDecompressor(new AForBlockDecompressor());
return factory.openDocsFreqInput(directory, "test", newIOContext(random()));
}
public void testReadDoc() throws IOException {
final DocsFreqBlockIndexOutput out = this.getIndexOutput(512);
final DocsFreqBlockWriter writer = out.getBlockWriter();
writer.setNodeBlockIndex(out.index());
writer.setPosBlockIndex(out.index());
for (int i = 0; i < 11777; i++) {
if (writer.isFull()) {
writer.flush();
}
writer.write(i);
}
writer.flush(); // flush remaining data
out.close();
final DocsFreqBlockIndexInput in = this.getIndexInput();
final DocsFreqBlockReader reader = in.getBlockReader();
reader.setNodeBlockIndex(in.index());
reader.setPosBlockIndex(in.index());
for (int i = 0; i < 11777; i++) {
if (reader.isExhausted()) {
reader.nextBlock();
}
assertEquals(i, reader.nextDocument());
}
in.close();
}
public void testReadDocAndFreq() throws IOException {
final DocsFreqBlockIndexOutput out = this.getIndexOutput(512);
final DocsFreqBlockWriter writer = out.getBlockWriter();
writer.setNodeBlockIndex(out.index());
writer.setPosBlockIndex(out.index());
for (int i = 0; i < 11777; i++) {
if (writer.isFull()) {
writer.flush();
}
writer.write(i);
writer.writeNodeFreq(random().nextInt(10) + 1);
}
writer.flush(); // flush remaining data
out.close();
final DocsFreqBlockIndexInput in = this.getIndexInput();
final DocsFreqBlockReader reader = in.getBlockReader();
reader.setNodeBlockIndex(in.index());
reader.setPosBlockIndex(in.index());
for (int i = 0; i < 11777; i++) {
if (reader.isExhausted()) {
reader.nextBlock();
}
assertEquals(i, reader.nextDocument());
final int frq = reader.nextNodeFreq();
assertTrue(frq > 0);
assertTrue(frq <= 10);
}
in.close();
}
public void testRandomDoc() throws IOException {
final int blockSize = BLOCK_SIZES[random().nextInt(BLOCK_SIZES.length)];
final DocsFreqBlockIndexOutput out = this.getIndexOutput(blockSize);
final DocsFreqBlockWriter writer = out.getBlockWriter();
writer.setNodeBlockIndex(out.index());
writer.setPosBlockIndex(out.index());
// generate doc ids
final Set<Integer> docIds = new TreeSet<Integer>();
final int lenght = (int) this.nextLong(128000, 512000);
for (int i = 0; i < lenght; i++) {
docIds.add((int) this.nextLong(0, ((1L << 31) - 1)));
}
for (final int docId : docIds) {
if (writer.isFull()) {
writer.flush();
}
writer.write(docId);
}
writer.flush(); // flush remaining data
out.close();
final DocsFreqBlockIndexInput in = this.getIndexInput();
final DocsFreqBlockReader reader = in.getBlockReader();
reader.setNodeBlockIndex(in.index());
reader.setPosBlockIndex(in.index());
for (final int docId : docIds) {
if (reader.isExhausted()) {
reader.nextBlock();
}
assertEquals(docId, reader.nextDocument());
}
in.close();
}
@Test
public void testIntegerRange() throws Exception {
this.doTestIntegerRange(1, 32, new AForBlockCompressor(), new AForBlockDecompressor());
}
@Test
public void testShortPostingList() throws IOException {
final DocsFreqBlockIndexOutput out = this.getIndexOutput(512);
final DocsFreqBlockWriter writer = out.getBlockWriter();
writer.setNodeBlockIndex(out.index());
writer.setPosBlockIndex(out.index());
for (int i = 0; i < 5; i++) {
if (writer.isFull()) {
writer.flush();
}
writer.write(i);
}
writer.flush(); // flush remaining data
out.close();
final DocsFreqBlockIndexInput in = this.getIndexInput();
final DocsFreqBlockReader reader = in.getBlockReader();
reader.setNodeBlockIndex(in.index());
reader.setPosBlockIndex(in.index());
for (int i = 0; i < 5; i++) {
if (reader.isExhausted()) {
reader.nextBlock();
}
assertEquals(i, reader.nextDocument());
}
in.close();
}
@Test
public void testIncompleteFrame() throws IOException {
final BlockCompressor compressor = new AForBlockCompressor();
final IntsRef input = new IntsRef(64);
final BytesRef output = new BytesRef(compressor.maxCompressedSize(64));
// fill first part with 1
for (int i = 0; i < 33; i++) {
input.ints[i] = 1;
}
// fill the rest with random numbers
for (int i = 33; i < 64; i++) {
input.ints[i] = (int) this.nextLong(64, Short.MAX_VALUE);
}
input.offset = 0;
input.length = 33;
// the random numbers after the end of the input array should not impact
// compression
compressor.compress(input, output);
// should be frame code 1 : 32 ints encoded with 1 bits
assertEquals(1, output.bytes[0]);
// followed by 4 bytes at 255
assertEquals(0xFF, output.bytes[1] & 0xFF);
assertEquals(0xFF, output.bytes[2] & 0xFF);
assertEquals(0xFF, output.bytes[3] & 0xFF);
assertEquals(0xFF, output.bytes[4] & 0xFF);
// then frame code 34 : 16 ints encoded with 1 bits
assertEquals(34, output.bytes[5]);
// followed by 1 byte with at least 128 and a second byte with 0
assertEquals(128, output.bytes[6] & 0x80);
assertEquals(0, output.bytes[7] & 0xFF);
// followed by frame code 33: 16 ints encoded with 0 bits
assertEquals(33, output.bytes[8]);
}
}