package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import junit.framework.TestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.document.*;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.RAMDirectory;
import java.io.Reader;
import java.io.IOException;
public class TestDocumentWriter extends TestCase {
private RAMDirectory dir;
public TestDocumentWriter(String s) {
super(s);
}
protected void setUp() {
dir = new RAMDirectory();
}
protected void tearDown() {
}
public void test() {
assertTrue(dir != null);
}
public void testAddDocument() throws Exception {
Document testDoc = new Document();
DocHelper.setupDoc(testDoc);
Analyzer analyzer = new WhitespaceAnalyzer();
Similarity similarity = Similarity.getDefault();
DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
String segName = "test";
writer.addDocument(segName, testDoc);
//After adding the document, we should be able to read it back in
SegmentReader reader = SegmentReader.get(new SegmentInfo(segName, 1, dir));
assertTrue(reader != null);
Document doc = reader.document(0);
assertTrue(doc != null);
//System.out.println("Document: " + doc);
Fieldable [] fields = doc.getFields("textField2");
assertTrue(fields != null && fields.length == 1);
assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_2_TEXT));
assertTrue(fields[0].isTermVectorStored());
fields = doc.getFields("textField1");
assertTrue(fields != null && fields.length == 1);
assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_1_TEXT));
assertFalse(fields[0].isTermVectorStored());
fields = doc.getFields("keyField");
assertTrue(fields != null && fields.length == 1);
assertTrue(fields[0].stringValue().equals(DocHelper.KEYWORD_TEXT));
fields = doc.getFields(DocHelper.NO_NORMS_KEY);
assertTrue(fields != null && fields.length == 1);
assertTrue(fields[0].stringValue().equals(DocHelper.NO_NORMS_TEXT));
fields = doc.getFields(DocHelper.TEXT_FIELD_3_KEY);
assertTrue(fields != null && fields.length == 1);
assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_3_TEXT));
// test that the norm file is not present if omitNorms is true
for (int i = 0; i < reader.fieldInfos.size(); i++) {
FieldInfo fi = reader.fieldInfos.fieldInfo(i);
if (fi.isIndexed) {
assertTrue(fi.omitNorms == !dir.fileExists(segName + ".f" + i));
}
}
}
public void testPositionIncrementGap() throws IOException {
Analyzer analyzer = new Analyzer() {
public TokenStream tokenStream(String fieldName, Reader reader) {
return new WhitespaceTokenizer(reader);
}
public int getPositionIncrementGap(String fieldName) {
return 500;
}
};
Similarity similarity = Similarity.getDefault();
DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
Document doc = new Document();
doc.add(new Field("repeated", "repeated one", Field.Store.YES, Field.Index.TOKENIZED));
doc.add(new Field("repeated", "repeated two", Field.Store.YES, Field.Index.TOKENIZED));
String segName = "test";
writer.addDocument(segName, doc);
SegmentReader reader = SegmentReader.get(new SegmentInfo(segName, 1, dir));
TermPositions termPositions = reader.termPositions(new Term("repeated", "repeated"));
assertTrue(termPositions.next());
int freq = termPositions.freq();
assertEquals(2, freq);
assertEquals(0, termPositions.nextPosition());
assertEquals(502, termPositions.nextPosition());
}
public void testPreAnalyzedField() throws IOException {
Similarity similarity = Similarity.getDefault();
DocumentWriter writer = new DocumentWriter(dir, new SimpleAnalyzer(), similarity, 50);
Document doc = new Document();
doc.add(new Field("preanalyzed", new TokenStream() {
private String[] tokens = new String[] {"term1", "term2", "term3", "term2"};
private int index = 0;
public Token next() throws IOException {
if (index == tokens.length) {
return null;
} else {
return new Token(tokens[index++], 0, 0);
}
}
}, TermVector.NO));
String segName = "test";
writer.addDocument(segName, doc);
SegmentReader reader = SegmentReader.get(new SegmentInfo(segName, 1, dir));
TermPositions termPositions = reader.termPositions(new Term("preanalyzed", "term1"));
assertTrue(termPositions.next());
assertEquals(1, termPositions.freq());
assertEquals(0, termPositions.nextPosition());
termPositions.seek(new Term("preanalyzed", "term2"));
assertTrue(termPositions.next());
assertEquals(2, termPositions.freq());
assertEquals(1, termPositions.nextPosition());
assertEquals(3, termPositions.nextPosition());
termPositions.seek(new Term("preanalyzed", "term3"));
assertTrue(termPositions.next());
assertEquals(1, termPositions.freq());
assertEquals(2, termPositions.nextPosition());
}
}