// Source code of org.apache.lucene.index.TestDocumentWriter

package org.apache.lucene.index;


/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


import junit.framework.TestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.document.*;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.RAMDirectory;


import java.io.Reader;
import java.io.IOException;


public class TestDocumentWriter extends TestCase {
  private RAMDirectory dir;


  public TestDocumentWriter(String s) {
    super(s);
  }


  protected void setUp() {
    dir = new RAMDirectory();
  }


  protected void tearDown() {


  }


  public void test() {
    assertTrue(dir != null);


  }


  public void testAddDocument() throws Exception {
    Document testDoc = new Document();
    DocHelper.setupDoc(testDoc);
    Analyzer analyzer = new WhitespaceAnalyzer();
    Similarity similarity = Similarity.getDefault();
    DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
    String segName = "test";
    writer.addDocument(segName, testDoc);
    //After adding the document, we should be able to read it back in
    SegmentReader reader = SegmentReader.get(new SegmentInfo(segName, 1, dir));
    assertTrue(reader != null);
    Document doc = reader.document(0);
    assertTrue(doc != null);


    //System.out.println("Document: " + doc);
    Fieldable [] fields = doc.getFields("textField2");
    assertTrue(fields != null && fields.length == 1);
    assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_2_TEXT));
    assertTrue(fields[0].isTermVectorStored());


    fields = doc.getFields("textField1");
    assertTrue(fields != null && fields.length == 1);
    assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_1_TEXT));
    assertFalse(fields[0].isTermVectorStored());


    fields = doc.getFields("keyField");
    assertTrue(fields != null && fields.length == 1);
    assertTrue(fields[0].stringValue().equals(DocHelper.KEYWORD_TEXT));


    fields = doc.getFields(DocHelper.NO_NORMS_KEY);
    assertTrue(fields != null && fields.length == 1);
    assertTrue(fields[0].stringValue().equals(DocHelper.NO_NORMS_TEXT));


    fields = doc.getFields(DocHelper.TEXT_FIELD_3_KEY);
    assertTrue(fields != null && fields.length == 1);
    assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_3_TEXT));


    // test that the norm file is not present if omitNorms is true
    for (int i = 0; i < reader.fieldInfos.size(); i++) {
      FieldInfo fi = reader.fieldInfos.fieldInfo(i);
      if (fi.isIndexed) {
        assertTrue(fi.omitNorms == !dir.fileExists(segName + ".f" + i));
      }
    }


  }


  public void testPositionIncrementGap() throws IOException {
    Analyzer analyzer = new Analyzer() {
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new WhitespaceTokenizer(reader);
      }


      public int getPositionIncrementGap(String fieldName) {
        return 500;
      }
    };


    Similarity similarity = Similarity.getDefault();
    DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
    Document doc = new Document();
    doc.add(new Field("repeated", "repeated one", Field.Store.YES, Field.Index.TOKENIZED));
    doc.add(new Field("repeated", "repeated two", Field.Store.YES, Field.Index.TOKENIZED));


    String segName = "test";
    writer.addDocument(segName, doc);
    SegmentReader reader = SegmentReader.get(new SegmentInfo(segName, 1, dir));


    TermPositions termPositions = reader.termPositions(new Term("repeated", "repeated"));
    assertTrue(termPositions.next());
    int freq = termPositions.freq();
    assertEquals(2, freq);
    assertEquals(0, termPositions.nextPosition());
    assertEquals(502, termPositions.nextPosition());
  }
  
  public void testPreAnalyzedField() throws IOException {
    Similarity similarity = Similarity.getDefault();
    DocumentWriter writer = new DocumentWriter(dir, new SimpleAnalyzer(), similarity, 50);
    Document doc = new Document();
    
    doc.add(new Field("preanalyzed", new TokenStream() {
      private String[] tokens = new String[] {"term1", "term2", "term3", "term2"};
      private int index = 0;
      
      public Token next() throws IOException {
        if (index == tokens.length) {
          return null;
        } else {
          return new Token(tokens[index++], 0, 0);
        }        
      }
      
    }, TermVector.NO));
    
    String segName = "test";
    writer.addDocument(segName, doc);
    SegmentReader reader = SegmentReader.get(new SegmentInfo(segName, 1, dir));


    TermPositions termPositions = reader.termPositions(new Term("preanalyzed", "term1"));
    assertTrue(termPositions.next());
    assertEquals(1, termPositions.freq());
    assertEquals(0, termPositions.nextPosition());


    termPositions.seek(new Term("preanalyzed", "term2"));
    assertTrue(termPositions.next());
    assertEquals(2, termPositions.freq());
    assertEquals(1, termPositions.nextPosition());
    assertEquals(3, termPositions.nextPosition());
    
    termPositions.seek(new Term("preanalyzed", "term3"));
    assertTrue(termPositions.next());
    assertEquals(1, termPositions.freq());
    assertEquals(2, termPositions.nextPosition());


  }
}
// Source code of org.apache.lucene.index.TestDocumentWriter

// Related classes of org.apache.lucene.index.TestDocumentWriter