Package com.gentics.cr.lucene.pdftest

Source Code of com.gentics.cr.lucene.pdftest.PDFHighlighterTest

package com.gentics.cr.lucene.pdftest;

import java.io.InputStream;

import junit.framework.TestCase;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.poi.util.IOUtils;
import org.junit.After;
import org.junit.Before;

import com.gentics.cr.CRResolvableBean;
import com.gentics.cr.configuration.GenericConfiguration;
import com.gentics.cr.lucene.LuceneVersion;
import com.gentics.cr.lucene.indexer.transformer.ContentTransformer;
import com.gentics.cr.lucene.indexer.transformer.pdf.PDFContentTransformer;
import com.gentics.cr.lucene.search.highlight.ContentHighlighter;
import com.gentics.cr.lucene.search.highlight.PhraseBolder;
import com.gentics.cr.lucene.search.highlight.VectorBolder;
import com.gentics.cr.lucene.search.highlight.WhitespaceVectorBolder;

public class PDFHighlighterTest extends TestCase {

  private static final int HITS = 1;

  CRResolvableBean bean;
  GenericConfiguration config;

  Directory dir;
  IndexSearcher searcher;
  Analyzer analyzer;
  Query query;

  @Before
  public void setUp() throws Exception {
    bean = new CRResolvableBean();

    InputStream stream = PDFHighlighterTest.class.getResourceAsStream("test.pdf");
    byte[] arr = IOUtils.toByteArray(stream);
    bean.set("binarycontent", arr);

    config = new GenericConfiguration();
    config.set("attribute", "binarycontent");
    analyzer = new StandardAnalyzer(LuceneVersion.getVersion());
    dir = new RAMDirectory();
    prepareIndex();

    searcher = new IndexSearcher(dir);

    QueryParser parser = new QueryParser(LuceneVersion.getVersion(), "binarycontent", analyzer);

    query = parser.parse("binarycontent:(ahst~0.5)");

    query = query.rewrite(searcher.getIndexReader());
  }

  private Document getDocument(CRResolvableBean bean) throws Exception {
    //TRANSFORM BEAN
    ContentTransformer t = new PDFContentTransformer(config);
    t.processBean(bean);

    //CREATE DOCUMENT
    Document doc = new Document();
    Object value = bean.get("binarycontent");
    Field f = new Field("binarycontent", value.toString(), Store.YES, Field.Index.ANALYZED,
        TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(new Field("testid", "pdftest", Field.Store.YES, Field.Index.NOT_ANALYZED));
    return doc;
  }

  private void prepareIndex() throws Exception {
    IndexWriter writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.addDocument(getDocument(bean));
    writer.close();

  }

  public void testVectorBolder() throws Exception {
    System.out.println("VECTOR");
    VectorBolder h = new VectorBolder(new GenericConfiguration());
    String ret = h.highlight(query, searcher.getIndexReader(), 0, "binarycontent");
    System.out.println(ret);

    assertTrue(ret != null && !"".equals(ret));
  }

  public void testWhitespaceVectorBolder() throws Exception {
    System.out.println("WHITESPACEVECTOR");
    WhitespaceVectorBolder h = new WhitespaceVectorBolder(new GenericConfiguration());
    String ret = h.highlight(query, searcher.getIndexReader(), 0, "binarycontent");
    System.out.println(ret);

    assertTrue(ret != null && !"".equals(ret));
  }

  public void testPhraseBolder2() throws Exception {
    System.out.println("PHRASE2");
    ContentHighlighter h = new PhraseBolder(new GenericConfiguration());
    CRResolvableBean crBean = new CRResolvableBean();
    crBean.set("binarycontent", "this is a test (AHSt)");
    String ret = h.highlight((String) crBean.get("binarycontent"), query);
    System.out.println(ret);

    assertTrue(ret != null && !"".equals(ret));
  }

  @After
  public void tearDown() throws Exception {

  }
}
TOP

Related Classes of com.gentics.cr.lucene.pdftest.PDFHighlighterTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.