Package com.google.enterprise.connector.db.diffing

Source Code of com.google.enterprise.connector.db.diffing.LobDocumentBuilderTest

// Copyright 2011 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.enterprise.connector.db.diffing;

import static org.easymock.EasyMock.anyInt;
import static org.easymock.EasyMock.createMock;
import static org.easymock.EasyMock.expect;
import static org.easymock.EasyMock.replay;
import static org.easymock.EasyMock.verify;

import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.ByteStreams;
import com.google.common.io.Files;
import com.google.enterprise.connector.db.DBClient;
import com.google.enterprise.connector.db.DBException;
import com.google.enterprise.connector.db.testing.MockClient;
import com.google.enterprise.connector.spi.Property;
import com.google.enterprise.connector.spi.RepositoryException;
import com.google.enterprise.connector.spi.SkippedDocumentException;
import com.google.enterprise.connector.spi.SpiConstants;
import com.google.enterprise.connector.spi.Value;
import com.google.enterprise.connector.spiimpl.BinaryValue;
import com.google.enterprise.connector.traversal.FileSizeLimitInfo;
import com.google.enterprise.connector.traversal.MimeTypeMap;
import com.google.enterprise.connector.util.InputStreamFactory;
import com.google.enterprise.connector.util.MimeTypeDetector;

import org.easymock.IAnswer;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.sql.Blob;
import java.sql.Clob;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.sql.rowset.serial.SerialClob;

public class LobDocumentBuilderTest extends DocumentBuilderFixture {
  /**
   * Runs the fixture's own setup and then hands the inherited traversal
   * context to {@link MimeTypeDetector}, so that MIME type detection in
   * these tests is configured before any document is built.
   */
  @Override
  protected void setUp() throws Exception {
    super.setUp();

    // Connector manager does this in production.
    MimeTypeDetector.setTraversalContext(context);
  }

  public void testPrimaryKeySkipped() throws Exception {
    Object docid = 2;
    String expectedDocid = "B/" + docid;
    Object clobContent = "hello, world";
    Map<String, Object> row = ImmutableMap.of(
        primaryKeyColumn, docid, dbContext.getLobField(), clobContent);

    FieldNameBean bean = new FieldNameBean(dbContext) {
        @Override public String get() { return primaryKeyColumn; }
        @Override public void set(String value) {
          dbContext.setPrimaryKeys(value); }
        @Override public List<String> getNameVariations() {
          return ImmutableList.of("id", "Id", "ID", "  id", " iD  "); } };

    // We don't care about the property value testing, just test docid.
    testFieldName("primaryKey", bean,
        new LobDocumentBuilder(dbContext, context), row,
        SpiConstants.PROPNAME_DOCID, expectedDocid);
  }

  public void testLobFieldValue() throws Exception {
    Object clobContent = "hello, world";
    String originalName = dbContext.getLobField();
    Map<String, Object> row =
        ImmutableMap.of(primaryKeyColumn, 2, originalName, clobContent);

    testFieldName("lobField", new LobDocumentBuilder(dbContext, context),
        row, SpiConstants.PROPNAME_CONTENT, clobContent);
  }

  /**
   * Tests that the lobField is filtered from the XML map, which calls
   * toString on the map values. In order to avoid extraneous calls to
   * toString in LobDocumentBuilder.getBinaryContent when extracting
   * the LOB value, we need to supply it as either a String (which we
   * can't override toString on) or a Clob. EasyMock cannot override
   * toString, so I picked a random class I found that implements Clob
   * so I don't have to, namely javax.sql.rowset.serial.SerialClob.
   */
  public void testLobFieldFiltered() throws Exception {
    Object docid = 2;
    String expectedDocid = "B/" + docid;
    Object clobContent = new SerialClob("hello, world".toCharArray()) {
        @Override
        public String toString() { throw new IllegalStateException(); } };
    String originalName = dbContext.getLobField();
    Map<String, Object> row =
        ImmutableMap.of(primaryKeyColumn, docid, originalName, clobContent);

    // We don't care about the property value testing, just test docid.
    testFieldName("lobField", new LobDocumentBuilder(dbContext, context),
        row, SpiConstants.PROPNAME_DOCID, expectedDocid);
  }

  public void testFetchUrlFieldValue() throws Exception {
    Object expectedUrl = "http://example.com/2";
    String originalName = dbContext.getFetchURLField();
    Map<String, Object> row =
        ImmutableMap.of(primaryKeyColumn, 2, originalName, expectedUrl);

    testFieldName("fetchURLField", new LobDocumentBuilder(dbContext, context),
        row, SpiConstants.PROPNAME_DISPLAYURL, expectedUrl);
  }

  /**
   * When fetchURLField is null or whitespace, the connector should
   * return a dbConnector display URL, and it should not try to call
   * row.get(String) with the null key (Map implementations are not
   * guaranteed to support null keys).
   */
  public void testFetchUrlFieldNull() throws Exception {
    DocumentBuilder builder = new LobDocumentBuilder(dbContext, context);
    Object docid = 2;
    String expectedUrl = builder.getDisplayUrl("B/" + docid);
    dbContext.setFetchURLField(null);
    Map<String, Object> row = new HashMap<String, Object>() {
      @Override
      public Object get(Object key) {
        if (key == null) {
          throw new NullPointerException();
        } else {
          return super.get(key);
        }
      }
    };
    row.put(primaryKeyColumn, docid);

    testFieldName("fetchURLField", builder, row,
        SpiConstants.PROPNAME_DISPLAYURL, expectedUrl);
  }

  private Map<String, Object> getLargeObjectRow() {
    Map<String, Object> rowMap = new HashMap<String, Object>();
    // Define common test data.
    String versionColumn = "version";
    String versionValue = "2.3.4";
    rowMap.put(primaryKeyColumn, 1);
    rowMap.put(versionColumn, versionValue);
    return rowMap;
  }

  public void testDigestContentHolderClobDocument() throws Exception {
    String clobContent = getClobContent();
    testCLOBDataScenarios(
        DigestContentHolder.getInstance(clobContent.getBytes(Charsets.UTF_8),
            new MimeTypeDetector()),
        clobContent);
  }

  public void testStringClobDocument() throws Exception {
    String clobContent = getClobContent();
    testCLOBDataScenarios(clobContent, clobContent);
  }

  public void testCharArrayClobDocument() throws Exception {
    String clobContent = getClobContent();
    testCLOBDataScenarios(clobContent.toCharArray(), clobContent);
  }

  public void testSqlClobDocument() throws Exception {
    final String clobContent = getClobContent();
    long clobLength = clobContent.length();
    // The Clob is reused and should return a new stream each time.
    IAnswer<StringReader> stream = new IAnswer<StringReader>() {
      public StringReader answer() { return new StringReader(clobContent); } };

    Clob clob = createMock(Clob.class);
    expect(clob.length()).andReturn(clobLength).anyTimes();
    expect(clob.getCharacterStream()).andAnswer(stream).anyTimes();
    replay(clob);

    testCLOBDataScenarios(clob, clobContent);
  }

  private String getClobContent() {
    // Define CLOB data larger than the FileBackedOutputStream will
    // hold in memory for this test case.
    return MockClient.getClob(100000);
  }

  private void assertEmptyContent(JsonDocument doc)
      throws IOException, RepositoryException {
    // SQL NULLs are rendered as empty byte arrays for easier handling.
    Value value = Value.getSingleValue(doc, SpiConstants.PROPNAME_CONTENT);
    assertTrue(value.getClass().toString(), value instanceof BinaryValue);
    assertEquals(-1, ((BinaryValue) value).getInputStream().read());
  }

  private void assertEmptyContent(InputStreamFactory factory)
      throws IOException, RepositoryException {
    // SQL NULLs are rendered as empty byte arrays for easier handling.
    assertEquals(-1, factory.getInputStream().read());
  }

  /**
   * Test scenarios for CLOB data types.
   */
  public void testCLOBDataScenarios(Object clobValue, String clobContent)
        throws IOException, DBException, RepositoryException {
    Map<String, Object> rowMap = getLargeObjectRow();
    rowMap.put(dbContext.getLobField(), clobValue);

    FileSizeLimitInfo fileSizeLimitInfo = new FileSizeLimitInfo();
    fileSizeLimitInfo.setMaxDocumentSize(5);
    context.setFileSizeLimitInfo(fileSizeLimitInfo);

    JsonDocument clobDoc =
        getJsonDocument(new LobDocumentBuilder(dbContext, context), rowMap);
    // As the size of the document is more than supported, clobDoc should have
    // null value.
    assertNotNull(clobDoc);
    assertEmptyContent(clobDoc);

    // Increase the maximum supported size of the document.
    fileSizeLimitInfo.setMaxDocumentSize(1024 * 1024);
    context.setFileSizeLimitInfo(fileSizeLimitInfo);
    clobDoc =
        getJsonDocument(new LobDocumentBuilder(dbContext, context), rowMap);
    assertNotNull(clobDoc);

    // Test scenario:- this doc will have column name "version" as
    // metadata key and value will be "2.3.4".
    assertEquals(rowMap.get("version"), getProperty(clobDoc, "version"));

    // Test scenario:- the content of this document will be same as the
    // content of CLOB column.
    assertEquals(clobContent,
        getProperty(clobDoc, SpiConstants.PROPNAME_CONTENT));

    // The MIME type of the content should have been automatically determined.
    assertEquals("text/plain",
        getProperty(clobDoc, SpiConstants.PROPNAME_MIMETYPE));

    // Test scenario:- primary key column should be excluded while
    // indexing external metadata.
    assertNull(getProperty(clobDoc, primaryKeyColumn));
  }

  private byte[] getBlobContent() {
    // Gets a randomized 100-byte array that resolves as application/pdf.
    return MockClient.getBlob(100, true);
  }

  private Map<String, Object> getBlobRow(Object blobContent) {
    Map<String, Object> rowMap = getLargeObjectRow();
    rowMap.put(dbContext.getLobField(), blobContent);
    return rowMap;
  }

  public void testDigestContentHolderBlobDocument() throws Exception {
    byte[] blobContent = getBlobContent();
    testBLOBDataScenarios(
        DigestContentHolder.getInstance(blobContent, new MimeTypeDetector()),
        blobContent);
  }

  public void testByteArrayBlobDocument() throws Exception {
    byte[] blobContent = getBlobContent();
    testBLOBDataScenarios(blobContent, blobContent);
  }

  public void testSqlBlobDocument() throws Exception {
    byte[] blobContent = getBlobContent();
    long blobLength = blobContent.length;

    Blob blob = createMock(Blob.class);
    expect(blob.length()).andReturn(blobLength).anyTimes();
    expect(blob.getBytes(anyInt(), anyInt())).andReturn(blobContent).anyTimes();
    replay(blob);

    testBLOBDataScenarios(blob, blobContent);
  }

  /**
   * Test scenarios for BLOB.
   */
  public void testBLOBDataScenarios(Object blobValue, byte[] blobContent)
      throws Exception {
    Map<String, Object> rowMap = getBlobRow(blobValue);

    // Define for fetching BLOB content
    String fetchURL = "http://myhost:8030/app?dpc_id=120";
    rowMap.put(dbContext.getFetchURLField(), fetchURL);

    FileSizeLimitInfo fileSizeLimitInfo = new FileSizeLimitInfo();
    fileSizeLimitInfo.setMaxDocumentSize(5);
    context.setFileSizeLimitInfo(fileSizeLimitInfo);

    JsonDocument blobDoc =
        getJsonDocument(new LobDocumentBuilder(dbContext, context), rowMap);

    // The BLOB to too large.
    assertNotNull(blobDoc);
    assertEmptyContent(blobDoc);

    // Increase the maximum supported size of the document.
    fileSizeLimitInfo.setMaxDocumentSize(1024 * 1024);
    context.setFileSizeLimitInfo(fileSizeLimitInfo);
    blobDoc =
        getJsonDocument(new LobDocumentBuilder(dbContext, context), rowMap);

    assertNotNull(blobDoc);
    // Test scenario:- this doc will have column name "version" as
    // metadata key and value will be "2.3.4".
    assertEquals(rowMap.get("version"), getProperty(blobDoc, "version"));

    // Test scenario:- primary key column should be excluded while
    // indexing external metadata.
    assertNull(getProperty(blobDoc, primaryKeyColumn));

    // If one of the column holds the URL for fetching BLOB data. It
    // will be used as display URL in feed.
    assertEquals(fetchURL,
                 getProperty(blobDoc, SpiConstants.PROPNAME_DISPLAYURL));

    assertEquals("application/pdf",
        getProperty(blobDoc, SpiConstants.PROPNAME_MIMETYPE));

    assertTrue(Arrays.equals(blobContent, readBlobContent(blobDoc)));
  }

  public void testUnsupportedBlob() throws Exception {
    Map<String, Object> rowMap = getBlobRow(getBlobContent());

    // Set "application/pdf" MIME type in unsupported list. Now we should get
    // null value for DB document Content as this document is in unsupported
    // mimetype list.
    Set<String> unsupportedMime = new HashSet<String>();
    unsupportedMime.add("application/pdf");
    MimeTypeMap mimeTypeMap = new MimeTypeMap();
    mimeTypeMap.setUnsupportedMimeTypes(unsupportedMime);
    context.setMimeTypeMap(mimeTypeMap);
    JsonDocument.setTraversalContext(context);

    DocumentBuilder docBuilder = new LobDocumentBuilder(dbContext, context);
    ContentHolder holder = docBuilder.getContentHolder(rowMap,
        ImmutableList.of(primaryKeyColumn), "1");
    assertEmptyContent((InputStreamFactory) holder.getContent());

    JsonDocument blobDoc = getJsonDocument(docBuilder, rowMap);
    Property docContent = blobDoc.findProperty(SpiConstants.PROPNAME_CONTENT);
    // Document content should have null value.
    assertNull(docContent);
  }

  public void testExcludedBlob() throws Exception {
    Map<String, Object> rowMap = getBlobRow(getBlobContent());

    // Set "application/pdf" MIME type in ignore list. Now we should get null
    // value for DB document as this document is ignored by connector.
    Set<String> unsupportedMime = new HashSet<String>();
    unsupportedMime.add("application/pdf");
    MimeTypeMap mimeTypeMap = new MimeTypeMap();
    mimeTypeMap.setExcludedMimeTypes(unsupportedMime);
    context.setMimeTypeMap(mimeTypeMap);
    JsonDocument.setTraversalContext(context);

    DocumentBuilder docBuilder = new LobDocumentBuilder(dbContext, context);
    ContentHolder holder = docBuilder.getContentHolder(rowMap,
        ImmutableList.of(primaryKeyColumn), "1");
    assertEmptyContent((InputStreamFactory) holder.getContent());

    JsonDocument blobDoc = getJsonDocument(docBuilder, rowMap);
    try {
      blobDoc.findProperty(SpiConstants.PROPNAME_CONTENT);
      fail("Expected SkippedDocumentException, but got none.");
    } catch (SkippedDocumentException expected) {
    }
  }

  /**
   * Tests a failure in getBytes, which falls back to calling getBinaryStream.
   */
  public void testSqlBlobOneExceptionDocument() throws Exception {
    byte[] blobContent = getBlobContent();
    long blobLength = blobContent.length;
    InputStream blobStream = new ByteArrayInputStream(blobContent);

    Blob blob = createMock(Blob.class);
    expect(blob.length()).andReturn(blobLength).anyTimes();
    expect(blob.getBytes(anyInt(), anyInt()))
        .andThrow(new SQLException()).atLeastOnce();
    expect(blob.getBinaryStream()).andReturn(blobStream).atLeastOnce();
    replay(blob);

    // We can't use testBLOBDataScenarios in
    // testSqlBlobTwoExceptionsDocument, so don't use it here so we
    // know we get a non-null document without the MimeTypeDetector
    // and TraversalContext configuration.
    Map<String, Object> rowMap = getBlobRow(blob);
    JsonDocument blobDoc =
        getJsonDocument(new LobDocumentBuilder(dbContext, context), rowMap);
    assertNotNull(blobDoc);
    assertTrue(Arrays.equals(blobContent, readBlobContent(blobDoc)));
  }

  /**
   * Tests a failure in both getBytes and getBinaryStream, which
   * returns a null JsonDocument.
   */
  public void testSqlBlobTwoExceptionsDocument() throws Exception {
    byte[] blobContent = getBlobContent();
    long blobLength = blobContent.length;

    Blob blob = createMock(Blob.class);
    expect(blob.length()).andReturn(blobLength).anyTimes();
    expect(blob.getBytes(anyInt(), anyInt()))
        .andThrow(new SQLException()).atLeastOnce();
    expect(blob.getBinaryStream()).andThrow(new SQLException()).atLeastOnce();
    replay(blob);

    Map<String, Object> rowMap = getBlobRow(blob);
    try {
      JsonDocument blobDoc =
          getJsonDocument(new LobDocumentBuilder(dbContext, context), rowMap);
      fail("Expected DBException but got " + blobDoc.toJson());
    } catch (DBException expected) {
    }
  }

  /**
   * Test Case for fetching a BLOB File from Database and dumping it on the file
   * system using JsonDocument Object.
   */
  /* TODO: This does not work because the TESTEMPTABLE has not been set up.
   * And even if it was, it does not have any PDF BLOB data.
   */
  public void testPdfBlob(int dummy)
      throws IOException, DBException, RepositoryException {
    DBClient dbClient = dbContext.getClient();
    dbContext.setNumberOfRows(1);
    List<Map<String, Object>> rows = dbClient.executePartialQuery(0,
        dbContext.getNumberOfRows());
    JsonDocument jsonDocument = null;
    for (Map<String, Object> row : rows) {
      jsonDocument =
          getJsonDocument(new LobDocumentBuilder(dbContext, context), row);
    }

    byte[] blobcontent = readBlobContent(jsonDocument);
    assertNotNull(blobcontent);
    File newFile = new File("newreport.pdf");
    ByteStreams.write(blobcontent, Files.newOutputStreamSupplier(newFile));
  }
}
TOP

Related Classes of com.google.enterprise.connector.db.diffing.LobDocumentBuilderTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.