Package com.google.enterprise.connector.dctm

Source Code of com.google.enterprise.connector.dctm.DctmDocumentList

// Copyright 2007 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.enterprise.connector.dctm;

import com.google.enterprise.connector.dctm.dfcwrap.ICollection;
import com.google.enterprise.connector.dctm.dfcwrap.IQuery;
import com.google.enterprise.connector.dctm.dfcwrap.ISession;
import com.google.enterprise.connector.dctm.dfcwrap.ITime;
import com.google.enterprise.connector.spi.Document;
import com.google.enterprise.connector.spi.DocumentList;
import com.google.enterprise.connector.spi.RepositoryDocumentException;
import com.google.enterprise.connector.spi.RepositoryException;
import com.google.enterprise.connector.spi.SpiConstants;

import java.util.HashSet;
import java.util.logging.Logger;

public class DctmDocumentList implements DocumentList {
  private static final Logger logger =
      Logger.getLogger(DctmDocumentList.class.getName());

  private ISession session;

  private final ICollection collectionToAdd;

  private final ICollection collectionToDel;

  /**
   * Used for merging multiple chronicleIds in the same batch of deletes,
   * so we only send a single add or delete request.
   */
  private final HashSet<String> deletedIds = new HashSet<String>();

  private final Checkpoint checkpoint;

  private final DctmTraversalManager traversalManager;

  public DctmDocumentList(DctmTraversalManager traversalManager,
      ISession session, ICollection collToAdd, ICollection collToDel,
      Checkpoint checkpoint) {
    this.traversalManager = traversalManager;
    this.session = session;
    this.collectionToAdd = collToAdd;
    this.collectionToDel = collToDel;
    this.checkpoint = checkpoint;
  }

  @Override
  public Document nextDocument() throws RepositoryException {
    DctmSysobjectDocument dctmSysobjectDocument;
    Document retDoc = null;
    boolean skippingDoc = false;
    try {
      while (retDoc == null) {
        if (isOpen(collectionToAdd) && collectionToAdd.next()) {
          logger.fine("Looking through the collection of documents to add");

          String objId = "";
          ITime modifyDate = null;
          try {
            objId = collectionToAdd.getString("r_object_id");
            modifyDate = collectionToAdd.getTime("r_modify_date");
            logger.fine("r_object_id is " + objId + "  modifyDate is "
                        + modifyDate.getDate());
          } catch (RepositoryException e) {
            logger.severe("impossible to get the r_object_id of the document");
            return null;
          }
          checkpoint.setInsertCheckpoint(modifyDate.getDate(), objId);

          dctmSysobjectDocument = new DctmSysobjectDocument(traversalManager,
              session, objId, collectionToAdd.getString("i_chronicle_id"),
              modifyDate, SpiConstants.ActionType.ADD, checkpoint);

          logger.fine("Creation of a new dctmSysobjectDocument to add");
          retDoc = dctmSysobjectDocument;
        } else if (isOpen(collectionToDel) && collectionToDel.next()) {
          logger.fine("Looking through the collection of documents to remove");

          String eventId = "";
          ITime deleteDate = null;
          try {
            eventId = collectionToDel.getString("r_object_id");
            deleteDate = collectionToDel.getTime("time_stamp_utc");
            logger.fine("delete event r_object_id is " + eventId
                        + "  deleteDate is " + deleteDate.getDate());
          } catch (RepositoryException e) {
            logger.warning("impossible to get the r_object_id of the delete event");
            return null;
          }
          checkpoint.setDeleteCheckpoint(deleteDate.getDate(), eventId);

          String chronicleId = collectionToDel.getString("chronicle_id");

          // Deleting multiple versions can post multiple dm_destroy and
          // dm_prune events with the same chronicle_id.  We only want to
          // send a single delete request to the GSA.
          if (deletedIds.contains(chronicleId)) {
            logger.fine("Skipping redundant delete of version: " + chronicleId);
            continue; // Already deleted this chronicle_id in this batch.
          }

          // If we are deleting the last version of a document, remove it
          // from the index.  If we may be deleting the latest version of
          // the document, force the new latest version to be re-indexed.
          ICollection versions = null;
          try {
            versions = getCurrentVersion(chronicleId);
            if (versions != null && versions.next()) {
              ITime lastModify = versions.getTime("r_modify_date");
              if (lastModify.getDate().before(deleteDate.getDate())) {
                // We may have deleted the latest version, so refeed the
                // current latest version.
                dctmSysobjectDocument = new DctmSysobjectDocument(
                    traversalManager, session,
                    versions.getString("r_object_id"), chronicleId,
                    lastModify, SpiConstants.ActionType.ADD, checkpoint);
                logger.fine("Creation of a new dctmSysobjectDocument to "
                            + "resubmit newest version of deleted item: "
                            + chronicleId);
              } else {
                // Skip this doc.
                logger.fine("Skipping delete of old version: " + chronicleId);
                continue;
              }
            } else {
              // No more versions of the document remain.
              // Delete the document from the index.
              dctmSysobjectDocument = new DctmSysobjectDocument(
                  traversalManager, session,
                  collectionToDel.getString("audited_obj_id"), chronicleId,
                  deleteDate, SpiConstants.ActionType.DELETE, checkpoint);
              logger.fine("Creation of a new dctmSysobjectDocument to delete: "
                          + chronicleId);
            }
          } finally {
            if (versions != null) {
              versions.close();
            }
          }
          // Handled this version in this batch.
          deletedIds.add(chronicleId);
          retDoc = dctmSysobjectDocument;
        } else {
          logger.fine("End of document list");
          break;
        }
      }
    } catch (RepositoryDocumentException rde) {
      logger.warning("Error while trying to get next document : " + rde);
      skippingDoc = true;
      throw rde;
    } catch (RepositoryException re) {
      logger.warning("Error while trying to get next document : " + re);
      if (lostConnection()) {
        // If we have lost connectivity to the server, rollback the
        // checkpoint to the previous state, retrying this document later.
        checkpoint.restore();
      }
      throw re;
    } finally {
      if (retDoc == null && !skippingDoc) {
        logger.fine("retDoc is null before finalize");
        finalize();
      }
    }
    return retDoc;
  }

  @Override
  public String checkpoint() throws RepositoryException {
    try {
      return checkpoint.asString();
    } finally {
      finalize();
    }
  }

  /**
   * Return a ICollection containing the latest version of document based
   * upon the supplied chronicleId.
   *
   * @param chronicleId root document version
   * @return ICollection of versions.
   */
  public ICollection getCurrentVersion(String chronicleId)
      throws RepositoryException {
    IQuery query = traversalManager.getClientX().getQuery();
    query.setDQL(traversalManager.buildVersionsQueryString(checkpoint,
        chronicleId));
    return query.execute(session, IQuery.EXECUTE_READ_QUERY);
  }

  /**
   * Test connectivity to server.  If we have a session and
   * can verify that session isConnected(), return false.
   * If we cannot verify the session is connected return true.
   * If we don't have a session to test, return false.
   */
  private boolean lostConnection() {
    try {
      return (session != null && !session.isConnected());
    } catch (Exception e) {
      logger.warning("Lost connectivity to server: " + e);
      return true;
    }
  }

  /**
   * Return true if the collection is open (non-null and not in CLOSED_STATE).
   */
  private static boolean isOpen(ICollection collection) {
    return ((collection != null) &&
            (collection.getState() != ICollection.DF_CLOSED_STATE));
  }

  // Last chance to make sure the collections are closed and their sessions
  // are released.
  @Override
  protected void finalize() {
    try {
      if (isOpen(collectionToAdd)) {
        try {
          collectionToAdd.close();
          logger.fine("collection of documents to add closed");
        } catch (RepositoryException e) {
          logger.severe(
              "Error while closing the collection of documents to add: " + e);
        }
      }

      if (isOpen(collectionToDel)) {
        try {
          collectionToDel.close();
          logger.fine("collection of documents to delete closed");
        } catch (RepositoryException e) {
          logger.severe(
              "Error while closing the collection of documents to delete: "
              + e);
        }
      }
    } finally {
      if (session != null) {
        traversalManager.getSessionManager().release(session);
        session = null;
        logger.fine("collection session released");
      }
    }
  }
}
TOP

Related Classes of com.google.enterprise.connector.dctm.DctmDocumentList

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.