// Source code of com.google.enterprise.connector.pusher.DocPusher

// Copyright 2006 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.enterprise.connector.pusher;

import com.google.common.annotations.VisibleForTesting;
import com.google.enterprise.connector.logging.NDC;
import com.google.enterprise.connector.manager.Context;
import com.google.enterprise.connector.spi.Document;
import com.google.enterprise.connector.spi.RepositoryDocumentException;
import com.google.enterprise.connector.spi.RepositoryException;
import com.google.enterprise.connector.spi.SpiConstants;
import com.google.enterprise.connector.spi.SpiConstants.FeedType;
import com.google.enterprise.connector.traversal.FileSizeLimitInfo;
import com.google.enterprise.connector.util.filter.DocumentFilterFactory;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.LinkedList;
import java.util.ListIterator;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.FutureTask;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
* Class to generate xml feed for a document from the Document and send it
* to GSA.
*/
public class DocPusher implements Pusher {
  private static final Logger LOGGER =
      Logger.getLogger(DocPusher.class.getName());

  /**
   * Separate Logger for Feed Logging.  FEED_LOGGER is a child of
   * FEED_WRAPPER_LOGGER, so feed record logging can be configured
   * independently of the main connector log.
   */
  private static final Logger FEED_WRAPPER_LOGGER =
      Logger.getLogger(LOGGER.getName() + ".FEED_WRAPPER");
  private static final Logger FEED_LOGGER =
      Logger.getLogger(FEED_WRAPPER_LOGGER.getName() + ".FEED");
  // Level at which feed records are logged (checked before building feedLog).
  private static final Level FEED_LOG_LEVEL = Level.FINER;

  /**
   * Configured maximum document size and maximum feed file size supported.
   */
  private final FileSizeLimitInfo fileSizeLimit;

  /**
   * FeedConnection that is the sink for our generated XmlFeeds.
   */
  private final FeedConnection feedConnection;

  /**
   * The {@link DocumentFilterFactory} is used to construct
   * {@code Document} instances that act as filters on a source
   * document.  Document filters may add, remove, or modify
   * {@code Properties}.  The DocumentFilterFactory set here
   * is typically a {@link DocumentFilterChain} - a chain of
   * DocumentFilterFactory beans that is used to construct a
   * Document manipulation pipeline.
   */
  private final DocumentFilterFactory documentFilterFactory;

  /**
   * The Connector name that is the dataSource for this Feed.
   */
  private final String connectorName;

  /**
   * ExecutorService that submits a Feed to the GSA in a separate thread.
   * This allows us to overlap I/O reading content from the Repository
   * in the traversal thread, and submitting content to the GSA in
   * a submitFeed thread.
   */
  private final ExecutorService feedSender;

  /**
   * This is the list of outstanding asynchronous feed submissions.
   * All access is synchronized on this list itself.
   */
  private final LinkedList<FutureTask<String>> submissions;

  /**
   * This is used to build up a multi-record feed.  Documents are added to the
   * feed until the size of the feed exceeds the FileSizeLimitInfo.maxFeedSize
   * or we are finished with the batch of documents. The feed is then
   * submitted to the feed connection.  Null when no feed is under
   * construction.
   */
  private XmlFeed xmlFeed = null;

  /**
   * This field is used to construct a feed record in parallel to the main feed
   * InputStream construction.  It is only used if the feed logging level is set
   * to the appropriate level.  It only exists during the time the main feed is
   * being constructed.  Once sufficient information has been appended to this
   * buffer its contents will be logged and it will be nulled.
   */
  private StringBuilder feedLog = null;

  // Most recent GSA response string.  For use by unit tests.
  private String gsaResponse;

  /**
   * Creates a {@code DocPusher} object from the specified
   * {@code feedConnection} and {@code connectorName}.  The supplied
   * {@link FileSizeLimitInfo} specifies constraints as to the size of a
   * Document's content and the size of generated Feed files.
   *
   * @param feedConnection a FeedConnection
   * @param connectorName The connector name that is the source of the feed
   * @param fileSizeLimitInfo FileSizeLimitInfo constraints on document content
   *        and feed size.
   * @param documentFilterFactory a {@link DocumentFilterFactory} that creates
   *        document processing filters.
   */
  public DocPusher(FeedConnection feedConnection, String connectorName,
                   FileSizeLimitInfo fileSizeLimitInfo,
                   DocumentFilterFactory documentFilterFactory) {
    this.feedConnection = feedConnection;
    this.connectorName = connectorName;
    this.fileSizeLimit = fileSizeLimitInfo;
    this.documentFilterFactory = documentFilterFactory;

    // Initialize background feed submission.
    this.submissions = new LinkedList<FutureTask<String>>();
    this.feedSender = Executors.newSingleThreadExecutor();
  }

  /**
   * Return the Feed Logger.
   */
  public static Logger getFeedLogger() {
    return FEED_WRAPPER_LOGGER;
  }

  /**
   * Gets the response from GSA when the feed is sent. For testing only.
   *
   * @return gsaResponse response from GSA.
   */
  protected String getGsaResponse() {
    return gsaResponse;
  }

  /**
   * The {@code DocumentStore} parameter is ignored and may be null.
   *
   * @param document Document corresponding to the document.
   * @param documentStore {@code DocumentStore} for recording document
   *        status.  Ignored - may be {@code null}.
   * @deprecated Use the overload without the {@code DocumentStore} parameter
   */
  @SuppressWarnings("deprecation")
  @Deprecated
  public PusherStatus take(Document document,
      com.google.enterprise.connector.database.DocumentStore documentStore)
      throws PushException, FeedException, RepositoryException {
    return take(document);
  }

  /**
   * Takes a Document and sends a the feed to the GSA.
   *
   * @param document Document corresponding to the document.
   * @return true if Pusher should accept more documents, false otherwise.
   * @throws PushException if Pusher problem
   * @throws FeedException if transient Feed problem
   * @throws RepositoryDocumentException if fatal Document problem
   * @throws RepositoryException if transient Repository problem
   */
  @Override
  public PusherStatus take(Document document)
      throws PushException, FeedException, RepositoryException {
    if (feedSender.isShutdown()) {
      return PusherStatus.DISABLED;
    }
    checkSubmissions();

    // Apply any configured Document filters to the document.
    document = documentFilterFactory.newDocumentFilter(document);

    FeedType feedType;
    try {
      feedType = DocUtils.getFeedType(document);
    } catch (RuntimeException e) {
      LOGGER.log(Level.WARNING,
          "Rethrowing RuntimeException as RepositoryDocumentException", e);
      throw new RepositoryDocumentException(e);
    }

    // All feeds in a feed file must be of the same type.
    // If the feed would change type, send the feed off to the GSA
    // and start a new one.
    // TODO: Fix this check to allow ACLs in any type feed.
    if (xmlFeed != null && !feedType.isCompatible(xmlFeed.getFeedType())) {
      if (LOGGER.isLoggable(Level.FINE)) {
        LOGGER.fine("A new feedType, " + feedType + ", requires a new feed for "
            + connectorName + ". Closing feed and sending to GSA.");
      }
      submitFeed();
    }

    if (xmlFeed == null) {
      if (LOGGER.isLoggable(Level.FINE)) {
        LOGGER.fine("Creating new " + feedType + " feed for " + connectorName);
      }
      try {
        startNewFeed(feedType);
      } catch (OutOfMemoryError me) {
        throw new PushException("Unable to allocate feed buffer.  Try reducing"
            + " the maxFeedSize setting, reducing the number of connector"
            + " intances, or adjusting the JVM heap size parameters.", me);
      }
    }

    boolean isThrowing = false;
    int resetPoint = xmlFeed.size();
    int resetCount = xmlFeed.getRecordCount();
    try {
      if (LOGGER.isLoggable(Level.FINER)) {
        LOGGER.log(Level.FINER, "DOCUMENT: Adding document with docid={0} and "
            + "searchurl={1} from connector {2} to feed.", new Object[] {
            DocUtils.getOptionalString(document, SpiConstants.PROPNAME_DOCID),
            DocUtils.getOptionalString(document,
              SpiConstants.PROPNAME_SEARCHURL),
            connectorName});
      }

      // Add this document to the feed.
      xmlFeed.addRecord(document);

      // If the feed is full, send it off to the GSA.
      if (xmlFeed.isFull() || lowMemory()) {
        if (LOGGER.isLoggable(Level.FINE)) {
          LOGGER.fine("Feed for " + connectorName + " has grown to "
              + xmlFeed.size() + " bytes. Closing feed and sending to GSA.");
        }
        submitFeed();
        return getPusherStatus();
      }

      // Indicate that this Pusher may accept more documents.
      return PusherStatus.OK;

    } catch (OutOfMemoryError me) {
      resetFeed(resetPoint, resetCount);
      throw new PushException("Out of memory building feed, retrying.", me);
    } catch (RuntimeException e) {
      resetFeed(resetPoint, resetCount);
      LOGGER.log(Level.WARNING,
          "Rethrowing RuntimeException as RepositoryDocumentException", e);
      throw new RepositoryDocumentException(e);
    } catch (RepositoryDocumentException rde) {
      // Skipping this document, remove it from the feed.
      resetFeed(resetPoint, resetCount);
      throw rde;
    } catch (IOException ioe) {
      LOGGER.log(Level.SEVERE, "IOException while reading: skipping", ioe);
      resetFeed(resetPoint, resetCount);
      Throwable t = ioe.getCause();
      isThrowing = true;
      if (t != null && (t instanceof RepositoryException)) {
        throw (RepositoryException) t;
      } else {
        throw new RepositoryDocumentException("I/O error reading data", ioe);
      }
    }
  }

  /** Rolls back a feed to the reset point. */
  private void resetFeed(int resetPoint, int resetCount) {
    xmlFeed.reset(resetPoint);
    xmlFeed.setRecordCount(resetCount);
  }

  /**
   * Finish a feed.  No more documents are anticipated.
   * If there is an outstanding feed file, submit it to the GSA.
   *
   * @throws PushException if Pusher problem
   * @throws FeedException if transient Feed problem
   * @throws RepositoryException
   */
  @Override
  public void flush() throws PushException, FeedException, RepositoryException {
    checkSubmissions();
    if (!feedSender.isShutdown()) {
      if (xmlFeed != null) {
        LOGGER.fine("Flushing accumulated feed to GSA");
        submitFeed();
      }
      feedSender.shutdown();
    }
    while (!feedSender.isTerminated()) {
      try {
        feedSender.awaitTermination(10, TimeUnit.SECONDS);
      } catch (InterruptedException ie) {
        if (checkSubmissions() > 0) {
          throw new FeedException("Interrupted while waiting for feeds.");
        }
      }
    }
    checkSubmissions();
  }

  /**
   * Cancels any feed being constructed.  Any accumulated feed data is lost.
   */
  @Override
  public void cancel() {
    // Discard any feed under construction.
    if (xmlFeed != null) {
      LOGGER.fine("Discarding accumulated feed for " + connectorName);
      xmlFeed = null;
    }
    if (feedLog != null) {
      feedLog = null;
    }
    // Cancel any feeds under asynchronous submission.
    feedSender.shutdownNow();
  }

  @Override
  public PusherStatus getPusherStatus()
      throws PushException, FeedException, RepositoryException {
    // Is Pusher shutdown?
    if (feedSender.isShutdown()) {
      return PusherStatus.DISABLED;
    }

    // If we are running low on memory, don't start another feed -
    // tell the Traverser to finish this batch.
    if (lowMemory()) {
      return PusherStatus.LOW_MEMORY;
    }

    // If the number of feeds waiting to be sent has backed up,
    // tell the Traverser to finish this batch.
    if (checkSubmissions() > 10) {
      return PusherStatus.LOCAL_FEED_BACKLOG;
    } else if (feedConnection.isBacklogged()) {
      return PusherStatus.GSA_FEED_BACKLOG;
    }

    // Indicate that this Pusher may accept more documents.
    return PusherStatus.OK;
  }

  /**
   * Checks on asynchronously submitted feeds to see if they completed
   * or failed.  If any of the submissions failed, throw an Exception.
   *
   * @return number if items remaining in the submissions list
   */
  @VisibleForTesting
  int checkSubmissions()
      throws PushException, FeedException, RepositoryException {
    int count = 0// Count of outstanding items in the list.
    synchronized(submissions) {
      ListIterator<FutureTask<String>> iter = submissions.listIterator();
      while (iter.hasNext()) {
        FutureTask<String> future = iter.next();
        if (future.isDone()) {
          iter.remove();
          try {
            gsaResponse = future.get();
          } catch (InterruptedException ie) {
            // Shouldn't happen if isDone.
          } catch (ExecutionException ee) {
            Throwable cause = ee.getCause();
            if (cause == null) {
              cause = ee;
            }
            if (cause instanceof PushException) {
              throw (PushException) cause;
            } else if (cause instanceof FeedException) {
              throw (FeedException) cause;
            } else if (cause instanceof RepositoryException) {
              throw (RepositoryException) cause;
            } else {
              throw new FeedException("Error submitting feed", cause);
            }
          }
        } else {
          count++;
        }
      }
    }
    return count;
  }

  /**
   * Checks for low available memory condition.
   *
   * @return true if free memory is running low.
   */
  private boolean lowMemory() {
    long threshold = ((fileSizeLimit.maxFeedSize() + fileSizeLimit.maxDocumentSize()) * 4) / 3;
    Runtime rt = Runtime.getRuntime();
    if ((rt.maxMemory() - (rt.totalMemory() - rt.freeMemory())) < threshold) {
      rt.gc();
      if ((rt.maxMemory() - (rt.totalMemory() - rt.freeMemory())) < threshold) {
        return true;
      }
    }
    return false;
  }

  /**
   * Allocates initial memory for a new XmlFeed and feed logger.
   *
   * @param feedType
   */
  private void startNewFeed(FeedType feedType) throws PushException {
    // Allocate a buffer to construct the feed log.
    try {
      if (FEED_LOGGER.isLoggable(FEED_LOG_LEVEL) && feedLog == null) {
        feedLog = new StringBuilder(256 * 1024);
        feedLog.append("Records generated for ").append(feedType);
        feedLog.append(" feed of ").append(connectorName).append(":\n");
      }
    } catch (OutOfMemoryError me) {
      throw new OutOfMemoryError(
           "Unable to allocate feed log buffer for connector " + connectorName);
    }

    long feedSize = fileSizeLimit.maxFeedSize();
    try {
      try {
        // Allocate XmlFeed of the target size.
        xmlFeed = new XmlFeed(connectorName, feedType, fileSizeLimit, feedLog,
            feedConnection);
      } catch (OutOfMemoryError me) {
        // We shouldn't even have gotten this far under a low memory condition.
        // However, try to allocate a tiny feed buffer.  It should fill up on
        // the first document, forcing it to be submitted.  DocPusher.take()
        // should then return a signal to the caller to terminate the batch.
        LOGGER.warning("Insufficient memory available to allocate an optimally"
            + " sized feed - retrying with a much smaller feed allocation.");
        feedSize = 1024;
        FileSizeLimitInfo newLimit = new FileSizeLimitInfo();
        newLimit.setMaxFeedSize(feedSize);
        newLimit.setMaxDocumentSize(fileSizeLimit.maxDocumentSize());
        try {
          xmlFeed = new XmlFeed(connectorName, feedType, newLimit, feedLog,
              feedConnection);
        } catch (OutOfMemoryError oome) {
          throw new OutOfMemoryError(
               "Unable to allocate feed buffer for connector " + connectorName);
        }
      }
    } catch (IOException ioe) {
      throw new PushException("Error creating feed", ioe);
    }

    LOGGER.fine("Allocated a new feed of size " + feedSize);
    return;
  }

  /**
   * Takes the accumulated XmlFeed and sends the feed to the GSA.
   *
   * @throws PushException if Pusher problem
   * @throws FeedException if transient Feed problem
   * @throws RepositoryException
   */
  private void submitFeed()
      throws PushException, FeedException, RepositoryException {
    if (xmlFeed == null) {
      return;
    }

    final XmlFeed feed = xmlFeed;
    xmlFeed = null;
    final String logMessage;
    if (feedLog != null) {
      logMessage = feedLog.toString();
      feedLog = null;
    } else {
      logMessage = null;
    }

    try {
      feed.close();
    } catch (IOException ioe) {
      throw new PushException("Error closing feed", ioe);
    }

    try {
      // Send the feed to the GSA in a separate thread.
      FutureTask<String> future = new FutureTask<String> (
          new Callable<String>() {
            public String call()
                throws PushException, FeedException, RepositoryException {
              try {
                NDC.push("Feed " + feed.getDataSource());
                return submitFeed(feed, logMessage);
              } finally {
                NDC.remove();
              }
            }
          }
        );
      feedSender.execute(future);
      // Add the future to list of outstanding submissions.
      synchronized(submissions) {
        submissions.add(future);
      }
    } catch (RejectedExecutionException ree) {
      throw new FeedException("Asynchronous feed was rejected. ", ree);
    }
  }

  /**
   * Takes the supplied XmlFeed and sends that feed to the GSA.
   * Invoked on the feedSender thread via the Callable constructed in
   * the no-argument {@code submitFeed()}.
   *
   * @param feed an XmlFeed
   * @param logMessage a Feed Log message, or null if feed logging is off
   * @return response String from GSA
   * @throws PushException if the GSA returns an unsuccessful response
   * @throws FeedException if the feed cannot be written to the TeedFeedFile
   * @throws RepositoryException
   */
  private String submitFeed(XmlFeed feed, String logMessage)
      throws PushException, FeedException, RepositoryException {

    if (LOGGER.isLoggable(Level.FINE)) {
      LOGGER.fine("Submitting " + feed.getFeedType() + " feed for "
          + feed.getDataSource() + " to the GSA. " + feed.getRecordCount()
          + " records totaling " + feed.size() + " bytes.");
    }

    // Write the generated feedLog message to the feed logger.
    if (logMessage != null && FEED_LOGGER.isLoggable(FEED_LOG_LEVEL)) {
      FEED_LOGGER.log(FEED_LOG_LEVEL, logMessage);
    }

    // Write the Feed to the TeedFeedFile, if one was specified.
    String teedFeedFilename = Context.getInstance().getTeedFeedFile();
    if (teedFeedFilename != null) {
      // isThrowing records that the write itself already threw, so the
      // close() failure in the finally block does not mask the original
      // exception.
      boolean isThrowing = false;
      OutputStream os = null;
      try {
        // Append to the teed feed file rather than overwriting it.
        os = new FileOutputStream(teedFeedFilename, true);
        feed.writeTo(os);
      } catch (IOException e) {
        isThrowing = true;
        throw new FeedException("Cannot write to file: " + teedFeedFilename, e);
      } finally {
        if (os != null) {
          try {
            os.close();
          } catch (IOException e) {
            if (!isThrowing) {
              throw new FeedException(
                   "Cannot write to file: " + teedFeedFilename, e);
            }
          }
        }
      }
    }

    // Submit the feed; any non-success response becomes a PushException
    // with extra guidance for the common failure modes.
    String gsaResponse = feedConnection.sendData(feed);
    if (!gsaResponse.equals(GsaFeedConnection.SUCCESS_RESPONSE)) {
      String eMessage = gsaResponse;
      if (GsaFeedConnection.UNAUTHORIZED_RESPONSE.equals(gsaResponse)) {
        eMessage += ": Client is not authorized to send feeds. Make "
            + "sure the GSA is configured to trust feeds from your host.";
      }
      if (GsaFeedConnection.INTERNAL_ERROR_RESPONSE.equals(gsaResponse)) {
        eMessage += ": Check GSA status or feed format.";
      }
      throw new PushException(eMessage);
    }
    return gsaResponse;
  }
}
// (End of source.  The text below is website-scraping residue, retained
// as a comment so the file remains valid Java.)
// Related Classes of com.google.enterprise.connector.pusher.DocPusher
// Copyright 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a
// trademark of Sun Microsystems, Inc and owned by ORACLE Inc.