Source Code of com.google.enterprise.connector.pusher.XmlFeed

// Copyright 2009 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


package com.google.enterprise.connector.pusher;


import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Charsets;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.enterprise.connector.common.AlternateContentFilterInputStream;
import com.google.enterprise.connector.common.BigEmptyDocumentFilterInputStream;
import com.google.enterprise.connector.common.CompressedFilterInputStream;
import com.google.enterprise.connector.manager.Context;
import com.google.enterprise.connector.servlet.ServletUtil;
import com.google.enterprise.connector.spi.Document;
import com.google.enterprise.connector.spi.Principal;
import com.google.enterprise.connector.spi.Property;
import com.google.enterprise.connector.spi.RepositoryDocumentException;
import com.google.enterprise.connector.spi.RepositoryException;
import com.google.enterprise.connector.spi.SpiConstants;
import com.google.enterprise.connector.spi.SpiConstants.AclAccess;
import com.google.enterprise.connector.spi.SpiConstants.AclScope;
import com.google.enterprise.connector.spi.SpiConstants.ActionType;
import com.google.enterprise.connector.spi.SpiConstants.ContentEncoding;
import com.google.enterprise.connector.spi.SpiConstants.DocumentType;
import com.google.enterprise.connector.spi.SpiConstants.FeedType;
import com.google.enterprise.connector.spi.Value;
import com.google.enterprise.connector.spi.XmlUtils;
import com.google.enterprise.connector.spiimpl.PrincipalValue;
import com.google.enterprise.connector.spiimpl.ValueImpl;
import com.google.enterprise.connector.traversal.FileSizeLimitInfo;
import com.google.enterprise.connector.util.Base64FilterInputStream;
import com.google.enterprise.connector.util.UniqueIdGenerator;
import com.google.enterprise.connector.util.UuidGenerator;
import com.google.enterprise.connector.util.filter.DocumentFilterFactory;


import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;


/**
 * Class to generate XML Feed for a document from the Document and send it
 * to GSA.
 */
public class XmlFeed extends ByteArrayOutputStream implements FeedData {
  private static final Logger LOGGER =
      Logger.getLogger(XmlFeed.class.getName());


  @SuppressWarnings("deprecation")
  private static class RoleSuffixes {
    private static final List<String> BELOW_READER = ImmutableList.of(
        "=" + SpiConstants.RoleType.PEEKER);


    private static final List<String> READER_OR_BELOW = ImmutableList.of(
        "=" + SpiConstants.RoleType.PEEKER,
        "=" + SpiConstants.RoleType.READER);


    private static final List<String> READER_OR_ABOVE = ImmutableList.of(
        "=" + SpiConstants.RoleType.READER,
        "=" + SpiConstants.RoleType.WRITER,
        "=" + SpiConstants.RoleType.OWNER);


    private static final List<String> ABOVE_READER = ImmutableList.of(
        "=" + SpiConstants.RoleType.WRITER,
        "=" + SpiConstants.RoleType.OWNER);
  }


  private final String dataSource;
  private final FeedType feedType;
  private final UrlConstructor urlConstructor;
  private final int maxFeedSize;
  private final Appendable feedLogBuilder;
  private final String feedId;
  private final FileSizeLimitInfo fileSizeLimit;
  private final AclTransformFilter aclTransformFilter;


  /** Encoding method to use for Document content. */
  private final ContentEncoding contentEncoding;


  /** If true, ACLs support inheritance and deny; otherwise legacy ACLs. */
  private final boolean supportsInheritedAcls;


  private static UniqueIdGenerator uniqueIdGenerator = new UuidGenerator();


  private static DocumentFilterFactory stripAclDocumentFilter =
      new StripAclDocumentFilter();
  private static DocumentFilterFactory extractedAclDocumentFilter =
      new ExtractedAclDocumentFilter();
  private static DocumentFilterFactory inheritFromExtractedAclDocumentFilter =
      new InheritFromExtractedAclDocumentFilter();


  private boolean isClosed;
  private int recordCount;


  @SuppressWarnings("deprecation")
  public static final Set<String> propertySkipSet = ImmutableSet.<String>of(
      // TODO: What about displayurl, ispublic, searchurl? Should we
      // have an explicit opt-in list of google: properties instead an
      // opt-out list?
      SpiConstants.PROPNAME_ACLINHERITFROM_DOCID,
      SpiConstants.PROPNAME_ACLINHERITFROM_FEEDTYPE,
      SpiConstants.PROPNAME_ACLINHERITFROM_FRAGMENT,
      SpiConstants.PROPNAME_ACTION,
      SpiConstants.PROPNAME_AUTHMETHOD,
      SpiConstants.PROPNAME_CONTENT,
      SpiConstants.PROPNAME_CONTENTURL,
      SpiConstants.PROPNAME_CONTENT_ENCODING,
      SpiConstants.PROPNAME_CONTENT_LENGTH,
      SpiConstants.PROPNAME_CRAWL_IMMEDIATELY,
      SpiConstants.PROPNAME_CRAWL_ONCE,
      SpiConstants.PROPNAME_DOCID,
      SpiConstants.PROPNAME_DOCUMENTTYPE,
      SpiConstants.PROPNAME_FEEDTYPE,
      SpiConstants.PROPNAME_LOCK,
      SpiConstants.PROPNAME_OVERWRITEACLS,
      SpiConstants.PROPNAME_PAGERANK,
      SpiConstants.PROPNAME_SECURITYTOKEN);


  // Strings for XML tags.
  public static final Charset XML_DEFAULT_CHARSET = Charsets.UTF_8;
  private static final String XML_START = "<?xml version='1.0' encoding='"
      + XML_DEFAULT_CHARSET.name() + "'?><!DOCTYPE gsafeed PUBLIC"
      + " \"-//Google//DTD GSA Feeds//EN\" \"gsafeed.dtd\">";
  private static final String XML_GSAFEED = "gsafeed";
  private static final String XML_HEADER = "header";
  private static final String XML_DATASOURCE = "datasource";
  private static final String XML_FEEDTYPE = "feedtype";
  private static final String XML_GROUP = "group";
  private static final String XML_RECORD = "record";
  private static final String XML_METADATA = "metadata";
  private static final String XML_META = "meta";
  private static final String XML_CONTENT = "content";
  private static final String XML_ACTION = "action";
  private static final String XML_URL = "url";
  private static final String XML_DISPLAY_URL = "displayurl";
  private static final String XML_MIMETYPE = "mimetype";
  private static final String XML_LAST_MODIFIED = "last-modified";
  private static final String XML_CRAWL_IMMEDIATELY = "crawl-immediately";
  private static final String XML_CRAWL_ONCE = "crawl-once";
  private static final String XML_LOCK = "lock";
  private static final String XML_AUTHMETHOD = "authmethod";
  private static final String XML_PAGERANK = "pagerank";
  private static final String XML_NAME = "name";
  private static final String XML_ENCODING = "encoding";
  private static final String XML_OVERWRITEACLS = "overwrite-acls";


  private static final String CONNECTOR_AUTHMETHOD = "httpbasic";


  private static final String XML_ACL = "acl";
  private static final String XML_TYPE = "inheritance-type";
  private static final String XML_INHERIT_FROM = "inherit-from";
  private static final String XML_PRINCIPAL = "principal";
  private static final String XML_SCOPE = "scope";
  private static final String XML_ACCESS = "access";


  private final String  supportedEncodings;


  public XmlFeed(String dataSource, FeedType feedType, 
      FileSizeLimitInfo fileSizeLimit, Appendable feedLogBuilder,
      FeedConnection feedConnection) throws IOException {
    super((int) fileSizeLimit.maxFeedSize());
    this.maxFeedSize = (int) fileSizeLimit.maxFeedSize();
    this.dataSource = dataSource;
    this.feedType = feedType;
    this.fileSizeLimit = fileSizeLimit;
    this.feedLogBuilder = feedLogBuilder;
    this.recordCount = 0;
    this.isClosed = false;
    this.feedId = uniqueIdGenerator.uniqueId();
    this.supportsInheritedAcls = feedConnection.supportsInheritedAcls();


    // Configure the dynamic ACL transformation filters for the documents.
    this.urlConstructor = new UrlConstructor(dataSource, feedType);
    this.aclTransformFilter =
        new AclTransformFilter(feedConnection, this.urlConstructor);


    supportedEncodings = feedConnection.getContentEncodings().toLowerCase();
    // Check to see if the GSA supports compressed content feeds.
    this.contentEncoding = (
        supportedEncodings.indexOf(ContentEncoding.BASE64COMPRESSED.toString())
        >= 0) ? ContentEncoding.BASE64COMPRESSED : ContentEncoding.BASE64BINARY;


    String prefix = xmlFeedPrefix(dataSource, feedType);
    write(prefix.getBytes(XML_DEFAULT_CHARSET));
  }


  @VisibleForTesting
  static void setUniqueIdGenerator(UniqueIdGenerator idGenerator) {
    uniqueIdGenerator = idGenerator;
  }


  /*
   * XmlFeed Public Interface.
   */


  /**
   * Returns the unique ID assigned to this Feed file and all
   * records within the Feed.
   */
  public String getFeedId() {
    return feedId;
  }


  /**
   * Returns {@code true} if the feed is sufficiently full to submit
   * to the GSA.
   */
  public boolean isFull() {
    int bytesLeft = maxFeedSize - size();
    int avgRecordSize = size()/recordCount;
    // If less then 3 average size docs would fit, then consider it full.
    if (bytesLeft < (3 * avgRecordSize)) {
      return true;
    } else if (bytesLeft > (10 * avgRecordSize)) {
      return false;
    } else {
      // If its more 90% full, then consider it full.
      return (bytesLeft < (maxFeedSize / 10));
    }
  }


  /**
   * Set the count of records in this feed.
   */
  public synchronized void setRecordCount(int count) {
    recordCount = count;
  }


  /**
   * Return the count of records in this feed.
   */
  public synchronized int getRecordCount() {
    return recordCount;
  }


  /**
   * Add the XML record for a given document to the Feed.
   */
  public synchronized void addRecord(Document document)
      throws RepositoryException, IOException {
    // Apply any ACL transformations to the document.
    // Build an XML feed record for the document.
    xmlWrapRecord(aclTransformFilter.newDocumentFilter(document));
  }


  /*
   * FeedData Interface.
   */


  /**
   * Return the {@link FeedType} for all records in this Feed.
   */
  public FeedType getFeedType() {
    return feedType;
  }


  /**
   * Return the data source for all records in this Feed.
   */
  public String getDataSource() {
    return dataSource;
  }


  /*
   * ByteArrayOutputStream (and related) Interface.
   */


  /**
   * Resets the size of this ByteArrayOutputStream to the
   * specified {@code size}, effectively discarding any
   * data that may have been written passed that point.
   * Like {@code reset()}, this method retains the previously
   * allocated buffer.
   * <p>
   * This method may be used to reduce the size of the data stored,
   * but not to increase it.  In other words, the specified {@code size}
   * cannot be greater than the current size.
   *
   * @param size new data size.
   */
  public synchronized void reset(int size) {
    if (size < 0 || size > count) {
      throw new IllegalArgumentException(
          "New size must not be negative or greater than the current size.");
    }
    count = size;
  }


  /**
   * Reads the complete contents of the supplied InputStream
   * directly into buffer of this ByteArrayOutputStream.
   * This avoids the data copy that would occur if using
   * {@code InputStream.read(byte[], int, int)}, followed by
   * {@code ByteArrayOutputStream.write(byte[], int, int)}.
   *
   * @param in the InputStream from which to read the data.
   * @throws IOException if an I/O error occurs.
   */
  public synchronized void readFrom(InputStream in) throws IOException {
    int bytes = 0;
    do {
      count += bytes;
      if (count >= buf.length) {
        // Need to grow buffer.
        int incr = Math.min(buf.length, 8 * 1024 * 1024);
        byte[] newbuf = new byte[buf.length + incr];
        System.arraycopy(buf, 0, newbuf, 0, buf.length);
        buf = newbuf;
      }
      bytes = in.read(buf, count, buf.length - count);
    } while (bytes != -1);
  }


  @Override
  public synchronized void close() throws IOException {
    if (!isClosed) {
      isClosed = true;
      String suffix = xmlFeedSuffix();
      write(suffix.getBytes(XML_DEFAULT_CHARSET));
    }
  }


  /*
   * Private Methods to XML encode the feed data.
   */


  /**
   * Construct the XML header for a feed file.
   *
   * @param dataSource The dataSource for the feed.
   * @param feedType The type of feed.
   * @return XML feed header string.
   */
  private static String xmlFeedPrefix(String dataSource, FeedType feedType) {
    // Build prefix.
    StringBuffer prefix = new StringBuffer();
    prefix.append(XML_START).append('\n');
    prefix.append(XmlUtils.xmlWrapStart(XML_GSAFEED)).append('\n');
    prefix.append(XmlUtils.xmlWrapStart(XML_HEADER)).append('\n');
    prefix.append(XmlUtils.xmlWrapStart(XML_DATASOURCE));
    prefix.append(dataSource);
    prefix.append(XmlUtils.xmlWrapEnd(XML_DATASOURCE));
    prefix.append(XmlUtils.xmlWrapStart(XML_FEEDTYPE));
    prefix.append(feedType.toLegacyString());
    prefix.append(XmlUtils.xmlWrapEnd(XML_FEEDTYPE));
    prefix.append(XmlUtils.xmlWrapEnd(XML_HEADER));
    prefix.append(XmlUtils.xmlWrapStart(XML_GROUP)).append('\n');
    return prefix.toString();
  }


  /**
   * Construct the XML footer for a feed file.
   *
   * @return XML feed suffix string.
   */
  private static String xmlFeedSuffix() {
    // Build suffix.
    StringBuffer suffix = new StringBuffer();
    suffix.append(XmlUtils.xmlWrapEnd(XML_GROUP));
    suffix.append(XmlUtils.xmlWrapEnd(XML_GSAFEED));
    return suffix.toString();
  }


  /*
   * Generate the record tag for the xml data.
   *
   * @throws IOException only from Appendable, and that can't really
   *         happen when using StringBuilder.
   */
  private void xmlWrapRecord(Document document)
      throws RepositoryException, IOException {
    if (supportsInheritedAcls) {
      String docType = DocUtils.getOptionalString(document,
          SpiConstants.PROPNAME_DOCUMENTTYPE);
      if (docType != null
          && DocumentType.findDocumentType(docType) == DocumentType.ACL) {
        xmlWrapAclRecord(document);
        recordCount++;
        return;
      } else if (feedType == FeedType.CONTENTURL
                 && DocUtils.hasAclProperties(document)) {
        // GSA 7.0 does not support case-sensitivity or namespaces in ACLs
        // during crawl-time. So we have to send the ACLs at feed-time.
        // But the crawl-time metadata overwrites the feed-time ACLs.
        // The proposed escape is to send a named resource ACL in the feed for
        // each document, and at crawl-time return an empty ACL that inherits
        // from the corresponding named resource ACL.
        xmlWrapAclRecord(
            extractedAclDocumentFilter.newDocumentFilter(document));
        recordCount++;
        document =
            inheritFromExtractedAclDocumentFilter.newDocumentFilter(document);
      }
    }
    xmlWrapDocumentRecord(document);
    recordCount++;
  }


  /*
   * Generate the record tag for the xml data.
   *
   * @throws IOException only from Appendable, and that can't really
   *         happen when using StringBuilder.
   */
  private void xmlWrapDocumentRecord(Document document)
      throws RepositoryException, IOException {
    boolean aclRecordAllowed = supportsInheritedAcls;
    boolean metadataAllowed = (feedType != FeedType.CONTENTURL);
    boolean contentAllowed = (feedType == FeedType.CONTENT);


    StringBuilder prefix = new StringBuilder();
    prefix.append("<").append(XML_RECORD);


    String searchUrl =
        urlConstructor.getRecordUrl(document, DocumentType.RECORD);
    XmlUtils.xmlAppendAttr(XML_URL, searchUrl, prefix);


    String displayUrl = DocUtils.getOptionalString(document,
        SpiConstants.PROPNAME_DISPLAYURL);
    XmlUtils.xmlAppendAttr(XML_DISPLAY_URL, displayUrl, prefix);


    ActionType actionType = null;
    String action = DocUtils.getOptionalString(document,
        SpiConstants.PROPNAME_ACTION);
    if (action != null) {
      // Compare to legal action types.
      actionType = ActionType.findActionType(action);
      if (actionType == ActionType.ADD) {
        XmlUtils.xmlAppendAttr(XML_ACTION, actionType.toString(), prefix);
      } else if (actionType == ActionType.DELETE) {
        XmlUtils.xmlAppendAttr(XML_ACTION, actionType.toString(), prefix);
        aclRecordAllowed = false;
        metadataAllowed = false;
        contentAllowed = false;
      } else if (actionType == ActionType.ERROR) {
        LOGGER.log(Level.WARNING, "Illegal tag used for ActionType: " + action);
        actionType = null;
      }
    }


    boolean lock = DocUtils.getOptionalBoolean(document, SpiConstants.PROPNAME_LOCK, false);
    if (lock) {
      XmlUtils.xmlAppendAttr(XML_LOCK, Value.getBooleanValue(true).toString(), prefix);
    }


    boolean crawlImmediately = DocUtils.getOptionalBoolean(
        document, SpiConstants.PROPNAME_CRAWL_IMMEDIATELY, false);
    if (crawlImmediately) {
      XmlUtils.xmlAppendAttr(XML_CRAWL_IMMEDIATELY,
          Value.getBooleanValue(true).toString(), prefix);
    }


    boolean crawlOnce = DocUtils.getOptionalBoolean(
        document, SpiConstants.PROPNAME_CRAWL_ONCE, false);
    if (crawlOnce) {
      XmlUtils.xmlAppendAttr(
          XML_CRAWL_ONCE, Value.getBooleanValue(true).toString(), prefix);
    }


    // Do not validate the values, just send them in the feed.
    String pagerank =
        DocUtils.getOptionalString(document, SpiConstants.PROPNAME_PAGERANK);
    XmlUtils.xmlAppendAttr(XML_PAGERANK, pagerank, prefix);


    String mimetype =
        DocUtils.getOptionalString(document, SpiConstants.PROPNAME_MIMETYPE);
    if (mimetype == null) {
      mimetype = SpiConstants.DEFAULT_MIMETYPE;
    }
    XmlUtils.xmlAppendAttr(XML_MIMETYPE, mimetype, prefix);


    try {
      String lastModified = DocUtils.getCalendarAndThrow(document,
          SpiConstants.PROPNAME_LASTMODIFIED);
      if (lastModified == null) {
        LOGGER.log(Level.FINEST, "Document does not contain "
            + SpiConstants.PROPNAME_LASTMODIFIED);
      } else {
        XmlUtils.xmlAppendAttr(XML_LAST_MODIFIED, lastModified, prefix);
      }
    } catch (IllegalArgumentException e) {
      LOGGER.log(Level.WARNING, "Swallowing exception while getting "
          + SpiConstants.PROPNAME_LASTMODIFIED, e);
    }


    try {
      ValueImpl v = (ValueImpl) Value.getSingleValue(document,
          SpiConstants.PROPNAME_ISPUBLIC);
      if (v != null) {
        boolean isPublic = v.toBoolean();
        if (!isPublic) {
          String authmethod = DocUtils.getOptionalString(document,
              SpiConstants.PROPNAME_AUTHMETHOD);
          if (Strings.isNullOrEmpty(authmethod)){
            authmethod = CONNECTOR_AUTHMETHOD;
          }
          XmlUtils.xmlAppendAttr(XML_AUTHMETHOD, authmethod, prefix);
        }
      }
    } catch (IllegalArgumentException e) {
      LOGGER.log(Level.WARNING, "Illegal value for ispublic property."
          + " Treat as a public doc", e);
    }


    prefix.append(">\n");


    if (aclRecordAllowed && DocUtils.hasAclProperties(document)) {
      xmlWrapAclRecord(prefix, document);
      // Prevent ACLs from showing up in metadata.
      document = stripAclDocumentFilter.newDocumentFilter(document);
    }
    if (metadataAllowed) {
      xmlWrapMetadata(prefix, document);
    }


    StringBuilder suffix = new StringBuilder();
    ContentEncoding documentContentEncoding = null;
    ContentEncoding alternateEncoding = null;
    if (contentAllowed) {
      // Determine the content encoding to specify.
      String documentContentEncodingStr = DocUtils.getOptionalString(
          document, SpiConstants.PROPNAME_CONTENT_ENCODING);


      if (!Strings.isNullOrEmpty(documentContentEncodingStr)) {
        documentContentEncoding =
            ContentEncoding.findContentEncoding(documentContentEncodingStr);
        if (documentContentEncoding == ContentEncoding.ERROR
            || supportedEncodings.indexOf(documentContentEncodingStr) < 0) {
          String message =
              "Unsupported content encoding: " + documentContentEncodingStr;
          LOGGER.log(Level.WARNING, message);
          throw new RepositoryDocumentException(message);
        }
      }
      alternateEncoding = (documentContentEncoding == null) 
          ? contentEncoding : documentContentEncoding;
  
      // If including document content, wrap it with <content> tags.
      prefix.append("<");
      prefix.append(XML_CONTENT);
      XmlUtils.xmlAppendAttr(XML_ENCODING,
          alternateEncoding.toString(), prefix);
      prefix.append(">\n");


      suffix.append('\n');
      XmlUtils.xmlAppendEndTag(XML_CONTENT, suffix);
    }


    write(prefix.toString().getBytes(XML_DEFAULT_CHARSET));


    if (contentAllowed) {
      InputStream contentStream = getContentStream(
          document, documentContentEncoding, alternateEncoding);
      try {
        readFrom(contentStream);
      } finally {
        contentStream.close();
      }
    }


    XmlUtils.xmlAppendEndTag(XML_RECORD, suffix);
    write(suffix.toString().getBytes(XML_DEFAULT_CHARSET));


    if (feedLogBuilder != null) {
      try {
        feedLogBuilder.append(prefix);
        if (contentAllowed) {
          feedLogBuilder.append("...content...");
        }
        feedLogBuilder.append(suffix);
      } catch (IOException e) {
        // This won't happen with StringBuffer or StringBuilder.
        LOGGER.log(Level.WARNING, "Exception while constructing feed log:", e);
      }
    }
  }


  /*
   * Generate the record tag for the ACL xml data, appending to {@code aclBuff}.
   */
  private void xmlWrapAclRecord(StringBuilder aclBuff, Document acl)
      throws IOException, RepositoryException {
    aclBuff.append("<").append(XML_ACL);
    String docType = DocUtils.getOptionalString(acl,
        SpiConstants.PROPNAME_DOCUMENTTYPE);
    if (docType != null
        && DocumentType.findDocumentType(docType) == DocumentType.ACL) {
      // Only specify the URL if this is a stand-alone ACL.
      XmlUtils.xmlAppendAttr(XML_URL, 
         urlConstructor.getRecordUrl(acl, DocumentType.ACL), aclBuff);
    }


    String inheritanceType = DocUtils.getOptionalString(acl,
        SpiConstants.PROPNAME_ACLINHERITANCETYPE);
    if (!Strings.isNullOrEmpty(inheritanceType)) {
      XmlUtils.xmlAppendAttr(XML_TYPE, inheritanceType, aclBuff);
    }


    String inheritFrom = urlConstructor.getInheritFromUrl(acl);
    if (!Strings.isNullOrEmpty(inheritFrom)) {
      XmlUtils.xmlAppendAttr(XML_INHERIT_FROM, inheritFrom, aclBuff);
    }
    aclBuff.append(">\n");


    // add principal info
    getPrincipalXml(acl, aclBuff);
    XmlUtils.xmlAppendEndTag(XML_ACL, aclBuff);
  }


  /*
   * Generate the record tag for the ACL xml data.
   */
  private void xmlWrapAclRecord(Document acl) throws IOException,
      RepositoryException {
    StringBuilder aclBuff = new StringBuilder();
    xmlWrapAclRecord(aclBuff, acl);


    write(aclBuff.toString().getBytes(XML_DEFAULT_CHARSET));


    if (feedLogBuilder != null) {
      try {
        feedLogBuilder.append(aclBuff);
      } catch (IOException e) {
        // This won't happen with StringBuffer or StringBuilder.
        LOGGER.log(Level.WARNING, "Exception while constructing feed log:", e);
      }
    }
  }


  /*
   * Generate the ACL principal XML data.
   */
  private void getPrincipalXml(Document acl, StringBuilder buff)
      throws IOException, RepositoryException {
    Property property;


    property = acl.findProperty(SpiConstants.PROPNAME_ACLUSERS);
    if (property != null) {
      wrapAclPrincipal(buff, property, AclScope.USER, AclAccess.PERMIT);
    }


    property = acl.findProperty(SpiConstants.PROPNAME_ACLGROUPS);
    if (property != null) {
      wrapAclPrincipal(buff, property, AclScope.GROUP, AclAccess.PERMIT);
    }


    property = acl.findProperty(SpiConstants.PROPNAME_ACLDENYUSERS);
    if (property != null) {
      wrapAclPrincipal(buff, property, AclScope.USER, AclAccess.DENY);
    }


    property = acl.findProperty(SpiConstants.PROPNAME_ACLDENYGROUPS);
    if (property != null) {
      wrapAclPrincipal(buff, property, AclScope.GROUP, AclAccess.DENY);
    }
  }


  /*
   * Wrap the ACL principal info as XML data.
   */
  private static void wrapAclPrincipal(StringBuilder buff, Property property,
      AclScope scope, AclAccess access)
      throws RepositoryException, IOException {
    ValueImpl value;
    while ((value = (ValueImpl) property.nextValue()) != null) {
      Principal principal = (value instanceof PrincipalValue)
          ? ((PrincipalValue) value).getPrincipal()
          : new Principal(value.toString().trim());
      String name = stripRoles(principal.getName(), access);
      if (!Strings.isNullOrEmpty(name)) {
        buff.append("<").append(XML_PRINCIPAL);
        if (principal.getPrincipalType() ==
            SpiConstants.PrincipalType.UNQUALIFIED) {
          // UNQUALIFIED is a special-case on the GSA to allow us to prevent the
          // GSA from mistakeningly finding a domain in the principal name.
          XmlUtils.xmlAppendAttr(ServletUtil.XMLTAG_PRINCIPALTYPE_ATTRIBUTE,
              SpiConstants.PrincipalType.UNQUALIFIED.toString(), buff);
        }
        if (!Strings.isNullOrEmpty(principal.getNamespace())) {
          XmlUtils.xmlAppendAttr(ServletUtil.XMLTAG_NAMESPACE_ATTRIBUTE,
                                 principal.getNamespace(), buff);
        }
        // The GSA's default is EVERYTHING_CASE_SENSITIVE. No need to send the
        // attribute when it is the default.
        if (principal.getCaseSensitivityType()
            != SpiConstants.CaseSensitivityType.EVERYTHING_CASE_SENSITIVE) {
          XmlUtils.xmlAppendAttr(
              ServletUtil.XMLTAG_CASESENSITIVITYTYPE_ATTRIBUTE,
              principal.getCaseSensitivityType().toString(), buff);
        }
        XmlUtils.xmlAppendAttr(XML_SCOPE, scope.toString(), buff);
        XmlUtils.xmlAppendAttr(XML_ACCESS, access.toString(), buff);
        buff.append(">");
        XmlUtils.xmlAppendAttrValue(name, buff);
        XmlUtils.xmlAppendEndTag(XML_PRINCIPAL, buff);
      }
    }
  }


  /*
   * Strip any Roles from the supplied Principal name.
   * Peeker users are discarded entirely, whereas other roles are simply
   * removed from the end of the name.
   *
   * @param name the principal name
   * @param the principal access, one of {@code PERMIT} or {@code DENY}
   */
  private static String stripRoles(String name, AclAccess access) {
    if (!Strings.isNullOrEmpty(name) && name.indexOf('=') >= 0) {
      List<String> skip = (access == AclAccess.PERMIT)
          ? RoleSuffixes.BELOW_READER : RoleSuffixes.ABOVE_READER;
      List<String> strip = (access == AclAccess.PERMIT)
          ? RoleSuffixes.READER_OR_ABOVE : RoleSuffixes.READER_OR_BELOW;


      for (String role : skip) {
        if (name.endsWith(role)) {
          return null;
        }
      }
      for (String role : strip) {
        if (name.endsWith(role)) {
          return name.substring(0, name.length() - role.length());
        }
      }
    }
    return name;
  }


  /**
   * Wrap the metadata and append it to the string buffer. Empty metadata
   * properties are not appended.
   *
   * @param buf string buffer
   * @param document Document
   * @throws RepositoryException if error reading Property from Document
   * @throws IOException only from Appendable, and that can't really
   *         happen when using StringBuilder.
   */
  private void xmlWrapMetadata(StringBuilder buf, Document document)
      throws RepositoryException, IOException {
    boolean overwriteAcls = DocUtils.getOptionalBoolean(document,
        SpiConstants.PROPNAME_OVERWRITEACLS, true);
    buf.append('<').append(XML_METADATA);
    if (!overwriteAcls) {
      XmlUtils.xmlAppendAttr(XML_OVERWRITEACLS,
          Value.getBooleanValue(false).toString(), buf);
    }
    buf.append(">\n");


    // Add all the metadata supplied by the Connector.
    Set<String> propertyNames = document.getPropertyNames();
    if ((propertyNames == null) || propertyNames.isEmpty()) {
      LOGGER.log(Level.WARNING, "Property names set is empty");
    } else {
      // Sort property names so that metadata is written in a canonical form.
      // The GSA's metadata change detection logic depends on the metadata to be
      // in the same order each time in order to prevent reindexing.
      propertyNames = new TreeSet<String>(propertyNames);
      for (String name : propertyNames) {
        if (propertySkipSet.contains(name)) {
          if (LOGGER.isLoggable(Level.FINEST)) {
            logOneProperty(document, name);
          }
          continue;
        }
        Property property = document.findProperty(name);
        if (property != null) {
          wrapOneProperty(buf, name, property);
        }
      }
    }
    XmlUtils.xmlAppendEndTag(XML_METADATA, buf);
  }


  /**
   * Wrap a single Property and append to string buffer. Does nothing if the
   * Property's value is null or zero-length.
   *
   * @param buf string builder
   * @param name the property's name
   * @param property Property
   * @throws RepositoryException if error reading Property from Document
   * @throws IOException only from Appendable, and that can't really
   *         happen when using StringBuilder.
   */
  private static void wrapOneProperty(StringBuilder buf, String name,
      Property property) throws RepositoryException, IOException {
    ValueImpl value = null;
    while ((value = (ValueImpl) property.nextValue()) != null) {
      if (LOGGER.isLoggable(Level.FINEST)) {
        LOGGER.finest("PROPERTY: " + name + " = \"" + value.toString() + "\"");
      }
      String valString = value.toFeedXml();
      if (valString != null && valString.length() > 0) {
        buf.append("<").append(XML_META);
        XmlUtils.xmlAppendAttr(XML_NAME, name, buf);
        XmlUtils.xmlAppendAttr(XML_CONTENT, valString, buf);
        buf.append("/>\n");
      }
    }
  }


  /**
   * Log the given Property's values.  This is called for
   * the small set of document properties that are not simply
   * added to the feed via wrapOneProperty().
   */
  private void logOneProperty(Document document, String name)
      throws RepositoryException, IOException {
    if (SpiConstants.PROPNAME_CONTENT.equals(name)) {
      LOGGER.finest("PROPERTY: " + name + " = \"...content...\"");
    } else {
      Property property = document.findProperty(name);
      if (property != null) {
        ValueImpl value = null;
        while ((value = (ValueImpl) property.nextValue()) != null) {
          LOGGER.finest("PROPERTY: " + name + " = \"" + value.toString()
                        + "\"");
        }
      }
    }
  }


  /**
   * Return an InputStream for the Document's content.
   */
  private InputStream getContentStream(Document document,
      ContentEncoding documentContentEncoding,
      ContentEncoding alternateEncoding) throws RepositoryException {
    InputStream contentStream;
      InputStream original = new BigEmptyDocumentFilterInputStream(
          DocUtils.getOptionalStream(document, SpiConstants.PROPNAME_CONTENT),
          fileSizeLimit.maxDocumentSize());
      InputStream encodedContentStream;
      if (documentContentEncoding == null) {
        encodedContentStream = getEncodedStream(contentEncoding, 
            original, (Context.getInstance().getTeedFeedFile() != null),
            1024 * 1024);
      } else {
        encodedContentStream = original;
      }


      InputStream encodedAlternateStream = getEncodedStream(alternateEncoding, 
          AlternateContentFilterInputStream.getAlternateContent(
          DocUtils.getOptionalString(document, SpiConstants.PROPNAME_TITLE), 
          DocUtils.getOptionalString(document, SpiConstants.PROPNAME_MIMETYPE)),
          false, 2048);


      return new AlternateContentFilterInputStream(
          encodedContentStream, encodedAlternateStream, this);
  }


  /**
   * Wrap the content stream with the suitable encoding (either
   * Base64 or Base64Compressed, based upon GSA encoding support.
   */
  // TODO: Don't compress tiny content or already compressed data
  // (based on mimetype).  This is harder than it sounds.
  private InputStream getEncodedStream(ContentEncoding contentEncoding,
      InputStream content, boolean wrapLines, int ioBufferSize) {
    if (contentEncoding == ContentEncoding.BASE64COMPRESSED) {
      return new Base64FilterInputStream(
          new CompressedFilterInputStream(content, ioBufferSize), wrapLines);
    } else {
      return new Base64FilterInputStream(content, wrapLines);
    }
  }
}
Source Code of com.google.enterprise.connector.pusher.XmlFeed

Related Classes of com.google.enterprise.connector.pusher.XmlFeed