Source Code of org.apache.shindig.gadgets.rewrite.MutableContent

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */
package org.apache.shindig.gadgets.rewrite;


import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;


import org.apache.commons.io.IOUtils;
import org.apache.shindig.common.util.CharsetUtil;
import org.apache.shindig.gadgets.GadgetException;
import org.apache.shindig.gadgets.http.HttpResponse;
import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
import org.apache.shindig.gadgets.parse.HtmlSerialization;
import org.w3c.dom.Document;


import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;


/**
 * Object that maintains a String representation of arbitrary contents
 * and a consistent view of those contents as an HTML parse tree.
 */
public class MutableContent {
  private static final Map<String, Object> EMPTY_MAP = ImmutableMap.of();


  // String representation of contentBytes taking into account the correct
  // encoding of the content.
  private String content;
  private byte[] contentBytes;


  // Encoding of the content bytes. UTF-8 by default.
  private Charset contentEncoding;


  private HttpResponse contentSource;


  private Document document;
  private int numChanges;
  private final GadgetHtmlParser contentParser;
  private Map<String, Object> pipelinedData;


  private static final String MUTABLE_CONTENT_LISTENER = "MutableContentListener";
  private static final Logger logger = Logger.getLogger(MutableContent.class.getName());


  public static void notifyEdit(Document doc) {
    MutableContent mc = (MutableContent) doc.getUserData(MUTABLE_CONTENT_LISTENER);
    if (mc != null) {
      mc.documentChanged();
    }
  }


  /**
   * Construct with decoded string content
   */
  public MutableContent(GadgetHtmlParser contentParser, String content) {
    this.contentParser = contentParser;
    this.content = content;
    this.numChanges = 0;
    this.contentEncoding = Charsets.UTF_8;
  }


  /**
   * Construct with HttpResponse so we can defer string decoding until we actually need
   * the content. Given that we dont rewrite many mime types this is a performance advantage
   */
  public MutableContent(GadgetHtmlParser contentParser, HttpResponse contentSource) {
    this.contentParser = contentParser;
    this.contentSource = contentSource;
    this.contentEncoding = contentSource != null ? contentSource.getEncodingCharset() : null;
  }


  /**
   * Retrieves the current content for this object in String form.
   * If content has been retrieved in parse tree form and has
   * been edited, the String form is computed from the parse tree by
   * rendering it. It is <b>strongly</b> encouraged to avoid switching
   * between retrieval of parse tree (through {@code getParseTree}),
   * with subsequent edits and retrieval of String contents to avoid
   * repeated serialization and deserialization.
   * As a final fallback, if content has been set as bytes, interprets
   * them as a UTF8 String.
   * @return Renderable/active content.
   */
  public String getContent() {
    if (content == null) {
      if (contentSource != null) {
        content = contentSource.getResponseAsString();
        // Clear on first use
        contentSource = null;
      } else if (document != null) {
        content = HtmlSerialization.serialize(document);
      } else if (contentBytes != null) {
        Charset useEncoding = contentEncoding != null ? contentEncoding : Charsets.UTF_8;
        content = useEncoding.decode(ByteBuffer.wrap(contentBytes)).toString();
      }
    }
    return content;
  }


  /**
   * Sets the object's content as a raw String. Note, this operation
   * may clear the document if the content has changed
   * @param newContent New content.
   */
  public void setContent(String newContent) {
    // TODO - Equality check may be unnecessary overhead
    if (content == null || !content.equals(newContent)) {
      content = newContent;
      document = null;
      contentSource = null;
      contentBytes = null;
      incrementNumChanges();
    }
  }


  /**
   * Retrieves the current content for this object as an InputStream.
   * @return Active content as InputStream.
   */
  public InputStream getContentBytes() {
    return new ByteArrayInputStream(getRawContentBytes());
  }


  protected byte[] getRawContentBytes() {
    if (contentBytes == null) {
      if (contentSource != null) {
        try {
          setContentBytesState(IOUtils.toByteArray(contentSource.getResponse()),
              contentSource.getEncodingCharset());
          contentSource = null;
        } catch (IOException e) {
          // Doesn't occur; responseBytes wrapped as a ByteArrayInputStream.
        }
      } else if (content != null) {
        // If retrieving a String here, we've already converted to UTF8.
        // Be sure to reflect this when setting bytes.
        // In the case of HttpResponseBuilder, this re-sets charset in Content-Type
        // to UTF-8 rather than whatever it was before. We do this to standardize
        // on UTF-8 for all String handling.
        setContentBytesState(CharsetUtil.getUtf8Bytes(content), Charsets.UTF_8);
      } else if (document != null) {
        setContentBytesState(
            CharsetUtil.getUtf8Bytes(HtmlSerialization.serialize(document)), Charsets.UTF_8);
      }
    }
    return contentBytes;
  }


  /**
   * Sets the object's contentBytes as the given raw input. If ever interpreted
   * as a String, the data will be decoded as the encoding specified.
   * Note, this operation may clear the document if the content has changed.
   * Also note, it's mandated that the new bytes array will NOT be modified
   * by the caller of this API. The array is not copied, for performance reasons.
   * If the caller may modify a byte array, it MUST pass in a new copy.
   * @param newBytes New content.
   */
  public void setContentBytes(byte[] newBytes, Charset newEncoding) {
    if (contentBytes == null || !Arrays.equals(contentBytes, newBytes)) {
      setContentBytesState(newBytes, newEncoding);
      document = null;
      contentSource = null;
      content = null;
      incrementNumChanges();
    }
  }
  
  /**
   * Sets content to new byte array, with unspecified charset. It is
   * recommended to use the {@code setContentBytes(byte[], Charset)} API instead,
   * where possible.
   * @param newBytes New content.
   */
  public final void setContentBytes(byte[] newBytes) {
    setContentBytes(newBytes, null);
  }
  
  /**
   * Sets internal state having to do with content bytes, from the provided
   * byte array and charset.
   * This MUST be the only place in which MutableContent's notion of encoding is mutated.
   * @param newBytes New content.
   * @param newEncoding Encoding for the bytes, or null for unspecified.
   */
  protected void setContentBytesState(byte[] newBytes, Charset newEncoding) {
    contentBytes = newBytes;
    contentEncoding = newEncoding;
  }


  /**
   * Notification that the content of the document has changed. Causes the content
   * string and bytes to be cleared.
   */
  public void documentChanged() {
    if (document != null) {
      content = null;
      contentSource = null;
      contentBytes = null;
      incrementNumChanges();
    }
  }


  /**
   * Retrieves the object contents in parsed form, if a
   * {@code GadgetHtmlParser} is configured and is able to parse the string
   * contents appropriately. To modify the object's
   * contents by parse tree after setting new String contents,
   * this method must be called again. However, this practice is highly
   * discouraged, as parsing a tree from String is a costly operation and should
   * be done at most once per rewrite.
   */
  public Document getDocument() {
    // TODO - Consider actually imposing one parse limit on rewriter pipeline
    if (document != null) {
      return document;
    }
    try {
      document = contentParser.parseDom(getContent());
      document.setUserData(MUTABLE_CONTENT_LISTENER, this, null);
    } catch (GadgetException e) {
      logger.log(Level.WARNING, "Got GadgetException when parsing content", e);
      return null;
    }
    return document;
  }


  public int getNumChanges() {
    return numChanges;
  }


  protected void incrementNumChanges() {
    ++numChanges;
  }


  /**
   * True if current state has a parsed document. Allows rewriters to switch mode based on
   * which content is most readily available
   */
  public boolean hasDocument() {
    return (document != null);
  }


  public void addPipelinedData(String key, Object value) {
    if (null == pipelinedData) {
      pipelinedData = Maps.newHashMap();
    }
    pipelinedData.put(key, value);
  }


  public Map<String, Object> getPipelinedData() {
    return (null == pipelinedData) ? EMPTY_MAP : pipelinedData;
  }
}
Source Code of org.apache.shindig.gadgets.rewrite.MutableContent

Related Classes of org.apache.shindig.gadgets.rewrite.MutableContent