Package com.google.caja.plugin.templates

Source Code of com.google.caja.plugin.templates.TemplateSanitizer

// Copyright (C) 2009 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.caja.plugin.templates;

import com.google.caja.SomethingWidgyHappenedError;
import com.google.caja.lang.html.HTML;
import com.google.caja.lang.html.HtmlSchema;
import com.google.caja.parser.html.AttribKey;
import com.google.caja.parser.html.ElKey;
import com.google.caja.parser.html.Nodes;
import com.google.caja.plugin.Placeholder;
import com.google.caja.plugin.PluginMessageType;
import com.google.caja.reporting.Message;
import com.google.caja.reporting.MessageLevel;
import com.google.caja.reporting.MessagePart;
import com.google.caja.reporting.MessageQueue;
import com.google.caja.util.Criterion;

import java.util.ArrayList;
import java.util.List;

import org.w3c.dom.Attr;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;

/**
* Rewrites an IHTML DOM, removing potentially unsafe constructs that
* can be ignored, and issuing errors if the constructs cannot be removed.
*
* @author mikesamuel@gmail.com
*/
public final class TemplateSanitizer {
  private final HtmlSchema schema;
  private final MessageQueue mq;
  private static String cajaPrefix = "data-caja-";

  /**
   * @param schema specifies which tags and attributes are allowed, and which
   *   attribute values are allowed.
   * @param mq a message queue that will receive errors on unsafe nodes or
   *   attributes, and warnings on removed nodes.
   */
  public TemplateSanitizer(HtmlSchema schema, MessageQueue mq) {
    this.schema = schema;
    this.mq = mq;
  }

  /**
   * @param t the node to sanitize.
   * @return true iff the htmlRoot can be safely used.  If false, explanatory
   *     messages were added to the MessageQueue passed to the constructor.
   */
  public boolean sanitize(Node t) {
    boolean valid = true;
    switch (t.getNodeType()) {
      case Node.ELEMENT_NODE:
      {
        Element el = (Element) t;
        ElKey elKey = ElKey.forElement(el);
        {
          if (!schema.isElementAllowed(elKey)) {
            IhtmlMessageType msgType = schema.lookupElement(elKey) != null
                ? IhtmlMessageType.UNSAFE_TAG
                : IhtmlMessageType.UNKNOWN_TAG;

            // Figure out what to do with the disallowed tag.  We can remove it
            // from the node, replace it with its children (fold), or error out.
            boolean ignore = false, fold = false;
            Node p = el.getParentNode();
            if (p != null) {
              if (isElementIgnorable(elKey)) {
                ignore = true;
              } else if (HtmlSchema.isElementFoldable(elKey)) {
                fold = true;
                msgType = IhtmlMessageType.FOLDING_ELEMENT;
              }
            }

            MessageLevel msgLevel
                = ignore || fold ? MessageLevel.WARNING : msgType.getLevel();
            mq.getMessages().add(new Message(
                msgType, msgLevel, Nodes.getFilePositionFor(el), elKey));

            if (ignore) {
              assert p != null// ignore = true  ->  p != null above
              p.removeChild(el);
              return valid;  // Don't recurse to children if removed.
            } else {
              // According to http://www.w3.org/TR/html401/appendix/notes.html
              // the recommended behavior is to try to render an unrecognized
              // element's contents
              return valid & foldElement(elKey, el);
            }
          }
          valid &= sanitizeAttrs(elKey, el, false);
        }
        // We know by construction of org.w3c.Element that there can only be
        // one attribute with a given name.
        // If that were not the case, passes that only inspect the
        // first occurrence of an attribute could be spoofed.
        break;
      }
      case Node.DOCUMENT_FRAGMENT_NODE:
      case Node.TEXT_NODE:
      case Node.CDATA_SECTION_NODE:
      case Node.COMMENT_NODE:
        break;
      default:
        throw new SomethingWidgyHappenedError(t.getNodeName());
    }
    for (Node child : Nodes.childrenOf(t)) {
      valid &= sanitize(child);
    }
    return valid;
  }

  private boolean sanitizeAttrs(ElKey elKey, Element el, boolean ignore) {
    boolean valid = true;
    // Iterate in reverse so that removed attributes don't break iteration.
    NamedNodeMap attrs = el.getAttributes();
    for (int i = attrs.getLength(); --i >= 0;) {
      valid &= sanitizeAttr(elKey, el, (Attr) attrs.item(i), ignore);
    }
    return valid;
  }

  private boolean sanitizeAttr(
      ElKey elKey, Element el, Attr attrib, boolean ignore) {
    boolean valid = true;
    AttribKey attrKey = AttribKey.forAttribute(elKey, attrib);
    HTML.Attribute a = schema.lookupAttribute(attrKey);
    if (null == a) {
      if (!Placeholder.ID_ATTR.is(attrib)) {
        if (attrKey.localName.endsWith("__")) { valid = false; }
        else if (!attrKey.localName.startsWith(cajaPrefix)) {
          // Remove this attribute and create another that
          // starts with "data-caja-".
          String localName = attrKey.localName;
          String value = attrib.getValue();
          removeBadAttribute(el, attrKey);
          el.setAttributeNS(
              attrKey.ns.uri,
              cajaPrefix + localName,
              value == null ? localName : value);
        }
      }
    } else if (!schema.isAttributeAllowed(attrKey)) {
      if (!ignore) {
        mq.addMessage(
            PluginMessageType.UNSAFE_ATTRIBUTE,
            Nodes.getFilePositionFor(attrib), attrKey, elKey);
      }
      valid &= removeBadAttribute(el, attrKey);
    } else {
      // We do not subject "target" attributes to the value criteria
      if (a.getType() != HTML.Attribute.Type.FRAME_TARGET) {
        Criterion<? super String> criteria = a.getValueCriterion();
        if (!criteria.accept(attrib.getNodeValue())) {
          if (!ignore) {
            mq.addMessage(
                PluginMessageType.DISALLOWED_ATTRIBUTE_VALUE,
                Nodes.getFilePositionForValue(attrib),
                attrKey, MessagePart.Factory.valueOf(attrib.getNodeValue()));
          }
          valid &= removeBadAttribute(el, attrKey);
        }
      }
    }
    return valid;
  }

  /**
   * Elements that can be safely removed from the DOM without changing behavior.
   */
  private static boolean isElementIgnorable(ElKey elKey) {
    if (!elKey.isHtml()) { return false; }
    String lcName = elKey.localName;
    return "noscript".equals(lcName) || "noembed".equals(lcName)
        || "noframes".equals(lcName) || "title".equals(lcName);
  }

  /**
   * Fold the children of a {@link HtmlSchema#isElementFoldable foldable}
   * element into that element's parent.
   *
   * <p>
   * This should have the property that:<ul>
   * <li>Every element is processed
   * <li>Elements can recursively fold
   * <li>Folded elements that are implied (such as head when a title
   *     is present) don't break cajoling.
   * <li>We don't fold elements that are explicitly allowed by the whitelist.
   * <li>Nothing is removed from the parse tree without a notification
   *     to the user.
   * </ul>
   *
   * @param el a tag with a mutable parent which will be modified in place.
   * @return true iff the el's children are transitively valid, and if they
   *     could all be folded into the parent.
   */
  private boolean foldElement(ElKey elKey, Element el) {
    boolean valid = true;

    // Recurse to children to ensure that all nodes are processed.
    valid &= sanitizeAttrs(elKey, el, true);
    for (Node child : Nodes.childrenOf(el)) { valid &= sanitize(child); }

    for (Attr a : Nodes.attributesOf(el)) {
      String attrName = a.getNodeName();
      if (attrName.startsWith(cajaPrefix)) {
        attrName = attrName.substring(10);
      }
      mq.addMessage(
          PluginMessageType.CANNOT_FOLD_ATTRIBUTE, Nodes.getFilePositionFor(a),
          MessagePart.Factory.valueOf(attrName),
          MessagePart.Factory.valueOf(el.getLocalName()));
    }

    // Pick the subset of children to fold in.
    List<Node> foldedChildren = new ArrayList<Node>();
    for (Node child : Nodes.childrenOf(el)) {
      switch (child.getNodeType()) {
        case Node.ELEMENT_NODE: case Node.TEXT_NODE:
        case Node.CDATA_SECTION_NODE:
          foldedChildren.add(child);
          break;
        default:
          // Ignore.
      }
    }

    // Rebuild the sibling list, substituting foldedChildren for any occurrences
    // of el.node.
    Node next = el.getNextSibling();
    Node parent = el.getParentNode();
    parent.removeChild(el);
    for (Node n : foldedChildren) { parent.insertBefore(n, next); }

    return valid;
  }

  private boolean removeBadAttribute(Element el, AttribKey attrKey) {
    el.removeAttributeNS(attrKey.ns.uri, attrKey.localName);
    return true;
  }
}
TOP

Related Classes of com.google.caja.plugin.templates.TemplateSanitizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.