Package com.google.caja.lang.html

Source Code of com.google.caja.lang.html.HtmlSchema

// Copyright (C) 2008 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.caja.lang.html;

import com.google.caja.SomethingWidgyHappenedError;
import com.google.caja.config.ConfigUtil;
import com.google.caja.config.WhiteList;
import com.google.caja.lexer.ParseException;
import com.google.caja.parser.html.AttribKey;
import com.google.caja.parser.html.ElKey;
import com.google.caja.parser.html.Namespaces;
import com.google.caja.plugin.LoaderType;
import com.google.caja.plugin.UriEffect;
import com.google.caja.reporting.Message;
import com.google.caja.reporting.MessageQueue;
import com.google.caja.reporting.SimpleMessageQueue;
import com.google.caja.util.Multimap;
import com.google.caja.util.Multimaps;
import com.google.caja.util.Pair;
import com.google.caja.util.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
* An HTML schema which defines attributes of elements and which elements are
* allowed.
*
* @author mikesamuel@gmail.com
*/
public final class HtmlSchema {
  // Public for convenience of c.g.c.plugin.Config.
  public static final URI defaultElementWhitelistURL = URI.create(
      "resource:///com/google/caja/lang/html/htmlall-elements.json");
  public static final URI defaultAttributeWhitelistURL = URI.create(
      "resource:///com/google/caja/lang/html/htmlall-attributes.json");

  private static final String VIRTUALIZATION_PREFIX = "caja-v-";
  private static final ElKey SCRIPT = ElKey.forHtmlElement("script");
  private static final ElKey STYLE = ElKey.forHtmlElement("style");
  private static final String DEFAULT_SCHEMA_INTERFACE = "HTMLElement";
  private static final String UNKNOWN_INTERFACE = "HTMLUnknownElement";

  private final Set<ElKey> allowedElements;
  private final Map<ElKey, HTML.Element> elementDetails;
  private final Set<AttribKey> allowedAttributes;
  private final Map<AttribKey, HTML.Attribute> attributeDetails;
  private final List<HTML.Attribute> attributesForUnknownHTMLElement;

  private static Pair<HtmlSchema, List<Message>> defaultSchema;
  /**
   * The default HTML4 whitelist.  See the JSON files in this directory for
   * the actual definitions.
   */
  public static HtmlSchema getDefault(MessageQueue mq) {
    if (defaultSchema == null) {
      SimpleMessageQueue cacheMq = new SimpleMessageQueue();
      try {
        defaultSchema = Pair.pair(
            new HtmlSchema(
                ConfigUtil.loadWhiteListFromJson(defaultElementWhitelistURL,
                    ConfigUtil.RESOURCE_RESOLVER, cacheMq),
                ConfigUtil.loadWhiteListFromJson(defaultAttributeWhitelistURL,
                    ConfigUtil.RESOURCE_RESOLVER, cacheMq)),
            cacheMq.getMessages());
      // If the default schema is borked, there's not much we can do.
      } catch (IOException ex) {
        mq.getMessages().addAll(cacheMq.getMessages());
        throw new SomethingWidgyHappenedError("Default schema is borked", ex);
      } catch (ParseException ex) {
        cacheMq.getMessages().add(ex.getCajaMessage());
        mq.getMessages().addAll(cacheMq.getMessages());
        throw new SomethingWidgyHappenedError("Default schema is borked", ex);
      }
    }
    mq.getMessages().addAll(defaultSchema.b);
    return defaultSchema.a;
  }

  /**
   * Elements that can and should be removed from the DOM and replaced by their
   * children with no semantic effect, when a document is being sanitized
   * without scripting support.
   */
  public static boolean isElementFoldable(ElKey el) {
    if (!el.isHtml()) { return false; }
    String cname = el.localName;
    return "head".equals(cname) || "body".equals(cname) || "html".equals(cname);
  }

  public HtmlSchema(WhiteList tagList, WhiteList attribList) {
    this.allowedAttributes = Sets.newHashSet();
    for (String key : attribList.allowedItems()) {
      AttribKey attribKey = attribKey(key);
      allowedAttributes.add(attribKey);
    }
    Map<AttribKey, RegularCriterion> criteria = Maps.newHashMap();
    for (WhiteList.TypeDefinition def : attribList.typeDefinitions().values()) {
      final String values = (String) def.get("values", null);
      RegularCriterion criterion = null;
      if (values != null) {
        criterion = RegularCriterion.Factory.fromValueSet(
            Arrays.asList(values.split(",")));
      } else {
        String pattern = (String) def.get("pattern", null);
        if (pattern != null) {
          criterion = RegularCriterion.Factory.fromPattern(
              "(?i:" + pattern + ")");
        }
      }
      if (criterion != null) {
        String key = Strings.lower((String) def.get("key", null));
        criteria.put(attribKey(key), criterion);
      }
    }

    this.attributeDetails = Maps.newHashMap();
    Multimap<ElKey, HTML.Attribute> attributeDetailsByElement
        = Multimaps.newListHashMultimap();
    for (WhiteList.TypeDefinition def : attribList.typeDefinitions().values()) {
      String key = Strings.lower((String) def.get("key", null));
      AttribKey elAndAttrib = attribKey(key);
      if (elAndAttrib == null) { throw new NullPointerException(key); }
      ElKey element = elAndAttrib.el;
      HTML.Attribute.Type type = HTML.Attribute.Type.NONE;
      String typeName = (String) def.get("type", null);
      if (typeName != null) {
        // TODO(mikesamuel): divert IllegalArgumentExceptions to MessageQueue
        type = HTML.Attribute.Type.valueOf(typeName);
      }
      String loaderTypeStr = (String) def.get("loaderType", null);
      // TODO(mikesamuel): divert IllegalArgumentExceptions to MessageQueue
      LoaderType loaderType = loaderTypeStr != null
          ? LoaderType.valueOf(loaderTypeStr) : null;
      String uriEffectStr = (String) def.get("uriEffect", null);
      // TODO(mikesamuel): divert IllegalArgumentExceptions to MessageQueue
      UriEffect uriEffect = uriEffectStr != null
          ? UriEffect.valueOf(uriEffectStr) : null;
      RegularCriterion elCriterion = criteria.get(elAndAttrib);
      RegularCriterion wcCriterion = criteria.get(elAndAttrib.onAnyElement());
      RegularCriterion criterion = conjunction(elCriterion, wcCriterion);
      String defaultValue = (String) def.get("default", null);
      boolean optional = Boolean.TRUE.equals(def.get("optional", true));
      String safeValue = (String) def.get("safeValue", null);
      if (safeValue == null) {
        String candidate = defaultValue != null ? defaultValue : "";
        if (criterion.accept(candidate)) {
          safeValue = candidate;
        } else {
          String values = (String) def.get("values", null);
          if (values != null) {
            safeValue = values.split(",")[0];
          }
        }
      }
      boolean valueless = Boolean.TRUE.equals(def.get("valueless", false));
      // For valueless attributes, like checked, we allow the blank value.
      if (valueless && !criterion.accept("")) {
        criterion = RegularCriterion.Factory.or(
            criterion,
            RegularCriterion.Factory.fromValueSet(Collections.singleton("")));
      }

      HTML.Attribute a = new HTML.Attribute(
          elAndAttrib, type, defaultValue, safeValue, valueless, optional,
          loaderType, uriEffect, criterion);
      attributeDetails.put(elAndAttrib, a);
      attributeDetailsByElement.put(element, a);
    }

    this.attributesForUnknownHTMLElement = Collections.unmodifiableList(
        Lists.newArrayList(attributeDetailsByElement.get(ElKey.HTML_WILDCARD)));

    this.allowedElements = Sets.newHashSet();
    for (String qualifiedName : tagList.allowedItems()) {
      allowedElements.add(
          ElKey.forElement(Namespaces.HTML_DEFAULT, qualifiedName));
    }
    this.elementDetails = Maps.newHashMap();
    for (WhiteList.TypeDefinition def : tagList.typeDefinitions().values()) {
      String qualifiedTagName = (String) def.get("key", null);
      ElKey key = ElKey.forElement(Namespaces.HTML_DEFAULT, qualifiedTagName);
      Collection<HTML.Attribute> specific = attributeDetailsByElement.get(key);
      ElKey wc = ElKey.wildcard(key.ns);
      Collection<HTML.Attribute> general = attributeDetailsByElement.get(wc);
      List<HTML.Attribute> attrs = Lists.newArrayList(specific);
      if (!general.isEmpty()) {
        Set<AttribKey> present = Sets.newHashSet();
        for (HTML.Attribute a : attrs) {
          present.add(a.getKey().onElement(wc));
        }
        for (HTML.Attribute a : general) {
          if (!present.contains(a.getKey())) { attrs.add(a); }
        }
      }
      boolean empty = (Boolean) def.get("empty", Boolean.FALSE);
      boolean optionalEnd = (Boolean) def.get("optionalEnd", Boolean.FALSE);
      boolean containsText = (Boolean) def.get("textContent", Boolean.TRUE)
          && !empty;
      String domInterface = (String) def.get("interface",
          DEFAULT_SCHEMA_INTERFACE);
      elementDetails.put(
          key, new HTML.Element(key, attrs, empty, optionalEnd, containsText,
              domInterface));
    }
  }

  public Set<AttribKey> getAttributeNames() {
    return attributeDetails.keySet();
  }

  // TODO(kpreid): Virtualization makes this set arguably unbounded; review
  // whether any code is affected.
  public Set<ElKey> getElementNames() {
    return elementDetails.keySet();
  }

  public boolean isElementAllowed(ElKey elementName) {
    return allowedElements.contains(elementName) ||
        elementName.localName.startsWith(VIRTUALIZATION_PREFIX);
  }

  public HTML.Element lookupElement(ElKey elementName) {
    HTML.Element details = elementDetails.get(elementName);
    if (details != null) {
      return details;
    } else {
      // May be a virtualized form of an unknown element, but we don't care --
      // just virtualize all non-global attributes on it.
      return new HTML.Element(elementName, attributesForUnknownHTMLElement,
        false, false, false, UNKNOWN_INTERFACE);
    }
  }

  /**
   * Elements that are to be rewritten with different tag names to avoid the
   * known or unknown browser semantics of them.
   */
  public boolean isElementVirtualized(ElKey el) {
    // <script> and <style> are excluded because we want to rewrite them,
    // not virtualize them, but they are considered unsafe because their
    // text contents have meaning.
    return !isElementAllowed(el) && !SCRIPT.equals(el) && !STYLE.equals(el);
  }

  public ElKey virtualToRealElementName(ElKey virtual) {
    if (isElementVirtualized(virtual)) {
      // TODO(kpreid): Better to modify the NS instead of the local name if the
      // input is not in HTML NS (i.e. we are in XML rather than HTML).
      // Currently can't happen as only HTML elements are virtualized.
      return ElKey.forElement(virtual.ns,
                              VIRTUALIZATION_PREFIX + virtual.localName);
    } else {
      return virtual;
    }
  }

  public boolean isAttributeAllowed(AttribKey k) {
    HTML.Attribute a = lookupAttribute(k);
    if (a != null && allowedAttributes.contains(a.getKey())) { return true; }
    String keyName = k.localName;
    return keyName.startsWith("data-caja-") && !keyName.endsWith("___");
  }

  public HTML.Attribute lookupAttribute(AttribKey k) {
    if (isElementVirtualized(k.el)) {
      k = k.onElement(virtualToRealElementName(k.el));
    }
    HTML.Attribute attr = attributeDetails.get(k);
    if (attr == null) {
      attr = attributeDetails.get(k.onAnyElement());
    }
    return attr;
  }

  private static AttribKey attribKey(String key) {
    int separator = key.indexOf("::");
    if (separator == -1) {
      throw new IllegalArgumentException("no :: in: " + key);
    }
    String elQName = key.substring(0, separator);
    String attrQName = key.substring(separator + 2);
    ElKey el = ElKey.forElement(Namespaces.HTML_DEFAULT, elQName);
    if (el == null) { throw new NullPointerException(elQName); }
    AttribKey a = AttribKey.forAttribute(
        Namespaces.HTML_DEFAULT, el, attrQName);
    if (a == null) { throw new NullPointerException(attrQName); }
    return a;
  }

  private static RegularCriterion conjunction(
      RegularCriterion a, RegularCriterion b) {
    RegularCriterion c = a;
    if (b != null) { c = c == null ? b : RegularCriterion.Factory.and(c, b); }
    return c != null ? c : RegularCriterion.Factory.optimist();
  }
}
TOP

Related Classes of com.google.caja.lang.html.HtmlSchema

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact coftware#gmail.com.