// Package com.google.caja.lang.html
//
// Source Code of com.google.caja.lang.html.HtmlDefinitions$Builder

// Copyright (C) 2008 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.caja.lang.html;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Date;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;

import com.google.caja.SomethingWidgyHappenedError;
import com.google.caja.config.AllowedFileResolver;
import com.google.caja.config.ConfigUtil;
import com.google.caja.config.ImportResolver;
import com.google.caja.lang.html.HTML.Attribute;
import com.google.caja.lexer.FilePosition;
import com.google.caja.lexer.HtmlTextEscapingMode;
import com.google.caja.lexer.InputSource;
import com.google.caja.lexer.ParseException;
import com.google.caja.parser.ParseTreeNodeContainer;
import com.google.caja.parser.html.AttribKey;
import com.google.caja.parser.html.ElKey;
import com.google.caja.parser.js.Block;
import com.google.caja.parser.js.Expression;
import com.google.caja.parser.js.Identifier;
import com.google.caja.parser.js.IntegerLiteral;
import com.google.caja.parser.js.Reference;
import com.google.caja.parser.js.Statement;
import com.google.caja.parser.js.StringLiteral;
import com.google.caja.parser.quasiliteral.QuasiBuilder;
import com.google.caja.plugin.LoaderType;
import com.google.caja.plugin.UriEffect;
import com.google.caja.reporting.EchoingMessageQueue;
import com.google.caja.reporting.MessageContext;
import com.google.caja.reporting.MessageQueue;
import com.google.caja.reporting.PropertyNameQuotingMode;
import com.google.caja.reporting.RenderContext;
import com.google.caja.reporting.SimpleMessageQueue;
import com.google.caja.tools.BuildCommand;
import com.google.caja.util.Charsets;
import com.google.caja.util.Function;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

/**
* Generates a JavaScript tree with tables mapping HTML element and attribute
* names to information about those elements and attributes.
*
* @author mikesamuel@gmail.com
*/
public final class HtmlDefinitions {

  /** Combined to form a bit-field describing an element. */
  public enum EFlag {
    OPTIONAL_ENDTAG(1),
    EMPTY(2),
    CDATA(4),
    RCDATA(8),
    UNSAFE(0x10),
    /**
     * Elements that can be removed from the DOM without changing non-scripting
     * semantics as long as their children are folded into the element's parent.
     * Should always be paired with UNSAFE.
     *
     * This flag is used by html-sanitizer.js which does sanitization without
     * script sandboxing, as opposed to VIRTUALIZED which is used in full
     * client-side Caja.
     */
    FOLDABLE(0x20),
    SCRIPT(0x40),
    STYLE(0x80),
    /**
     * Elements that are unsafe to pass through, but which may be kept in the
     * DOM by renaming. Should always be paired with UNSAFE.
     */
    VIRTUALIZED(0x100),
    ;

    public final int bitMask;

    EFlag(int bitMask) { this.bitMask = bitMask; }
  }

  /**
   * Key for the {@code src} attribute of the HTML {@code script} element.
   * When building the exported {@code ATTRIBS} table,
   * {@link #generateJavascriptDefinitions} keeps the entry for this key even
   * if the {@code script} element itself is not allowed by the schema.
   */
  private static final AttribKey SCRIPT_SRC = AttribKey.forHtmlAttrib(
      ElKey.forHtmlElement("script"), "src");

  private static Statement export(String key, Expression e) {
    FilePosition unk = FilePosition.UNKNOWN;
    // The html4[@k] assignment is an explicit export for Closure Compiler
    return (Statement) QuasiBuilder.substV(
            "{ html4.@i = @e; html4[@k] = html4.@i; }",
            "i", new Reference(new Identifier(unk, key)),
            "k", new StringLiteral(unk, key),
            "e", e);
  }

  private static <U> Statement mapFromEnum(
      Iterable<U> entries, String key,
      Function<U, String> keyMaker, Function<U, Integer> valueMaker) {
    FilePosition unk = FilePosition.UNKNOWN;
    List<StringLiteral> keys = Lists.newArrayList();
    List<IntegerLiteral> values = Lists.newArrayList();
    for (U e : entries) {
      // Since enum values are public, we don't want Closure compiler
      // to rewrite them, so we need quoted keys.
      String quoted = StringLiteral.toQuotedValue(keyMaker.apply(e));
      keys.add(new StringLiteral(unk, quoted));
      values.add(new IntegerLiteral(unk, valueMaker.apply(e)));
    }
    return export(key, (Expression) QuasiBuilder.substV(
        "({ @k*: @v* })",
        "i", new Reference(new Identifier(unk, key)),
        "k", new ParseTreeNodeContainer(keys),
        "v", new ParseTreeNodeContainer(values)));
  }

  /**
   * Generate a JS object-building statement from the given Map, whose keys
   * should stringify to the JS keys and whose values are mapped by the function
   * {@code codegen}.
   */
  private static <T> Statement mapFromMap(
      Map<?, T> entries, String key, Function<T, Expression> codegen) {
    final FilePosition unk = FilePosition.UNKNOWN;
    List<StringLiteral> keys = new ArrayList<StringLiteral>();
    List<Expression> values = new ArrayList<Expression>();
    for (Map.Entry<?, T> e : entries.entrySet()) {
      keys.add(StringLiteral.valueOf(unk, e.getKey().toString()));
      values.add(codegen.apply(e.getValue()));
    }
    return export(key, (Expression) QuasiBuilder.substV(
        "({ @k*: @v* })",
        "k", new ParseTreeNodeContainer(keys),
        "v", new ParseTreeNodeContainer(values)));
  }

  public static Block generateJavascriptDefinitions(HtmlSchema schema) {
    final FilePosition unk = FilePosition.UNKNOWN;
    Map<AttribKey, HTML.Attribute.Type> atypes = attributeTypes(schema);
    Map<ElKey, EnumSet<EFlag>> eflags = elementFlags(schema);
    Map<ElKey, String> einterfaces = elementDOMInterfaces(schema);
    Map<AttribKey, UriEffect> uriEffects = uriEffects(schema);
    Map<AttribKey, LoaderType> ltypes = loaderTypes(schema);

    Block definitions = new Block();
    definitions.appendChild(QuasiBuilder.substV("var html4 = {};"));

    definitions.appendChild(mapFromEnum(
        EnumSet.allOf(HTML.Attribute.Type.class),
        "atype",
        new Function<HTML.Attribute.Type, String>() {
          public String apply(HTML.Attribute.Type f) {
            return f.name();
          }
        },
        new Function<HTML.Attribute.Type, Integer>() {
          public Integer apply(HTML.Attribute.Type f) {
            return A_TYPE_MAP.get(f);
          }
        })
    );

    {
      List<StringLiteral> keys = new ArrayList<StringLiteral>();
      List<IntegerLiteral> values = new ArrayList<IntegerLiteral>();
      for (Map.Entry<AttribKey, HTML.Attribute.Type> e : atypes.entrySet()) {
        AttribKey key = e.getKey();
        if (ElKey.HTML_WILDCARD.equals(key.el)
            || schema.isElementAllowed(key.el)
            // Whitelisted to allow dynamic script loading via proxy
            || SCRIPT_SRC.equals(key)) {
          keys.add(StringLiteral.valueOf(unk, key.toString()));
          values.add(new IntegerLiteral(unk, A_TYPE_MAP.get(e.getValue())));
        }
      }
      definitions.appendChild(export("ATTRIBS",
          (Expression) QuasiBuilder.substV(
              "({ @k*: @v* })",
              "k", new ParseTreeNodeContainer(keys),
              "v", new ParseTreeNodeContainer(values))));
    }

    definitions.appendChild(mapFromEnum(
        EnumSet.allOf(EFlag.class),
        "eflags",
        new Function<EFlag, String>() {
          public String apply(EFlag f) {
            return f.name();
          }
        },
        new Function<EFlag, Integer>() {
          public Integer apply(EFlag f) {
            return f.bitMask;
          }
        })
    );

    definitions.appendChild(mapFromMap(
        eflags,
        "ELEMENTS",
        new Function<EnumSet<EFlag>, Expression>() {
          public Expression apply(EnumSet<EFlag> flags) {
            int value = 0;
            for (EFlag f : flags) { value |= f.bitMask; }
            return new IntegerLiteral(unk, value);
          }
        })
    );

    definitions.appendChild(mapFromMap(
        einterfaces,
        "ELEMENT_DOM_INTERFACES",
        new Function<String, Expression>() {
          public Expression apply(String domInterface) {
            return new StringLiteral(unk, domInterface);
          }
        })
    );

    definitions.appendChild(mapFromEnum(
        EnumSet.allOf(UriEffect.class),
        "ueffects",
        new Function<UriEffect, String>() {
          public String apply(UriEffect f) {
            return f.name();
          }
        },
        new Function<UriEffect, Integer>() {
          public Integer apply(UriEffect f) {
            return A_UEFFECT_MAP.get(f);
          }
        })
    );

    definitions.appendChild(mapFromEnum(
        uriEffects.entrySet(),
        "URIEFFECTS",
        new Function<Entry<AttribKey, UriEffect>, String>() {
          public String apply(Entry<AttribKey, UriEffect> f) {
            return f.getKey().toString();
          }
        },
        new Function<Entry<AttribKey, UriEffect>, Integer>() {
          public Integer apply(Entry<AttribKey, UriEffect> f) {
            return A_UEFFECT_MAP.get(f.getValue());
          }
        })
    );

    definitions.appendChild(mapFromEnum(
        EnumSet.allOf(LoaderType.class),
        "ltypes",
        new Function<LoaderType, String>() {
          public String apply(LoaderType f) {
            return f.name();
          }
        },
        new Function<LoaderType, Integer>() {
          public Integer apply(LoaderType f) {
            return L_TYPE_MAP.get(f);
          }
        })
    );

    definitions.appendChild(mapFromEnum(
        ltypes.entrySet(),
        "LOADERTYPES",
        new Function<Entry<AttribKey, LoaderType>, String>() {
          public String apply(Entry<AttribKey, LoaderType> f) {
            return f.getKey().toString();
          }
        },
        new Function<Entry<AttribKey, LoaderType>, Integer>() {
          public Integer apply(Entry<AttribKey, LoaderType> f) {
            return L_TYPE_MAP.get(f.getValue());
          }
        })
    );

    return definitions;
  }

  /**
   * Maps attribute types to integers for use in the JavaScript output.
   * Populated once by the static initializer below, which also verifies that
   * every {@code HTML.Attribute.Type} constant has an entry.
   */
  private static final Map<HTML.Attribute.Type, Integer> A_TYPE_MAP
      = new EnumMap<HTML.Attribute.Type, Integer>(HTML.Attribute.Type.class);
  static {
    // Under no circumstances should this be changed to use Enum.ordinal().
    // This type to integer mapping must stay the same, or version skew
    // will mean that the HTML definitions JavaScript can only be used with
    // the same version of the cajoler that cajoled the gadget.
    A_TYPE_MAP.put(HTML.Attribute.Type.NONE, 0);
    A_TYPE_MAP.put(HTML.Attribute.Type.URI, 1);
    A_TYPE_MAP.put(HTML.Attribute.Type.SCRIPT, 2);
    A_TYPE_MAP.put(HTML.Attribute.Type.STYLE, 3);
    A_TYPE_MAP.put(HTML.Attribute.Type.ID, 4);
    A_TYPE_MAP.put(HTML.Attribute.Type.IDREF, 5);
    A_TYPE_MAP.put(HTML.Attribute.Type.IDREFS, 6);
    A_TYPE_MAP.put(HTML.Attribute.Type.GLOBAL_NAME, 7);
    A_TYPE_MAP.put(HTML.Attribute.Type.LOCAL_NAME, 8);
    A_TYPE_MAP.put(HTML.Attribute.Type.CLASSES, 9);
    A_TYPE_MAP.put(HTML.Attribute.Type.FRAME_TARGET, 10);
    A_TYPE_MAP.put(HTML.Attribute.Type.URI_FRAGMENT, 11);
    A_TYPE_MAP.put(HTML.Attribute.Type.HTML, 12);
    A_TYPE_MAP.put(HTML.Attribute.Type.MEDIA_QUERY, 13);
    // Fail class initialization if a constant was added to the enum without
    // being given an integer above.
    for (HTML.Attribute.Type t : HTML.Attribute.Type.values()) {
      if (!A_TYPE_MAP.containsKey(t)) {
        throw new IllegalStateException("Not all Attribute Types mapped");
      }
    }
  }

  /**
   * Maps URI effects to integers for use in the JavaScript output.
   * Populated once by the static initializer below, which also verifies that
   * every {@code UriEffect} constant has an entry.
   */
  private static final Map<UriEffect, Integer> A_UEFFECT_MAP
      = new EnumMap<UriEffect, Integer>(UriEffect.class);
  static {
    // Under no circumstances should this be changed to use Enum.ordinal().
    // This type to integer mapping must stay the same, or version skew
    // will mean that the HTML definitions JavaScript can only be used with
    // the same version of the cajoler that cajoled the gadget.
    A_UEFFECT_MAP.put(UriEffect.NOT_LOADED, 0);
    A_UEFFECT_MAP.put(UriEffect.SAME_DOCUMENT, 1);
    A_UEFFECT_MAP.put(UriEffect.NEW_DOCUMENT, 2);
    // Fail class initialization if a constant was added to the enum without
    // being given an integer above.
    for (UriEffect u : UriEffect.values()) {
      if (!A_UEFFECT_MAP.containsKey(u)) {
        throw new IllegalStateException("Not all UriEffects mapped");
      }
    }
  }

  /**
   * Maps loader types to integers for use in the JavaScript output.
   * Populated once by the static initializer below, which also verifies that
   * every {@code LoaderType} constant has an entry.
   */
  private static final Map<LoaderType, Integer> L_TYPE_MAP
      = new EnumMap<LoaderType, Integer>(LoaderType.class);
  static {
    // Under no circumstances should this be changed to use Enum.ordinal().
    // This type to integer mapping must stay the same, or version skew
    // will mean that the HTML definitions JavaScript can only be used with
    // the same version of the cajoler that cajoled the gadget.
    L_TYPE_MAP.put(LoaderType.DATA, 0);
    L_TYPE_MAP.put(LoaderType.SANDBOXED, 1);
    L_TYPE_MAP.put(LoaderType.UNSANDBOXED, 2);
    // Fail class initialization if a constant was added to the enum without
    // being given an integer above.
    for (LoaderType t : LoaderType.values()) {
      if (!L_TYPE_MAP.containsKey(t)) {
        throw new IllegalStateException("Not all Loader Types mapped");
      }
    }
  }

  public static int getJavascriptValueForAType(HTML.Attribute.Type atype) {
    return A_TYPE_MAP.get(atype);
  }

  public static int getJavascriptValueForUEffect(UriEffect uEffect) {
    return A_UEFFECT_MAP.get(uEffect);
  }

  public static int getJavascriptValueForLType(LoaderType ltype) {
    return L_TYPE_MAP.get(ltype);
  }

  private static Map<AttribKey, HTML.Attribute.Type> attributeTypes(
      HtmlSchema schema) {
    Map<AttribKey, HTML.Attribute.Type> attributeFlags = Maps.newTreeMap();
    for (AttribKey attribKey : schema.getAttributeNames()) {
      if (schema.isAttributeAllowed(attribKey)) {
        HTML.Attribute a = schema.lookupAttribute(attribKey);
        HTML.Attribute.Type type = a.getType();
        attributeFlags.put(attribKey, type);
      }
    }
    return attributeFlags;
  }

  private interface SchemaExtractor<Result> { Result extract(HTML.Attribute attr); }

  private static <A> Map<AttribKey, A> deriveMapFromSchema(
      HtmlSchema schema, SchemaExtractor<A> extractor) {
    Map<AttribKey, A> result = Maps.newTreeMap();
    for (AttribKey attribKey : schema.getAttributeNames()) {
      if (schema.isAttributeAllowed(attribKey)) {
        HTML.Attribute a = schema.lookupAttribute(attribKey);
        A type = extractor.extract(a);
        if (null != type) {
          result.put(attribKey, type);
        }
      }
    }
    return result;
  }

  private static Map<AttribKey, UriEffect> uriEffects(
      HtmlSchema schema) {
    return deriveMapFromSchema(schema, new SchemaExtractor<UriEffect>() {
      public UriEffect extract(Attribute attr) {
        return attr.getUriEffect();
      }
    });
  }

  private static Map<AttribKey, LoaderType> loaderTypes(
      HtmlSchema schema) {
    return deriveMapFromSchema(schema, new SchemaExtractor<LoaderType>() {
      public LoaderType extract(Attribute attr) {
        return attr.getLoaderType();
      }
    });
  }

  private static Map<ElKey, EnumSet<EFlag>> elementFlags(HtmlSchema schema) {
    final ElKey SCRIPT = ElKey.forHtmlElement("script");
    final ElKey STYLE = ElKey.forHtmlElement("style");
    Map<ElKey, EnumSet<EFlag>> elementFlags = Maps.newTreeMap();
    for (ElKey elementName : schema.getElementNames()) {
      HTML.Element el = schema.lookupElement(elementName);
      EnumSet<EFlag> flags = EnumSet.noneOf(EFlag.class);
      if (el.isEndTagOptional()) { flags.add(EFlag.OPTIONAL_ENDTAG); }
      if (el.isEmpty()) { flags.add(EFlag.EMPTY); }
      if (elementName.isHtml()) {
        switch (HtmlTextEscapingMode.getModeForTag(elementName.localName)) {
          case CDATA:
            flags.add(EFlag.CDATA);
            break;
          case RCDATA:
            flags.add(EFlag.RCDATA);
            break;
          default: break;
        }
      }
      if (!schema.isElementAllowed(elementName)) {
        flags.add(EFlag.UNSAFE);
        if (SCRIPT.equals(elementName)) {
          flags.add(EFlag.SCRIPT);
        } else if (STYLE.equals(elementName)) {
          flags.add(EFlag.STYLE);
        }
      }
      if (HtmlSchema.isElementFoldable(elementName)) {
        flags.add(EFlag.FOLDABLE);
      }
      if (schema.isElementVirtualized(elementName)) {
        flags.add(EFlag.VIRTUALIZED);
      }
      elementFlags.put(elementName, flags);
    }
    return elementFlags;
  }

  private static Map<ElKey, String> elementDOMInterfaces(HtmlSchema schema) {
    Map<ElKey, String> data = Maps.newTreeMap();
    for (ElKey elementName : schema.getElementNames()) {
      HTML.Element el = schema.lookupElement(elementName);
      data.put(elementName, el.getDOMInterface());
    }
    return data;
  }

  private static void generateSourceText(HtmlSchema schema, Writer out)
      throws IOException {
    String currentDate = "" + new Date();
    if (currentDate.indexOf("\n") >= 0) {
      throw new SomethingWidgyHappenedError("Date should not contain newline");
    }
    out.write("// Copyright Google Inc.\n");
    out.write("// Licensed under the Apache Licence Version 2.0\n");
    out.write("// Autogenerated at " + currentDate + "\n");
    out.write("// @overrides window\n");
    out.write("// @provides html4\n");
    Block node = generateJavascriptDefinitions(schema);
    RenderContext rc = new RenderContext(node.makeRenderer(out, null))
        .withPropertyNameQuotingMode(
            PropertyNameQuotingMode.PRESERVE_QUOTES);
    renderFlattenedBlocks(node, rc);
    rc.getOut().noMoreTokens();
    out.write("\n");
    out.write("// export for Closure Compiler\n");
    out.write("if (typeof window !== 'undefined') {\n");
    out.write("  window['html4'] = html4;\n");
    out.write("}\n");
  }

  /**
   * The Blocks in the tree are used just as bundles of statements, so
   * flatten them out.
   */
  private static void renderFlattenedBlocks(Statement node, RenderContext rc) {
    if (node instanceof Block) {
      for (Statement s : ((Block) node).children()) {
        renderFlattenedBlocks(s, rc);
        if (!s.isTerminal()) { rc.getOut().consume(";"); }
      }
    } else {
      node.render(rc);
    }
  }

  public static class Builder implements BuildCommand {
    public boolean build(List<File> inputs, List<File> deps, Map<String, Object> options,
        File output) throws IOException {
      File elementsFile = null;
      File attrsFile = null;
      for (File input : inputs) {
        if (input.getName().endsWith(".json")) {
          if (elementsFile == null) {
            elementsFile = input;
          } else if (attrsFile == null) {
            attrsFile = input;
          } else {
            throw new IOException("Unused input " + input);
          }
        }
      }
      if (elementsFile == null) {
        throw new IOException("No JSON whitelist for HTML elements");
      }
      if (attrsFile == null) {
        throw new IOException("No JSON whitelist for HTML attributes");
      }

      FilePosition elements = FilePosition.startOfFile(new InputSource(
          elementsFile.getAbsoluteFile().toURI()));
      FilePosition attrs = FilePosition.startOfFile(new InputSource(
          attrsFile.getAbsoluteFile().toURI()));

      MessageContext mc = new MessageContext();
      mc.addInputSource(elements.source());
      mc.addInputSource(attrs.source());
      MessageQueue mq = new EchoingMessageQueue(
          new PrintWriter(new OutputStreamWriter(System.err), true), mc, false);

      Set<File> inputsAndDeps = new HashSet<File>();
      for (File f : inputs) { inputsAndDeps.add(f.getAbsoluteFile()); }
      for (File f : deps) { inputsAndDeps.add(f.getAbsoluteFile()); }

      ImportResolver resolver = new AllowedFileResolver(inputsAndDeps);

      HtmlSchema schema;
      try {
        schema = new HtmlSchema(
            ConfigUtil.loadWhiteListFromJson(
                elements.source().getUri(), resolver, mq),
            ConfigUtil.loadWhiteListFromJson(
                attrs.source().getUri(), resolver, mq));
      } catch (ParseException ex) {
        ex.toMessageQueue(mq);
        throw (IOException) new IOException("Failed to parse schema")
            .initCause(ex);
      }

      Writer out = new OutputStreamWriter(
          new FileOutputStream(output), Charsets.UTF_8.name());
      try {
        generateSourceText(schema, out);
      } finally {
        out.close();
      }
      return true;
    }
  }

  public static void main(String[] args) throws IOException {
    HtmlSchema schema = HtmlSchema.getDefault(new SimpleMessageQueue());
    Writer out = new OutputStreamWriter(System.out);
    try {
      generateSourceText(schema, out);
    } finally {
      out.close();
    }
  }
}
// TOP
//
// Related Classes of com.google.caja.lang.html.HtmlDefinitions$Builder
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.