Package nu.validator.spec.html5

Source Code of nu.validator.spec.html5.Html5SpecBuilder

/*
* Copyright (c) 2007-2012 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

package nu.validator.spec.html5;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.TreeMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.sax.HtmlParser;
import nu.validator.saxtree.DocumentFragment;
import nu.validator.saxtree.TreeBuilder;
import nu.validator.spec.Spec;
import nu.validator.xml.AttributesImpl;
import nu.validator.xml.EmptyAttributes;

import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

import com.thaiopensource.xml.util.Name;

public class Html5SpecBuilder implements ContentHandler {

    /**
     * The XHTML namespace URI. Element events are only acted upon when their
     * namespace is this one; note that the handlers in this class compare
     * against it by identity ({@code ==}).
     */
    private static final String NS = "http://www.w3.org/1999/xhtml";

    /**
     * Base URI that fragment identifiers are appended to when links into the
     * spec are generated. Overridable through the
     * {@code nu.validator.spec.html5-link} system property.
     */
    private static final String SPEC_LINK_URI = System.getProperty(
            "nu.validator.spec.html5-link",
            "http://www.whatwg.org/specs/web-apps/current-work/");

    /**
     * URI the spec is loaded from by the no-argument {@code parseSpec()}.
     * Overridable through the {@code nu.validator.spec.html5-load} system
     * property.
     */
    private static final String SPEC_LOAD_URI = System.getProperty(
            "nu.validator.spec.html5-load",
            "http://www.whatwg.org/specs/web-apps/current-work/");

    /** Matches heading text containing "The" anywhere (DOTALL, so line breaks are allowed). */
    private static final Pattern THE = Pattern.compile("^.*The.*$", Pattern.DOTALL);

    /**
     * Matches heading text that contains "The" and ends with "element"
     * followed only by optional whitespace.
     */
    private static final Pattern ELEMENT = Pattern.compile("^.*The.*element\\s*$", Pattern.DOTALL);

    /** Expected text of the first dt of an element dl: "Categories" with optional surrounding whitespace. */
    private static final Pattern CATEGORIES = Pattern.compile("^\\s*Categories\\s*");

    /**
     * Expected text of the second dt: "Contexts in which this element can be
     * used" (or "these elements"), with an optional trailing colon.
     */
    private static final Pattern CONTEXT = Pattern.compile("^\\s*Contexts\\s+in\\s+which\\s+th(is|ese)\\s+element[s]?\\s+can\\s+be\\s+used:?\\s*");

    /** Expected text of the third dt: "Content model" with an optional trailing colon. */
    private static final Pattern CONTENT_MODEL = Pattern.compile("^\\s*Content\\s+model:?\\s*$");

    /** Expected text of the fourth dt: "Content attributes" with an optional trailing colon. */
    private static final Pattern ATTRIBUTES = Pattern.compile("^\\s*Content\\s+attributes:?\\s*$");

    private static final Map<String, String[]> validInputTypesByAttributeName = new TreeMap<String, String[]>();

    static {
        validInputTypesByAttributeName.put("accept", new String[] { "file" });
        validInputTypesByAttributeName.put("alt", new String[] { "image" });
        validInputTypesByAttributeName.put("autocomplete", new String[] {
                "text", "search", "url", "tel", "e-mail", "password", "datetime",
                "date", "month", "week", "time", "datetime-local", "number",
                "range", "color" });
        validInputTypesByAttributeName.put("checked", new String[] { "checkbox", "radio" });
        validInputTypesByAttributeName.put("dirname", new String[] { "text", "search" });
        validInputTypesByAttributeName.put("formaction", new String[] { "submit", "image" });
        validInputTypesByAttributeName.put("formenctype", new String[] { "submit", "image" });
        validInputTypesByAttributeName.put("formmethod", new String[] { "submit", "image" });
        validInputTypesByAttributeName.put("formnovalidate", new String[] { "submit", "image" });
        validInputTypesByAttributeName.put("formtarget", new String[] { "submit", "image" });
        validInputTypesByAttributeName.put("height", new String[] { "image" });
        validInputTypesByAttributeName.put("list", new String[] { "text",
                "search", "url", "tel", "e-mail", "datetime", "date", "month",
                "week", "time", "datetime-local", "number", "range", "color" });
        validInputTypesByAttributeName.put("max", new String[] { "datetime", "date", "month",
                "week", "time", "datetime-local", "number", "range", });
        validInputTypesByAttributeName.put("maxlength", new String[] { "text", "search", "url",
                "tel", "e-mail", "password" });
        validInputTypesByAttributeName.put("min", new String[] { "datetime", "date", "month",
                "week", "time", "datetime-local", "number", "range", });
        validInputTypesByAttributeName.put("multiple", new String[] { "email", "file" });
        validInputTypesByAttributeName.put("pattern", new String[] { "text", "search", "url",
                "tel", "e-mail", "password" });
        validInputTypesByAttributeName.put("placeholder", new String[] { "text", "search", "url",
                "tel", "e-mail", "password", "number" });
        validInputTypesByAttributeName.put("readonly", new String[] {
                "text", "search", "url", "tel", "e-mail", "password", "datetime",
                "date", "month", "week", "time", "datetime-local", "number" });
        validInputTypesByAttributeName.put("required", new String[] {
                "text", "search", "url", "tel", "e-mail", "password", "datetime",
                "date", "month", "week", "time", "datetime-local", "number",
                "checkbox", "radio", "file" });
        validInputTypesByAttributeName.put("size", new String[] { "text", "search", "url", "tel",
                "e-mail", "password" });
        validInputTypesByAttributeName.put("src", new String[] { "image" });
        validInputTypesByAttributeName.put("step", new String[] { "datetime", "date", "month",
                "week", "time", "datetime-local", "number", "range", });
        validInputTypesByAttributeName.put("width", new String[] { "image" });
    }

    private static final Map<String, String> fragmentIdByInputType = new TreeMap<String, String>();

    static {
        fragmentIdByInputType.put("hidden", "#hidden-state-type-hidden");
        fragmentIdByInputType.put("text",
                "#text-type-text-state-and-search-state-type-search");
        fragmentIdByInputType.put("search",
                "#text-type-text-state-and-search-state-type-search");
        fragmentIdByInputType.put("url", "#url-state-type-url");
        fragmentIdByInputType.put("tel", "#telephone-state-type-tel");
        fragmentIdByInputType.put("email", "#e-mail-state-type-email");
        fragmentIdByInputType.put("password", "#password-state-type-password");
        fragmentIdByInputType.put("datetime",
                "#date-and-time-state-type-datetime");
        fragmentIdByInputType.put("date", "#date-state-type-date");
        fragmentIdByInputType.put("month", "#month-state-type-month");
        fragmentIdByInputType.put("week", "#week-state-type-week");
        fragmentIdByInputType.put("time", "#time-state-type-time");
        fragmentIdByInputType.put("datetime-local",
                "#local-date-and-time-state-type-datetime-local");
        fragmentIdByInputType.put("number", "#number-state-type-number");
        fragmentIdByInputType.put("range", "#range-state-type-range");
        fragmentIdByInputType.put("color", "#color-state-type-color");
        fragmentIdByInputType.put("checkbox", "#checkbox-state-type-checkbox");
        fragmentIdByInputType.put("radio", "#radio-button-state-type-radio");
        fragmentIdByInputType.put("file", "#file-upload-state-type-file");
        fragmentIdByInputType.put("submit", "#submit-button-state-type-submit");
        fragmentIdByInputType.put("image", "#image-button-state-type-image");
        fragmentIdByInputType.put("reset", "#reset-button-state-type-reset");
        fragmentIdByInputType.put("button", "#button-state-type-button");
    }

    private enum State {
        AWAITING_HEADING, IN_H4, IN_CODE_IN_H4, AWAITING_ELEMENT_DL, IN_ELEMENT_DL_START, IN_CATEGORIES_DT, CAPTURING_CATEGORIES_DDS, IN_CONTEXT_DT, CAPTURING_CONTEXT_DDS, IN_CONTENT_MODEL_DT, CAPTURING_CONTENT_MODEL_DDS, IN_ATTRIBUTES_DT, CAPTURING_ATTRIBUTES_DDS
    }

    /** Locator supplied through setDocumentLocator; attached to the SAXParseExceptions thrown here. */
    private Locator locator;
   
    /** Current state of the state machine. */
    private State state = State.AWAITING_HEADING;

    /** Number of elements currently open inside the section being captured. */
    private int captureDepth = 0;

    /** Value of the id attribute of the h4 being processed; may be null. */
    private String currentId;

    /** Text collected from code elements inside the current h4 (the candidate element name). */
    private StringBuilder nameText = new StringBuilder();

    /** Text collected from the current h4 or dt, matched against the patterns above. */
    private StringBuilder referenceText = new StringBuilder();

    /** Text collected while capturing the content-attributes section. */
    private StringBuilder attributeText = new StringBuilder();

    /** Receives the events of the section being captured; replaced for each section and cleared when it ends. */
    private TreeBuilder fragmentBuilder;

    /** Name of the element whose heading was most recently recognised. */
    private Name currentName;

    /** Spec link for each element. */
    private Map<Name, String> urisByElement = new HashMap<Name, String>();

    /** Captured "Categories" fragment for each element. */
    private Map<Name, DocumentFragment> categoriesByElement = new HashMap<Name, DocumentFragment>();

    /** Captured contexts fragment for each element. */
    private Map<Name, DocumentFragment> contextsByElement = new HashMap<Name, DocumentFragment>();

    /** Captured content-model fragment for each element. */
    private Map<Name, DocumentFragment> contentModelsByElement = new HashMap<Name, DocumentFragment>();

    /** Captured content-attributes fragment for each element. */
    private Map<Name, DocumentFragment> attributesByElement = new HashMap<Name, DocumentFragment>();

    /**
     * When true, the next characters event received in a capturing state is
     * dropped instead of being added to the fragment; set when a dt closes
     * the section being captured.
     */
    private boolean ignoreTextNodes = false;

    public static Spec parseSpec(InputSource in) throws IOException, SAXException {
        HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET);
        Html5SpecBuilder handler = new Html5SpecBuilder();
        parser.setContentHandler(handler);
        parser.parse(in);
        return handler.buildSpec();
    }

    public static Spec parseSpec() throws IOException, SAXException {
        return parseSpec(new InputSource(SPEC_LOAD_URI));
    }
   
    public static void main(String[] args) throws IOException, SAXException {
        try {
            parseSpec();           
        } catch (SAXParseException e) {
            System.err.printf("Line: %d Col: %d\n", e.getLineNumber(), e.getColumnNumber());
            e.printStackTrace();
        }
    }
   
    public static Spec parseSpec(InputStream html5SpecAsStream) throws IOException, SAXException {
        return parseSpec(new InputSource(html5SpecAsStream));
    }

    private Spec buildSpec() {
        return new Spec(urisByElement, contextsByElement,
                contentModelsByElement, attributesByElement);
    }

    /**
     *
     */
    private Html5SpecBuilder() {
        super();
    }

    public void characters(char[] ch, int start, int length)
            throws SAXException {
        switch (state) {
            case AWAITING_HEADING:
                break;
            case IN_H4:
                referenceText.append(ch, start, length);
                if (nameText.length() != 0) {
                  Matcher m = THE.matcher(referenceText);
                  if (m.matches()) {
                    String ln = nameText.toString().intern();
                    if ("" == ln) {
                      throw new SAXParseException(
                          "Malformed spec: no element "+currentName, locator);
                    }
                    currentName = new Name(NS, ln);
                    if (!urisByElement.containsKey(currentName)) {
                      if (currentId == null) {
                        state = State.AWAITING_HEADING;
                        //                                throw new SAXParseException(
                        //                                        "Malformed spec: no element id.", locator);
                      }
                      urisByElement.put(currentName, SPEC_LINK_URI + "#"
                          + currentId);
                    }
                  }
                }
                break;
            case IN_CODE_IN_H4:
                nameText.append(ch, start, length);
                break;
            case AWAITING_ELEMENT_DL:
                break;
            case IN_ELEMENT_DL_START:
                break;
            case IN_CATEGORIES_DT:
            case IN_CONTEXT_DT:
            case IN_CONTENT_MODEL_DT:
            case IN_ATTRIBUTES_DT:
                referenceText.append(ch, start, length);
                break;
            case CAPTURING_CATEGORIES_DDS:
            case CAPTURING_CONTEXT_DDS:
            case CAPTURING_CONTENT_MODEL_DDS:
            case CAPTURING_ATTRIBUTES_DDS:
                if (ignoreTextNodes) {
                    ignoreTextNodes = false;
                } else {
                    fragmentBuilder.characters(ch, start, length);
                    if (state == State.CAPTURING_ATTRIBUTES_DDS) {
                        attributeText.append(ch, start, length);
                    }
                }
                break;
        }
    }

    public void endDocument() throws SAXException {
        switch (state) {
            case AWAITING_ELEMENT_DL:
            case AWAITING_HEADING:
                // XXX finish
                break;
            case IN_H4:
            case IN_CODE_IN_H4:
            case IN_ELEMENT_DL_START:
            case IN_CATEGORIES_DT:
            case IN_CONTEXT_DT:
            case IN_CONTENT_MODEL_DT:
            case IN_ATTRIBUTES_DT:
            case CAPTURING_CATEGORIES_DDS:
            case CAPTURING_CONTEXT_DDS:
            case CAPTURING_CONTENT_MODEL_DDS:
            case CAPTURING_ATTRIBUTES_DDS:
                throw new SAXException(
                        "Malformed spec: Wrong state for document end.");
        }
    }

    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        switch (state) {
            case AWAITING_HEADING:
                break;
            case IN_H4:
                if ("h4" == localName && NS == uri) {
                    Matcher m = ELEMENT.matcher(referenceText);
                    if (m.matches()) {
                        String ln = nameText.toString().intern();
                        if ("" == ln) {
                            throw new SAXParseException(
                                    "Malformed spec: no element"+currentName, locator);
                        }
                            if (currentId == null) {
                                state = State.AWAITING_HEADING;
//                                throw new SAXParseException(
//                                        "Malformed spec: no element id.", locator);
                            }
                            state = State.AWAITING_ELEMENT_DL;
                    } else {
                        currentId = null;
                        nameText.setLength(0);
                        state = State.AWAITING_HEADING;
                    }
                }
                break;
            case IN_CODE_IN_H4:
                if ("code" == localName && NS == uri) {
                    state = State.IN_H4;
                }
                break;
            case AWAITING_ELEMENT_DL:
                break;
            case IN_ELEMENT_DL_START:
                throw new SAXParseException(
                        "Malformed spec: no children in element dl.", locator);
            case IN_CATEGORIES_DT:
                if ("a" == localName && NS == uri) {
                    Matcher m = CATEGORIES.matcher(referenceText);
                    if (m.matches()) {
                        state = State.CAPTURING_CATEGORIES_DDS;
                        captureDepth = 0;
                        fragmentBuilder = new TreeBuilder(true, true);
                    } else {
                        throw new SAXParseException(
                                "Malformed spec: Expected dt to be categories dt but it was not.", locator);
                    }
                }
                break;
            case IN_CONTEXT_DT:
                if ("a" == localName && NS == uri) {
                    Matcher m = CONTEXT.matcher(referenceText);
                    if (m.matches()) {
                        state = State.CAPTURING_CONTEXT_DDS;
                        captureDepth = 0;
                        fragmentBuilder = new TreeBuilder(true, true);
                    } else {
                      System.err.printf("Line: %d Col: %d\n", locator.getLineNumber(), locator.getColumnNumber());
                        throw new SAXParseException(
                                "Malformed spec at element " + currentName.getLocalName() + " (" + currentId + "): Expected dt to be context dt but it was not.", locator);
                    }
                }
                break;
            case IN_CONTENT_MODEL_DT:
                if ("a" == localName && NS == uri) {
                    Matcher m = CONTENT_MODEL.matcher(referenceText);
                    if (m.matches()) {
                        state = State.CAPTURING_CONTENT_MODEL_DDS;
                        captureDepth = 0;
                        fragmentBuilder = new TreeBuilder(true, true);
                    } else {
                        throw new SAXParseException(
                                "Malformed spec: Expected dt to be content-model dt but it was not.", locator);
                    }
                }
                break;
            case IN_ATTRIBUTES_DT:
                if ("a" == localName && NS == uri) {
                    Matcher m = ATTRIBUTES.matcher(referenceText);
                    if (m.matches()) {
                        state = State.CAPTURING_ATTRIBUTES_DDS;
                        captureDepth = 0;
                        fragmentBuilder = new TreeBuilder(true, true);
                    } else {
                        throw new SAXParseException(
                                "Malformed spec: Expected dt to be content-attributes dt but it was not.", locator);
                    }
                }
                break;
            case CAPTURING_CATEGORIES_DDS:
            case CAPTURING_CONTEXT_DDS:
            case CAPTURING_CONTENT_MODEL_DDS:
            case CAPTURING_ATTRIBUTES_DDS:
                if ("dt" == localName) {
                    break;
                }
                if (captureDepth == 0) {
                    throw new SAXParseException(
                            "Malformed spec: Did not see following dt when capturing dds.", locator);
                }
                captureDepth--;
                String attributeName = attributeText.toString().trim();
                if (state == State.CAPTURING_ATTRIBUTES_DDS
                        && "input".equals(currentName.getLocalName())
                        && "dd".equals(localName)) {
                    listInputTypesForAttribute(attributeName, fragmentBuilder);
                    attributeText.setLength(0);
                }
                fragmentBuilder.endElement(uri, localName, qName);
                break;
        }
    }

    /** No-op: prefix mappings are not used by this handler. */
    public void endPrefixMapping(String prefix) throws SAXException {
    }

    /** No-op: ignorable whitespace is discarded. */
    public void ignorableWhitespace(char[] ch, int start, int length)
            throws SAXException {
    }

    /** No-op: processing instructions are discarded. */
    public void processingInstruction(String target, String data)
            throws SAXException {
    }

    /**
     * Stores the locator so that it can be attached to the
     * SAXParseExceptions thrown by this handler.
     *
     * @param locator the locator supplied by the parser
     */
    public void setDocumentLocator(Locator locator) {
        this.locator = locator;
    }

    /** No-op: skipped entities are discarded. */
    public void skippedEntity(String name) throws SAXException {
    }

    /** No-op: all state is initialised by the field initialisers. */
    public void startDocument() throws SAXException {
        // Intentionally empty.

    }

    public void startElement(String uri, String localName, String qName,
            Attributes atts) throws SAXException {
        switch (state) {
            case AWAITING_HEADING:
                if ("h4" == localName && NS == uri) {
                    referenceText.setLength(0);
                    currentId = atts.getValue("", "id");
                    currentName = null;
                    state = State.IN_H4;
                }
                break;
            case IN_H4:
                if ("code" == localName && NS == uri) {
                    nameText.setLength(0);
                    state = State.IN_CODE_IN_H4;
                }
                break;
            case IN_CODE_IN_H4:
                break;
            case AWAITING_ELEMENT_DL:
                if ("dl" == localName && NS == uri
                        && "element".equals(atts.getValue("", "class"))) {
                    state = State.IN_ELEMENT_DL_START;
                }
                break;
            case IN_ELEMENT_DL_START:
                if ("dt" == localName && NS == uri) {
                    referenceText.setLength(0);
                    state = State.IN_CATEGORIES_DT;
                } else {
                    throw new SAXParseException("Malformed spec: Expected dt in dl.", locator);
                }
                break;
            case IN_CATEGORIES_DT:
                if ("a" == localName && NS == uri) {
                    state = State.IN_CATEGORIES_DT;
                    break;
                }
            case IN_CONTEXT_DT:
                if ("a" == localName && NS == uri) {
                    state = State.IN_CONTEXT_DT;
                    break;
                }
            case IN_CONTENT_MODEL_DT:
                if ("a" == localName && NS == uri) {
                    state = State.IN_CONTENT_MODEL_DT;
                    break;
                }
            case IN_ATTRIBUTES_DT:
                if ("a" == localName && NS == uri) {
                    state = State.IN_ATTRIBUTES_DT;
                    break;
                }
                throw new SAXParseException(
                        "Malformed spec: Not expecting children in dts.", locator);
            case CAPTURING_CATEGORIES_DDS:
            case CAPTURING_CONTEXT_DDS:
            case CAPTURING_CONTENT_MODEL_DDS:
            case CAPTURING_ATTRIBUTES_DDS:
                if ("dt" == localName && NS == uri && captureDepth == 0) {
                    ignoreTextNodes = true;
                    DocumentFragment fragment = (DocumentFragment) fragmentBuilder.getRoot();
                    fragmentBuilder = null;
                    referenceText.setLength(0);
                    if (state == State.CAPTURING_CATEGORIES_DDS) {
                        categoriesByElement.put(currentName, fragment);
                        state = State.IN_CONTEXT_DT;
                    } else if (state == State.CAPTURING_CONTEXT_DDS) {
                        contextsByElement.put(currentName, fragment);
                        state = State.IN_CONTENT_MODEL_DT;
                    } else if (state == State.CAPTURING_CONTENT_MODEL_DDS) {
                        contentModelsByElement.put(currentName, fragment);
                        state = State.IN_ATTRIBUTES_DT;
                    } else {
                        attributesByElement.put(currentName, fragment);
                        state = State.AWAITING_HEADING;
                    }
                } else {
                    captureDepth++;
                    String href = null;
                    if ("a" == localName && NS == uri
                            && (href = atts.getValue("", "href")) != null) {
                        if (href.startsWith("#")) {
                            href = SPEC_LINK_URI + href;
                        }
                        AttributesImpl attributesImpl = new AttributesImpl();
                        attributesImpl.addAttribute("href", href);
                        fragmentBuilder.startElement(uri, localName, qName,
                                attributesImpl);
                    } else if (state == State.CAPTURING_ATTRIBUTES_DDS
                            && "input".equals(currentName.getLocalName())
                            && "code".equals(localName)
                            ) {
                        AttributesImpl attributesImpl = new AttributesImpl();
                        attributesImpl.addAttribute("class", "inputattrname");
                        fragmentBuilder.startElement(uri, localName, qName,
                                attributesImpl);
                    } else {
                        fragmentBuilder.startElement(uri, localName, qName,
                                EmptyAttributes.EMPTY_ATTRIBUTES);
                    }
                }
                break;
        }
    }

    /** No-op: prefix mappings are not used by this handler. */
    public void startPrefixMapping(String prefix, String uri)
            throws SAXException {
    }

    private void listInputTypesForAttribute(String attributeName,
            TreeBuilder fragmentBuilder) throws SAXException {
        if (validInputTypesByAttributeName.containsKey(attributeName)
                || "value".equals(attributeName)) {
            addText(" ");
            AttributesImpl attributesImpl = new AttributesImpl();
            attributesImpl.addAttribute("class", "inputattrtypes " + attributeName);
            fragmentBuilder.startElement(NS, "span", "span", attributesImpl);
            addText("when ");
            fragmentBuilder.startElement(NS, "code", "code", EmptyAttributes.EMPTY_ATTRIBUTES);
            addText("type");
            fragmentBuilder.endElement(NS, "code", "code");
            addText(" is ");
            if ("value".equals(attributeName)) {
                addText("not ");
                addHyperlink("file", SPEC_LINK_URI
                        + fragmentIdByInputType.get("file"));
                addText(" or ");
                addHyperlink("image", SPEC_LINK_URI
                        + fragmentIdByInputType.get("image"));
            } else {
                String[] typeNames = validInputTypesByAttributeName.get(attributeName);
                int typeCount = typeNames.length;
                for (int i = 0; i < typeCount; i++) {
                    String typeName = typeNames[i];
                    if (i > 0) {
                        addText(" ");
                    }
                    if (typeCount > 1 && i == typeCount - 1) {
                        addText("or ");
                    }
                    addHyperlink(typeName, SPEC_LINK_URI
                            + fragmentIdByInputType.get(typeName));
                    if (i < typeCount - 1 && typeCount > 2) {
                        addText(",");
                    }
                }
            }
            fragmentBuilder.endElement(NS, "span", "span");
        } else {
            AttributesImpl attributesImpl = new AttributesImpl();
            attributesImpl.addAttribute("class", "inputattrtypes");
            fragmentBuilder.startElement(NS, "span", "span", attributesImpl);
            fragmentBuilder.endElement(NS, "span", "span");
        }
    }

    private void addText(String text) throws SAXException {
        char[] ch = text.toCharArray();
        fragmentBuilder.characters(ch, 0, ch.length);
    }

    private void addHyperlink(String text, String href) throws SAXException {
        AttributesImpl attributesImpl = new AttributesImpl();
        attributesImpl.addAttribute("href", href);
        fragmentBuilder.startElement(NS, "a", "a", attributesImpl);
        addText(text);
        fragmentBuilder.endElement(NS, "a", "a");
    }

}
TOP

Related Classes of nu.validator.spec.html5.Html5SpecBuilder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.