Package org.pegdown

Source Code of org.pegdown.GaidenParser

/*
* Copyright 2014 the original author or authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.pegdown;

import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.Source;
import net.htmlparser.jericho.Tag;
import org.apache.commons.lang3.StringUtils;
import org.parboiled.Context;
import org.parboiled.Rule;
import org.parboiled.annotations.Cached;
import org.parboiled.annotations.DontSkipActionsInPredicates;
import org.parboiled.support.StringBuilderVar;
import org.parboiled.support.StringVar;
import org.parboiled.support.Var;
import org.pegdown.ast.*;

import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

@SuppressWarnings({"InfiniteRecursion"})
public class GaidenParser extends Parser {

    private static Pattern MARKDOWN_ATTRIBUTE_PATTERN = Pattern.compile("(1|span|block)");
    private static List<String> CONTAIN_SPAN_ELEMENTS = Arrays.asList("p", "h1", "h2", "h3", "h4", "h5", "h6", "li", "dd", "dt", "td", "th", "legend", "address");

    public GaidenParser() {
        super(ALL & ~HARDWRAPS, PegDownProcessor.DEFAULT_MAX_PARSING_TIME, DefaultParseRunnerProvider);
    }

    public Rule SetextHeading1() {
        Var<GaidenHeaderNode> gaidenHeaderNodeVar = new Var<>();
        return Sequence(
            SetextInline(), push(gaidenHeaderNodeVar.setAndGet(new GaidenHeaderNode(1, popAsNode()))),
            ZeroOrMore(SetextInline(), addAsChild()),
            Sp(),
            Optional(SpecialAttributes(), gaidenHeaderNodeVar.get().setSpecialAttributes((SpecialAttributesNode) pop())),
            Sp(),
            Newline(), NOrMore('=', 3), Newline()
        );
    }

    public Rule SetextHeading2() {
        Var<GaidenHeaderNode> gaidenHeaderNodeVar = new Var<>();
        return Sequence(
            SetextInline(), push(gaidenHeaderNodeVar.setAndGet(new GaidenHeaderNode(2, popAsNode()))),
            ZeroOrMore(SetextInline(), addAsChild()),
            Sp(),
            Optional(SpecialAttributes(), gaidenHeaderNodeVar.get().setSpecialAttributes((SpecialAttributesNode) pop())),
            Sp(),
            Newline(), NOrMore('-', 3), Newline()
        );
    }

    public Rule SetextInline() {
        return Sequence(
            TestNot(Endline()),
            TestNot(Sp(), Optional(SpecialAttributes()), Sp(), Endline()),
            Inline()
        );
    }

    public Rule AtxHeading() {
        Var<GaidenHeaderNode> gaidenHeaderNodeVar = new Var<>();
        return Sequence(
            FirstOf("######", "#####", "####", "###", "##", "#"),
            push(gaidenHeaderNodeVar.setAndGet(new GaidenHeaderNode(match().length()))),
            Sp(),
            OneOrMore(AtxInline(), addAsChild()),
            Sp(),
            ZeroOrMore('#'),
            Sp(),
            Optional(SpecialAttributes(), gaidenHeaderNodeVar.get().setSpecialAttributes((SpecialAttributesNode) pop())),
            Sp(),
            Newline()
        );
    }

    public Rule AtxInline() {
        return Sequence(
            TestNot(Newline()),
            TestNot(Sp(), ZeroOrMore('#'), Sp(), Optional(SpecialAttributes()), Sp(), Newline()),
            Inline()
        );
    }

    @Cached
    public Rule ExplicitLink(boolean image) {
        return Sequence(
            Spn1(), '(', Sp(),
            LinkSource(),
            Spn1(), FirstOf(LinkTitle(), push("")),
            Sp(), ')',
            FirstOf(SpecialAttributes(), push(new SpecialAttributesNode())),
            push(image ?
                    new GaidenExpImageNode((SpecialAttributesNode) pop(), popAsString(), popAsString(), popAsNode()) :
                    new GaidenExpLinkNode((SpecialAttributesNode) pop(), popAsString(), popAsString(), popAsNode())
            )
        );
    }

    public Rule Reference() {
        Var<ReferenceNode> ref = new Var<>();
        return NodeSequence(
            NonindentSpace(),
            Label(),
            push(ref.setAndGet(new GaidenReferenceNode(popAsNode()))),
            ':',
            Spn1(),
            RefSrc(ref),
            Sp(),
            Optional(RefTitle(ref)),
            Sp(),
            Optional(SpecialAttributes(), ((GaidenReferenceNode) ref.get()).setSpecialAttributesNode((SpecialAttributesNode) pop())),
            Sp(),
            Newline(),
            ZeroOrMore(BlankLine()),
            references.add(ref.get())
        );
    }

    public Rule RefTitle(char open, char close, Var<ReferenceNode> ref) {
        return Sequence(
            open,
            ZeroOrMore(TestNot(close, Sp(), Optional(SpecialAttributes()), Sp(), Newline()), NotNewline(), ANY),
            ref.get().setTitle(match()),
            close
        );
    }

    public Rule FencedCodeBlock() {
        StringBuilderVar text = new StringBuilderVar();
        Var<Integer> markerLength = new Var<>();
        return NodeSequence(
            CodeFence(markerLength),
            TestNot(CodeFence(markerLength)), // prevent empty matches
            ZeroOrMore(BlankLine(), text.append('\n')),
            OneOrMore(TestNot(Newline(), CodeFence(markerLength)), ANY, text.append(matchedChar())),
            Newline(),
            push(new GaidenVerbatimNode(text.appended('\n').getString(), (SpecialAttributesNode) pop())),
            CodeFence(markerLength), drop()
        );
    }

    @Cached
    public Rule CodeFence(Var<Integer> markerLength) {
        Var<String> type = new Var<>();
        Var<SpecialAttributesNode> specialAttributesNodeVar = new Var<>();
        return Sequence(
            FirstOf(NOrMore('~', 3), NOrMore('`', 3)),
            (markerLength.isSet() && matchLength() == markerLength.get()) ||
                (markerLength.isNotSet() && markerLength.set(matchLength())),
            Sp(),
            FirstOf(
                SpecialAttributes(),
                Sequence(
                    ZeroOrMore(TestNot(Newline()), ANY),
                    type.set(match()),
                    specialAttributesNodeVar.setAndGet(new SpecialAttributesNode()).addClass(type.get()),
                    push(specialAttributesNodeVar.get())
                )
            ), // GFM code type identifier
            Sp(),
            Newline()
        );
    }

    public Rule SpecialAttributes() {
        Var<SpecialAttributesNode> specialAttributesNodeVar = new Var<>();
        return Sequence(
            Test('{', Sp(), OneOrMore(FirstOf(IdAttribute(), ClassAttribute()), Sp()), '}'),
            push(specialAttributesNodeVar.setAndGet(new SpecialAttributesNode())),
            '{',
            Sp(),
            OneOrMore(
                FirstOf(IdAttribute(), ClassAttribute()),
                match().startsWith("#") ? specialAttributesNodeVar.get().setId(match().substring(1)) : specialAttributesNodeVar.get().addClass(match().substring(1)),
                Sp()
            ),
            '}'
        );
    }

    public Rule IdAttribute() {
        return Sequence("#", OneOrMore(FirstOf(AnyOf("-_"), NormalChar())));
    }

    public Rule ClassAttribute() {
        return Sequence(".", OneOrMore(FirstOf(AnyOf("-_"), NormalChar())));
    }

    //************* HTML BLOCK ****************

    public Rule HtmlBlock() {
        return NodeSequence(
            FirstOf(HtmlBlockInTags(), HtmlComment(), HtmlBlockSelfClosing()),
            push(createHtmlBlockNode(ext(SUPPRESS_HTML_BLOCKS) ? "" : match())),
            OneOrMore(BlankLine())
        );
    }

    @Cached
    public Rule HtmlTagBlock(StringVar tagName) {
        return FirstOf(NonIndentHtmlTagBlock(tagName), IndentHtmlTagBlock(tagName));
    }

    @Cached
    public Rule NonIndentHtmlTagBlock(StringVar tagName) {
        return Sequence(
            HtmlBlockOpen(tagName),
            TestNot(BlankLine()),
            ZeroOrMore(
                FirstOf(
                    NonIndentHtmlTagBlock(tagName),
                    Sequence(TestNot(NonIndentHtmlBlockClose(tagName)), ANY)
                )
            ),
            NonIndentHtmlBlockClose(tagName)
        );
    }

    @Cached
    public Rule IndentHtmlTagBlock(StringVar tagName) {
        return Sequence(
            HtmlBlockOpen(tagName),
            BlankLine(),
            ZeroOrMore(TestNot(IndentHtmlBlockClose(tagName)), ANY),
            IndentHtmlBlockClose(tagName)
        );
    }

    @DontSkipActionsInPredicates
    public Rule NonIndentHtmlBlockClose(StringVar tagName) {
        return Sequence('<', Spn1(), '/', OneOrMore(Alphanumeric()), match().equals(tagName.get()), Spn1(), '>');
    }

    @DontSkipActionsInPredicates
    public Rule IndentHtmlBlockClose(StringVar tagName) {
        return Sequence(Newline(), '<', Spn1(), '/', OneOrMore(Alphanumeric()), match().equals(tagName.get()), Spn1(), '>');
    }

    public Node createHtmlBlockNode(String text) {
        Source source = new Source(text);
        return createMarkdownInsideHtmlBlockNode(text, 0, source, new SuperNode());
    }

    private Node createMarkdownInsideHtmlBlockNode(String text, int pos, Source source, SuperNode superNode) {
        Element markdownElement = source.getNextElement(pos, "markdown", MARKDOWN_ATTRIBUTE_PATTERN);
        if (markdownElement == null) {
            superNode.getChildren().add(new HtmlBlockNode(text.substring(pos)));
            return superNode;
        }

        superNode.getChildren().add(new HtmlBlockNode(text.substring(pos, markdownElement.getBegin())));

        String indent = getIndent(text, markdownElement);
        Tag endTag = getEndTag(text, source, markdownElement, indent);
        String innerText = getInnerText(text, markdownElement, endTag, indent);
        List<Node> children = parseInnerText(innerText);

        superNode.getChildren().add(new MarkdownInsideHtmlBlockNode(
            markdownElement.getStartTag().toString(),
            children,
            endTag.toString()
        ));

        return createMarkdownInsideHtmlBlockNode(text, endTag.getEnd(), source, superNode);
    }

    private List<Node> parseInnerText(String innerText) {
        Context<Object> context = getContext();
        RootNode innerRoot = parseInternal(innerText.toCharArray());
        setContext(context);
        return withIndicesShifted(innerRoot, (Integer) peek()).getChildren();
    }

    private String getInnerText(String text, Element markdownElement, Tag endTag, String indent) {
        StringBuilder innerTextBuilder = new StringBuilder();
        String innerText = text.substring(markdownElement.getStartTag().getEnd(), endTag.getBegin());

        if (CONTAIN_SPAN_ELEMENTS.contains(markdownElement.getName())) {
            innerTextBuilder.append(innerText.trim().replaceAll("\n", " "));
            if (markdownElement.getAttributeValue("markdown").equals("block")) {
                innerTextBuilder.append("\n\n");
            }
        } else {
            innerTextBuilder.append(removeIndent(innerText, indent));
            innerTextBuilder.append("\n\n");
        }

        return innerTextBuilder.toString();
    }

    private Tag getEndTag(String text, Source source, Element markdownElement, String indent) {
        if (indent != null) {
            Pattern endTagPattern = Pattern.compile("\n(" + indent + "</" + markdownElement.getName() + "\\s*>)");
            Matcher endTagMatcher = endTagPattern.matcher(text);
            if (endTagMatcher.find(markdownElement.getStartTag().getEnd())) {
                return source.getNextEndTag(endTagMatcher.start(1), markdownElement.getStartTag().getName());
            }
        }

        return markdownElement.getEndTag();
    }

    private String removeIndent(String text, String indent) {
        if (StringUtils.isEmpty(indent)) {
            return text;
        }

        return text.replaceAll("^" + indent, "").replaceAll("\n" + indent, "\n");
    }

    private String getIndent(String text, Element markdownElement) {
        Pattern beginPattern = Pattern.compile("^( *)" + markdownElement.getStartTag().toString());
        Matcher beginMatcher = beginPattern.matcher(text);
        if (beginMatcher.find()) {
            return beginMatcher.group(1);
        }

        Pattern innerPattern = Pattern.compile("\n( +)$");
        Matcher innerMatcher = innerPattern.matcher(text.substring(0, markdownElement.getBegin()));
        if (innerMatcher.find()) {
            return innerMatcher.group(1);
        }

        return null;
    }
}
TOP

Related Classes of org.pegdown.GaidenParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.