Source Code of org.python.indexer.demos.DocStringParser

/**
 * Copyright 2009, Google Inc.  All rights reserved.
 * Licensed to PSF under a Contributor Agreement.
 */
package org.python.indexer.demos;


import org.python.indexer.Indexer;
import org.python.indexer.NBinding;
import org.python.indexer.Ref;
import org.python.indexer.Scope;
import org.python.indexer.StyleRun;
import org.python.indexer.ast.NStr;


import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Scans doc strings looking for interesting stuff to highlight or hyperlink.
 */
class DocStringParser {


    /**
     * Only try to resolve possible qnames of at least this length.
     * Helps cut down on noise.
     */
    private static final int MIN_TYPE_NAME_LENGTH = 4;


    /**
     * Matches an unqualified Python identifier.
     */
    private static final String IDENT = "[a-zA-Z_][a-zA-Z0-9_]*";


    /**
     * Matches probable type names.  Does loose matching; caller must do more checks.
     */
    private static final Pattern TYPE_NAME =
            Pattern.compile(
                // any two or more identifiers joined with dots.
                IDENT + "\\." + IDENT + "(?:\\." + IDENT + ")*\\b"


                // a capitalized word that isn't all-caps
                + "|\\b[A-Z][a-zA-Z0-9_]*?[a-z][a-zA-Z0-9_]*\\b"


                // an __identifier__
                + "|(?<![a-zA-Z0-9_])?__[a-zA-Z][a-zA-Z_]*?__");


    private boolean resolveReferences = true;
    private int docOffset;  // doc string start offset
    private String docString;  // the doc string text
    private NStr docNode;  // the node
    private Scope scope;  // scope for name lookups
    private String file;  // file containing the doc string


    private Set<Integer> offsets = new HashSet<Integer>();  // styles we've already added
    private List<StyleRun> styles = new ArrayList<StyleRun>();
    private Linker linker;


    /**
     * Constructor.
     * @param start beginning 0-based file offset of the doc string
     * @param comment the doc string or doc-comment text
     * @param node the AST node for the doc string
     */
    public DocStringParser(String comment, NStr node, Linker linker) {
        docOffset = node.start();
        docString = comment;
        docNode = node;
        scope = node.getEnclosingNamespace();
        file = node.getFile();
        this.linker = linker;
    }


    /**
     * Configures whether to highlight syntactically or semantically.
     *
     * @param resolve {@code true} to do name resolution, {@code false}
     *        to guess purely based on syntax in the doc string.
     *        Pass {@code false} if you're using the highlighter to
     *        syntax-highlight a file (i.e. no code graph or indexing.)
     */
    public void setResolveReferences(boolean resolve) {
        resolveReferences = resolve;
    }


    public boolean isResolvingReferences() {
        return resolveReferences;
    }


    /**
     * Main entry point.
     *
     * @return the non-{@code null} but possibly empty list of additional
     *         styles for the doc string.
     */
    public List<StyleRun> highlight() {
        if (resolveReferences) {
            scanCommentForTypeNames();
        }


        return styles;
    }


    /**
     * Try to match potential type names against the code graph.
     * If any match, graph references and styles are added for them.
     */
    private void scanCommentForTypeNames() {
        Matcher m = TYPE_NAME.matcher(docString);
        while (m.find()) {
            String qname = m.group();
            int beg = m.start() + docOffset;


            // If we already added a style here, skip this one.
            if (offsets.contains(beg)) {
                continue;
            }


            // Arbitrarily require them to be at least N chars, to reduce noise.
            if (qname.length() < MIN_TYPE_NAME_LENGTH) {
                continue;
            }


            checkForReference(beg, qname);
        }
    }


    /**
     * Look for the name in the current scope.  If found, and its
     * qname is a valid binding in the graph, record a reference.
     */
    private void checkForReference(int offset, String qname) {
        NBinding nb;
        if (qname.indexOf('.') == -1) {
            nb = scope.lookup(qname);
            if (nb == null) {
                nb = Indexer.idx.globaltable.lookup(qname);
            }
        } else {
            nb = Indexer.idx.lookupQname(qname);
        }


        if (nb != null) {
            linker.processRef(new Ref(file, offset, qname), nb);
        }
    }


    private void addStyle(int beg, int len, NBinding nb) {
        addStyle(beg, len, StyleRun.Type.TYPE_NAME);
        offsets.add(beg);
    }


    private void addStyle(int beg, int len, StyleRun.Type type) {
        styles.add(new StyleRun(type, beg, len));
        offsets.add(beg);
    }
}
Source Code of org.python.indexer.demos.DocStringParser

Related Classes of org.python.indexer.demos.DocStringParser