Package org.jnode.shell

Source Code of org.jnode.shell.PathnamePattern

/*
* $Id$
*
* Copyright (C) 2003-2014 JNode.org
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; If not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.jnode.shell;

import java.io.File;
import java.io.FilenameFilter;
import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Loosely modeled on java.util.regex.Pattern, this class provides a simple
 * mechanism for expanding UNIX-style pathname patterns into a list of pathnames
 * for filesystem objects.
 *
 * Depending on the flags supplied when a pattern is compiled, the following
 * pattern constructs are available:
 * <ul>
 * <li>A star ("*") matches zero or more characters.
 * <li>A question mark ("?") matches exactly one character.
 * <li>A matching pair of square brackets ("[]") denote a character class. The
 * character class "[abz]" matches one of "a", "b" or "z". Ranges are allowed,
 * so that "[0-9A-F]" matches a hexadecimal digit. If the first character of a
 * character class is "!" or "^", the character class is negated; i.e.
 * "[^a-zA-Z]" matches any character that is not an ASCII letter. A "]" that
 * is the first member of a class (after any negation) is a literal member.
 * <li>A single quote ("'") causes characters up to the next "'" to be treated
 * as literal characters.
 * <li>A backslash ("\") causes the next character (even a single quote) to be
 * treated as a literal character; i.e. stripped of any special meaning. A
 * backslash that is the last character of a pattern matches itself.
 * </ul>
 * <p>
 * Patterns are first split into file components on "/" boundaries, then the
 * sub-patterns are used to match names in a given directory. Neither quoting
 * nor escaping affects "/" interpretation, and a "/" in a character class
 * causes it to be treated as literal characters.
 * <p>
 * The pattern expander treats "dot" files (i.e. files starting with ".") as
 * hidden. A hidden file is only matched when the pattern has an explicit "." as
 * the first character of a component. Thus the pattern "*" does not match "."
 * or "..", but the pattern ".*" does.
 * <p>
 * This class also exposes a static method for compiling patterns in the UNIX
 * shell-style syntax to Java {@link Pattern} objects.  The resulting
 * objects allow you to use the shell-style syntax for matching arbitrary
 * strings.  The pathname-specific matching behaviors of PathnamePattern
 * such as implicit anchoring, and the handling of '/' in character classes
 * are supported via flags.
 * <p>
 * TODO:
 * <ul>
 * <li>Provide a method that returns a "lazy" pathname iterator for cases where
 * we don't want to build a (potentially huge) in-memory list of pathnames.
 * <li>Support expansions of ~ and {..,..} patterns.  (Note that the latter are
 * not part of the POSIX specification.)
 * <li>Add a parameter (or parameters) to allow the caller to limit the size of
 * the result list.
 * </ul>
 *
 * @author crawley@jnode
 */
public class PathnamePattern {

    /**
     * When set, this flag causes the pathname list returned by 'expand' to be
     * lexically sorted.
     */
    public static final int SORT_MATCHES = 0x01;

    /**
     * When set, this flag enables UNIX like handling of hidden files. File and
     * directories whose name starts with a "." are only matched if the first
     * character in the pattern is a ".".
     */
    public static final int HIDE_DOT_FILENAMES = 0x02;

    /**
     * When set, this flag causes the '.' and '..' directories to be included in
     * domain of objects to be matched. (You probably don't want to set this
     * flag without setting HIDE_DOT_FILENAMES as well. Under normal
     * circumstances a user doesn't expect '.' and '..' to be returned in a
     * pattern match.)
     */
    public static final int INCLUDE_DOT_AND_DOTDOT = 0x04;

    /**
     * When set, this flag causes a '\' in a pattern to escape the next
     * character. For example, the sequence "\*" in a pattern will match a "*"
     * character in a filename.
     */
    public static final int BACKSLASH_ESCAPES = 0x08;

    /**
     * When set, this flag causes characters inside matching single-quote
     * characters to match literal characters in the pathname. Only a '\' is
     * unaffected. Thus "'a*c'" will match the file "a*c", but "'a\'c'" will
     * match "a'c"; i.e. a filename containing a single-quote character.
     */
    public static final int SINGLE_QUOTE_ESCAPES = 0x10;

    /**
     * When set, this flag causes characters inside matching double-quote
     * characters to match literal characters in the pathname. Only a '\' is
     * unaffected. Thus ""a*c"" will match the file "a*c", but ""a\"c"" will
     * match "a"c"; i.e. a filename containing a double-quote character.
     */
    public static final int DOUBLE_QUOTE_ESCAPES = 0x20;

    /**
     * When set, this flag causes the [...] character class syntax to be
     * recognized.
     */
    public static final int CHARACTER_CLASSES = 0x40;

    /**
     * When set, the pattern is anchored to the left of the string to be searched.
     * This is set implicitly by the pathname matching methods.
     */
    public static final int ANCHOR_LEFT = 0x80;

    /**
     * When set, the pattern is anchored to the right of the string to be searched.
     * This is set implicitly by the pathname matching methods.
     */
    public static final int ANCHOR_RIGHT = 0x100;

    /**
     * When set, '*' is eager, matching as many characters as possible;
     * otherwise it matches as few characters as possible.
     * This is set implicitly by the pathname matching methods.
     */
    public static final int EAGER = 0x200;

    /**
     * When set, an unescaped '/' inside a character class causes the entire class
     * to be interpreted as a literal character sequence.
     * This is set implicitly by the pathname matching methods.
     */
    public static final int SLASH_DISABLES_CHARACTER_CLASSES = 0x400;

    /**
     * The flags used by the methods that do not take an explicit flags argument.
     */
    public static final int DEFAULT_FLAGS = SORT_MATCHES | HIDE_DOT_FILENAMES
            | INCLUDE_DOT_AND_DOTDOT | BACKSLASH_ESCAPES | SINGLE_QUOTE_ESCAPES
            | DOUBLE_QUOTE_ESCAPES | CHARACTER_CLASSES;

    private static final boolean DEBUG = false;

    /** The pattern text exactly as supplied by the caller. */
    private final String source;

    /**
     * One element per non-empty pathname component: a String for a component
     * with no meta-characters, or a compiled Pattern otherwise.
     */
    private final ArrayList<Object> patterns;

    /** Set when the first component of the source is empty (a leading separator). */
    private boolean isAbsolute;

    /**
     * The quote character that is still open after the most recently compiled
     * component, or 0 if none.  Only meaningful while compiling.
     */
    private char lastQuote;

    // Use a weak reference for the pattern cache to avoid storage leakage.
    private static WeakReference<HashMap<String, PathnamePattern>> cache;

    private PathnamePattern(String source) {
        this.source = source;
        this.patterns = new ArrayList<Object>();
    }

    /**
     * Expand a pattern using the default flags, returning the pathnames of the
     * file system objects that it matches.
     *
     * @param current this is the notional current directory for expanding a
     *        relative pattern.
     * @return the list of matching pathnames. The names will be absolute if the
     *         original pattern was absolute, and relative if not.
     */
    public LinkedList<String> expand(File current) {
        return expand(current, DEFAULT_FLAGS);
    }

    /**
     * Expand a pattern, returning the pathnames of the file system objects that
     * it matches.
     *
     * @param current this is the notional current directory for expanding a
     *        relative pattern.
     * @param flags these flags control the behavior of the expander.
     * @return the list of matching pathnames. The names will be absolute if the
     *         original pattern was absolute, and relative if not.  A pattern
     *         with no components yields the root separator if it is absolute
     *         and an empty list otherwise.
     */
    public LinkedList<String> expand(File current, int flags) {
        if (patterns.isEmpty()) {
            // A source such as "/" or "" has no components to match; doGlob
            // would fail indexing the empty component list.
            LinkedList<String> res = new LinkedList<String>();
            if (isAbsolute) {
                res.add(File.separator);
            }
            return res;
        }
        return doGlob(isAbsolute ? new File(File.separator) : current, 0, flags);
    }

    /**
     * This method recursively visits each element of the compiled pattern,
     * building a list of the pathname strings for FS objects that match it.
     *
     * @param current the current file context for expansion.
     * @param pos our index into the 'pattern' array.
     * @param flags the expansion flags.
     * @return the list of partial pathnames matched in the context of
     *         'current'.
     */
    private LinkedList<String> doGlob(File current, int pos, int flags) {
        LinkedList<File> matches = new LinkedList<File>();
        LinkedList<String> res = new LinkedList<String>();
        final Object element = patterns.get(pos);
        if (element instanceof String) {
            File file = new File(current, (String) element);
            if (file.exists()) {
                matches.add(file);
            }
        } else {
            final Matcher mat = ((Pattern) element).matcher("");
            final FilenameFilter filter = new FilenameFilter() {
                public boolean accept(File dir, String name) {
                    return mat.reset(name).matches();
                }
            };
            // File.listFiles returns null when 'current' is not a directory or
            // cannot be read; in that case nothing can match here.
            final File[] entries = current.listFiles(filter);
            if (entries != null) {
                // A directory's "." and ".." entries are not returned by
                // File.listFiles so we have to match / add them explicitly.
                if ((flags & INCLUDE_DOT_AND_DOTDOT) != 0) {
                    if (filter.accept(current, ".")) {
                        matches.add(new File(current, "."));
                    }
                    if (filter.accept(current, "..")) {
                        matches.add(new File(current, ".."));
                    }
                }
                // Process the 'regular' directory contents
                for (File file : entries) {
                    matches.add(file);
                }
                if ((flags & SORT_MATCHES) != 0) {
                    Collections.sort(matches);
                }
            }
        }

        final boolean isLast = (pos == patterns.size() - 1);
        for (File match : matches) {
            String name = match.getName();
            if (pos == 0 && isAbsolute) {
                name = File.separator + name;
            }
            if (isLast) {
                res.add(name);
            } else if (match.isDirectory()) {
                for (String sub : doGlob(match, pos + 1, flags)) {
                    res.add(name + File.separator + sub);
                }
            }
        }
        return res;
    }

    /**
     * Create and compile a pathname pattern using the default flags.
     *
     * @param source the pattern source
     * @return a compiled pattern for the source.
     */
    public static PathnamePattern compilePathPattern(String source) {
        return compilePathPattern(source, DEFAULT_FLAGS);
    }

    /**
     * Create and compile a pathname pattern. The flags determine which pattern
     * meta-characters are recognized by the compiled pattern. If a pattern
     * meta-character is not recognized, it will be treated as a literal
     * character.
     *
     * @param source the pattern source
     * @param flags pattern compilation flags
     * @return a compiled pattern for the source.
     * @throws IllegalArgumentException if a quote in the pattern is not closed.
     */
    public static PathnamePattern compilePathPattern(String source, int flags) {
        final String key = flags + ":" + source;
        synchronized (PathnamePattern.class) {
            HashMap<String, PathnamePattern> cp = (cache == null) ? null : cache.get();
            if (cp != null) {
                PathnamePattern cached = cp.get(key);
                if (cached != null) {
                    return cached;
                }
            }
        }

        PathnamePattern pp = new PathnamePattern(source);
        // The separator is quoted so that it is always taken literally by the
        // regex engine (a raw "\" separator would otherwise escape the '+').
        String[] parts = source.split("(?:" + Pattern.quote(File.separator) + ")+", -1);
        for (int i = 0; i < parts.length; i++) {
            String part = parts[i];
            Object pat = (isPattern(part, flags)) ?
                compilePosixShellPattern(part,
                        flags | ANCHOR_LEFT | ANCHOR_RIGHT | EAGER | SLASH_DISABLES_CHARACTER_CLASSES,
                        pp) : part;
            if (pat == null || pat.toString().length() == 0) {
                // An empty first component means the source started with a
                // separator; other empty components are simply dropped.
                if (i == 0) {
                    pp.isAbsolute = true;
                }
            } else {
                pp.patterns.add(pat);
            }
            if (DEBUG) {
                System.err.println(i + ": " + pat);
            }
        }
        if (pp.lastQuote != 0) {
            throw new IllegalArgumentException("Unbalanced quotes in pattern");
        }
        synchronized (PathnamePattern.class) {
            HashMap<String, PathnamePattern> cp = (cache == null) ? null : cache.get();
            if (cp == null) {
                cp = new HashMap<String, PathnamePattern>();
                cache = new WeakReference<HashMap<String, PathnamePattern>>(cp);
            }
            cp.put(key, pp);
        }
        return pp;
    }

    /**
     * Clear the pattern cache
     */
    public static void clearCache() {
        synchronized (PathnamePattern.class) {
            cache = null;
        }
    }

    /**
     * Provide a fast determination if a string requires pattern expansion,
     * assuming the default pattern flags.
     *
     * @param str the string to be examined
     * @return <code>true</code> if the string is potentially a pattern; i.e.
     *         if it contains meta-characters enabled by the default flags.
     */
    public static boolean isPattern(String str) {
        return isPattern(str, DEFAULT_FLAGS);
    }

    /**
     * Provide a fast determination if a string requires pattern expansion.
     *
     * @param str the string to be examined
     * @param flags pattern compilation flags
     * @return <code>true</code> if the string is potentially a pattern; i.e.
     *         if it contains meta-characters enabled in the compilation flags.
     */
    public static boolean isPattern(String str, int flags) {
        int len = str.length();
        for (int i = 0; i < len; i++) {
            switch (str.charAt(i)) {
                case '*':
                case '?':
                    return true;
                case '[':
                    if ((flags & CHARACTER_CLASSES) != 0) {
                        return true;
                    }
                    break;
                case '\\':
                    if ((flags & BACKSLASH_ESCAPES) != 0) {
                        return true;
                    }
                    break;
                case '\'':
                    if ((flags & SINGLE_QUOTE_ESCAPES) != 0) {
                        return true;
                    }
                    break;
                case '\"':
                    if ((flags & DOUBLE_QUOTE_ESCAPES) != 0) {
                        return true;
                    }
                    break;
                default:
                    break;
            }
        }
        return false;
    }

    /**
     * Turn a string in POSIX shell pattern syntax into a regex.  This method
     * generates a {@link Pattern} that can be matched against a character sequence.
     *
     * @param pattern the pattern in shell syntax.
     * @param flags compilation flags
     * @return the corresponding regex as a {@link Pattern}, or {@code null}
     *         if the pattern produces an empty regex.
     */
    public static Pattern compilePosixShellPattern(CharSequence pattern, int flags) {
        return compilePosixShellPattern(pattern, flags, null);
    }

    /**
     * Turn a string in POSIX shell pattern syntax into a regex, optionally
     * carrying quoting state from one pathname component to the next.
     *
     * @param pattern the pattern in shell syntax.
     * @param flags compilation flags
     * @param pp if not {@code null}, the open-quote state is read from this
     *        object's 'lastQuote' field before compiling and written back to
     *        it afterwards.
     * @return the corresponding regex as a {@link Pattern}, or {@code null}
     *         if the pattern produces an empty regex.
     */
    private static Pattern compilePosixShellPattern(
            CharSequence pattern, int flags, PathnamePattern pp) {
        // This method needs to be really careful to avoid 'ordinary' characters
        // in the source pattern being accidentally mapped to Java regex
        // meta-characters.
        int len = pattern.length();
        StringBuilder sb = new StringBuilder(len);
        char quote = (pp == null) ? ((char) 0) : pp.lastQuote;
        boolean eager = (flags & EAGER) != 0;
        for (int i = 0; i < len; i++) {
            char ch = pattern.charAt(i);
            switch (ch) {
                case '?':
                    if (quote != 0) {
                        sb.append(protect(ch));
                    } else if (i == 0 && (flags & HIDE_DOT_FILENAMES) != 0) {
                        sb.append("[^\\.]");
                    } else {
                        sb.append(".");
                    }
                    break;
                case '*':
                    if (quote != 0) {
                        sb.append(protect(ch));
                    } else if (i == 0 && (flags & HIDE_DOT_FILENAMES) != 0) {
                        sb.append("(|[^\\.]").append(eager ? ".*" : ".*?").append(")");
                    } else {
                        sb.append(eager ? ".*" : ".*?");
                    }
                    break;
                case '[':
                    // Inside quotes a '[' is an ordinary character.
                    if (quote == 0 && (flags & CHARACTER_CLASSES) != 0) {
                        i = compileCharacterClass(pattern, i, flags, sb);
                    } else {
                        sb.append(protect(ch));
                    }
                    break;
                case '\\':
                    // A backslash with nothing after it has nothing to escape,
                    // so it stands for itself.
                    if ((flags & BACKSLASH_ESCAPES) != 0 && i + 1 < len) {
                        sb.append(protect(pattern.charAt(++i)));
                    } else {
                        sb.append(protect(ch));
                    }
                    break;
                case '\'':
                    if ((flags & SINGLE_QUOTE_ESCAPES) != 0) {
                        if (quote == '\'') {
                            quote = 0;
                        } else if (quote == 0) {
                            quote = '\'';
                        } else {
                            sb.append(protect(ch));
                        }
                    } else {
                        sb.append(protect(ch));
                    }
                    break;
                case '\"':
                    if ((flags & DOUBLE_QUOTE_ESCAPES) != 0) {
                        if (quote == '\"') {
                            quote = 0;
                        } else if (quote == 0) {
                            quote = '\"';
                        } else {
                            sb.append(protect(ch));
                        }
                    } else {
                        sb.append(protect(ch));
                    }
                    break;
                default:
                    sb.append(protect(ch));
            }
        }
        if (pp != null) {
            pp.lastQuote = quote;
        }
        if (sb.length() == 0) {
            return null;
        }
        if ((flags & ANCHOR_LEFT) != 0) {
            sb.insert(0, '^');
        }
        if ((flags & ANCHOR_RIGHT) != 0) {
            sb.append('$');
        }
        return Pattern.compile(sb.toString());
    }

    /**
     * Compile the character class whose opening '[' is at index 'start',
     * appending the resulting regex fragment to 'sb'.
     *
     * @param pattern the pattern in shell syntax.
     * @param start the index of the opening '['.
     * @param flags compilation flags
     * @param sb the regex under construction.
     * @return the index of the last pattern character consumed: the closing
     *         ']' for a terminated class, or 'start' if the class is not
     *         terminated, in which case only a literal '[' has been emitted.
     */
    private static int compileCharacterClass(
            CharSequence pattern, int start, int flags, StringBuilder sb) {
        final int len = pattern.length();
        final StringBuilder members = new StringBuilder(len);
        int firstMember = start + 1;
        if (firstMember < len) {
            char first = pattern.charAt(firstMember);
            if (first == '!' || first == '^') {
                members.append('^');
                firstMember++;
            }
        }
        boolean charClassOK = true;
        int end = -1;
        for (int j = firstMember; j < len; j++) {
            char ch = pattern.charAt(j);
            // A ']' in first position is a member, not the terminator; this
            // also guarantees that a terminated class is never empty.
            if (ch == ']' && j > firstMember) {
                end = j;
                break;
            }
            switch (ch) {
                case '\\':
                    // A dangling backslash leaves the class unterminated.
                    if (j + 1 < len) {
                        members.append(protectInClass(pattern.charAt(++j)));
                    }
                    break;
                case '-':
                    members.append('-');
                    break;
                case '/':
                    members.append(protect(ch));
                    if ((flags & SLASH_DISABLES_CHARACTER_CLASSES) != 0) {
                        charClassOK = false;
                    }
                    break;
                default:
                    members.append(protectInClass(ch));
            }
        }
        if (end < 0) {
            sb.append(protect('['));
            return start;
        }
        if (!charClassOK) {
            // Emit the source text of the class as literal characters, with
            // backslash escapes resolved.
            for (int k = start; k <= end; k++) {
                char ch = pattern.charAt(k);
                if (ch == '\\' && k + 1 < end) {
                    ch = pattern.charAt(++k);
                }
                sb.append(protect(ch));
            }
            return end;
        }
        sb.append('[').append(members).append(']');
        return end;
    }

    /**
     * Escape a character if it is a Java regex meta-character, so that it
     * matches itself.
     *
     * @param ch the character to be matched literally.
     * @return the regex fragment that matches the character.
     */
    private static String protect(char ch) {
        switch (ch) {
            case '.':
            case '|':
            case '[':
            case ']':
            case '(':
            case ')':
            case '+':
            case '*':
            case '?':
            case '$':
            case '{':
            case '}':
            case '^':
            case '\\':
                return "\\" + ch;
            default:
                return Character.toString(ch);
        }
    }

    /**
     * Escape a character that is a literal member of a regex character class.
     * In addition to the characters handled by 'protect', this escapes '&amp;'
     * (since "&amp;&amp;" denotes intersection in a Java character class) and
     * '-' (which would otherwise form a range).
     *
     * @param ch the character to be matched literally.
     * @return the regex fragment that matches the character.
     */
    private static String protectInClass(char ch) {
        switch (ch) {
            case '&':
            case '-':
                return "\\" + ch;
            default:
                return protect(ch);
        }
    }

    /**
     * @return the pattern source text.
     */
    @Override
    public String toString() {
        return source;
    }

    /**
     * @return a diagnostic rendering of the compiled pattern, showing the
     *         source, the 'absolute' flag and each compiled component.
     */
    public String toRegexString() {
        StringBuilder sb = new StringBuilder();
        sb.append("PathnamePattern{source='").append(this.source);
        sb.append("',absolute=").append(this.isAbsolute);
        sb.append(",patterns=[");
        int len = this.patterns.size();
        for (int i = 0; i < len; i++) {
            if (i > 0) {
                sb.append(",");
            }
            sb.append('\'').append(patterns.get(i)).append('\'');
        }
        sb.append("]}");
        return sb.toString();
    }
}
TOP

Related Classes of org.jnode.shell.PathnamePattern

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.