Package com.asakusafw.runtime.directio.hadoop

Source Code of com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil

/**
* Copyright 2011-2014 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.runtime.directio.hadoop;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.nio.charset.Charset;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.util.Progressable;

import com.asakusafw.runtime.compatibility.FileSystemCompatibility;
import com.asakusafw.runtime.directio.AbstractDirectDataSource;
import com.asakusafw.runtime.directio.Counter;
import com.asakusafw.runtime.directio.DirectDataSource;
import com.asakusafw.runtime.directio.DirectDataSourceProfile;
import com.asakusafw.runtime.directio.DirectDataSourceProvider;
import com.asakusafw.runtime.directio.DirectDataSourceRepository;
import com.asakusafw.runtime.directio.FilePattern;
import com.asakusafw.runtime.directio.FilePattern.PatternElement;
import com.asakusafw.runtime.directio.FilePattern.PatternElementKind;
import com.asakusafw.runtime.directio.FilePattern.Segment;
import com.asakusafw.runtime.directio.FilePattern.Selection;
import com.asakusafw.runtime.directio.OutputAttemptContext;
import com.asakusafw.runtime.directio.OutputTransactionContext;
import com.asakusafw.runtime.stage.StageConstants;

/**
* Utilities for Direct data access facilities on Hadoop.
* @since 0.2.5
*/
public final class HadoopDataSourceUtil {

    static final Log LOG = LogFactory.getLog(HadoopDataSourceUtil.class);

    /**
     * The key prefix of data sources.
     */
    public static final String PREFIX = "com.asakusafw.directio.";

    /**
     * The key name of path.
     */
    public static final String KEY_PATH = "path";

    private static final Pattern PREFIX_PATTERN = Pattern.compile('^' + Pattern.quote(PREFIX));

    /**
     * The key name of system directory for this format.
     */
    public static final String KEY_SYSTEM_DIR = "com.asakusafw.output.system.dir";

    /**
     * The attribute key name of local tempdir.
     */
    public static final String KEY_LOCAL_TEMPDIR = "com.asakusafw.output.local.tempdir";

    static final String DEFAULT_SYSTEM_DIR = "_directio";

    static final String TRANSACTION_INFO_DIR = "transactions";

    /**
     * Charset for commit mark file comments.
     */
    public static final Charset COMMENT_CHARSET = Charset.forName("UTF-8");

    /**
     * Loads a profile list from the configuration.
     * @param conf target configuration
     * @return the restored profile list
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public static List<DirectDataSourceProfile> loadProfiles(Configuration conf) {
        if (conf == null) {
            throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
        }
        Map<String, String> pathToKey = new HashMap<String, String>();
        Map<String, String> map = getConfigMap(conf);
        Set<String> keys = getChildKeys(map, ".");
        try {
            List<DirectDataSourceProfile> results = new ArrayList<DirectDataSourceProfile>();
            for (String key : keys) {
                String className = map.get(key);
                Map<String, String> config = createPrefixMap(map, key + ".");
                String path = config.remove(KEY_PATH);
                if (path == null) {
                    throw new IllegalStateException(MessageFormat.format(
                            "Missing I/O configuration: {0}",
                            PREFIX + key + '.' + KEY_PATH));
                }
                path = normalizePath(path);
                if (pathToKey.containsKey(path)) {
                    throw new IllegalStateException(MessageFormat.format(
                            "Path mapping is duplicated: {0} ({1} <=> {2})",
                            path.isEmpty() ? "/" : path,
                            PREFIX + key + '.' + KEY_PATH,
                            PREFIX + pathToKey.get(key) + '.' + KEY_PATH));
                } else {
                    pathToKey.put(path, key);
                }
                Class<? extends AbstractDirectDataSource> aClass = conf.getClassByName(className)
                    .asSubclass(AbstractDirectDataSource.class);
                results.add(new DirectDataSourceProfile(key, aClass, path, config));
            }
            return results;
        } catch (ClassNotFoundException e) {
            throw new IllegalStateException(e);
        }
    }

    private static String normalizePath(String path) {
        assert path != null;
        StringBuilder buf = new StringBuilder();
        int offset = 0;
        for (int i = 0, n = path.length(); i < n; i++) {
            if (path.charAt(i) == '/') {
                offset = i + 1;
            } else {
                break;
            }
        }
        boolean sawSeparator = false;
        for (int i = offset, n = path.length(); i < n; i++) {
            char c = path.charAt(i);
            if (c == '/') {
                sawSeparator = true;
            } else {
                if (sawSeparator) {
                    buf.append('/');
                    sawSeparator = false;
                }
                buf.append(c);
            }
        }
        return buf.toString();
    }

    private static Map<String, String> getConfigMap(Configuration conf) {
        assert conf != null;
        Map<String, String> map = conf.getValByRegex(PREFIX_PATTERN.pattern());
        NavigableMap<String, String> prefixMap = createPrefixMap(map, PREFIX);
        return prefixMap;
    }

    private static NavigableMap<String, String> createPrefixMap(Map<?, ?> properties, String prefix) {
        assert properties != null;
        assert prefix != null;
        NavigableMap<String, String> results = new TreeMap<String, String>();
        for (Map.Entry<?, ?> entry : properties.entrySet()) {
            if ((entry.getKey() instanceof String) == false || (entry.getValue() instanceof String) == false) {
                continue;
            }
            String name = (String) entry.getKey();
            if (name.startsWith(prefix) == false) {
                continue;
            }
            results.put(name.substring(prefix.length()), (String) entry.getValue());
        }
        return results;
    }

    private static Set<String> getChildKeys(Map<String, String> properties, String delimitier) {
        assert properties != null;
        assert delimitier != null;
        Set<String> results = new TreeSet<String>();
        for (Map.Entry<String, String> entry : properties.entrySet()) {
            String name = entry.getKey();
            int index = name.indexOf(delimitier);
            if (index < 0) {
                results.add(name);
            } else {
                results.add(name.substring(0, index));
            }
        }
        return results;
    }

    /**
     * Loads {@link DirectDataSourceRepository} from {@link Configuration}.
     * @param conf configuration object
     * @return the created repository
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public static DirectDataSourceRepository loadRepository(Configuration conf) {
        if (conf == null) {
            throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
        }
        List<DirectDataSourceProfile> profiles = loadProfiles(conf);
        return createRepository(conf, profiles);
    }

    static DirectDataSourceRepository createRepository(
            Configuration conf,
            List<DirectDataSourceProfile> profiles) {
        assert conf != null;
        assert profiles != null;
        List<DirectDataSourceProvider> providers = new ArrayList<DirectDataSourceProvider>();
        for (DirectDataSourceProfile profile : profiles) {
            providers.add(createProvider(conf, profile));
        }
        return new DirectDataSourceRepository(providers);
    }

    private static DirectDataSourceProvider createProvider(Configuration conf, DirectDataSourceProfile profile) {
        assert conf != null;
        assert profile != null;
        return new HadoopDataSourceProvider(conf, profile);
    }

    /**
     * Returns whether the local attempt output directory is defined.
     * @param localFileSystem current local file system
     * @return {@code true} to defined, otherwise {@code false}
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public static boolean isLocalAttemptOutputDefined(LocalFileSystem localFileSystem) {
        try {
            return getLocalTemporaryDirectory(localFileSystem) != null;
        } catch (IOException e) {
            return false;
        }
    }

    /**
     * Returns the local temporary directory.
     * @param localFileSystem the local file system
     * @return the output path (must be on local fs), or {@code null} if not defined
     * @throws IOException if failed to compute the path
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public static Path getLocalTemporaryDirectory(LocalFileSystem localFileSystem) throws IOException {
        if (localFileSystem == null) {
            throw new IllegalArgumentException("localFileSystem must not be null"); //$NON-NLS-1$
        }
        Configuration conf = localFileSystem.getConf();
        if (conf == null) {
            return null;
        }
        String path = conf.get(KEY_LOCAL_TEMPDIR);
        if (path == null) {
            return null;
        }
        LocalFileSystem fs = FileSystem.getLocal(conf);
        Path result = fs.makeQualified(new Path(path));
        return result;
    }

    /**
     * Creates output context from Hadoop context.
     * @param context current context in Hadoop
     * @param datasourceId datasource ID
     * @return the created context
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public static OutputTransactionContext createContext(JobContext context, String datasourceId) {
        if (context == null) {
            throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
        }
        if (datasourceId == null) {
            throw new IllegalArgumentException("datasourceId must not be null"); //$NON-NLS-1$
        }
        String transactionId = getTransactionId(context, datasourceId);
        return new OutputTransactionContext(transactionId, datasourceId, createCounter(context));
    }

    /**
     * Creates output context from execution ID and datasource ID.
     * @param executionId current execution ID
     * @param datasourceId target datasource ID
     * @return output context
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public static OutputTransactionContext createContext(String executionId, String datasourceId) {
        if (executionId == null) {
            throw new IllegalArgumentException("executionId must not be null"); //$NON-NLS-1$
        }
        if (datasourceId == null) {
            throw new IllegalArgumentException("datasourceId must not be null"); //$NON-NLS-1$
        }
        String transactionId = getTransactionId(executionId);
        return new OutputTransactionContext(transactionId, datasourceId, new Counter());
    }

    /**
     * Creates output context from Hadoop context.
     * @param context current context in Hadoop
     * @param datasourceId datasource ID
     * @return the created context
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public static OutputAttemptContext createContext(TaskAttemptContext context, String datasourceId) {
        if (context == null) {
            throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
        }
        if (datasourceId == null) {
            throw new IllegalArgumentException("datasourceId must not be null"); //$NON-NLS-1$
        }
        String transactionId = getTransactionId(context, datasourceId);
        String attemptId = getAttemptId(context, datasourceId);
        return new OutputAttemptContext(transactionId, attemptId, datasourceId, createCounter(context));
    }

    private static String getTransactionId(JobContext jobContext, String datasourceId) {
        assert jobContext != null;
        assert datasourceId != null;
        String executionId = jobContext.getConfiguration().get(StageConstants.PROP_EXECUTION_ID);
        if (executionId == null) {
            executionId = jobContext.getJobID().toString();
        }
        return getTransactionId(executionId);
    }

    private static String getTransactionId(String executionId) {
        return executionId;
    }

    private static String getAttemptId(TaskAttemptContext taskContext, String datasourceId) {
        assert taskContext != null;
        assert datasourceId != null;
        return taskContext.getTaskAttemptID().toString();
    }

    private static Counter createCounter(JobContext context) {
        assert context != null;
        if (context instanceof Progressable) {
            return new ProgressableCounter((Progressable) context);
        } else if (context instanceof org.apache.hadoop.mapred.JobContext) {
            return new ProgressableCounter(((org.apache.hadoop.mapred.JobContext) context).getProgressible());
        } else {
            return new Counter();
        }
    }

    /**
     * Extracts an execution ID from the transaction info.
     * @param transactionInfoPath target path
     * @return execution ID, or {@code null} if is not a valid transaction info
     * @throws IllegalArgumentException if some parameters were {@code null}
     * @see #getCommitMarkPath(Configuration, String)
     */
    public static String getTransactionInfoExecutionId(Path transactionInfoPath) {
        if (transactionInfoPath == null) {
            throw new IllegalArgumentException("transactionInfoPath must not be null"); //$NON-NLS-1$
        }
        return getMarkPath(transactionInfoPath, Pattern.compile("tx-(.+)"));
    }

    private static String getMarkPath(Path path, Pattern pattern) {
        assert path != null;
        assert pattern != null;
        String name = path.getName();
        Matcher matcher = pattern.matcher(name);
        if (matcher.matches() == false) {
            return null;
        }
        return matcher.group(1);
    }

    /**
     * Returns the transaction info path.
     * @param conf the current configuration
     * @param executionId target transaction ID
     * @return target path
     * @throws IOException if failed to compute the path by I/O exception
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public static Path getTransactionInfoPath(Configuration conf, String executionId) throws IOException {
        if (conf == null) {
            throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
        }
        if (executionId == null) {
            throw new IllegalArgumentException("transactionId must not be null"); //$NON-NLS-1$
        }
        return new Path(
                getTransactionInfoDir(conf),
                String.format("tx-%s", executionId));
    }

    /**
     * Returns the commit mark path.
     * @param conf the current configuration
     * @param executionId target transaction ID
     * @return target path
     * @throws IOException if failed to compute the path by I/O exception
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public static Path getCommitMarkPath(Configuration conf, String executionId) throws IOException {
        if (conf == null) {
            throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
        }
        if (executionId == null) {
            throw new IllegalArgumentException("transactionId must not be null"); //$NON-NLS-1$
        }
        return new Path(
                getTransactionInfoDir(conf),
                String.format("commit-%s", executionId));
    }

    /**
     * Returns the all transaction info files.
     * @param conf the current configuration
     * @return target path
     * @throws IOException if failed to find files by I/O error
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public static Collection<FileStatus> findAllTransactionInfoFiles(Configuration conf) throws IOException {
        if (conf == null) {
            throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
        }
        Path dir = getTransactionInfoDir(conf);
        FileSystem fs = dir.getFileSystem(conf);
        FileStatus[] statusArray;
        try {
            statusArray = fs.listStatus(dir);
        } catch (FileNotFoundException e) {
            statusArray = null;
            if (LOG.isDebugEnabled()) {
                LOG.debug(MessageFormat.format("Target file is not found: {0}", dir), e);
            }
        }
        if (statusArray == null || statusArray.length == 0) {
            return Collections.emptyList();
        }
        Collection<FileStatus> results = new ArrayList<FileStatus>();
        for (FileStatus stat : statusArray) {
            if (getTransactionInfoExecutionId(stat.getPath()) != null) {
                results.add(stat);
            }
        }
        return results;
    }

    private static Path getTransactionInfoDir(Configuration conf) throws IOException {
        if (conf == null) {
            throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
        }
        String working = conf.get(KEY_SYSTEM_DIR, DEFAULT_SYSTEM_DIR);
        Path path = new Path(working, TRANSACTION_INFO_DIR);
        return path.getFileSystem(conf).makeQualified(path);
    }

    /**
     * Searches file/directories by pattern.
     * @param fs target file system
     * @param base base path
     * @param pattern search pattern
     * @return found files, or an empty list if not found
     * @throws IOException if failed to search by I/O error
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public static List<FileStatus> search(FileSystem fs, Path base, FilePattern pattern) throws IOException {
        if (fs == null) {
            throw new IllegalArgumentException("fs must not be null"); //$NON-NLS-1$
        }
        if (base == null) {
            throw new IllegalArgumentException("base must not be null"); //$NON-NLS-1$
        }
        if (pattern == null) {
            throw new IllegalArgumentException("pattern must not be null"); //$NON-NLS-1$
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format(
                    "Start searching for files (path={0}, resourcePattern={1})",
                    base,
                    pattern));
        }
        List<FileStatus> current = new ArrayList<FileStatus>(1);
        try {
            FileStatus stat = fs.getFileStatus(base);
            current.add(stat);
        } catch (FileNotFoundException e) {
            return Collections.emptyList();
        }
        int steps = 0;
        LinkedList<Segment> segments = new LinkedList<Segment>(pattern.getSegments());
        while (segments.isEmpty() == false) {
            if (segments.getFirst().isTraverse()) {
                segments.removeFirst();
                current = recursiveStep(fs, current);
            } else {
                List<Path> step = consumeStep(segments);
                current = globStep(fs, current, step);
            }
            steps++;
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format(
                    "Finish searching for files (path={0}, resourcePattern={1}, results={2}, steps={3})",
                    base,
                    pattern,
                    current.size(),
                    steps));
        }
        return current;
    }

    private static List<Path> consumeStep(LinkedList<Segment> segments) {
        assert segments != null;
        assert segments.isEmpty() == false;
        assert segments.getFirst().isTraverse() == false;
        List<Path> results = new ArrayList<Path>();

        Segment current = segments.removeFirst();
        for (String segment : resolve(current)) {
            results.add(new Path(segment));
        }
        while (isGlobRequired(current) && segments.isEmpty() == false && segments.getFirst().isTraverse() == false) {
            current = segments.removeFirst();
            Set<String> suffixCandidates = resolve(current);
            if (suffixCandidates.size() == 1) {
                String suffix = suffixCandidates.iterator().next();
                for (ListIterator<Path> i = results.listIterator(); i.hasNext();) {
                    Path parent = i.next();
                    i.set(new Path(parent, suffix));
                }
            } else {
                List<Path> nextResults = new ArrayList<Path>();
                for (Path parent : results) {
                    for (String suffix : suffixCandidates) {
                        nextResults.add(new Path(parent, suffix));
                    }
                }
                results = nextResults;
            }
        }

        Set<Path> saw = new HashSet<Path>();
        for (Iterator<Path> iter = results.iterator(); iter.hasNext();) {
            Path path = iter.next();
            if (saw.contains(path)) {
                iter.remove();
            } else {
                saw.add(path);
            }
        }
        return results;
    }

    private static boolean isGlobRequired(Segment segment) {
        assert segment != null;
        assert segment.isTraverse() == false;
        for (PatternElement element : segment.getElements()) {
            if (element.getKind() == PatternElementKind.WILDCARD) {
                return false;
            }
        }
        return true;
    }

    private static Set<String> resolve(Segment segment) {
        assert segment != null;
        assert segment.isTraverse() == false;
        List<Set<String>> candidates = new ArrayList<Set<String>>();
        for (PatternElement element : segment.getElements()) {
            switch (element.getKind()) {
            case TOKEN:
                candidates.add(Collections.singleton(element.getToken()));
                break;
            case WILDCARD:
                candidates.add(Collections.singleton("*"));
                break;
            case SELECTION:
                candidates.add(new TreeSet<String>(((Selection) element).getContents()));
                break;
            default:
                throw new AssertionError();
            }
        }
        List<String> results = stringCrossJoin(candidates);
        return new TreeSet<String>(results);
    }

    private static List<String> stringCrossJoin(List<Set<String>> candidates) {
        assert candidates != null;
        assert candidates.isEmpty() == false;
        List<String> results = new ArrayList<String>();
        Iterator<Set<String>> iter = candidates.iterator();
        assert iter.hasNext();
        results.addAll(iter.next());
        while (iter.hasNext()) {
            Set<String> next = iter.next();
            if (next.size() == 1) {
                String suffix = next.iterator().next();
                for (ListIterator<String> i = results.listIterator(); i.hasNext();) {
                    String vaule = i.next();
                    i.set(vaule + suffix);
                }
            } else {
                List<String> nextResults = new ArrayList<String>();
                for (String value : results) {
                    for (String suffix : next) {
                        nextResults.add(value + suffix);
                    }
                }
                results = nextResults;
            }
        }
        return results;
    }

    private static List<FileStatus> recursiveStep(FileSystem fs, List<FileStatus> current) throws IOException {
        assert fs != null;
        assert current != null;
        Set<Path> paths = new HashSet<Path>();
        List<FileStatus> results = new ArrayList<FileStatus>();
        LinkedList<FileStatus> work = new LinkedList<FileStatus>(current);
        while (work.isEmpty() == false) {
            FileStatus next = work.removeFirst();
            Path path = next.getPath();
            if (paths.contains(path) == false) {
                paths.add(path);
                results.add(next);
                if (FileSystemCompatibility.isDirectory(next)) {
                    FileStatus[] children;
                    try {
                        children = fs.listStatus(path);
                    } catch (FileNotFoundException e) {
                        children = null;
                        if (LOG.isDebugEnabled()) {
                            LOG.debug(MessageFormat.format("Target file is not found: {0}", path), e);
                        }
                    }
                    if (children != null) {
                        Collections.addAll(work, children);
                    }
                }
            }
        }
        return results;
    }

    private static List<FileStatus> globStep(
            FileSystem fs,
            List<FileStatus> current,
            List<Path> expressions) throws IOException {
        assert fs != null;
        assert current != null;
        assert expressions != null;
        Set<Path> paths = new HashSet<Path>();
        List<FileStatus> results = new ArrayList<FileStatus>();
        for (FileStatus status : current) {
            if (FileSystemCompatibility.isDirectory(status) == false) {
                continue;
            }
            for (Path expression : expressions) {
                Path path = new Path(status.getPath(), expression);
                FileStatus[] expanded = fs.globStatus(path);
                if (expanded != null) {
                    for (FileStatus s : expanded) {
                        Path p = s.getPath();
                        if (paths.contains(p) == false) {
                            paths.add(p);
                            results.add(s);
                        }
                    }
                }
            }
        }
        return results;
    }

    /**
     * Returns only minimal covered files.
     * If the parameter contains both directory and its children, this result includes only the directory.
     * @param statList target files
     * @return minimal covered
     */
    public static List<FileStatus> onlyMinimalCovered(List<FileStatus> statList) {
        assert statList != null;
        FileStatus[] stats = statList.toArray(new FileStatus[statList.size()]);
        for (int i = 0; i < stats.length; i++) {
            if (stats[i] == null || FileSystemCompatibility.isDirectory(stats[i]) == false) {
                continue;
            }
            for (int j = 0; j < stats.length; j++) {
                if (i == j || stats[j] == null) {
                    continue;
                }
                if (contains(stats[i], stats[j])) {
                    stats[j] = null;
                }
            }
        }
        List<FileStatus> results = new ArrayList<FileStatus>();
        for (int i = 0; i < stats.length; i++) {
            FileStatus stat = stats[i];
            if (stat != null) {
                results.add(stat);
            }
        }
        return results;
    }

    private static boolean contains(FileStatus dir, FileStatus target) {
        assert dir != null;
        assert target != null;
        assert FileSystemCompatibility.isDirectory(dir);
        Path parent = dir.getPath();
        Path child = target.getPath();
        return contains(parent, child);
    }

    /**
     * Returns whether the parent path contains the child path, or not.
     * If the parent and child is same, this returns {@code false}.
     * @param parent the parent path
     * @param child the child path
     * @return {@code true} if parent path strictly contains the child, otherwise {@code false}
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public static boolean contains(Path parent, Path child) {
        if (parent == null) {
            throw new IllegalArgumentException("parent must not be null"); //$NON-NLS-1$
        }
        if (child == null) {
            throw new IllegalArgumentException("child must not be null"); //$NON-NLS-1$
        }
        if (parent.depth() >= child.depth()) {
            return false;
        }
        URI parentUri = parent.toUri();
        URI childUri = child.toUri();
        URI relative = parentUri.relativize(childUri);
        if (relative.equals(childUri) == false) {
            return true;
        }
        return false;
    }

    /**
     * Moves all files in source directory into target directory.
     * @param counter counter which accepts operations count
     * @param fs file system
     * @param from path to source directory
     * @param to path to target directory
     * @throws IOException if failed to move files
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public static void move(
            Counter counter,
            FileSystem fs,
            Path from,
            Path to) throws IOException {
        move(counter, fs, from, fs, to, false);
    }

    /**
     * Moves all files in source directory into target directory.
     * @param counter counter which accepts operations count
     * @param localFs the local file system
     * @param fs the target file system
     * @param from path to source directory (must be on local file system)
     * @param to path to target directory
     * @throws IOException if failed to move files
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public static void moveFromLocal(
            Counter counter,
            LocalFileSystem localFs,
            FileSystem fs,
            Path from,
            Path to) throws IOException {
        move(counter, localFs, from, fs, to, true);
    }

    private static void move(
            Counter counter,
            FileSystem fromFs, Path from,
            FileSystem toFs, Path to,
            boolean fromLocal) throws IOException {
        if (counter == null) {
            throw new IllegalArgumentException("counter must not be null"); //$NON-NLS-1$
        }
        if (fromFs == null) {
            throw new IllegalArgumentException("fromFs must not be null"); //$NON-NLS-1$
        }
        if (from == null) {
            throw new IllegalArgumentException("from must not be null"); //$NON-NLS-1$
        }
        if (toFs == null) {
            throw new IllegalArgumentException("toFs must not be null"); //$NON-NLS-1$
        }
        if (to == null) {
            throw new IllegalArgumentException("to must not be null"); //$NON-NLS-1$
        }
        if (fromLocal && isLocalPath(from) == false) {
            throw new IllegalArgumentException("from must be on local file system"); //$NON-NLS-1$
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format(
                    "Start moving files (from={0}, to={1})",
                    from,
                    to));
        }
        Path source = fromFs.makeQualified(from);
        Path target = toFs.makeQualified(to);
        List<Path> list = createFileListRelative(counter, fromFs, source);
        if (list.isEmpty()) {
            return;
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format(
                    "Process moving files (from={0}, to={1}, count={2})",
                    from,
                    to,
                    list.size()));
        }
        Set<Path> directoryCreated = new HashSet<Path>();
        for (Path path : list) {
            Path sourceFile = new Path(source, path);
            Path targetFile = new Path(target, path);
            if (LOG.isTraceEnabled()) {
                FileStatus stat = fromFs.getFileStatus(sourceFile);
                LOG.trace(MessageFormat.format(
                        "Moving file (from={0}, to={1}, size={2})",
                        sourceFile,
                        targetFile,
                        stat.getLen()));
            }
            try {
                FileStatus stat = toFs.getFileStatus(targetFile);
                if (LOG.isDebugEnabled()) {
                    LOG.debug(MessageFormat.format(
                            "Deleting file: {0}",
                            targetFile));
                }
                if (FileSystemCompatibility.isDirectory(stat)) {
                    toFs.delete(targetFile, true);
                } else {
                    toFs.delete(targetFile, false);
                }
            } catch (FileNotFoundException e) {
                Path targetParent = targetFile.getParent();
                if (directoryCreated.contains(targetParent) == false) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug(MessageFormat.format(
                                "Creating directory: {0}",
                                targetParent));
                    }
                    toFs.mkdirs(targetParent);
                    directoryCreated.add(targetParent);
                }
            }
            counter.add(1);
            if (fromLocal) {
                toFs.moveFromLocalFile(sourceFile, targetFile);
            } else {
                boolean succeed = toFs.rename(sourceFile, targetFile);
                if (succeed == false) {
                    throw new IOException(MessageFormat.format(
                            "Failed to move file (from={0}, to={1})",
                            sourceFile,
                            targetFile));
                }
            }
            counter.add(1);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format(
                    "Finish moving files (from={0}, to={1}, count={2})",
                    from,
                    to,
                    list.size()));
        }
    }

    private static boolean isLocalPath(Path path) {
        assert path != null;
        String scheme = path.toUri().getScheme();
        return scheme != null && scheme.equals("file");
    }

    @SuppressWarnings("unchecked")
    private static List<Path> createFileListRelative(Counter counter, FileSystem fs, Path source) throws IOException {
        assert counter != null;
        assert fs != null;
        assert source != null;
        assert source.isAbsolute();
        URI baseUri = source.toUri();
        FileStatus root;
        try {
            root = fs.getFileStatus(source);
        } catch (FileNotFoundException e) {
            LOG.warn(MessageFormat.format(
                    "Source path is not found: {0} (May be already moved)",
                    baseUri));
            return Collections.emptyList();
        }
        counter.add(1);
        List<FileStatus> all = recursiveStep(fs, Collections.singletonList(root));
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format(
                    "Source path contains {1} files/directories: {0}",
                    baseUri,
                    all.size()));
        }
        List<Path> results = new ArrayList<Path>();
        for (FileStatus stat : all) {
            if (FileSystemCompatibility.isDirectory(stat)) {
                continue;
            }
            Path path = stat.getPath();
            URI uri = path.toUri();
            URI relative = baseUri.relativize(uri);
            if (relative.equals(uri) == false) {
                results.add(new Path(relative));
            } else {
                throw new IOException(MessageFormat.format(
                        "Failed to compute relative path: base={0}, target={1}",
                        baseUri,
                        uri));
            }
            counter.add(1);
        }
        Collections.sort(results);
        return results;
    }

    private HadoopDataSourceUtil() {
        return;
    }

    private static class HadoopDataSourceProvider implements DirectDataSourceProvider {

        private final Configuration configuration;

        private final DirectDataSourceProfile profile;

        public HadoopDataSourceProvider(Configuration configuration, DirectDataSourceProfile profile) {
            assert configuration != null;
            assert profile != null;
            this.configuration = configuration;
            this.profile = profile;
        }

        @Override
        public String getId() {
            return profile.getId();
        }

        @Override
        public String getPath() {
            return profile.getPath();
        }

        @Override
        public DirectDataSource newInstance() throws IOException, InterruptedException {
            try {
                AbstractDirectDataSource instance = profile.getTargetClass().getConstructor().newInstance();
                if (instance instanceof Configurable) {
                    ((Configurable) instance).setConf(configuration);
                }
                instance.configure(profile);
                return instance;
            } catch (Exception e) {
                throw new IOException(MessageFormat.format(
                        "Failed to create data source instance: {0} ({1})",
                        PREFIX + profile.getId(),
                        profile.getTargetClass().getName()), e);
            }
        }
    }
}
TOP

Related Classes of com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.