Package com.asakusafw.windgate.hadoopfs

Source Code of com.asakusafw.windgate.hadoopfs.HadoopFsMirror

/**
* Copyright 2011-2014 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.windgate.hadoopfs;

import static com.asakusafw.windgate.core.vocabulary.FileProcess.*;

import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.asakusafw.runtime.core.context.RuntimeContext;
import com.asakusafw.runtime.core.context.SimulationSupport;
import com.asakusafw.runtime.io.ModelOutput;
import com.asakusafw.runtime.io.util.VoidModelOutput;
import com.asakusafw.runtime.stage.temporary.TemporaryStorage;
import com.asakusafw.windgate.core.DriverScript;
import com.asakusafw.windgate.core.GateScript;
import com.asakusafw.windgate.core.ParameterList;
import com.asakusafw.windgate.core.ProcessScript;
import com.asakusafw.windgate.core.WindGateLogger;
import com.asakusafw.windgate.core.resource.DrainDriver;
import com.asakusafw.windgate.core.resource.ResourceMirror;
import com.asakusafw.windgate.core.resource.SourceDriver;
import com.asakusafw.windgate.core.vocabulary.FileProcess;
import com.asakusafw.windgate.hadoopfs.temporary.FileSystemModelInputProvider;
import com.asakusafw.windgate.hadoopfs.temporary.ModelInputProvider;
import com.asakusafw.windgate.hadoopfs.temporary.ModelInputSourceDriver;
import com.asakusafw.windgate.hadoopfs.temporary.ModelOutputDrainDriver;

/**
* An abstract implementation of {@link ResourceMirror} directly using Hadoop File System.
* @since 0.2.2
* @version 0.4.0
* @see FileProcess
*/
@SimulationSupport
public class HadoopFsMirror extends ResourceMirror {

    static final WindGateLogger WGLOG = new HadoopFsLogger(HadoopFsMirror.class);

    static final Logger LOG = LoggerFactory.getLogger(HadoopFsMirror.class);

    private final Configuration configuration;

    private final HadoopFsProfile profile;

    private final ParameterList arguments;

    /**
     * Creates a new instance.
     * @param configuration the hadoop configuration
     * @param profile the profile
     * @param arguments the arguments
     * @throws IllegalArgumentException if any parameter is {@code null}
     */
    public HadoopFsMirror(
            Configuration configuration,
            HadoopFsProfile profile,
            ParameterList arguments) {
        if (configuration == null) {
            throw new IllegalArgumentException("configuration must not be null"); //$NON-NLS-1$
        }
        if (profile == null) {
            throw new IllegalArgumentException("profile must not be null"); //$NON-NLS-1$
        }
        if (arguments == null) {
            throw new IllegalArgumentException("arguments must not be null"); //$NON-NLS-1$
        }
        this.configuration = configuration;
        this.profile = profile;
        this.arguments = arguments;
    }

    @Override
    public String getName() {
        return profile.getResourceName();
    }

    @Override
    public void prepare(GateScript script) throws IOException {
        if (script == null) {
            throw new IllegalArgumentException("script must not be null"); //$NON-NLS-1$
        }
        LOG.debug("Preparing Direct Hadoop FS resource: {}",
                getName());
        for (ProcessScript<?> process : script.getProcesses()) {
            if (process.getSourceScript().getResourceName().equals(getName())) {
                getPath(process, DriverScript.Kind.SOURCE);
            }
            if (process.getDrainScript().getResourceName().equals(getName())) {
                getPath(process, DriverScript.Kind.DRAIN);
            }
        }
    }

    @Override
    public <T> SourceDriver<T> createSource(ProcessScript<T> script) throws IOException {
        if (script == null) {
            throw new IllegalArgumentException("script must not be null"); //$NON-NLS-1$
        }
        LOG.debug("Creating source driver for resource \"{}\" in process \"{}\"",
                getName(),
                script.getName());
        List<Path> pathList = getPath(script, DriverScript.Kind.SOURCE);

        T value = newDataModel(script);
        ModelInputProvider<T> provider = null;
        boolean succeeded = false;
        try {
            FileSystem fs = FileSystem.get(profile.getBasePath().toUri(), configuration);
            provider = new FileSystemModelInputProvider<T>(configuration, fs, pathList, script.getDataClass());
            SourceDriver<T> result = new ModelInputSourceDriver<T>(provider, value);
            succeeded = true;
            return result;
        } finally {
            if (succeeded == false) {
                if (provider != null) {
                    try {
                        provider.close();
                    } catch (IOException e) {
                        WGLOG.warn(e, "W03001",
                                profile.getResourceName(),
                                script.getName(),
                                pathList);
                    }
                }
            }
        }
    }

    @Override
    public <T> DrainDriver<T> createDrain(ProcessScript<T> script) throws IOException {
        if (script == null) {
            throw new IllegalArgumentException("script must not be null"); //$NON-NLS-1$
        }
        LOG.debug("Creating drain driver for resource \"{}\" in process \"{}\"",
                getName(),
                script.getName());
        List<Path> pathList = getPath(script, DriverScript.Kind.DRAIN);
        assert pathList.size() == 1;
        Path path = pathList.get(0);

        ModelOutput<T> output = null;
        boolean succeeded = false;
        try {
            if (RuntimeContext.get().isSimulation()) {
                output = new VoidModelOutput<T>();
            } else {
                output = TemporaryStorage.openOutput(configuration, script.getDataClass(), path);
            }
            DrainDriver<T> result = new ModelOutputDrainDriver<T>(output);
            succeeded = true;
            return result;
        } finally {
            if (succeeded == false) {
                if (output != null) {
                    try {
                        output.close();
                    } catch (IOException e) {
                        WGLOG.warn(e, "W04001",
                                profile.getResourceName(),
                                script.getName(),
                                pathList);
                    }
                }
            }
        }
    }

    @Override
    public void close() throws IOException {
        return;
    }

    private List<Path> getPath(ProcessScript<?> proc, DriverScript.Kind kind) throws IOException {
        assert proc != null;
        assert kind != null;
        DriverScript script = proc.getDriverScript(kind);
        String pathString = script.getConfiguration().get(FILE.key());
        if (pathString == null) {
            WGLOG.error("E01001",
                    getName(),
                    proc.getName(),
                    kind.prefix,
                    FILE.key(),
                    null);
            throw new IOException(MessageFormat.format(
                    "Process \"{1}\" must declare \"{3}\": (resource={0}, kind={2})",
                    getName(),
                    proc.getName(),
                    kind.toString(),
                    FILE.key()));
        }
        List<Path> results = resolvePaths(proc, kind, pathString);
        if (kind == DriverScript.Kind.SOURCE && results.size() <= 0 && RuntimeContext.get().isSimulation() == false) {
            WGLOG.error("E01001",
                    getName(),
                    proc.getName(),
                    kind.prefix,
                    FILE.key(),
                    pathString);
            throw new IOException(MessageFormat.format(
                    "source path must be greater than 0: {2} (resource={0}, process={1})",
                    getName(),
                    proc.getName(),
                    results));
        }
        if (kind == DriverScript.Kind.DRAIN && results.size() != 1) {
            WGLOG.error("E01001",
                    getName(),
                    proc.getName(),
                    kind.prefix,
                    FILE.key(),
                    pathString);
            throw new IOException(MessageFormat.format(
                    "drain path must be one: {2} (resource={0}, process={1})",
                    getName(),
                    proc.getName(),
                    results));
        }
        return results;
    }

    private List<Path> resolvePaths(
            ProcessScript<?> proc,
            DriverScript.Kind kind,
            String pathString) throws IOException {
        assert proc != null;
        assert kind != null;
        assert pathString != null;
        Path basePath = profile.getBasePath();
        String[] paths = pathString.split("[ \t\r\n]+");
        List<Path> results = new ArrayList<Path>();
        for (String path : paths) {
            if (path.isEmpty()) {
                continue;
            }
            String resolved;
            try {
                resolved = arguments.replace(path, true);
            } catch (IllegalArgumentException e) {
                WGLOG.error(e, "E01001",
                        getName(),
                        proc.getName(),
                        kind.prefix,
                        FILE.key(),
                        pathString);
                throw new IOException(MessageFormat.format(
                        "Failed to resolve the {2} path: {3} (resource={0}, process={1})",
                        getName(),
                        proc.getName(),
                        kind.toString(),
                        path));
            }
            Path relative = new Path(resolved);
            if (relative.isAbsolute()) {
                WGLOG.warn("W01001",
                        getName(),
                        proc.getName(),
                        kind.prefix,
                        FILE.key(),
                        pathString);
            }
            results.add(new Path(basePath, relative));
        }
        return results;
    }

    private <T> T newDataModel(ProcessScript<T> script) throws IOException {
        assert script != null;
        Class<T> dataClass = script.getDataClass();
        LOG.debug("Creating data model object: {} (resource={}, process={})", new Object[] {
                dataClass.getName(),
                getName(),
                script.getName(),
        });
        try {
            return ReflectionUtils.newInstance(dataClass, configuration);
        } catch (Exception e) {
            WGLOG.error("E01002",
                    getName(),
                    script.getName(),
                    FILE.key(),
                    dataClass.getName());
            throw new IOException(MessageFormat.format(
                    "Failed to create a new instance: {2} (resource={0}, process={1})",
                    getName(),
                    script.getName(),
                    dataClass.getName()), e);
        }
    }
}
TOP

Related Classes of com.asakusafw.windgate.hadoopfs.HadoopFsMirror

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.