Package com.asakusafw.runtime.stage

Source Code of com.asakusafw.runtime.stage.AbstractStageClient

/**
* Copyright 2011-2014 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.runtime.stage;

import static com.asakusafw.runtime.stage.StageConstants.*;

import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.ServiceLoader;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;

import com.asakusafw.runtime.compatibility.JobCompatibility;
import com.asakusafw.runtime.core.context.RuntimeContext;
import com.asakusafw.runtime.stage.input.StageInputDriver;
import com.asakusafw.runtime.stage.input.StageInputFormat;
import com.asakusafw.runtime.stage.input.StageInputMapper;
import com.asakusafw.runtime.stage.output.LegacyBridgeOutputCommitter;
import com.asakusafw.runtime.stage.output.StageOutputDriver;
import com.asakusafw.runtime.stage.output.StageOutputFormat;
import com.asakusafw.runtime.stage.resource.StageResourceDriver;
import com.asakusafw.runtime.util.VariableTable;
import com.asakusafw.runtime.util.VariableTable.RedefineStrategy;

/**
* ステージごとの処理を起動するクライアントの基底クラス。
* @since 0.1.0
* @version 0.6.0
*/
public abstract class AbstractStageClient extends BaseStageClient {

    /**
     * {@link #getStageOutputPath()}のメソッド名。
     */
    public static final String METHOD_STAGE_OUTPUT_PATH = "getStageOutputPath";

    /**
     * {@link #getStageInputs()}のメソッド名。
     */
    public static final String METHOD_STAGE_INPUTS = "getStageInputs";

    /**
     * {@link #getStageOutputs()}のメソッド名。
     */
    public static final String METHOD_STAGE_OUTPUTS = "getStageOutputs";

    /**
     * {@link #getStageResources()}のメソッド名。
     */
    public static final String METHOD_STAGE_RESOURCES = "getStageResources";

    /**
     * {@link #getShuffleKeyClassOrNull()}のメソッド名。
     */
    public static final String METHOD_SHUFFLE_KEY_CLASS = "getShuffleKeyClassOrNull";

    /**
     * {@link #getShuffleValueClassOrNull()}のメソッド名。
     */
    public static final String METHOD_SHUFFLE_VALUE_CLASS = "getShuffleValueClassOrNull";

    /**
     * {@link #getPartitionerClassOrNull()}のメソッド名。
     */
    public static final String METHOD_PARTITIONER_CLASS = "getPartitionerClassOrNull";

    /**
     * {@link #getCombinerClassOrNull()}のメソッド名。
     */
    public static final String METHOD_COMBINER_CLASS = "getCombinerClassOrNull";

    /**
     * {@link #getSortComparatorClassOrNull()}のメソッド名。
     */
    public static final String METHOD_SORT_COMPARATOR_CLASS = "getSortComparatorClassOrNull";

    /**
     * {@link #getGroupingComparatorClassOrNull()}のメソッド名。
     */
    public static final String METHOD_GROUPING_COMPARATOR_CLASS = "getGroupingComparatorClassOrNull";

    /**
     * {@link #getReducerClassOrNull()}のメソッド名。
     */
    public static final String METHOD_REDUCER_CLASS = "getReducerClassOrNull";

    static final Log LOG = LogFactory.getLog(AbstractStageClient.class);

    /**
     * Configures the {@link Job} object for this stage.
     * @param job the target job
     * @param variables current variable table
     * @throws IOException if failed to configure the job
     * @throws InterruptedException if interrupted while configuring {@link Job} object
     */
    protected void configureStage(Job job, VariableTable variables) throws IOException, InterruptedException {
        ClassLoader loader = job.getConfiguration().getClassLoader();
        for (StageConfigurator configurator : ServiceLoader.load(StageConfigurator.class, loader)) {
            configurator.configure(job);
        }
    }

    /**
     * このステージへの入力一覧を返す。
     * @return 入力一覧
     */
    protected abstract List<StageInput> getStageInputs();

    /**
     * このステージからの出力に利用するベースパスを返す。
     * @return ベースパス
     */
    protected abstract String getStageOutputPath();

    /**
     * このステージからの出力一覧を返す。
     * @return 出力一覧
     */
    protected List<StageOutput> getStageOutputs() {
        return Collections.emptyList();
    }

    /**
     * このステージで利用するキャッシュファイルの一覧を返す。
     * @return キャッシュファイル一覧
     */
    protected List<StageResource> getStageResources() {
        return Collections.emptyList();
    }

    /**
     * このステージのシャッフルフェーズで利用するキークラスを返す。
     * @return シャッフルフェーズで利用するキークラス、利用しない場合は{@code null}
     */
    protected Class<? extends Writable> getShuffleKeyClassOrNull() {
        return null;
    }

    /**
     * このステージのシャッフルフェーズで利用する値クラスを返す。
     * @return シャッフルフェーズで利用する値クラス、利用しない場合は{@code null}
     */
    protected Class<? extends Writable> getShuffleValueClassOrNull() {
        return null;
    }

    /**
     * このステージで利用するパーティショナークラスを返す。
     * @return パーティショナークラス、利用しない場合は{@code null}
     */
    @SuppressWarnings("rawtypes")
    protected Class<? extends Partitioner> getPartitionerClassOrNull() {
        return null;
    }

    /**
     * このステージで利用するコンバイナークラスを返す。
     * @return コンバイナークラス、利用しない場合は{@code null}
     */
    @SuppressWarnings("rawtypes")
    protected Class<? extends Reducer> getCombinerClassOrNull() {
        return null;
    }

    /**
     * キーの整列に利用する比較クラスを返す。
     * @return 比較クラス、利用しない場合は{@code null}
     */
    @SuppressWarnings("rawtypes")
    protected Class<? extends RawComparator> getSortComparatorClassOrNull() {
        return null;
    }

    /**
     * キーのグループ化に利用する比較クラスを返す。
     * @return 比較クラス、利用しない場合は{@code null}
     */
    @SuppressWarnings("rawtypes")
    protected Class<? extends RawComparator> getGroupingComparatorClassOrNull() {
        return null;
    }

    /**
     * このステージで利用するレデューサークラスを返す。
     * @return レデューサークラス、利用しない場合は{@code null}
     */
    @SuppressWarnings("rawtypes")
    protected Class<? extends Reducer> getReducerClassOrNull() {
        return null;
    }

    @Override
    protected int execute(String[] args) throws Exception {
        Configuration conf = getConf();
        conf.set(StageConstants.PROP_BATCH_ID, getBatchId());
        conf.set(StageConstants.PROP_FLOW_ID, getFlowId());
        Job job = createJob(conf);
        return submit(job);
    }

    /**
     * Creates a new job.
     * @param conf asakusa job configuration
     * @return the created job
     * @throws IOException if failed to create a new job
     * @throws InterruptedException if interrupted while creating {@link Job} object
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public Job createJob(Configuration conf) throws IOException, InterruptedException {
        if (conf == null) {
            throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
        }
        Job job = JobCompatibility.newJob(conf);
        VariableTable variables = getPathParser(job.getConfiguration());
        configureJobInfo(job, variables);
        configureStageInput(job, variables);
        configureStageOutput(job, variables);
        configureShuffle(job, variables);
        configureStageResource(job, variables);
        configureStage(job, variables);
        return job;
    }

    private int submit(Job job) throws IOException, InterruptedException, ClassNotFoundException {
        LOG.info(MessageFormat.format(
                "Submitting Job: {0}",
                job.getJobName()));
        long start = System.currentTimeMillis();
        boolean succeed;
        if (RuntimeContext.get().isSimulation()) {
            LOG.info(MessageFormat.format(
                    "Job is skipped because current execution status is in simulation mode: name={0}",
                    job.getJobName()));
            succeed = true;
        } else {
            job.submit();
            LOG.info(MessageFormat.format(
                    "Job Submitted: id={0}, name={1}",
                    job.getJobID(),
                    job.getJobName()));
            succeed = job.waitForCompletion(true);
        }
        long end = System.currentTimeMillis();
        LOG.info(MessageFormat.format(
                "Job Finished: elapsed=[{3}]ms, succeed={2}, id={0}, name={1}",
                job.getJobID(),
                job.getJobName(),
                succeed,
                String.valueOf(end - start)));

        return succeed ? ToolLauncher.JOB_SUCCEEDED : ToolLauncher.JOB_FAILED;
    }

    private void configureJobInfo(Job job, VariableTable variables) {
        Class<?> clientClass = getClass();
        String operationId = getOperationId();

        LOG.info(MessageFormat.format("Hadoop Job Client: {0}", clientClass.getName()));
        String jar = job.getConfiguration().get(PROP_APPLICATION_JAR);
        if (jar == null || (job.getConfiguration() instanceof JobConf) == false) {
            job.setJarByClass(clientClass);
        } else {
            ((JobConf) job.getConfiguration()).setJar(jar);
        }

        LOG.info(MessageFormat.format("Hadoop Job Name: {0}", operationId));
        job.setJobName(operationId);
    }

    private void configureStageInput(Job job, VariableTable variables) {
        List<StageInput> inputList = new ArrayList<StageInput>();
        for (StageInput input : getStageInputs()) {
            Class<? extends Mapper<?, ?, ?, ?>> mapperClass = input.getMapperClass();
            String pathString = input.getPathString();
            Class<? extends InputFormat<?, ?>> formatClass = input.getFormatClass();
            String expanded = variables.parse(pathString);
            Map<String, String> attributes = input.getAttributes();
            LOG.info(MessageFormat.format(
                    "Input: path={0}, format={1}, mapper={2}, attributes={3}",
                    expanded,
                    formatClass.getName(),
                    mapperClass.getName(),
                    attributes));
            inputList.add(new StageInput(expanded, formatClass, mapperClass, attributes));
        }
        StageInputDriver.set(job, inputList);
        job.setInputFormatClass(StageInputFormat.class);
        job.setMapperClass(StageInputMapper.class);
    }

    @SuppressWarnings("rawtypes")
    private void configureShuffle(Job job, VariableTable variables) {
        Class<? extends Reducer> reducer = getReducerClassOrNull();
        if (reducer != null) {
            LOG.info(MessageFormat.format("Reducer: {0}", reducer.getName()));
            job.setReducerClass(reducer);
        } else {
            LOG.info("Reducer: N/A");
            job.setNumReduceTasks(0);
            return;
        }

        Class<? extends Writable> outputKeyClass = or(getShuffleKeyClassOrNull(), NullWritable.class);
        Class<? extends Writable> outputValueClass = or(getShuffleValueClassOrNull(), NullWritable.class);
        LOG.info(MessageFormat.format(
                "Shuffle: key={0}, value={1}",
                outputKeyClass.getName(),
                outputValueClass.getName()));
        job.setMapOutputKeyClass(outputKeyClass);
        job.setMapOutputValueClass(outputValueClass);

        Class<? extends Reducer> combiner = getCombinerClassOrNull();
        if (combiner != null) {
            LOG.info(MessageFormat.format("Combiner: {0}", combiner.getName()));
            job.setCombinerClass(combiner);
        } else {
            LOG.info("Combiner: N/A");
        }

        Class<? extends Partitioner> partitioner = getPartitionerClassOrNull();
        if (partitioner != null) {
            LOG.info(MessageFormat.format("Partitioner: {0}", partitioner.getName()));
            job.setPartitionerClass(partitioner);
        } else {
            LOG.info("Partitioner: DEFAULT");
        }

        Class<? extends RawComparator> groupingComparator = getGroupingComparatorClassOrNull();
        if (groupingComparator != null) {
            LOG.info(MessageFormat.format("GroupingComparator: {0}", groupingComparator.getName()));
            job.setGroupingComparatorClass(groupingComparator);
        } else {
            LOG.info("GroupingComparator: DEFAULT");
        }

        Class<? extends RawComparator> sortComparator = getSortComparatorClassOrNull();
        if (sortComparator != null) {
            LOG.info(MessageFormat.format("SortComparator: {0}", sortComparator.getName()));
            job.setSortComparatorClass(sortComparator);
        } else {
            LOG.info("SortComparator: DEFAULT");
        }
    }

    private void configureStageResource(Job job, VariableTable variables) throws IOException {
        List<StageResource> resources = getStageResources();
        for (StageResource cache : resources) {
            String resolved = variables.parse(cache.getLocation());
            LOG.info(MessageFormat.format("Distributed Cache: {0} @ {1}", cache.getName(), resolved));
            if (RuntimeContext.get().isSimulation()) {
                LOG.info("Preparing distributed cache is skipped in simulation mode");
            } else {
                StageResourceDriver.add(job, resolved, cache.getName());
            }
        }
    }

    private void configureStageOutput(Job job, VariableTable variables) throws IOException {
        String outputPath = variables.parse(getStageOutputPath());
        List<StageOutput> outputList = new ArrayList<StageOutput>();
        for (StageOutput output : getStageOutputs()) {
            String name = output.getName();
            Class<?> keyClass = output.getKeyClass();
            Class<?> valueClass = output.getValueClass();
            Class<? extends OutputFormat<?, ?>> formatClass = output.getFormatClass();
            Map<String, String> attributes = output.getAttributes();
            LOG.info(MessageFormat.format(
                    "Output: path={0}/{1}-*, format={2}, key={3}, value={4}, attributes={5}",
                    outputPath,
                    name,
                    formatClass.getName(),
                    keyClass.getName(),
                    valueClass.getName(),
                    attributes));
            outputList.add(new StageOutput(name, keyClass, valueClass, formatClass, attributes));
        }
        StageOutputDriver.set(job, outputPath, outputList);

        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(NullWritable.class);
        job.setOutputFormatClass(StageOutputFormat.class);
        job.getConfiguration().setClass(
                "mapred.output.committer.class",
                LegacyBridgeOutputCommitter.class,
                org.apache.hadoop.mapred.OutputCommitter.class);
    }

    private <T> T or(T a, T b) {
        if (a != null) {
            return a;
        } else {
            return b;
        }
    }

    private VariableTable getPathParser(Configuration configuration) {
        assert configuration != null;
        VariableTable variables = new VariableTable(RedefineStrategy.IGNORE);
        variables.defineVariable(VAR_USER, getUser());
        variables.defineVariable(VAR_DEFINITION_ID, getDefinitionId());
        variables.defineVariable(VAR_STAGE_ID, getStageId());
        variables.defineVariable(VAR_BATCH_ID, getBatchId());
        variables.defineVariable(VAR_FLOW_ID, getFlowId());
        variables.defineVariable(VAR_EXECUTION_ID, getExecutionId());
        String arguments = configuration.get(PROP_ASAKUSA_BATCH_ARGS);
        if (arguments == null) {
            LOG.warn(MessageFormat.format(
                    "Missing configuration \"{0}\" (batch arguments)",
                    PROP_ASAKUSA_BATCH_ARGS));
        } else {
            variables.defineVariables(arguments);
        }

        // replace variables
        configuration.set(PROP_ASAKUSA_BATCH_ARGS, variables.toSerialString());
        return variables;
    }
}
TOP

Related Classes of com.asakusafw.runtime.stage.AbstractStageClient

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.