Package com.asakusafw.thundergate.runtime.cache.mapreduce

Source Code of com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient

/**
* Copyright 2011-2014 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.thundergate.runtime.cache.mapreduce;

import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;

import com.asakusafw.runtime.compatibility.JobCompatibility;
import com.asakusafw.runtime.stage.StageInput;
import com.asakusafw.runtime.stage.input.StageInputDriver;
import com.asakusafw.runtime.stage.input.StageInputFormat;
import com.asakusafw.runtime.stage.input.StageInputMapper;
import com.asakusafw.runtime.stage.input.TemporaryInputFormat;
import com.asakusafw.runtime.stage.output.LegacyBridgeOutputCommitter;
import com.asakusafw.runtime.stage.output.TemporaryOutputFormat;
import com.asakusafw.thundergate.runtime.cache.CacheStorage;

/**
 * MapReduce job client for applying a cache patch.
 * This requires the following command line arguments:
* <ol>
* <li> subcommand:
*   <ul>
*   <li> {@code "create"} - create a new cache head from {@code <directory>/PATCH} </li>
*   <li> {@code "update"} - update cache head with merging {@code <directory>/HEAD} and {@code <directory>/PATCH} </li>
*   </ul>
* </li>
* <li> path to the cache directory </li>
* <li> fully qualified data model class name </li>
* </ol>
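 * <p>
 * For example (the cache path and model class below are placeholders for illustration only):
 * </p>
 * <pre>{@code
 * update hdfs:///user/thundergate/cache/EXAMPLE com.example.modelgen.dmdl.model.Example
 * }</pre>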
* @since 0.2.3
*/
public class CacheBuildClient extends Configured implements Tool {

    /**
     * Subcommand to create a new cache.
     */
    public static final String SUBCOMMAND_CREATE = "create";

    /**
     * Subcommand to update a cache.
     */
    public static final String SUBCOMMAND_UPDATE = "update";

    private static final String NEXT_DIRECTORY_NAME = "NEXT";

    private static final String ESCAPE_DIRECTORY_NAME = "PREVIOUS";

    static final Log LOG = LogFactory.getLog(CacheBuildClient.class);

    private CacheStorage storage;

    private Class<?> modelClass;

    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 3) {
            throw new IllegalArgumentException(MessageFormat.format(
                    "Invalid arguments: {0}",
                    Arrays.toString(args)));
        }
        String subcommand = args[0];
        boolean create;
        if (subcommand.equals(SUBCOMMAND_CREATE)) {
            create = true;
        } else if (subcommand.equals(SUBCOMMAND_UPDATE)) {
            create = false;
        } else {
            throw new IllegalArgumentException(MessageFormat.format(
                    "Invalid arguments (unknown subcommand): {0}",
                    Arrays.toString(args)));
        }

        Path cacheDirectory = new Path(args[1]);
        modelClass = getConf().getClassByName(args[2]);
        this.storage = new CacheStorage(getConf(), cacheDirectory.toUri());
        try {
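            // Build the new cache image under the temporary NEXT directory first,
            // then promote it to HEAD in switchHead().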
            clearNext();
            if (create) {
                create();
            } else {
                update();
            }
            switchHead();
        } finally {
            storage.close();
        }
        return 0;
    }

    private void clearNext() throws IOException {
        LOG.info(MessageFormat.format("Cleaning cache output directory: {0}",
                getNextDirectory()));
        storage.getFileSystem().delete(getNextDirectory(), true);
    }

    private void update() throws IOException, InterruptedException {
        Job job = JobCompatibility.newJob(getConf());
        job.setJobName("TGC-UPDATE-" + storage.getPatchDirectory());

        List<StageInput> inputList = new ArrayList<StageInput>();
        inputList.add(new StageInput(
                storage.getHeadContents("*").toString(),
                TemporaryInputFormat.class,
                BaseMapper.class));
        inputList.add(new StageInput(
                storage.getPatchContents("*").toString(),
                TemporaryInputFormat.class,
                PatchMapper.class));
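        // Two inputs feed the same shuffle: BaseMapper reads the current HEAD contents
        // and PatchMapper reads the patch contents, both keyed by PatchApplyKey.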
        StageInputDriver.set(job, inputList);
        job.setInputFormatClass(StageInputFormat.class);
        job.setMapperClass(StageInputMapper.class);
        job.setMapOutputKeyClass(PatchApplyKey.class);
        job.setMapOutputValueClass(modelClass);

        // combiner may have no effect in normal cases
        job.setReducerClass(PatchApplyReducer.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(modelClass);
        job.setPartitionerClass(PatchApplyKey.Partitioner.class);
        job.setSortComparatorClass(PatchApplyKey.SortComparator.class);
        job.setGroupingComparatorClass(PatchApplyKey.GroupComparator.class);
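        // Secondary-sort setup: the partitioner and grouping comparator bring the base
        // record and its patch record for the same key into one reduce group, and the
        // sort comparator fixes their order within the group so that PatchApplyReducer
        // can emit the record that should survive.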

        TemporaryOutputFormat.setOutputPath(job, getNextDirectory());
        job.setOutputFormatClass(TemporaryOutputFormat.class);
        job.getConfiguration().setClass(
                "mapred.output.committer.class",
                LegacyBridgeOutputCommitter.class,
                org.apache.hadoop.mapred.OutputCommitter.class);
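        // "mapred.output.committer.class" is the old-API committer hook; the bridge class
        // presumably delegates it to the committer of the new-API TemporaryOutputFormat.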

        LOG.info(MessageFormat.format("Applying patch: {0} / {1} -> {2}",
                storage.getPatchContents("*"),
                storage.getHeadContents("*"),
                getNextContents()));
        try {
            boolean succeed = job.waitForCompletion(true);
            LOG.info(MessageFormat.format("Applied patch: succeed={0}, {1} / {2} -> {3}",
                    succeed,
                    storage.getPatchContents("*"),
                    storage.getHeadContents("*"),
                    getNextContents()));
            if (succeed == false) {
                throw new IOException(MessageFormat.format("Failed to apply patch: {0} / {1} -> {2}",
                        storage.getPatchContents("*"),
                        storage.getHeadContents("*"),
                        getNextContents()));
            }
        } catch (ClassNotFoundException e) {
            throw new IOException(e);
        }

        LOG.info(MessageFormat.format("Setting patched properties: {0} -> {1}",
                storage.getPatchProperties(),
                getNextDirectory()));
        FileUtil.copy(
                storage.getFileSystem(),
                storage.getPatchProperties(),
                storage.getFileSystem(),
                getNextProperties(),
                false,
                storage.getConfiguration());
    }

    private void create() throws InterruptedException, IOException {
        Job job = JobCompatibility.newJob(getConf());
        job.setJobName("TGC-CREATE-" + storage.getPatchDirectory());

        List<StageInput> inputList = new ArrayList<StageInput>();
        inputList.add(new StageInput(
                storage.getPatchContents("*").toString(),
                TemporaryInputFormat.class,
                DeleteMapper.class));
        StageInputDriver.set(job, inputList);
        job.setInputFormatClass(StageInputFormat.class);
        job.setMapperClass(StageInputMapper.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(modelClass);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(modelClass);

        TemporaryOutputFormat.setOutputPath(job, getNextDirectory());
        job.setOutputFormatClass(TemporaryOutputFormat.class);
        job.getConfiguration().setClass(
                "mapred.output.committer.class",
                LegacyBridgeOutputCommitter.class,
                org.apache.hadoop.mapred.OutputCommitter.class);

        job.setNumReduceTasks(0);
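        // Map-only job: there is no existing HEAD to merge with, so the patch is written
        // out directly; DeleteMapper is expected to drop records the patch marks as deleted.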

        LOG.info(MessageFormat.format("Applying patch: {0} / (empty) -> {2}",
                storage.getPatchContents("*"),
                storage.getHeadContents("*"),
                getNextContents()));
        try {
            boolean succeed = job.waitForCompletion(true);
            LOG.info(MessageFormat.format("Applied patch: succeed={0}, {1} / (empty) -> {3}",
                    succeed,
                    storage.getPatchContents("*"),
                    storage.getHeadContents("*"),
                    getNextContents()));
            if (succeed == false) {
                throw new IOException(MessageFormat.format("Failed to apply patch: {0} / (empty) -> {2}",
                        storage.getPatchContents("*"),
                        storage.getHeadContents("*"),
                        getNextContents()));
            }
        } catch (ClassNotFoundException e) {
            throw new IOException(e);
        }

        LOG.info(MessageFormat.format("Setting patched properties: {0} -> {1}",
                storage.getPatchProperties(),
                getNextDirectory()));
        FileUtil.copy(
                storage.getFileSystem(),
                storage.getPatchProperties(),
                storage.getFileSystem(),
                getNextProperties(),
                false,
                storage.getConfiguration());
    }

    private void switchHead() throws IOException {
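        // Promote the freshly built cache in three steps: move the current HEAD aside to
        // PREVIOUS (if it exists), rename NEXT to HEAD, then delete the PREVIOUS copy.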
        boolean hasHead = storage.getFileSystem().exists(storage.getHeadDirectory());
        if (hasHead) {
            LOG.info(MessageFormat.format(
                    "Escaping previous cache: {0} -> {1}",
                    storage.getHeadDirectory(),
                    getEscapeDir()));
            storage.getFileSystem().delete(getEscapeDir(), true);
            storage.getFileSystem().rename(storage.getHeadDirectory(), getEscapeDir());
        }

        LOG.info(MessageFormat.format(
                "Switching patched as HEAD: {0} -> {1}",
                getNextDirectory(),
                storage.getHeadDirectory()));
        storage.getFileSystem().rename(getNextDirectory(), storage.getHeadDirectory());

        if (hasHead) {
            LOG.info(MessageFormat.format(
                    "Cleaning previous cache: {0}",
                    getEscapeDir()));
            storage.getFileSystem().delete(getEscapeDir(), true);
        }
    }

    private Path getNextDirectory() {
        return new Path(storage.getTempoaryDirectory(), NEXT_DIRECTORY_NAME);
    }

    private Path getNextProperties() {
        return new Path(getNextDirectory(), CacheStorage.META_FILE_NAME);
    }

    private Path getNextContents() {
        return new Path(getNextDirectory(), CacheStorage.CONTENT_FILE_GLOB);
    }

    private Path getEscapeDir() {
        return new Path(storage.getTempoaryDirectory(), ESCAPE_DIRECTORY_NAME);
    }
}
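
For reference, a minimal sketch of launching this client programmatically through Hadoop's ToolRunner. The launcher class, cache path, and data model class below are illustrative placeholders, not part of the original source; the only detail taken from the source is the three-argument contract (subcommand, cache directory, model class name).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

import com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient;

public class CacheBuildClientLauncher {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // subcommand, cache directory, and fully qualified data model class name (placeholders)
        int exitCode = ToolRunner.run(conf, new CacheBuildClient(), new String[] {
                CacheBuildClient.SUBCOMMAND_UPDATE,
                "hdfs:///user/thundergate/cache/EXAMPLE",
                "com.example.modelgen.dmdl.model.Example",
        });
        System.exit(exitCode);
    }
}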