Package com.asakusafw.compiler.directio

Source Code of com.asakusafw.compiler.directio.DirectFileIoProcessorRunTest$Output

/**
* Copyright 2011-2014 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.compiler.directio;

import static org.hamcrest.Matchers.*;
import static org.junit.Assert.*;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Scanner;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.junit.Rule;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import com.asakusafw.compiler.directio.testing.model.Line1;
import com.asakusafw.compiler.directio.testing.model.Line2;
import com.asakusafw.compiler.util.tester.CompilerTester;
import com.asakusafw.runtime.directio.DataFormat;
import com.asakusafw.utils.collections.Lists;
import com.asakusafw.vocabulary.directio.DirectFileInputDescription;
import com.asakusafw.vocabulary.directio.DirectFileOutputDescription;
import com.asakusafw.vocabulary.external.ImporterDescription.DataSize;
import com.asakusafw.vocabulary.flow.In;
import com.asakusafw.vocabulary.flow.Out;

/**
* Test for {@link DirectFileIoProcessor}.
*/
@RunWith(Parameterized.class)
public class DirectFileIoProcessorRunTest {

    /**
     * Compiler tester.
     */
    @Rule
    public CompilerTester tester = new CompilerTester();

    private final Class<? extends DataFormat<Text>> format;

    /**
     * Returns the parameters.
     * @return the parameters
     */
    @Parameters
    public static List<Object[]> data() {
        return Arrays.asList(new Object[][] {
                { LineFormat.class },
                { LineFileFormat.class },
        });
    }

    /**
     * Creates a new instance.
     * @param format the format.
     */
    public DirectFileIoProcessorRunTest(Class<? extends DataFormat<Text>> format) {
        this.format = format;
    }

    /**
     * simple.
     * @throws Exception if failed
     */
    @Test
    public void simple() throws Exception {
        put("input/input.txt", "1Hello", "2Hello", "3Hello");
        In<Line1> in = tester.input("in1", new Input(format, "input", "*"));
        Out<Line1> out = tester.output("out1", new Output(format, "output", "output.txt"));
        assertThat(tester.runFlow(new IdentityFlow<Line1>(in, out)), is(true));

        List<String> list = get("output/output.txt");
        assertThat(list.size(), is(3));
        assertThat(list, hasItem("1Hello"));
        assertThat(list, hasItem("2Hello"));
        assertThat(list, hasItem("3Hello"));
    }

    /**
     * tiny input.
     * @throws Exception if failed
     */
    @Test
    public void tiny() throws Exception {
        put("input/input.txt", "1Hello", "2Hello", "3Hello");
        In<Line1> in = tester.input("in1",
                new Input(Line1.class, format, "input", "*", DataSize.TINY));
        Out<Line1> out = tester.output("out1", new Output(format, "output", "output.txt"));
        assertThat(tester.runFlow(new IdentityFlow<Line1>(in, out)), is(true));

        List<String> list = get("output/output.txt");
        assertThat(list.size(), is(3));
        assertThat(list, hasItem("1Hello"));
        assertThat(list, hasItem("2Hello"));
        assertThat(list, hasItem("3Hello"));
    }

    /**
     * multiple input.
     * @throws Exception if failed
     */
    @Test
    public void input_multi() throws Exception {
        put("input/input-1.txt", "1Hello");
        put("input/input-2.txt", "2Hello");
        put("input/input-3.txt", "3Hello");
        put("input/other.txt", "4Hello");
        In<Line1> in = tester.input("in1", new Input(format, "input", "input-*"));
        Out<Line1> out = tester.output("out1", new Output(format, "output", "output.txt"));
        assertThat(tester.runFlow(new IdentityFlow<Line1>(in, out)), is(true));

        List<String> list = get("output/output.txt");
        assertThat(list.size(), is(3));
        assertThat(list, hasItem("1Hello"));
        assertThat(list, hasItem("2Hello"));
        assertThat(list, hasItem("3Hello"));
    }

    /**
     * ordering.
     * @throws Exception if failed
     */
    @Test
    public void order() throws Exception {
        put("input/input.txt", "1Hello", "2Hello", "3Hello");
        In<Line1> in = tester.input("in1", new Input(format, "input", "*"));
        Out<Line1> out = tester.output("out1", new Output(format, "output", "output.txt", "-value"));
        assertThat(tester.runFlow(new IdentityFlow<Line1>(in, out)), is(true));

        assertThat(get("output/output.txt"), is(list("3Hello", "2Hello", "1Hello")));
    }

    /**
     * file partitioning.
     * @throws Exception if failed
     */
    @Test
    public void partition() throws Exception {
        put("input/input.txt", "a1", "b1", "b2", "c1", "c2", "c3");
        In<Line1> in = tester.input("in1", new Input(format, "input", "*"));
        Out<Line1> out = tester.output("out1", new Output(format, "output", "{first}-output.txt", "+value"));
        assertThat(tester.runFlow(new IdentityFlow<Line1>(in, out)), is(true));

        assertThat(get("output/a-output.txt"), is(list("a1")));
        assertThat(get("output/b-output.txt"), is(list("b1", "b2")));
        assertThat(get("output/c-output.txt"), is(list("c1", "c2", "c3")));
    }

    /**
     * file partitioning by random.
     * @throws Exception if failed
     */
    @Test
    public void random() throws Exception {
        List<String> lines = Lists.create();
        for (int i = 0; i < 1000; i++) {
            lines.add(String.format("%03d", i));
        }
        put("input/input.txt", lines.toArray(new String[lines.size()]));
        In<Line1> in = tester.input("in1", new Input(format, "input", "*"));
        Out<Line1> out = tester.output("out1", new Output(format, "output", "output-[1..4].txt", "+value"));
        assertThat(tester.runFlow(new IdentityFlow<Line1>(in, out)), is(true));

        List<String> o1 = get("output/output-1.txt");
        List<String> o2 = get("output/output-2.txt");
        List<String> o3 = get("output/output-3.txt");
        List<String> o4 = get("output/output-4.txt");

        // probably ok
        assertThat(o1.size(), is(greaterThan(0)));
        assertThat(o2.size(), is(greaterThan(0)));
        assertThat(o3.size(), is(greaterThan(0)));
        assertThat(o4.size(), is(greaterThan(0)));

        List<String> results = Lists.create();
        results.addAll(o1);
        results.addAll(o2);
        results.addAll(o3);
        results.addAll(o4);

        Collections.sort(results);
        assertThat(results, is(lines));
    }

    /**
     * wildcard.
     * @throws Exception if failed
     */
    @Test
    public void wildcard() throws Exception {
        put("input/input.txt", "1Hello", "2Hello", "3Hello");
        In<Line1> in = tester.input("in1", new Input(format, "input", "*"));
        Out<Line1> out = tester.output("out1", new Output(format, "output", "output-*.txt"));
        assertThat(tester.runFlow(new IdentityFlow<Line1>(in, out)), is(true));

        List<String> list = get("output/output-*.txt");
        assertThat(list.size(), is(3));
        assertThat(list, hasItem("1Hello"));
        assertThat(list, hasItem("2Hello"));
        assertThat(list, hasItem("3Hello"));
    }

    /**
     * use variables.
     * @throws Exception if failed
     */
    @Test
    public void variable() throws Exception {
        put("input/input-1.txt", "1Hello");
        put("input/input-2.txt", "2Hello");
        put("input/input-3.txt", "3Hello");
        put("input/other.txt", "4Hello");
        In<Line1> in = tester.input("in1", new Input(format, "${input-dir}", "${input-pattern}"));
        Out<Line1> out = tester.output("out1", new Output(format, "${output-dir}", "${output-pattern}"));

        tester.variables().defineVariable("input-dir", "input");
        tester.variables().defineVariable("input-pattern", "input-*");
        tester.variables().defineVariable("output-dir", "output");
        tester.variables().defineVariable("output-pattern", "output.txt");
        assertThat(tester.runFlow(new IdentityFlow<Line1>(in, out)), is(true));

        List<String> list = get("output/output.txt");
        assertThat(list.size(), is(3));
        assertThat(list, hasItem("1Hello"));
        assertThat(list, hasItem("2Hello"));
        assertThat(list, hasItem("3Hello"));
    }

    /**
     * use variables.
     * @throws Exception if failed
     */
    @Test
    public void variable_wildcard() throws Exception {
        put("input/input-1.txt", "1Hello");
        put("input/input-2.txt", "2Hello");
        put("input/input-3.txt", "3Hello");
        put("input/other.txt", "4Hello");
        In<Line1> in = tester.input("in1", new Input(format, "${input-dir}", "${input-pattern}"));
        Out<Line1> out = tester.output("out1", new Output(format, "${output-dir}", "${output-pattern}-*.txt"));

        tester.variables().defineVariable("input-dir", "input");
        tester.variables().defineVariable("input-pattern", "input-*");
        tester.variables().defineVariable("output-dir", "output");
        tester.variables().defineVariable("output-pattern", "output");
        assertThat(tester.runFlow(new IdentityFlow<Line1>(in, out)), is(true));

        List<String> list = get("output/output-*.txt");
        assertThat(list.size(), is(3));
        assertThat(list, hasItem("1Hello"));
        assertThat(list, hasItem("2Hello"));
        assertThat(list, hasItem("3Hello"));
    }

    /**
     * use variables in base path.
     * @throws Exception if failed
     */
    @Test
    public void variable_basepath() throws Exception {
        useFrameworkConf("split-io-conf.xml");
        put("input-split/input.txt", "1Hello");
        put("other-split/other.txt", "2Hello");
        In<Line1> in = tester.input("in1", new Input(format, "${input}", "input.txt"));
        Out<Line1> out = tester.output("out1", new Output(format, "${output}", "output.txt"));

        tester.variables().defineVariable("input", "input");
        tester.variables().defineVariable("output", "output");
        assertThat(tester.runFlow(new IdentityFlow<Line1>(in, out)), is(true));

        List<String> list = get("output-split/output.txt");
        assertThat(list.size(), is(1));
        assertThat(list, hasItem("1Hello"));
    }

    /**
     * delete file.
     * @throws Exception if failed
     */
    @Test
    public void delete() throws Exception {
        put("input/input.txt", "1Hello", "2Hello", "3Hello");
        put("output/deleted.txt", "4Hello");
        put("output/keep.txt", "5Hello");
        put("output/deleted-1.txt", "6Hello");
        put("output/deleted-2.txt", "7Hello");

        In<Line1> in = tester.input("in1", new Input(format, "input", "*"));
        Out<Line1> out = tester.output("out1", new Output(format, "output", "output.txt")
            .delete("deleted.txt")
            .delete("deleted-*.txt"));
        assertThat(tester.runFlow(new IdentityFlow<Line1>(in, out)), is(true));

        List<String> list = get("output/*.txt");
        assertThat(list.size(), is(4));
        assertThat(list, hasItem("1Hello"));
        assertThat(list, hasItem("2Hello"));
        assertThat(list, hasItem("3Hello"));
        assertThat(list, hasItem("5Hello"));
    }

    /**
     * delete file.
     * @throws Exception if failed
     */
    @Test
    public void delete_variable() throws Exception {
        put("input/input.txt", "1Hello", "2Hello", "3Hello");
        put("output/keep.txt", "4Hello");
        put("output/deleted-1.txt", "5Hello");
        put("output/deleted-2.txt", "6Hello");

        In<Line1> in = tester.input("in1", new Input(format, "input", "*"));
        Out<Line1> out = tester.output("out1", new Output(format, "output", "output.txt")
            .delete("${pattern}-*.txt"));

        tester.variables().defineVariable("pattern", "deleted");
        assertThat(tester.runFlow(new IdentityFlow<Line1>(in, out)), is(true));

        List<String> list = get("output/*.txt");
        assertThat(list.size(), is(4));
        assertThat(list, hasItem("1Hello"));
        assertThat(list, hasItem("2Hello"));
        assertThat(list, hasItem("3Hello"));
        assertThat(list, hasItem("4Hello"));
    }

    /**
     * empty input.
     * @throws Exception if failed
     */
    @Test
    public void input_empty() throws Exception {
        put("input/input-1.txt");
        put("input/input-2.txt");
        put("input/input-3.txt");
        put("input/other.txt", "Hello");
        In<Line1> in = tester.input("in1", new Input(format, "input", "input-*"));
        Out<Line1> out = tester.output("out1", new Output(format, "output-1", "output.txt"));
        assertThat(tester.runFlow(new IdentityFlow<Line1>(in, out)), is(true));

        List<Path> list = find("output/output-*.txt");
        assertThat(list.toString(), list.size(), is(0));
    }

    /**
     * empty input.
     * @throws Exception if failed
     */
    @Test
    public void input_empty_noreduce() throws Exception {
        put("input/input-1.txt");
        put("input/input-2.txt");
        put("input/input-3.txt");
        put("input/other.txt", "Hello");
        In<Line1> in = tester.input("in1", new Input(format, "input", "input-*"));
        Out<Line1> out = tester.output("out1", new Output(format, "output", "output.txt"));
        assertThat(tester.runFlow(new IdentityFlow<Line1>(in, out)), is(true));

        List<Path> list = find("output/output-*.txt");
        assertThat(list.toString(), list.size(), is(0));
    }

    /**
     * optional inputs.
     * @throws Exception if failed
     */
    @Test
    public void optional() throws Exception {
        put("input/input.txt", "1Hello", "2Hello", "3Hello");
        In<Line1> in = tester.input("in1", new Input(format, "input", "*", true));
        Out<Line1> out = tester.output("out1", new Output(format, "output", "output.txt"));
        assertThat(tester.runFlow(new IdentityFlow<Line1>(in, out)), is(true));

        List<String> list = get("output/output.txt");
        assertThat(list.size(), is(3));
        assertThat(list, hasItem("1Hello"));
        assertThat(list, hasItem("2Hello"));
        assertThat(list, hasItem("3Hello"));
    }

    /**
     * dual in/out.
     * @throws Exception if failed
     */
    @Test
    public void dual_io() throws Exception {
        put("input/input-1.txt", "Hello1");
        put("input/input-2.txt", "Hello2");
        In<Line1> in1 = tester.input("in1",
                new Input(Line1.class, format, "input", "input-1.txt", DataSize.LARGE));
        In<Line2> in2 = tester.input("in2",
                new Input(Line2.class, format, "input", "input-2.txt", DataSize.TINY));
        Out<Line1> out1 = tester.output("out1",
                new Output(Line1.class, format, "output-1", "output.txt"));
        Out<Line2> out2 = tester.output("out2",
                new Output(Line2.class, format, "output-2", "output.txt"));
        assertThat(tester.runFlow(new DualIdentityFlow<Line1, Line2>(in1, in2, out1, out2)), is(true));

        assertThat(get("output-1/output.txt"), is(list("Hello1")));
        assertThat(get("output-2/output.txt"), is(list("Hello2")));
    }

    /**
     * dual in/out.
     * @throws Exception if failed
     */
    @Test
    public void dual_io_noreduce() throws Exception {
        put("input/input-1.txt", "Hello1");
        put("input/input-2.txt", "Hello2");
        In<Line1> in1 = tester.input("in1",
                new Input(Line1.class, format, "input", "input-1.txt", DataSize.LARGE));
        In<Line2> in2 = tester.input("in2",
                new Input(Line2.class, format, "input", "input-2.txt", DataSize.TINY));
        Out<Line1> out1 = tester.output("out1",
                new Output(Line1.class, format, "output-1", "output-*.txt"));
        Out<Line2> out2 = tester.output("out2",
                new Output(Line2.class, format, "output-2", "output-*.txt"));
        assertThat(tester.runFlow(new DualIdentityFlow<Line1, Line2>(in1, in2, out1, out2)), is(true));

        assertThat(get("output-1/output-*.txt"), is(list("Hello1")));
        assertThat(get("output-2/output-*.txt"), is(list("Hello2")));
    }

    /**
     * dual in/out.
     * @throws Exception if failed
     */
    @Test
    public void dual_io_mixed() throws Exception {
        put("input/input-1.txt", "Hello1");
        put("input/input-2.txt", "Hello2");
        In<Line1> in1 = tester.input("in1",
                new Input(Line1.class, format, "input", "input-1.txt", DataSize.LARGE));
        In<Line2> in2 = tester.input("in2",
                new Input(Line2.class, format, "input", "input-2.txt", DataSize.TINY));
        Out<Line1> out1 = tester.output("out1",
                new Output(Line1.class, format, "output-1", "output-1.txt"));
        Out<Line2> out2 = tester.output("out2",
                new Output(Line2.class, format, "output-2", "output-*.txt"));
        assertThat(tester.runFlow(new DualIdentityFlow<Line1, Line2>(in1, in2, out1, out2)), is(true));

        assertThat(get("output-1/output-*.txt"), is(list("Hello1")));
        assertThat(get("output-2/output-*.txt"), is(list("Hello2")));
    }

    /**
     * input is missing.
     * @throws Exception if failed
     */
    @Test
    public void input_missing() throws Exception {
        In<Line1> in = tester.input("in1", new Input(format, "input", "*"));
        Out<Line1> out = tester.output("out1", new Output(format, "output", "output.txt"));
        assertThat(tester.runFlow(new IdentityFlow<Line1>(in, out)), is(false));
    }

    /**
     * input is missing, but its input is optional.
     * @throws Exception if failed
     */
    @Test
    public void input_missing_optional() throws Exception {
        In<Line1> in = tester.input("in1", new Input(format, "input", "*", true));
        Out<Line1> out = tester.output("out1", new Output(format, "output", "output.txt"));
        assertThat(tester.runFlow(new IdentityFlow<Line1>(in, out)), is(true));
    }

    private List<String> list(String... values) {
        return Arrays.asList(values);
    }

    private Path getPath(String target) {
        return new Path("target/testing/directio-fs", target);
    }

    private List<Path> find(String target) throws IOException {
        FileSystem fs = FileSystem.get(tester.configuration());
        FileStatus[] list = fs.globStatus(getPath(target));
        if (list == null) {
            return Collections.emptyList();
        }
        List<Path> results = Lists.create();
        for (FileStatus file : list) {
            results.add(file.getPath());
        }
        return results;
    }

    private List<String> get(String target) throws IOException {
        FileSystem fs = FileSystem.get(tester.configuration());
        List<String> results = Lists.create();
        for (Path path : find(target)) {
            InputStream input = fs.open(path);
            try {
                Scanner s = new Scanner(new InputStreamReader(input, "UTF-8"));
                while (s.hasNextLine()) {
                    results.add(s.nextLine());
                }
                s.close();
            } finally {
                input.close();
            }
        }
        return results;
    }

    private void put(String target, String... contents) throws IOException {
        FileSystem fs = FileSystem.get(tester.configuration());
        OutputStream output = fs.create(getPath(target), true);
        try {
            PrintWriter w = new PrintWriter(new OutputStreamWriter(output, "UTF-8"));
            for (String line : contents) {
                w.println(line);
            }
            w.close();
        } finally {
            output.close();
        }
    }

    private void useFrameworkConf(String name) {
        File file = tester.framework().getCoreConfigurationFile();
        if (file == null) {
            throw new AssertionError("Missing original framework conf");
        }
        InputStream input = getClass().getResourceAsStream(name);
        assertThat(name, input, is(notNullValue()));
        try {
            try {
                OutputStream output = new FileOutputStream(file);
                try {
                    byte[] buf = new byte[256];
                    while (true) {
                        int read = input.read(buf);
                        if (read < 0) {
                            break;
                        }
                        output.write(buf, 0, read);
                    }
                } finally {
                    output.close();
                }
            } finally {
                input.close();
            }
        } catch (IOException e) {
            throw new AssertionError(e);
        }
    }

    private static class Input extends DirectFileInputDescription {

        private final Class<?> modelType;
        private final Class<? extends DataFormat<?>> format;
        private final String basePath;
        private final String resourcePattern;
        private final Boolean optional;
        private final DataSize dataSize;

        Input(
                Class<?> modelType,
                Class<? extends DataFormat<?>> format,
                String basePath,
                String resourcePattern,
                DataSize dataSize) {
            this(modelType, format, basePath, resourcePattern, null, dataSize);
        }

        Input(
                Class<? extends DataFormat<?>> format,
                String basePath,
                String resourcePattern) {
            this(Line1.class, format, basePath, resourcePattern, null, null);
        }

        Input(
                Class<? extends DataFormat<?>> format,
                String basePath,
                String resourcePattern,
                boolean optional) {
            this(Line1.class, format, basePath, resourcePattern, optional, null);
        }

        Input(
                Class<?> modelType,
                Class<? extends DataFormat<?>> format,
                String basePath,
                String resourcePattern,
                Boolean optional,
                DataSize dataSize) {
            this.modelType = modelType;
            this.basePath = basePath;
            this.resourcePattern = resourcePattern;
            this.format = format;
            this.optional = optional;
            this.dataSize = dataSize;
        }

        @Override
        public Class<?> getModelType() {
            return modelType;
        }

        @Override
        public Class<? extends DataFormat<?>> getFormat() {
            return format;
        }

        @Override
        public String getBasePath() {
            return basePath;
        }

        @Override
        public String getResourcePattern() {
            return resourcePattern;
        }

        @Override
        public boolean isOptional() {
            if (optional != null) {
                return optional;
            }
            return super.isOptional();
        }

        @Override
        public DataSize getDataSize() {
            if (dataSize != null) {
                return dataSize;
            }
            return super.getDataSize();
        }
    }

    private static class Output extends DirectFileOutputDescription {

        private final Class<?> modelType;
        private final Class<? extends DataFormat<?>> format;
        private final String basePath;
        private final String resourcePattern;
        private final String[] order;
        private final List<String> deletes = Lists.create();

        Output(
                Class<?> modelType,
                Class<? extends DataFormat<?>> format,
                String basePath,
                String resourcePattern,
                String... order) {
            this.modelType = modelType;
            this.format = format;
            this.basePath = basePath;
            this.resourcePattern = resourcePattern;
            this.order = order;
        }

        Output(
                Class<? extends DataFormat<?>> format,
                String basePath,
                String resourcePattern,
                String... order) {
            this.modelType = Line1.class;
            this.format = format;
            this.basePath = basePath;
            this.resourcePattern = resourcePattern;
            this.order = order;
        }

        Output delete(String pattern) {
            deletes.add(pattern);
            return this;
        }

        @Override
        public Class<?> getModelType() {
            return modelType;
        }

        @Override
        public Class<? extends DataFormat<?>> getFormat() {
            return format;
        }

        @Override
        public String getBasePath() {
            return basePath;
        }

        @Override
        public String getResourcePattern() {
            return resourcePattern;
        }

        @Override
        public List<String> getOrder() {
            return Arrays.asList(order);
        }

        @Override
        public List<String> getDeletePatterns() {
            return deletes;
        }
    }
}
TOP

Related Classes of com.asakusafw.compiler.directio.DirectFileIoProcessorRunTest$Output

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.