Package org.lilyproject.mapreduce.test

Source Code of org.lilyproject.mapreduce.test.MapReduceTest

/*
* Copyright 2012 NGDATA nv
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.lilyproject.mapreduce.test;

import java.io.File;
import java.io.IOException;

import com.google.common.collect.Lists;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.lilyproject.hadooptestfw.TestHelper;
import org.lilyproject.lilyservertestfw.LilyProxy;
import org.lilyproject.mapreduce.LilyMapReduceUtil;
import org.lilyproject.mapreduce.testjobs.Test1Mapper;
import org.lilyproject.repository.api.FieldType;
import org.lilyproject.repository.api.IdGenerator;
import org.lilyproject.repository.api.LRepository;
import org.lilyproject.repository.api.LTable;
import org.lilyproject.repository.api.QName;
import org.lilyproject.repository.api.RecordScan;
import org.lilyproject.repository.api.RecordType;
import org.lilyproject.repository.api.RepositoryManager;
import org.lilyproject.repository.api.Scope;
import org.lilyproject.repository.api.TypeManager;
import org.lilyproject.repository.model.api.RepositoryDefinition;
import org.lilyproject.repository.model.impl.RepositoryModelImpl;
import org.lilyproject.util.test.TestHomeUtil;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.lilyproject.repository.model.api.RepositoryDefinition.RepositoryLifecycleState;

public class MapReduceTest {
    private static LilyProxy lilyProxy;
    private static File tmpDir;

    @BeforeClass
    public static void setUpBeforeClass() throws Exception {
        TestHelper.setupLogging();
        lilyProxy = new LilyProxy();

        //
        // Make multiple record table splits, so that our MR job will have multiple map tasks
        //

        if (lilyProxy.getMode() == LilyProxy.Mode.CONNECT || lilyProxy.getMode() == LilyProxy.Mode.HADOOP_CONNECT) {
            // The record table will likely already exist and not be recreated, hence we won't be able to change
            // the number of regions. Therefore, drop the table.
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", "localhost");
            HBaseAdmin hbaseAdmin = new HBaseAdmin(conf);
            if (hbaseAdmin.tableExists("record")) {
                hbaseAdmin.disableTable("record");
                hbaseAdmin.deleteTable("record");
            }
            HConnectionManager.deleteConnection(hbaseAdmin.getConfiguration(), true);
        }

        // Temp dir where we will create conf dir
        tmpDir = TestHomeUtil.createTestHome("lily-mapreduce-test-");

        File customConfDir = setupConfDirectory(tmpDir);
        String oldCustomConfDir = setProperty("lily.conf.customdir", customConfDir.getAbsolutePath());
        String oldRestoreTemplate = setProperty("lily.lilyproxy.restoretemplatedir", "false");

        try {
            lilyProxy.start();
        } finally {
            // Make sure the properties won't be used by later-running tests
            setProperty("lily.conf.customdir", oldCustomConfDir);
            setProperty("lily.lilyproxy.restoretemplatedir", oldRestoreTemplate);
        }
    }

    private static String setProperty(String name, String value) {
        String oldValue = System.getProperty(name);
        if (value == null) {
            System.getProperties().remove(name);
        } else {
            System.setProperty(name, value);
        }
        return oldValue;
    }

    @AfterClass
    public static void tearDownAfterClass() throws Exception {
        if (lilyProxy != null) {
            lilyProxy.stop();
        }
        TestHomeUtil.cleanupTestHome(tmpDir);

        if (lilyProxy.getMode() == LilyProxy.Mode.CONNECT || lilyProxy.getMode() == LilyProxy.Mode.HADOOP_CONNECT) {
            // We're in connect mode, drop the record table again so that the remainder of the tests
            // don't have the overhead of the extra splits
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", "localhost");
            HBaseAdmin hbaseAdmin = new HBaseAdmin(conf);
            if (hbaseAdmin.tableExists("record")) {
                hbaseAdmin.disableTable("record");
                hbaseAdmin.deleteTable("record");
            }
            HConnectionManager.deleteConnection(hbaseAdmin.getConfiguration(), true);
        }
    }

    private static File setupConfDirectory(File tmpDir) throws Exception {
        File confDir = new File(tmpDir, "conf");

        File generalConfDir = new File(confDir, "general");
        FileUtils.forceMkdir(generalConfDir);

        // Write configuration to activate the decorator
        String tablesXml = "<tables xmlns:conf='http://lilyproject.org/configuration' conf:inherit='shallow'>" +
                "<table name='record'><splits><regionCount>5</regionCount>" +
                "<splitKeys>\\x00020,\\x00040,\\x00060,\\x00080</splitKeys></splits></table>" +
                "</tables>";

        FileUtils.writeStringToFile(new File(generalConfDir, "tables.xml"), tablesXml, "UTF-8");

        return confDir;
    }

    @Test
    public void testOne() throws Exception {
        RepositoryManager repoMgr = lilyProxy.getLilyServerProxy().getClient();

        //
        // Create some content in the default table of the default repository
        //
        LRepository repository = repoMgr.getDefaultRepository();
        TypeManager typeManager = repository.getTypeManager();
        IdGenerator idGenerator = repository.getIdGenerator();
        LTable table = repository.getDefaultTable();

        FieldType ft1 = typeManager.createFieldType("STRING", new QName("test", "field1"), Scope.NON_VERSIONED);

        RecordType rt1 = typeManager.recordTypeBuilder()
                .defaultNamespace("test")
                .name("rt1")
                .fieldEntry().use(ft1).add()
                .create();

        for (int i = 0; i < 100; i++) {
            table.recordBuilder()
                    .id(String.format("%1$03d", i))
                    .recordType(rt1.getName())
                    .field(ft1.getName(), "foo bar bar")
                    .create();
        }

        //
        // Also create some content in another repository with two tables
        //
        RepositoryModelImpl repositoryModel = new RepositoryModelImpl(lilyProxy.getLilyServerProxy().getZooKeeper());
        String repositoryName = "otherrepo";
        repositoryModel.create(repositoryName);
        assertTrue(repositoryModel.waitUntilRepositoryInState(repositoryName, RepositoryLifecycleState.ACTIVE, 60000L));
        repositoryModel.close();

        LRepository repository2 = repoMgr.getRepository(repositoryName);
        repository2.getTableManager().createTable("foobar");
        LTable table2 = repository2.getTable("foobar");
        LTable table3 = repository2.getDefaultTable();

        for (int i = 0; i < 50; i++) {
            table2.recordBuilder()
                    .id(String.valueOf(i))
                    .recordType(rt1.getName())
                    .field(ft1.getName(), "foo bar bar")
                    .create();
        }

        for (int i = 0; i < 20; i++) {
            table3.recordBuilder()
                    .id(String.valueOf(i))
                    .recordType(rt1.getName())
                    .field(ft1.getName(), "foo bar bar")
                    .create();
        }

        //
        // Launch MapReduce job on default repository
        //
        {
            Configuration config = HBaseConfiguration.create();

            config.set("mapred.job.tracker", "localhost:8021");
            config.set("fs.defaultFS", "hdfs://localhost:8020");

            Job job = new Job(config, "Test1");
            job.setJarByClass(Test1Mapper.class);

            job.setMapperClass(Test1Mapper.class);

            job.setOutputFormatClass(NullOutputFormat.class);

            job.setNumReduceTasks(0);

            LilyMapReduceUtil.initMapperJob(null, "localhost", repository, job);

            boolean b = job.waitForCompletion(true);
            if (!b) {
                throw new IOException("error with job!");
            }

            // Verify some counters
            assertEquals("Number of launched map tasks", 5L, getTotalLaunchedMaps(job));
            assertEquals("Number of input records", 100L, getTotalInputRecords(job));
        }

        //
        // Launch a job with a custom scan on the default repository
        //
        {
            Configuration config = HBaseConfiguration.create();

            config.set("mapred.job.tracker", "localhost:8021");
            config.set("fs.defaultFS", "hdfs://localhost:8020");

            Job job = new Job(config, "Test1");
            job.setJarByClass(Test1Mapper.class);

            job.setMapperClass(Test1Mapper.class);

            job.setOutputFormatClass(NullOutputFormat.class);

            job.setNumReduceTasks(0);

            RecordScan scan = new RecordScan();
            scan.setStartRecordId(idGenerator.newRecordId(String.format("%1$03d", 15)));
            scan.setStopRecordId(idGenerator.newRecordId(String.format("%1$03d", 25)));

            LilyMapReduceUtil.initMapperJob(scan, "localhost", repository, job);

            boolean b = job.waitForCompletion(true);
            if (!b) {
                throw new IOException("error with job!");
            }

            // expect 2 map tasks: our scan crossed the 020 border
            assertEquals("Number of launched map tasks", 2L, getTotalLaunchedMaps(job));
            assertEquals("Number of input records", 10L, getTotalInputRecords(job));

            /*
            for (CounterGroup cgroup: job.getCounters()) {
                for (Counter counter : cgroup) {
                    System.out.println(cgroup.getName() + " -> " + counter.getName() + " = " + counter.getValue());
                }
            }
            */
        }

        //
        // Launch MapReduce job on the custom repository - over all tables
        //
        {
            Configuration config = HBaseConfiguration.create();

            config.set("mapred.job.tracker", "localhost:8021");
            config.set("fs.defaultFS", "hdfs://localhost:8020");

            Job job = new Job(config, "Test1");
            job.setJarByClass(Test1Mapper.class);

            job.setMapperClass(Test1Mapper.class);

            job.setOutputFormatClass(NullOutputFormat.class);

            job.setNumReduceTasks(0);

            LilyMapReduceUtil.initMapperJob(null, "localhost", repository2, job);

            boolean b = job.waitForCompletion(true);
            if (!b) {
                throw new IOException("error with job!");
            }

            // Verify some counters
            assertEquals("Number of input records", 70L, getTotalInputRecords(job));
        }

        //
        // Launch MapReduce job on the custom repository - over one specific table
        //
        {
            Configuration config = HBaseConfiguration.create();

            config.set("mapred.job.tracker", "localhost:8021");
            config.set("fs.defaultFS", "hdfs://localhost:8020");

            Job job = new Job(config, "Test1");
            job.setJarByClass(Test1Mapper.class);

            job.setMapperClass(Test1Mapper.class);

            job.setOutputFormatClass(NullOutputFormat.class);

            job.setNumReduceTasks(0);

            LilyMapReduceUtil.initMapperJob(null, "localhost", repository2, job, Lists.newArrayList("foobar"));

            boolean b = job.waitForCompletion(true);
            if (!b) {
                throw new IOException("error with job!");
            }

            // Verify some counters
            assertEquals("Number of input records", 50L, getTotalInputRecords(job));
        }
    }

    private long getTotalLaunchedMaps(Job job) throws IOException {
        return job.getCounters().findCounter("org.apache.hadoop.mapreduce.JobCounter", "TOTAL_LAUNCHED_MAPS").getValue();
    }

    private long getTotalInputRecords(Job job) throws IOException {
        return job.getCounters().findCounter("org.apache.hadoop.mapreduce.TaskCounter", "MAP_INPUT_RECORDS").getValue();
    }
}
TOP

Related Classes of org.lilyproject.mapreduce.test.MapReduceTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.