/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.dfs.MiniDFSCluster;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.hbase.HBaseAdmin;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HScannerInterface;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.HTable;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.MultiRegionTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableMap;
import org.apache.hadoop.hbase.mapred.TableOutputCollector;
import org.apache.hadoop.hbase.mapred.TableReduce;
import org.apache.hadoop.hbase.mapred.IdentityTableReduce;
/**
* Test Map/Reduce job over HBase tables
*/
public class TestTableMapReduce extends MultiRegionTable {
  @SuppressWarnings("hiding")
  private static final Log LOG =
    LogFactory.getLog(TestTableMapReduce.class.getName());

  // Table used by the single-region test.
  static final String SINGLE_REGION_TABLE_NAME = "srtest";
  // Table used by the multi-region test.
  static final String MULTI_REGION_TABLE_NAME = "mrtest";
  // Column the map reads its input from.
  static final String INPUT_COLUMN = "contents:";
  static final Text TEXT_INPUT_COLUMN = new Text(INPUT_COLUMN);
  // Column the reduce writes the (reversed) output to.
  static final String OUTPUT_COLUMN = "text:";
  static final Text TEXT_OUTPUT_COLUMN = new Text(OUTPUT_COLUMN);

  private MiniDFSCluster dfsCluster = null;
  private FileSystem fs;
  private Path dir;
  private MiniHBaseCluster hCluster = null;

  // Fixed cell values inserted as test fixtures; initialized eagerly so an
  // encoding failure aborts the whole test class rather than one test.
  private static byte[][] values = null;

  static {
    try {
      values = new byte[][] {
        "0123".getBytes(HConstants.UTF8_ENCODING),
        "abcd".getBytes(HConstants.UTF8_ENCODING),
        "wxyz".getBytes(HConstants.UTF8_ENCODING),
        "6789".getBytes(HConstants.UTF8_ENCODING)
      };
    } catch (UnsupportedEncodingException e) {
      fail();
    }
  }

  /** constructor */
  public TestTableMapReduce() {
    super();
    // Make lease timeout longer, lease checks less frequent
    conf.setInt("hbase.master.lease.period", 10 * 1000);
    conf.setInt("hbase.master.lease.thread.wakefrequency", 5 * 1000);
  }

  /**
   * Starts a one-node DFS and a one-region-server HBase cluster on top of it.
   * If HBase fails to come up, the DFS cluster is shut down before rethrowing
   * so we do not leak it into subsequent tests.
   *
   * {@inheritDoc}
   */
  @Override
  public void setUp() throws Exception {
    super.setUp();
    // This size is picked so the table is split into two
    // after addContent in testMultiRegionTableMapReduce.
    conf.setLong("hbase.hregion.max.filesize", 256 * 1024);
    dfsCluster = new MiniDFSCluster(conf, 1, true, (String[])null);
    try {
      fs = dfsCluster.getFileSystem();
      dir = new Path("/hbase");
      fs.mkdirs(dir);
      // Start up HBase cluster
      hCluster = new MiniHBaseCluster(conf, 1, dfsCluster);
      LOG.info("Master is at " + this.conf.get(HConstants.MASTER_ADDRESS));
    } catch (Exception e) {
      if (dfsCluster != null) {
        dfsCluster.shutdown();
        dfsCluster = null;
      }
      throw e;
    }
  }

  /**
   * Shuts down the HBase cluster, then DFS, then closes the FileSystem.
   * The cleanups are chained with try/finally so a failure in an earlier
   * shutdown (e.g. hCluster.shutdown() throwing) cannot skip the later
   * ones and leak cluster threads/ports into the next test.
   *
   * {@inheritDoc}
   */
  @Override
  public void tearDown() throws Exception {
    super.tearDown();
    try {
      if (hCluster != null) {
        hCluster.shutdown();
      }
    } finally {
      try {
        if (dfsCluster != null) {
          dfsCluster.shutdown();
        }
      } finally {
        if (fs != null) {
          try {
            fs.close();
          } catch (IOException e) {
            // Best-effort close; a failure here should not fail the test.
            LOG.info("During tear down got a " + e.getMessage());
          }
        }
      }
    }
  }

  /**
   * Pass the given key and processed record reduce
   */
  public static class ProcessContentsMapper extends TableMap {

    /** constructor */
    public ProcessContentsMapper() {
      super();
    }

    /**
     * Pass the key, and reversed value to reduce.
     * Expects exactly one input column (INPUT_COLUMN); emits the
     * byte-reversed string under OUTPUT_COLUMN.
     *
     * @see org.apache.hadoop.hbase.mapred.TableMap#map(org.apache.hadoop.hbase.HStoreKey, org.apache.hadoop.io.MapWritable, org.apache.hadoop.hbase.mapred.TableOutputCollector, org.apache.hadoop.mapred.Reporter)
     */
    @SuppressWarnings("unchecked")
    @Override
    public void map(HStoreKey key, MapWritable value,
        TableOutputCollector output,
        @SuppressWarnings("unused") Reporter reporter) throws IOException {
      Text tKey = key.getRow();

      // The job is configured to scan only INPUT_COLUMN, so anything else
      // showing up here means the scan wiring is broken.
      if(value.size() != 1) {
        throw new IOException("There should only be one input column");
      }
      Text[] keys = value.keySet().toArray(new Text[value.size()]);
      if(!keys[0].equals(TEXT_INPUT_COLUMN)) {
        throw new IOException("Wrong input column. Expected: " + INPUT_COLUMN
            + " but got: " + keys[0]);
      }

      // Get the original value and reverse it
      String originalValue =
        new String(((ImmutableBytesWritable)value.get(keys[0])).get(),
            HConstants.UTF8_ENCODING);
      StringBuilder newValue = new StringBuilder();
      for(int i = originalValue.length() - 1; i >= 0; i--) {
        newValue.append(originalValue.charAt(i));
      }

      // Now set the value to be collected
      MapWritable outval = new MapWritable();
      outval.put(TEXT_OUTPUT_COLUMN, new ImmutableBytesWritable(
          newValue.toString().getBytes(HConstants.UTF8_ENCODING)));
      output.collect(tKey, outval);
    }
  }

  /**
   * Test hbase mapreduce jobs against single region and multi-region tables.
   * @throws IOException
   */
  public void testTableMapReduce() throws IOException {
    localTestSingleRegionTable();
    localTestMultiRegionTable();
  }

  /*
   * Test against a single region: create the table, load fixture rows,
   * run a 1-map/1-reduce job and verify OUTPUT_COLUMN holds the reverse
   * of INPUT_COLUMN for every row.
   * @throws IOException
   */
  private void localTestSingleRegionTable() throws IOException {
    HTableDescriptor desc = new HTableDescriptor(SINGLE_REGION_TABLE_NAME);
    desc.addFamily(new HColumnDescriptor(INPUT_COLUMN));
    desc.addFamily(new HColumnDescriptor(OUTPUT_COLUMN));

    // Create a table.
    HBaseAdmin admin = new HBaseAdmin(this.conf);
    admin.createTable(desc);

    // insert some data into the test table
    HTable table = new HTable(conf, new Text(SINGLE_REGION_TABLE_NAME));

    for(int i = 0; i < values.length; i++) {
      long lockid = table.startUpdate(new Text("row_"
          + String.format("%1$05d", i)));

      try {
        table.put(lockid, TEXT_INPUT_COLUMN, values[i]);
        table.commit(lockid, System.currentTimeMillis());
        // Mark the lock consumed so the finally block does not abort a
        // successfully committed update.
        lockid = -1;
      } finally {
        if (lockid != -1)
          table.abort(lockid);
      }
    }

    LOG.info("Print table contents before map/reduce");
    scanTable(conf, SINGLE_REGION_TABLE_NAME);

    @SuppressWarnings("deprecation")
    MiniMRCluster mrCluster = new MiniMRCluster(2, fs.getUri().toString(), 1);

    try {
      JobConf jobConf = new JobConf(conf, TestTableMapReduce.class);
      jobConf.setJobName("process column contents");
      jobConf.setNumMapTasks(1);
      jobConf.setNumReduceTasks(1);

      TableMap.initJob(SINGLE_REGION_TABLE_NAME, INPUT_COLUMN,
          ProcessContentsMapper.class, jobConf);

      TableReduce.initJob(SINGLE_REGION_TABLE_NAME,
          IdentityTableReduce.class, jobConf);

      JobClient.runJob(jobConf);

    } finally {
      mrCluster.shutdown();
    }

    LOG.info("Print table contents after map/reduce");
    scanTable(conf, SINGLE_REGION_TABLE_NAME);

    // verify map-reduce results
    verify(conf, SINGLE_REGION_TABLE_NAME);
  }

  /*
   * Test against multiple regions: populate enough data to force a split
   * (see the hbase.hregion.max.filesize set in setUp), assert the table
   * really has more than one region, then run a 2-map job and verify.
   * @throws IOException
   */
  private void localTestMultiRegionTable() throws IOException {
    HTableDescriptor desc = new HTableDescriptor(MULTI_REGION_TABLE_NAME);
    desc.addFamily(new HColumnDescriptor(INPUT_COLUMN));
    desc.addFamily(new HColumnDescriptor(OUTPUT_COLUMN));

    // Create a table.
    HBaseAdmin admin = new HBaseAdmin(this.conf);
    admin.createTable(desc);

    // Populate a table into multiple regions
    MultiRegionTable.makeMultiRegionTable(conf, hCluster, fs,
        MULTI_REGION_TABLE_NAME, INPUT_COLUMN);

    // Verify table indeed has multiple regions
    HTable table = new HTable(conf, new Text(MULTI_REGION_TABLE_NAME));
    Text[] startKeys = table.getStartKeys();
    assertTrue(startKeys.length > 1);

    @SuppressWarnings("deprecation")
    MiniMRCluster mrCluster = new MiniMRCluster(2, fs.getUri().toString(), 1);

    try {
      JobConf jobConf = new JobConf(conf, TestTableMapReduce.class);
      jobConf.setJobName("process column contents");
      jobConf.setNumMapTasks(2);
      jobConf.setNumReduceTasks(1);

      TableMap.initJob(MULTI_REGION_TABLE_NAME, INPUT_COLUMN,
          ProcessContentsMapper.class, jobConf);

      TableReduce.initJob(MULTI_REGION_TABLE_NAME,
          IdentityTableReduce.class, jobConf);

      JobClient.runJob(jobConf);

    } finally {
      mrCluster.shutdown();
    }

    // verify map-reduce results
    verify(conf, MULTI_REGION_TABLE_NAME);
  }

  /*
   * Logs every row's input and output columns; diagnostic aid only,
   * asserts nothing.
   */
  private void scanTable(Configuration conf, String tableName)
  throws IOException {
    HTable table = new HTable(conf, new Text(tableName));

    Text[] columns = {
        TEXT_INPUT_COLUMN,
        TEXT_OUTPUT_COLUMN
    };
    HScannerInterface scanner =
      table.obtainScanner(columns, HConstants.EMPTY_START_ROW);

    try {
      HStoreKey key = new HStoreKey();
      TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();

      while(scanner.next(key, results)) {
        LOG.info("row: " + key.getRow());

        for(Map.Entry<Text, byte[]> e: results.entrySet()) {
          LOG.info(" column: " + e.getKey() + " value: "
              + new String(e.getValue(), HConstants.UTF8_ENCODING));
        }
      }

    } finally {
      scanner.close();
    }
  }

  /*
   * Asserts that for every row the OUTPUT_COLUMN value is the exact byte
   * reversal of the INPUT_COLUMN value. Relies on TreeMap ordering:
   * "contents:" sorts before "text:", so the first entry is the input and
   * the second is the output.
   */
  @SuppressWarnings("null")
  private void verify(Configuration conf, String tableName) throws IOException {
    HTable table = new HTable(conf, new Text(tableName));

    Text[] columns = {
        TEXT_INPUT_COLUMN,
        TEXT_OUTPUT_COLUMN
    };
    HScannerInterface scanner =
      table.obtainScanner(columns, HConstants.EMPTY_START_ROW);

    try {
      HStoreKey key = new HStoreKey();
      TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();

      while(scanner.next(key, results)) {
        byte[] firstValue = null;
        byte[] secondValue = null;
        int count = 0;

        for(Map.Entry<Text, byte[]> e: results.entrySet()) {
          if (count == 0)
            firstValue = e.getValue();
          if (count == 1)
            secondValue = e.getValue();
          count++;
        }

        // verify second value is the reverse of the first
        assertNotNull(firstValue);
        assertNotNull(secondValue);
        assertEquals(firstValue.length, secondValue.length);
        for (int i=0; i<firstValue.length; i++) {
          assertEquals(firstValue[i], secondValue[firstValue.length-i-1]);
        }
      }

    } finally {
      scanner.close();
    }
  }
}