/*
* This file is provided to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.basho.riak.hadoop.config;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import com.basho.riak.hadoop.keylisters.BucketKeyLister;
import com.basho.riak.hadoop.keylisters.KeyLister;
/**
* Helper class to make dealing with the hadoop {@link Configuration} object
* easier when setting up a Riak Map/Reduce job on Hadoop
*
* @author russell
*
*/
public final class RiakConfig {
public static final String LOCATIONS_PROPERTY = "com.basho.riak.hadoop.mr.riak.locations";
private static final String COMMA = ",";
public static final String CLUSTER_SIZE_PROPERTY = "com.basho.riak.hadoop.mr.cluster.size";
private static final String KEY_LISTER_CLASS_PROPERTY = "com.basho.riak.hadoop.mr.keylister.class";
private static final String KEY_LISTER_INIT_STRING_PROPERTY = "com.basho.riak.hadoop.mr.keylister.init_string";
private static final String OUTPUT_BUCKET_PROPERTY = "com.basho.riak.hadoop.mr.output.bucket";
private RiakConfig() {}
/**
* Add a riak location to the {@link Configuration} passed.
*
* @param conf
* the {@link Configuration} to add a location too
* @param location
* the {@link RiakLocation} to add
* @return the {@link Configuration} with <code>location</code> added to the
* location property
*/
public static Configuration addLocation(Configuration conf, RiakLocation location) {
StringBuilder sb = new StringBuilder();
String currentLocations = conf.get(LOCATIONS_PROPERTY);
if (currentLocations != null) {
sb.append(currentLocations);
}
if (sb.length() > 0) {
sb.append(COMMA);
}
sb.append(location.asString());
conf.set(LOCATIONS_PROPERTY, sb.toString());
return conf;
}
/**
* Get all the riak locations from the passed {@link Configuration}
*
* @param conf
* the {@link Configuration}
* @return an array of {@link RiakLocation} (may be empty, never null)
*/
public static RiakLocation[] getRiakLocatons(Configuration conf) {
String locations = conf.get(LOCATIONS_PROPERTY, "");
StringTokenizer st = new StringTokenizer(locations, COMMA);
List<RiakLocation> result = new ArrayList<RiakLocation>();
while (st.hasMoreTokens()) {
result.add(RiakLocation.fromString(st.nextToken()));
}
return result.toArray(new RiakLocation[result.size()]);
}
/**
* Set the size of the hadoop cluster, this is used by the
* {@link RiakInputFormat} to try and optimize the number of
* {@link InputSplit}s to create
*
* @param conf
* the {@link Configuration} to store the hadoop cluster size in
* @param hadoopClusterSize
* the size of the hadoop cluster
* @return the {@link Configuration} updated with the passed
* <code>hadoopClusterSize</code>
*/
public static Configuration setHadoopClusterSize(Configuration conf, int hadoopClusterSize) {
conf.setInt(CLUSTER_SIZE_PROPERTY, hadoopClusterSize);
return conf;
}
/**
* Get the hadoop cluster size property, provide a default in case it hasn't
* been set
*
* @param conf
* the {@link Configuration} to get the property value from
* @param defaultValue
* the default size to use if it hasn't been set
* @return the hadoop cluster size or <code>defaultValue</code>
*/
public static int getHadoopClusterSize(Configuration conf, int defaultValue) {
return conf.getInt(CLUSTER_SIZE_PROPERTY, defaultValue);
}
/**
* @param conf
* the {@link Configuration} to query
* @return the {@link KeyLister} the job was configured with
* @throws RuntimeException
* if a {@link IllegalAccessException} or
* {@link InstantiationException} is thrown creating a
* {@link KeyLister}
*/
public static KeyLister getKeyLister(Configuration conf) throws IOException {
Class<? extends KeyLister> clazz = conf.getClass(KEY_LISTER_CLASS_PROPERTY, BucketKeyLister.class,
KeyLister.class);
try {
KeyLister lister = clazz.newInstance();
lister.init(conf.get(KEY_LISTER_INIT_STRING_PROPERTY));
return lister;
} catch (IllegalAccessException e) {
throw new RuntimeException(e);
} catch (InstantiationException e) {
throw new RuntimeException(e);
}
}
/**
* Set the {@link KeyLister} implementation to use.
*
* @param conf
* the {@link Configuration} to update
* @param lister
* the {@link KeyLister} to use
* @return the configuration updated with a serialized version of the lister
* provided
*/
public static <T extends KeyLister> Configuration setKeyLister(Configuration conf, T lister) throws IOException {
conf.setClass(KEY_LISTER_CLASS_PROPERTY, lister.getClass(), KeyLister.class);
conf.setStrings(KEY_LISTER_INIT_STRING_PROPERTY, lister.getInitString());
return conf;
}
/**
* Get the configured output bucket for the job's results
*
* @param conf
* the {@link Configuration} to query
* @return the bucket name
*/
public static String getOutputBucket(Configuration conf) {
return conf.get(OUTPUT_BUCKET_PROPERTY);
}
/**
* Add the output bucket for the results to the config.
*
* @param conf
* the {@link Configuration} to update
* @param bucket
* the bucket to add
* @return the updated {@link Configuration}
*/
public static Configuration setOutputBucket(Configuration conf, String bucket) {
conf.set(OUTPUT_BUCKET_PROPERTY, bucket);
return conf;
}
}