/*
* Copyright 2013 Cloudera.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.cdk.data.hcatalog.impl;
import com.cloudera.cdk.data.DatasetRepositories;
import com.cloudera.cdk.data.DatasetRepository;
import com.cloudera.cdk.data.DatasetRepositoryException;
import com.cloudera.cdk.data.hcatalog.HCatalogDatasetRepository;
import com.cloudera.cdk.data.impl.Accessor;
import com.cloudera.cdk.data.spi.Loadable;
import com.cloudera.cdk.data.spi.OptionBuilder;
import com.cloudera.cdk.data.spi.URIPattern;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A {@link Loadable} implementation that registers the {@code hive} URI
 * patterns with the {@link Accessor}, so that matching URIs are resolved to
 * {@link HCatalogDatasetRepository} instances.
 */
public class Loader implements Loadable {

  private static final Logger logger = LoggerFactory.getLogger(Loader.class);

  public static final String HIVE_METASTORE_URI_PROP = "hive.metastore.uris";

  private static final int UNSPECIFIED_PORT = -1;

  /** Name of the match option that carries a credential. */
  private static final String PASSWORD_OPTION = "password";

  /**
   * Builds configured {@code HCatalogDatasetRepository} instances that have
   * an explicit root directory ("external" data sets) from a Map of URI
   * match options. This is for the URI system.
   */
  private static class ExternalBuilder implements OptionBuilder<DatasetRepository> {

    private final Configuration envConf;

    public ExternalBuilder(Configuration envConf) {
      this.envConf = envConf;
    }

    /**
     * Creates a repository rooted at the directory named by the
     * {@code path} option.
     *
     * @param match URIPattern match results
     * @return a repository using a copy of the environment configuration
     * @throws DatasetRepositoryException if the FileSystem cannot be
     *         obtained or a URI cannot be built from the options
     */
    @Override
    public DatasetRepository getFromOptions(Map<String, String> match) {
      if (logger.isDebugEnabled()) {
        logger.debug("External URI options: {}", withoutSecrets(match));
      }

      // no path means the current directory; "absolute" anchors it at "/"
      final Path root;
      String path = match.get("path");
      if (path == null || path.isEmpty()) {
        root = new Path(".");
      } else if (Boolean.parseBoolean(match.get("absolute"))) {
        root = new Path("/", path);
      } else {
        root = new Path(path);
      }

      final FileSystem fs;
      try {
        fs = FileSystem.get(fileSystemURI("hdfs", match), envConf);
      } catch (IOException ex) {
        throw new DatasetRepositoryException(
            "Could not get a FileSystem", ex);
      }

      // make a modifiable copy and setup the MetaStore URI
      Configuration conf = new Configuration(envConf);
      setMetaStoreURI(conf, match);

      return new HCatalogDatasetRepository.Builder()
          .configuration(conf)
          .rootDirectory(fs.makeQualified(root))
          .build();
    }
  }

  /**
   * Builds configured {@code HCatalogDatasetRepository} instances without a
   * root directory ("Hive-managed" data sets) from a Map of URI match
   * options.
   */
  private static class ManagedBuilder implements OptionBuilder<DatasetRepository> {

    private final Configuration envConf;

    public ManagedBuilder(Configuration envConf) {
      this.envConf = envConf;
    }

    /**
     * Creates a repository from the match options.
     *
     * @param match URIPattern match results
     * @return a repository using a copy of the environment configuration
     * @throws DatasetRepositoryException if the MetaStore URI cannot be
     *         built from the options
     */
    @Override
    public DatasetRepository getFromOptions(Map<String, String> match) {
      if (logger.isDebugEnabled()) {
        logger.debug("Managed URI options: {}", withoutSecrets(match));
      }

      // make a modifiable copy and setup the MetaStore URI
      Configuration conf = new Configuration(envConf);
      setMetaStoreURI(conf, match);

      return new HCatalogDatasetRepository.Builder()
          .configuration(conf)
          .build();
    }
  }

  /**
   * Registers the managed and external {@code hive} URI patterns, using a
   * default {@code Configuration} to fill in the default MetaStore authority
   * and, when HDFS can be located, the default HDFS host and port.
   */
  @Override
  public void load() {
    // a default Configuration is only used here to discover defaults
    final Configuration conf = new Configuration();
    final String hiveAuthority = defaultHiveAuthority(conf);

    // Hive-managed data sets
    final OptionBuilder<DatasetRepository> managedBuilder =
        new ManagedBuilder(conf);
    Accessor.getDefault().registerDatasetRepository(
        new URIPattern(URI.create("hive")), managedBuilder);
    Accessor.getDefault().registerDatasetRepository(
        new URIPattern(URI.create("hive://" + hiveAuthority + "/")),
        managedBuilder);

    // external data sets
    final OptionBuilder<DatasetRepository> externalBuilder =
        new ExternalBuilder(conf);
    final String hdfsAuthority = defaultHdfsOptions(conf);
    Accessor.getDefault().registerDatasetRepository(
        new URIPattern(URI.create("hive://" + hiveAuthority +
            "/*path?absolute=true" + hdfsAuthority)),
        externalBuilder);
    Accessor.getDefault().registerDatasetRepository(
        new URIPattern(URI.create("hive:*path")), externalBuilder);
  }

  /**
   * Returns the authority of the configured MetaStore URI, or an empty
   * String when the property is unset, has no authority, or is not a valid
   * URI.
   *
   * NOTE(review): the property value is parsed as a single URI; confirm how
   * a comma-separated list of MetaStore URIs should be handled.
   *
   * @param conf the Configuration to read the MetaStore URI from
   * @return the authority component, never null
   */
  private static String defaultHiveAuthority(Configuration conf) {
    final String metaStoreUri = conf.get(HIVE_METASTORE_URI_PROP);
    if (metaStoreUri == null) {
      return "";
    }
    try {
      // getAuthority() is null when the URI has no authority component;
      // concatenating that would register the literal host "null"
      final String authority = new URI(metaStoreUri).getAuthority();
      return authority == null ? "" : authority;
    } catch (URISyntaxException ex) {
      logger.warn("Ignoring invalid " + HIVE_METASTORE_URI_PROP +
          " value: " + metaStoreUri, ex);
      return "";
    }
  }

  /**
   * Returns the default {@code hdfs-host} and {@code hdfs-port} query
   * options for the external URI pattern, or an empty String when HDFS
   * cannot be located.
   *
   * @param conf the environment's Configuration
   * @return a query-string fragment starting with {@code &}, or ""
   */
  private static String defaultHdfsOptions(Configuration conf) {
    try {
      // Use a HDFS URI with no authority and the environment's configuration
      // to find the default HDFS information
      final URI hdfs = FileSystem.get(URI.create("hdfs:/"), conf).getUri();
      if (hdfs.getHost() == null) {
        // avoid registering the literal string "null" as the default host
        return "";
      }
      return "&hdfs-host=" + hdfs.getHost() +
          "&hdfs-port=" + hdfs.getPort();
    } catch (IOException ex) {
      logger.warn(
          "Could not locate HDFS, hdfs-host and hdfs-port " +
          "will not be set by default for Hive repositories.");
      logger.debug("Cause of the failed HDFS lookup", ex);
      return "";
    }
  }

  /**
   * Returns the match options in a form that is safe to log: the value of
   * the {@code password} option, if present, is masked in a copy.
   *
   * @param match URIPattern match results
   * @return {@code match} itself when it holds no password, else a copy
   */
  private static Map<String, String> withoutSecrets(Map<String, String> match) {
    if (!match.containsKey(PASSWORD_OPTION)) {
      return match;
    }
    final Map<String, String> redacted = new HashMap<String, String>(match);
    redacted.put(PASSWORD_OPTION, "***");
    return redacted;
  }

  /**
   * Parses the port stored under {@code key}. A missing or non-numeric
   * value yields {@link #UNSPECIFIED_PORT}.
   *
   * @param match URIPattern match results
   * @param key the option name that holds the port
   * @return the parsed port, or {@code UNSPECIFIED_PORT}
   */
  private static int parsePort(Map<String, String> match, String key) {
    if (!match.containsKey(key)) {
      return UNSPECIFIED_PORT;
    }
    final String value = match.get(key);
    try {
      return Integer.parseInt(value);
    } catch (NumberFormatException ex) {
      logger.warn("Ignoring invalid " + key + " option: " + value);
      return UNSPECIFIED_PORT;
    }
  }

  /**
   * Builds the root URI of the file system described by the match options
   * ({@code username}, {@code password}, {@code hdfs-host} and
   * {@code hdfs-port}).
   *
   * @param scheme the URI scheme of the file system
   * @param match URIPattern match results
   * @return a URI with path "/" for the file system
   * @throws DatasetRepositoryException if the options do not form a valid URI
   */
  private static URI fileSystemURI(String scheme, Map<String, String> match) {
    // user info is "username" or "username:password"; null when no username
    String userInfo = null;
    if (match.containsKey("username")) {
      userInfo = match.get("username");
      if (match.containsKey("password")) {
        userInfo = userInfo + ":" + match.get("password");
      }
    }

    try {
      return new URI(scheme, userInfo, match.get("hdfs-host"),
          parsePort(match, "hdfs-port"), "/", null, null);
    } catch (URISyntaxException ex) {
      throw new DatasetRepositoryException("Could not build FS URI", ex);
    }
  }

  /**
   * Sets the MetaStore URI in the given Configuration, if there is a host in
   * the match arguments. If there is no host, then the conf is not changed.
   *
   * @param conf a Configuration that will be used to connect to the MetaStore
   * @param match URIPattern match results
   * @throws DatasetRepositoryException if the host and port do not form a
   *         valid URI
   */
  private static void setMetaStoreURI(
      Configuration conf, Map<String, String> match) {
    // only a host triggers a new MetaStore URI; a port on its own is
    // ignored and the connection URI is left unchanged
    if (!match.containsKey("host")) {
      return;
    }

    try {
      final URI metaStore = new URI("thrift", null, match.get("host"),
          parsePort(match, "port"), "/", null, null);
      conf.set(HIVE_METASTORE_URI_PROP, metaStore.toString());
    } catch (URISyntaxException ex) {
      throw new DatasetRepositoryException(
          "Could not build metastore URI", ex);
    }
  }
}