/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.flume.handlers.hdfs;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Compressor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.cloudera.flume.conf.Context;
import com.cloudera.flume.conf.FlumeBuilder;
import com.cloudera.flume.conf.FlumeConfiguration;
import com.cloudera.flume.conf.FlumeSpecException;
import com.cloudera.flume.conf.SinkFactory.SinkBuilder;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventSink;
import com.cloudera.flume.handlers.text.FormatFactory;
import com.cloudera.flume.handlers.text.output.OutputFormat;
import com.cloudera.flume.reporter.ReportEvent;
import com.cloudera.util.PathManager;
import com.google.common.base.Preconditions;
/**
 * This creates a raw Hadoop DFS file that outputs data formatted by the
 * provided OutputFormat. It is assumed that the output is a file of some sort.
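 * <p>
 * An illustrative sink spec, following the usage string in this sink's
 * builder (the host, path, and format name below are placeholders for
 * whatever your deployment uses):
 *
 * <pre>
 *   customdfs("hdfs://namenode:8020/flume/webdata", raw)
 * </pre>
 *
 * The second (output format) argument is optional.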
*/
public class CustomDfsSink extends EventSink.Base {
static final Logger LOG = LoggerFactory.getLogger(CustomDfsSink.class);
private static final String A_OUTPUTFORMAT = "recordformat";
boolean compressOutput;
OutputFormat format;
OutputStream writer;
AtomicLong count = new AtomicLong();
String path;
Path dstPath;
PathManager pathManager;
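  /**
   * @param path
   *          destination path, optionally including a scheme such as hdfs://,
   *          file://, or s3n://
   * @param format
   *          output format used to serialize each appended event
   */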
  public CustomDfsSink(String path, OutputFormat format) {
    Preconditions.checkArgument(path != null, "path must not be null");
    Preconditions.checkArgument(format != null, "format must not be null");
this.path = path;
this.format = format;
this.writer = null;
}
@Override
public void append(Event e) throws IOException, InterruptedException {
if (writer == null) {
throw new IOException("Append failed, did you open the writer?");
}
format.format(writer, e);
count.getAndIncrement();
super.append(e);
}
  @Override
  public void close() throws IOException {
    if (writer == null) {
      LOG.warn("Sink for " + path + " is already closed or was never opened");
      return;
    }
    LOG.info("Closing HDFS file: " + pathManager.getOpenPath());
    writer.flush();
    LOG.info("done writing raw file to HDFS");
    writer.close();
    pathManager.close();
    writer = null;
  }
/**
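   * Opens the destination file through a {@link PathManager}; when a
   * compression codec other than "None" is configured, the output stream is
   * wrapped in a compressing stream.
   * <p>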
   * Hadoop compression codecs that use native libraries require an instance
   * of a Configuration object, because they check whether or not the native
   * libraries have been loaded. GzipCodec, LzoCodec, and LzopCodec are all
   * codecs that require native libraries; GzipCodec is a slight exception in
   * that it falls back to pure Java when the native libraries are not
   * accessible, producing notices rather than errors. BZip2Codec is an
   * example of a codec that does not use native libraries.
*/
@Override
public void open() throws IOException {
FlumeConfiguration conf = FlumeConfiguration.get();
FileSystem hdfs;
String codecName = conf.getCollectorDfsCompressCodec();
CompressionCodec codec = getCodec(conf, codecName);
if (codec == null) {
dstPath = new Path(path);
hdfs = dstPath.getFileSystem(conf);
pathManager = new PathManager(hdfs, dstPath.getParent(), dstPath.getName());
writer = pathManager.open();
LOG.info("Creating HDFS file: " + pathManager.getOpenPath());
return;
}
Compressor cmp = codec.createCompressor();
dstPath = new Path(path + codec.getDefaultExtension());
hdfs = dstPath.getFileSystem(conf);
pathManager = new PathManager(hdfs, dstPath.getParent(), dstPath.getName());
writer = pathManager.open();
    try {
      writer = codec.createOutputStream(writer, cmp);
    } catch (NullPointerException npe) {
      // The codec first tries to load its "native" implementation and, if that
      // fails, falls back to a pure-Java implementation. If there is no Java
      // implementation either, createOutputStream exits via NPE. We catch the
      // NPE and convert it into an IOE with a more useful error message.
      LOG.error("Unable to load compression codec " + codec, npe);
      throw new IOException("Unable to load compression codec " + codec, npe);
    }
LOG.info("Creating " + codec + " compressed HDFS file: "
+ pathManager.getOpenPath());
}
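  /**
   * Returns true if codecName identifies the given codec class: either the
   * fully qualified class name (exact match), the simple class name, or the
   * simple name without its "Codec" suffix (the latter two case-insensitive).
   * For example, "gzip", "GzipCodec", and
   * "org.apache.hadoop.io.compress.GzipCodec" all match GzipCodec.
   */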
private static boolean codecMatches(Class<? extends CompressionCodec> cls,
String codecName) {
String simpleName = cls.getSimpleName();
if (cls.getName().equals(codecName)
|| simpleName.equalsIgnoreCase(codecName)) {
return true;
}
if (simpleName.endsWith("Codec")) {
String prefix = simpleName.substring(0,
simpleName.length() - "Codec".length());
if (prefix.equalsIgnoreCase(codecName)) {
return true;
}
}
return false;
}
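  /**
   * Looks up codecName among the codec classes registered with Hadoop's
   * CompressionCodecFactory. Returns null when codecName is "None"; otherwise
   * returns an instance of the matching codec (configured with conf when the
   * codec is Configurable), or throws IllegalArgumentException if no
   * registered codec matches. For example, getCodec(conf, "gzip") would
   * return a GzipCodec instance under Hadoop's default codec registrations.
   */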
  public static CompressionCodec getCodec(Configuration conf, String codecName) {
    List<Class<? extends CompressionCodec>> codecs = CompressionCodecFactory
        .getCodecClasses(conf);
    // We would like to base this on DefaultCodec, but not all codecs (e.g.,
    // LzoCodec) extend DefaultCodec.
CompressionCodec codec = null;
ArrayList<String> codecStrs = new ArrayList<String>();
codecStrs.add("None");
for (Class<? extends CompressionCodec> cls : codecs) {
codecStrs.add(cls.getSimpleName());
if (codecMatches(cls, codecName)) {
try {
codec = cls.newInstance();
} catch (InstantiationException e) {
LOG.error("Unable to instantiate " + cls + " class");
} catch (IllegalAccessException e) {
LOG.error("Unable to access " + cls + " class");
}
}
}
if (codec == null) {
if (!codecName.equalsIgnoreCase("None")) {
throw new IllegalArgumentException("Unsupported compression codec "
+ codecName + ". Please choose from: " + codecStrs);
}
} else if (codec instanceof Configurable) {
      // Not every codec implements Configurable (BZip2Codec, for example, does
      // not), hence the instanceof check. Configurable codecs must have their
      // configuration set because they may use native libs.
((Configurable) codec).setConf(conf);
}
return codec;
}
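  /**
   * Builder invoked for the "customdfs" sink spec. The first argument is the
   * destination path; the optional second argument names the output format.
   * When the format is omitted, a null format spec is passed to
   * FlumeBuilder.createFormat.
   */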
public static SinkBuilder builder() {
return new SinkBuilder() {
@Override
public EventSink create(Context context, Object... args) {
if (args.length != 2 && args.length != 1) {
// TODO (jon) make this message easier.
throw new IllegalArgumentException(
"usage: customdfs(\"[(hdfs|file|s3n|...)://namenode[:port]]/path\", format)");
}
Object format = (args.length == 1) ? null : args[1];
OutputFormat fmt;
try {
fmt = FlumeBuilder.createFormat(FormatFactory.get(), format);
} catch (FlumeSpecException e) {
LOG.error("failed to load format " + format, e);
throw new IllegalArgumentException("failed to load format " + format);
}
return new CustomDfsSink(args[0].toString(), fmt);
}
@Deprecated
@Override
public EventSink build(Context context, String... args) {
// updated interface calls build(Context,Object...) instead
throw new RuntimeException(
"Old sink builder for CustomDfsSink should not be exercised");
}
};
}
@Override
public String getName() {
return "CustomDfs";
}
@Override
public ReportEvent getMetrics() {
ReportEvent rpt = super.getMetrics();
rpt.setStringMetric(A_OUTPUTFORMAT, format.getBuilder().getName());
rpt.setLongMetric(ReportEvent.A_COUNT, count.get());
return rpt;
}
@Deprecated
@Override
public ReportEvent getReport() {
ReportEvent rpt = super.getReport();
rpt.setStringMetric(A_OUTPUTFORMAT, format.getBuilder().getName());
rpt.setLongMetric(ReportEvent.A_COUNT, count.get());
return rpt;
}
}