// Package org.anarres.lzo.hadoop.codec
//
// Source Code of org.anarres.lzo.hadoop.codec.LzoCodec

/*
* This file is part of lzo-java, an implementation of LZO in Java.
* https://github.com/Karmasphere/lzo-java
*
* The Java portion of this library is:
* Copyright (C) 2011 Shevek <shevek@anarres.org>
* All Rights Reserved.
*
* This file is based on a file from hadoop-gpl-compression.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with the LZO library; see the file COPYING.
* If not, see <http://www.gnu.org/licenses/> or write to the
* Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301, USA.
*/
package org.anarres.lzo.hadoop.codec;

import java.io.IOException;
import java.io.OutputStream;
import java.io.InputStream;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.compress.BlockCompressorStream;
import org.apache.hadoop.io.compress.BlockDecompressorStream;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Decompressor;

/**
* A {@link org.apache.hadoop.io.compress.CompressionCodec} for a streaming
* <b>lzo</b> compression/decompression pair.
* http://www.oberhumer.com/opensource/lzo/
*
*/
public class LzoCodec extends Configured implements CompressionCodec {

    private static final Log LOG = LogFactory.getLog(LzoCodec.class.getName());
    public static final String LZO_COMPRESSOR_KEY = "io.compression.codec.lzo.compressor";
    public static final String LZO_DECOMPRESSOR_KEY = "io.compression.codec.lzo.decompressor";
    public static final String LZO_COMPRESSION_LEVEL_KEY = "io.compression.codec.lzo.compression.level";
    public static final String LZO_BUFFER_SIZE_KEY = "io.compression.codec.lzo.buffersize";
    public static final int DEFAULT_LZO_BUFFER_SIZE = 256 * 1024;
    public static final int MAX_BLOCK_SIZE = 64 * 1024 * 1024;
    public static final int UNDEFINED_COMPRESSION_LEVEL = -999// Constant from LzoCompressor.c

    static LzoCompressor.CompressionStrategy getCompressionStrategy(Configuration conf) {
        assert conf != null : "Configuration cannot be null!";
        return LzoCompressor.CompressionStrategy.valueOf(
                conf.get(LZO_COMPRESSOR_KEY,
                LzoCompressor.CompressionStrategy.LZO1X_1.name()));
    }

    static LzoDecompressor.CompressionStrategy getDecompressionStrategy(Configuration conf) {
        assert conf != null : "Configuration cannot be null!";
        return LzoDecompressor.CompressionStrategy.valueOf(
                conf.get(LZO_DECOMPRESSOR_KEY,
                LzoDecompressor.CompressionStrategy.LZO1X.name()));
    }

    static int getCompressionLevel(Configuration conf) {
        assert conf != null : "Configuration cannot be null!";
        return conf.getInt(LZO_COMPRESSION_LEVEL_KEY, UNDEFINED_COMPRESSION_LEVEL);
    }

    static int getBufferSize(Configuration conf) {
        assert conf != null : "Configuration cannot be null!";
        return conf.getInt(LZO_BUFFER_SIZE_KEY, DEFAULT_LZO_BUFFER_SIZE);
    }

    public static void setCompressionStrategy(Configuration conf,
            LzoCompressor.CompressionStrategy strategy) {
        assert conf != null : "Configuration cannot be null!";
        conf.set(LZO_COMPRESSOR_KEY, strategy.name());
    }

    public static void setDecompressionStrategy(Configuration conf,
            LzoDecompressor.CompressionStrategy strategy) {
        assert conf != null : "Configuration cannot be null!";
        conf.set(LZO_DECOMPRESSOR_KEY, strategy.name());
    }

    public static void setCompressionLevel(Configuration conf, int compressionLevel) {
        assert conf != null : "Configuration cannot be null!";
        conf.setInt(LZO_COMPRESSION_LEVEL_KEY, compressionLevel);
    }

    public static void setBufferSize(Configuration conf, int bufferSize) {
        assert conf != null : "Configuration cannot be null!";
        conf.setInt(LZO_BUFFER_SIZE_KEY, bufferSize);
    }

    @Override
    public CompressionOutputStream createOutputStream(OutputStream out)
            throws IOException {
        return createOutputStream(out, createCompressor());
    }

    @Override
    public CompressionOutputStream createOutputStream(OutputStream out,
            Compressor compressor) throws IOException {
        /**
         * <b>http://www.oberhumer.com/opensource/lzo/lzofaq.php</b>
         *
         * How much can my data expand during compression ?
         * ================================================
         * LZO will expand incompressible data by a little amount.
         * I still haven't computed the exact values, but I suggest using
         * these formulas for a worst-case expansion calculation:
         *
         * Algorithm LZO1, LZO1A, LZO1B, LZO1C, LZO1F, LZO1X, LZO1Y, LZO1Z:
         * ----------------------------------------------------------------
         * output_block_size = input_block_size + (input_block_size / 16) + 64 + 3
         *
         * This is about 106% for a large block size.
         *
         * Algorithm LZO2A:
         * ----------------
         * output_block_size = input_block_size + (input_block_size / 8) + 128 + 3
         */
        // Create the lzo output-stream
        Configuration conf = getConf();
        LzoCompressor.CompressionStrategy strategy = getCompressionStrategy(conf);
        int bufferSize = getBufferSize(conf);
        int compressionOverhead = strategy.name().contains("LZO1")
                ? (bufferSize >> 4) + 64 + 3
                : (bufferSize >> 3) + 128 + 3;

        return new BlockCompressorStream(out, compressor, bufferSize,
                compressionOverhead);
    }

    @Override
    public Class<? extends Compressor> getCompressorType() {
        return LzoCompressor.class;
    }

    @Override
    public Compressor createCompressor() {
        Configuration conf = getConf();
        LzoCompressor.CompressionStrategy strategy = getCompressionStrategy(conf);
        int bufferSize = getBufferSize(conf);
        return new LzoCompressor(strategy, bufferSize);
    }

    @Override
    public CompressionInputStream createInputStream(InputStream in)
            throws IOException {
        return createInputStream(in, createDecompressor());
    }

    @Override
    public CompressionInputStream createInputStream(InputStream in,
            Decompressor decompressor)
            throws IOException {
        Configuration conf = getConf();
        return new BlockDecompressorStream(in, decompressor, getBufferSize(conf));
    }

    @Override
    public Class<? extends Decompressor> getDecompressorType() {
        return LzoDecompressor.class;
    }

    @Override
    public Decompressor createDecompressor() {
        Configuration conf = getConf();
        LzoDecompressor.CompressionStrategy strategy = getDecompressionStrategy(conf);
        int bufferSize = getBufferSize(conf);
        return new LzoDecompressor(strategy, bufferSize);
    }

    /**
     * Get the default filename extension for this kind of compression.
     * @return the extension including the '.'
     */
    @Override
    public String getDefaultExtension() {
        return ".lzo_deflate";
    }
}
// TOP
//
// Related Classes of org.anarres.lzo.hadoop.codec.LzoCodec
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.