Package com.btaz.datautil.files

Source Code of com.btaz.datautil.files.FileReducer$QueueReader

package com.btaz.datautil.files;

import com.btaz.datautil.DataUtilDefaults;
import com.btaz.datautil.DataUtilException;
import com.btaz.datautil.files.mapreduce.KeyComparator;
import com.btaz.datautil.files.mapreduce.MapReduceException;
import com.btaz.datautil.files.mapreduce.OutputCollector;
import com.btaz.datautil.files.mapreduce.Reducer;

import java.io.*;
import java.util.ArrayList;
import java.util.LinkedList;

/**
* User: msundell
*/
public class FileReducer {
    /**
     * This method functions provide reduce (map-reduce) like functionality. For every row in the input data there is a
     * call to the reduce callback method. The input data must be pre-sorted matching the ReducableKeyMatcher.
     * @param inputFile input file
     * @param outputFile output file
     * @param reducer callback method, a class that implements the Reducer interface
     * @param comparator a key comparator, this is used to determine if two or more items share the same key.
     * @throws DataUtilException data util exception
     */
    public static void reduce(File inputFile, File outputFile, Reducer reducer, KeyComparator comparator)
            throws DataUtilException {
        // validations
        if(inputFile == null) {
            throw new DataUtilException("The inputFile parameter can not be a null value");
        }
        if(outputFile == null) {
            throw new DataUtilException("The outputFile parameter can not be a null value");
        }
        if(reducer == null) {
            throw new DataUtilException("The reducable parameter can not be a null value");
        }
        if(comparator == null) {
            throw new DataUtilException("The comparator parameter can not be a null value");
        }

        // reduce all data
        FileInputStream inputStream;
        OutputCollector collector = new OutputCollector(outputFile);
        QueueReader queueReader;

        // Aggregate all items matching the comparator and call the Reducable callback
        try {
            inputStream = new FileInputStream(inputFile);
            BufferedReader br = new BufferedReader(new InputStreamReader(inputStream,
                    DataUtilDefaults.charSet));
            queueReader = new QueueReader(br);

            // reduce data
            String prev = null;
            String curr;
            ArrayList<String> items = new ArrayList<String>();
            while(true) {
                curr = queueReader.readLine();
                if(curr == null) {
                    // no more data
                    reduceItems(collector, reducer, items);
                    break;
                } else if(prev == null) {
                    // first row in a new batch
                    items.add(curr);
                    prev = curr;
                } else if(comparator.compare(prev, curr) == 0) {
                    // same keys
                    items.add(curr);
                    prev = curr;
                } else {
                    // different keys
                    queueReader.push(curr);
                    reduceItems(collector, reducer, items);
                    items.clear();
                    prev = null;
                }
            }
            collector.close();
            inputStream.close();
        } catch (IOException e) {
            throw new DataUtilException(e);
        } catch (MapReduceException e) {
            throw new DataUtilException("Irrecoverable reduce operation", e);
        }
    }

    /**
     * Call reducer
     * @param writer writer
     * @param reducable reducable callback
     * @param items items
     * @throws IOException IO exception
     */
    private static void reduceItems(OutputCollector collector, Reducer reducable, ArrayList<String> items)
            throws IOException {
        if(items != null) {
            // call reducer
            reducable.reduce(items, collector);
        }
    }

    /**
     * This helper class applies a push-back queue on top of the BufferedReader
     */
    private static class QueueReader {
        private BufferedReader reader;
        private LinkedList<String> pushBackQueue;

        private QueueReader(BufferedReader reader) {
            this.reader = reader;
            pushBackQueue = new LinkedList<String>();
        }

        /**
         * Reds a line of text from the buffered reader. The method returns null if there's no more data.
         * A line of text is terminated by any of these:
         * - line feed ('\n')
         * - carriage return ('\r')
         * - carriage return + linefeed
         *
         * @return row as string not including any line-termination characters, or null if the end of the stream has
         * been reached. The method first attempts to readLine data from the push-back, and secondary from the reader.
         * @exception  IOException if an I/O error occurs
         */
        public String readLine() throws IOException {
            if(pushBackQueue.size() > 0) {
                return pushBackQueue.pop();
            } else {
                return reader.readLine();
            }
        }

        public void push(String row) throws IOException {
            pushBackQueue.push(row);
        }
    }
}
TOP

Related Classes of com.btaz.datautil.files.FileReducer$QueueReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.