Package com.mapr

Source Code of com.mapr.DirectoryObserver$Message

/*
* Copyright MapR Technologies, $year
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.mapr;

import backtype.storm.spout.SpoutOutputCollector;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.mapr.storm.DirectoryScanner;
import com.mapr.storm.PendingMessage;
import com.mapr.storm.SpoutState;
import com.mapr.storm.streamparser.StreamParser;
import com.mapr.storm.streamparser.StreamParserFactory;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.regex.Pattern;

/**
* This is a spout that reads records from files. New records can be appended to
* these files while this spout is running. This will cause these new records to
* be emitted the next time around the event loop. It is also possible that we
* will hit the end of a file of records and notice that a new file has been
* started. In that case, we will move to that new file and start emitting the
* records we read from that file.
* <p/>
* There are two modes in which a TailSpout can operate. In unreliable mode,
* transactions are read, but ack's and fail's are ignored so that if
* transactions are not processed, they are lost. The TailSpout will checkpoint
* its own progress periodically and on deactivation so that when the TailSpout
* is restarted, it will start from close to where it left off. The restart will
* be exact with a clean shutdown and can be made arbitrarily precise with an
* unclean shutdown.
* <p/>
* In the reliable mode, an in-memory table of unacknowledged transactions is
* kept. Any that fail are retransmitted and those that succeed are removed from
* the table. When a checkpoint is done, the file name and offset of the
* earliest unacknowledged transaction in that file will be retained. On
* restart, each of the files with unacknowledged transactions will be replayed
* from that point. Usually only one file will be replayed (the current one) or
* possibly the current and previous file. This will generally replay a few
* extra transactions, but if there is a cool-down period on shutdown, this
* should result in very few extra transactions being transmitted on startup.
* <p/>
* One possible extension would be to allow the system to run in reliable mode,
* but not save the offsets for unacked transactions. This would be good during
* orderly shutdowns, but very bad in the event of an unorderly shutdown.
*/
public class DirectoryObserver implements Serializable, Closeable {
    private final Logger log = LoggerFactory.getLogger(DirectoryObserver.class);

    private DirectoryScanner scanner;
    private FileInputStream currentInput = null;

    // these are set in the constructors
    private StreamParserFactory factory;
    private File statusFile;

    // all others are created on the fly
    private Map<Long, PendingMessage> ackBuffer = Maps.newTreeMap();
    private long messageId = 0;

    // how often should we save our state?
    private long tuplesPerCheckpoint = 100;

    // time between forced checkpoints in seconds
    private double checkPointIntervalSeconds = 1.0;
    private Queue<PendingMessage> pendingReplays = Lists.newLinkedList();

    private StreamParser parser = null;
    private double nextCheckPointTime = 0;

    public DirectoryObserver(StreamParserFactory factory, File statusFile)
            throws IOException {
        this.factory = factory;
        scanner = SpoutState.restoreState(pendingReplays, statusFile);
    }

    public DirectoryObserver(StreamParserFactory factory, File statusFile,
                             File inputDirectory, final Pattern inputFileNamePattern) {
        this.factory = factory;
        this.statusFile = statusFile;
        scanner = new DirectoryScanner(inputDirectory, inputFileNamePattern);
    }

    public void setTuplesPerCheckpoint(long tuplesPerCheckpoint) {
        this.tuplesPerCheckpoint = tuplesPerCheckpoint;
    }

    public void setCheckPointIntervalSeconds(double checkPointIntervalSeconds) {
        this.checkPointIntervalSeconds = checkPointIntervalSeconds;
    }

    public void close() {
        SpoutState.recordCurrentState(ackBuffer, scanner, parser, statusFile);
    }

    public Message nextMessage() {
        if (currentInput == null) {
            currentInput = openNextInput();
        }

        try {
            // TODO need to persist current reading state somewhere
            while (currentInput != null) {
                // read a record
                long position = parser.currentOffset();
                List<Object> r = parser.nextRecord();

                // assert currentInput != null
                if (r == null) {
                    // reached end of current file
                    // (currentInput != null && r == null) so we enter loop at least
                    // once
                    while (currentInput != null && r == null) {
                        currentInput = openNextInput();

                        // assert r == null
                        if (currentInput != null) {
                            position = parser.currentOffset();
                            r = parser.nextRecord();
                        }
                        // r != null => currentInput != null
                    }
                    // post: r != null iff currentInput != null
                }
                // post: (r != null iff currentInput != null) || (r != null)
                // post: (r == null => currentInput == null)

                if (r != null) {
                    messageId++;
                    if (messageId % tuplesPerCheckpoint == 0
                            || System.nanoTime() / 1e9 > nextCheckPointTime) {
                        SpoutState.recordCurrentState(ackBuffer, scanner, parser,
                                statusFile);
                        nextCheckPointTime = System.nanoTime() / 1e9
                                + checkPointIntervalSeconds;
                    }
                    return new Message(messageId, r);
                }
            }
        } catch (IOException e) {
            log.error("DirectoryObserver threw I/O exception", e);
            throw new RuntimeException(e);
        }
        // exit only when all files have been processed completely
        return null;
    }

    public static class Message {
        private final long messageId;
        private final List<Object> tuple;

        public Message(long messageId, List<Object> tuple) {
            this.messageId = messageId;
            this.tuple = tuple;
        }

        public long getMessageId() {
            return messageId;
        }

        public List<Object> getTuple() {
            return tuple;
        }
    }

    private FileInputStream openNextInput() {
        PendingMessage next = pendingReplays.poll();
        while (next != null) {
            if (next.getFile().exists()) {
                return scanner.forceInput(next.getFile(), next.getOffset());
            } else {
                log.error("Replay file {} has disappeared", next.getFile());
            }
            next = pendingReplays.poll();
        }

        // look for a new file
        FileInputStream r = scanner.nextInput();
        parser = factory.createParser(r);
        return r;
    }
}
TOP

Related Classes of com.mapr.DirectoryObserver$Message

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.