Package com.cloudera.flume.handlers.batch

Source Code of com.cloudera.flume.handlers.batch.BatchingDecorator$TimeoutThread

/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.flume.handlers.batch;

import java.io.ByteArrayOutputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.flume.conf.Context;
import com.cloudera.flume.conf.SinkFactory.SinkDecoBuilder;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventImpl;
import com.cloudera.flume.core.EventSink;
import com.cloudera.flume.core.EventSinkDecorator;
import com.cloudera.flume.handlers.hdfs.WriteableEvent;
import com.cloudera.flume.reporter.ReportEvent;
import com.cloudera.util.Clock;
import com.google.common.base.Preconditions;

/**
* This takes many events and then batches them up into one batched event. If a
* small number of events has been received and a specified latency has passed
* since the last batch, the current batch is sent downstream..
*
* TODO (jon) This should not have its own thread -- close flushes the batch and
* only the roller (which closes) should have the thread.
*/
public class BatchingDecorator<S extends EventSink> extends
    EventSinkDecorator<S> {
  protected static final Logger LOG = LoggerFactory
      .getLogger(BatchingDecorator.class);

  public static final String BATCH_SIZE = "batchSize";
  public static final String BATCH_DATA = "batchData";

  // timeoutBatches + filledBatches + (emptyBatches) = triggeredBatches
  public static final String R_TIMEOUTS = "timeoutBatches";
  public static final String R_FILLED = "filledBatches";
  public static final String R_TRIGGERS = "triggeredBatches";
  public static final String R_EMPTY = "emptyBatches";

  AtomicLong timeoutCount = new AtomicLong(0);
  AtomicLong filledCount = new AtomicLong(0);
  AtomicLong totalCount = new AtomicLong(0);
  AtomicLong emptyCount = new AtomicLong(0);

  final int maxSize;
  final int maxLatency;

  List<Event> events;
  protected long lastBatchTime = 0;

  public BatchingDecorator(S s, int maxSize, int maxLatency) {
    super(s);
    this.maxSize = maxSize;
    this.maxLatency = maxLatency;
    events = new ArrayList<Event>(maxSize);
  }

  Event batchevent(List<Event> evts) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream(2 >> 15);
    DataOutput out = new DataOutputStream(baos);
    for (Event evt : events) {
      WriteableEvent we = new WriteableEvent(evt);
      we.write(out);
    }

    Event be = new EventImpl(new byte[0]);
    ByteBuffer b = ByteBuffer.allocate(4);
    b.putInt(events.size());
    be.set(BATCH_SIZE, b.array());
    be.set(BATCH_DATA, baos.toByteArray());
    return be;
  }

  public static boolean isBatch(Event e) {
    return e.get(BATCH_SIZE) != null && e.get(BATCH_DATA) != null;
  }

  /**
   * This is a method setup seems trivial but is used in testing.
   */
  protected void endBatchTimeout() throws IOException, InterruptedException {
    endBatch();
    timeoutCount.incrementAndGet();

  }

  protected synchronized void endBatch() throws IOException,
      InterruptedException {
    if (events.size() > 0) {
      Event be = batchevent(events);
      super.append(be);
      events.clear();
    } else {
      emptyCount.incrementAndGet();
    }
    lastBatchTime = Clock.unixTime();
    totalCount.incrementAndGet();
  }

  /**
   * This thread wakes up to check if a batch should be committed, no matter how
   * small it is.
   */
  class TimeoutThread extends Thread {
    final CountDownLatch doneLatch = new CountDownLatch(1);
    final CountDownLatch startedLatch = new CountDownLatch(1);
    volatile boolean timeoutThreadDone = false;

    TimeoutThread() {
      super("BatchingDecorator-TimeoutThread");
    }

    public synchronized void doShutdown() {
      timeoutThreadDone = true;
      interrupt();
      try {
        if (!doneLatch.await(maxLatency * 2L, TimeUnit.MILLISECONDS)) {
          LOG.warn("Batch timeout thread did not exit in a timely fashion");
        }
      } catch (InterruptedException e) {
        LOG.warn("Interrupted while waiting for batch timeout thread to finish");
      }
    }

    public synchronized void doStart() {
      timeoutThreadDone = false;
      this.start();
      try {
        if (!startedLatch.await(maxLatency * 2L, TimeUnit.MILLISECONDS)) {
          LOG.warn("Batch timeout thread did not start in a timely fashion");
        }
      } catch (InterruptedException e) {
        LOG.warn("Interrupted while waiting for batch timeout thread to start");
      }
    }

    public void run() {
      startedLatch.countDown();
      lastBatchTime = Clock.unixTime();
      while (!timeoutThreadDone) {
        long now = Clock.unixTime();
        long msUntilTimeout = 0;
        synchronized (this) {
          msUntilTimeout = lastBatchTime + maxLatency - now;
        }
        if (msUntilTimeout > 0) {
          try {
            LOG.debug("Batching timeout sleeping for " + msUntilTimeout + "ms");
            Clock.sleep(msUntilTimeout);
          } catch (InterruptedException e) {
            LOG.warn("TimeoutThread interrupted", e);
          }
          continue;
        }
        try {
          endBatchTimeout();
        } catch (IOException e) {
          LOG.error("IOException when ending batch!", e);
          timeoutThreadDone = true;
        } catch (InterruptedException e) {
          // TODO verify this is correct
          LOG.error("Interrupted exception when ending batch", e);
          timeoutThreadDone = true;
        }
      }
      doneLatch.countDown();
    }
  }

  protected TimeoutThread timeoutThread = null;

  @Override
  public void open() throws IOException, InterruptedException {
    // TODO handle interruptions
    super.open();
    if (maxLatency > 0) {
      timeoutThread = new TimeoutThread();
      timeoutThread.doStart();
    }
  }

  /**
   * Synchronized so that events doesn't get manipulated by the TimeoutThread
   * calling endBatch.
   */
  @Override
  public synchronized void append(Event e) throws IOException,
      InterruptedException {
    events.add(e);
    if (events.size() >= maxSize) {
      // TODO handle interruptions
      endBatch();
      filledCount.incrementAndGet();
    }
  }

  @Override
  public void close() throws IOException, InterruptedException {
    // TODO handle interruptions

    // flush any left over events in queue.
    endBatch();
    if (timeoutThread != null) {
      timeoutThread.doShutdown();
    }
    super.close();
  }

  @Override
  public ReportEvent getMetrics() {
    ReportEvent rpt = super.getMetrics();
    rpt.setLongMetric(R_TIMEOUTS, timeoutCount.get());
    rpt.setLongMetric(R_FILLED, filledCount.get());
    rpt.setLongMetric(R_TRIGGERS, totalCount.get());
    rpt.setLongMetric(R_EMPTY, emptyCount.get());
    return rpt;
  }

  public static SinkDecoBuilder builder() {
    return new SinkDecoBuilder() {
      @Override
      public EventSinkDecorator<EventSink> build(Context context,
          String... argv) {
        Preconditions.checkArgument(argv.length >= 1 && argv.length < 3,
            "usage: batch(factor [,maxlatencymillis])");
        int factor = Integer.parseInt(argv[0]);
        int latency = 0;
        if (argv.length >= 2)
          latency = Integer.parseInt(argv[1]);
        return new BatchingDecorator<EventSink>(null, factor, latency);
      }
    };
  }

}
TOP

Related Classes of com.cloudera.flume.handlers.batch.BatchingDecorator$TimeoutThread

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.