Package io.druid.data.input

Examples of io.druid.data.input.Firehose
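
For orientation: in this era of Druid (the io.druid namespace), Firehose is a small interface — a hasMore()/nextRow() cursor over parsed InputRows, plus a commit() that hands back a Runnable to run once everything read so far has been durably persisted, and a close() inherited from Closeable. A minimal sketch of the contract and of the consumption loop the examples below all follow (reconstructed from the snippets on this page; treat the comments as illustrative, not the exact source):

public interface Firehose extends java.io.Closeable
{
  // Is there another row? May block until one is available or the stream ends.
  boolean hasMore();

  // The next parsed row; only valid to call after hasMore() returned true.
  InputRow nextRow();

  // A Runnable to run once the rows read so far have been durably persisted,
  // e.g. to acknowledge messages or commit consumer offsets.
  Runnable commit();
}

// The consumption loop shared by the indexing tasks below:
try {
  while (firehose.hasMore()) {
    final InputRow row = firehose.nextRow();
    // ... hand the row to a Plumber / sink ...
  }
}
finally {
  firehose.close();
}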


  {
    final LinkedList<String> dimensions = new LinkedList<String>();
    dimensions.add("inColumn");
    dimensions.add("target");

    return new Firehose()
    {
      private final java.util.Random rand = (seed == 0L) ? new Random() : new Random(seed);

      private long rowCount = 0L;
      private boolean waitIfmaxGeneratedRows = true;
      // … (snippet truncated)
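
A hedged sketch of how the truncated methods could continue this random-row example, built on the fields above. MapBasedInputRow is the stock io.druid.data.input row implementation; the event values and the unbounded hasMore() are illustrative, not the original source (imports such as HashMap are elided, as elsewhere in these fragments):

      @Override
      public boolean hasMore()
      {
        // Illustrative: generate rows forever (the real example presumably
        // consults the waitIfmaxGeneratedRows flag declared above).
        return true;
      }

      @Override
      public InputRow nextRow()
      {
        rowCount++;
        final Map<String, Object> event = new HashMap<String, Object>();
        event.put("inColumn", rand.nextInt(100)); // illustrative random values
        event.put("target", rand.nextInt(10));
        return new MapBasedInputRow(System.currentTimeMillis(), dimensions, event);
      }

      @Override
      public Runnable commit()
      {
        // Nothing to acknowledge for generated data.
        return Runnables.getNoopRunnable();
      }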


      }
    };

    // Create firehose + plumber
    final FireDepartmentMetrics metrics = new FireDepartmentMetrics();
    final Firehose firehose = firehoseFactory.connect(ingestionSchema.getDataSchema().getParser());
    final Plumber plumber = new YeOldePlumberSchool(
        interval,
        version,
        wrappedDataSegmentPusher,
        tmpDir
    ).findPlumber(
        schema,
        new RealtimeTuningConfig(null, null, null, null, null, null, null, shardSpec, null, null),
        metrics
    );

    // rowFlushBoundary for this job
    final int myRowFlushBoundary = rowFlushBoundary > 0
                                   ? rowFlushBoundary
                                   : toolbox.getConfig().getDefaultRowFlushBoundary();
    final QueryGranularity rollupGran = ingestionSchema.getDataSchema().getGranularitySpec().getQueryGranularity();
    try {
      plumber.startJob();

      while (firehose.hasMore()) {
        final InputRow inputRow = firehose.nextRow();

        if (shouldIndex(shardSpec, interval, inputRow, rollupGran)) {
          int numRows = plumber.add(inputRow);
          if (numRows == -1) {
            throw new ISE(
                String.format(
                    "Was expecting non-null sink for timestamp[%s]",
                    new DateTime(inputRow.getTimestampFromEpoch())
                )
            );
          }
          metrics.incrementProcessed();

          if (numRows >= myRowFlushBoundary) {
            plumber.persist(firehose.commit());
          }
        } else {
          metrics.incrementThrownAway();
        }
      }
    }
    finally {
      firehose.close();
    }

    plumber.persist(firehose.commit());

    try {
      plumber.finishJob();
    }
    finally {
      // … (snippet truncated)
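
Note the persist/commit pairing above: firehose.commit() returns a Runnable, and handing it to plumber.persist(...) means it runs only after the in-memory rows are durably persisted. That is what lets pull-based firehoses (Kafka and RabbitMQ below) acknowledge their input safely. Schematically (persistToDisk is a hypothetical stand-in for the plumber's persist step):

// Hypothetical sketch of what plumber.persist(firehose.commit()) amounts to:
final Runnable committer = firehose.commit(); // snapshot the commit point first
persistToDisk();                              // hypothetical: flush in-memory rows
committer.run();                              // then acknowledge the input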

  {

    final UpdateStream updateStream = factory.build();
    updateStream.start();

    return new Firehose()
    {
      Map<String, Object> map;
      private final Runnable doNothingRunnable = Runnables.getNoopRunnable();

      @Override
      // … (snippet truncated)

    twitterStream.addListener(statusListener);
    twitterStream.sample(); // creates a generic StatusStream
    log.info("returned from sample()");

    return new Firehose() {

      private final Runnable doNothingRunnable = new Runnable() {
        public void run()
        {
        }
      // … (snippet truncated)

    }

    final KafkaStream<byte[], byte[]> stream = streamList.get(0);
    final ConsumerIterator<byte[], byte[]> iter = stream.iterator();

    return new Firehose()
    {
      @Override
      public boolean hasMore()
      {
        return iter.hasNext();
      // … (snippet truncated)
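
A plausible continuation of the Kafka example, assuming the enclosing factory holds a ByteBufferInputRowParser (here called theParser) and the Kafka ConsumerConnector (here called connector) — both names are assumptions:

      @Override
      public InputRow nextRow()
      {
        final byte[] message = iter.next().message();
        if (message == null) {
          return null;
        }
        // theParser: assumed ByteBufferInputRowParser from the surrounding factory
        return theParser.parse(ByteBuffer.wrap(message));
      }

      @Override
      public Runnable commit()
      {
        return new Runnable()
        {
          @Override
          public void run()
          {
            // Assumed: committing offsets acknowledges everything consumed so far.
            connector.commitOffsets();
          }
        };
      }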

    boolean normalExit = true;

    // Set up firehose
    final Period intermediatePersistPeriod = spec.getTuningConfig().getIntermediatePersistPeriod();
    final Firehose firehose = spec.getIOConfig().getFirehoseFactory().connect(spec.getDataSchema().getParser());

    // It would be nice to get the PlumberSchool in the constructor, although that would need Jackson injectables for
    // things like the ServerView, which seems kind of odd. Perhaps revisit this once Guice has been introduced.

    final SegmentPublisher segmentPublisher = new TaskActionSegmentPublisher(this, toolbox);

    // NOTE: We talk to the coordinator in various places in the plumber and we could be more robust to issues
    // with the coordinator.  Right now, we'll block/throw in whatever thread triggered the coordinator behavior,
    // which will typically be either the main data processing loop or the persist thread.

    // Wrap default DataSegmentAnnouncer such that we unlock intervals as we unannounce segments
    final DataSegmentAnnouncer lockingSegmentAnnouncer = new DataSegmentAnnouncer()
    {
      @Override
      public void announceSegment(final DataSegment segment) throws IOException
      {
        // Side effect: Calling announceSegment causes a lock to be acquired
        toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
        toolbox.getSegmentAnnouncer().announceSegment(segment);
      }

      @Override
      public void unannounceSegment(final DataSegment segment) throws IOException
      {
        try {
          toolbox.getSegmentAnnouncer().unannounceSegment(segment);
        }
        finally {
          toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
        }
      }

      @Override
      public void announceSegments(Iterable<DataSegment> segments) throws IOException
      {
        // Side effect: Calling announceSegments causes locks to be acquired
        for (DataSegment segment : segments) {
          toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
        }
        toolbox.getSegmentAnnouncer().announceSegments(segments);
      }

      @Override
      public void unannounceSegments(Iterable<DataSegment> segments) throws IOException
      {
        try {
          toolbox.getSegmentAnnouncer().unannounceSegments(segments);
        }
        finally {
          for (DataSegment segment : segments) {
            toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
          }
        }
      }
    };

    // NOTE: getVersion will block if there is lock contention, which will block plumber.getSink
    // NOTE: (and thus the firehose)

    // Lock contention shouldn't usually happen, since we don't expect people to submit tasks that intersect with
    // the realtime window, but if they do it can be problematic. If we decide to care, we could use more threads in
    // the plumber so that waiting for the coordinator doesn't block data processing.
    final VersioningPolicy versioningPolicy = new VersioningPolicy()
    {
      @Override
      public String getVersion(final Interval interval)
      {
        try {
          // Side effect: Calling getVersion causes a lock to be acquired
          final TaskLock myLock = toolbox.getTaskActionClient()
                                         .submit(new LockAcquireAction(interval));

          return myLock.getVersion();
        }
        catch (IOException e) {
          throw Throwables.propagate(e);
        }
      }
    };

    DataSchema dataSchema = spec.getDataSchema();
    RealtimeIOConfig realtimeIOConfig = spec.getIOConfig();
    RealtimeTuningConfig tuningConfig = spec.getTuningConfig()
                                              .withBasePersistDirectory(new File(toolbox.getTaskWorkDir(), "persist"))
                                              .withVersioningPolicy(versioningPolicy);

    final FireDepartment fireDepartment = new FireDepartment(
        dataSchema,
        realtimeIOConfig,
        tuningConfig,
        null,
        null,
        null,
        null
    );
    final RealtimeMetricsMonitor metricsMonitor = new RealtimeMetricsMonitor(ImmutableList.of(fireDepartment));
    this.queryRunnerFactoryConglomerate = toolbox.getQueryRunnerFactoryConglomerate();

    // NOTE: This pusher selects path based purely on global configuration and the DataSegment, which means
    // NOTE: that redundant realtime tasks will upload to the same location. This can cause index.zip and
    // NOTE: descriptor.json to mismatch, or it can cause historical nodes to load different instances of the
    // NOTE: "same" segment.
    final RealtimePlumberSchool plumberSchool = new RealtimePlumberSchool(
        toolbox.getEmitter(),
        toolbox.getQueryRunnerFactoryConglomerate(),
        toolbox.getSegmentPusher(),
        lockingSegmentAnnouncer,
        segmentPublisher,
        toolbox.getNewSegmentServerView(),
        toolbox.getQueryExecutorService(),
        null,
        null,
        null,
        null,
        null,
        null,
        0
    );

    this.plumber = plumberSchool.findPlumber(dataSchema, tuningConfig, fireDepartment.getMetrics());

    try {
      plumber.startJob();

      // Set up metrics emission
      toolbox.getMonitorScheduler().addMonitor(metricsMonitor);

      // Time to read data!
      long nextFlush = new DateTime().plus(intermediatePersistPeriod).getMillis();
      while (firehose.hasMore()) {
        final InputRow inputRow;
        try {
          inputRow = firehose.nextRow();
          if (inputRow == null) {
            continue;
          }

          int currCount = plumber.add(inputRow);
          if (currCount == -1) {
            fireDepartment.getMetrics().incrementThrownAway();
            log.debug("Throwing away event[%s]", inputRow);

            if (System.currentTimeMillis() > nextFlush) {
              plumber.persist(firehose.commit());
              nextFlush = new DateTime().plus(intermediatePersistPeriod).getMillis();
            }

            continue;
          }

          fireDepartment.getMetrics().incrementProcessed();
          if (currCount >= tuningConfig.getMaxRowsInMemory() || System.currentTimeMillis() > nextFlush) {
            plumber.persist(firehose.commit());
            nextFlush = new DateTime().plus(intermediatePersistPeriod).getMillis();
          }
        }
        catch (ParseException e) {
          log.warn(e, "unparseable line");
          fireDepartment.getMetrics().incrementUnparseable();
        }
      }
    }
    catch (Throwable e) {
      normalExit = false;
      log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource())
         .emit();
      throw e;
    }
    finally {
      if (normalExit) {
        try {
          plumber.persist(firehose.commit());
          plumber.finishJob();
        }
        catch (Exception e) {
          log.makeAlert(e, "Failed to finish realtime task").emit();
        }
        // … (snippet truncated)

    // We create a QueueingConsumer that will not auto-acknowledge messages since that
    // happens on commit().
    final QueueingConsumer consumer = new QueueingConsumer(channel);
    channel.basicConsume(queue, false, consumer);

    return new Firehose()
    {
      /**
       * Storing the latest delivery as a member variable should be safe since this will only be run
       * by a single thread.
       */
 
      // … (snippet truncated)
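
Given the manual-ack setup above, commit() would acknowledge everything up to the latest delivery. A sketch under that assumption (lastDeliveryTag stands in for the member variable the comment above describes; the name is assumed):

      private long lastDeliveryTag; // assumed name for the latest delivery's tag

      @Override
      public Runnable commit()
      {
        // Snapshot the commit point now; the Runnable runs later, after persist.
        final long tagToAck = lastDeliveryTag;
        return new Runnable()
        {
          @Override
          public void run()
          {
            try {
              // multiple=true acknowledges every delivery up to and including tagToAck,
              // matching the "no auto-ack until commit()" setup above.
              channel.basicAck(tagToAck, true);
            }
            catch (IOException e) {
              throw Throwables.propagate(e);
            }
          }
        };
      }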

  @Test
  public void testDimensions() throws Exception
  {
    InputRow inputRow;
    Firehose firehose = webbie.connect(null);
    if (firehose.hasMore()) {
      inputRow = firehose.nextRow();
    } else {
      throw new RuntimeException("queue is empty");
    }
    List<String> actualAnswer = inputRow.getDimensions();
    Collections.sort(actualAnswer);
    // … (snippet truncated)

  @Test
  public void testPosixTimeStamp() throws Exception
  {
    InputRow inputRow;
    Firehose firehose = webbie.connect(null);
    if (firehose.hasMore()) {
      inputRow = firehose.nextRow();
    } else {
      throw new RuntimeException("queue is empty");
    }
    long expectedTime = 1372121562L * 1000L;
    Assert.assertEquals(expectedTime, inputRow.getTimestampFromEpoch());
    // … (snippet truncated)

            return new MyUpdateStream(ImmutableMap.<String,Object>of("item1", "value1", "item2", 2, "time", "2013-07-08"));
          }
        },
        "auto"
    );
    Firehose firehose1 = webbie3.connect(null);
    if (firehose1.hasMore()) {
      long milliSeconds = firehose1.nextRow().getTimestampFromEpoch();
      DateTime date = new DateTime("2013-07-08");
      Assert.assertEquals(date.getMillis(), milliSeconds);
    } else {
      Assert.fail("hasMore returned false");
    }
    // … (snippet truncated)
