Package storm.trident

Examples of storm.trident.TridentTopology


    LocalCluster cluster = new LocalCluster();
    LocalDRPC localDRPC = new LocalDRPC();

    try {
      // Build topology
      TridentTopology topology = new TridentTopology();

      // Training stream
      TridentState perceptronModel = topology.newStream("nandsamples", new NANDSpout())
          // Update the classifier with each training instance
          .partitionPersist(new MemoryMapState.Factory(), new Fields("instance"), new ClassifierUpdater<Boolean>("perceptron", new PerceptronClassifier()));

      // Classification stream
      topology.newDRPCStream("predict", localDRPC)
      // convert DRPC args to instance
          .each(new Fields("args"), new DRPCArgsToInstance(), new Fields("instance"))

          // Query classifier to classify instance
          .stateQuery(perceptronModel, new Fields("instance"), new ClassifyQuery<Boolean>("perceptron"), new Fields("prediction")).project(new Fields("prediction"));
      cluster.submitTopology(this.getClass().getSimpleName(), new Config(), topology.build());

      Thread.sleep(4000);

      assertEquals(Boolean.TRUE, extractPrediction(localDRPC.execute("predict", "1.0 0.0 0.0")));
      assertEquals(Boolean.TRUE, extractPrediction(localDRPC.execute("predict", "1.0 0.0 1.0")));
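The assertions above rely on an extractPrediction helper that the snippet does not show. A minimal sketch, assuming the DRPC call returns the usual result string such as [[true]] (the helper and its parsing are our assumption, not part of the original example):

    // Hypothetical helper: pull the single boolean out of a DRPC result
    // string like "[[true]]".
    private static Boolean extractPrediction(String drpcResult) {
      // Strip brackets and whitespace, leaving just "true" or "false"
      String stripped = drpcResult.replaceAll("[\\[\\]\\s]", "");
      return Boolean.valueOf(stripped);
    }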


                new Values("four score and seven years ago"),
                new Values("how many apples can you eat"),
                new Values("to be or not to be the person"));
        spout.setCycle(true);
       
        TridentTopology topology = new TridentTopology();       
        TridentState wordCounts =
              topology.newStream("spout1", spout)
                .parallelismHint(16)
                .each(new Fields("sentence"), new Split(), new Fields("word"))
                .groupBy(new Fields("word"))
                .persistentAggregate(new MemoryMapState.Factory(),
                                     new Count(), new Fields("count"))        
                .parallelismHint(16);
               
        topology.newDRPCStream("words", drpc)
                .each(new Fields("args"), new Split(), new Fields("word"))
                .groupBy(new Fields("word"))
                .stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count"))
                .each(new Fields("count"), new FilterNull())
                .aggregate(new Fields("count"), new Sum(), new Fields("sum"))
                ;
        return topology.build();
    }
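The Split function used in both streams is the classic storm-starter word splitter: a BaseFunction that emits one tuple per word. A sketch consistent with that version:

    import backtype.storm.tuple.Values;
    import storm.trident.operation.BaseFunction;
    import storm.trident.operation.TridentCollector;
    import storm.trident.tuple.TridentTuple;

    // Splits a space-separated sentence into words, emitting one tuple per word.
    public static class Split extends BaseFunction {
      @Override
      public void execute(TridentTuple tuple, TridentCollector collector) {
        for (String word : tuple.getString(0).split(" ")) {
          collector.emit(new Values(word));
        }
      }
    }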


    public static StormTopology buildTopology(LocalDRPC drpc) {
        TridentTopology topology = new TridentTopology();
        TridentState urlToTweeters =
                topology.newStaticState(
                    new StaticSingleKeyMapState.Factory(TWEETERS_DB));
        TridentState tweetersToFollowers =
                topology.newStaticState(
                    new StaticSingleKeyMapState.Factory(FOLLOWERS_DB));


        topology.newDRPCStream("reach", drpc)
                .stateQuery(urlToTweeters, new Fields("args"), new MapGet(), new Fields("tweeters"))
                .each(new Fields("tweeters"), new ExpandList(), new Fields("tweeter"))
                .shuffle()
                .stateQuery(tweetersToFollowers, new Fields("tweeter"), new MapGet(), new Fields("followers"))
                .each(new Fields("followers"), new ExpandList(), new Fields("follower"))
                .groupBy(new Fields("follower"))
                .aggregate(new One(), new Fields("one"))
                .aggregate(new Fields("one"), new Sum(), new Fields("reach"));
        return topology.build();
    }
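ExpandList and One are the two small helpers this reach topology depends on. Sketches consistent with the storm-starter ReachTopology versions (imports as in the Split sketch above, plus java.util.List and storm.trident.operation.CombinerAggregator):

    // Expands a list-valued field into one tuple per element.
    public static class ExpandList extends BaseFunction {
      @Override
      public void execute(TridentTuple tuple, TridentCollector collector) {
        List<?> list = (List<?>) tuple.getValue(0);
        if (list != null) {
          for (Object item : list) {
            collector.emit(new Values(item));
          }
        }
      }
    }

    // Emits the constant 1 per group, so the follow-up Sum effectively
    // counts distinct followers.
    public static class One implements CombinerAggregator<Integer> {
      @Override
      public Integer init(TridentTuple tuple) { return 1; }
      @Override
      public Integer combine(Integer val1, Integer val2) { return 1; }
      @Override
      public Integer zero() { return 1; }
    }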

    // Spouts create Tuples, and Bolts manipulate them and possibly emit new ones.

    // But in Trident we operate at a higher level.
    // Bolts are created and connected automatically out of higher-level constructs.
    // Also, Spouts are "batched".
    TridentTopology topology = new TridentTopology();

    // The each() primitive allows us to apply either filters or functions to the stream.
    // We always have to select the input fields.
    topology.newStream("filter", spout).each(new Fields("text", "actor"), new PereTweetsFilter())
        .each(new Fields("text", "actor"), new Utils.PrintFilter());

    // Functions describe their output fields, which are always appended to the input fields.
    topology.newStream("function", spout)
        .each(new Fields("text", "actor"), new UppercaseFunction(), new Fields("uppercased_text"))
        .each(new Fields("text", "uppercased_text"), new Utils.PrintFilter());

    // As you can see, each() operations can be chained.

    // Stream can be parallelized with "parallelismHint"
    // Parallelism hint is applied downwards until a partitioning operation (we will see this later).
    // This topology creates 5 spouts and 5 bolts:
    // Let's debug that with TridentOperationContext.partitionIndex!
    topology.newStream("parallel", spout).each(new Fields("text", "actor"), new PereTweetsFilter())
        .parallelismHint(5).each(new Fields("text", "actor"), new Utils.PrintFilter());

    // A stream can be partitioned in various ways.
    // Let's partition it by "actor". What happens with previous example?
    topology.newStream("parallel_and_partitioned", spout).partitionBy(new Fields("actor"))
        .each(new Fields("text", "actor"), new PereTweetsFilter()).parallelismHint(5)
        .each(new Fields("text", "actor"), new Utils.PrintFilter());

    // Only one partition does the filtering, which makes sense in this case.
    // If we remove the partitionBy we get the previous behavior.

    // So far we had parallelism = 5 everywhere. What if we want only one spout?
    // We need to specify a partitioning policy for that to happen.
    // (We said that parallelism hint is applied downwards until a partitioning operation is found).

    // But if we don't want to partition by any field, we can just use shuffle()
    // We could also choose global() - with care!
    topology.newStream("parallel_and_partitioned", spout).parallelismHint(1).shuffle()
        .each(new Fields("text", "actor"), new PereTweetsFilter()).parallelismHint(5)
        .each(new Fields("text", "actor"), new Utils.PrintFilter());

    // Because data is batched, we can aggregate batches for efficiency.
    // The aggregate primitive aggregates one full batch. Useful if we want to persist the result of each batch only
    // once.
    // The aggregation for each batch is executed in a random partition as can be seen:
    topology.newStream("aggregation", spout).parallelismHint(1)
        .aggregate(new Fields("location"), new LocationAggregator(), new Fields("aggregated_result"))
        .parallelismHint(5).each(new Fields("aggregated_result"), new Utils.PrintFilter());

    // partitionAggregate, on the other hand, runs the aggregator separately on each partition's share of the batch.
    // Let's debug that with TridentOperationContext.partitionIndex!
    topology
        .newStream("partial_aggregation", spout)
        .parallelismHint(1)
        .shuffle()
        .partitionAggregate(new Fields("location"), new LocationAggregator(),
            new Fields("aggregated_result")).parallelismHint(6)
        .each(new Fields("aggregated_result"), new Utils.PrintFilter());

    // (See what happens when we change the Spout batch size / parallelism)

    // A useful primitive is groupBy.
    // It splits the stream into groups so that aggregations only occur within a group.
    // Because now we are grouping, the aggregation function can be much simpler (Count())
    // We don't need to use HashMaps anymore.
    topology.newStream("aggregation", spout).parallelismHint(1).groupBy(new Fields("location"))
        .aggregate(new Fields("location"), new Count(), new Fields("count")).parallelismHint(5)
        .each(new Fields("location", "count"), new Utils.PrintFilter());

    // EXERCISE: Use Functions and Aggregators to parallelize per-hashtag counts.
    // Step by step: 1) Obtain and select hashtags, 2) Write the Aggregator.

    // Bonus 1: State API.
    // Bonus 2: "Trending" hashtags.
    return topology.build();
  }
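The comments above twice suggest debugging partition placement through TridentOperationContext.partitionIndex. A minimal sketch of such a filter (the class name is ours; getPartitionIndex() is the accessor; imports are java.util.Map, storm.trident.operation.BaseFilter and storm.trident.operation.TridentOperationContext):

    // Keeps every tuple, but logs which partition processed it.
    public static class PartitionDebugFilter extends BaseFilter {
      private int partitionIndex;

      @Override
      public void prepare(Map conf, TridentOperationContext context) {
        this.partitionIndex = context.getPartitionIndex();
      }

      @Override
      public boolean isKeep(TridentTuple tuple) {
        System.err.println("partition " + partitionIndex + ": " + tuple);
        return true;
      }
    }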

public class Skeleton {

  public static StormTopology buildTopology(LocalDRPC drpc) throws IOException {
    FakeTweetsBatchSpout spout = new FakeTweetsBatchSpout();

    TridentTopology topology = new TridentTopology();
    topology.newStream("spout", spout).each(new Fields("id", "text", "actor", "location", "date"),
        new Utils.PrintFilter());

    return topology.build();
  }


  public static StormTopology buildTopology(LocalDRPC drpc) throws IOException {
    FakeTweetsBatchSpout spout = new FakeTweetsBatchSpout();

    TridentTopology topology = new TridentTopology();
    topology.newStream("spout", spout)
        .parallelismHint(2)
        .partitionBy(new Fields("actor"))
        // .shuffle()
        .each(new Fields("actor", "text"), new PerActorTweetsFilter("dave")).parallelismHint(5)
        .each(new Fields("actor", "text"), new Utils.PrintFilter());

    return topology.build();
  }
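PerActorTweetsFilter is not shown here. A plausible implementation, assuming the actor name arrives as the first selected field:

    // Keeps only the tweets whose "actor" field matches the configured name.
    public static class PerActorTweetsFilter extends BaseFilter {
      private final String actor;

      public PerActorTweetsFilter(String actor) {
        this.actor = actor;
      }

      @Override
      public boolean isKeep(TridentTuple tuple) {
        return actor.equals(tuple.getString(0));
      }
    }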

 
  public static StormTopology buildTopology(LocalDRPC drpc) throws IOException {
    FakeTweetsBatchSpout spout = new FakeTweetsBatchSpout(100);

    TridentTopology topology = new TridentTopology();
    topology.newStream("spout", spout)
      .aggregate(new Fields("location"), new LocationAggregator(), new Fields("location_counts"))
      .each(new Fields("location_counts"), new Utils.PrintFilter());
   
    return topology.build();
  }
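LocationAggregator, used here and in the earlier walkthrough, is presumably the HashMap-based batch aggregator that the groupBy variant below renders unnecessary. A sketch under that assumption (imports: java.util.HashMap, java.util.Map, storm.trident.operation.BaseAggregator):

    // Counts tuples per location within a single batch, emitting the whole
    // map once the batch completes.
    public static class LocationAggregator extends BaseAggregator<Map<String, Integer>> {
      @Override
      public Map<String, Integer> init(Object batchId, TridentCollector collector) {
        return new HashMap<String, Integer>();
      }

      @Override
      public void aggregate(Map<String, Integer> counts, TridentTuple tuple, TridentCollector collector) {
        String location = tuple.getString(0);
        Integer current = counts.get(location);
        counts.put(location, current == null ? 1 : current + 1);
      }

      @Override
      public void complete(Map<String, Integer> counts, TridentCollector collector) {
        collector.emit(new Values(counts));
      }
    }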

public class PerLocationCounts2 {

  public static StormTopology buildTopology(LocalDRPC drpc) throws IOException {
    FakeTweetsBatchSpout spout = new FakeTweetsBatchSpout(100);

    TridentTopology topology = new TridentTopology();
    topology.newStream("spout", spout)
      .groupBy(new Fields("location"))
      .aggregate(new Fields("location"), new Count(), new Fields("count"))
      .each(new Fields("location", "count"), new Utils.PrintFilter());
   
    return topology.build();
  }

public class GlobalTop20Hashtags {

    public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {

        TridentTopology topology = new TridentTopology();
        TridentState count =
        topology
                .newStream("tweets", spout)
                .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
                .project(new Fields("content", "user"))
                .each(new Fields("content"), new OnlyHashtags())
                .each(new Fields("user"), new OnlyEnglish())
                .each(new Fields("content", "user"), new ExtractFollowerClassAndContentName(), new Fields("followerClass", "contentName"))
                .groupBy(new Fields("followerClass", "contentName"))
                .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
        ;


        topology
                .newDRPCStream("top_hashtags")
                .stateQuery(count, new TupleCollectionGet(), new Fields("followerClass", "contentName"))
                .stateQuery(count, new Fields("followerClass", "contentName"), new MapGet(), new Fields("count"))
                .aggregate(new Fields("contentName", "count"), new FirstN.FirstNSortedAgg(5,"count", true), new Fields("contentName", "count"))
        ;

        return topology.build();
    }
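Because this stream is created without a LocalDRPC handle, the topology is queried through a remote DRPC server. A usage sketch with backtype.storm.utils.DRPCClient (host and port are placeholders; exception handling omitted):

    // Ask the running topology for the current top hashtags per follower class.
    DRPCClient client = new DRPCClient("drpc-host", 3772);
    System.out.println(client.execute("top_hashtags", ""));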

public class TopHashtagFollowerCountGrouping {

    public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {

        TridentTopology topology = new TridentTopology();
        TridentState count =
        topology
                .newStream("tweets", spout)
                .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
                .project(new Fields("content", "user"))
                .each(new Fields("content"), new OnlyHashtags())
                .each(new Fields("user"), new OnlyEnglish())
                .each(new Fields("content", "user"), new ExtractFollowerClassAndContentName(), new Fields("followerClass", "contentName"))
                .parallelismHint(3)
                .groupBy(new Fields("followerClass", "contentName"))
                .persistentAggregate(new HazelCastStateFactory(), new Count(), new Fields("count"))
                .parallelismHint(3)
        ;


        topology
                .newDRPCStream("hashtag_count")
                .each(new Constants<String>("< 100", "< 10K", "< 100K", ">= 100K"), new Fields("followerClass"))
                .stateQuery(count, new Fields("followerClass", "args"), new MapGet(), new Fields("count"))
        ;

        return topology.build();
    }
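The Constants helper fans each DRPC request out to every follower class before the state query. A plausible sketch (the real project may implement it differently): for every input tuple it emits one tuple per configured constant:

    // Emits each constant once per input tuple, e.g. one "followerClass"
    // value per incoming DRPC request.
    public static class Constants<T> extends BaseFunction {
      private final T[] values;

      public Constants(T... values) {
        this.values = values;
      }

      @Override
      public void execute(TridentTuple tuple, TridentCollector collector) {
        for (T value : values) {
          collector.emit(new Values(value));
        }
      }
    }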
