package backtype.storm.topology;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import backtype.storm.Config;
import backtype.storm.generated.Bolt;
import backtype.storm.generated.ComponentCommon;
import backtype.storm.generated.ComponentObject;
import backtype.storm.generated.GlobalStreamId;
import backtype.storm.generated.Grouping;
import backtype.storm.generated.NullStruct;
import backtype.storm.generated.SpoutSpec;
import backtype.storm.generated.StateSpoutSpec;
import backtype.storm.generated.StormTopology;
import backtype.storm.grouping.CustomStreamGrouping;
import backtype.storm.tuple.Fields;
import backtype.storm.utils.Utils;
/**
* TopologyBuilder exposes the Java API for specifying a topology for Storm to
* execute. Topologies are Thrift structures in the end, but since the Thrift
* API is so verbose, TopologyBuilder greatly eases the process of creating
* topologies. The template for creating and submitting a topology looks
* something like:
*
* <pre>
* TopologyBuilder builder = new TopologyBuilder();
*
* builder.setSpout("1", new TestWordSpout(true), 5);
* builder.setSpout("2", new TestWordSpout(true), 3);
* builder.setBolt("3", new TestWordCounter(), 3)
* .fieldsGrouping("1", new Fields("word"))
* .fieldsGrouping("2", new Fields("word"));
* builder.setBolt("4", new TestGlobalCount()).globalGrouping("1");
*
* Map conf = new HashMap();
* conf.put(Config.TOPOLOGY_WORKERS, 4);
*
* StormSubmitter.submitTopology("mytopology", conf, builder.createTopology());
* </pre>
*
* Running the exact same topology in local mode (in process), and configuring
* it to log all tuples emitted, looks like the following. Note that it lets the
* topology run for 10 seconds before shutting down the local cluster.
*
* <pre>
* TopologyBuilder builder = new TopologyBuilder();
*
* builder.setSpout("1", new TestWordSpout(true), 5);
* builder.setSpout("2", new TestWordSpout(true), 3);
* builder.setBolt("3", new TestWordCounter(), 3)
* .fieldsGrouping("1", new Fields("word"))
* .fieldsGrouping("2", new Fields("word"));
* builder.setBolt("4", new TestGlobalCount()).globalGrouping("1");
*
* Map conf = new HashMap();
* conf.put(Config.TOPOLOGY_WORKERS, 4);
* conf.put(Config.TOPOLOGY_DEBUG, true);
*
* LocalCluster cluster = new LocalCluster();
* cluster.submitTopology("mytopology", conf, builder.createTopology());
* Utils.sleep(10000);
* cluster.shutdown();
* </pre>
*
* <p>
* The pattern for TopologyBuilder is to map component ids to components using
* the setSpout and setBolt methods. Those methods return objects that are then
* used to declare the inputs for that component.
* </p>
*/
public class TopologyBuilder {
/** Bolts registered through the {@code setBolt} methods, keyed by component id. */
private final Map<String, IRichBolt> _bolts = new HashMap<String, IRichBolt>();
/** Spouts registered through the {@code setSpout} methods, keyed by component id. */
private final Map<String, IRichSpout> _spouts = new HashMap<String, IRichSpout>();
/**
 * Per-component settings (inputs, parallelism hint and JSON configuration),
 * keyed by component id. An entry is created when a bolt or spout is registered.
 */
private final Map<String, ComponentCommon> _commons = new HashMap<String, ComponentCommon>();
/**
 * State spouts keyed by component id. Nothing in this class adds entries yet;
 * the map is only consulted when checking a new id for collisions.
 */
private final Map<String, StateSpoutSpec> _stateSpouts = new HashMap<String, StateSpoutSpec>();
/**
 * Builds the Thrift {@link StormTopology} from every bolt and spout registered
 * so far.
 *
 * <p>
 * Each component is serialized with {@code Utils.serialize} and wrapped in a
 * {@link ComponentObject}. It is paired with a {@link ComponentCommon} built
 * from the settings stored for its id plus the output streams the component
 * declares. The state-spout section of the result is always an empty map.
 * </p>
 *
 * @return a new topology structure describing the registered spouts and bolts
 */
public StormTopology createTopology() {
    Map<String, Bolt> boltSpecs = new HashMap<String, Bolt>();
    Map<String, SpoutSpec> spoutSpecs = new HashMap<String, SpoutSpec>();

    // Walk the entries directly instead of looking every key up a second time.
    for (Map.Entry<String, IRichBolt> entry : _bolts.entrySet()) {
        String boltId = entry.getKey();
        IRichBolt bolt = entry.getValue();
        ComponentCommon common = getComponentCommon(boltId, bolt);
        ComponentObject serializedBolt = ComponentObject.serialized_java(Utils.serialize(bolt));
        boltSpecs.put(boltId, new Bolt(serializedBolt, common));
    }

    for (Map.Entry<String, IRichSpout> entry : _spouts.entrySet()) {
        String spoutId = entry.getKey();
        IRichSpout spout = entry.getValue();
        ComponentCommon common = getComponentCommon(spoutId, spout);
        ComponentObject serializedSpout = ComponentObject.serialized_java(Utils.serialize(spout));
        spoutSpecs.put(spoutId, new SpoutSpec(serializedSpout, common));
    }

    return new StormTopology(spoutSpecs, boltSpecs,
            new HashMap<String, StateSpoutSpec>());
}
/**
 * Registers a bolt under the given id without an explicit parallelism hint.
 *
 * <p>
 * Delegates to {@link #setBolt(String, IRichBolt, Number)} with a {@code null}
 * hint, which is recorded as a parallelism of one.
 * </p>
 *
 * @param id
 *            the id of this component. Other components use this id to
 *            consume this bolt's outputs.
 * @param bolt
 *            the bolt
 * @return the declarer used to declare the inputs to this component
 */
public BoltDeclarer setBolt(String id, IRichBolt bolt) {
    final Number unspecifiedParallelism = null;
    return setBolt(id, bolt, unspecifiedParallelism);
}
/**
 * Registers a bolt under the given id with the specified amount of
 * parallelism.
 *
 * @param id
 *            the id of this component. Other components use this id to
 *            consume this bolt's outputs.
 * @param bolt
 *            the bolt
 * @param parallelism_hint
 *            the number of tasks that should be assigned to execute this
 *            bolt, or {@code null} to record a hint of one. Each task will
 *            run on a thread in a process somewhere around the cluster.
 * @return the declarer used to declare the inputs to this component
 * @throws IllegalArgumentException
 *             if the id is already used by a bolt, spout or state spout
 */
public BoltDeclarer setBolt(String id, IRichBolt bolt,
        Number parallelism_hint) {
    // The id must be checked before anything is recorded for it.
    validateUnusedId(id);
    initCommon(id, bolt, parallelism_hint);
    _bolts.put(id, bolt);

    BoltDeclarer declarer = new BoltGetter(id);
    return declarer;
}
/**
 * Registers a basic bolt under the given id without an explicit parallelism
 * hint.
 *
 * <p>
 * A basic bolt is a simpler to use but more restricted kind of bolt. Basic
 * bolts are intended for non-aggregation processing and automate the
 * anchoring/acking process to achieve proper reliability in the topology.
 * </p>
 *
 * @param id
 *            the id of this component. Other components use this id to
 *            consume this bolt's outputs.
 * @param bolt
 *            the basic bolt
 * @return the declarer used to declare the inputs to this component
 */
public BoltDeclarer setBolt(String id, IBasicBolt bolt) {
    final Number unspecifiedParallelism = null;
    return setBolt(id, bolt, unspecifiedParallelism);
}
/**
 * Registers a basic bolt under the given id with the specified amount of
 * parallelism.
 *
 * <p>
 * A basic bolt is a simpler to use but more restricted kind of bolt. Basic
 * bolts are intended for non-aggregation processing and automate the
 * anchoring/acking process to achieve proper reliability in the topology.
 * The bolt is wrapped in a {@code BasicBoltExecutor} before it is registered.
 * </p>
 *
 * @param id
 *            the id of this component. Other components use this id to
 *            consume this bolt's outputs.
 * @param bolt
 *            the basic bolt
 * @param parallelism_hint
 *            the number of tasks that should be assigned to execute this
 *            bolt. Each task will run on a thread in a process somewhere
 *            around the cluster.
 * @return the declarer used to declare the inputs to this component
 */
public BoltDeclarer setBolt(String id, IBasicBolt bolt,
        Number parallelism_hint) {
    BasicBoltExecutor executor = new BasicBoltExecutor(bolt);
    return setBolt(id, executor, parallelism_hint);
}
/**
 * Registers a spout under the given id without an explicit parallelism hint.
 *
 * <p>
 * Delegates to {@link #setSpout(String, IRichSpout, Number)} with a
 * {@code null} hint.
 * </p>
 *
 * @param id
 *            the id of this component. Other components use this id to
 *            consume this spout's outputs.
 * @param spout
 *            the spout
 * @return the declarer used to add configuration to this component
 */
public SpoutDeclarer setSpout(String id, IRichSpout spout) {
    final Number unspecifiedParallelism = null;
    return setSpout(id, spout, unspecifiedParallelism);
}
/**
 * Registers a spout under the given id with the specified parallelism. If
 * the spout declares itself as non-distributed, the parallelism_hint will be
 * ignored and only one task will be allocated to this component.
 *
 * @param id
 *            the id of this component. Other components use this id to
 *            consume this spout's outputs.
 * @param spout
 *            the spout
 * @param parallelism_hint
 *            the number of tasks that should be assigned to execute this
 *            spout, or {@code null} to record a hint of one. Each task will
 *            run on a thread in a process somewhere around the cluster.
 * @return the declarer used to add configuration to this component
 * @throws IllegalArgumentException
 *             if the id is already used by a bolt, spout or state spout
 */
public SpoutDeclarer setSpout(String id, IRichSpout spout,
        Number parallelism_hint) {
    // The id must be checked before anything is recorded for it.
    validateUnusedId(id);
    initCommon(id, spout, parallelism_hint);
    _spouts.put(id, spout);

    SpoutDeclarer declarer = new SpoutGetter(id);
    return declarer;
}
/**
 * Convenience overload of
 * {@link #setStateSpout(String, IRichStateSpout, Number)} that passes a
 * {@code null} parallelism hint.
 *
 * @param id
 *            the id to use for the state spout
 * @param stateSpout
 *            the state spout
 */
public void setStateSpout(String id, IRichStateSpout stateSpout) {
    final Number unspecifiedParallelism = null;
    setStateSpout(id, stateSpout, unspecifiedParallelism);
}
/**
* Unfinished: currently this only checks that the id is not already taken.
*
* The state spout is not stored anywhere and no settings are recorded for it,
* so apart from the id check a call has no effect.
*
* @param id
* the id to use for the state spout
* @param stateSpout
* the state spout (currently unused)
* @param parallelism_hint
* the requested parallelism (currently unused)
* @throws IllegalArgumentException
* if the id is already used by a bolt, spout or state spout
*/
public void setStateSpout(String id, IRichStateSpout stateSpout,
Number parallelism_hint) {
validateUnusedId(id);
// TODO: finish -- the state spout still has to be registered under its id
}
/**
 * Ensures that no bolt, spout or state spout has been declared under the
 * given id. The registries are checked in that order.
 *
 * @param id
 *            the component id about to be registered
 * @throws IllegalArgumentException
 *             if the id is already in use
 */
private void validateUnusedId(String id) {
    rejectIfDeclared(_bolts, id, "Bolt has already been declared for id ");
    rejectIfDeclared(_spouts, id, "Spout has already been declared for id ");
    rejectIfDeclared(_stateSpouts, id, "State spout has already been declared for id ");
}

/** Throws with {@code messagePrefix + id} when the registry already holds the id. */
private static void rejectIfDeclared(Map<String, ?> registry, String id,
        String messagePrefix) {
    if (registry.containsKey(id)) {
        throw new IllegalArgumentException(messagePrefix + id);
    }
}
/**
 * Produces the {@link ComponentCommon} to embed in the topology for one
 * component: a new instance constructed from the settings stored for the id,
 * with its streams set to the output fields the component declares.
 *
 * @param id
 *            the component id whose stored settings are used
 * @param component
 *            the component asked to declare its output fields
 * @return the new {@link ComponentCommon}; the stored one is not the object
 *         returned
 */
private ComponentCommon getComponentCommon(String id, IComponent component) {
    ComponentCommon stored = _commons.get(id);
    ComponentCommon result = new ComponentCommon(stored);

    OutputFieldsGetter declaredOutputs = new OutputFieldsGetter();
    component.declareOutputFields(declaredOutputs);
    result.set_streams(declaredOutputs.getFieldsDeclaration());

    return result;
}
/**
 * Creates and stores the initial settings for a newly registered component:
 * an empty input map, the parallelism hint, and the component's own
 * configuration encoded as JSON when it provides one.
 *
 * @param id
 *            the component id the settings are stored under
 * @param component
 *            the component whose configuration is read
 * @param parallelism
 *            the requested parallelism, or {@code null} for a hint of one
 */
private void initCommon(String id, IComponent component, Number parallelism) {
    ComponentCommon settings = new ComponentCommon();
    settings.set_inputs(new HashMap<GlobalStreamId, Grouping>());

    // A missing hint is recorded as a parallelism of one.
    int hint = (parallelism == null) ? 1 : parallelism.intValue();
    settings.set_parallelism_hint(hint);

    Map componentConf = component.getComponentConfiguration();
    if (componentConf != null) {
        settings.set_json_conf(Utils.to_json(componentConf));
    }

    _commons.put(id, settings);
}
/**
 * Base declarer that merges extra configuration into the JSON configuration
 * stored for one component.
 *
 * @param <T>
 *            the declarer type returned for chaining
 */
protected class ConfigGetter<T extends ComponentConfigurationDeclarer>
        extends BaseConfigurationDeclarer<T> {
    /** Id of the component whose stored settings this declarer updates. */
    String _id;

    public ConfigGetter(String id) {
        _id = id;
    }

    /**
     * Merges the given entries into the component's stored JSON
     * configuration; entries in {@code conf} override existing keys.
     *
     * @param conf
     *            the entries to add; {@code null} leaves the existing
     *            entries unchanged
     * @return this declarer, for chaining
     * @throws IllegalArgumentException
     *             if {@code conf} contains
     *             {@code Config.TOPOLOGY_KRYO_REGISTER}
     */
    @Override
    public T addConfigurations(Map conf) {
        boolean registersSerializations = conf != null
                && conf.containsKey(Config.TOPOLOGY_KRYO_REGISTER);
        if (registersSerializations) {
            throw new IllegalArgumentException(
                    "Cannot set serializations for a component using fluent API");
        }

        ComponentCommon settings = _commons.get(_id);
        Map existing = parseJson(settings.get_json_conf());
        settings.set_json_conf(mergeIntoJson(existing, conf));
        return (T) this;
    }
}
/**
* Declarer handed back by the setSpout methods. It adds no members of its own;
* everything it offers is inherited from ConfigGetter, bound to the spout's
* component id.
*/
protected class SpoutGetter extends ConfigGetter<SpoutDeclarer> implements
SpoutDeclarer {
/**
* @param id
* the component id of the spout this declarer configures
*/
public SpoutGetter(String id) {
super(id);
}
}
/**
 * Declarer handed back by the {@code setBolt} methods. Each grouping call
 * records one input subscription in the settings stored for the bolt and
 * returns this declarer so calls can be chained.
 *
 * <p>
 * Every overload that takes no stream id subscribes to
 * {@code Utils.DEFAULT_STREAM_ID}.
 * </p>
 */
protected class BoltGetter extends ConfigGetter<BoltDeclarer> implements
        BoltDeclarer {
    // The bolt id lives in the inherited _id field, so no second copy is kept here.

    /**
     * @param boltId
     *            the component id of the bolt whose inputs are declared
     */
    public BoltGetter(String boltId) {
        super(boltId);
    }

    /** Fields grouping on the default stream of {@code componentId}. */
    public BoltDeclarer fieldsGrouping(String componentId, Fields fields) {
        return fieldsGrouping(componentId, Utils.DEFAULT_STREAM_ID, fields);
    }

    /** Fields grouping on the given stream, keyed by the listed fields. */
    public BoltDeclarer fieldsGrouping(String componentId, String streamId,
            Fields fields) {
        return grouping(componentId, streamId,
                Grouping.fields(fields.toList()));
    }

    /** Global grouping on the default stream of {@code componentId}. */
    public BoltDeclarer globalGrouping(String componentId) {
        return globalGrouping(componentId, Utils.DEFAULT_STREAM_ID);
    }

    /** Global grouping on the given stream. */
    public BoltDeclarer globalGrouping(String componentId, String streamId) {
        // Encoded as a fields grouping over an empty field list.
        // NOTE(review): presumably the runtime treats the empty list as
        // "send everything to a single task" -- confirm against the consumer.
        return grouping(componentId, streamId,
                Grouping.fields(new ArrayList<String>()));
    }

    /** Shuffle grouping on the default stream of {@code componentId}. */
    public BoltDeclarer shuffleGrouping(String componentId) {
        return shuffleGrouping(componentId, Utils.DEFAULT_STREAM_ID);
    }

    /** Shuffle grouping on the given stream. */
    public BoltDeclarer shuffleGrouping(String componentId, String streamId) {
        return grouping(componentId, streamId,
                Grouping.shuffle(new NullStruct()));
    }

    /** Local-or-shuffle grouping on the default stream of {@code componentId}. */
    public BoltDeclarer localOrShuffleGrouping(String componentId) {
        return localOrShuffleGrouping(componentId, Utils.DEFAULT_STREAM_ID);
    }

    /** Local-or-shuffle grouping on the given stream. */
    public BoltDeclarer localOrShuffleGrouping(String componentId,
            String streamId) {
        return grouping(componentId, streamId,
                Grouping.local_or_shuffle(new NullStruct()));
    }

    /** Local-first grouping on the default stream of {@code componentId}. */
    @Override
    public BoltDeclarer localFirstGrouping(String componentId) {
        return localFirstGrouping(componentId, Utils.DEFAULT_STREAM_ID);
    }

    /** Local-first grouping on the given stream. */
    @Override
    public BoltDeclarer localFirstGrouping(String componentId,
            String streamId) {
        return grouping(componentId, streamId,
                Grouping.localFirst(new NullStruct()));
    }

    /** None grouping on the default stream of {@code componentId}. */
    public BoltDeclarer noneGrouping(String componentId) {
        return noneGrouping(componentId, Utils.DEFAULT_STREAM_ID);
    }

    /** None grouping on the given stream. */
    public BoltDeclarer noneGrouping(String componentId, String streamId) {
        return grouping(componentId, streamId,
                Grouping.none(new NullStruct()));
    }

    /** All grouping on the default stream of {@code componentId}. */
    public BoltDeclarer allGrouping(String componentId) {
        return allGrouping(componentId, Utils.DEFAULT_STREAM_ID);
    }

    /** All grouping on the given stream. */
    public BoltDeclarer allGrouping(String componentId, String streamId) {
        return grouping(componentId, streamId,
                Grouping.all(new NullStruct()));
    }

    /** Direct grouping on the default stream of {@code componentId}. */
    public BoltDeclarer directGrouping(String componentId) {
        return directGrouping(componentId, Utils.DEFAULT_STREAM_ID);
    }

    /** Direct grouping on the given stream. */
    public BoltDeclarer directGrouping(String componentId, String streamId) {
        return grouping(componentId, streamId,
                Grouping.direct(new NullStruct()));
    }

    /**
     * Records a subscription of this bolt to one stream of another component.
     * All public grouping methods funnel into this one.
     */
    private BoltDeclarer grouping(String componentId, String streamId,
            Grouping grouping) {
        _commons.get(_id).put_to_inputs(
                new GlobalStreamId(componentId, streamId), grouping);
        return this;
    }

    /** Custom grouping on the default stream of {@code componentId}. */
    @Override
    public BoltDeclarer customGrouping(String componentId,
            CustomStreamGrouping grouping) {
        return customGrouping(componentId, Utils.DEFAULT_STREAM_ID,
                grouping);
    }

    /** Custom grouping on the given stream; the grouping object is stored serialized. */
    @Override
    public BoltDeclarer customGrouping(String componentId, String streamId,
            CustomStreamGrouping grouping) {
        return grouping(componentId, streamId,
                Grouping.custom_serialized(Utils.serialize(grouping)));
    }

    /** Records an already-built grouping for the stream identified by {@code id}. */
    @Override
    public BoltDeclarer grouping(GlobalStreamId id, Grouping grouping) {
        return grouping(id.get_componentId(), id.get_streamId(), grouping);
    }
}
/**
 * Decodes a JSON configuration string into a map.
 *
 * @param json
 *            the JSON text, or {@code null} when no configuration is stored
 * @return a new empty map for {@code null} input, otherwise the result of
 *         {@code Utils.from_json} cast to a map
 */
private static Map parseJson(String json) {
    return (json == null) ? new HashMap() : (Map) Utils.from_json(json);
}
/**
 * Combines two configuration maps and encodes the result as JSON. Neither
 * argument is modified; entries of {@code newMap} override those of
 * {@code into}.
 *
 * @param into
 *            the base entries
 * @param newMap
 *            the entries to layer on top, or {@code null} for none
 * @return the JSON encoding of the combined map
 */
private static String mergeIntoJson(Map into, Map newMap) {
    Map merged = new HashMap(into);
    if (newMap == null) {
        return Utils.to_json(merged);
    }
    merged.putAll(newMap);
    return Utils.to_json(merged);
}
}