Package distributedRedditAnalyser.bolt

Source Code of distributedRedditAnalyser.bolt.DistributedOzaBoostBolt

package distributedRedditAnalyser.bolt;

import java.util.Map;

import distributedRedditAnalyser.ClassifierInstance;
import distributedRedditAnalyser.OzaBoost;

import weka.core.Instances;
import weka.core.SparseInstance;

import moa.classifiers.Classifier;
import moa.core.InstancesHeader;
import moa.options.ClassOption;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

/**
* DistributedOzaBoost runs instances through a custom OzaBoost object that has been designed to run over multiple bolts working in parallel
*
* @author Luke Barnett 1109967
* @author Tony Chen 1111377
*
*/
public class DistributedOzaBoostBolt extends BaseRichBolt {

  private static final long serialVersionUID = 8636714472891286811L;
  private final OzaBoost classifier;
  private final int id;
  private OutputCollector collector;
  private InstancesHeader INST_HEADERS;
 
  public DistributedOzaBoostBolt(String classifierName, int id){
    this.id = id;
    classifier = new OzaBoost();
    classifier.baseLearnerOption = new ClassOption("baseLearner", 'l',"Classifier to train.",Classifier.class, classifierName);
    classifier.ensembleSizeOption.setValue(50);
    classifier.prepareForUse();
  }

  @Override
  public void prepare(Map stormConf, TopologyContext context,  OutputCollector collector) {
    this.collector = collector;
  }

  @Override
  public void execute(Tuple input) {
   
    Object obj = input.getValue(0);
    //If we get the instance headers set them and reset
    if(obj.getClass() == Instances.class){
      INST_HEADERS = new InstancesHeader((Instances) obj);
      classifier.setModelContext(INST_HEADERS);
      classifier.resetLearningImpl();
    }else if(obj.getClass() == SparseInstance.class){
      //If it's an instance
      SparseInstance inst = (SparseInstance) obj;
      //Emit the entire prediction array and the correct value
      collector.emit(new Values(classifier.getVotesForInstance(inst), inst.classValue()));
      //Train on instance
      classifier.trainOnInstanceImpl(inst);
      //Send out our latest classifier
      ClassifierInstance latestClassifier = classifier.getLatestClassifier();
      if(latestClassifier != null)
        collector.emit(new Values(latestClassifier, (Integer)id));
    }else if(obj.getClass() == ClassifierInstance.class){
      //If it's a classifier try to add it to the ensemble
      //Make sure we aren't just adding our own classifier
      if(((Integer)input.getValue(1)).intValue() != id){
        classifier.addClassifier((ClassifierInstance)obj);
      }
    }
    collector.ack(input);
  }

  @Override
  public void declareOutputFields(OutputFieldsDeclarer declarer) {
    declarer.declare(new Fields("votesForInstance","actualClass"));
  }

}
TOP

Related Classes of distributedRedditAnalyser.bolt.DistributedOzaBoostBolt

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.