package joshua.discriminative.training.risk_annealer.hypergraph;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.logging.Logger;
import joshua.corpus.vocab.SymbolTable;
import joshua.discriminative.feature_related.feature_template.FeatureTemplate;
import joshua.discriminative.training.parallel.ProducerConsumerModel;
import joshua.discriminative.training.risk_annealer.GradientComputer;
import joshua.discriminative.training.risk_annealer.hypergraph.parallel.GradientConsumer;
import joshua.discriminative.training.risk_annealer.hypergraph.parallel.HGProducer;
public class HGRiskGradientComputer extends GradientComputer {
private int numSentence;
private boolean fixFirstFeature = false;
HyperGraphFactory hgFactory;
private double sumGain = 0; //negative risk
private double sumEntropy = 0;
int numCalls = 0;
int maxNumHGInQueue = 100;
int numThreads = 5;
boolean useSemiringV2 = false;
//risk and feature related
SymbolTable symbolTbl;
int ngramStateID;
int baselineLMOrder;
HashMap<String, Integer> featureStringToIntegerMap;
List<FeatureTemplate> featTemplates;
double[] linearCorpusGainThetas;
boolean haveRefereces = true;
double minFactor = 1.0; //minimize conditional entropy: 1; minimum risk: -1
/** Logger for this class. */
static final private Logger logger =
Logger.getLogger(HGRiskGradientComputer.class.getSimpleName());
public HGRiskGradientComputer(boolean useSemiringV2, int numSentence,
int numFeatures, double gainFactor, double scale, double temperature, boolean computeScalingGradient,
boolean fixFirstFeature, HyperGraphFactory hgFactory,
int maxNumHGInQueue, int numThreads,
//== feature and risk related
int ngramStateID, int baselineLMOrder, SymbolTable symbolTbl,
HashMap<String, Integer> featureStringToIntegerMap,
List<FeatureTemplate> featTemplates, double[] linearCorpusGainThetas, boolean haveRefereces){
super(numFeatures, gainFactor, scale, temperature, computeScalingGradient);
this.useSemiringV2 = useSemiringV2;
this.numSentence = numSentence;
this.fixFirstFeature = fixFirstFeature;
this.hgFactory = hgFactory;
this.maxNumHGInQueue = maxNumHGInQueue;
this.numThreads = numThreads;
// System.out.println("use HGRiskGradientComputer====");
//== feature and risk related
this.ngramStateID = ngramStateID;
this.baselineLMOrder = baselineLMOrder;
this.symbolTbl = symbolTbl;
this.featureStringToIntegerMap = featureStringToIntegerMap;
this.featTemplates = featTemplates;
this.linearCorpusGainThetas = linearCorpusGainThetas;
this.haveRefereces = haveRefereces;
this.minFactor = this.haveRefereces ? -1 : 1;
}
@Override
public void reComputeFunctionValueAndGradient(double[] weights) {
//==set up the current value
double[] weights2 = weights;
if(shouldComputeGradientForScalingFactor){//first weight is for scaling parameter
//==sanity check
if(weights.length!=numFeatures+1){
System.out.println("number of weights is not right");
System.exit(1);
}
scalingFactor = weights[0];//!!!!!!!!!! big bug: old code does not have this!!!!!!!!!!!!!!!
//System.out.println("scaling is " + annealing_scale + "; weight is " + weights[0]);
weights2 = new double[numFeatures];
for(int i=0; i<numFeatures; i++)
weights2[i] = weights[i+1];
}
//==reset values
for(int i=0; i<gradientsForTheta.length; i++)
gradientsForTheta[i] = 0;
if(shouldComputeGradientForScalingFactor)
gradientForScalingFactor = 0;
functionValue = 0;
sumGain =0;
sumEntropy = 0;
/*
System.out.println("=====optimizeScale: "+ shouldComputeGradientForScalingFactor + "; scalingFactor: "+ scalingFactor + "; temperature: " + temperature);
System.out.print("=====weights: ");
for(int i=0; i<weights2.length; i++)
System.out.print(" " + weights2[i]);
System.out.print("\n");
*/
//== compute gradients and function value
hgFactory.startLoop();
reComputeFunctionValueAndGradientHelper(weights2);
hgFactory.endLoop();
printLastestStatistics();
numCalls++;
//logger.info("numTimesCalled=" + numCalls);
/*if(numCalls>3)
System.exit(0);
*/
}
@Override
public void printLastestStatistics() {
//logger.info("one iteration (i.e. computing gradient and function value).......");
System.out.println("Func value=" + functionValue + "=" + sumGain+"*"+ gainFactor + "+" +temperature +"*" +sumEntropy);
/*System.out.print("Gradients are: ");
for(int i=0; i<numFeatures; i++){
System.out.print( gradientsForTheta[i]+" " );
}
System.out.print("\n");
System.out.println("Gradient for scale is : " + gradientForScalingFactor);
*/
}
private void reComputeFunctionValueAndGradientHelper(double[] weightsForTheta){
//== queue
//System.out.println("maxNumHGInQueue=" + maxNumHGInQueue);
BlockingQueue<HGAndReferences> queue = new ArrayBlockingQueue<HGAndReferences>(maxNumHGInQueue);
//== producer
HGProducer producer = new HGProducer(hgFactory, queue, numThreads, numSentence);
//== consumers
List<GradientConsumer> consumers = new ArrayList<GradientConsumer>();
for(int i=0; i<numThreads; i++){
RiskAndFeatureAnnotationOnLMHG riskAnnotatorNoEquiv = new RiskAndFeatureAnnotationOnLMHG(
this.baselineLMOrder, this.ngramStateID, this.linearCorpusGainThetas, this.symbolTbl,
this.featureStringToIntegerMap, this.featTemplates, this.haveRefereces);
GradientConsumer c = new GradientConsumer(this.useSemiringV2, this, queue, weightsForTheta,
riskAnnotatorNoEquiv, this.temperature, this.scalingFactor, this.shouldComputeGradientForScalingFactor);
consumers.add(c);
}
//== create model, and start parallel computing
ProducerConsumerModel<HGAndReferences, HGProducer, GradientConsumer>
model = new ProducerConsumerModel<HGAndReferences, HGProducer, GradientConsumer>(queue, producer, consumers);
model.runParallel();
}
/**GradientConsumer is going to calll this function ot accumate gradient
**/
/**The inputs are for risk-T*entropy*/
public synchronized void accumulateGradient(HashMap<Integer, Double> gradients, double gradientForScalingFactor, double funcVal, double risk, double entropy){
for(Map.Entry<Integer, Double> feature : gradients.entrySet()){
gradientsForTheta[feature.getKey()] += minFactor*feature.getValue(); //we are maximizing, instead of minizing
}
if(shouldComputeGradientForScalingFactor)
this.gradientForScalingFactor += minFactor*gradientForScalingFactor;//we are maximizing, instead of minizing
if(this.fixFirstFeature)//do not tune the baseline feature
gradientsForTheta[0]=0;
//== compute function value
functionValue += minFactor*funcVal;//we are maximizing, instead of minizing
sumGain += -1.0 * risk;
sumEntropy += entropy;
}
}