BagOfWordsData bagOfWordsData) throws IOException {
for (Map.Entry<Integer, String> entry : bagOfWordsData
.getDocumentClasses().entrySet()) {
double[] zeroValues = new double[instances.numAttributes()];
Arrays.fill(zeroValues, 0.0d);
SparseInstance wekaInstance = new SparseInstance(1.0d, zeroValues);
wekaInstance.setDataset(instances);
// set instance id
Attribute instanceId = instances.attribute(INSTANCE_ID);
wekaInstance.setValue(instanceId.index(), entry.getKey()
.doubleValue());
// set document class
Attribute classAttr = instances.attribute(CLASS);
wekaInstance.setValue(classAttr.index(),
classAttr.indexOfValue(entry.getValue()));
// set numeric words
if (bagOfWordsData.getInstanceNumericWords().get(entry.getKey()) != null) {
for (Map.Entry<String, Double> word : bagOfWordsData
.getInstanceNumericWords().get(entry.getKey())
.entrySet()) {
Attribute wordAttr = instances.attribute(word.getKey());
wekaInstance.setValue(wordAttr.index(), word.getValue()
.doubleValue());
}
}
// set nominal words
if (bagOfWordsData.getInstanceNominalWords().get(entry.getKey()) != null) {
for (Map.Entry<String, String> word : bagOfWordsData
.getInstanceNominalWords().get(entry.getKey())
.entrySet()) {
Attribute wordAttr = instances.attribute(word.getKey());
int valueIndex = wordAttr.indexOfValue(word.getValue());
if (valueIndex == -1) {
throw new IOException("oops! " + word);
}
wekaInstance.setValue(wordAttr.index(), valueIndex);
}
}
instances.add(wekaInstance);
}
}