// normalize instances
zeroOneNormalization(instanceList, instanceList.get(0).length);
// Persist every instance as a (row index -> vector) record in the sequence
// file at `path`; the same `path` is later handed to the trainer.
SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, LongWritable.class, VectorWritable.class);
try {
  for (int i = 0; i < instanceList.size(); ++i) {
    DoubleVector vector = new DenseDoubleVector(instanceList.get(i));
    writer.append(new LongWritable(i), new VectorWritable(vector));
  }
} finally {
  // Close on every exit path: previously a failing append() skipped close()
  // and leaked the underlying output stream.
  writer.close();
}
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (URISyntaxException e) {
e.printStackTrace();
}
// Build the auto-encoder under test.
// NOTE(review): the arguments (3, 2) are presumably the input dimension and the
// compressed dimension — confirm against the AutoEncoder constructor.
AutoEncoder encoder = new AutoEncoder(3, 2);
String modelPath = "/tmp/autoencoder-modelpath";
encoder.setModelPath(modelPath);
// Training configuration passed to train() as string key/value pairs.
Map<String, String> trainingParams = new HashMap<String, String>();
encoder.setLearningRate(0.5);
trainingParams.put("tasks", "5");
trainingParams.put("training.max.iterations", "3000");
trainingParams.put("training.batch.size", "200");
// Train on the data stored at `path`.
encoder.train(path, trainingParams);
// Number of instances whose reconstruction error exceeds the 0.1 threshold.
// Declared as double although it is only ever incremented by one.
double errorInstance = 0;
for (double[] instance : instanceList) {
DoubleVector vector = new DenseDoubleVector(instance);
// Run the instance through the encoder and compare the output with the input.
DoubleVector decoded = encoder.getOutput(vector);
DoubleVector diff = vector.subtract(decoded);
// Dot product of the difference with itself, i.e. the sum of squared
// per-component differences between input and output.
double error = diff.dot(diff);
if (error > 0.1) {
++errorInstance;
}
}