int samples = (int) Math.min(Math.log(NOT_FROM_ONE_CLUSTER_PROBABILITY) / (Math.log(1 - Math.pow((1.0d / samplingLevel), dimension))), (double) currentids.size());
// System.out.println("Number of samples: " + samples);
Random r = new Random();
int remaining_retries = 100;
for(int i = 1; i <= samples; i++) {
DBIDs sample = DBIDUtil.randomSample(currentids, dimension + 1, r.nextLong());
final Iterator<DBID> iter = sample.iterator();
// Use first as origin
DBID origin = iter.next();
Vector originV = relation.get(origin).getColumnVector();
// Build an orthonormal basis from the remaining sample points, taken as difference vectors to the origin
Matrix basis;
{
List<Vector> vectors = new ArrayList<Vector>(sample.size() - 1);
while(iter.hasNext()) {
Vector vec = relation.get(iter.next()).getColumnVector();
vectors.add(vec.minusEquals(originV));
}
// Generate the orthonormal basis from the difference vectors
basis = generateOrthonormalBasis(vectors);
if(basis == null) {
// No basis could be built: repeat this iteration with a new sample, using up one of the remaining retries.
i--;
remaining_retries--;
if(remaining_retries < 0) {
throw new AbortException("Too many retries in sampling, and always a linear dependant data set.");
}
continue;
}
}
// Generate and fill a histogram.
FlexiHistogram<Double, Double> histogram = FlexiHistogram.DoubleSumHistogram(BINS);
double w = 1.0 / currentids.size();
for(DBID point : currentids) {
// Skip sampled points
if(sample.contains(point)) {
continue;
}
Vector vec = relation.get(point).getColumnVector().minusEquals(originV);
final double distance = deviation(vec, basis);
histogram.aggregate(distance, w);