package com.googlecode.gaal.analysis.impl;
import java.util.Iterator;
import java.util.List;
import com.googlecode.gaal.analysis.api.VectorBuilder;
import com.googlecode.gaal.data.api.Corpus;
import com.googlecode.gaal.data.api.IntSequence;
import com.googlecode.gaal.data.api.Vector;
import com.googlecode.gaal.data.impl.SparseVector;
import com.googlecode.gaal.suffix.api.IntervalTree.Interval;
/**
 * {@link VectorBuilder} implementation that turns each interval delivered by
 * an iterator into a {@link Vector} keyed by the document ids of the
 * interval's indices.
 *
 * @param <C> the concrete interval type handled by this builder
 */
public class IntervalVectorBuilder<C extends Interval> implements VectorBuilder<C> {

    /**
     * Drains {@code iterator}, converts every interval to a vector via
     * {@link #toVector(Interval, Corpus)} and appends the interval and its
     * vector to the two output lists at the same position.
     * <p>
     * NOTE(review): the size filter is strict, so a vector whose size equals
     * {@code minVectorSize} is dropped; confirm this matches the
     * {@code VectorBuilder} contract.
     *
     * @param iterator      source of intervals; consumed completely
     * @param vectors       output list receiving the vectors that pass the filter
     * @param intervals     output list receiving the corresponding intervals
     * @param corpus        corpus used to resolve indices to document ids
     * @param minVectorSize threshold; only vectors with {@code size()} strictly
     *                      greater than this value are kept
     */
    @Override
    public <T> void buildVectors(Iterator<C> iterator, List<Vector> vectors, List<C> intervals, Corpus<T> corpus,
            int minVectorSize) {
        while (iterator.hasNext()) {
            C candidate = iterator.next();
            Vector candidateVector = toVector(candidate, corpus);
            if (candidateVector.size() <= minVectorSize) {
                // Too small: skip both the interval and its vector.
                continue;
            }
            intervals.add(candidate);
            vectors.add(candidateVector);
        }
    }

    /**
     * Builds a new {@link SparseVector} for one interval: for every index
     * reported by {@code interval.indices()}, the document id is looked up in
     * the corpus and {@code add(documentId, 1)} is called on the vector.
     *
     * @param interval the interval whose indices are visited
     * @param corpus   corpus that maps an index to its document id
     * @param <S>      the corpus element type
     * @return the newly created vector
     */
    public static <S> Vector toVector(Interval interval, Corpus<S> corpus) {
        Vector result = new SparseVector();
        IntSequence positions = interval.indices();
        int cursor = 0;
        while (cursor < positions.size()) {
            int position = positions.get(cursor);
            result.add(corpus.getDocumentId(position), 1);
            cursor++;
        }
        return result;
    }
}