dataStartColumn = 2;
}
/**
 * Parses the RNAi data file referenced by {@code dataFileLocator} and collapses the
 * hairpin-level values into per-sample, per-gene scores.
 * <p>
 * The first two lines of the file are skipped and the third line is read as the column
 * header; sample names are the header fields from {@code dataStartColumn} onwards, with
 * double quotes replaced by spaces and then trimmed. For every following row the probe id
 * in the first column is translated through the map returned by {@code getProbeMap()} and
 * the first mapped identifier (upper-cased) is looked up in {@code FeatureDB}. Rows whose
 * probe or gene cannot be resolved are logged and skipped. Each remaining cell is added to
 * {@code RNAIHairpinCache}, and the running {@code Math.min} of the values for each
 * sample/gene pair is passed to {@code computeGeneScores}.
 *
 * @return the result of {@code computeGeneScores} for the collected sample/gene scores
 * @throws RuntimeException wrapping the {@link IOException} if the file cannot be read or
 *                          ends before the column header line
 */
public Collection<RNAIDataSource> parse() {
    AsciiLineReader reader = null;
    try {
        reader = ParsingUtils.openAsciiReader(dataFileLocator);

        // Skip the two leading rows; the third row holds the column headings.
        reader.readLine();
        reader.readLine();
        String headerLine = reader.readLine();
        if (headerLine == null) {
            // Fail with a meaningful error instead of a NullPointerException in split().
            throw new IOException("RNAi file ended before the column header line");
        }

        // Parse column headings. The -1 limit keeps trailing empty fields.
        String[] tokens = Globals.tabPattern.split(headerLine, -1);
        // Clamp to zero so a header with fewer fields than dataStartColumn yields no
        // sample columns rather than a NegativeArraySizeException.
        int nColumns = Math.max(0, tokens.length - dataStartColumn);
        String[] columnHeadings = new String[nColumns];
        for (int i = 0; i < nColumns; i++) {
            columnHeadings[i] = tokens[dataStartColumn + i].replace('\"', ' ').trim();
        }

        Map<String, String[]> rnaiProbeMap = getProbeMap();
        // sample name -> (gene name -> collapsed score)
        HashMap<String, HashMap<String, Float>> sampleGeneScoreMap =
                new HashMap<String, HashMap<String, Float>>();

        String nextLine;
        while ((nextLine = reader.readLine()) != null) {
            tokens = Globals.tabPattern.split(nextLine, -1);
            int nTokens = tokens.length;
            String probeId = tokens[0];

            String[] identifiers = rnaiProbeMap.get(probeId);
            if (identifiers == null || identifiers.length == 0) {
                log.info("Could not find mapping for: " + probeId);
                continue;
            }
            // Only the first mapped identifier is used.
            String identifier = identifiers[0];

            NamedFeature gene = FeatureDB.getFeature(identifier.toUpperCase());
            if (gene == null) {
                log.debug("Unknown identifier: " + identifier);
                continue;
            }
            String geneName = gene.getName();

            for (int i = 0; i < nColumns; i++) {
                int dataIndex = dataStartColumn + i;

                float value;
                if (dataIndex >= nTokens || tokens[dataIndex].length() == 0) {
                    // Out of value tokens, or the cell is blank: record NaN for the cell.
                    value = Float.NaN;
                } else {
                    try {
                        value = Float.parseFloat(tokens[dataIndex]);
                    } catch (NumberFormatException ignored) {
                        // Expected for non-numeric cells. Such a cell is skipped entirely:
                        // no hairpin is recorded and the gene score is left untouched.
                        continue;
                    }
                }

                String sample = columnHeadings[i];
                RNAIHairpinCache.getInstance().addHairpinScore(sample, geneName,
                        new RNAIHairpinValue(probeId, value));

                HashMap<String, Float> geneScoreMap = sampleGeneScoreMap.get(sample);
                if (geneScoreMap == null) {
                    geneScoreMap = new HashMap<String, Float>();
                    sampleGeneScoreMap.put(sample, geneScoreMap);
                }

                // Keep the smallest hairpin value seen so far for this sample/gene pair.
                // NOTE(review): Math.min returns NaN when either argument is NaN, so one
                // blank cell makes the gene's score NaN for that sample - confirm intended.
                Float previous = geneScoreMap.get(geneName);
                float geneScore = (previous == null)
                        ? value
                        : Math.min(value, previous.floatValue());
                geneScoreMap.put(geneName, Float.valueOf(geneScore));
            }
        }

        return computeGeneScores(sampleGeneScoreMap);
    } catch (IOException ex) {
        log.error("Error parsing RNAi file", ex);
        throw new RuntimeException(ex);
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}