long segmentStart = finalOut.getPos();
Writer writer =
new Writer(conf, finalOut, keyClass, valClass, codec, null, null);
writer.close();
TezIndexRecord rec =
new TezIndexRecord(
segmentStart,
writer.getRawLength(),
writer.getCompressedLength());
// Covers the case of multiple spills.
outputBytesWithOverheadCounter.increment(writer.getRawLength());
sr.putIndex(rec, i);
}
sr.writeToFile(finalIndexFile, conf);
} finally {
finalOut.close();
}
return;
}
else {
final TezSpillRecord spillRec = new TezSpillRecord(partitions);
for (int parts = 0; parts < partitions; parts++) {
//create the segments to be merged
List<Segment> segmentList =
new ArrayList<Segment>(numSpills);
for(int i = 0; i < numSpills; i++) {
TezIndexRecord indexRecord = indexCacheList.get(i).getIndex(parts);
Segment s =
new Segment(conf, rfs, filename[i], indexRecord.getStartOffset(),
indexRecord.getPartLength(), codec, ifileReadAhead,
ifileReadAheadLength, ifileBufferSize, true);
segmentList.add(i, s);
if (LOG.isDebugEnabled()) {
LOG.debug("TaskIdentifier=" + taskIdentifier + " Partition=" + parts +
"Spill =" + i + "(" + indexRecord.getStartOffset() + "," +
indexRecord.getRawLength() + ", " +
indexRecord.getPartLength() + ")");
}
}
int mergeFactor =
this.conf.getInt(TezJobConfig.TEZ_RUNTIME_IO_SORT_FACTOR,
TezJobConfig.DEFAULT_TEZ_RUNTIME_IO_SORT_FACTOR);
// sort the segments only if there are intermediate merges
boolean sortSegments = segmentList.size() > mergeFactor;
//merge
TezRawKeyValueIterator kvIter = TezMerger.merge(conf, rfs,
keyClass, valClass, codec,
segmentList, mergeFactor,
new Path(taskIdentifier),
(RawComparator)ConfigUtils.getIntermediateOutputKeyComparator(conf),
nullProgressable, sortSegments, true,
null, spilledRecordsCounter, additionalSpillBytesRead,
null); // Not using any Progress in TezMerger. Should just work.
//write merged output to disk
long segmentStart = finalOut.getPos();
Writer writer =
new Writer(conf, finalOut, keyClass, valClass, codec,
spilledRecordsCounter, null);
if (combiner == null || numSpills < minSpillsForCombine) {
TezMerger.writeFile(kvIter, writer,
nullProgressable, TezJobConfig.DEFAULT_RECORDS_BEFORE_PROGRESS);
} else {
runCombineProcessor(kvIter, writer);
}
writer.close();
// record offsets
final TezIndexRecord rec =
new TezIndexRecord(
segmentStart,
writer.getRawLength(),
writer.getCompressedLength());
spillRec.putIndex(rec, parts);
}