Examples of TezSpillRecord


Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

    }

    // read in paged indices
    for (int i = indexCacheList.size(); i < numSpills; ++i) {
      Path indexFileName = mapOutputFile.getSpillIndexFile(i);
      indexCacheList.add(new TezSpillRecord(indexFileName, conf));
    }

    //make correction in the length to include the sequence file header
    //lengths for each partition
    finalOutFileSize += partitions * APPROX_HEADER_LENGTH;
    finalIndexFileSize = partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    Path finalOutputFile =
        mapOutputFile.getOutputFileForWrite(finalOutFileSize);
    Path finalIndexFile =
        mapOutputFile.getOutputIndexFileForWrite(finalIndexFileSize);

    //The output stream for the final single output file
    FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096);

    if (numSpills == 0) {
      // TODO Change event generation to say there is no data rather than generating a dummy file
      //create dummy files

      TezSpillRecord sr = new TezSpillRecord(partitions);
      try {
        for (int i = 0; i < partitions; i++) {
          long segmentStart = finalOut.getPos();
          Writer writer =
            new Writer(conf, finalOut, keyClass, valClass, codec, null, null);
          writer.close();

          TezIndexRecord rec =
              new TezIndexRecord(
                  segmentStart,
                  writer.getRawLength(),
                  writer.getCompressedLength());
          // Covers the case of multiple spills.
          outputBytesWithOverheadCounter.increment(writer.getRawLength());
          sr.putIndex(rec, i);
        }
        sr.writeToFile(finalIndexFile, conf);
      } finally {
        finalOut.close();
      }
      return;
    }
    else {
      final TezSpillRecord spillRec = new TezSpillRecord(partitions);
      for (int parts = 0; parts < partitions; parts++) {
        //create the segments to be merged
        List<Segment> segmentList =
          new ArrayList<Segment>(numSpills);
        for(int i = 0; i < numSpills; i++) {
          TezIndexRecord indexRecord = indexCacheList.get(i).getIndex(parts);

          Segment s =
            new Segment(conf, rfs, filename[i], indexRecord.getStartOffset(),
                             indexRecord.getPartLength(), codec, ifileReadAhead,
                             ifileReadAheadLength, ifileBufferSize, true);
          segmentList.add(i, s);

          if (LOG.isDebugEnabled()) {
            LOG.debug("TaskIdentifier=" + taskIdentifier + " Partition=" + parts +
                "Spill =" + i + "(" + indexRecord.getStartOffset() + "," +
                indexRecord.getRawLength() + ", " +
                indexRecord.getPartLength() + ")");
          }
        }

        int mergeFactor =
            this.conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR,
                TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR_DEFAULT);
        // sort the segments only if there are intermediate merges
        boolean sortSegments = segmentList.size() > mergeFactor;
        //merge
        TezRawKeyValueIterator kvIter = TezMerger.merge(conf, rfs,
                       keyClass, valClass, codec,
                       segmentList, mergeFactor,
                       new Path(taskIdentifier),
                       (RawComparator)ConfigUtils.getIntermediateOutputKeyComparator(conf),
                       nullProgressable, sortSegments, true,
                       null, spilledRecordsCounter, additionalSpillBytesRead,
                       null); // Not using any Progress in TezMerger. Should just work.

        //write merged output to disk
        long segmentStart = finalOut.getPos();
        Writer writer =
            new Writer(conf, finalOut, keyClass, valClass, codec,
                spilledRecordsCounter, null);
        if (combiner == null || numSpills < minSpillsForCombine) {
          TezMerger.writeFile(kvIter, writer,
              nullProgressable, TezRuntimeConfiguration.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT);
        } else {
          runCombineProcessor(kvIter, writer);
        }
        writer.close();

        // record offsets
        final TezIndexRecord rec =
            new TezIndexRecord(
                segmentStart,
                writer.getRawLength(),
                writer.getCompressedLength());
        spillRec.putIndex(rec, parts);
      }
      spillRec.writeToFile(finalIndexFile, conf);
      finalOut.close();
      for(int i = 0; i < numSpills; i++) {
        rfs.delete(filename[i],true);
      }
    }
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

  protected TezIndexRecord getTezIndexRecord(InputAttemptIdentifier srcAttemptId) throws
      IOException {
    TezIndexRecord idxRecord;
    Path indexFile = getShuffleInputFileName(srcAttemptId.getPathComponent(),
        Constants.TEZ_RUNTIME_TASK_OUTPUT_INDEX_SUFFIX_STRING);
    TezSpillRecord spillRecord = new TezSpillRecord(indexFile, conf);
    idxRecord = spillRecord.getIndex(partition);
    return idxRecord;
  }
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

        : (bufvoid - bufend) + bufstart) +
                partitions * APPROX_HEADER_LENGTH;
    FSDataOutputStream out = null;
    try {
      // create spill file
      final TezSpillRecord spillRec = new TezSpillRecord(partitions);
      final Path filename =
          mapOutputFile.getSpillFileForWrite(numSpills, size);
      out = rfs.create(filename);

      int spindex = mstart;
      final InMemValBytes value = createInMemValBytes();
      for (int i = 0; i < partitions; ++i) {
        IFile.Writer writer = null;
        try {
          long segmentStart = out.getPos();
          writer = new Writer(conf, out, keyClass, valClass, codec,
                                    spilledRecordsCounter);
          if (combiner == null) {
            // spill directly
            DataInputBuffer key = new DataInputBuffer();
            while (spindex < mend &&
                kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
              final int kvoff = offsetFor(spindex);
              key.reset(
                  kvbuffer,
                  kvmeta.get(kvoff + KEYSTART),
                  (kvmeta.get(kvoff + VALSTART) - kvmeta.get(kvoff + KEYSTART))
                  );
              getVBytesForOffset(kvoff, value);
              writer.append(key, value);
              ++spindex;
            }
          } else {
            int spstart = spindex;
            while (spindex < mend &&
                kvmeta.get(offsetFor(spindex)
                          + PARTITION) == i) {
              ++spindex;
            }
            // Note: we would like to avoid the combiner if we've fewer
            // than some threshold of records for a partition
            if (spstart != spindex) {
              TezRawKeyValueIterator kvIter =
                new MRResultIterator(spstart, spindex);
              if (LOG.isDebugEnabled()) {
                LOG.debug("Running combine processor");
              }
              runCombineProcessor(kvIter, writer);
            }
          }

          // close the writer
          writer.close();

          // record offsets
          final TezIndexRecord rec =
              new TezIndexRecord(
                  segmentStart,
                  writer.getRawLength(),
                  writer.getCompressedLength());
          spillRec.putIndex(rec, i);

          writer = null;
        } finally {
          if (null != writer) writer.close();
        }
      }

      if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
        // create spill index file
        Path indexFilename =
            mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
                * MAP_OUTPUT_INDEX_RECORD_LENGTH);
        spillRec.writeToFile(indexFilename, conf);
      } else {
        indexCacheList.add(spillRec);
        totalIndexCacheMemory +=
          spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
      }
      LOG.info("Finished spill " + numSpills);
      ++numSpills;
    } finally {
      if (out != null) out.close();
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

                                 int partition) throws IOException {
    long size = kvbuffer.length + partitions * APPROX_HEADER_LENGTH;
    FSDataOutputStream out = null;
    try {
      // create spill file
      final TezSpillRecord spillRec = new TezSpillRecord(partitions);
      final Path filename =
          mapOutputFile.getSpillFileForWrite(numSpills, size);
      out = rfs.create(filename);

      // we don't run the combiner for a single record
      for (int i = 0; i < partitions; ++i) {
        IFile.Writer writer = null;
        try {
          long segmentStart = out.getPos();
          // Create a new codec, don't care!
          writer = new IFile.Writer(conf, out, keyClass, valClass, codec,
                                          spilledRecordsCounter);

          if (i == partition) {
            final long recordStart = out.getPos();
            writer.append(key, value);
            // Note that our map byte count will not be accurate with
            // compression
            mapOutputByteCounter.increment(out.getPos() - recordStart);
          }
          writer.close();

          // record offsets
          TezIndexRecord rec =
              new TezIndexRecord(
                  segmentStart,
                  writer.getRawLength(),
                  writer.getCompressedLength());
          spillRec.putIndex(rec, i);

          writer = null;
        } catch (IOException e) {
          if (null != writer) writer.close();
          throw e;
        }
      }
      if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
        // create spill index file
        Path indexFilename =
            mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
                * MAP_OUTPUT_INDEX_RECORD_LENGTH);
        spillRec.writeToFile(indexFilename, conf);
      } else {
        indexCacheList.add(spillRec);
        totalIndexCacheMemory +=
          spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
      }
      ++numSpills;
    } finally {
      if (out != null) out.close();
    }
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

    }

    // read in paged indices
    for (int i = indexCacheList.size(); i < numSpills; ++i) {
      Path indexFileName = mapOutputFile.getSpillIndexFile(i);
      indexCacheList.add(new TezSpillRecord(indexFileName, conf));
    }

    //make correction in the length to include the sequence file header
    //lengths for each partition
    finalOutFileSize += partitions * APPROX_HEADER_LENGTH;
    finalIndexFileSize = partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    Path finalOutputFile =
        mapOutputFile.getOutputFileForWrite(finalOutFileSize);
    Path finalIndexFile =
        mapOutputFile.getOutputIndexFileForWrite(finalIndexFileSize);

    //The output stream for the final single output file
    FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096);

    if (numSpills == 0) {
      //create dummy files

      TezSpillRecord sr = new TezSpillRecord(partitions);
      try {
        for (int i = 0; i < partitions; i++) {
          long segmentStart = finalOut.getPos();
          Writer writer =
            new Writer(conf, finalOut, keyClass, valClass, codec, null);
          writer.close();

          TezIndexRecord rec =
              new TezIndexRecord(
                  segmentStart,
                  writer.getRawLength(),
                  writer.getCompressedLength());
          sr.putIndex(rec, i);
        }
        sr.writeToFile(finalIndexFile, conf);
      } finally {
        finalOut.close();
      }
      return;
    }
    else {
      TezMerger.considerFinalMergeForProgress();

      final TezSpillRecord spillRec = new TezSpillRecord(partitions);
      for (int parts = 0; parts < partitions; parts++) {
        //create the segments to be merged
        List<Segment> segmentList =
          new ArrayList<Segment>(numSpills);
        for(int i = 0; i < numSpills; i++) {
          TezIndexRecord indexRecord = indexCacheList.get(i).getIndex(parts);

          Segment s =
            new Segment(conf, rfs, filename[i], indexRecord.getStartOffset(),
                             indexRecord.getPartLength(), codec, ifileReadAhead,
                             ifileReadAheadLength, ifileBufferSize, true);
          segmentList.add(i, s);

          if (LOG.isDebugEnabled()) {
            LOG.debug("TaskIdentifier=" + taskIdentifier + " Partition=" + parts +
                "Spill =" + i + "(" + indexRecord.getStartOffset() + "," +
                indexRecord.getRawLength() + ", " +
                indexRecord.getPartLength() + ")");
          }
        }

        int mergeFactor =
            this.conf.getInt(TezJobConfig.TEZ_RUNTIME_IO_SORT_FACTOR,
                TezJobConfig.DEFAULT_TEZ_RUNTIME_IO_SORT_FACTOR);
        // sort the segments only if there are intermediate merges
        boolean sortSegments = segmentList.size() > mergeFactor;
        //merge
        TezRawKeyValueIterator kvIter = TezMerger.merge(conf, rfs,
                       keyClass, valClass, codec,
                       segmentList, mergeFactor,
                       new Path(taskIdentifier),
                       (RawComparator)ConfigUtils.getIntermediateOutputKeyComparator(conf),
                       nullProgressable, sortSegments,
                       null, spilledRecordsCounter,
                       null); // Not using any Progress in TezMerger. Should just work.

        //write merged output to disk
        long segmentStart = finalOut.getPos();
        Writer writer =
            new Writer(conf, finalOut, keyClass, valClass, codec,
                spilledRecordsCounter);
        if (combiner == null || numSpills < minSpillsForCombine) {
          TezMerger.writeFile(kvIter, writer,
              nullProgressable, TezJobConfig.DEFAULT_RECORDS_BEFORE_PROGRESS);
        } else {
          runCombineProcessor(kvIter, writer);
        }
        writer.close();

        // record offsets
        final TezIndexRecord rec =
            new TezIndexRecord(
                segmentStart,
                writer.getRawLength(),
                writer.getCompressedLength());
        spillRec.putIndex(rec, parts);
      }
      spillRec.writeToFile(finalIndexFile, conf);
      finalOut.close();
      for(int i = 0; i < numSpills; i++) {
        rfs.delete(filename[i],true);
      }
    }
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

  public boolean close() throws IOException {
    this.closed = true;
    this.writer.close();
    TezIndexRecord rec = new TezIndexRecord(0, writer.getRawLength(),
        writer.getCompressedLength());
    TezSpillRecord sr = new TezSpillRecord(1);
    sr.putIndex(rec, 0);

    this.indexPath = ouputFileManager
        .getOutputIndexFileForWrite(INDEX_RECORD_LENGTH);
    LOG.info("Writing index file: " + indexPath);
    sr.writeToFile(indexPath, conf);
    return numRecords > 0;
  }
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

        : (bufvoid - bufend) + bufstart) +
                partitions * APPROX_HEADER_LENGTH;
    FSDataOutputStream out = null;
    try {
      // create spill file
      final TezSpillRecord spillRec = new TezSpillRecord(partitions);
      final Path filename =
          mapOutputFile.getSpillFileForWrite(numSpills, size);
      out = rfs.create(filename);

      int spindex = mstart;
      final InMemValBytes value = createInMemValBytes();
      for (int i = 0; i < partitions; ++i) {
        IFile.Writer writer = null;
        try {
          long segmentStart = out.getPos();
          writer = new Writer(conf, out, keyClass, valClass, codec,
                                    spilledRecordsCounter);
          if (combiner == null) {
            // spill directly
            DataInputBuffer key = new DataInputBuffer();
            while (spindex < mend &&
                kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
              final int kvoff = offsetFor(spindex);
              key.reset(
                  kvbuffer,
                  kvmeta.get(kvoff + KEYSTART),
                  (kvmeta.get(kvoff + VALSTART) - kvmeta.get(kvoff + KEYSTART))
                  );
              getVBytesForOffset(kvoff, value);
              writer.append(key, value);
              ++spindex;
            }
          } else {
            int spstart = spindex;
            while (spindex < mend &&
                kvmeta.get(offsetFor(spindex)
                          + PARTITION) == i) {
              ++spindex;
            }
            // Note: we would like to avoid the combiner if we've fewer
            // than some threshold of records for a partition
            if (spstart != spindex) {
              TezRawKeyValueIterator kvIter =
                new MRResultIterator(spstart, spindex);
              if (LOG.isDebugEnabled()) {
                LOG.debug("Running combine processor");
              }
              runCombineProcessor(kvIter, writer);
            }
          }

          // close the writer
          writer.close();

          // record offsets
          final TezIndexRecord rec =
              new TezIndexRecord(
                  segmentStart,
                  writer.getRawLength(),
                  writer.getCompressedLength());
          spillRec.putIndex(rec, i);

          writer = null;
        } finally {
          if (null != writer) writer.close();
        }
      }

      if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
        // create spill index file
        Path indexFilename =
            mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
                * MAP_OUTPUT_INDEX_RECORD_LENGTH);
        spillRec.writeToFile(indexFilename, conf);
      } else {
        indexCacheList.add(spillRec);
        totalIndexCacheMemory +=
          spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
      }
      LOG.info("Finished spill " + numSpills);
      ++numSpills;
    } finally {
      if (out != null) out.close();
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

                                 int partition) throws IOException {
    long size = kvbuffer.length + partitions * APPROX_HEADER_LENGTH;
    FSDataOutputStream out = null;
    try {
      // create spill file
      final TezSpillRecord spillRec = new TezSpillRecord(partitions);
      final Path filename =
          mapOutputFile.getSpillFileForWrite(numSpills, size);
      out = rfs.create(filename);

      // we don't run the combiner for a single record
      for (int i = 0; i < partitions; ++i) {
        IFile.Writer writer = null;
        try {
          long segmentStart = out.getPos();
          // Create a new codec, don't care!
          writer = new IFile.Writer(conf, out, keyClass, valClass, codec,
                                          spilledRecordsCounter);

          if (i == partition) {
            final long recordStart = out.getPos();
            writer.append(key, value);
            // Note that our map byte count will not be accurate with
            // compression
            mapOutputByteCounter.increment(out.getPos() - recordStart);
          }
          writer.close();

          // record offsets
          TezIndexRecord rec =
              new TezIndexRecord(
                  segmentStart,
                  writer.getRawLength(),
                  writer.getCompressedLength());
          spillRec.putIndex(rec, i);

          writer = null;
        } catch (IOException e) {
          if (null != writer) writer.close();
          throw e;
        }
      }
      if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
        // create spill index file
        Path indexFilename =
            mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
                * MAP_OUTPUT_INDEX_RECORD_LENGTH);
        spillRec.writeToFile(indexFilename, conf);
      } else {
        indexCacheList.add(spillRec);
        totalIndexCacheMemory +=
          spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
      }
      ++numSpills;
    } finally {
      if (out != null) out.close();
    }
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

    }

    // read in paged indices
    for (int i = indexCacheList.size(); i < numSpills; ++i) {
      Path indexFileName = mapOutputFile.getSpillIndexFile(i);
      indexCacheList.add(new TezSpillRecord(indexFileName, conf));
    }

    //make correction in the length to include the sequence file header
    //lengths for each partition
    finalOutFileSize += partitions * APPROX_HEADER_LENGTH;
    finalIndexFileSize = partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    Path finalOutputFile =
        mapOutputFile.getOutputFileForWrite(finalOutFileSize);
    Path finalIndexFile =
        mapOutputFile.getOutputIndexFileForWrite(finalIndexFileSize);

    //The output stream for the final single output file
    FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096);

    if (numSpills == 0) {
      //create dummy files

      TezSpillRecord sr = new TezSpillRecord(partitions);
      try {
        for (int i = 0; i < partitions; i++) {
          long segmentStart = finalOut.getPos();
          Writer writer =
            new Writer(conf, finalOut, keyClass, valClass, codec, null);
          writer.close();

          TezIndexRecord rec =
              new TezIndexRecord(
                  segmentStart,
                  writer.getRawLength(),
                  writer.getCompressedLength());
          sr.putIndex(rec, i);
        }
        sr.writeToFile(finalIndexFile, conf);
      } finally {
        finalOut.close();
      }
      return;
    }
    else {
      TezMerger.considerFinalMergeForProgress();

      final TezSpillRecord spillRec = new TezSpillRecord(partitions);
      for (int parts = 0; parts < partitions; parts++) {
        //create the segments to be merged
        List<Segment> segmentList =
          new ArrayList<Segment>(numSpills);
        for(int i = 0; i < numSpills; i++) {
          TezIndexRecord indexRecord = indexCacheList.get(i).getIndex(parts);

          Segment s =
            new Segment(conf, rfs, filename[i], indexRecord.getStartOffset(),
                             indexRecord.getPartLength(), codec, ifileReadAhead,
                             ifileReadAheadLength, ifileBufferSize, true);
          segmentList.add(i, s);

          if (LOG.isDebugEnabled()) {
            LOG.debug("TaskIdentifier=" + taskIdentifier + " Partition=" + parts +
                "Spill =" + i + "(" + indexRecord.getStartOffset() + "," +
                indexRecord.getRawLength() + ", " +
                indexRecord.getPartLength() + ")");
          }
        }

        int mergeFactor =
            this.conf.getInt(TezJobConfig.TEZ_RUNTIME_IO_SORT_FACTOR,
                TezJobConfig.DEFAULT_TEZ_RUNTIME_IO_SORT_FACTOR);
        // sort the segments only if there are intermediate merges
        boolean sortSegments = segmentList.size() > mergeFactor;
        //merge
        TezRawKeyValueIterator kvIter = TezMerger.merge(conf, rfs,
                       keyClass, valClass, codec,
                       segmentList, mergeFactor,
                       new Path(taskIdentifier),
                       (RawComparator)ConfigUtils.getIntermediateOutputKeyComparator(conf),
                       nullProgressable, sortSegments,
                       null, spilledRecordsCounter,
                       null); // Not using any Progress in TezMerger. Should just work.

        //write merged output to disk
        long segmentStart = finalOut.getPos();
        Writer writer =
            new Writer(conf, finalOut, keyClass, valClass, codec,
                spilledRecordsCounter);
        if (combiner == null || numSpills < minSpillsForCombine) {
          TezMerger.writeFile(kvIter, writer,
              nullProgressable, TezJobConfig.DEFAULT_RECORDS_BEFORE_PROGRESS);
        } else {
          runCombineProcessor(kvIter, writer);
        }
        writer.close();

        // record offsets
        final TezIndexRecord rec =
            new TezIndexRecord(
                segmentStart,
                writer.getRawLength(),
                writer.getCompressedLength());
        spillRec.putIndex(rec, parts);
      }
      spillRec.writeToFile(finalIndexFile, conf);
      finalOut.close();
      for(int i = 0; i < numSpills; i++) {
        rfs.delete(filename[i],true);
      }
    }
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

        : (bufvoid - bufend) + bufstart) +
                partitions * APPROX_HEADER_LENGTH;
    FSDataOutputStream out = null;
    try {
      // create spill file
      final TezSpillRecord spillRec = new TezSpillRecord(partitions);
      final Path filename =
          mapOutputFile.getSpillFileForWrite(numSpills, size);
      out = rfs.create(filename);

      int spindex = mstart;
      final InMemValBytes value = createInMemValBytes();
      for (int i = 0; i < partitions; ++i) {
        IFile.Writer writer = null;
        try {
          long segmentStart = out.getPos();
          writer = new Writer(conf, out, keyClass, valClass, codec,
                                    spilledRecordsCounter, null);
          if (combiner == null) {
            // spill directly
            DataInputBuffer key = new DataInputBuffer();
            while (spindex < mend &&
                kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
              final int kvoff = offsetFor(spindex);
              int keystart = kvmeta.get(kvoff + KEYSTART);
              int valstart = kvmeta.get(kvoff + VALSTART);
              key.reset(kvbuffer, keystart, valstart - keystart);
              getVBytesForOffset(kvoff, value);
              writer.append(key, value);
              ++spindex;
            }
          } else {
            int spstart = spindex;
            while (spindex < mend &&
                kvmeta.get(offsetFor(spindex)
                          + PARTITION) == i) {
              ++spindex;
            }
            // Note: we would like to avoid the combiner if we've fewer
            // than some threshold of records for a partition
            if (spstart != spindex) {
              TezRawKeyValueIterator kvIter =
                new MRResultIterator(spstart, spindex);
              if (LOG.isDebugEnabled()) {
                LOG.debug("Running combine processor");
              }
              runCombineProcessor(kvIter, writer);
            }
          }

          // close the writer
          writer.close();
          if (numSpills > 0) {
            additionalSpillBytesWritten.increment(writer.getCompressedLength());
            numAdditionalSpills.increment(1);
            // Reset the value will be set during the final merge.
            outputBytesWithOverheadCounter.setValue(0);
          } else {
            // Set this up for the first write only. Subsequent ones will be handled in the final merge.
            outputBytesWithOverheadCounter.increment(writer.getRawLength());
          }
          // record offsets
          final TezIndexRecord rec =
              new TezIndexRecord(
                  segmentStart,
                  writer.getRawLength(),
                  writer.getCompressedLength());
          spillRec.putIndex(rec, i);

          writer = null;
        } finally {
          if (null != writer) writer.close();
        }
      }

      if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
        // create spill index file
        Path indexFilename =
            mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
                * MAP_OUTPUT_INDEX_RECORD_LENGTH);
        spillRec.writeToFile(indexFilename, conf);
      } else {
        indexCacheList.add(spillRec);
        totalIndexCacheMemory +=
          spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
      }
      LOG.info("Finished spill " + numSpills);
      ++numSpills;
    } finally {
      if (out != null) out.close();
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.