Examples of TezSpillRecord

org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

  @VisibleForTesting
  protected TezIndexRecord getIndexRecord(String pathComponent, int partitionId)
      throws IOException {
    Path indexFile = getShuffleInputFileName(pathComponent,
        Constants.TEZ_RUNTIME_TASK_OUTPUT_INDEX_SUFFIX_STRING);
    TezSpillRecord spillRecord = new TezSpillRecord(indexFile, conf);
    return spillRecord.getIndex(partitionId);
  }

View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord


    long indexFileSizeEstimate = numPartitions * Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH;
    Path finalOutPath = outputFileHandler.getOutputFileForWrite(expectedSize);
    Path finalIndexPath = outputFileHandler.getOutputIndexFileForWrite(indexFileSizeEstimate);


    TezSpillRecord finalSpillRecord = new TezSpillRecord(numPartitions);


    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();


    DataInputBuffer keyBufferIFile = new DataInputBuffer();
    DataInputBuffer valBufferIFile = new DataInputBuffer();


    FSDataOutputStream out = null;
    try {
      out = rfs.create(finalOutPath);
      Writer writer = null;


      for (int i = 0; i < numPartitions; i++) {
        long segmentStart = out.getPos();
        if (numRecordsPerPartition[i] == 0) {
          LOG.info("Skipping partition: " + i + " in final merge since it has no records");
          continue;
        }
        writer = new Writer(conf, out, keyClass, valClass, codec, null, null);
        try {
          if (currentBuffer.nextPosition != 0
              && currentBuffer.partitionPositions[i] != WrappedBuffer.PARTITION_ABSENT_POSITION) {
            // Write current buffer.
            writePartition(currentBuffer.partitionPositions[i], currentBuffer, writer, keyBuffer,
                valBuffer);
          }
          synchronized (spillInfoList) {
            for (SpillInfo spillInfo : spillInfoList) {
              TezIndexRecord indexRecord = spillInfo.spillRecord.getIndex(i);
              if (indexRecord.getPartLength() == 0) {
                // Skip empty partitions within a spill
                continue;
              }
              FSDataInputStream in = rfs.open(spillInfo.outPath);
              in.seek(indexRecord.getStartOffset());
              IFile.Reader reader = new IFile.Reader(in, indexRecord.getPartLength(), codec, null,
                  additionalSpillBytesReadCounter, ifileReadAhead, ifileReadAheadLength,
                  ifileBufferSize);
              while (reader.nextRawKey(keyBufferIFile)) {
                // TODO Inefficient. If spills are not compressed, a direct copy should be possible
                // given the current IFile format. Also exteremely inefficient for large records,
                // since the entire record will be read into memory.
                reader.nextRawValue(valBufferIFile);
                writer.append(keyBufferIFile, valBufferIFile);
              }
              reader.close();
            }
          }
          writer.close();
          fileOutputBytesCounter.increment(writer.getCompressedLength());
          TezIndexRecord indexRecord = new TezIndexRecord(segmentStart, writer.getRawLength(),
              writer.getCompressedLength());
          writer = null;
          finalSpillRecord.putIndex(indexRecord, i);
        } finally {
          if (writer != null) {
            writer.close();
          }
        }
      }
    } finally {
      if (out != null) {
        out.close();
      }
    }
    finalSpillRecord.writeToFile(finalIndexPath, conf);
    fileOutputBytesCounter.increment(indexFileSizeEstimate);
    LOG.info("Finished final spill after merging : " + numSpills.get() + " spills");
  }

View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

    long size = sizePerBuffer - (currentBuffer.numRecords * META_SIZE) - currentBuffer.skipSize
        + numPartitions * APPROX_HEADER_LENGTH;
    FSDataOutputStream out = null;
    long outSize = 0;
    try {
      final TezSpillRecord spillRecord = new TezSpillRecord(numPartitions);
      final Path outPath = outputFileHandler.getSpillFileForWrite(spillNumber, size);
      out = rfs.create(outPath);
      for (int i = 0; i < numPartitions; i++) {
        final long recordStart = out.getPos();
        if (i == partition) {
          spilledRecordsCounter.increment(1);
          Writer writer = null;
          try {
            writer = new IFile.Writer(conf, out, keyClass, valClass, codec, null, null);
            writer.append(key, value);
            outputLargeRecordsCounter.increment(1);
            numRecordsPerPartition[i]++;
            writer.close();
            additionalSpillBytesWritternCounter.increment(writer.getCompressedLength());
            TezIndexRecord indexRecord = new TezIndexRecord(recordStart, writer.getRawLength(),
                writer.getCompressedLength());
            spillRecord.putIndex(indexRecord, i);
            outSize = writer.getCompressedLength();
            writer = null;
          } finally {
            if (writer != null) {
              writer.close();

View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

        outPath = outputFileHandler.getOutputFileForWrite(spillSize);
      } else {
        outPath = outputFileHandler.getSpillFileForWrite(spillNumber, spillSize);
      }
      FSDataOutputStream out = rfs.create(outPath);
      TezSpillRecord spillRecord = new TezSpillRecord(numPartitions);
      DataInputBuffer key = new DataInputBuffer();
      DataInputBuffer val = new DataInputBuffer();
      for (int i = 0; i < numPartitions; i++) {
        IFile.Writer writer = null;
        try {
          long segmentStart = out.getPos();
          if (wrappedBuffer.partitionPositions[i] == WrappedBuffer.PARTITION_ABSENT_POSITION) {
            // Skip empty partition.
            continue;
          }
          writer = new Writer(conf, out, keyClass, valClass, codec, numRecordsCounter, null);
          writePartition(wrappedBuffer.partitionPositions[i], wrappedBuffer, writer, key, val);
          writer.close();
          if (isFinalSpill) {
            fileOutputBytesCounter.increment(writer.getCompressedLength());
          } else {
            additionalSpillBytesWritternCounter.increment(writer.getCompressedLength());
          }
          spillResult = new SpillResult(writer.getCompressedLength(), this.wrappedBuffer);
          TezIndexRecord indexRecord = new TezIndexRecord(segmentStart, writer.getRawLength(),
              writer.getCompressedLength());
          spillRecord.putIndex(indexRecord, i);
          writer = null;
        } finally {
          if (writer != null) {
            writer.close();
          }
        }
      }
      if (isFinalSpill) {
        long indexFileSizeEstimate = numPartitions * Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH;
        Path finalSpillFile = outputFileHandler.getOutputIndexFileForWrite(indexFileSizeEstimate);
        spillRecord.writeToFile(finalSpillFile, conf);
        fileOutputBytesCounter.increment(indexFileSizeEstimate);
        LOG.info("Finished final and only spill");
      } else {
        SpillInfo spillInfo = new SpillInfo(spillRecord, outPath);
        spillInfoList.add(spillInfo);

View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

    long rawLen = writer.getRawLength();
    long compLen = writer.getCompressedLength();
    outputBytesCounterWithOverhead.increment(rawLen);
    outputMaterializedBytesCounter.increment(compLen);
    TezIndexRecord rec = new TezIndexRecord(0, rawLen, compLen);
    TezSpillRecord sr = new TezSpillRecord(1);
    sr.putIndex(rec, 0);


    this.indexPath = ouputFileManager
        .getOutputIndexFileForWrite(INDEX_RECORD_LENGTH);
    LOG.info("Writing index file: " + indexPath);
    sr.writeToFile(indexPath, conf);
    return numRecords > 0;
  }

View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

        return;
      }
    }


    // Special case for 0 records.
    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    Text keyDeser = new Text();
    Text valDeser = new Text();
    for (int i = 0; i < numPartitions; i++) {
      if (emptyPartitionBits.get(i)) {
        continue;
      }
      TezIndexRecord indexRecord = spillRecord.getIndex(i);
      FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
      inStream.seek(indexRecord.getStartOffset());
      IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null,
          null, false, 0, -1);
      while (reader.nextRawKey(keyBuffer)) {

View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

        return;
      }
    }


    // Special case for 0 records.
    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    IntWritable keyDeser = new IntWritable();
    LongWritable valDeser = new LongWritable();
    for (int i = 0; i < numOutputs; i++) {
      if (skippedPartitions != null && skippedPartitions.contains(i)) {
        continue;
      }
      TezIndexRecord indexRecord = spillRecord.getIndex(i);
      FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
      inStream.seek(indexRecord.getStartOffset());
      IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null,
          null, false, 0, -1);
      while (reader.nextRawKey(keyBuffer)) {

View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

        .newBuilder();


    boolean outputGenerated = true;
    if (sendEmptyPartitionDetails) {
      Path indexFile = sorter.getMapOutput().getOutputIndexFile();
      TezSpillRecord spillRecord = new TezSpillRecord(indexFile, conf);
      BitSet emptyPartitionDetails = new BitSet();
      int emptyPartitions = 0;
      for(int i=0;i<spillRecord.size();i++) {
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        if (!indexRecord.hasData()) {
          emptyPartitionDetails.set(i);
          emptyPartitions++;
        }
      }
      outputGenerated = (spillRecord.size() != emptyPartitions);
      if (emptyPartitions > 0) {
        ByteString emptyPartitionsBytesString =
            TezCommonUtils.compressByteArrayToByteString(
                TezUtilsInternal.toByteArray(emptyPartitionDetails));
        payloadBuilder.setEmptyPartitions(emptyPartitionsBytesString);

View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

        : (bufvoid - bufend) + bufstart) +
                partitions * APPROX_HEADER_LENGTH;
    FSDataOutputStream out = null;
    try {
      // create spill file
      final TezSpillRecord spillRec = new TezSpillRecord(partitions);
      final Path filename =
          mapOutputFile.getSpillFileForWrite(numSpills, size);
      out = rfs.create(filename);


      int spindex = mstart;
      final InMemValBytes value = createInMemValBytes();
      for (int i = 0; i < partitions; ++i) {
        IFile.Writer writer = null;
        try {
          long segmentStart = out.getPos();
          writer = new Writer(conf, out, keyClass, valClass, codec,
                                    spilledRecordsCounter, null);
          if (combiner == null) {
            // spill directly
            DataInputBuffer key = new DataInputBuffer();
            while (spindex < mend &&
                kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
              final int kvoff = offsetFor(spindex);
              int keystart = kvmeta.get(kvoff + KEYSTART);
              int valstart = kvmeta.get(kvoff + VALSTART);
              key.reset(kvbuffer, keystart, valstart - keystart);
              getVBytesForOffset(kvoff, value);
              writer.append(key, value);
              ++spindex;
            }
          } else {
            int spstart = spindex;
            while (spindex < mend &&
                kvmeta.get(offsetFor(spindex)
                          + PARTITION) == i) {
              ++spindex;
            }
            // Note: we would like to avoid the combiner if we've fewer
            // than some threshold of records for a partition
            if (spstart != spindex) {
              TezRawKeyValueIterator kvIter =
                new MRResultIterator(spstart, spindex);
              if (LOG.isDebugEnabled()) {
                LOG.debug("Running combine processor");
              }
              runCombineProcessor(kvIter, writer);
            }
          }


          // close the writer
          writer.close();
          if (numSpills > 0) {
            additionalSpillBytesWritten.increment(writer.getCompressedLength());
            numAdditionalSpills.increment(1);
            // Reset the value will be set during the final merge.
            outputBytesWithOverheadCounter.setValue(0);
          } else {
            // Set this up for the first write only. Subsequent ones will be handled in the final merge.
            outputBytesWithOverheadCounter.increment(writer.getRawLength());
          }
          // record offsets
          final TezIndexRecord rec =
              new TezIndexRecord(
                  segmentStart,
                  writer.getRawLength(),
                  writer.getCompressedLength());
          spillRec.putIndex(rec, i);


          writer = null;
        } finally {
          if (null != writer) writer.close();
        }
      }


      if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
        // create spill index file
        Path indexFilename =
            mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
                * MAP_OUTPUT_INDEX_RECORD_LENGTH);
        spillRec.writeToFile(indexFilename, conf);
      } else {
        indexCacheList.add(spillRec);
        totalIndexCacheMemory +=
          spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
      }
      LOG.info("Finished spill " + numSpills);
      ++numSpills;
    } finally {
      if (out != null) out.close();

View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord

                                 int partition) throws IOException {
    long size = kvbuffer.length + partitions * APPROX_HEADER_LENGTH;
    FSDataOutputStream out = null;
    try {
      // create spill file
      final TezSpillRecord spillRec = new TezSpillRecord(partitions);
      final Path filename =
          mapOutputFile.getSpillFileForWrite(numSpills, size);
      out = rfs.create(filename);


      // we don't run the combiner for a single record
      for (int i = 0; i < partitions; ++i) {
        IFile.Writer writer = null;
        try {
          long segmentStart = out.getPos();
          // Create a new codec, don't care!
          writer = new IFile.Writer(conf, out, keyClass, valClass, codec,
                                          spilledRecordsCounter, null);


          if (i == partition) {
            final long recordStart = out.getPos();
            writer.append(key, value);
            // Note that our map byte count will not be accurate with
            // compression
            mapOutputByteCounter.increment(out.getPos() - recordStart);
          }
          writer.close();


          if (numSpills > 0) {
            additionalSpillBytesWritten.increment(writer.getCompressedLength());
            numAdditionalSpills.increment(1);
            outputBytesWithOverheadCounter.setValue(0);
          } else {
            // Set this up for the first write only. Subsequent ones will be handled in the final merge.
            outputBytesWithOverheadCounter.increment(writer.getRawLength());
          }


          // record offsets
          TezIndexRecord rec =
              new TezIndexRecord(
                  segmentStart,
                  writer.getRawLength(),
                  writer.getCompressedLength());
          spillRec.putIndex(rec, i);


          writer = null;
        } catch (IOException e) {
          if (null != writer) writer.close();
          throw e;
        }
      }
      if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
        // create spill index file
        Path indexFilename =
            mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
                * MAP_OUTPUT_INDEX_RECORD_LENGTH);
        spillRec.writeToFile(indexFilename, conf);
      } else {
        indexCacheList.add(spillRec);
        totalIndexCacheMemory +=
          spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
      }
      ++numSpills;
    } finally {
      if (out != null) out.close();
    }

View Full Code Here

0 1 2

TOP

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.