Examples of TezIndexRecord


Examples of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord

      //TODO: check for shutdown? - See TEZ-1480
      long startTime = System.currentTimeMillis();

      FetchedInput fetchedInput = null;
      try {
        TezIndexRecord idxRecord;
        idxRecord = getTezIndexRecord(srcAttemptId);

        fetchedInput = new LocalDiskFetchedInput(idxRecord.getStartOffset(),
            idxRecord.getRawLength(), idxRecord.getPartLength(), srcAttemptId,
            getShuffleInputFileName(srcAttemptId.getPathComponent(), null), conf,
            new FetchedInputCallback() {
              @Override
              public void fetchComplete(FetchedInput fetchedInput) {}

              @Override
              public void fetchFailed(FetchedInput fetchedInput) {}

              @Override
              public void freeResources(FetchedInput fetchedInput) {}
            });
        LOG.info("fetcher" + " about to shuffle output of srcAttempt (direct disk)" + srcAttemptId
            + " decomp: " + idxRecord.getRawLength() + " len: " + idxRecord.getPartLength()
            + " to " + fetchedInput.getType());

        long endTime = System.currentTimeMillis();
        fetcherCallback.fetchSucceeded(host, srcAttemptId, fetchedInput, idxRecord.getPartLength(),
            idxRecord.getRawLength(), (endTime - startTime));
        iterator.remove();
      } catch (IOException e) {
        LOG.warn("Failed to shuffle output of " + srcAttemptId + " from " + host + "(local fetch)",
            e);
        if (fetchedInput != null) {
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord

  }

  @VisibleForTesting
  protected TezIndexRecord getTezIndexRecord(InputAttemptIdentifier srcAttemptId) throws
      IOException {
    TezIndexRecord idxRecord;
    Path indexFile = getShuffleInputFileName(srcAttemptId.getPathComponent(),
        Constants.TEZ_RUNTIME_TASK_OUTPUT_INDEX_SUFFIX_STRING);
    TezSpillRecord spillRecord = new TezSpillRecord(indexFile, conf);
    idxRecord = spillRecord.getIndex(partition);
    return idxRecord;
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord

            writePartition(currentBuffer.partitionPositions[i], currentBuffer, writer, keyBuffer,
                valBuffer);
          }
          synchronized (spillInfoList) {
            for (SpillInfo spillInfo : spillInfoList) {
              TezIndexRecord indexRecord = spillInfo.spillRecord.getIndex(i);
              if (indexRecord.getPartLength() == 0) {
                // Skip empty partitions within a spill
                continue;
              }
              FSDataInputStream in = rfs.open(spillInfo.outPath);
              in.seek(indexRecord.getStartOffset());
              IFile.Reader reader = new IFile.Reader(in, indexRecord.getPartLength(), codec, null,
                  additionalSpillBytesReadCounter, ifileReadAhead, ifileReadAheadLength,
                  ifileBufferSize);
              while (reader.nextRawKey(keyBufferIFile)) {
                // TODO Inefficient. If spills are not compressed, a direct copy should be possible
                // given the current IFile format. Also extremely inefficient for large records,
                // since the entire record will be read into memory.
                reader.nextRawValue(valBufferIFile);
                writer.append(keyBufferIFile, valBufferIFile);
              }
              reader.close();
            }
          }
          writer.close();
          fileOutputBytesCounter.increment(writer.getCompressedLength());
          TezIndexRecord indexRecord = new TezIndexRecord(segmentStart, writer.getRawLength(),
              writer.getCompressedLength());
          writer = null;
          finalSpillRecord.putIndex(indexRecord, i);
        } finally {
          if (writer != null) {
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord

            writer.append(key, value);
            outputLargeRecordsCounter.increment(1);
            numRecordsPerPartition[i]++;
            writer.close();
            additionalSpillBytesWritternCounter.increment(writer.getCompressedLength());
            TezIndexRecord indexRecord = new TezIndexRecord(recordStart, writer.getRawLength(),
                writer.getCompressedLength());
            spillRecord.putIndex(indexRecord, i);
            outSize = writer.getCompressedLength();
            writer = null;
          } finally {
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord

          // close the writer
          writer.close();

          // record offsets
          final TezIndexRecord rec =
              new TezIndexRecord(
                  segmentStart,
                  writer.getRawLength(),
                  writer.getCompressedLength());
          spillRec.putIndex(rec, i);
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord

            mapOutputByteCounter.increment(out.getPos() - recordStart);
          }
          writer.close();

          // record offsets
          TezIndexRecord rec =
              new TezIndexRecord(
                  segmentStart,
                  writer.getRawLength(),
                  writer.getCompressedLength());
          spillRec.putIndex(rec, i);
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord

          long segmentStart = finalOut.getPos();
          Writer writer =
            new Writer(conf, finalOut, keyClass, valClass, codec, null);
          writer.close();

          TezIndexRecord rec =
              new TezIndexRecord(
                  segmentStart,
                  writer.getRawLength(),
                  writer.getCompressedLength());
          sr.putIndex(rec, i);
        }
        sr.writeToFile(finalIndexFile, conf);
      } finally {
        finalOut.close();
      }
      return;
    }
    else {
      TezMerger.considerFinalMergeForProgress();

      final TezSpillRecord spillRec = new TezSpillRecord(partitions);
      for (int parts = 0; parts < partitions; parts++) {
        //create the segments to be merged
        List<Segment> segmentList =
          new ArrayList<Segment>(numSpills);
        for(int i = 0; i < numSpills; i++) {
          TezIndexRecord indexRecord = indexCacheList.get(i).getIndex(parts);

          Segment s =
            new Segment(conf, rfs, filename[i], indexRecord.getStartOffset(),
                             indexRecord.getPartLength(), codec, ifileReadAhead,
                             ifileReadAheadLength, ifileBufferSize, true);
          segmentList.add(i, s);

          if (LOG.isDebugEnabled()) {
            LOG.debug("TaskIdentifier=" + taskIdentifier + " Partition=" + parts +
                "Spill =" + i + "(" + indexRecord.getStartOffset() + "," +
                indexRecord.getRawLength() + ", " +
                indexRecord.getPartLength() + ")");
          }
        }

        int mergeFactor =
            this.conf.getInt(TezJobConfig.TEZ_RUNTIME_IO_SORT_FACTOR,
                TezJobConfig.DEFAULT_TEZ_RUNTIME_IO_SORT_FACTOR);
        // sort the segments only if there are intermediate merges
        boolean sortSegments = segmentList.size() > mergeFactor;
        //merge
        TezRawKeyValueIterator kvIter = TezMerger.merge(conf, rfs,
                       keyClass, valClass, codec,
                       segmentList, mergeFactor,
                       new Path(taskIdentifier),
                       (RawComparator)ConfigUtils.getIntermediateOutputKeyComparator(conf),
                       nullProgressable, sortSegments,
                       null, spilledRecordsCounter,
                       null); // Not using any Progress in TezMerger. Should just work.

        //write merged output to disk
        long segmentStart = finalOut.getPos();
        Writer writer =
            new Writer(conf, finalOut, keyClass, valClass, codec,
                spilledRecordsCounter);
        if (combiner == null || numSpills < minSpillsForCombine) {
          TezMerger.writeFile(kvIter, writer,
              nullProgressable, TezJobConfig.DEFAULT_RECORDS_BEFORE_PROGRESS);
        } else {
          runCombineProcessor(kvIter, writer);
        }
        writer.close();

        // record offsets
        final TezIndexRecord rec =
            new TezIndexRecord(
                segmentStart,
                writer.getRawLength(),
                writer.getCompressedLength());
        spillRec.putIndex(rec, parts);
      }
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord

   * @throws IOException
   */
  public boolean close() throws IOException {
    this.closed = true;
    this.writer.close();
    TezIndexRecord rec = new TezIndexRecord(0, writer.getRawLength(),
        writer.getCompressedLength());
    TezSpillRecord sr = new TezSpillRecord(1);
    sr.putIndex(rec, 0);

    this.indexPath = ouputFileManager
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord

          // close the writer
          writer.close();

          // record offsets
          final TezIndexRecord rec =
              new TezIndexRecord(
                  segmentStart,
                  writer.getRawLength(),
                  writer.getCompressedLength());
          spillRec.putIndex(rec, i);
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord

            mapOutputByteCounter.increment(out.getPos() - recordStart);
          }
          writer.close();

          // record offsets
          TezIndexRecord rec =
              new TezIndexRecord(
                  segmentStart,
                  writer.getRawLength(),
                  writer.getCompressedLength());
          spillRec.putIndex(rec, i);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.