Examples of TezRawKeyValueIterator


Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

    org.apache.hadoop.mapreduce.Reducer reducer =
        (org.apache.hadoop.mapreduce.Reducer)
        ReflectionUtils.newInstance(taskContext.getReducerClass(), job);

    // wrap value iterator to report progress.
    final TezRawKeyValueIterator rawIter = input.getIterator();
    TezRawKeyValueIterator rIter = new TezRawKeyValueIterator() {
      // Closes the wrapped raw iterator obtained from input.getIterator().
      public void close() throws IOException {
        rawIter.close();
      }
      public DataInputBuffer getKey() throws IOException {
        return rawIter.getKey();
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

        return;
      }
      localShuffleCopy = shuffle;
    }

    TezRawKeyValueIterator localRawIter = localShuffleCopy.waitForInput();
    synchronized(this) {
      rawIter = localRawIter;
      createValuesIterator();
    }
  }
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

   * @return a KVReader over the sorted input.
   */
  @Override
  public KeyValuesReader getReader() throws IOException, TezException {
    // Cannot synchronize the entire method since this is called from user code and can block.
    TezRawKeyValueIterator rawIterLocal;
    synchronized (this) {
      rawIterLocal = rawIter;
      if (getNumPhysicalInputs() == 0) {
        return new KeyValuesReader() {
          @Override
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

  @Private
  public TezRawKeyValueIterator getIterator() throws IOException, InterruptedException, TezException {
    // wait for input so that iterator is available
    synchronized(this) {
    if (getNumPhysicalInputs() == 0) {
      return new TezRawKeyValueIterator() {
        // Always throws: this placeholder iterator is returned when
        // getNumPhysicalInputs() == 0, so there is no key to read.
        @Override
        public DataInputBuffer getKey() throws IOException {
          throw new RuntimeException("No data available in Input");
        }
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

    inMemoryMergedMapOutputs.clear();
    memory.addAll(inMemoryMapOutputs);
    inMemoryMapOutputs.clear();
    List<FileChunk> disk = new ArrayList<FileChunk>(onDiskMapOutputs);
    onDiskMapOutputs.clear();
    TezRawKeyValueIterator kvIter = finalMerge(conf, rfs, memory, disk);
    this.finalMergeComplete = true;
    return kvIter;
  }
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

        // must spill to disk, but can't retain in-mem for intermediate merge
        final Path outputPath =
          mapOutputFile.getInputFileForWrite(srcTaskId,
                                             inMemToDiskBytes).suffix(
                                                 Constants.MERGED_OUTPUT_PREFIX);
        final TezRawKeyValueIterator rIter = TezMerger.merge(job, fs, keyClass, valueClass,
            memDiskSegments, numMemDiskSegments, tmpDir, comparator, nullProgressable,
            spilledRecordsCounter, null, additionalBytesRead, null);
        final Writer writer = new Writer(job, fs, outputPath,
            keyClass, valueClass, codec, null, null);
        try {
          TezMerger.writeFile(rIter, writer, nullProgressable, TezRuntimeConfiguration.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT);
        } catch (IOException e) {
          if (null != outputPath) {
            try {
              fs.delete(outputPath, true);
            } catch (IOException ie) {
              // NOTHING
            }
          }
          throw e;
        } finally {
          if (null != writer) {
            writer.close();
            additionalBytesWritten.increment(writer.getCompressedLength());
          }
        }

        final FileStatus fStatus = localFS.getFileStatus(outputPath);
        // add to list of final disk outputs.
        onDiskMapOutputs.add(new FileChunk(outputPath, 0, fStatus.getLen(), false));

        LOG.info("Merged " + numMemDiskSegments + " segments, " +
                 inMemToDiskBytes + " bytes to disk to satisfy " +
                 "reduce memory limit");
        inMemToDiskBytes = 0;
        memDiskSegments.clear();
      } else if (inMemToDiskBytes != 0) {
        LOG.info("Keeping " + numMemDiskSegments + " segments, " +
                 inMemToDiskBytes + " bytes in memory for " +
                 "intermediate, on-disk merge");
      }
    }

    // segments on disk
    List<Segment> diskSegments = new ArrayList<Segment>();
    long onDiskBytes = inMemToDiskBytes;
    FileChunk[] onDisk = onDiskMapOutputs.toArray(new FileChunk[onDiskMapOutputs.size()]);
    for (FileChunk fileChunk : onDisk) {
      final long fileLength = fileChunk.getLength();
      onDiskBytes += fileLength;
      LOG.debug("Disk file: " + fileChunk.getPath() + " Length is " + fileLength);

      final Path file = fileChunk.getPath();
      TezCounter counter =
          file.toString().endsWith(Constants.MERGED_OUTPUT_PREFIX) ? null : mergedMapOutputsCounter;

      final long fileOffset = fileChunk.getOffset();
      final boolean preserve = fileChunk.preserveAfterUse();
      diskSegments.add(new Segment(job, fs, file, fileOffset, fileLength, codec, ifileReadAhead,
                                   ifileReadAheadLength, ifileBufferSize, preserve, counter));
    }
    LOG.info("Merging " + onDisk.length + " files, " +
             onDiskBytes + " bytes from disk");
    Collections.sort(diskSegments, new Comparator<Segment>() {
      public int compare(Segment o1, Segment o2) {
        if (o1.getLength() == o2.getLength()) {
          return 0;
        }
        return o1.getLength() < o2.getLength() ? -1 : 1;
      }
    });

    // build final list of segments from merged backed by disk + in-mem
    List<Segment> finalSegments = new ArrayList<Segment>();
    long inMemBytes = createInMemorySegments(inMemoryMapOutputs,
                                             finalSegments, 0);
    LOG.info("Merging " + finalSegments.size() + " segments, " +
             inMemBytes + " bytes from memory into reduce");
    if (0 != onDiskBytes) {
      final int numInMemSegments = memDiskSegments.size();
      diskSegments.addAll(0, memDiskSegments);
      memDiskSegments.clear();
      TezRawKeyValueIterator diskMerge = TezMerger.merge(
          job, fs, keyClass, valueClass, codec, diskSegments,
          ioSortFactor, numInMemSegments, tmpDir, comparator,
          nullProgressable, false, spilledRecordsCounter, null, additionalBytesRead, null);
      diskSegments.clear();
      if (0 == finalSegments.size()) {
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

               " segments of total-size: " + mergeOutputSize);

      // Nothing will be materialized to disk because the sort factor is being
      // set to the number of in memory segments.
      // TODO Is this doing any combination ?
      TezRawKeyValueIterator rIter =
        TezMerger.merge(conf, rfs,
                       ConfigUtils.getIntermediateInputKeyClass(conf),
                       ConfigUtils.getIntermediateInputValueClass(conf),
                       inMemorySegments, inMemorySegments.size(),
                       new Path(inputContext.getUniqueIdentifier()),
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

            new Writer(conf, rfs, outputPath,
                (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
                (Class)ConfigUtils.getIntermediateInputValueClass(conf),
                codec, null, null);

        TezRawKeyValueIterator rIter = null;
        LOG.info("Initiating in-memory merge with " + noInMemorySegments +
            " segments...");

        // Nothing actually materialized to disk - controlled by setting sort-factor to #segments.
        rIter = TezMerger.merge(conf, rfs,
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

                        (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
                        (Class)ConfigUtils.getIntermediateInputValueClass(conf),
                        codec, null, null);
      Path tmpDir = new Path(inputContext.getUniqueIdentifier());
      try {
        TezRawKeyValueIterator iter = TezMerger.merge(conf, rfs,
            (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
            (Class)ConfigUtils.getIntermediateInputValueClass(conf),
            inputSegments,
            ioSortFactor, tmpDir,
            (RawComparator)ConfigUtils.getIntermediateInputKeyComparator(conf),
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

  // ZZZ Deal with these methods.
  public TezRawKeyValueIterator waitForInput() throws IOException, InterruptedException,
      TezException {
    Preconditions.checkState(runShuffleFuture != null,
        "waitForInput can only be called after run");
    TezRawKeyValueIterator kvIter = null;
    try {
      kvIter = runShuffleFuture.get();
    } catch (ExecutionException e) {
      Throwable cause = e.getCause();
      handleThrowable(cause);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.