Examples of TezRawKeyValueIterator


Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

     
      // stop the scheduler
      cleanupShuffleScheduler(false);

      // Finish the on-going merges...
      TezRawKeyValueIterator kvIter = null;
      try {
        kvIter = merger.close();
      } catch (Throwable e) {
        throw new ShuffleError("Error while doing final merge " , e);
      }
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

              ++spindex;
            }
            // Note: we would like to avoid the combiner if we've fewer
            // than some threshold of records for a partition
            if (spstart != spindex) {
              TezRawKeyValueIterator kvIter =
                new MRResultIterator(spstart, spindex);
              if (LOG.isDebugEnabled()) {
                LOG.debug("Running combine processor");
              }
              runCombineProcessor(kvIter, writer);
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

            this.conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR,
                TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR_DEFAULT);
        // sort the segments only if there are intermediate merges
        boolean sortSegments = segmentList.size() > mergeFactor;
        //merge
        TezRawKeyValueIterator kvIter = TezMerger.merge(conf, rfs,
                       keyClass, valClass, codec,
                       segmentList, mergeFactor,
                       new Path(taskIdentifier),
                       (RawComparator)ConfigUtils.getIntermediateOutputKeyComparator(conf),
                       nullProgressable, sortSegments, true,
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

   
    List<MapOutput> memory =
      new ArrayList<MapOutput>(inMemoryMergedMapOutputs);
    memory.addAll(inMemoryMapOutputs);
    List<Path> disk = new ArrayList<Path>(onDiskMapOutputs);
    TezRawKeyValueIterator kvIter = finalMerge(conf, rfs, memory, disk);
    this.finalMergeComplete = true;
    return kvIter;
  }
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

        // must spill to disk, but can't retain in-mem for intermediate merge
        final Path outputPath =
          mapOutputFile.getInputFileForWrite(srcTaskId,
                                             inMemToDiskBytes).suffix(
                                                 Constants.MERGED_OUTPUT_PREFIX);
        final TezRawKeyValueIterator rIter = TezMerger.merge(job, fs,
            keyClass, valueClass, memDiskSegments, numMemDiskSegments,
            tmpDir, comparator, nullProgressable, spilledRecordsCounter, null, null);
        final Writer writer = new Writer(job, fs, outputPath,
            keyClass, valueClass, codec, null);
        try {
          TezMerger.writeFile(rIter, writer, nullProgressable, TezJobConfig.DEFAULT_RECORDS_BEFORE_PROGRESS);
          // add to list of final disk outputs.
          onDiskMapOutputs.add(outputPath);
        } catch (IOException e) {
          if (null != outputPath) {
            try {
              fs.delete(outputPath, true);
            } catch (IOException ie) {
              // NOTHING
            }
          }
          throw e;
        } finally {
          if (null != writer) {
            writer.close();
          }
        }
        LOG.info("Merged " + numMemDiskSegments + " segments, " +
                 inMemToDiskBytes + " bytes to disk to satisfy " +
                 "reduce memory limit");
        inMemToDiskBytes = 0;
        memDiskSegments.clear();
      } else if (inMemToDiskBytes != 0) {
        LOG.info("Keeping " + numMemDiskSegments + " segments, " +
                 inMemToDiskBytes + " bytes in memory for " +
                 "intermediate, on-disk merge");
      }
    }

    // segments on disk
    List<Segment> diskSegments = new ArrayList<Segment>();
    long onDiskBytes = inMemToDiskBytes;
    Path[] onDisk = onDiskMapOutputs.toArray(new Path[onDiskMapOutputs.size()]);
    for (Path file : onDisk) {
      onDiskBytes += fs.getFileStatus(file).getLen();
      LOG.debug("Disk file: " + file + " Length is " +
          fs.getFileStatus(file).getLen());
      diskSegments.add(new Segment(job, fs, file, codec, ifileReadAhead,
                                   ifileReadAheadLength, ifileBufferSize, false,
                                         (file.toString().endsWith(
                                             Constants.MERGED_OUTPUT_PREFIX) ?
                                          null : mergedMapOutputsCounter)
                                        ));
    }
    LOG.info("Merging " + onDisk.length + " files, " +
             onDiskBytes + " bytes from disk");
    Collections.sort(diskSegments, new Comparator<Segment>() {
      public int compare(Segment o1, Segment o2) {
        if (o1.getLength() == o2.getLength()) {
          return 0;
        }
        return o1.getLength() < o2.getLength() ? -1 : 1;
      }
    });

    // build final list of segments from merged backed by disk + in-mem
    List<Segment> finalSegments = new ArrayList<Segment>();
    long inMemBytes = createInMemorySegments(inMemoryMapOutputs,
                                             finalSegments, 0);
    LOG.info("Merging " + finalSegments.size() + " segments, " +
             inMemBytes + " bytes from memory into reduce");
    if (0 != onDiskBytes) {
      final int numInMemSegments = memDiskSegments.size();
      diskSegments.addAll(0, memDiskSegments);
      memDiskSegments.clear();
      TezRawKeyValueIterator diskMerge = TezMerger.merge(
          job, fs, keyClass, valueClass, diskSegments,
          ioSortFactor, numInMemSegments, tmpDir, comparator,
          nullProgressable, false, spilledRecordsCounter, null, null);
      diskSegments.clear();
      if (0 == finalSegments.size()) {
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

        new InMemoryWriter(mergedMapOutputs.getArrayStream());
     
      LOG.info("Initiating Memory-to-Memory merge with " + noInMemorySegments +
               " segments of total-size: " + mergeOutputSize);

      TezRawKeyValueIterator rIter =
        TezMerger.merge(conf, rfs,
                       ConfigUtils.getIntermediateInputKeyClass(conf),
                       ConfigUtils.getIntermediateInputValueClass(conf),
                       inMemorySegments, inMemorySegments.size(),
                       new Path(inputContext.getUniqueIdentifier()),
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

            new Writer(conf, rfs, outputPath,
                (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
                (Class)ConfigUtils.getIntermediateInputValueClass(conf),
                codec, null);

        TezRawKeyValueIterator rIter = null;
        LOG.info("Initiating in-memory merge with " + noInMemorySegments +
            " segments...");

        rIter = TezMerger.merge(conf, rfs,
            (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

      Writer writer =
        new Writer(conf, rfs, outputPath,
                        (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
                        (Class)ConfigUtils.getIntermediateInputValueClass(conf),
                        codec, null);
      TezRawKeyValueIterator iter  = null;
      Path tmpDir = new Path(inputContext.getUniqueIdentifier());
      try {
        iter = TezMerger.merge(conf, rfs,
                            (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
                            (Class)ConfigUtils.getIntermediateInputValueClass(conf),
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

   * @throws InterruptedException
   */
  public TezRawKeyValueIterator waitForInput() throws IOException, InterruptedException {
    Preconditions.checkState(runShuffleFuture != null,
        "waitForInput can only be called after run");
    TezRawKeyValueIterator kvIter;
    try {
      kvIter = runShuffleFuture.get();
    } catch (ExecutionException e) {
      Throwable cause = e.getCause();
      if (cause instanceof IOException) {
View Full Code Here

Examples of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator

      // stop the scheduler
      scheduler.close();


      // Finish the on-going merges...
      TezRawKeyValueIterator kvIter = null;
      try {
        kvIter = merger.close();
      } catch (Throwable e) {
        throw new ShuffleError("Error while doing final merge " , e);
      }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.