Package org.apache.tez.runtime.library.common.sort.impl.IFile

Examples of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer


      TezSpillRecord sr = new TezSpillRecord(partitions);
      try {
        for (int i = 0; i < partitions; i++) {
          long segmentStart = finalOut.getPos();
          Writer writer =
            new Writer(conf, finalOut, keyClass, valClass, codec, null, null);
          writer.close();

          TezIndexRecord rec =
              new TezIndexRecord(
                  segmentStart,
                  writer.getRawLength(),
                  writer.getCompressedLength());
          // Covers the case of multiple spills.
          outputBytesWithOverheadCounter.increment(writer.getRawLength());
          sr.putIndex(rec, i);
        }
        sr.writeToFile(finalIndexFile, conf);
      } finally {
        finalOut.close();
      }
      return;
    }
    else {
      final TezSpillRecord spillRec = new TezSpillRecord(partitions);
      for (int parts = 0; parts < partitions; parts++) {
        //create the segments to be merged
        List<Segment> segmentList =
          new ArrayList<Segment>(numSpills);
        for(int i = 0; i < numSpills; i++) {
          TezIndexRecord indexRecord = indexCacheList.get(i).getIndex(parts);

          Segment s =
            new Segment(conf, rfs, filename[i], indexRecord.getStartOffset(),
                             indexRecord.getPartLength(), codec, ifileReadAhead,
                             ifileReadAheadLength, ifileBufferSize, true);
          segmentList.add(i, s);

          if (LOG.isDebugEnabled()) {
            LOG.debug("TaskIdentifier=" + taskIdentifier + " Partition=" + parts +
                "Spill =" + i + "(" + indexRecord.getStartOffset() + "," +
                indexRecord.getRawLength() + ", " +
                indexRecord.getPartLength() + ")");
          }
        }

        int mergeFactor =
            this.conf.getInt(TezJobConfig.TEZ_RUNTIME_IO_SORT_FACTOR,
                TezJobConfig.DEFAULT_TEZ_RUNTIME_IO_SORT_FACTOR);
        // sort the segments only if there are intermediate merges
        boolean sortSegments = segmentList.size() > mergeFactor;
        //merge
        TezRawKeyValueIterator kvIter = TezMerger.merge(conf, rfs,
                       keyClass, valClass, codec,
                       segmentList, mergeFactor,
                       new Path(taskIdentifier),
                       (RawComparator)ConfigUtils.getIntermediateOutputKeyComparator(conf),
                       nullProgressable, sortSegments, true,
                       null, spilledRecordsCounter, additionalSpillBytesRead,
                       null); // Not using any Progress in TezMerger. Should just work.

        //write merged output to disk
        long segmentStart = finalOut.getPos();
        Writer writer =
            new Writer(conf, finalOut, keyClass, valClass, codec,
                spilledRecordsCounter, null);
        if (combiner == null || numSpills < minSpillsForCombine) {
          TezMerger.writeFile(kvIter, writer,
              nullProgressable, TezJobConfig.DEFAULT_RECORDS_BEFORE_PROGRESS);
        } else {
          runCombineProcessor(kvIter, writer);
        }
        writer.close();

        // record offsets
        final TezIndexRecord rec =
            new TezIndexRecord(
                segmentStart,
                writer.getRawLength(),
                writer.getCompressedLength());
        spillRec.putIndex(rec, parts);
      }
      spillRec.writeToFile(finalIndexFile, conf);
      finalOut.close();
      for(int i = 0; i < numSpills; i++) {
View Full Code Here


                                              tmpFilename.toString(),
                                              approxOutputSize, conf);

          // TODO Would it ever make sense to make this an in-memory writer ?
          // Merging because of too many disk segments - might fit in memory.
          Writer writer =
            new Writer(conf, fs, outputFile, keyClass, valueClass, codec,
                             writesCounter, null);

          writeFile(this, writer, reporter, recordsBeforeProgress);
          writer.close();
         
          //we finished one single level merge; now clean up the priority
          //queue
          this.close();
View Full Code Here

      LOG.info("Spilling to " + filename.toString());
      for (int i = 0; i < partitions; ++i) {
        TezRawKeyValueIterator kvIter = merger.filter(i);
        //write merged output to disk
        long segmentStart = out.getPos();
        Writer writer =
          new Writer(conf, out, keyClass, valClass, codec,
              spilledRecordsCounter, null);
        writer.setRLE(merger.needsRLE());
        if (combiner == null) {
          while(kvIter.next()) {
            writer.append(kvIter.getKey(), kvIter.getValue());
          }
        } else {         
          runCombineProcessor(kvIter, writer);
        }
        //close
        writer.close();

        // record offsets
        final TezIndexRecord rec =
            new TezIndexRecord(
                segmentStart,
                writer.getRawLength(),
                writer.getCompressedLength());
        spillRec.putIndex(rec, i);
      }

      Path indexFilename =
        mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
View Full Code Here

                     null, spilledRecordsCounter, null,
                     null); // Not using any Progress in TezMerger. Should just work.

      //write merged output to disk
      long segmentStart = finalOut.getPos();
      Writer writer =
          new Writer(conf, finalOut, keyClass, valClass, codec,
                           spilledRecordsCounter, null);
      writer.setRLE(merger.needsRLE());
      if (combiner == null || numSpills < minSpillsForCombine) {
        TezMerger.writeFile(kvIter, writer, nullProgressable, TezJobConfig.DEFAULT_RECORDS_BEFORE_PROGRESS);
      } else {
        runCombineProcessor(kvIter, writer);
      }

      //close
      writer.close();

      // record offsets
      final TezIndexRecord rec =
          new TezIndexRecord(
              segmentStart,
              writer.getRawLength(),
              writer.getCompressedLength());
      spillRec.putIndex(rec, parts);
    }

    spillRec.writeToFile(finalIndexFile, conf);
    finalOut.close();
View Full Code Here

                                              tmpFilename.toString(),
                                              approxOutputSize, conf);

          // TODO Would it ever make sense to make this an in-memory writer ?
          // Merging because of too many disk segments - might fit in memory.
          Writer writer =
            new Writer(conf, fs, outputFile, keyClass, valueClass, codec,
                             writesCounter, null);

          writeFile(this, writer, reporter, recordsBeforeProgress);
          writer.close();
         
          //we finished one single level merge; now clean up the priority
          //queue
          this.close();
View Full Code Here

TOP

Related Classes of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.