Package org.apache.tez.runtime.library.common.sort.impl.IFile

Examples of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer


   *
   * @param data
   * @throws IOException
   */
  private void testWriterAndReader(List<KVPair> data) throws IOException {
    Writer writer = null;
    //No RLE, No RepeatKeys
    writer = writeTestFile(false, false, data, null);
    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, null);

    writer = writeTestFile(false, false, data, codec);
    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec);

    //No RLE, RepeatKeys
    writer = writeTestFile(false, true, data, null);
    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, null);

    writer = writeTestFile(false, true, data, codec);
    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec);

    //RLE, No RepeatKeys
    writer = writeTestFile(true, false, data, null);
    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, null);

    writer = writeTestFile(true, false, data, codec);
    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec);

    //RLE, RepeatKeys
    writer = writeTestFile(true, true, data, null);
    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, null);

    writer = writeTestFile(true, true, data, codec);
    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec);
  }
View Full Code Here


   * @param data
   * @throws IOException
   */
  private void testWithDataBuffer(List<KVPair> data) throws
      IOException {
    Writer writer = null;
    //No RLE, No RepeatKeys
    writer = writeTestFileUsingDataBuffer(false, false, data, null);
    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, null);

    writer = writeTestFileUsingDataBuffer(false, false, data, codec);
    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec);

    //No RLE, RepeatKeys
    writer = writeTestFileUsingDataBuffer(false, true, data, null);
    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, null);

    writer = writeTestFileUsingDataBuffer(false, true, data, codec);
    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec);

    //RLE, No RepeatKeys
    writer = writeTestFileUsingDataBuffer(true, false, data, null);
    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, null);

    writer = writeTestFileUsingDataBuffer(true, false, data, codec);
    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec);

    //RLE, RepeatKeys
    writer = writeTestFileUsingDataBuffer(true, true, data, null);
    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, null);

    writer = writeTestFileUsingDataBuffer(true, true, data, codec);
    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec);
  }
View Full Code Here

                                             inMemToDiskBytes).suffix(
                                                 Constants.MERGED_OUTPUT_PREFIX);
        final TezRawKeyValueIterator rIter = TezMerger.merge(job, fs, keyClass, valueClass,
            memDiskSegments, numMemDiskSegments, tmpDir, comparator, nullProgressable,
            spilledRecordsCounter, null, additionalBytesRead, null);
        final Writer writer = new Writer(job, fs, outputPath,
            keyClass, valueClass, codec, null, null);
        try {
          TezMerger.writeFile(rIter, writer, nullProgressable, TezRuntimeConfiguration.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT);
        } catch (IOException e) {
          if (null != outputPath) {
            try {
              fs.delete(outputPath, true);
            } catch (IOException ie) {
              // NOTHING
            }
          }
          throw e;
        } finally {
          if (null != writer) {
            writer.close();
            additionalBytesWritten.increment(writer.getCompressedLength());
          }
        }

        final FileStatus fStatus = localFS.getFileStatus(outputPath);
        // add to list of final disk outputs.
View Full Code Here

      int noInMemorySegments = inMemorySegments.size();
     
      MapOutput mergedMapOutputs =
        unconditionalReserve(dummyMapId, mergeOutputSize, false);
     
      Writer writer =
        new InMemoryWriter(mergedMapOutputs.getArrayStream());

      LOG.info("Initiating Memory-to-Memory merge with " + noInMemorySegments +
               " segments of total-size: " + mergeOutputSize);

      // Nothing will be materialized to disk because the sort factor is being
      // set to the number of in memory segments.
      // TODO Is this doing any combination ?
      TezRawKeyValueIterator rIter =
        TezMerger.merge(conf, rfs,
                       ConfigUtils.getIntermediateInputKeyClass(conf),
                       ConfigUtils.getIntermediateInputValueClass(conf),
                       inMemorySegments, inMemorySegments.size(),
                       new Path(inputContext.getUniqueIdentifier()),
                       (RawComparator)ConfigUtils.getIntermediateInputKeyComparator(conf),
                       nullProgressable, null, null, null, null);
      TezMerger.writeFile(rIter, writer, nullProgressable, TezRuntimeConfiguration.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT);
      writer.close();

      LOG.info(inputContext.getUniqueIdentifier()
               " Memory-to-Memory merge of the " + noInMemorySegments +
               " files in-memory complete.");
View Full Code Here

      // adequate memory to keep all segments in memory.
      Path outputPath = mapOutputFile.getInputFileForWrite(
          srcTaskIdentifier.getInputIdentifier().getInputIndex(),
          mergeOutputSize).suffix(Constants.MERGED_OUTPUT_PREFIX);

      Writer writer = null;
      long outFileLen = 0;
      try {
        writer =
            new Writer(conf, rfs, outputPath,
                (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
                (Class)ConfigUtils.getIntermediateInputValueClass(conf),
                codec, null, null);

        TezRawKeyValueIterator rIter = null;
        LOG.info("Initiating in-memory merge with " + noInMemorySegments +
            " segments...");

        // Nothing actually materialized to disk - controlled by setting sort-factor to #segments.
        rIter = TezMerger.merge(conf, rfs,
            (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
            (Class)ConfigUtils.getIntermediateInputValueClass(conf),
            inMemorySegments, inMemorySegments.size(),
            new Path(inputContext.getUniqueIdentifier()),
            (RawComparator)ConfigUtils.getIntermediateInputKeyComparator(conf),
            nullProgressable, spilledRecordsCounter, null, additionalBytesRead, null);
        // spilledRecordsCounter is tracking the number of keys that will be
        // read from each of the segments being merged - which is essentially
        // what will be written to disk.

        if (null == combiner) {
          TezMerger.writeFile(rIter, writer, nullProgressable, TezRuntimeConfiguration.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT);
        } else {
          // TODO Counters for Combine
          runCombineProcessor(rIter, writer);
        }
        writer.close();
        additionalBytesWritten.increment(writer.getCompressedLength());
        writer = null;

        outFileLen = localFS.getFileStatus(outputPath).getLen();
        LOG.info(inputContext.getUniqueIdentifier() +
            " Merge of the " + noInMemorySegments +
            " files in-memory complete." +
            " Local file is " + outputPath + " of size " +
            outFileLen);
      } catch (IOException e) {
        //make sure that we delete the ondisk file that we created
        //earlier when we invoked cloneFileAttributes
        localFS.delete(outputPath, true);
        throw e;
      } finally {
        if (writer != null) {
          writer.close();
        }
      }

      // Note the output of the merge
      closeOnDiskFile(new FileChunk(outputPath, 0, outFileLen, false));
View Full Code Here

      // 2. Start the on-disk merge process
      Path outputPath =
        localDirAllocator.getLocalPathForWrite(inputs.get(0).getPath().toString(),
            approxOutputSize, conf).suffix(Constants.MERGED_OUTPUT_PREFIX);
      Writer writer =
        new Writer(conf, rfs, outputPath,
                        (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
                        (Class)ConfigUtils.getIntermediateInputValueClass(conf),
                        codec, null, null);
      Path tmpDir = new Path(inputContext.getUniqueIdentifier());
      try {
        TezRawKeyValueIterator iter = TezMerger.merge(conf, rfs,
            (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
            (Class)ConfigUtils.getIntermediateInputValueClass(conf),
            inputSegments,
            ioSortFactor, tmpDir,
            (RawComparator)ConfigUtils.getIntermediateInputKeyComparator(conf),
            nullProgressable, true, spilledRecordsCounter, null,
            mergedMapOutputsCounter, null);

        // TODO Maybe differentiate between data written because of Merges and
        // the finalMerge (i.e. final mem available may be different from
        // initial merge mem)
        TezMerger.writeFile(iter, writer, nullProgressable, TezRuntimeConfiguration.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT);
        writer.close();
        additionalBytesWritten.increment(writer.getCompressedLength());
      } catch (IOException e) {
        localFS.delete(outputPath, true);
        throw e;
      }
View Full Code Here

          long segmentStart = out.getPos();
          if (wrappedBuffer.partitionPositions[i] == WrappedBuffer.PARTITION_ABSENT_POSITION) {
            // Skip empty partition.
            continue;
          }
          writer = new Writer(conf, out, keyClass, valClass, codec, numRecordsCounter, null);
          writePartition(wrappedBuffer.partitionPositions[i], wrappedBuffer, writer, key, val);
          writer.close();
          if (isFinalSpill) {
            fileOutputBytesCounter.increment(writer.getCompressedLength());
          } else {
View Full Code Here

      final InMemValBytes value = createInMemValBytes();
      for (int i = 0; i < partitions; ++i) {
        IFile.Writer writer = null;
        try {
          long segmentStart = out.getPos();
          writer = new Writer(conf, out, keyClass, valClass, codec,
                                    spilledRecordsCounter, null);
          if (combiner == null) {
            // spill directly
            DataInputBuffer key = new DataInputBuffer();
            while (spindex < mend &&
View Full Code Here

      TezSpillRecord sr = new TezSpillRecord(partitions);
      try {
        for (int i = 0; i < partitions; i++) {
          long segmentStart = finalOut.getPos();
          Writer writer =
            new Writer(conf, finalOut, keyClass, valClass, codec, null, null);
          writer.close();

          TezIndexRecord rec =
              new TezIndexRecord(
                  segmentStart,
                  writer.getRawLength(),
                  writer.getCompressedLength());
          // Covers the case of multiple spills.
          outputBytesWithOverheadCounter.increment(writer.getRawLength());
          sr.putIndex(rec, i);
        }
        sr.writeToFile(finalIndexFile, conf);
      } finally {
        finalOut.close();
      }
      return;
    }
    else {
      final TezSpillRecord spillRec = new TezSpillRecord(partitions);
      for (int parts = 0; parts < partitions; parts++) {
        //create the segments to be merged
        List<Segment> segmentList =
          new ArrayList<Segment>(numSpills);
        for(int i = 0; i < numSpills; i++) {
          TezIndexRecord indexRecord = indexCacheList.get(i).getIndex(parts);

          Segment s =
            new Segment(conf, rfs, filename[i], indexRecord.getStartOffset(),
                             indexRecord.getPartLength(), codec, ifileReadAhead,
                             ifileReadAheadLength, ifileBufferSize, true);
          segmentList.add(i, s);

          if (LOG.isDebugEnabled()) {
            LOG.debug("TaskIdentifier=" + taskIdentifier + " Partition=" + parts +
                "Spill =" + i + "(" + indexRecord.getStartOffset() + "," +
                indexRecord.getRawLength() + ", " +
                indexRecord.getPartLength() + ")");
          }
        }

        int mergeFactor =
            this.conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR,
                TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR_DEFAULT);
        // sort the segments only if there are intermediate merges
        boolean sortSegments = segmentList.size() > mergeFactor;
        //merge
        TezRawKeyValueIterator kvIter = TezMerger.merge(conf, rfs,
                       keyClass, valClass, codec,
                       segmentList, mergeFactor,
                       new Path(taskIdentifier),
                       (RawComparator)ConfigUtils.getIntermediateOutputKeyComparator(conf),
                       nullProgressable, sortSegments, true,
                       null, spilledRecordsCounter, additionalSpillBytesRead,
                       null); // Not using any Progress in TezMerger. Should just work.

        //write merged output to disk
        long segmentStart = finalOut.getPos();
        Writer writer =
            new Writer(conf, finalOut, keyClass, valClass, codec,
                spilledRecordsCounter, null);
        if (combiner == null || numSpills < minSpillsForCombine) {
          TezMerger.writeFile(kvIter, writer,
              nullProgressable, TezRuntimeConfiguration.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT);
        } else {
          runCombineProcessor(kvIter, writer);
        }
        writer.close();

        // record offsets
        final TezIndexRecord rec =
            new TezIndexRecord(
                segmentStart,
                writer.getRawLength(),
                writer.getCompressedLength());
        spillRec.putIndex(rec, parts);
      }
      spillRec.writeToFile(finalIndexFile, conf);
      finalOut.close();
      for(int i = 0; i < numSpills; i++) {
View Full Code Here

                                              tmpFilename.toString(),
                                              approxOutputSize, conf);

          // TODO Would it ever make sense to make this an in-memory writer ?
          // Merging because of too many disk segments - might fit in memory.
          Writer writer =
            new Writer(conf, fs, outputFile, keyClass, valueClass, codec,
                             writesCounter, null);

          writeFile(this, writer, reporter, recordsBeforeProgress);
          writer.close();
         
          //we finished one single level merge; now clean up the priority
          //queue
          this.close();
View Full Code Here

TOP

Related Classes of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.