Package org.apache.tez.runtime.library.common.shuffle.impl

Source Code of org.apache.tez.runtime.library.common.shuffle.impl.Shuffle$ShuffleError

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tez.runtime.library.common.shuffle.impl;

import java.io.IOException;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.FutureTask;

import javax.crypto.SecretKey;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.tez.common.TezJobConfig;
import org.apache.tez.common.counters.TaskCounter;
import org.apache.tez.common.counters.TezCounter;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.runtime.api.Event;
import org.apache.tez.runtime.api.TezInputContext;
import org.apache.tez.runtime.library.common.ConfigUtils;
import org.apache.tez.runtime.library.common.TezRuntimeUtils;
import org.apache.tez.runtime.library.common.combine.Combiner;
import org.apache.tez.runtime.library.common.shuffle.server.ShuffleHandler;
import org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator;
import org.apache.tez.runtime.library.shuffle.common.ShuffleUtils;

import com.google.common.base.Preconditions;

@InterfaceAudience.Private
@InterfaceStability.Unstable
public class Shuffle implements ExceptionReporter {
 
  private static final Log LOG = LogFactory.getLog(Shuffle.class);
  private static final int PROGRESS_FREQUENCY = 2000;
 
  private final Configuration conf;
  private final TezInputContext inputContext;
  private final ShuffleClientMetrics metrics;

  private final ShuffleInputEventHandler eventHandler;
  private final ShuffleScheduler scheduler;
  private final MergeManager merger;
  private Throwable throwable = null;
  private String throwingThreadName = null;
  private final int numInputs;
  private final SecretKey jobTokenSecret;
  private final CompressionCodec codec;
  private final boolean ifileReadAhead;
  private final int ifileReadAheadLength;

  private FutureTask<TezRawKeyValueIterator> runShuffleFuture;

  public Shuffle(TezInputContext inputContext, Configuration conf, int numInputs) throws IOException {
    this.inputContext = inputContext;
    this.conf = conf;
    this.metrics = new ShuffleClientMetrics(inputContext.getDAGName(),
        inputContext.getTaskVertexName(), inputContext.getTaskIndex(),
        this.conf, UserGroupInformation.getCurrentUser().getShortUserName());
           
    this.numInputs = numInputs;
   
    this.jobTokenSecret = ShuffleUtils
        .getJobTokenSecretFromTokenBytes(inputContext
            .getServiceConsumerMetaData(ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID));
   
    if (ConfigUtils.isIntermediateInputCompressed(conf)) {
      Class<? extends CompressionCodec> codecClass =
          ConfigUtils.getIntermediateInputCompressorClass(conf, DefaultCodec.class);
      codec = ReflectionUtils.newInstance(codecClass, conf);
    } else {
      codec = null;
    }
    this.ifileReadAhead = conf.getBoolean(
        TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD,
        TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT);
    if (this.ifileReadAhead) {
      this.ifileReadAheadLength = conf.getInt(
          TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES,
          TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT);
    } else {
      this.ifileReadAheadLength = 0;
    }
   
    Combiner combiner = TezRuntimeUtils.instantiateCombiner(conf, inputContext);
   
    FileSystem localFS = FileSystem.getLocal(this.conf);
    LocalDirAllocator localDirAllocator =
        new LocalDirAllocator(TezJobConfig.LOCAL_DIRS);

    // TODO TEZ Get rid of Map / Reduce references.
    TezCounter shuffledMapsCounter =
        inputContext.getCounters().findCounter(TaskCounter.SHUFFLED_MAPS);
    TezCounter reduceShuffleBytes =
        inputContext.getCounters().findCounter(TaskCounter.REDUCE_SHUFFLE_BYTES);
    TezCounter failedShuffleCounter =
        inputContext.getCounters().findCounter(TaskCounter.FAILED_SHUFFLE);
    TezCounter spilledRecordsCounter =
        inputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter reduceCombineInputCounter =
        inputContext.getCounters().findCounter(TaskCounter.COMBINE_INPUT_RECORDS);
    TezCounter mergedMapOutputsCounter =
        inputContext.getCounters().findCounter(TaskCounter.MERGED_MAP_OUTPUTS);
   
    LOG.info("Shuffle assigned with " + numInputs + " inputs" + ", codec: "
        + (codec == null ? "None" : codec.getClass().getName()) +
        "ifileReadAhead: " + ifileReadAhead);

    scheduler = new ShuffleScheduler(
          this.inputContext,
          this.conf,
          this.numInputs,
          this,
          shuffledMapsCounter,
          reduceShuffleBytes,
          failedShuffleCounter);
    eventHandler= new ShuffleInputEventHandler(
          inputContext,
          scheduler);
    merger = new MergeManager(
          this.conf,
          localFS,
          localDirAllocator,
          inputContext,
          combiner,
          spilledRecordsCounter,
          reduceCombineInputCounter,
          mergedMapOutputsCounter,
          this);
  }

  public void handleEvents(List<Event> events) {
    eventHandler.handleEvents(events);
  }
 
  /**
   * Indicates whether the Shuffle and Merge processing is complete.
   * @return false if not complete, true if complete or if an error occurred.
   */
  public boolean isInputReady() {
    if (runShuffleFuture == null) {
      return false;
    }
    return runShuffleFuture.isDone();
    //return scheduler.isDone() && merger.isMergeComplete();
  }

  /**
   * Waits for the Shuffle and Merge to complete, and returns an iterator over the input.
   * @return an iterator over the fetched input.
   * @throws IOException
   * @throws InterruptedException
   */
  public TezRawKeyValueIterator waitForInput() throws IOException, InterruptedException {
    Preconditions.checkState(runShuffleFuture != null,
        "waitForInput can only be called after run");
    TezRawKeyValueIterator kvIter;
    try {
      kvIter = runShuffleFuture.get();
    } catch (ExecutionException e) {
      Throwable cause = e.getCause();
      if (cause instanceof IOException) {
        throw (IOException) cause;
      } else if (cause instanceof InterruptedException) {
        throw (InterruptedException) cause;
      } else {
        throw new TezUncheckedException(
            "Unexpected exception type while running Shuffle and Merge", cause);
      }
    }
    return kvIter;
  }

  public void run() {
    RunShuffleCallable runShuffle = new RunShuffleCallable();
    runShuffleFuture = new FutureTask<TezRawKeyValueIterator>(runShuffle);
    new Thread(runShuffleFuture, "ShuffleMergeRunner").start();
  }
 
  private class RunShuffleCallable implements Callable<TezRawKeyValueIterator> {
    @Override
    public TezRawKeyValueIterator call() throws IOException, InterruptedException {
      // TODO NEWTEZ Limit # fetchers to number of inputs
      final int numFetchers =
          conf.getInt(
              TezJobConfig.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES,
              TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES);
      Fetcher[] fetchers = new Fetcher[numFetchers];
      for (int i = 0; i < numFetchers; ++i) {
        fetchers[i] = new Fetcher(conf, scheduler, merger, metrics,
            Shuffle.this, jobTokenSecret, ifileReadAhead, ifileReadAheadLength,
            codec, inputContext);
       
        fetchers[i].start();
      }
     
      while (!scheduler.waitUntilDone(PROGRESS_FREQUENCY)) {
        synchronized (this) {
          if (throwable != null) {
            throw new ShuffleError("error in shuffle in " + throwingThreadName,
                                   throwable);
          }
        }
      }
     
      // Stop the map-output fetcher threads
      for (Fetcher fetcher : fetchers) {
        fetcher.shutDown();
      }
      fetchers = null;
     
      // stop the scheduler
      scheduler.close();


      // Finish the on-going merges...
      TezRawKeyValueIterator kvIter = null;
      try {
        kvIter = merger.close();
      } catch (Throwable e) {
        throw new ShuffleError("Error while doing final merge " , e);
      }
     
      // Sanity check
      synchronized (Shuffle.this) {
        if (throwable != null) {
          throw new ShuffleError("error in shuffle in " + throwingThreadName,
                                 throwable);
        }
      }
      return kvIter;
    }
  }
 
  public synchronized void reportException(Throwable t) {
    if (throwable == null) {
      throwable = t;
      throwingThreadName = Thread.currentThread().getName();
      // Notify the scheduler so that the reporting thread finds the
      // exception immediately.
      synchronized (scheduler) {
        scheduler.notifyAll();
      }
    }
  }
 
  public static class ShuffleError extends IOException {
    private static final long serialVersionUID = 5753909320586607881L;

    ShuffleError(String msg, Throwable t) {
      super(msg, t);
    }
  }

}
TOP

Related Classes of org.apache.tez.runtime.library.common.shuffle.impl.Shuffle$ShuffleError

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle, Inc. Contact coftware#gmail.com.