Package org.apache.flink.runtime.io.network

Source Code of org.apache.flink.runtime.io.network.ChannelManager

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/


package org.apache.flink.runtime.io.network;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.flink.core.io.IOReadableWritable;
import org.apache.flink.runtime.AbstractID;
import org.apache.flink.runtime.execution.CancelTaskException;
import org.apache.flink.runtime.execution.Environment;
import org.apache.flink.runtime.execution.RuntimeEnvironment;
import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
import org.apache.flink.runtime.instance.InstanceConnectionInfo;
import org.apache.flink.runtime.io.network.bufferprovider.BufferProvider;
import org.apache.flink.runtime.io.network.bufferprovider.BufferProviderBroker;
import org.apache.flink.runtime.io.network.bufferprovider.DiscardBufferPool;
import org.apache.flink.runtime.io.network.bufferprovider.GlobalBufferPool;
import org.apache.flink.runtime.io.network.bufferprovider.LocalBufferPoolOwner;
import org.apache.flink.runtime.io.network.channels.Channel;
import org.apache.flink.runtime.io.network.channels.ChannelID;
import org.apache.flink.runtime.io.network.channels.ChannelType;
import org.apache.flink.runtime.io.network.channels.InputChannel;
import org.apache.flink.runtime.io.network.channels.OutputChannel;
import org.apache.flink.runtime.io.network.gates.GateID;
import org.apache.flink.runtime.io.network.gates.InputGate;
import org.apache.flink.runtime.io.network.gates.OutputGate;
import org.apache.flink.runtime.jobgraph.JobID;
import org.apache.flink.runtime.protocols.ChannelLookupProtocol;
import org.apache.flink.runtime.taskmanager.Task;
import org.apache.flink.util.ExceptionUtils;

import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

/**
* The channel manager sets up the network buffers and dispatches data between channels.
*/
public class ChannelManager implements EnvelopeDispatcher, BufferProviderBroker {

  private static final Logger LOG = LoggerFactory.getLogger(ChannelManager.class);

  private final ChannelLookupProtocol channelLookupService;

  private final InstanceConnectionInfo connectionInfo;

  private final Map<ChannelID, Channel> channels;

  private final Map<AbstractID, LocalBufferPoolOwner> localBuffersPools;

  private final Map<ChannelID, EnvelopeReceiverList> receiverCache;

  private final GlobalBufferPool globalBufferPool;

  private final NetworkConnectionManager networkConnectionManager;
 
  private final InetSocketAddress ourAddress;
 
  private final DiscardBufferPool discardBufferPool;

  // -----------------------------------------------------------------------------------------------------------------

  public ChannelManager(ChannelLookupProtocol channelLookupService, InstanceConnectionInfo connectionInfo,
      int numNetworkBuffers, int networkBufferSize, NetworkConnectionManager networkConnectionManager) throws IOException {

    this.channelLookupService = channelLookupService;
    this.connectionInfo = connectionInfo;

    try {
      this.globalBufferPool = new GlobalBufferPool(numNetworkBuffers, networkBufferSize);
    } catch (Throwable e) {
      throw new IOException("Failed to instantiate GlobalBufferPool.", e);
    }

    this.networkConnectionManager = networkConnectionManager;
    networkConnectionManager.start(this);

    // management data structures
    this.channels = new ConcurrentHashMap<ChannelID, Channel>();
    this.receiverCache = new ConcurrentHashMap<ChannelID, EnvelopeReceiverList>();
    this.localBuffersPools = new ConcurrentHashMap<AbstractID, LocalBufferPoolOwner>();
   
    this.ourAddress = new InetSocketAddress(connectionInfo.address(), connectionInfo.dataPort());
   
    // a special pool if the data is to be discarded
    this.discardBufferPool = new DiscardBufferPool();
  }

  public void shutdown() throws IOException {
    this.networkConnectionManager.shutdown();

    this.globalBufferPool.destroy();
  }

  public GlobalBufferPool getGlobalBufferPool() {
    return globalBufferPool;
  }
 
  // -----------------------------------------------------------------------------------------------------------------
  //                                               Task registration
  // -----------------------------------------------------------------------------------------------------------------

  /**
   * Registers the given task with the channel manager.
   *
   * @param task the task to be registered
   * @throws InsufficientResourcesException thrown if not enough buffers available to safely run this task
   */
  public void register(Task task) throws InsufficientResourcesException {
    // Check if we can safely run this task with the given buffers
    ensureBufferAvailability(task);

    RuntimeEnvironment environment = task.getEnvironment();

    // -------------------------------------------------------------------------------------------------------------
    //                                       Register output channels
    // -------------------------------------------------------------------------------------------------------------

    environment.registerGlobalBufferPool(this.globalBufferPool);

    if (this.localBuffersPools.containsKey(task.getExecutionId())) {
      throw new IllegalStateException("Execution " + task.getExecutionId() + " has a previous buffer pool owner");
    }

    for (OutputGate gate : environment.outputGates()) {
      // add receiver list hints
      for (OutputChannel channel : gate.channels()) {
        // register envelope dispatcher with the channel
        channel.registerEnvelopeDispatcher(this);

        switch (channel.getChannelType()) {
          case IN_MEMORY:
            addReceiverListHint(channel.getID(), channel.getConnectedId());
            break;
          case NETWORK:
            addReceiverListHint(channel.getConnectedId(), channel.getID());
            break;
        }

        this.channels.put(channel.getID(), channel);
      }
    }

    this.localBuffersPools.put(task.getExecutionId(), environment);

    // -------------------------------------------------------------------------------------------------------------
    //                                       Register input channels
    // -------------------------------------------------------------------------------------------------------------

    // register global
    for (InputGate<?> gate : environment.inputGates()) {
      gate.registerGlobalBufferPool(this.globalBufferPool);

      for (int i = 0; i < gate.getNumberOfInputChannels(); i++) {
        InputChannel<? extends IOReadableWritable> channel = gate.getInputChannel(i);
        channel.registerEnvelopeDispatcher(this);

        if (channel.getChannelType() == ChannelType.IN_MEMORY) {
          addReceiverListHint(channel.getID(), channel.getConnectedId());
        }

        this.channels.put(channel.getID(), channel);
      }

      this.localBuffersPools.put(gate.getGateID(), gate);
    }

    // the number of channels per buffers has changed after unregistering the task
    // => redistribute the number of designated buffers of the registered local buffer pools
    redistributeBuffers();
  }

  /**
   * Unregisters the given task from the channel manager.
   *
   * @param executionId the ID of the task to be unregistered
   * @param task the task to be unregistered
   */
  public void unregister(ExecutionAttemptID executionId, Task task) {
    final Environment environment = task.getEnvironment();
    if (environment == null) {
      return;
    }

    // destroy and remove OUTPUT channels from registered channels and cache
    for (ChannelID id : environment.getOutputChannelIDs()) {
      Channel channel = this.channels.remove(id);
      if (channel != null) {
        channel.destroy();
        this.receiverCache.remove(channel);
      }
    }

    // destroy and remove INPUT channels from registered channels and cache
    for (ChannelID id : environment.getInputChannelIDs()) {
      Channel channel = this.channels.remove(id);
      if (channel != null) {
        channel.destroy();
        this.receiverCache.remove(channel);
      }
    }

    // clear and remove INPUT side buffer pools
    for (GateID id : environment.getInputGateIDs()) {
      LocalBufferPoolOwner bufferPool = this.localBuffersPools.remove(id);
      if (bufferPool != null) {
        bufferPool.clearLocalBufferPool();
      }
    }

    // clear and remove OUTPUT side buffer pool
    LocalBufferPoolOwner bufferPool = this.localBuffersPools.remove(executionId);
    if (bufferPool != null) {
      bufferPool.clearLocalBufferPool();
    }

    // the number of channels per buffers has changed after unregistering the task
    // => redistribute the number of designated buffers of the registered local buffer pools
    redistributeBuffers();
  }

  /**
   * Ensures that the channel manager has enough buffers to execute the given task.
   * <p>
   * If there is less than one buffer per channel available, an InsufficientResourcesException will be thrown,
   * because of possible deadlocks. With more then one buffer per channel, deadlock-freedom is guaranteed.
   *
   * @param task task to be executed
   * @throws InsufficientResourcesException thrown if not enough buffers available to execute the task
   */
  private void ensureBufferAvailability(Task task) throws InsufficientResourcesException {
    Environment env = task.getEnvironment();

    int numBuffers = this.globalBufferPool.numBuffers();
    // existing channels + channels of the task
    int numChannels = this.channels.size() + env.getNumberOfOutputChannels() + env.getNumberOfInputChannels();

    // need at least one buffer per channel
    if (numChannels > 0 && numBuffers / numChannels < 1) {
      String msg = String.format("%s has not enough buffers to safely execute %s (%d buffers missing)",
          this.connectionInfo.getFQDNHostname(), env.getTaskName(), numChannels - numBuffers);

      throw new InsufficientResourcesException(msg);
    }
  }

  /**
   * Redistributes the buffers among the registered buffer pools. This method is called after each task registration
   * and unregistration.
   * <p>
   * Every registered buffer pool gets buffers according to its number of channels weighted by the current buffer to
   * channel ratio.
   */
  private void redistributeBuffers() {
    if (this.localBuffersPools.isEmpty() | this.channels.size() == 0) {
      return;
    }

    int numBuffers = this.globalBufferPool.numBuffers();
    int numChannels = this.channels.size();

    double buffersPerChannel = numBuffers / (double) numChannels;

    if (buffersPerChannel < 1.0) {
      throw new RuntimeException("System has not enough buffers to execute tasks.");
    }

    // redistribute number of designated buffers per buffer pool
    for (LocalBufferPoolOwner bufferPool : this.localBuffersPools.values()) {
      int numDesignatedBuffers = (int) Math.ceil(buffersPerChannel * bufferPool.getNumberOfChannels());
      bufferPool.setDesignatedNumberOfBuffers(numDesignatedBuffers);
    }
  }

  // -----------------------------------------------------------------------------------------------------------------
  //                                           Envelope processing
  // -----------------------------------------------------------------------------------------------------------------

  private void releaseEnvelope(Envelope envelope) {
    Buffer buffer = envelope.getBuffer();
    if (buffer != null) {
      buffer.recycleBuffer();
    }
  }

  private void addReceiverListHint(ChannelID source, ChannelID localReceiver) {
    EnvelopeReceiverList receiverList = new EnvelopeReceiverList(localReceiver);

    if (this.receiverCache.put(source, receiverList) != null) {
      LOG.warn("Receiver cache already contained entry for " + source);
    }
  }

  private void addReceiverListHint(ChannelID source, RemoteReceiver remoteReceiver) {
    EnvelopeReceiverList receiverList = new EnvelopeReceiverList(remoteReceiver);

    if (this.receiverCache.put(source, receiverList) != null) {
      LOG.warn("Receiver cache already contained entry for " + source);
    }
  }

  private void generateSenderHint(Envelope envelope, RemoteReceiver receiver) throws IOException {
    Channel channel = this.channels.get(envelope.getSource());
    if (channel == null) {
      LOG.error("Cannot find channel for channel ID " + envelope.getSource());
      return;
    }

    // Only generate sender hints for output channels
    if (channel.isInputChannel()) {
      return;
    }

    final ChannelID targetChannelID = channel.getConnectedId();
    final int connectionIndex = receiver.getConnectionIndex();

    final RemoteReceiver ourAddress = new RemoteReceiver(this.ourAddress, connectionIndex);
    final Envelope senderHint = SenderHintEvent.createEnvelopeWithEvent(envelope, targetChannelID, ourAddress);

    this.networkConnectionManager.enqueue(senderHint, receiver);
  }

  /**
   * Returns the list of receivers for transfer envelopes produced by the channel with the given source channel ID.
   *
   * @param jobID
   *        the ID of the job the given channel ID belongs to
   * @param sourceChannelID
   *        the source channel ID for which the receiver list shall be retrieved
   * @return the list of receivers or <code>null</code> if the receiver could not be determined
   * @throws IOException
   */
  private EnvelopeReceiverList getReceiverList(JobID jobID, ChannelID sourceChannelID, boolean reportException) throws IOException {
    EnvelopeReceiverList receiverList = this.receiverCache.get(sourceChannelID);

    if (receiverList != null) {
      return receiverList;
    }

    while (true) {
      ConnectionInfoLookupResponse lookupResponse;
      synchronized (this.channelLookupService) {
        lookupResponse = this.channelLookupService.lookupConnectionInfo(this.connectionInfo, jobID, sourceChannelID);
      }

      if (lookupResponse.receiverReady()) {
        receiverList = new EnvelopeReceiverList(lookupResponse);
        break;
      }
      else if (lookupResponse.receiverNotReady()) {
        try {
          Thread.sleep(100);
        } catch (InterruptedException e) {
          if (reportException) {
            throw new IOException("Lookup was interrupted.");
          } else {
            return null;
          }
        }
      }
      else if (lookupResponse.isJobAborting()) {
        if (reportException) {
          throw new CancelTaskException();
        } else {
          return null;
        }
      }
      else if (lookupResponse.receiverNotFound()) {
        if (reportException) {
          throw new IOException("Could not find the receiver for Job " + jobID + ", channel with source id " + sourceChannelID);
        } else {
          return null;
        }
      }
      else {
        throw new IllegalStateException("Unrecognized response to channel lookup.");
      }
    }

    if (channels.containsKey(sourceChannelID)) {
      this.receiverCache.put(sourceChannelID, receiverList);
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug(String.format("Receiver for %s: %s [%s])",
          sourceChannelID,
          receiverList.hasLocalReceiver() ? receiverList.getLocalReceiver() : receiverList.getRemoteReceiver(),
          receiverList.hasLocalReceiver() ? "local" : "remote"));
    }

    return receiverList;
  }

  /**
   * Invalidates the entries identified by the given channel IDs from the receiver lookup cache.
   *
   * @param channelIDs channel IDs for entries to invalidate
   */
  public void invalidateLookupCacheEntries(Set<ChannelID> channelIDs) {
    for (ChannelID id : channelIDs) {
      this.receiverCache.remove(id);
    }
  }

  // -----------------------------------------------------------------------------------------------------------------
  //                                       EnvelopeDispatcher methods
  // -----------------------------------------------------------------------------------------------------------------

  @Override
  public void dispatchFromOutputChannel(Envelope envelope) throws IOException, InterruptedException {
    EnvelopeReceiverList receiverList = getReceiverListForEnvelope(envelope, true);

    Buffer srcBuffer = envelope.getBuffer();
    Buffer destBuffer = null;
   
    boolean success = false;
   
    try {
      if (receiverList.hasLocalReceiver()) {
        ChannelID receiver = receiverList.getLocalReceiver();
        Channel channel = this.channels.get(receiver);

        if (channel == null) {
          throw new LocalReceiverCancelledException(receiver);
        }

        if (!channel.isInputChannel()) {
          throw new IOException("Local receiver " + receiver + " is not an input channel.");
        }

        InputChannel<?> inputChannel = (InputChannel<?>) channel;
       
        // copy the buffer into the memory space of the receiver
        if (srcBuffer != null) {
          try {
            destBuffer = inputChannel.requestBufferBlocking(srcBuffer.size());
          } catch (InterruptedException e) {
            throw new IOException(e.getMessage());
          }

          srcBuffer.copyToBuffer(destBuffer);
          envelope.setBuffer(destBuffer);
          srcBuffer.recycleBuffer();
        }
       
        inputChannel.queueEnvelope(envelope);
        success = true;
      }
      else if (receiverList.hasRemoteReceiver()) {
        RemoteReceiver remoteReceiver = receiverList.getRemoteReceiver();

        // Generate sender hint before sending the first envelope over the network
        if (envelope.getSequenceNumber() == 0) {
          generateSenderHint(envelope, remoteReceiver);
        }

        this.networkConnectionManager.enqueue(envelope, remoteReceiver);
        success = true;
      }
    } finally {
      if (!success) {
        if (srcBuffer != null) {
          srcBuffer.recycleBuffer();
        }
        if (destBuffer != null) {
          destBuffer.recycleBuffer();
        }
      }
    }
  }

  @Override
  public void dispatchFromInputChannel(Envelope envelope) throws IOException, InterruptedException {
    // this method sends only events back from input channels to output channels
    // sanity check that we have no buffer
    if (envelope.getBuffer() != null) {
      throw new RuntimeException("Error: This method can only process envelopes without buffers.");
    }
   
    EnvelopeReceiverList receiverList = getReceiverListForEnvelope(envelope, true);

    if (receiverList.hasLocalReceiver()) {
      ChannelID receiver = receiverList.getLocalReceiver();
      Channel channel = this.channels.get(receiver);

      if (channel == null) {
        throw new LocalReceiverCancelledException(receiver);
      }

      if (channel.isInputChannel()) {
        throw new IOException("Local receiver " + receiver + " of backward event is not an output channel.");
      }

      OutputChannel outputChannel = (OutputChannel) channel;
      outputChannel.queueEnvelope(envelope);
    }
    else if (receiverList.hasRemoteReceiver()) {
      RemoteReceiver remoteReceiver = receiverList.getRemoteReceiver();

      // Generate sender hint before sending the first envelope over the network
      if (envelope.getSequenceNumber() == 0) {
        generateSenderHint(envelope, remoteReceiver);
      }

      this.networkConnectionManager.enqueue(envelope, remoteReceiver);
    }
  }

  /**
   *
   */
  @Override
  public void dispatchFromNetwork(Envelope envelope) throws IOException, InterruptedException {
    // ========================================================================================
    //  IMPORTANT
    // 
    //  This method is called by the network I/O thread that reads the incoming TCP
    //  connections. This method must have minimal overhead and not throw exception if
    //  something is wrong with a job or individual transmission, but only when something
    //  is fundamentally broken in the system.
    // ========================================================================================
   
    // the sender hint event is to let the receiver know where exactly the envelope came from.
    // the receiver will cache the sender id and its connection info in its local lookup table
    // that allows the receiver to send envelopes to the sender without first pinging the job manager
    // for the sender's connection info
   
    // Check if the envelope is the special envelope with the sender hint event
    if (SenderHintEvent.isSenderHintEvent(envelope)) {
      // Check if this is the final destination of the sender hint event before adding it
      final SenderHintEvent seh = (SenderHintEvent) envelope.deserializeEvents().get(0);
      if (this.channels.get(seh.getSource()) != null) {
        addReceiverListHint(seh.getSource(), seh.getRemoteReceiver());
        return;
      }
    }
   
    // try and get the receiver list. if we cannot get it anymore, the task has been cleared
    // the code frees the envelope on exception, so we need not to anything
    EnvelopeReceiverList receiverList = getReceiverListForEnvelope(envelope, false);
    if (receiverList == null) {
      // receiver is cancelled and cleaned away
      releaseEnvelope(envelope);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Dropping envelope for cleaned up receiver.");
      }

      return;
    }

    if (!receiverList.hasLocalReceiver() || receiverList.hasRemoteReceiver()) {
      throw new IOException("Bug in network stack: Envelope dispatched from the incoming network pipe has no local receiver or has a remote receiver");
    }

    ChannelID localReceiver = receiverList.getLocalReceiver();
    Channel channel = this.channels.get(localReceiver);
   
    // if the channel is null, it means that receiver has been cleared already (cancelled or failed).
    // release the buffer immediately
    if (channel == null) {
      releaseEnvelope(envelope);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Dropping envelope for cancelled receiver " + localReceiver);
      }
    }
    else {
      channel.queueEnvelope(envelope);
    }
  }

  /**
   *
   * Upon an exception, this method frees the envelope.
   */
  private final EnvelopeReceiverList getReceiverListForEnvelope(Envelope envelope, boolean reportException) throws IOException {
    try {
      return getReceiverList(envelope.getJobID(), envelope.getSource(), reportException);
    } catch (IOException e) {
      releaseEnvelope(envelope);
      throw e;
    } catch (CancelTaskException e) {
      releaseEnvelope(envelope);
      throw e;
    } catch (Throwable t) {
      releaseEnvelope(envelope);
      ExceptionUtils.rethrow(t, "Error while requesting receiver list.");
      return null; // silence the compiler
    }
  }
 
  // -----------------------------------------------------------------------------------------------------------------
  //                                       BufferProviderBroker methods
  // -----------------------------------------------------------------------------------------------------------------

  @Override
  public BufferProvider getBufferProvider(JobID jobID, ChannelID sourceChannelID) throws IOException {
    EnvelopeReceiverList receiverList = getReceiverList(jobID, sourceChannelID, false);
   
    // check if the receiver is already gone
    if (receiverList == null) {
      return this.discardBufferPool;
    }

    if (!receiverList.hasLocalReceiver() || receiverList.hasRemoteReceiver()) {
      throw new IOException("The destination to be looked up is not a single local endpoint.");
    }
   

    ChannelID localReceiver = receiverList.getLocalReceiver();
    Channel channel = this.channels.get(localReceiver);
   
    if (channel == null) {
      // receiver is already canceled
      return this.discardBufferPool;
    }

    if (!channel.isInputChannel()) {
      throw new IOException("Channel context for local receiver " + localReceiver + " is not an input channel context");
    }

    return (InputChannel<?>) channel;
  }

  // -----------------------------------------------------------------------------------------------------------------

  public void logBufferUtilization() {
    System.out.println("Buffer utilization at " + System.currentTimeMillis());

    System.out.println("\tUnused global buffers: " + this.globalBufferPool.numAvailableBuffers());

    System.out.println("\tLocal buffer pool status:");

    for (LocalBufferPoolOwner bufferPool : this.localBuffersPools.values()) {
      bufferPool.logBufferUtilization();
    }

    System.out.println("\tIncoming connections:");

    for (Channel channel : this.channels.values()) {
      if (channel.isInputChannel()) {
        ((InputChannel<?>) channel).logQueuedEnvelopes();
      }
    }
  }
 
  public void verifyAllCachesEmpty() {
    if (!channels.isEmpty()) {
      throw new IllegalStateException("Channel manager caches not empty: There are still registered channels.");
    }
    if (!localBuffersPools.isEmpty()) {
      throw new IllegalStateException("Channel manager caches not empty: There are still local buffer pools.");
    }
    if (!receiverCache.isEmpty()) {
      throw new IllegalStateException("Channel manager caches not empty: There are still entries in the receiver cache.");
    }
  }
}
TOP

Related Classes of org.apache.flink.runtime.io.network.ChannelManager

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.