Source Code of eu.stratosphere.compiler.plantranslate.NepheleJobGraphGenerator$TaskInChain

/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/


package eu.stratosphere.compiler.plantranslate;


import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;


import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;


import eu.stratosphere.api.common.aggregators.AggregatorRegistry;
import eu.stratosphere.api.common.aggregators.AggregatorWithName;
import eu.stratosphere.api.common.aggregators.ConvergenceCriterion;
import eu.stratosphere.api.common.aggregators.LongSumAggregator;
import eu.stratosphere.api.common.cache.DistributedCache;
import eu.stratosphere.api.common.distributions.DataDistribution;
import eu.stratosphere.api.common.typeutils.TypeSerializerFactory;
import eu.stratosphere.compiler.CompilerException;
import eu.stratosphere.compiler.dag.TempMode;
import eu.stratosphere.compiler.plan.BulkIterationPlanNode;
import eu.stratosphere.compiler.plan.BulkPartialSolutionPlanNode;
import eu.stratosphere.compiler.plan.Channel;
import eu.stratosphere.compiler.plan.DualInputPlanNode;
import eu.stratosphere.compiler.plan.IterationPlanNode;
import eu.stratosphere.compiler.plan.NAryUnionPlanNode;
import eu.stratosphere.compiler.plan.NamedChannel;
import eu.stratosphere.compiler.plan.OptimizedPlan;
import eu.stratosphere.compiler.plan.PlanNode;
import eu.stratosphere.compiler.plan.SingleInputPlanNode;
import eu.stratosphere.compiler.plan.SinkPlanNode;
import eu.stratosphere.compiler.plan.SolutionSetPlanNode;
import eu.stratosphere.compiler.plan.SourcePlanNode;
import eu.stratosphere.compiler.plan.WorksetIterationPlanNode;
import eu.stratosphere.compiler.plan.WorksetPlanNode;
import eu.stratosphere.configuration.ConfigConstants;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.configuration.GlobalConfiguration;
import eu.stratosphere.configuration.ConfigConstants;
import eu.stratosphere.nephele.jobgraph.DistributionPattern;
import eu.stratosphere.runtime.io.channels.ChannelType;
import eu.stratosphere.nephele.jobgraph.AbstractJobOutputVertex;
import eu.stratosphere.nephele.jobgraph.AbstractJobVertex;
import eu.stratosphere.nephele.jobgraph.JobGraph;
import eu.stratosphere.nephele.jobgraph.JobGraphDefinitionException;
import eu.stratosphere.nephele.jobgraph.JobInputVertex;
import eu.stratosphere.nephele.jobgraph.JobOutputVertex;
import eu.stratosphere.nephele.jobgraph.JobTaskVertex;
import eu.stratosphere.nephele.template.AbstractInputTask;
import eu.stratosphere.pact.runtime.iterative.convergence.WorksetEmptyConvergenceCriterion;
import eu.stratosphere.pact.runtime.iterative.io.FakeOutputTask;
import eu.stratosphere.pact.runtime.iterative.task.IterationHeadPactTask;
import eu.stratosphere.pact.runtime.iterative.task.IterationIntermediatePactTask;
import eu.stratosphere.pact.runtime.iterative.task.IterationSynchronizationSinkTask;
import eu.stratosphere.pact.runtime.iterative.task.IterationTailPactTask;
import eu.stratosphere.pact.runtime.shipping.ShipStrategyType;
import eu.stratosphere.pact.runtime.task.CoGroupDriver;
import eu.stratosphere.pact.runtime.task.CoGroupWithSolutionSetFirstDriver;
import eu.stratosphere.pact.runtime.task.CoGroupWithSolutionSetSecondDriver;
import eu.stratosphere.pact.runtime.task.DataSinkTask;
import eu.stratosphere.pact.runtime.task.DataSourceTask;
import eu.stratosphere.pact.runtime.task.DriverStrategy;
import eu.stratosphere.pact.runtime.task.JoinWithSolutionSetFirstDriver;
import eu.stratosphere.pact.runtime.task.JoinWithSolutionSetSecondDriver;
import eu.stratosphere.pact.runtime.task.MatchDriver;
import eu.stratosphere.pact.runtime.task.NoOpDriver;
import eu.stratosphere.pact.runtime.task.RegularPactTask;
import eu.stratosphere.pact.runtime.task.chaining.ChainedDriver;
import eu.stratosphere.pact.runtime.task.util.LocalStrategy;
import eu.stratosphere.pact.runtime.task.util.TaskConfig;
import eu.stratosphere.util.Visitor;


/**
 * This component translates the optimizer's resulting plan into a nephele job graph. The
 * translation is a one to one mapping. All decisions are made by the optimizer, this class
 * simply creates nephele data structures and descriptions corresponding to the optimizer's
 * result.
 * <p>
 * The basic method of operation is a top down traversal over the plan graph. On the way down, tasks are created
 * for the plan nodes, on the way back up, the nodes connect their predecessor.
 */
public class NepheleJobGraphGenerator implements Visitor<PlanNode> {
  
  public static final String MERGE_ITERATION_AUX_TASKS_KEY = "compiler.merge-iteration-aux";
  
  private static final boolean mergeIterationAuxTasks = GlobalConfiguration.getBoolean(MERGE_ITERATION_AUX_TASKS_KEY, true);
  
  private static final Log LOG = LogFactory.getLog(NepheleJobGraphGenerator.class);
  
  private static final TaskInChain ALREADY_VISITED_PLACEHOLDER = new TaskInChain(null, null, null);
  
  // ------------------------------------------------------------------------


  private JobGraph jobGraph; // the job that is currently built


  private Map<PlanNode, AbstractJobVertex> vertices; // a map from optimizer nodes to nephele vertices
  
  private Map<PlanNode, TaskInChain> chainedTasks; // a map from optimizer nodes to their chained task entries
  
  private Map<IterationPlanNode, IterationDescriptor> iterations;
  
  private List<TaskInChain> chainedTasksInSequence;
  
  private List<AbstractJobVertex> auxVertices; // auxiliary vertices which are added during job graph generation


  private AbstractJobVertex maxDegreeVertex; // the vertex with the highest degree of parallelism
  
  private final int defaultMaxFan;
  
  private final float defaultSortSpillingThreshold;
  
  private int iterationIdEnumerator = 1;
  
  private IterationPlanNode currentIteration;  // hack: as long as no nesting is possible, remember the enclosing iteration
  
  // ------------------------------------------------------------------------


  /**
   * Creates a new job graph generator that uses the default values for its resource configuration.
   */
  public NepheleJobGraphGenerator() {
    this.defaultMaxFan = ConfigConstants.DEFAULT_SPILLING_MAX_FAN;
    this.defaultSortSpillingThreshold = ConfigConstants.DEFAULT_SORT_SPILLING_THRESHOLD;
  }
  
  public NepheleJobGraphGenerator(Configuration config) {
    this.defaultMaxFan = config.getInteger(ConfigConstants.DEFAULT_SPILLING_MAX_FAN_KEY, 
        ConfigConstants.DEFAULT_SPILLING_MAX_FAN);
    this.defaultSortSpillingThreshold = config.getFloat(ConfigConstants.DEFAULT_SORT_SPILLING_THRESHOLD_KEY,
      ConfigConstants.DEFAULT_SORT_SPILLING_THRESHOLD);
  }


  /**
   * Translates a {@link eu.stratosphere.compiler.plan.OptimizedPlan} into a
   * {@link eu.stratosphere.nephele.jobgraph.JobGraph}.
   * This is an 1-to-1 mapping. No optimization whatsoever is applied.
   * 
   * @param program
   *        Optimized PACT plan that is translated into a JobGraph.
   * @return JobGraph generated from PACT plan.
   */
  public JobGraph compileJobGraph(OptimizedPlan program) {
    this.jobGraph = new JobGraph(program.getJobName());
    this.vertices = new HashMap<PlanNode, AbstractJobVertex>();
    this.chainedTasks = new HashMap<PlanNode, TaskInChain>();
    this.chainedTasksInSequence = new ArrayList<TaskInChain>();
    this.auxVertices = new ArrayList<AbstractJobVertex>();
    this.iterations = new HashMap<IterationPlanNode, IterationDescriptor>();
    this.maxDegreeVertex = null;
    
    // generate Nephele job graph
    program.accept(this);
    
    // finalize the iterations
    for (IterationDescriptor iteration : this.iterations.values()) {
      if (iteration.getIterationNode() instanceof BulkIterationPlanNode) {
        finalizeBulkIteration(iteration);
      } else if (iteration.getIterationNode() instanceof WorksetIterationPlanNode) {
        finalizeWorksetIteration(iteration);
      } else {
        throw new CompilerException();
      }
    }
    
    // now that the traversal is done, we have the chained tasks write their configs into their
    // parents' configurations
    for (int i = 0; i < this.chainedTasksInSequence.size(); i++) {
      TaskInChain tic = this.chainedTasksInSequence.get(i);
      TaskConfig t = new TaskConfig(tic.getContainingVertex().getConfiguration());
      t.addChainedTask(tic.getChainedTask(), tic.getTaskConfig(), tic.getTaskName());
    }


    // now that all have been created, make sure that all share their instances with the one
    // with the highest degree of parallelism
    if (program.getInstanceTypeName() != null) {
      this.maxDegreeVertex.setInstanceType(program.getInstanceTypeName());
    } else {
      LOG.warn("No instance type assigned to JobVertex.");
    }
    for (AbstractJobVertex vertex : this.vertices.values()) {
      if (vertex != this.maxDegreeVertex) {
        vertex.setVertexToShareInstancesWith(this.maxDegreeVertex);
      }
    }
    
    for (AbstractJobVertex vertex : this.auxVertices) {
      if (vertex != this.maxDegreeVertex) {
        vertex.setVertexToShareInstancesWith(this.maxDegreeVertex);
      }
    }


    // add registered cache file into job configuration
    for (Entry<String, String> e: program.getOriginalPactPlan().getCachedFiles()) {
      DistributedCache.addCachedFile(e.getKey(), e.getValue(), this.jobGraph.getJobConfiguration());
    }
    JobGraph graph = this.jobGraph;


    // release all references again
    this.maxDegreeVertex = null;
    this.vertices = null;
    this.chainedTasks = null;
    this.chainedTasksInSequence = null;
    this.auxVertices = null;
    this.iterations = null;
    this.jobGraph = null;


    // return job graph
    return graph;
  }
  
  /**
   * This methods implements the pre-visiting during a depth-first traversal. It create the job vertex and
   * sets local strategy.
   * 
   * @param node
   *        The node that is currently processed.
   * @return True, if the visitor should descend to the node's children, false if not.
   * @see eu.stratosphere.util.Visitor#preVisit(eu.stratosphere.pact.common.plan.Visitable)
   */
  @Override
  public boolean preVisit(PlanNode node) {
    // check if we have visited this node before. in non-tree graphs, this happens
    if (this.vertices.containsKey(node) || this.chainedTasks.containsKey(node) || this.iterations.containsKey(node)) {
      // return false to prevent further descend
      return false;
    }


    // the vertex to be created for the current node
    final AbstractJobVertex vertex;
    try {
      if (node instanceof SinkPlanNode) {
        vertex = createDataSinkVertex((SinkPlanNode) node);
      }
      else if (node instanceof SourcePlanNode) {
        vertex = createDataSourceVertex((SourcePlanNode) node);
      }
      else if (node instanceof BulkIterationPlanNode) {
        BulkIterationPlanNode iterationNode = (BulkIterationPlanNode) node;
        // for the bulk iteration, we skip creating anything for now. we create the graph
        // for the step function in the post visit.
        
        // check that the root of the step function has the same DOP as the iteration.
        // because the tail must have the same DOP as the head, we can only merge the last
        // operator with the tail, if they have the same DOP. not merging is currently not
        // implemented
        PlanNode root = iterationNode.getRootOfStepFunction();
        if (root.getDegreeOfParallelism() != node.getDegreeOfParallelism() || 
            root.getSubtasksPerInstance() != node.getSubtasksPerInstance()) 
        {
          throw new CompilerException("Error: The final operator of the step " +
              "function has a different degree of parallelism than the iteration operator itself.");
        }
        
        IterationDescriptor descr = new IterationDescriptor(iterationNode, this.iterationIdEnumerator++);
        this.iterations.put(iterationNode, descr);
        vertex = null;
      }
      else if (node instanceof WorksetIterationPlanNode) {
        WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) node;


        // we have the same constraints as for the bulk iteration
        PlanNode nextWorkSet = iterationNode.getNextWorkSetPlanNode();
        PlanNode solutionSetDelta  = iterationNode.getSolutionSetDeltaPlanNode();
        
        if (nextWorkSet.getDegreeOfParallelism() != node.getDegreeOfParallelism() || 
          nextWorkSet.getSubtasksPerInstance() != node.getSubtasksPerInstance())
        {
          throw new CompilerException("It is currently not supported that the final operator of the step " +
              "function has a different degree of parallelism than the iteration operator itself.");
        }
        if (solutionSetDelta.getDegreeOfParallelism() != node.getDegreeOfParallelism() || 
          solutionSetDelta.getSubtasksPerInstance() != node.getSubtasksPerInstance())
        {
          throw new CompilerException("It is currently not supported that the final operator of the step " +
              "function has a different degree of parallelism than the iteration operator itself.");
        }
        
        IterationDescriptor descr = new IterationDescriptor(iterationNode, this.iterationIdEnumerator++);
        this.iterations.put(iterationNode, descr);
        vertex = null;
      }
      else if (node instanceof SingleInputPlanNode) {
        vertex = createSingleInputVertex((SingleInputPlanNode) node);
      }
      else if (node instanceof DualInputPlanNode) {
        vertex = createDualInputVertex((DualInputPlanNode) node);
      }
      else if (node instanceof NAryUnionPlanNode) {
        // skip the union for now
        vertex = null;
      }
      else if (node instanceof BulkPartialSolutionPlanNode) {
        // create a head node (or not, if it is merged into its successor)
        vertex = createBulkIterationHead((BulkPartialSolutionPlanNode) node);
      }
      else if (node instanceof SolutionSetPlanNode) {
        // this represents an access into the solution set index.
        // we do not create a vertex for the solution set here (we create the head at the workset place holder)
        
        // we adjust the joins / cogroups that go into the solution set here
        for (Channel c : node.getOutgoingChannels()) {
          DualInputPlanNode target = (DualInputPlanNode) c.getTarget();
          AbstractJobVertex accessingVertex = this.vertices.get(target);
          TaskConfig conf = new TaskConfig(accessingVertex.getConfiguration());
          int inputNum = c == target.getInput1() ? 0 : c == target.getInput2() ? 1 : -1;
          
          // sanity checks
          if (inputNum == -1) {
            throw new CompilerException();
          }
          
          // adjust the driver
          if (conf.getDriver().equals(MatchDriver.class)) {
            conf.setDriver(inputNum == 0 ? JoinWithSolutionSetFirstDriver.class : JoinWithSolutionSetSecondDriver.class);
          }
          else if (conf.getDriver().equals(CoGroupDriver.class)) {
            conf.setDriver(inputNum == 0 ? CoGroupWithSolutionSetFirstDriver.class : CoGroupWithSolutionSetSecondDriver.class);
          }
          else {
            throw new CompilerException("Found join with solution set using incompatible operator (only Join/CoGroup are valid).");
          }
        }
        
        // make sure we do not visit this node again. for that, we add a 'already seen' entry into one of the sets
        this.chainedTasks.put(node, ALREADY_VISITED_PLACEHOLDER);
        
        vertex = null;
      }
      else if (node instanceof WorksetPlanNode) {
        // create the iteration head here
        vertex = createWorksetIterationHead((WorksetPlanNode) node);
      }
      else {
        throw new CompilerException("Unrecognized node type: " + node.getClass().getName());
      }
    }
    catch (Exception e) {
      throw new CompilerException("Error translating node '" + node + "': " + e.getMessage(), e);
    }
    
    // check if a vertex was created, or if it was chained or skipped
    if (vertex != null) {
      // set degree of parallelism
      int pd = node.getDegreeOfParallelism();
      vertex.setNumberOfSubtasks(pd);
  
      // check whether this is the vertex with the highest degree of parallelism
      if (this.maxDegreeVertex == null || this.maxDegreeVertex.getNumberOfSubtasks() < pd) {
        this.maxDegreeVertex = vertex;
      }
  
      // set the number of tasks per instance
      if (node.getSubtasksPerInstance() >= 1) {
        vertex.setNumberOfSubtasksPerInstance(node.getSubtasksPerInstance());
      }
      
      // check whether this vertex is part of an iteration step function
      if (this.currentIteration != null) {
        // check that the task has the same DOP as the iteration as such
        PlanNode iterationNode = (PlanNode) this.currentIteration;
        if (iterationNode.getDegreeOfParallelism() < pd) {
          throw new CompilerException("Error: All functions that are part of an iteration must have the same, or a lower, degree-of-parallelism than the iteration operator.");
        }
        if (iterationNode.getSubtasksPerInstance() < node.getSubtasksPerInstance()) {
          throw new CompilerException("Error: All functions that are part of an iteration must have the same, or a lower, number of subtasks-per-node than the iteration operator.");
        }
        
        // store the id of the iterations the step functions participate in
        IterationDescriptor descr = this.iterations.get(this.currentIteration);
        new TaskConfig(vertex.getConfiguration()).setIterationId(descr.getId());
      }
  
      // store in the map
      this.vertices.put(node, vertex);
    }


    // returning true causes deeper descend
    return true;
  }


  /**
   * This method implements the post-visit during the depth-first traversal. When the post visit happens,
   * all of the descendants have been processed, so this method connects all of the current node's
   * predecessors to the current node.
   * 
   * @param node
   *        The node currently processed during the post-visit.
   * @see eu.stratosphere.util.Visitor#postVisit(eu.stratosphere.pact.common.plan.Visitable)
   */
  @Override
  public void postVisit(PlanNode node) {
    try {
      // --------- check special cases for which we handle post visit differently ----------
      
      // skip data source node (they have no inputs)
      // also, do nothing for union nodes, we connect them later when gathering the inputs for a task
      // solution sets have no input. the initial solution set input is connected when the iteration node is in its postVisit
      if (node instanceof SourcePlanNode || node instanceof NAryUnionPlanNode || node instanceof SolutionSetPlanNode) {
        return;
      }
      
      // check if we have an iteration. in that case, translate the step function now
      if (node instanceof IterationPlanNode) {
        // for now, prevent nested iterations
        if (this.currentIteration != null) {
          throw new CompilerException("Nested Iterations are not possible at the moment!");
        }
        this.currentIteration = (IterationPlanNode) node;
        this.currentIteration.acceptForStepFunction(this);
        this.currentIteration = null;
        
        // inputs for initial bulk partial solution or initial workset are already connected to the iteration head in the head's post visit.
        // connect the initial solution set now.
        if (node instanceof WorksetIterationPlanNode) {
          // connect the initial solution set
          WorksetIterationPlanNode wsNode = (WorksetIterationPlanNode) node;
          AbstractJobVertex headVertex = this.iterations.get(wsNode).getHeadTask();
          TaskConfig headConfig = new TaskConfig(headVertex.getConfiguration());
          int inputIndex = headConfig.getDriverStrategy().getNumInputs();
          headConfig.setIterationHeadSolutionSetInputIndex(inputIndex);
          translateChannel(wsNode.getInitialSolutionSetInput(), inputIndex, headVertex, headConfig, false);
        }
        
        return;
      }
      
      // --------- Main Path: Translation of channels ----------
      // 
      // There are two paths of translation: One for chained tasks (or merged tasks in general),
      // which do not have their own task vertex. The other for tasks that have their own vertex,
      // or are the primary task in a vertex (to which the others are chained).
      
      final AbstractJobVertex targetVertex = this.vertices.get(node);
      
      // check whether this node has its own task, or is merged with another one
      if (targetVertex == null) {
        // node's task is merged with another task. it is either chained, of a merged head vertex
        // from an iteration
        final TaskInChain chainedTask;
        if ((chainedTask = this.chainedTasks.get(node)) != null) {
          // Chained Task. Sanity check first...
          final Iterator<Channel> inConns = node.getInputs();
          if (!inConns.hasNext()) {
            throw new CompilerException("Bug: Found chained task with no input.");
          }
          final Channel inConn = inConns.next();
          
          if (inConns.hasNext()) {
            throw new CompilerException("Bug: Found a chained task with more than one input!");
          }
          if (inConn.getLocalStrategy() != null && inConn.getLocalStrategy() != LocalStrategy.NONE) {
            throw new CompilerException("Bug: Found a chained task with an input local strategy.");
          }
          if (inConn.getShipStrategy() != null && inConn.getShipStrategy() != ShipStrategyType.FORWARD) {
            throw new CompilerException("Bug: Found a chained task with an input ship strategy other than FORWARD.");
          }
  
          AbstractJobVertex container = chainedTask.getContainingVertex();
          
          if (container == null) {
            final PlanNode sourceNode = inConn.getSource();
            container = this.vertices.get(sourceNode);
            if (container == null) {
              // predecessor is itself chained
              container = this.chainedTasks.get(sourceNode).getContainingVertex();
              if (container == null) {
                throw new IllegalStateException("Bug: Chained task predecessor has not been assigned its containing vertex.");
              }
            } else {
              // predecessor is a proper task job vertex and this is the first chained task. add a forward connection entry.
              new TaskConfig(container.getConfiguration()).addOutputShipStrategy(ShipStrategyType.FORWARD);
            }
            chainedTask.setContainingVertex(container);
          }
          
          // add info about the input serializer type
          chainedTask.getTaskConfig().setInputSerializer(inConn.getSerializer(), 0);
          
          // update name of container task
          String containerTaskName = container.getName();
          if(containerTaskName.startsWith("CHAIN ")) {
            container.setName(containerTaskName+" -> "+chainedTask.getTaskName());
          } else {
            container.setName("CHAIN "+containerTaskName+" -> "+chainedTask.getTaskName());
          }
          
          this.chainedTasksInSequence.add(chainedTask);
          return;
        }
        else if (node instanceof BulkPartialSolutionPlanNode ||
            node instanceof WorksetPlanNode)
        {
          // merged iteration head task. the task that the head is merged with will take care of it
          return;
        } else {
          throw new CompilerException("Bug: Unrecognized merged task vertex.");
        }
      }
      
      // -------- Here, we translate non-chained tasks -------------
      
      // create the config that will contain all the description of the inputs
      final TaskConfig targetVertexConfig = new TaskConfig(targetVertex.getConfiguration());
            
      // get the inputs. if this node is the head of an iteration, we obtain the inputs from the
      // enclosing iteration node, because the inputs are the initial inputs to the iteration.
      final Iterator<Channel> inConns;
      if (node instanceof BulkPartialSolutionPlanNode) {
        inConns = ((BulkPartialSolutionPlanNode) node).getContainingIterationNode().getInputs();
        // because the partial solution has its own vertex, is has only one (logical) input.
        // note this in the task configuration
        targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(0);
      } else if (node instanceof WorksetPlanNode) {
        WorksetPlanNode wspn = (WorksetPlanNode) node;
        // input that is the initial workset
        inConns = Collections.singleton(wspn.getContainingIterationNode().getInput2()).iterator();
        
        // because we have a stand-alone (non-merged) workset iteration head, the initial workset will
        // be input 0 and the solution set will be input 1
        targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(0);
        targetVertexConfig.setIterationHeadSolutionSetInputIndex(1);
      } else {
        inConns = node.getInputs();
      }
      if (!inConns.hasNext()) {
        throw new CompilerException("Bug: Found a non-source task with no input.");
      }
      
      int inputIndex = 0;
      while (inConns.hasNext()) {
        Channel input = inConns.next();
        inputIndex += translateChannel(input, inputIndex, targetVertex, targetVertexConfig, false);
      }
      // broadcast variables
      int broadcastInputIndex = 0;
      for (NamedChannel broadcastInput: node.getBroadcastInputs()) {
        int broadcastInputIndexDelta = translateChannel(broadcastInput, broadcastInputIndex, targetVertex, targetVertexConfig, true);
        targetVertexConfig.setBroadcastInputName(broadcastInput.getName(), broadcastInputIndex);
        targetVertexConfig.setBroadcastInputSerializer(broadcastInput.getSerializer(), broadcastInputIndex);
        broadcastInputIndex += broadcastInputIndexDelta;
      }
    } catch (Exception e) {
      throw new CompilerException(
        "An error occurred while translating the optimized plan to a nephele JobGraph: " + e.getMessage(), e);
    }
  }
  
  /**
   * Translates one logical input channel of a target vertex into job vertex connections.
   * <p>
   * If the source of the channel is a union, or a merged iteration head whose initial input comes from
   * a union, every channel of that union is connected to the same input index of the target vertex.
   * Channels whose source is a solution set node are not connected at all.
   * 
   * @param input
   *        The channel to translate.
   * @param inputIndex
   *        The index of the logical input at the target vertex.
   * @param targetVertex
   *        The vertex that receives the input.
   * @param targetVertexConfig
   *        The task configuration of the target vertex, into which the input description is written.
   * @param isBroadcast
   *        True, if the channel is a broadcast input, false if it is a regular input.
   * @return The number of logical inputs that were added to the target vertex: 0 if the source is a
   *         solution set node, 1 otherwise.
   * @throws Exception
   *         Thrown, if the translation fails, for example with a {@link CompilerException} when the
   *         channels of a union have different serializers, when a source node cannot be resolved, or
   *         when channels on the dynamic path and the static path are combined in one input.
   */
  private int translateChannel(Channel input, int inputIndex, AbstractJobVertex targetVertex,
      TaskConfig targetVertexConfig, boolean isBroadcast) throws Exception
  {
    final PlanNode inputPlanNode = input.getSource();
    // all physical channels that make up this logical input
    final Iterator<Channel> allInChannels;
    
    if (inputPlanNode instanceof NAryUnionPlanNode) {
      allInChannels = ((NAryUnionPlanNode) inputPlanNode).getListOfInputs().iterator();
    }
    else if (inputPlanNode instanceof BulkPartialSolutionPlanNode) {
      if (this.vertices.get(inputPlanNode) == null) {
        // merged iteration head
        final BulkPartialSolutionPlanNode pspn = (BulkPartialSolutionPlanNode) inputPlanNode;
        final BulkIterationPlanNode iterationNode = pspn.getContainingIterationNode();
        
        // check if the iteration's input is a union
        if (iterationNode.getInput().getSource() instanceof NAryUnionPlanNode) {
          allInChannels = ((NAryUnionPlanNode) iterationNode.getInput().getSource()).getInputs();
        } else {
          allInChannels = Collections.singletonList(iterationNode.getInput()).iterator();
        }
        
        // also, set the index of the gate with the partial solution
        targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(inputIndex);
      } else {
        // standalone iteration head
        allInChannels = Collections.singletonList(input).iterator();
      }
    } else if (inputPlanNode instanceof WorksetPlanNode) {
      if (this.vertices.get(inputPlanNode) == null) {
        // merged iteration head
        final WorksetPlanNode wspn = (WorksetPlanNode) inputPlanNode;
        final WorksetIterationPlanNode iterationNode = wspn.getContainingIterationNode();
        
        // check if the iteration's second input (the one used for the workset) is a union
        if (iterationNode.getInput2().getSource() instanceof NAryUnionPlanNode) {
          allInChannels = ((NAryUnionPlanNode) iterationNode.getInput2().getSource()).getInputs();
        } else {
          allInChannels = Collections.singletonList(iterationNode.getInput2()).iterator();
        }
        
        // also, set the index of the gate with the workset
        targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(inputIndex);
      } else {
        // standalone iteration head
        allInChannels = Collections.singletonList(input).iterator();
      }
    } else if (inputPlanNode instanceof SolutionSetPlanNode) {
      // for now, skip connections with the solution set node, as this is a local index access (later to be parameterized here)
      // rather than a vertex connection
      return 0;
    } else {
      allInChannels = Collections.singletonList(input).iterator();
    }
    
    // check that the type serializer is consistent
    TypeSerializerFactory<?> typeSerFact = null;
    
    // accounting for channels on the dynamic path
    int numChannelsTotal = 0;
    int numChannelsDynamicPath = 0;
    int numDynamicSenderTasksTotal = 0;
    


    // expand the channel to all the union channels, in case there is a union operator at its source
    while (allInChannels.hasNext()) {
      final Channel inConn = allInChannels.next();
      
      // sanity check the common serializer: the first channel defines it, all others must match
      if (typeSerFact == null) {
        typeSerFact = inConn.getSerializer();
      } else if (!typeSerFact.equals(inConn.getSerializer())) {
        throw new CompilerException("Conflicting types in union operator.");
      }
      
      final PlanNode sourceNode = inConn.getSource();
      AbstractJobVertex sourceVertex = this.vertices.get(sourceNode);
      TaskConfig sourceVertexConfig;


      if (sourceVertex == null) {
        // this predecessor is chained to another task or an iteration
        final TaskInChain chainedTask;
        final IterationDescriptor iteration;
        if ((chainedTask = this.chainedTasks.get(sourceNode)) != null) {
          // push chained task: connect from the containing vertex, but use the chained task's own config
          if (chainedTask.getContainingVertex() == null) {
            throw new IllegalStateException("Bug: Chained task has not been assigned its containing vertex when connecting.");
          }
          sourceVertex = chainedTask.getContainingVertex();
          sourceVertexConfig = chainedTask.getTaskConfig();
        } else if ((iteration = this.iterations.get(sourceNode)) != null) {
          // predecessor is an iteration: connect from its head task, using the head's final result config
          sourceVertex = iteration.getHeadTask();
          sourceVertexConfig = iteration.getHeadFinalResultConfig();
        } else {
          throw new CompilerException("Bug: Could not resolve source node for a channel.");
        }
      } else {
        // predecessor is its own vertex
        sourceVertexConfig = new TaskConfig(sourceVertex.getConfiguration());
      }
      DistributionPattern pattern = connectJobVertices(
        inConn, inputIndex, sourceVertex, sourceVertexConfig, targetVertex, targetVertexConfig, isBroadcast);
      
      // accounting on channels and senders
      numChannelsTotal++;
      if (inConn.isOnDynamicPath()) {
        numChannelsDynamicPath++;
        numDynamicSenderTasksTotal += getNumberOfSendersPerReceiver(pattern,
          sourceVertex.getNumberOfSubtasks(), targetVertex.getNumberOfSubtasks());
      }
    }
    
    // for the iterations, check that the number of dynamic channels is the same as the number
    // of channels for this logical input. this condition is violated at the moment, if there
    // is a union between nodes on the static and nodes on the dynamic path
    if (numChannelsDynamicPath > 0 && numChannelsTotal != numChannelsDynamicPath) {
      throw new CompilerException("Error: It is currently not supported to union between dynamic and static path in an iteration.");
    }
    // inputs fed from the dynamic path are marked as iterative gates, together with the
    // accumulated number of dynamic sender tasks
    if (numDynamicSenderTasksTotal > 0) {
      if (isBroadcast) {
        targetVertexConfig.setBroadcastGateIterativeWithNumberOfEventsUntilInterrupt(inputIndex, numDynamicSenderTasksTotal);
      } else {
        targetVertexConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(inputIndex, numDynamicSenderTasksTotal);
      }
    }
    
    // the local strategy is added only once. in non-union case that is the actual edge,
    // in the union case, it is the edge between union and the target node
    addLocalInfoFromChannelToConfig(input, targetVertexConfig, inputIndex, isBroadcast);
    return 1;
  }
  
  private int getNumberOfSendersPerReceiver(DistributionPattern pattern, int numSenders, int numReceivers) {
    if (pattern == DistributionPattern.BIPARTITE) {
      return numSenders;
    } else if (pattern == DistributionPattern.POINTWISE) {
      if (numSenders != numReceivers) {
        if (numReceivers == 1) {
          return numSenders;
        }
        else if (numSenders == 1) {
          return 1;
        }
        else {
          throw new CompilerException("Error: A changing degree of parallelism is currently " +
              "not supported between tasks within an iteration.");
        }
      } else {
        return 1;
      }
    } else {
      throw new CompilerException("Unknown distribution pattern for channels: " + pattern);
    }
  }
  
  // ------------------------------------------------------------------------
  // Methods for creating individual vertices
  // ------------------------------------------------------------------------
  
  private JobTaskVertex createSingleInputVertex(SingleInputPlanNode node) throws CompilerException {
    final String taskName = node.getNodeName();
    final DriverStrategy ds = node.getDriverStrategy();
    
    // check, whether chaining is possible
    boolean chaining = false;
    {
      Channel inConn = node.getInput();
      PlanNode pred = inConn.getSource();
      chaining = ds.getPushChainDriverClass() != null &&
          !(pred instanceof NAryUnionPlanNode) &&  // first op after union is stand-alone, because union is merged
          !(pred instanceof BulkPartialSolutionPlanNode) &&  // partial solution merges anyways
          !(pred instanceof WorksetPlanNode) &&  // workset merges anyways
          !(pred instanceof IterationPlanNode) && // cannot chain with iteration heads currently
          inConn.getShipStrategy() == ShipStrategyType.FORWARD &&
          inConn.getLocalStrategy() == LocalStrategy.NONE &&
          pred.getOutgoingChannels().size() == 1 &&
          node.getDegreeOfParallelism() == pred.getDegreeOfParallelism() && 
          node.getSubtasksPerInstance() == pred.getSubtasksPerInstance() &&
          node.getBroadcastInputs().isEmpty();
      
      // cannot chain the nodes that produce the next workset or the next solution set, if they are not the
      // in a tail 
      if (this.currentIteration != null && this.currentIteration instanceof WorksetIterationPlanNode &&
          node.getOutgoingChannels().size() > 0)
      {
        WorksetIterationPlanNode wspn = (WorksetIterationPlanNode) this.currentIteration;
        if (wspn.getSolutionSetDeltaPlanNode() == pred || wspn.getNextWorkSetPlanNode() == pred) {
          chaining = false;
        }
      }
      // cannot chain the nodes that produce the next workset in a bulk iteration if a termination criterion follows
      if (this.currentIteration != null && this.currentIteration instanceof BulkIterationPlanNode)
      {
        BulkIterationPlanNode wspn = (BulkIterationPlanNode) this.currentIteration;
        if (node == wspn.getRootOfTerminationCriterion() && wspn.getRootOfStepFunction() == pred){
          chaining = false;
        }else if(node.getOutgoingChannels().size() > 0 &&(wspn.getRootOfStepFunction() == pred ||
            wspn.getRootOfTerminationCriterion() == pred)) {
          chaining = false;
        }
      }
    }
    
    final JobTaskVertex vertex;
    final TaskConfig config;
    
    if (chaining) {
      vertex = null;
      config = new TaskConfig(new Configuration());
      this.chainedTasks.put(node, new TaskInChain(ds.getPushChainDriverClass(), config, taskName));
    } else {
      // create task vertex
      vertex = new JobTaskVertex(taskName, this.jobGraph);
      vertex.setTaskClass( (this.currentIteration != null && node.isOnDynamicPath()) ? IterationIntermediatePactTask.class : RegularPactTask.class);
      
      config = new TaskConfig(vertex.getConfiguration());
      config.setDriver(ds.getDriverClass());
    }
    
    // set user code
    config.setStubWrapper(node.getPactContract().getUserCodeWrapper());
    config.setStubParameters(node.getPactContract().getParameters());
    
    // set the driver strategy
    config.setDriverStrategy(ds);
    if (node.getComparator() != null) {
      config.setDriverComparator(node.getComparator(), 0);
    }
    
    // assign memory, file-handles, etc.
    assignDriverResources(node, config);
    return vertex;
  }


  private JobTaskVertex createDualInputVertex(DualInputPlanNode node) throws CompilerException {
    final String taskName = node.getNodeName();
    final DriverStrategy ds = node.getDriverStrategy();
    final JobTaskVertex vertex = new JobTaskVertex(taskName, this.jobGraph);
    final TaskConfig config = new TaskConfig(vertex.getConfiguration());
    vertex.setTaskClass( (this.currentIteration != null && node.isOnDynamicPath()) ? IterationIntermediatePactTask.class : RegularPactTask.class);
    
    // set user code
    config.setStubWrapper(node.getPactContract().getUserCodeWrapper());
    config.setStubParameters(node.getPactContract().getParameters());
    
    // set the driver strategy
    config.setDriver(ds.getDriverClass());
    config.setDriverStrategy(ds);
    if (node.getComparator1() != null) {
      config.setDriverComparator(node.getComparator1(), 0);
    }
    if (node.getComparator2() != null) {
      config.setDriverComparator(node.getComparator2(), 1);
    }
    if (node.getPairComparator() != null) {
      config.setDriverPairComparator(node.getPairComparator());
    }
    
    // assign memory, file-handles, etc.
    assignDriverResources(node, config);
    return vertex;
  }


  private JobInputVertex createDataSourceVertex(SourcePlanNode node) throws CompilerException {
    final JobInputVertex vertex = new JobInputVertex(node.getNodeName(), this.jobGraph);
    final TaskConfig config = new TaskConfig(vertex.getConfiguration());
    
    // set task class
    @SuppressWarnings("unchecked")
    final Class<AbstractInputTask<?>> clazz = (Class<AbstractInputTask<?>>) (Class<?>) DataSourceTask.class;
    vertex.setInputClass(clazz);


    // set user code
    config.setStubWrapper(node.getPactContract().getUserCodeWrapper());
    config.setStubParameters(node.getPactContract().getParameters());
    
    config.setOutputSerializer(node.getSerializer());
    return vertex;
  }


  private AbstractJobOutputVertex createDataSinkVertex(SinkPlanNode node) throws CompilerException {
    final JobOutputVertex vertex = new JobOutputVertex(node.getNodeName(), this.jobGraph);
    final TaskConfig config = new TaskConfig(vertex.getConfiguration());
    
    vertex.setOutputClass(DataSinkTask.class);
    vertex.getConfiguration().setInteger(DataSinkTask.DEGREE_OF_PARALLELISM_KEY, node.getDegreeOfParallelism());
    
    // set user code
    config.setStubWrapper(node.getPactContract().getUserCodeWrapper());
    config.setStubParameters(node.getPactContract().getParameters());
    
    return vertex;
  }
  
  private JobTaskVertex createBulkIterationHead(BulkPartialSolutionPlanNode pspn) {
    // get the bulk iteration that corresponds to this partial solution node
    final BulkIterationPlanNode iteration = pspn.getContainingIterationNode();
    
    // check whether we need an individual vertex for the partial solution, or whether we
    // attach ourselves to the vertex of the parent node. We can combine the head with a node of 
    // the step function, if
    // 1) There is one parent that the partial solution connects to via a forward pattern and no
    //    local strategy
    // 2) DOP and the number of subtasks per instance does not change
    // 3) That successor is not a union
    // 4) That successor is not itself the last node of the step function
    // 5) There is no local strategy on the edge for the initial partial solution, as
    //    this translates to a local strategy that would only be executed in the first iteration
    
    final boolean merge;
    if (mergeIterationAuxTasks && pspn.getOutgoingChannels().size() == 1) {
      final Channel c = pspn.getOutgoingChannels().get(0);
      final PlanNode successor = c.getTarget();
      merge = c.getShipStrategy() == ShipStrategyType.FORWARD &&
          c.getLocalStrategy() == LocalStrategy.NONE &&
          c.getTempMode() == TempMode.NONE &&
          successor.getDegreeOfParallelism() == pspn.getDegreeOfParallelism() &&
          successor.getSubtasksPerInstance() == pspn.getSubtasksPerInstance() &&
          !(successor instanceof NAryUnionPlanNode) &&
          successor != iteration.getRootOfStepFunction() &&
          iteration.getInput().getLocalStrategy() == LocalStrategy.NONE;
    } else {
      merge = false;
    }
    
    // create or adopt the head vertex
    final JobTaskVertex toReturn;
    final JobTaskVertex headVertex;
    final TaskConfig headConfig;
    if (merge) {
      final PlanNode successor = pspn.getOutgoingChannels().get(0).getTarget();
      headVertex = (JobTaskVertex) this.vertices.get(successor);
      
      if (headVertex == null) {
        throw new CompilerException(
          "Bug: Trying to merge solution set with its sucessor, but successor has not been created.");
      }
      
      // reset the vertex type to iteration head
      headVertex.setTaskClass(IterationHeadPactTask.class);
      headConfig = new TaskConfig(headVertex.getConfiguration());
      toReturn = null;
    } else {
      // instantiate the head vertex and give it a no-op driver as the driver strategy.
      // everything else happens in the post visit, after the input (the initial partial solution)
      // is connected.
      headVertex = new JobTaskVertex("PartialSolution ("+iteration.getNodeName()+")", this.jobGraph);
      headVertex.setTaskClass(IterationHeadPactTask.class);
      headConfig = new TaskConfig(headVertex.getConfiguration());
      headConfig.setDriver(NoOpDriver.class);
      toReturn = headVertex;
    }
    
    // create the iteration descriptor and the iteration to it
    IterationDescriptor descr = this.iterations.get(iteration);
    if (descr == null) {
      throw new CompilerException("Bug: Iteration descriptor was not created at when translating the iteration node.");
    }
    descr.setHeadTask(headVertex, headConfig);
    
    return toReturn;
  }
  
  private JobTaskVertex createWorksetIterationHead(WorksetPlanNode wspn) {
    // get the bulk iteration that corresponds to this partial solution node
    final WorksetIterationPlanNode iteration = wspn.getContainingIterationNode();
    
    // check whether we need an individual vertex for the partial solution, or whether we
    // attach ourselves to the vertex of the parent node. We can combine the head with a node of 
    // the step function, if
    // 1) There is one parent that the partial solution connects to via a forward pattern and no
    //    local strategy
    // 2) DOP and the number of subtasks per instance does not change
    // 3) That successor is not a union
    // 4) That successor is not itself the last node of the step function
    // 5) There is no local strategy on the edge for the initial workset, as
    //    this translates to a local strategy that would only be executed in the first superstep
    
    final boolean merge;
    if (mergeIterationAuxTasks && wspn.getOutgoingChannels().size() == 1) {
      final Channel c = wspn.getOutgoingChannels().get(0);
      final PlanNode successor = c.getTarget();
      merge = c.getShipStrategy() == ShipStrategyType.FORWARD &&
          c.getLocalStrategy() == LocalStrategy.NONE &&
          c.getTempMode() == TempMode.NONE &&
          successor.getDegreeOfParallelism() == wspn.getDegreeOfParallelism() &&
          successor.getSubtasksPerInstance() == wspn.getSubtasksPerInstance() &&
          !(successor instanceof NAryUnionPlanNode) &&
          successor != iteration.getNextWorkSetPlanNode() &&
          iteration.getInitialWorksetInput().getLocalStrategy() == LocalStrategy.NONE;
    } else {
      merge = false;
    }
    
    // create or adopt the head vertex
    final JobTaskVertex toReturn;
    final JobTaskVertex headVertex;
    final TaskConfig headConfig;
    if (merge) {
      final PlanNode successor = wspn.getOutgoingChannels().get(0).getTarget();
      headVertex = (JobTaskVertex) this.vertices.get(successor);
      
      if (headVertex == null) {
        throw new CompilerException(
          "Bug: Trying to merge solution set with its sucessor, but successor has not been created.");
      }
      
      // reset the vertex type to iteration head
      headVertex.setTaskClass(IterationHeadPactTask.class);
      headConfig = new TaskConfig(headVertex.getConfiguration());
      toReturn = null;
    } else {
      // instantiate the head vertex and give it a no-op driver as the driver strategy.
      // everything else happens in the post visit, after the input (the initial partial solution)
      // is connected.
      headVertex = new JobTaskVertex("IterationHead("+iteration.getNodeName()+")", this.jobGraph);
      headVertex.setTaskClass(IterationHeadPactTask.class);
      headConfig = new TaskConfig(headVertex.getConfiguration());
      headConfig.setDriver(NoOpDriver.class);
      toReturn = headVertex;
    }
    
    // create the iteration descriptor and the iteration to it
    IterationDescriptor descr = this.iterations.get(iteration);
    if (descr == null) {
      throw new CompilerException("Bug: Iteration descriptor was not created at when translating the iteration node.");
    }
    descr.setHeadTask(headVertex, headConfig);
    
    return toReturn;
  }
  
  private void assignDriverResources(PlanNode node, TaskConfig config) {
    final long mem = node.getMemoryPerSubTask();
    if (mem > 0) {
      config.setMemoryDriver(mem);
      config.setFilehandlesDriver(this.defaultMaxFan);
      config.setSpillingThresholdDriver(this.defaultSortSpillingThreshold);
    }
  }
  
  private void assignLocalStrategyResources(Channel c, TaskConfig config, int inputNum) {
    if (c.getMemoryLocalStrategy() > 0) {
      config.setMemoryInput(inputNum, c.getMemoryLocalStrategy());
      config.setFilehandlesInput(inputNum, this.defaultMaxFan);
      config.setSpillingThresholdInput(inputNum, this.defaultSortSpillingThreshold);
    }
  }


  // ------------------------------------------------------------------------
  // Connecting Vertices
  // ------------------------------------------------------------------------


  /**
   * NOTE: The channel for global and local strategies are different if we connect a union. The global strategy
   * channel is then the channel into the union node, the local strategy channel the one from the union to the
   * actual target operator.
   *
   * @param channelForGlobalStrategy
   * @param channelForLocalStrategy
   * @param inputNumber
   * @param sourceVertex
   * @param sourceConfig
   * @param targetVertex
   * @param targetConfig
   * @throws JobGraphDefinitionException
   * @throws CompilerException
   */
  private DistributionPattern connectJobVertices(Channel channel, int inputNumber,
      final AbstractJobVertex sourceVertex, final TaskConfig sourceConfig,
      final AbstractJobVertex targetVertex, final TaskConfig targetConfig, boolean isBroadcast)
  throws JobGraphDefinitionException, CompilerException
  {
    // ------------ connect the vertices to the job graph --------------
    final ChannelType channelType;
    final DistributionPattern distributionPattern;


    switch (channel.getShipStrategy()) {
      case FORWARD:
      case PARTITION_LOCAL_HASH:
        distributionPattern = DistributionPattern.POINTWISE;
        channelType = ChannelType.NETWORK;
        break;
      case PARTITION_RANDOM:
      case BROADCAST:
      case PARTITION_HASH:
      case PARTITION_RANGE:
        distributionPattern = DistributionPattern.BIPARTITE;
        channelType = ChannelType.NETWORK;
        break;
      default:
        throw new RuntimeException("Unknown runtime ship strategy: " + channel.getShipStrategy());
    }
    
    sourceVertex.connectTo(targetVertex, channelType, distributionPattern);


    // -------------- configure the source task's ship strategy strategies in task config --------------
    final int outputIndex = sourceConfig.getNumOutputs();
    sourceConfig.addOutputShipStrategy(channel.getShipStrategy());
    if (outputIndex == 0) {
      sourceConfig.setOutputSerializer(channel.getSerializer());
    }
    if (channel.getShipStrategyComparator() != null) {
      sourceConfig.setOutputComparator(channel.getShipStrategyComparator(), outputIndex);
    }
    
    if (channel.getShipStrategy() == ShipStrategyType.PARTITION_RANGE) {
      
      final DataDistribution dataDistribution = channel.getDataDistribution();
      if(dataDistribution != null) {
        sourceConfig.setOutputDataDistribution(dataDistribution, outputIndex);
      } else {
        throw new RuntimeException("Range partitioning requires data distribution");
        // TODO: inject code and configuration for automatic histogram generation
      }
    }
//    if (targetContract instanceof GenericDataSink) {
//      final DataDistribution distri = ((GenericDataSink) targetContract).getDataDistribution();
//      if (distri != null) {
//        configForOutputShipStrategy.setOutputDataDistribution(distri);
//      }
//    }
    
    // ---------------- configure the receiver -------------------
    if (isBroadcast) {
      targetConfig.addBroadcastInputToGroup(inputNumber);
    } else {
      targetConfig.addInputToGroup(inputNumber);
    }
    return distributionPattern;
  }
  
  private void addLocalInfoFromChannelToConfig(Channel channel, TaskConfig config, int inputNum, boolean isBroadcastChannel) {
    // serializer
    if (isBroadcastChannel) {
      config.setBroadcastInputSerializer(channel.getSerializer(), inputNum);
      
      if (channel.getLocalStrategy() != LocalStrategy.NONE || (channel.getTempMode() != null && channel.getTempMode() != TempMode.NONE)) {
        throw new CompilerException("Found local strategy or temp mode on a broadcast variable channel.");
      } else {
        return;
      }
    } else {
      config.setInputSerializer(channel.getSerializer(), inputNum);
    }
    
    // local strategy
    if (channel.getLocalStrategy() != LocalStrategy.NONE) {
      config.setInputLocalStrategy(inputNum, channel.getLocalStrategy());
      if (channel.getLocalStrategyComparator() != null) {
        config.setInputComparator(channel.getLocalStrategyComparator(), inputNum);
      }
    }
    
    assignLocalStrategyResources(channel, config, inputNum);
    
    // materialization / caching
    if (channel.getTempMode() != null) {
      final TempMode tm = channel.getTempMode();


      boolean needsMemory = false;
      if (tm.breaksPipeline()) {
        config.setInputAsynchronouslyMaterialized(inputNum, true);
        needsMemory = true;
      }
      if (tm.isCached()) {
        config.setInputCached(inputNum, true);
        needsMemory = true;
      }
      
      if (needsMemory) {
        // sanity check
        if (tm == null || tm == TempMode.NONE || channel.getTempMemory() < 1) {
          throw new CompilerException("Bug in compiler: Inconsistent description of input materialization.");
        }
        config.setInputMaterializationMemory(inputNum, channel.getTempMemory());
      }
    }
  }
  
  private void finalizeBulkIteration(IterationDescriptor descr) {
    
    final BulkIterationPlanNode bulkNode = (BulkIterationPlanNode) descr.getIterationNode();
    final JobTaskVertex headVertex = descr.getHeadTask();
    final TaskConfig headConfig = new TaskConfig(headVertex.getConfiguration());
    final TaskConfig headFinalOutputConfig = descr.getHeadFinalResultConfig();
    
    // ------------ finalize the head config with the final outputs and the sync gate ------------
    final int numStepFunctionOuts = headConfig.getNumOutputs();
    final int numFinalOuts = headFinalOutputConfig.getNumOutputs();
    headConfig.setIterationHeadFinalOutputConfig(headFinalOutputConfig);
    headConfig.setIterationHeadIndexOfSyncOutput(numStepFunctionOuts + numFinalOuts);
    final long memForBackChannel = bulkNode.getMemoryPerSubTask();
    if (memForBackChannel <= 0) {
      throw new CompilerException("Bug: No memory has been assigned to the iteration back channel.");
    }
    headConfig.setBackChannelMemory(memForBackChannel);
    
    // --------------------------- create the sync task ---------------------------
    final JobOutputVertex sync = new JobOutputVertex("Sync(" +
          bulkNode.getNodeName() + ")", this.jobGraph);
    sync.setOutputClass(IterationSynchronizationSinkTask.class);
    sync.setNumberOfSubtasks(1);
    this.auxVertices.add(sync);
    
    final TaskConfig syncConfig = new TaskConfig(sync.getConfiguration());
    syncConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, headVertex.getNumberOfSubtasks());


    // set the number of iteration / convergence criterion for the sync
    final int maxNumIterations = bulkNode.getIterationNode().getIterationContract().getMaximumNumberOfIterations();
    if (maxNumIterations < 1) {
      throw new CompilerException("Cannot create bulk iteration with unspecified maximum number of iterations.");
    }
    syncConfig.setNumberOfIterations(maxNumIterations);
    
    // connect the sync task
    try {
      headVertex.connectTo(sync, ChannelType.NETWORK, DistributionPattern.POINTWISE);
    } catch (JobGraphDefinitionException e) {
      throw new CompilerException("Bug: Cannot connect head vertex to sync task.");
    }
    
    
    // ----------------------------- create the iteration tail ------------------------------
    
    final PlanNode rootOfTerminationCriterion = bulkNode.getRootOfTerminationCriterion();
    final PlanNode rootOfStepFunction = bulkNode.getRootOfStepFunction();
    final TaskConfig tailConfig;
    
    JobTaskVertex rootOfStepFunctionVertex = (JobTaskVertex) this.vertices.get(rootOfStepFunction);
    if (rootOfStepFunctionVertex == null) {
      // last op is chained
      final TaskInChain taskInChain = this.chainedTasks.get(rootOfStepFunction);
      if (taskInChain == null) {
        throw new CompilerException("Bug: Tail of step function not found as vertex or chained task.");
      }
      rootOfStepFunctionVertex = (JobTaskVertex) taskInChain.getContainingVertex();


      // the fake channel is statically typed to pact record. no data is sent over this channel anyways.
      tailConfig = taskInChain.getTaskConfig();
    } else {
      tailConfig = new TaskConfig(rootOfStepFunctionVertex.getConfiguration());
    }
    
    tailConfig.setIsWorksetUpdate();
    
    // No following termination criterion
    if(rootOfStepFunction.getOutgoingChannels().isEmpty()) {
      
      rootOfStepFunctionVertex.setTaskClass(IterationTailPactTask.class);
      
      tailConfig.setOutputSerializer(bulkNode.getSerializerForIterationChannel());
      tailConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
      
      // create the fake output task
      JobOutputVertex fakeTail = new JobOutputVertex("Fake Tail", this.jobGraph);
      fakeTail.setOutputClass(FakeOutputTask.class);
      fakeTail.setNumberOfSubtasks(headVertex.getNumberOfSubtasks());
      fakeTail.setNumberOfSubtasksPerInstance(headVertex.getNumberOfSubtasksPerInstance());
      this.auxVertices.add(fakeTail);
      
      // connect the fake tail
      try {
        rootOfStepFunctionVertex.connectTo(fakeTail, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
      } catch (JobGraphDefinitionException e) {
        throw new CompilerException("Bug: Cannot connect iteration tail vertex fake tail task");
      }
      
    }
    
    
    // create the fake output task for termination criterion, if needed
    final TaskConfig tailConfigOfTerminationCriterion;
    // If we have a termination criterion and it is not an intermediate node
    if(rootOfTerminationCriterion != null && rootOfTerminationCriterion.getOutgoingChannels().isEmpty()) {
      JobTaskVertex rootOfTerminationCriterionVertex = (JobTaskVertex) this.vertices.get(rootOfTerminationCriterion);
      
      
      if (rootOfTerminationCriterionVertex == null) {
        // last op is chained
        final TaskInChain taskInChain = this.chainedTasks.get(rootOfTerminationCriterion);
        if (taskInChain == null) {
          throw new CompilerException("Bug: Tail of termination criterion not found as vertex or chained task.");
        }
        rootOfTerminationCriterionVertex = (JobTaskVertex) taskInChain.getContainingVertex();


        // the fake channel is statically typed to pact record. no data is sent over this channel anyways.
        tailConfigOfTerminationCriterion = taskInChain.getTaskConfig();
      } else {
        tailConfigOfTerminationCriterion = new TaskConfig(rootOfTerminationCriterionVertex.getConfiguration());
      }
      
      rootOfTerminationCriterionVertex.setTaskClass(IterationTailPactTask.class);
      // Hack
      tailConfigOfTerminationCriterion.setIsSolutionSetUpdate();
      tailConfigOfTerminationCriterion.setOutputSerializer(bulkNode.getSerializerForIterationChannel());
      tailConfigOfTerminationCriterion.addOutputShipStrategy(ShipStrategyType.FORWARD);
      
      JobOutputVertex fakeTailTerminationCriterion = new JobOutputVertex("Fake Tail for Termination Criterion", this.jobGraph);
      fakeTailTerminationCriterion.setOutputClass(FakeOutputTask.class);
      fakeTailTerminationCriterion.setNumberOfSubtasks(headVertex.getNumberOfSubtasks());
      fakeTailTerminationCriterion.setNumberOfSubtasksPerInstance(headVertex.getNumberOfSubtasksPerInstance());
      this.auxVertices.add(fakeTailTerminationCriterion);
    
      // connect the fake tail
      try {
        rootOfTerminationCriterionVertex.connectTo(fakeTailTerminationCriterion, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
      } catch (JobGraphDefinitionException e) {
        throw new CompilerException("Bug: Cannot connect iteration tail vertex fake tail task for termination criterion");
      }
      
      // tell the head that it needs to wait for the solution set updates
      headConfig.setWaitForSolutionSetUpdate();
    }
    
    // ------------------- register the aggregators -------------------
    AggregatorRegistry aggs = bulkNode.getIterationNode().getIterationContract().getAggregators();
    Collection<AggregatorWithName<?>> allAggregators = aggs.getAllRegisteredAggregators();
    
    headConfig.addIterationAggregators(allAggregators);
    syncConfig.addIterationAggregators(allAggregators);
    
    String convAggName = aggs.getConvergenceCriterionAggregatorName();
    Class<? extends ConvergenceCriterion<?>> convCriterion = aggs.getConvergenceCriterion();
    
    if (convCriterion != null || convAggName != null) {
      if (convCriterion == null) {
        throw new CompilerException("Error: Convergence criterion aggregator set, but criterion is null.");
      }
      if (convAggName == null) {
        throw new CompilerException("Error: Aggregator convergence criterion set, but aggregator is null.");
      }
      
      syncConfig.setConvergenceCriterion(convAggName, convCriterion);
    }
  }
  
  private void finalizeWorksetIteration(IterationDescriptor descr) {
    final WorksetIterationPlanNode iterNode = (WorksetIterationPlanNode) descr.getIterationNode();
    final JobTaskVertex headVertex = descr.getHeadTask();
    final TaskConfig headConfig = new TaskConfig(headVertex.getConfiguration());
    final TaskConfig headFinalOutputConfig = descr.getHeadFinalResultConfig();
    
    // ------------ finalize the head config with the final outputs and the sync gate ------------
    {
      final int numStepFunctionOuts = headConfig.getNumOutputs();
      final int numFinalOuts = headFinalOutputConfig.getNumOutputs();
      headConfig.setIterationHeadFinalOutputConfig(headFinalOutputConfig);
      headConfig.setIterationHeadIndexOfSyncOutput(numStepFunctionOuts + numFinalOuts);
      final long mem = iterNode.getMemoryPerSubTask();
      if (mem <= 0) {
        throw new CompilerException("Bug: No memory has been assigned to the workset iteration.");
      }
      
      headConfig.setIsWorksetIteration();
      headConfig.setBackChannelMemory(mem / 2);
      headConfig.setSolutionSetMemory(mem / 2);
      
      // set the solution set serializer and comparator
      headConfig.setSolutionSetSerializer(iterNode.getSolutionSetSerializer());
      headConfig.setSolutionSetComparator(iterNode.getSolutionSetComparator());
    }
    
    // --------------------------- create the sync task ---------------------------
    final TaskConfig syncConfig;
    {
      final JobOutputVertex sync = new JobOutputVertex("Sync (" +
            iterNode.getNodeName() + ")", this.jobGraph);
      sync.setOutputClass(IterationSynchronizationSinkTask.class);
      sync.setNumberOfSubtasks(1);
      this.auxVertices.add(sync);
      
      syncConfig = new TaskConfig(sync.getConfiguration());
      syncConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, headVertex.getNumberOfSubtasks());
  
      // set the number of iteration / convergence criterion for the sync
      final int maxNumIterations = iterNode.getIterationNode().getIterationContract().getMaximumNumberOfIterations();
      if (maxNumIterations < 1) {
        throw new CompilerException("Cannot create workset iteration with unspecified maximum number of iterations.");
      }
      syncConfig.setNumberOfIterations(maxNumIterations);
      
      // connect the sync task
      try {
        headVertex.connectTo(sync, ChannelType.NETWORK, DistributionPattern.POINTWISE);
      } catch (JobGraphDefinitionException e) {
        throw new CompilerException("Bug: Cannot connect head vertex to sync task.");
      }
    }
    
    // ----------------------------- create the iteration tails -----------------------------
    // ----------------------- for next workset and solution set delta-----------------------


    {
      // we have three possible cases:
      // 1) Two tails, one for workset update, one for solution set update
      // 2) One tail for workset update, solution set update happens in an intermediate task
      // 3) One tail for solution set update, workset update happens in an intermediate task
      
      final PlanNode nextWorksetNode = iterNode.getNextWorkSetPlanNode();
      final PlanNode solutionDeltaNode = iterNode.getSolutionSetDeltaPlanNode();
      
      final boolean hasWorksetTail = nextWorksetNode.getOutgoingChannels().isEmpty();
      final boolean hasSolutionSetTail = (!iterNode.isImmediateSolutionSetUpdate()) || (!hasWorksetTail);
      
      {
        // get the vertex for the workset update
        final TaskConfig worksetTailConfig;
        JobTaskVertex nextWorksetVertex = (JobTaskVertex) this.vertices.get(nextWorksetNode);
        if (nextWorksetVertex == null) {
          // nextWorksetVertex is chained
          TaskInChain taskInChain = this.chainedTasks.get(nextWorksetNode);
          if (taskInChain == null) {
            throw new CompilerException("Bug: Next workset node not found as vertex or chained task.");
          }
          nextWorksetVertex = (JobTaskVertex) taskInChain.getContainingVertex();
          worksetTailConfig = taskInChain.getTaskConfig();
        } else {
          worksetTailConfig = new TaskConfig(nextWorksetVertex.getConfiguration());
        }
        
        // mark the node to perform workset updates
        worksetTailConfig.setIsWorksetIteration();
        worksetTailConfig.setIsWorksetUpdate();
        
        if (hasWorksetTail) {
          nextWorksetVertex.setTaskClass(IterationTailPactTask.class);
          
          worksetTailConfig.setOutputSerializer(iterNode.getWorksetSerializer());
          worksetTailConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
          
          // create the fake output task
          JobOutputVertex fakeTail = new JobOutputVertex("Fake Tail", this.jobGraph);
          fakeTail.setOutputClass(FakeOutputTask.class);
          fakeTail.setNumberOfSubtasks(headVertex.getNumberOfSubtasks());
          fakeTail.setNumberOfSubtasksPerInstance(headVertex.getNumberOfSubtasksPerInstance());
          this.auxVertices.add(fakeTail);
          
          // connect the fake tail
          try {
            nextWorksetVertex.connectTo(fakeTail, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
          } catch (JobGraphDefinitionException e) {
            throw new CompilerException("Bug: Cannot connect iteration tail vertex fake tail task");
          }
        }
      }
      {
        final TaskConfig solutionDeltaConfig;
        JobTaskVertex solutionDeltaVertex = (JobTaskVertex) this.vertices.get(solutionDeltaNode);
        if (solutionDeltaVertex == null) {
          // last op is chained
          TaskInChain taskInChain = this.chainedTasks.get(solutionDeltaNode);
          if (taskInChain == null) {
            throw new CompilerException("Bug: Solution Set Delta not found as vertex or chained task.");
          }
          solutionDeltaVertex = (JobTaskVertex) taskInChain.getContainingVertex();
          solutionDeltaConfig = taskInChain.getTaskConfig();
        } else {
          solutionDeltaConfig = new TaskConfig(solutionDeltaVertex.getConfiguration());
        }
        
        solutionDeltaConfig.setIsWorksetIteration();
        solutionDeltaConfig.setIsSolutionSetUpdate();
        
        if (hasSolutionSetTail) {
          solutionDeltaVertex.setTaskClass(IterationTailPactTask.class);
          
          solutionDeltaConfig.setOutputSerializer(iterNode.getSolutionSetSerializer());
          solutionDeltaConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
  
          // create the fake output task
          JobOutputVertex fakeTail = new JobOutputVertex("Fake Tail", this.jobGraph);
          fakeTail.setOutputClass(FakeOutputTask.class);
          fakeTail.setNumberOfSubtasks(headVertex.getNumberOfSubtasks());
          fakeTail.setNumberOfSubtasksPerInstance(headVertex.getNumberOfSubtasksPerInstance());
          this.auxVertices.add(fakeTail);
          
          // connect the fake tail
          try {
            solutionDeltaVertex.connectTo(fakeTail, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
          } catch (JobGraphDefinitionException e) {
            throw new CompilerException("Bug: Cannot connect iteration tail vertex fake tail task");
          }
          
          // tell the head that it needs to wait for the solution set updates
          headConfig.setWaitForSolutionSetUpdate();
        }
        else {
          // no tail, intermediate update. must be immediate update
          if (!iterNode.isImmediateSolutionSetUpdate()) {
            throw new CompilerException("A solution set update without dedicated tail is not set to perform immediate updates.");
          }
          solutionDeltaConfig.setIsSolutionSetUpdateWithoutReprobe();
        }
      }
    }
    
    // ------------------- register the aggregators -------------------
    AggregatorRegistry aggs = iterNode.getIterationNode().getIterationContract().getAggregators();
    Collection<AggregatorWithName<?>> allAggregators = aggs.getAllRegisteredAggregators();
    
    for (AggregatorWithName<?> agg : allAggregators) {
      if (agg.getName().equals(WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME)) {
        throw new CompilerException("User defined aggregator used the same name as built-in workset " +
            "termination check aggregator: " + WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME);
      }
    }
    
    headConfig.addIterationAggregators(allAggregators);
    syncConfig.addIterationAggregators(allAggregators);
    
    String convAggName = aggs.getConvergenceCriterionAggregatorName();
    Class<? extends ConvergenceCriterion<?>> convCriterion = aggs.getConvergenceCriterion();
    
    if (convCriterion != null || convAggName != null) {
      throw new CompilerException("Error: Cannot use custom convergence criterion with workset iteration. Workset iterations have implicit convergence criterion where workset is empty.");
    }
    
    headConfig.addIterationAggregator(WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME, LongSumAggregator.class);
    syncConfig.addIterationAggregator(WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME, LongSumAggregator.class);
    syncConfig.setConvergenceCriterion(WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME, WorksetEmptyConvergenceCriterion.class);
  }


  // -------------------------------------------------------------------------------------
  // Descriptors for tasks / configurations that are chained or merged with other tasks
  // -------------------------------------------------------------------------------------
  
  /**
   * Utility class that describes a task in a sequence of chained tasks. Chained tasks are tasks that run
   * together in one thread.
   */
  private static final class TaskInChain {
    
    private final Class<? extends ChainedDriver<?, ?>> chainedTask;
    
    private final TaskConfig taskConfig;
    
    private final String taskName;
    
    private AbstractJobVertex containingVertex;


    @SuppressWarnings("unchecked")
    TaskInChain(@SuppressWarnings("rawtypes") Class<? extends ChainedDriver> chainedTask, TaskConfig taskConfig, String taskName) {
      this.chainedTask = (Class<? extends ChainedDriver<?, ?>>) chainedTask;
      this.taskConfig = taskConfig;
      this.taskName = taskName;
    }
    
    public Class<? extends ChainedDriver<?, ?>> getChainedTask() {
      return this.chainedTask;
    }
    
    public TaskConfig getTaskConfig() {
      return this.taskConfig;
    }
    
    public String getTaskName() {
      return this.taskName;
    }
    
    public AbstractJobVertex getContainingVertex() {
      return this.containingVertex;
    }
    
    public void setContainingVertex(AbstractJobVertex containingVertex) {
      this.containingVertex = containingVertex;
    }
  }
  
  private static final class IterationDescriptor {
    
    private final IterationPlanNode iterationNode;
    
    private JobTaskVertex headTask;
    
    private TaskConfig headConfig;
    
    private TaskConfig  headFinalResultConfig;
    
    private final int id;


    public IterationDescriptor(IterationPlanNode iterationNode, int id) {
      this.iterationNode = iterationNode;
      this.id = id;
    }
    
    public IterationPlanNode getIterationNode() {
      return iterationNode;
    }
    
    public void setHeadTask(JobTaskVertex headTask, TaskConfig headConfig) {
      this.headTask = headTask;
      this.headFinalResultConfig = new TaskConfig(new Configuration());
      
      // check if we already had a configuration, for example if the solution set was 
      if (this.headConfig != null) {
        headConfig.getConfiguration().addAll(this.headConfig.getConfiguration());
      }
      
      this.headConfig = headConfig;
    }
    
    public JobTaskVertex getHeadTask() {
      return headTask;
    }
    
    public TaskConfig getHeadFinalResultConfig() {
      return headFinalResultConfig;
    }
    
    public int getId() {
      return this.id;
    }
  }
}
Source Code of eu.stratosphere.compiler.plantranslate.NepheleJobGraphGenerator$TaskInChain

Related Classes of eu.stratosphere.compiler.plantranslate.NepheleJobGraphGenerator$TaskInChain