Package org.apache.drill.exec.planner.fragment

Source Code of org.apache.drill.exec.planner.fragment.SimpleParallelizer

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.planner.fragment;

import java.util.Collection;
import java.util.List;

import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.exceptions.PhysicalOperatorSetupException;
import org.apache.drill.exec.exception.FragmentSetupException;
import org.apache.drill.exec.physical.base.FragmentRoot;
import org.apache.drill.exec.physical.base.PhysicalOperator;
import org.apache.drill.exec.planner.PhysicalPlanReader;
import org.apache.drill.exec.planner.fragment.Materializer.IndexedFragmentNode;
import org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint;
import org.apache.drill.exec.proto.ExecProtos.FragmentHandle;
import org.apache.drill.exec.proto.ExecProtos.PlanFragment;
import org.apache.drill.exec.proto.UserBitShared.QueryId;
import org.apache.drill.exec.work.QueryWorkUnit;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

/**
* The simple parallelizer determines the level of parallelization of a plan based on the cost of the underlying
* operations.  It doesn't take into account system load or other factors.  Based on the cost of the query, the
* parallelization for each major fragment will be determined.  Once the amount of parallelization is done, assignment
* is done based on round robin assignment ordered by operator affinity (locality) to available execution Drillbits.
*/
public class SimpleParallelizer {
  static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(SimpleParallelizer.class);
  private final Materializer materializer = new Materializer();

  /**
   * Generate a set of assigned fragments based on the provided planningSet. Do not allow parallelization stages to go
   * beyond the global max width.
   *
   * @param foremanNode     The driving/foreman node for this query.  (this node)
   * @param queryId         The queryId for this query.
   * @param activeEndpoints The list of endpoints to consider for inclusion in planning this query.
   * @param reader          Tool used to read JSON plans
   * @param rootNode        The root node of the PhysicalPlan that we will parallelizing.
   * @param planningSet     The set of queries with collected statistics that we'll work with.
   * @param globalMaxWidth  The maximum level or parallelization any stage of the query can do. Note that while this
   *                        might be the number of active Drillbits, realistically, this could be well beyond that
   *                        number of we want to do things like speed results return.
   * @return The list of generated PlanFragment protobuf objects to be assigned out to the individual nodes.
   * @throws ExecutionSetupException
   */
  public QueryWorkUnit getFragments(DrillbitEndpoint foremanNode, QueryId queryId, Collection<DrillbitEndpoint> activeEndpoints, PhysicalPlanReader reader, Fragment rootNode, PlanningSet planningSet,
                                    int globalMaxWidth) throws ExecutionSetupException {
    assignEndpoints(activeEndpoints, planningSet, globalMaxWidth);
    return generateWorkUnit(foremanNode, queryId, reader, rootNode, planningSet);
  }

  private QueryWorkUnit generateWorkUnit(DrillbitEndpoint foremanNode, QueryId queryId, PhysicalPlanReader reader, Fragment rootNode,
                                         PlanningSet planningSet) throws ExecutionSetupException {

    List<PlanFragment> fragments = Lists.newArrayList();

    PlanFragment rootFragment = null;
    FragmentRoot rootOperator = null;

    // now we generate all the individual plan fragments and associated assignments. Note, we need all endpoints
    // assigned before we can materialize, so we start a new loop here rather than utilizing the previous one.
    for (Wrapper wrapper : planningSet) {
      Fragment node = wrapper.getNode();
      Stats stats = node.getStats();
      final PhysicalOperator physicalOperatorRoot = node.getRoot();
      boolean isRootNode = rootNode == node;

      if (isRootNode && wrapper.getWidth() != 1)
        throw new FragmentSetupException(
            String.format(
                "Failure while trying to setup fragment.  The root fragment must always have parallelization one.  In the current case, the width was set to %d.",
                wrapper.getWidth()));
      // a fragment is self driven if it doesn't rely on any other exchanges.
      boolean isLeafFragment = node.getReceivingExchangePairs().size() == 0;

      // Create a minorFragment for each major fragment.
      for (int minorFragmentId = 0; minorFragmentId < wrapper.getWidth(); minorFragmentId++) {
        IndexedFragmentNode iNode = new IndexedFragmentNode(minorFragmentId, wrapper);
        PhysicalOperator op = physicalOperatorRoot.accept(materializer, iNode);
        Preconditions.checkArgument(op instanceof FragmentRoot);
        FragmentRoot root = (FragmentRoot) op;

        // get plan as JSON
        String plan;
        try {
          plan = reader.writeJson(root);
        } catch (JsonProcessingException e) {
          throw new FragmentSetupException("Failure while trying to convert fragment into json.", e);
        }

        FragmentHandle handle = FragmentHandle //
            .newBuilder() //
            .setMajorFragmentId(wrapper.getMajorFragmentId()) //
            .setMinorFragmentId(minorFragmentId) //
            .setQueryId(queryId) //
            .build();
        PlanFragment fragment = PlanFragment.newBuilder() //
            .setCpuCost(stats.getCpuCost()) //
            .setDiskCost(stats.getDiskCost()) //
            .setForeman(foremanNode) //
            .setMemoryCost(stats.getMemoryCost()) //
            .setNetworkCost(stats.getNetworkCost()) //
            .setFragmentJson(plan) //
            .setHandle(handle) //
            .setAssignment(wrapper.getAssignedEndpoint(minorFragmentId)) //
            .setLeafFragment(isLeafFragment) //
            .build();

        if (isRootNode) {
          logger.debug("Root fragment {}", fragment);
          rootFragment = fragment;
          rootOperator = root;
        } else {
          logger.debug("Remote fragment {}", fragment);
          fragments.add(fragment);
        }
      }
    }

    return new QueryWorkUnit(rootOperator, rootFragment, fragments);

  }

  private void assignEndpoints(Collection<DrillbitEndpoint> allNodes, PlanningSet planningSet,
                               int globalMaxWidth) throws PhysicalOperatorSetupException {
    // First we determine the amount of parallelization for a fragment. This will be between 1 and maxWidth based on
    // cost. (Later could also be based on cluster operation.) then we decide endpoints based on affinity (later this
    // could be based on endpoint load)
    for (Wrapper wrapper : planningSet) {

      Stats stats = wrapper.getStats();

      // figure out width.
      int width = Math.min(stats.getMaxWidth(), globalMaxWidth);
      float diskCost = stats.getDiskCost();
//      logger.debug("Frag max width: {} and diskCost: {}", stats.getMaxWidth(), diskCost);

      // TODO: right now we'll just assume that each task is cost 1 so we'll set the breadth at the lesser of the number
      // of tasks or the maximum width of the fragment.
      if (diskCost < width) {
        width = (int) diskCost;
      }

      if (width < 1) width = 1;
//      logger.debug("Setting width {} on fragment {}", width, wrapper);
      wrapper.setWidth(width);
      // figure out endpoint assignments. also informs the exchanges about their respective endpoints.
      wrapper.assignEndpoints(allNodes);
    }
  }
}
TOP

Related Classes of org.apache.drill.exec.planner.fragment.SimpleParallelizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.