Package org.apache.helix.controller.strategy

Source Code of org.apache.helix.controller.strategy.AutoRebalanceStrategy$DefaultPlacementScheme

package org.apache.helix.controller.strategy;

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.helix.HelixManager;
import org.apache.helix.ZNRecord;
import org.apache.helix.api.State;
import org.apache.helix.api.id.ParticipantId;
import org.apache.helix.api.id.PartitionId;
import org.apache.helix.api.id.ResourceId;
import org.apache.helix.model.ResourceAssignment;
import org.apache.log4j.Logger;

import com.google.common.base.Function;
import com.google.common.base.Functions;
import com.google.common.collect.Lists;

public class AutoRebalanceStrategy {

  private static Logger logger = Logger.getLogger(AutoRebalanceStrategy.class);

  private final ResourceId _resourceId;
  private final List<PartitionId> _partitions;
  private final LinkedHashMap<State, Integer> _states;
  private final int _maximumPerNode;
  private final ReplicaPlacementScheme _placementScheme;

  private Map<ParticipantId, Node> _nodeMap;
  private List<Node> _liveNodesList;
  private Map<Integer, State> _stateMap;

  private Map<Replica, Node> _preferredAssignment;
  private Map<Replica, Node> _existingPreferredAssignment;
  private Map<Replica, Node> _existingNonPreferredAssignment;
  private Set<Replica> _orphaned;

  /**
   * Initialize this strategy for a resource
   * @param resourceName the resource for which an assignment will be computed
   * @param partitions the partition names for the resource
   * @param states the states and the number of replicas that should be in each state
   * @param maximumPerNode the maximum number of replicas any note can hold
   * @param placementScheme the scheme to use for preferred replica locations. If null, this is
   *          {@link DefaultPlacementScheme}
   */
  public AutoRebalanceStrategy(String resourceName, final List<String> partitions,
      final LinkedHashMap<String, Integer> states, int maximumPerNode,
      ReplicaPlacementScheme placementScheme) {
    _resourceId = ResourceId.from(resourceName);
    _partitions =
        Lists.newArrayList(Lists.transform(partitions, new Function<String, PartitionId>() {
          @Override
          public PartitionId apply(String input) {
            return PartitionId.from(input);
          }
        }));
    _states = new LinkedHashMap<State, Integer>();
    for (String state : states.keySet()) {
      _states.put(State.from(state), states.get(state));
    }
    _maximumPerNode = maximumPerNode;
    if (placementScheme != null) {
      _placementScheme = placementScheme;
    } else {
      _placementScheme = new DefaultPlacementScheme();
    }
  }

  /**
   * Initialize the strategy with a default placement scheme
   * @see #AutoRebalanceStrategy(String, List, LinkedHashMap, int, ReplicaPlacementScheme)
   */
  public AutoRebalanceStrategy(String resourceName, final List<String> partitions,
      final LinkedHashMap<String, Integer> states) {
    this(resourceName, partitions, states, Integer.MAX_VALUE, new DefaultPlacementScheme());
  }

  /**
   * Constructor to support logically-typed Helix components
   * @param resourceId the resource for which to compute an assignment
   * @param partitions the partitions of the resource
   * @param states the states and counts for each state
   * @param maximumPerNode the maximum number of replicas per node
   * @param placementScheme the scheme to use for preferred replica locations. If null, this is
   *          {@link DefaultPlacementScheme}
   */
  public AutoRebalanceStrategy(ResourceId resourceId, final List<PartitionId> partitions,
      final LinkedHashMap<State, Integer> states, int maximumPerNode,
      ReplicaPlacementScheme placementScheme) {
    _resourceId = resourceId;
    _partitions = partitions;
    _states = states;
    _maximumPerNode = maximumPerNode;
    if (placementScheme != null) {
      _placementScheme = placementScheme;
    } else {
      _placementScheme = new DefaultPlacementScheme();
    }
  }

  /**
   * Wrap {@link #computePartitionAssignment(List, Map, List)} with a function that takes concrete
   * types
   * @param liveNodes list of live participant ids
   * @param currentMapping map of partition id to map of participant id to state
   * @param allNodes list of all participant ids
   * @return the preference list and replica mapping
   */
  public ZNRecord typedComputePartitionAssignment(final List<ParticipantId> liveNodes,
      final Map<PartitionId, Map<ParticipantId, State>> currentMapping,
      final List<ParticipantId> allNodes) {
    Comparator<ParticipantId> nodeComparator = new NodeComparator();
    List<ParticipantId> sortedLiveNodes = new ArrayList<ParticipantId>(liveNodes);
    Collections.sort(sortedLiveNodes, nodeComparator);
    List<ParticipantId> sortedAllNodes = new ArrayList<ParticipantId>(allNodes);
    Collections.sort(sortedAllNodes, nodeComparator);
    List<String> sortedNodeNames =
        Lists.newArrayList(Lists.transform(sortedAllNodes, Functions.toStringFunction()));
    int numReplicas = countStateReplicas();
    ZNRecord znRecord = new ZNRecord(_resourceId.stringify());
    if (sortedLiveNodes.size() == 0) {
      return znRecord;
    }
    int distRemainder = (numReplicas * _partitions.size()) % sortedLiveNodes.size();
    int distFloor = (numReplicas * _partitions.size()) / sortedLiveNodes.size();
    _nodeMap = new HashMap<ParticipantId, Node>();
    _liveNodesList = new ArrayList<Node>();

    for (ParticipantId id : sortedAllNodes) {
      Node node = new Node(id);
      node.capacity = 0;
      node.hasCeilingCapacity = false;
      _nodeMap.put(id, node);
    }
    for (int i = 0; i < sortedLiveNodes.size(); i++) {
      boolean usingCeiling = false;
      int targetSize = (_maximumPerNode > 0) ? Math.min(distFloor, _maximumPerNode) : distFloor;
      if (distRemainder > 0 && targetSize < _maximumPerNode) {
        targetSize += 1;
        distRemainder = distRemainder - 1;
        usingCeiling = true;
      }
      Node node = _nodeMap.get(sortedLiveNodes.get(i));
      node.isAlive = true;
      node.capacity = targetSize;
      node.hasCeilingCapacity = usingCeiling;
      _liveNodesList.add(node);
    }

    // compute states for all replica ids
    _stateMap = generateStateMap();

    // compute the preferred mapping if all nodes were up
    _preferredAssignment = computePreferredPlacement(sortedNodeNames);

    // logger.info("preferred mapping:"+ preferredAssignment);
    // from current mapping derive the ones in preferred location
    // this will update the nodes with their current fill status
    _existingPreferredAssignment = computeExistingPreferredPlacement(currentMapping);

    // from current mapping derive the ones not in preferred location
    _existingNonPreferredAssignment = computeExistingNonPreferredPlacement(currentMapping);

    // compute orphaned replicas that are not assigned to any node
    _orphaned = computeOrphaned();
    if (logger.isInfoEnabled()) {
      logger.info("orphan = " + _orphaned);
    }

    moveNonPreferredReplicasToPreferred();

    assignOrphans();

    moveExcessReplicas();

    prepareResult(znRecord);
    return znRecord;
  }

  /**
   * Determine a preference list and mapping of partitions to nodes for all replicas
   * @param liveNodes the current list of live participants
   * @param currentMapping the current assignment of replicas to nodes
   * @param allNodes the full list of known nodes in the system
   * @return the preference list and replica mapping
   */
  public ZNRecord computePartitionAssignment(final List<String> liveNodes,
      final Map<String, Map<String, String>> currentMapping, final List<String> allNodes) {

    Function<String, ParticipantId> participantConverter = new Function<String, ParticipantId>() {
      @Override
      public ParticipantId apply(String participantId) {
        return ParticipantId.from(participantId);
      }
    };
    List<ParticipantId> typedLiveNodes =
        Lists.newArrayList(Lists.transform(liveNodes, participantConverter));
    List<ParticipantId> typedAllNodes =
        Lists.newArrayList(Lists.transform(allNodes, participantConverter));
    Map<PartitionId, Map<ParticipantId, State>> typedCurrentMapping =
        ResourceAssignment.replicaMapsFromStringMaps(currentMapping);
    return typedComputePartitionAssignment(typedLiveNodes, typedCurrentMapping, typedAllNodes);
  }

  /**
   * Move replicas assigned to non-preferred nodes if their current node is at capacity
   * and its preferred node is under capacity.
   */
  private void moveNonPreferredReplicasToPreferred() {
    // iterate through non preferred and see if we can move them to the
    // preferred location if the donor has more than it should and stealer has
    // enough capacity
    Iterator<Entry<Replica, Node>> iterator = _existingNonPreferredAssignment.entrySet().iterator();
    while (iterator.hasNext()) {
      Entry<Replica, Node> entry = iterator.next();
      Replica replica = entry.getKey();
      Node donor = entry.getValue();
      Node receiver = _preferredAssignment.get(replica);
      if (donor.capacity < donor.currentlyAssigned
          && receiver.capacity > receiver.currentlyAssigned && receiver.canAdd(replica)) {
        donor.currentlyAssigned = donor.currentlyAssigned - 1;
        receiver.currentlyAssigned = receiver.currentlyAssigned + 1;
        donor.nonPreferred.remove(replica);
        receiver.preferred.add(replica);
        donor.newReplicas.remove(replica);
        receiver.newReplicas.add(replica);
        iterator.remove();
      }
    }
  }

  /**
   * Slot in orphaned partitions randomly so as to maintain even load on live nodes.
   */
  private void assignOrphans() {
    // now iterate over nodes and remaining orphaned partitions and assign
    // partitions randomly
    // Better to iterate over orphaned partitions first
    Iterator<Replica> it = _orphaned.iterator();
    while (it.hasNext()) {
      Replica replica = it.next();
      boolean added = false;
      int startIndex = computeRandomStartIndex(replica);
      for (int index = startIndex; index < startIndex + _liveNodesList.size(); index++) {
        Node receiver = _liveNodesList.get(index % _liveNodesList.size());
        if (receiver.capacity > receiver.currentlyAssigned && receiver.canAdd(replica)) {
          receiver.currentlyAssigned = receiver.currentlyAssigned + 1;
          receiver.nonPreferred.add(replica);
          receiver.newReplicas.add(replica);
          added = true;
          break;
        }
      }
      if (!added) {
        // try adding the replica by making room for it
        added = assignOrphanByMakingRoom(replica);
      }
      if (added) {
        it.remove();
      }
    }
    if (_orphaned.size() > 0 && logger.isInfoEnabled()) {
      logger.info("could not assign nodes to partitions: " + _orphaned);
    }
  }

  /**
   * If an orphan can't be assigned normally, see if a node can borrow capacity to accept it
   * @param replica The replica to assign
   * @return true if the assignment succeeded, false otherwise
   */
  private boolean assignOrphanByMakingRoom(Replica replica) {
    Node capacityDonor = null;
    Node capacityAcceptor = null;
    int startIndex = computeRandomStartIndex(replica);
    for (int index = startIndex; index < startIndex + _liveNodesList.size(); index++) {
      Node current = _liveNodesList.get(index % _liveNodesList.size());
      if (current.hasCeilingCapacity && current.capacity > current.currentlyAssigned
          && !current.canAddIfCapacity(replica) && capacityDonor == null) {
        // this node has space but cannot accept the node
        capacityDonor = current;
      } else if (!current.hasCeilingCapacity && current.capacity == current.currentlyAssigned
          && current.canAddIfCapacity(replica) && capacityAcceptor == null) {
        // this node would be able to accept the replica if it has ceiling capacity
        capacityAcceptor = current;
      }
      if (capacityDonor != null && capacityAcceptor != null) {
        break;
      }
    }
    if (capacityDonor != null && capacityAcceptor != null) {
      // transfer ceiling capacity and add the node
      capacityAcceptor.steal(capacityDonor, replica);
      return true;
    }
    return false;
  }

  /**
   * Move replicas from too-full nodes to nodes that can accept the replicas
   */
  private void moveExcessReplicas() {
    // iterate over nodes and move extra load
    Iterator<Replica> it;
    for (Node donor : _liveNodesList) {
      if (donor.capacity < donor.currentlyAssigned) {
        Collections.sort(donor.nonPreferred);
        it = donor.nonPreferred.iterator();
        while (it.hasNext()) {
          Replica replica = it.next();
          int startIndex = computeRandomStartIndex(replica);
          for (int index = startIndex; index < startIndex + _liveNodesList.size(); index++) {
            Node receiver = _liveNodesList.get(index % _liveNodesList.size());
            if (receiver.canAdd(replica)) {
              receiver.currentlyAssigned = receiver.currentlyAssigned + 1;
              receiver.nonPreferred.add(replica);
              donor.currentlyAssigned = donor.currentlyAssigned - 1;
              it.remove();
              break;
            }
          }
          if (donor.capacity >= donor.currentlyAssigned) {
            break;
          }
        }
        if (donor.capacity < donor.currentlyAssigned) {
          logger.warn("Could not take partitions out of node:" + donor.id);
        }
      }
    }
  }

  /**
   * Update a ZNRecord with the results of the rebalancing.
   * @param znRecord
   */
  private void prepareResult(ZNRecord znRecord) {
    // The map fields are keyed on partition name to a pair of node and state, i.e. it
    // indicates that the partition with given state is served by that node
    //
    // The list fields are also keyed on partition and list all the nodes serving that partition.
    // This is useful to verify that there is no node serving multiple replicas of the same
    // partition.
    Map<String, List<String>> newPreferences = new TreeMap<String, List<String>>();
    for (PartitionId partition : _partitions) {
      String partitionName = partition.stringify();
      znRecord.setMapField(partitionName, new TreeMap<String, String>());
      znRecord.setListField(partitionName, new ArrayList<String>());
      newPreferences.put(partitionName, new ArrayList<String>());
    }

    // for preference lists, the rough priority that we want is:
    // [existing preferred, existing non-preferred, non-existing preferred, non-existing
    // non-preferred]
    for (Node node : _liveNodesList) {
      for (Replica replica : node.preferred) {
        if (node.newReplicas.contains(replica)) {
          newPreferences.get(replica.partition.toString()).add(node.id.toString());
        } else {
          znRecord.getListField(replica.partition.toString()).add(node.id.toString());
        }
      }
    }
    for (Node node : _liveNodesList) {
      for (Replica replica : node.nonPreferred) {
        if (node.newReplicas.contains(replica)) {
          newPreferences.get(replica.partition.toString()).add(node.id.toString());
        } else {
          znRecord.getListField(replica.partition.toString()).add(node.id.toString());
        }
      }
    }
    normalizePreferenceLists(znRecord.getListFields(), newPreferences);

    // generate preference maps based on the preference lists
    for (PartitionId partition : _partitions) {
      List<String> preferenceList = znRecord.getListField(partition.toString());
      int i = 0;
      for (String participant : preferenceList) {
        znRecord.getMapField(partition.toString()).put(participant, _stateMap.get(i).toString());
        i++;
      }
    }
  }

  /**
   * Adjust preference lists to reduce the number of same replicas on an instance. This will
   * separately normalize two sets of preference lists, and then append the results of the second
   * set to those of the first. This basically ensures that existing replicas are automatically
   * preferred.
   * @param preferenceLists map of (partition --> list of nodes)
   * @param newPreferences map containing node preferences not consistent with the current
   *          assignment
   */
  private void normalizePreferenceLists(Map<String, List<String>> preferenceLists,
      Map<String, List<String>> newPreferences) {
    Map<String, Map<String, Integer>> nodeReplicaCounts =
        new HashMap<String, Map<String, Integer>>();
    for (String partition : preferenceLists.keySet()) {
      normalizePreferenceList(preferenceLists.get(partition), nodeReplicaCounts);
    }
    for (String partition : newPreferences.keySet()) {
      normalizePreferenceList(newPreferences.get(partition), nodeReplicaCounts);
      preferenceLists.get(partition).addAll(newPreferences.get(partition));
    }
  }

  /**
   * Adjust a single preference list for replica assignment imbalance
   * @param preferenceList list of node names
   * @param nodeReplicaCounts map of (node --> state --> count)
   */
  private void normalizePreferenceList(List<String> preferenceList,
      Map<String, Map<String, Integer>> nodeReplicaCounts) {
    // make this a LinkedHashSet to preserve iteration order
    Set<String> notAssigned = new LinkedHashSet<String>(preferenceList);
    List<String> newPreferenceList = new ArrayList<String>();
    int replicas = Math.min(countStateReplicas(), preferenceList.size());
    for (int i = 0; i < replicas; i++) {
      State state = _stateMap.get(i);
      String node = getMinimumNodeForReplica(state, notAssigned, nodeReplicaCounts);
      newPreferenceList.add(node);
      notAssigned.remove(node);
      Map<String, Integer> counts = nodeReplicaCounts.get(node);
      counts.put(state.toString(), counts.get(state.toString()) + 1);
    }
    preferenceList.clear();
    preferenceList.addAll(newPreferenceList);
  }

  /**
   * Get the node which hosts the fewest of a given replica
   * @param state the state
   * @param nodes nodes to check
   * @param nodeReplicaCounts current assignment of replicas
   * @return the node most willing to accept the replica
   */
  private String getMinimumNodeForReplica(State state, Set<String> nodes,
      Map<String, Map<String, Integer>> nodeReplicaCounts) {
    String minimalNode = null;
    int minimalCount = Integer.MAX_VALUE;
    for (String node : nodes) {
      int count = getReplicaCountForNode(state, node, nodeReplicaCounts);
      if (count < minimalCount) {
        minimalCount = count;
        minimalNode = node;
      }
    }
    return minimalNode;
  }

  /**
   * Safe check for the number of replicas of a given id assiged to a node
   * @param state the state to assign
   * @param node the node to check
   * @param nodeReplicaCounts a map of node to replica id and counts
   * @return the number of currently assigned replicas of the given id
   */
  private int getReplicaCountForNode(State state, String node,
      Map<String, Map<String, Integer>> nodeReplicaCounts) {
    if (!nodeReplicaCounts.containsKey(node)) {
      Map<String, Integer> replicaCounts = new HashMap<String, Integer>();
      replicaCounts.put(state.toString(), 0);
      nodeReplicaCounts.put(node, replicaCounts);
      return 0;
    }
    Map<String, Integer> replicaCounts = nodeReplicaCounts.get(node);
    if (!replicaCounts.containsKey(state)) {
      replicaCounts.put(state.toString(), 0);
      return 0;
    }
    return replicaCounts.get(state);
  }

  /**
   * Compute the subset of the current mapping where replicas are not mapped according to their
   * preferred assignment.
   * @param currentMapping Current mapping of replicas to nodes
   * @return The current assignments that do not conform to the preferred assignment
   */
  private Map<Replica, Node> computeExistingNonPreferredPlacement(
      Map<PartitionId, Map<ParticipantId, State>> currentMapping) {
    Map<Replica, Node> existingNonPreferredAssignment = new TreeMap<Replica, Node>();
    int count = countStateReplicas();
    for (PartitionId partition : currentMapping.keySet()) {
      Map<ParticipantId, State> nodeStateMap = currentMapping.get(partition);
      nodeStateMap.keySet().retainAll(_nodeMap.keySet());
      for (ParticipantId nodeId : nodeStateMap.keySet()) {
        Node node = _nodeMap.get(nodeId);
        boolean skip = false;
        for (Replica replica : node.preferred) {
          if (replica.partition.equals(partition)) {
            skip = true;
            break;
          }
        }
        if (skip) {
          continue;
        }
        // check if its in one of the preferred position
        for (int replicaId = 0; replicaId < count; replicaId++) {
          Replica replica = new Replica(partition, replicaId);
          if (_preferredAssignment.get(replica).id != node.id
              && !_existingPreferredAssignment.containsKey(replica)
              && !existingNonPreferredAssignment.containsKey(replica)) {
            existingNonPreferredAssignment.put(replica, node);
            node.nonPreferred.add(replica);
            break;
          }
        }
      }
    }
    return existingNonPreferredAssignment;
  }

  /**
   * Get a live node index to try first for a replica so that each possible start index is
   * roughly uniformly assigned.
   * @param replica The replica to assign
   * @return The starting node index to try
   */
  private int computeRandomStartIndex(final Replica replica) {
    return (replica.hashCode() & 0x7FFFFFFF) % _liveNodesList.size();
  }

  /**
   * Get a set of replicas not currently assigned to any node
   * @return Unassigned replicas
   */
  private Set<Replica> computeOrphaned() {
    Set<Replica> orphanedPartitions = new TreeSet<Replica>(_preferredAssignment.keySet());
    for (Replica r : _existingPreferredAssignment.keySet()) {
      if (orphanedPartitions.contains(r)) {
        orphanedPartitions.remove(r);
      }
    }
    for (Replica r : _existingNonPreferredAssignment.keySet()) {
      if (orphanedPartitions.contains(r)) {
        orphanedPartitions.remove(r);
      }
    }

    return orphanedPartitions;
  }

  /**
   * Determine the replicas already assigned to their preferred nodes
   * @param currentMapping Current assignment of replicas to nodes
   * @return Assignments that conform to the preferred placement
   */
  private Map<Replica, Node> computeExistingPreferredPlacement(
      final Map<PartitionId, Map<ParticipantId, State>> currentMapping) {
    Map<Replica, Node> existingPreferredAssignment = new TreeMap<Replica, Node>();
    int count = countStateReplicas();
    for (PartitionId partition : currentMapping.keySet()) {
      Map<ParticipantId, State> nodeStateMap = currentMapping.get(partition);
      nodeStateMap.keySet().retainAll(_nodeMap.keySet());
      for (ParticipantId nodeId : nodeStateMap.keySet()) {
        Node node = _nodeMap.get(nodeId);
        node.currentlyAssigned = node.currentlyAssigned + 1;
        // check if its in one of the preferred position
        for (int replicaId = 0; replicaId < count; replicaId++) {
          Replica replica = new Replica(partition, replicaId);
          if (_preferredAssignment.containsKey(replica)
              && !existingPreferredAssignment.containsKey(replica)
              && _preferredAssignment.get(replica).id == node.id) {
            existingPreferredAssignment.put(replica, node);
            node.preferred.add(replica);
            break;
          }
        }
      }
    }

    return existingPreferredAssignment;
  }

  /**
   * Given a predefined set of all possible nodes, compute an assignment of replicas to
   * nodes that evenly assigns all replicas to nodes.
   * @param allNodes Identifiers to all nodes, live and non-live
   * @return Preferred assignment of replicas
   */
  private Map<Replica, Node> computePreferredPlacement(final List<String> nodeNames) {
    Map<Replica, Node> preferredMapping;
    preferredMapping = new HashMap<Replica, Node>();
    int partitionId = 0;
    int numReplicas = countStateReplicas();
    int count = countStateReplicas();
    for (PartitionId partition : _partitions) {
      for (int replicaId = 0; replicaId < count; replicaId++) {
        Replica replica = new Replica(partition, replicaId);
        ParticipantId nodeName =
            ParticipantId.from(_placementScheme.getLocation(partitionId, replicaId,
                _partitions.size(), numReplicas, nodeNames));
        preferredMapping.put(replica, _nodeMap.get(nodeName));
      }
      partitionId = partitionId + 1;
    }
    return preferredMapping;
  }

  /**
   * Counts the total number of replicas given a state-count mapping
   * @param states
   * @return
   */
  private int countStateReplicas() {
    int total = 0;
    for (Integer count : _states.values()) {
      total += count;
    }
    return total;
  }

  /**
   * Compute a map of replica ids to state names
   * @return Map: replica id -> state name
   */
  private Map<Integer, State> generateStateMap() {
    int replicaId = 0;
    Map<Integer, State> stateMap = new HashMap<Integer, State>();
    for (State state : _states.keySet()) {
      Integer count = _states.get(state);
      for (int i = 0; i < count; i++) {
        stateMap.put(replicaId, state);
        replicaId++;
      }
    }
    return stateMap;
  }

  /**
   * A Node is an entity that can serve replicas. It has a capacity and knowledge
   * of replicas assigned to it, so it can decide if it can receive additional replicas.
   */
  class Node {
    public int currentlyAssigned;
    public int capacity;
    public boolean hasCeilingCapacity;
    private ParticipantId id;
    boolean isAlive;
    private List<Replica> preferred;
    private List<Replica> nonPreferred;
    private Set<Replica> newReplicas;

    public Node(ParticipantId id) {
      preferred = new ArrayList<Replica>();
      nonPreferred = new ArrayList<Replica>();
      newReplicas = new TreeSet<Replica>();
      currentlyAssigned = 0;
      isAlive = false;
      this.id = id;
    }

    /**
     * Check if this replica can be legally added to this node
     * @param replica The replica to test
     * @return true if the assignment can be made, false otherwise
     */
    public boolean canAdd(Replica replica) {
      if (currentlyAssigned >= capacity) {
        return false;
      }
      return canAddIfCapacity(replica);
    }

    /**
     * Check if this replica can be legally added to this node, provided that it has enough
     * capacity.
     * @param replica The replica to test
     * @return true if the assignment can be made, false otherwise
     */
    public boolean canAddIfCapacity(Replica replica) {
      if (!isAlive) {
        return false;
      }
      for (Replica r : preferred) {
        if (r.partition.equals(replica.partition)) {
          return false;
        }
      }
      for (Replica r : nonPreferred) {
        if (r.partition.equals(replica.partition)) {
          return false;
        }
      }
      return true;
    }

    /**
     * Receive a replica by stealing capacity from another Node
     * @param donor The node that has excess capacity
     * @param replica The replica to receive
     */
    public void steal(Node donor, Replica replica) {
      donor.hasCeilingCapacity = false;
      donor.capacity--;
      hasCeilingCapacity = true;
      capacity++;
      currentlyAssigned++;
      nonPreferred.add(replica);
      newReplicas.add(replica);
    }

    @Override
    public String toString() {
      StringBuilder sb = new StringBuilder();
      sb.append("##########\nname=").append(id.toString()).append("\npreferred:")
          .append(preferred.size()).append("\nnonpreferred:").append(nonPreferred.size());
      return sb.toString();
    }
  }

  /**
   * A Replica is a combination of a partition of the resource, the state the replica is in
   * and an identifier signifying a specific replica of a given partition and state.
   */
  class Replica implements Comparable<Replica> {
    private PartitionId partition;
    private int replicaId; // this is a partition-relative id
    private String format;

    public Replica(PartitionId partition, int replicaId) {
      this.partition = partition;
      this.replicaId = replicaId;
      this.format = this.partition.toString() + "|" + this.replicaId;
    }

    @Override
    public String toString() {
      return format;
    }

    @Override
    public boolean equals(Object that) {
      if (that instanceof Replica) {
        return this.format.equals(((Replica) that).format);
      }
      return false;
    }

    @Override
    public int hashCode() {
      return this.format.hashCode();
    }

    @Override
    public int compareTo(Replica that) {
      if (that instanceof Replica) {
        return this.format.compareTo(that.format);
      }
      return -1;
    }
  }

  /**
   * Interface for providing a custom approach to computing a replica's affinity to a node.
   */
  public interface ReplicaPlacementScheme {
    /**
     * Initialize global state
     * @param manager The instance to which this placement is associated
     */
    public void init(final HelixManager manager);

    /**
     * Given properties of this replica, determine the node it would prefer to be served by
     * @param partitionId The current partition
     * @param replicaId The current replica with respect to the current partition
     * @param numPartitions The total number of partitions
     * @param numReplicas The total number of replicas per partition
     * @param nodeNames A list of identifiers of all nodes, live and non-live
     * @return The name of the node that would prefer to serve this replica
     */
    public String getLocation(int partitionId, int replicaId, int numPartitions, int numReplicas,
        final List<String> nodeNames);
  }

  /**
   * Compute preferred placements based on a default strategy that assigns replicas to nodes as
   * evenly as possible while avoiding placing two replicas of the same partition on any node.
   */
  public static class DefaultPlacementScheme implements ReplicaPlacementScheme {
    @Override
    public void init(final HelixManager manager) {
      // do nothing since this is independent of the manager
    }

    @Override
    public String getLocation(int partitionId, int replicaId, int numPartitions, int numReplicas,
        final List<String> nodeNames) {
      int index;
      if (nodeNames.size() > numPartitions) {
        // assign replicas in partition order in case there are more nodes than partitions
        index = (partitionId + replicaId * numPartitions) % nodeNames.size();
      } else if (nodeNames.size() == numPartitions) {
        // need a replica offset in case the sizes of these sets are the same
        index =
            ((partitionId + replicaId * numPartitions) % nodeNames.size() + replicaId)
                % nodeNames.size();
      } else {
        // in all other cases, assigning a replica at a time for each partition is reasonable
        index = (partitionId + replicaId) % nodeNames.size();
      }
      return nodeNames.get(index);
    }
  }

  private static class NodeComparator implements Comparator<ParticipantId> {
    @Override
    public int compare(ParticipantId o1, ParticipantId o2) {
      return o1.toString().compareTo(o2.toString());
    }
  }
}
TOP

Related Classes of org.apache.helix.controller.strategy.AutoRebalanceStrategy$DefaultPlacementScheme

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.