Package eu.stratosphere.nephele.jobmanager.splitassigner.file

Source Code of eu.stratosphere.nephele.jobmanager.splitassigner.file.FileInputSplitList$QueueElem

/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/

package eu.stratosphere.nephele.jobmanager.splitassigner.file;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import eu.stratosphere.core.fs.FileInputSplit;
import eu.stratosphere.nephele.instance.AbstractInstance;

/**
* The file input split list stores the file input splits for an input vertex that are still expected to be consumed.
* Besides simply storing the splits, the file input split list also computes the distance all {@link AbstractInstance}
* objects which request a input split and its nearest storage location with respect to the underlying network topology.
* That way input splits are always given to consuming vertices in a way that data locality is preserved as well as
* possible.
* <p>
* This class is not thread-safe.
*
*/
public final class FileInputSplitList {

  /**
   * The logging object which is used to report information and errors.
   */
  private static final Log LOG = LogFactory.getLog(FileInputSplitList.class);

  /**
   * The set containing all the file input splits that still must be consumed.
   */
  private Set<FileInputSplit> masterSet = new HashSet<FileInputSplit>();

  /**
   * The map caching the specific file input split lists for each {@link AbstractInstance}.
   */
  private Map<AbstractInstance, Queue<QueueElem>> instanceMap = new HashMap<AbstractInstance, Queue<QueueElem>>();

  /**
   * This is an auxiliary class to store the minimum distance between a file input split's storage locations and an
   * {@link AbstractInstance}.
   *
   */
  private final class QueueElem implements Comparable<QueueElem> {

    /**
     * The file input split the distance applies to.
     */
    final FileInputSplit inputSplit;

    /**
     * The minimum distance between the file input split's storage locations and the instance this object has been
     * created for.
     */
    final int distance;

    /**
     * Creates a new queue element.
     *
     * @param inputSplit
     *        the file input split to be stored
     * @param distance
     *        the minimum distance between the stored input split's storage locations and the instance this object
     *        has been created for
     */
    private QueueElem(final FileInputSplit inputSplit, final int distance) {
      this.inputSplit = inputSplit;
      this.distance = distance;
    }

    /**
     * Returns the file input split stored within this object.
     *
     * @return the file input split
     */
    private FileInputSplit getInputSplit() {
      return this.inputSplit;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int compareTo(final QueueElem o) {

      return (this.distance - o.distance);
    }

  }

  /**
   * Adds the given file input split to the set of file input splits to be consumed.
   *
   * @param fileInputSplit
   *        the file input split to be added
   */
  synchronized void addSplit(final FileInputSplit fileInputSplit) {

    this.masterSet.add(fileInputSplit);
  }

  /**
   * Returns the next file input split to be consumed by the given instance. The returned input split is selected in a
   * way that the distance between the split's storage location and the requesting {@link AbstractInstance} is as
   * short as possible.
   *
   * @param instance
   *        the instance requesting the next file input split
   * @return the next input split to be consumed by the given instance or <code>null</code> if all input splits have
   *         already been consumed.
   */
  synchronized FileInputSplit getNextInputSplit(final AbstractInstance instance) {

    final Queue<QueueElem> instanceSplitList = getInstanceSplitList(instance);

    while (true) {

      final QueueElem candidate = instanceSplitList.poll();
      if (candidate == null) {
        return null;
      }

      if (this.masterSet.remove(candidate.getInputSplit())) {
        if (LOG.isInfoEnabled()) {
          if (candidate.distance == 0) {
            LOG.info(instance + " receives local file input split");
          } else {
            LOG.info(instance + " receives remote file input split (distance " + candidate.distance + ")");
          }
        }
        return candidate.getInputSplit();
      }

      if (this.masterSet.isEmpty()) {
        return null;
      }
    }
  }

  /**
   * Returns a list of file input splits specifically ordered for the given {@link AbstractInstance}. When the list is
   * initially created, it contains all the unconsumed file input splits at that point in time, ascendingly ordered by
   * the minimum distance between the input splits' storage locations and the given {@link AbstractInstance}.
   *
   * @param instance
   *        the instance for which the file input split list has been computed
   * @return the list of file input splits ordered specifically for the given instance
   */
  private Queue<QueueElem> getInstanceSplitList(final AbstractInstance instance) {

    Queue<QueueElem> instanceSplitList = this.instanceMap.get(instance);
    if (instanceSplitList == null) {

      // Create and populate instance specific split list
      instanceSplitList = new PriorityQueue<FileInputSplitList.QueueElem>();
      final Iterator<FileInputSplit> it = this.masterSet.iterator();
      while (it.hasNext()) {

        final FileInputSplit split = it.next();
        final String[] hostNames = split.getHostNames();
        if (hostNames == null) {
          instanceSplitList.add(new QueueElem(split, Integer.MAX_VALUE));

        } else {

          int minDistance = Integer.MAX_VALUE;
          for (int i = 0; i < hostNames.length; ++i) {
            final int distance = instance.getDistance(hostNames[i]);
            if (LOG.isDebugEnabled()) {
              LOG.debug("Distance between " + instance + " and " + hostNames[i] + " is " + distance);
            }
            if (distance < minDistance) {
              minDistance = distance;
            }
          }

          instanceSplitList.add(new QueueElem(split, minDistance));
        }
      }

      this.instanceMap.put(instance, instanceSplitList);
    }

    return instanceSplitList;
  }
}
TOP

Related Classes of eu.stratosphere.nephele.jobmanager.splitassigner.file.FileInputSplitList$QueueElem

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.