Package org.apache.hama.bsp

Source Code of org.apache.hama.bsp.SimpleTaskScheduler$JvmCollector

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hama.bsp;

import static java.util.concurrent.TimeUnit.SECONDS;
import static org.apache.hama.monitor.fd.NodeStatus.Dead;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hama.HamaConfiguration;
import org.apache.hama.bsp.sync.ZKSyncBSPMasterClient;
import org.apache.hama.ipc.GroomProtocol;
import org.apache.hama.monitor.Federator;
import org.apache.hama.monitor.Federator.Act;
import org.apache.hama.monitor.Federator.CollectorHandler;
import org.apache.hama.monitor.Metric;
import org.apache.hama.monitor.MetricsRecord;
import org.apache.hama.monitor.Monitor;
import org.apache.hama.monitor.ZKCollector;
import org.apache.hama.monitor.fd.NodeEventListener;
import org.apache.hama.monitor.fd.NodeStatus;
import org.apache.zookeeper.ZooKeeper;

/**
* A simple task scheduler with FCFS processing queue.
*/
class SimpleTaskScheduler extends TaskScheduler {

  /** Logger shared by the scheduler and its nested helper classes. */
  private static final Log LOG = LogFactory.getLog(SimpleTaskScheduler.class);

  /** Name of the queue that newly added (or recovering) jobs are placed in. */
  public static final String WAIT_QUEUE = "waitQueue";
  /** Name of the queue a job is moved to when the processor picks it up. */
  public static final String PROCESSING_QUEUE = "processingQueue";
  /** Name of the queue a job is moved to once it has been removed. */
  public static final String FINISHED_QUEUE = "finishedQueue";

  /** Queue manager; empty until {@link #start()} installs an instance. */
  private final AtomicReference<QueueManager> queueManager = new AtomicReference<QueueManager>();
  /**
   * Set to true by {@link #start()} and back to false by
   * {@link #terminate()}; the job processor loop runs only while it is true.
   */
  private AtomicBoolean initialized = new AtomicBoolean(false);
  /** Listener registered with the groom server manager in {@link #start()}. */
  private final JobListener jobListener;
  /** Thread that drains the wait queue and schedules each job. */
  private final JobProcessor jobProcessor;
  /** Federator; empty until {@link #start()} installs an instance. */
  private final AtomicReference<Federator> federator = new AtomicReference<Federator>();
  /** Latest collected metrics record, keyed by groom server name. */
  private final ConcurrentMap<String, MetricsRecord> repository = new ConcurrentHashMap<String, MetricsRecord>();
  /** Executor that periodically runs the JVM metrics collector. */
  private final ScheduledExecutorService scheduler;

  final class NodeWatcher implements NodeEventListener {
    final GroomServerManager groomManager;
    final TaskScheduler _sched;

    NodeWatcher(GroomServerManager groomManager, TaskScheduler _sched) {
      this.groomManager = groomManager;
      this._sched = _sched;
    }

    @Override
    public NodeStatus[] interest() {
      return new NodeStatus[] { Dead };
    }

    @Override
    public String name() {
      return SimpleTaskScheduler.class.getSimpleName() + "'s "
          + NodeWatcher.class.getSimpleName();
    }

    /**
     * Trigger to reschedule all tasks running on a failed GroomSever. Note that
     * this method is trigger only when a groom server fails (detected by
     * failure detector). BSPMaster has no way to send kill directive to the
     * groom server because a failed GroomServer can't respond.
     *
     * @param status of the groom server, reported by failure detector.
     * @param host is the groom server on which tasks run.
     */
    @Override
    public void notify(NodeStatus status, String host) {
      // TODO:
    }
  }

  private class JobListener extends JobInProgressListener {
    @Override
    public void jobAdded(JobInProgress job) throws IOException {
      queueManager.get().initJob(job); // init task
      queueManager.get().addJob(WAIT_QUEUE, job);
    }

    @Override
    public void jobRemoved(JobInProgress job) throws IOException {
      queueManager.get().moveJob(PROCESSING_QUEUE, FINISHED_QUEUE, job);
    }

    @Override
    public void recoverTaskInJob(JobInProgress job) throws IOException {
      queueManager.get().addJob(WAIT_QUEUE, job);
    }

  }

  private class JobProcessor extends Thread implements Schedulable {

    final ExecutorService sched;

    JobProcessor() {
      super("JobProcessor");
      this.sched = Executors.newCachedThreadPool();
    }

    /**
     * Main logic of scheduling tasks to GroomServer(s). Also, it will move
     * JobInProgress from Wait Queue to Processing Queue.
     */
    @Override
    public void run() {
      if (!initialized.get()) {
        throw new IllegalStateException("SimpleTaskScheduler initialization"
            + " is not yet finished!");
      }
      while (initialized.get()) {
        Queue<JobInProgress> queue = queueManager.get().findQueue(WAIT_QUEUE);
        if (null == queue) {
          LOG.error(WAIT_QUEUE + " does not exist.");
          throw new NullPointerException(WAIT_QUEUE + " does not exist.");
        }
        // move a job from the wait queue to the processing queue
        JobInProgress job = queue.removeJob();
        queueManager.get().addJob(PROCESSING_QUEUE, job);
        // schedule
        Collection<GroomServerStatus> glist = groomServerManager.get()
            .groomServerStatusKeySet();
        schedule(job, glist.toArray(new GroomServerStatus[glist.size()]));
      }
    }

    /**
     * Schedule job to designated GroomServer(s) immediately.
     *
     * @param Targeted GroomServer(s).
     * @param Job to be scheduled.
     */
    @Override
    public void schedule(JobInProgress job, GroomServerStatus... statuses) {
      ClusterStatus clusterStatus = groomServerManager.get().getClusterStatus(
          false);
      final int numGroomServers = clusterStatus.getGroomServers();

      Future<Boolean> jobScheduleResult = sched.submit(new TaskWorker(statuses,
          numGroomServers, job));

      Boolean jobResult = Boolean.FALSE;

      try {
        jobResult = jobScheduleResult.get();
      } catch (InterruptedException e) {
        jobResult = Boolean.FALSE;
        LOG.error("Error submitting job", e);
      } catch (ExecutionException e) {
        jobResult = Boolean.FALSE;
        LOG.error("Error submitting job", e);
      }
      if (Boolean.FALSE.equals(jobResult)) {
        LOG.error(new StringBuffer(512).append("Scheduling of job ")
            .append(job.getJobName())
            .append(" could not be done successfully. Killing it!").toString());
        job.kill();
      }
    }

    @Override
    public void interrupt() {
      super.interrupt();
      this.sched.shutdown();
    }
  }

  private class TaskWorker implements Callable<Boolean> {
    private final Map<String, GroomServerStatus> groomStatuses;
    private final int groomNum;
    private final JobInProgress jip;

    TaskWorker(final GroomServerStatus[] stus, final int num,
        final JobInProgress jip) {
      this.groomStatuses = new HashMap<String, GroomServerStatus>(2 * num);
      for (GroomServerStatus status : stus) {
        this.groomStatuses.put(status.hostName, status);
      }
      this.groomNum = num;
      this.jip = jip;
      if (null == this.groomStatuses)
        throw new NullPointerException("Target groom server is not "
            + "specified.");
      if (-1 == this.groomNum)
        throw new IllegalArgumentException("Groom number is not specified.");
      if (null == this.jip)
        throw new NullPointerException("No job is specified.");
    }

    private Boolean scheduleNewTasks() {

      // Action to be sent for each task to the respective groom server.
      Map<GroomServerStatus, List<GroomServerAction>> actionMap = new HashMap<GroomServerStatus, List<GroomServerAction>>(
          2 * this.groomStatuses.size());
      Set<Task> taskSet = new HashSet<Task>(2 * jip.tasks.length);
      Task t = null;
      int cnt = 0;
      while ((t = jip.obtainNewTask(this.groomStatuses)) != null) {
        taskSet.add(t);
        // Scheduled all tasks
        if (++cnt == this.jip.tasks.length) {
          break;
        }
      }

      // if all tasks could not be scheduled
      if (cnt != this.jip.tasks.length) {
        LOG.error("Could not schedule all tasks!");
        return Boolean.FALSE;
      }

      // assembly into actions
      Iterator<Task> taskIter = taskSet.iterator();
      while (taskIter.hasNext()) {
        Task task = taskIter.next();
        GroomServerStatus groomStatus = jip.getGroomStatusForTask(task);
        List<GroomServerAction> taskActions = actionMap.get(groomStatus);
        if (taskActions == null) {
          taskActions = new ArrayList<GroomServerAction>(
              groomStatus.getMaxTasks());
        }
        taskActions.add(new LaunchTaskAction(task));
        actionMap.put(groomStatus, taskActions);
      }

      sendDirectivesToGrooms(actionMap);

      return Boolean.TRUE;
    }

    /**
     * Schedule recovery tasks.
     *
     * @return TRUE object if scheduling is successful else returns FALSE
     */
    private Boolean scheduleRecoveryTasks() {

      // Action to be sent for each task to the respective groom server.
      Map<GroomServerStatus, List<GroomServerAction>> actionMap = new HashMap<GroomServerStatus, List<GroomServerAction>>(
          2 * this.groomStatuses.size());

      try {
        jip.recoverTasks(groomStatuses, actionMap);
      } catch (IOException e) {
        return Boolean.FALSE;
      }
      return sendDirectivesToGrooms(actionMap);

    }

    private Boolean sendDirectivesToGrooms(
        Map<GroomServerStatus, List<GroomServerAction>> actionMap) {
      Iterator<GroomServerStatus> groomIter = actionMap.keySet().iterator();
      while ((jip.getStatus().getRunState() == JobStatus.RUNNING || jip
          .getStatus().getRunState() == JobStatus.RECOVERING)
          && groomIter.hasNext()) {

        GroomServerStatus groomStatus = groomIter.next();
        List<GroomServerAction> actionList = actionMap.get(groomStatus);

        GroomProtocol worker = groomServerManager.get().findGroomServer(
            groomStatus);
        try {
          // dispatch() to the groom server
          GroomServerAction[] actions = new GroomServerAction[actionList.size()];
          actionList.toArray(actions);
          Directive d1 = new DispatchTasksDirective(actions);
          worker.dispatch(d1);
        } catch (IOException ioe) {
          LOG.error(
              "Fail to dispatch tasks to GroomServer "
                  + groomStatus.getGroomName(), ioe);
          return Boolean.FALSE;
        }

      }

      if (groomIter.hasNext()
          && (jip.getStatus().getRunState() != JobStatus.RUNNING || jip
              .getStatus().getRunState() != JobStatus.RECOVERING)) {
        LOG.warn("Currently master only shcedules job in running state. "
            + "This may be refined in the future. JobId:" + jip.getJobID());
        return Boolean.FALSE;
      }

      return Boolean.TRUE;
    }

    @Override
    public Boolean call() {
      if (jip.isRecoveryPending()) {
        return scheduleRecoveryTasks();
      } else {
        return scheduleNewTasks();
      }
    }
  }

  /**
   * Periodically collect metrics info.
   */
  private class JvmCollector implements Runnable {
    final Federator federator;
    final ZooKeeper zk;

    JvmCollector(final Federator federator, final ZooKeeper zk) {
      this.federator = federator;
      this.zk = zk;
    }

    @Override
    public void run() {
      for (GroomServerStatus status : groomServerManager.get()
          .groomServerStatusKeySet()) {
        final String groom = status.getGroomName();
        final String jvmPath = Monitor.MONITOR_ROOT_PATH + groom
            + "/metrics/jvm";
        final Act act = new Act(new ZKCollector(zk, "jvm", "Jvm metrics.",
            jvmPath), new CollectorHandler() {
          @Override
          public void handle(@SuppressWarnings("rawtypes")
          Future future) {
            try {
              MetricsRecord record = (MetricsRecord) future.get();
              if (null != record) {
                if (LOG.isDebugEnabled()) {
                  for (@SuppressWarnings("rawtypes")
                  Metric metric : record.metrics()) {
                    LOG.debug("Metric name:" + metric.name() + " metric value:"
                        + metric.value());
                  }
                }
                repository.put(groom, record);
              }
            } catch (InterruptedException ie) {
              LOG.warn(ie);
              Thread.currentThread().interrupt();
            } catch (ExecutionException ee) {
              LOG.warn(ee.getCause());
            }
          }
        });
        this.federator.register(act);
      }
    }
  }

  public SimpleTaskScheduler() {
    this.jobListener = new JobListener();
    this.jobProcessor = new JobProcessor();
    this.scheduler = Executors.newSingleThreadScheduledExecutor();
  }

  @Override
  public void start() {
    if (!initialized.compareAndSet(false, true)) {
      throw new IllegalStateException(SimpleTaskScheduler.class.getSimpleName()
          + " is started.");
    }
    this.queueManager.set(new QueueManager(getConf()));
    this.federator.set(new Federator((HamaConfiguration) getConf()));
    this.queueManager.get().createFCFSQueue(WAIT_QUEUE);
    this.queueManager.get().createFCFSQueue(PROCESSING_QUEUE);
    this.queueManager.get().createFCFSQueue(FINISHED_QUEUE);
    groomServerManager.get().addJobInProgressListener(this.jobListener);

    if (null != getConf()
        && getConf().getBoolean("bsp.federator.enabled", false)) {
      this.federator.get().start();
    }
    this.jobProcessor.start();
    if (null != getConf()
        && getConf().getBoolean("bsp.federator.enabled", false)) {
      this.scheduler.scheduleAtFixedRate(
          new JvmCollector(federator.get(),
              ((ZKSyncBSPMasterClient) ((BSPMaster) groomServerManager.get())
                  .getSyncClient()).getZK()), 5, 5, SECONDS);
    }

    if (null != monitorManager.get()) {
      if (null != monitorManager.get().supervisor()) {
        monitorManager.get().supervisor()
            .register(new NodeWatcher(groomServerManager.get(), this));
      }
    }
  }

  @Override
  public void terminate() {
    this.initialized.set(false);
    if (null != this.jobListener)
      groomServerManager.get().removeJobInProgressListener(this.jobListener);
    this.jobProcessor.interrupt();
    this.federator.get().interrupt();
  }

  @Override
  public Collection<JobInProgress> getJobs(String queue) {
    return (queueManager.get().findQueue(queue)).jobs();
  }

  @Override
  public JobInProgress findJobById(BSPJobID id) {
    for (JobInProgress job : getJobs(PROCESSING_QUEUE)) {
      if (job.getJobID().equals(id)) {
        return job;
      }
    }
    return null;
  }

}
TOP

Related Classes of org.apache.hama.bsp.SimpleTaskScheduler$JvmCollector

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.