Package org.nebulaframework.grid.cluster.manager.services.jobs.tracking

Source Code of org.nebulaframework.grid.cluster.manager.services.jobs.tracking.GridJobTaskTracker

/*
* Copyright (C) 2008 Yohan Liyanage.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.nebulaframework.grid.cluster.manager.services.jobs.tracking;

import java.util.LinkedList;
import java.util.Queue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.RejectedExecutionException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nebulaframework.core.job.GridJobState;
import org.nebulaframework.core.job.splitaggregate.SplitAggregateGridJob;
import org.nebulaframework.grid.cluster.manager.services.jobs.GridJobProfile;
import org.nebulaframework.grid.cluster.manager.services.jobs.JobExecutionManager;
import org.nebulaframework.grid.service.event.ServiceEventsSupport;
import org.nebulaframework.grid.service.event.ServiceHookCallback;
import org.nebulaframework.grid.service.message.ServiceMessage;
import org.nebulaframework.grid.service.message.ServiceMessageType;

/**
* TaskTracker which tracks the status of tasks for
* a given GridJob.
* <p>
* Instance of this class monitors the task execution of
* a GridJob, and re-enqueues potentially failed tasks.
* The average execution duration for a task is constantly measured
* by the
*
* @author Yohan Liyanage
* @version 1.0
*/
public class GridJobTaskTracker {

  private static final Log log = LogFactory.getLog(GridJobTaskTracker.class);
 
  private GridJobProfile profile;
  private JobExecutionManager executionManager;

  private ExecutorService executors = Executors.newCachedThreadPool();
 
  private boolean started;
  private boolean stopped;
 
  // Average Execution Time, in Seconds
  private int averageTaskDuration = -1;
 
  // Enqueued Tasks
  private Queue<Integer> enqueued = new LinkedList<Integer>();
 
  // Tasks which are potentially failed
  // (results received for tasks enqueued after)
  private Queue<Integer> potential = new LinkedList<Integer>();
 
  // Tasks Marked as Failed and to be re-enqueued
  private Queue<Integer> marked = new LinkedList<Integer>();
 
  // Active Worker Count (as of last update)
  private int workerCount = 0;
 
  private int renqueued = 0; // TODO Remove
 
  /**
   * Constructs a Task Tracker for given job.
   * <p>
   * Note that this is a <b>private</b> constructor.
   *
   * @param profile Job Profile
   * @param executionManager JobExecutionManager for job
   */
  private GridJobTaskTracker(GridJobProfile profile,
      JobExecutionManager executionManager) {
    super();
    this.profile = profile;
    this.executionManager = executionManager;
  }
 
  /**
   * Starts a tracker instance for given Job.
   *
   * @param profile Profile
   * @param executionManager Job Execution Manager
   * @return constructed instance
   */
  public static GridJobTaskTracker startTracker(GridJobProfile profile,
      JobExecutionManager executionManager) {
   
    final GridJobTaskTracker instance = new GridJobTaskTracker(profile, executionManager);
   
   
    // Create Job-End Cleanup Hook
    ServiceEventsSupport.addServiceHook(new ServiceHookCallback() {

      @Override
      public void onServiceEvent(ServiceMessage message) {
        instance.stop();
      }
     
    }, profile.getJobId(), ServiceMessageType.JOB_CANCEL, ServiceMessageType.JOB_END);
   
    return instance;
  }

  /**
   * Starts the tracker, which then
   * continues to monitor and re-enqueue possibly
   * failed jobs.
   */
  public void start() {
   
    // Ignore all start() calls if already started
    if (started) return;
   
    new Thread(new Runnable() {

      @Override
      public void run() {
        try {
          // Initial Startup Delay (wait for tasks to execute)
          Thread.sleep(2000);
        } catch (InterruptedException e) {
          log.error("[GridJobTaskTracker] Interrupted", e);
       
       
        // Start Task Tracker
        GridJobTaskTracker.this.startTracking();
      }
    }).start();
   
  }
 
  /**
   * Internal method to start the tracking process.
   */
  protected void startTracking() {
   
    log.debug("[GridJobTaskTracker] Started Tracking for " + profile.getJobId());
   
    // Wait until average information is available
    while (averageTaskDuration < 0 || (profile.getFuture().getState()!=GridJobState.EXECUTING)) {
     
      // If Stopped
      if (stopped) {
        log.debug("[GridJobTaskTracker] Stopped Tracking for " + profile.getJobId());
        return;
      }
     
      try {
        // Wait for average to be set
        Thread.sleep(2000);
      } catch (InterruptedException e) {
        log.error("[GridJobTaskTracker] Interrupted", e);
      }
    }
   
    // Retrieve Worker Count
    workerCount = profile.getWorkerCount();
   
    // We have duration information now
    while (!stopped) {
     
      synchronized (this) {
        renqueueMarked();
        potentialToMarked();
        enqueuedToPotential();
      }

      // If this is the final stage of Job Execution (90%)
      if (isFinalStage()) {
        // Duplicate Tasks (Increases throughput)
        log.trace("In Final Stage");
        enqueuedToMarked();
      }
     
      // Update Worker Count
      workerCount = profile.getWorkerCount();
     
      try {
        // Sleep for some duration, determined by average * getMultiple()
        Thread.sleep((long) Math.floor(averageTaskDuration * getMultipler()));
      } catch (InterruptedException e) {
        log.error("[GridJobTaskTracker] Interrupted", e);
      }
    }
   
    log.debug("[GridJobTaskTracker] Stopping for Job " + profile.getJobId());
   
  }

  private boolean isFinalStage() {
    return (this.profile.getWorkerCount() >= this.profile.getTaskCount());
  }

  /**
   * Returns the sleep duration multiplier. The tracker
   * thread sleeps for {@code average duration * multiplier} amount of
   * time (milliseconds). This multiplier is higher for tasks with lower
   * duration, and lower for tasks with higher duration.
   *
   * @return multiplier
   */
  private long getMultipler() {
   
    // 2 Times Delay for Durations Less Than 2 Seconds
    if (averageTaskDuration < 2) {
      return 2000;
    }
    // 1.5 Times Delay for Durations 2-4 Secs
    else if (averageTaskDuration < 4) {
      return 1500;
    }
    // 1.25 Time Delay for Durations > 5 Seconds
    else if (averageTaskDuration < 10) {
      return 1250;
    }
    else {
      return 1100;
    }
   
  }

  /**
   * Re-enqueues jobs which are marked as
   * possibly failed.
   */
  private synchronized void renqueueMarked() {
   
    if (marked.size() == 0) return;
   
    Integer[] tasks = marked.toArray(new Integer[marked.size()]);
   
    log.trace("Reenqueuing " + tasks.length + "tasks");
   
    marked.clear();
   
    for (int task : tasks) {
      executionManager.reEnqueueTask(profile.getJobId(), task);
      log.trace("Re-enqueued " + task);
      log.trace("Re-enqueued " + (++renqueued) + " tasks so far");
    }
   
  }

  /**
   * Moves the potentially failed tasks to
   * marked task queue.
   */
  private synchronized void potentialToMarked() {
   
    if (profile.getJob() instanceof SplitAggregateGridJob) {
     
      // For Split-Aggregate style, all tasks are enqueued in beginning.
      // But we should only consider about the tasks which will be
      // executed by workers.
      int activeRange = workerCount;
      activeRange = (potential.size() > activeRange) ? activeRange : potential.size();
     
      for (int i=0; i <activeRange; i++) {
        marked.add(potential.remove());
      }
     
      log.trace("Potential to Marked " + activeRange + "tasks");
     
    }
    else {
      for (int i=0; i <potential.size(); i++) {
        marked.add(potential.remove());
      }
     
      log.trace("Potential to Marked " + potential.size() + "tasks");
    }
   
   
   
  }
 
  /**
   * Moves the existing enqueued tasks queue to
   * marked queue.
   */
  private synchronized void enqueuedToPotential() {
   
    if (profile.getJob() instanceof SplitAggregateGridJob) {
     
      // For Split-Aggregate style, all tasks are enqueued in beginning.
      // But we should only consider about the tasks which will be
      // executed by workers.
      int activeRange = workerCount;
      activeRange = (enqueued.size()-1 > activeRange) ? activeRange : enqueued.size() -1;
     
      for (int i=0; i <activeRange; i++) {
        potential.add(enqueued.remove());
      }
      log.trace("Enqueued to Potential " + activeRange + "tasks");
    }
    else {
      for (int i=0; i <enqueued.size(); i++) {
        potential.add(enqueued.remove());
        log.trace("Enqueued to Potential " + enqueued.size() + "tasks");
      }
    }
  }
 

  /**
   * Moves the existing enqueued tasks queue to
   * marked queue.
   */
  private synchronized void enqueuedToMarked() {
   
    log.trace("Enqueued to Marked " + enqueued.size() + "tasks");
   
    for (int i=0; i <enqueued.size(); i++) {
      marked.add(enqueued.remove());
    }
  }

  /**
   * Invoked to notify that a new task was enqueued.
   *
   * @param taskId TaskId of enqueued task
   */
  public synchronized void taskEnqueued(int taskId) {
   
    if (stopped) return;
   
    log.trace("Enqueued " + taskId);
    enqueued.add(taskId);
  }
 
  /**
   * Invoked to notify that a result was received
   *
   * @param taskId taskId of result
   * @param executionTime duration taken to execute
   */
  public void resultReceived(final Integer taskId, final long executionTime) {
   
    if (stopped) return;
   
    executors.execute(new Runnable() {

      @Override
      public void run() {
       
       
        synchronized (GridJobTaskTracker.this) {
       
          // Remove from any list (enqueued, marked)
          if (enqueued.contains(taskId)) {
           
            // This task is not the first item in queue
            // Remove all tasks ahead in queue
            // and put to potentially failed
            // list
//            while (! enqueued.peek().equals(taskId)) {
//              potential.add(enqueued.remove());
//            }
           
            // TODO Remove above block
           
            enqueued.remove(taskId);
          }
          if (potential.contains(taskId)) {
            potential.remove(taskId);
          }
          if (marked.contains(taskId)) {
            marked.remove(taskId);
          }
         
          // Calculate Task Average Duration
          double seconds = Math.floor(((double)(executionTime/1000)));
         
          if (seconds==0) seconds = 1;

          if (averageTaskDuration <=0) {
            averageTaskDuration = (int) seconds;
          }
          else {
            averageTaskDuration = (int) Math.floor(((averageTaskDuration + seconds) / 2));
          }
        }
      }
    });
  }

  /**
   * Stops the TaskTracker
   */
  public synchronized void stop() {
    this.stopped = true;
    destory();
  }
 
  /**
   * Destroys the tracker instance.
   */
  private void destory() {
    this.enqueued = null;
    this.marked = null;
  }
}
TOP

Related Classes of org.nebulaframework.grid.cluster.manager.services.jobs.tracking.GridJobTaskTracker

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.