Package org.apache.hadoop.mapred

Source Code of org.apache.hadoop.mapred.TestTaskChildsKilling

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.mapred;

import org.apache.commons.logging.Log;

import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.mapreduce.test.system.JTProtocol;
import org.apache.hadoop.mapreduce.test.system.JobInfo;
import org.apache.hadoop.mapreduce.test.system.MRCluster;
import org.apache.hadoop.mapreduce.test.system.TTClient;
import org.apache.hadoop.mapreduce.test.system.JTClient;
import org.apache.hadoop.mapreduce.test.system.TTProtocol;
import org.apache.hadoop.mapreduce.test.system.TTTaskInfo;
import org.apache.hadoop.mapreduce.test.system.TaskInfo;
import org.apache.hadoop.mapreduce.test.system.FinishTaskControlAction;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.util.Tool;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;
import java.io.DataOutputStream;
import java.util.Collection;
import testjar.GenerateTaskChildProcess;
import java.util.Hashtable;

/**
* Submit a job which would spawn child processes and
* verify whether the task child processes are cleaned up
* or not after either job killed or task killed or task failed.
*/
public class TestTaskChildsKilling {
  private static final Log LOG = LogFactory
      .getLog(TestTaskChildsKilling.class);
  private static MRCluster cluster;
  private static Path inputDir = new Path("input");
  private static Path outputDir = new Path("output");
  private static Configuration conf = new Configuration();
  private static String confFile = "mapred-site.xml";

  @BeforeClass
  public static void before() throws Exception {
    Hashtable<String,Object> prop = new Hashtable<String,Object>();
    prop.put("mapred.map.max.attempts", 1L);
    prop.put("mapreduce.job.complete.cancel.delegation.tokens",false);
    String [] expExcludeList = {"java.net.ConnectException",
        "java.io.IOException"};
    cluster = MRCluster.createCluster(conf);
    cluster.setExcludeExpList(expExcludeList);
    cluster.setUp();
    cluster.restartClusterWithNewConfig(prop, confFile);
    UtilsForTests.waitFor(1000);
    conf = cluster.getJTClient().getProxy().getDaemonConf();
    createInput(inputDir, conf);
  }

  @AfterClass
  public static void after() throws Exception {
    cleanup(inputDir, conf);
    cleanup(outputDir, conf);
    cluster.tearDown();
    cluster.restart();
    UtilsForTests.waitFor(1000);
  }

  /**
   * Verifying the process tree cleanup of a task after task is killed
   * by using -kill-task option.
   */
  @Test
  public void testProcessTreeCleanupOfKilledTask1() throws
      Exception {
    TaskInfo taskInfo = null;
    TaskID tID = null;
    TTTaskInfo [] ttTaskinfo = null;
    String pid = null;
    TTProtocol ttIns = null;
    TTClient ttClientIns = null;
    int counter = 0;

    JobConf jobConf = new JobConf(conf);
    jobConf.setJobName("Message Display");
    jobConf.setJarByClass(GenerateTaskChildProcess.class);
    jobConf.setMapperClass(GenerateTaskChildProcess.StrDisplayMapper.class);
    jobConf.setNumMapTasks(1);
    jobConf.setNumReduceTasks(0);
    jobConf.setMaxMapAttempts(1);
    cleanup(outputDir, conf);
    FileInputFormat.setInputPaths(jobConf, inputDir);
    FileOutputFormat.setOutputPath(jobConf, outputDir);
   
    JTClient jtClient = cluster.getJTClient();
    JobClient client = jtClient.getClient();
    JTProtocol wovenClient = cluster.getJTClient().getProxy();
    RunningJob runJob = client.submitJob(jobConf);
    JobID id = runJob.getID();
    JobInfo jInfo = wovenClient.getJobInfo(id);
    Assert.assertNotNull("Job information is null",jInfo);

    Assert.assertTrue("Job has not been started for 1 min.",
        jtClient.isJobStarted(id));

    TaskInfo[] taskInfos = wovenClient.getTaskInfo(id);
    for (TaskInfo taskinfo : taskInfos) {
      if (!taskinfo.isSetupOrCleanup()) {
        taskInfo = taskinfo;
        break;
      }
    }

    Assert.assertTrue("Task has not been started for 1 min.",
        jtClient.isTaskStarted(taskInfo));

    tID = TaskID.downgrade(taskInfo.getTaskID());
    TaskAttemptID tAttID = new TaskAttemptID(tID,0);
    FinishTaskControlAction action = new FinishTaskControlAction(tID);

    Collection<TTClient> ttClients = cluster.getTTClients();
    for (TTClient ttClient : ttClients) {
      TTProtocol tt = ttClient.getProxy();
      tt.sendAction(action);
      ttTaskinfo = tt.getTasks();
      for (TTTaskInfo tttInfo : ttTaskinfo) {
        if (!tttInfo.isTaskCleanupTask()) {
          pid = tttInfo.getPid();
          ttClientIns = ttClient;
          ttIns = tt;
          break;
        }
      }
      if (ttClientIns != null) {
        break;
      }
    }


    Assert.assertTrue("Map process tree is not alive before task kills.",
        ttIns.isProcessTreeAlive(pid));

    String args[] = new String[] { "-kill-task", tAttID.toString() };
    int exitCode = runTool(jobConf, client, args);
    Assert.assertEquals("Exit Code:", 0, exitCode);
   
    LOG.info("Waiting till the task is killed...");
    counter = 0;
    while (counter < 30) {
      if (taskInfo.getTaskStatus().length > 0) {
        if (taskInfo.getTaskStatus()[0].getRunState() ==
            TaskStatus.State.KILLED) {
          break;
        }
      }
      UtilsForTests.waitFor(1000);
      taskInfo = wovenClient.getTaskInfo(taskInfo.getTaskID());
      counter ++;
    }

    runJob.killJob();
    LOG.info("Waiting till the job is completed...");
    counter = 0;
    while (counter < 60) {
      if (jInfo.getStatus().isJobComplete()) {
        break;
      }
      UtilsForTests.waitFor(1000);
      jInfo = wovenClient.getJobInfo(id);
      counter ++;
    }
    Assert.assertTrue("Job has not  been completed for 1 min.",
        counter != 60 );
    ttIns = ttClientIns.getProxy();
    UtilsForTests.waitFor(1000);
    Assert.assertTrue("Map process is still alive after task has been killed.",
        !ttIns.isProcessTreeAlive(pid));
  }

  /**
   * Verifying the process tree cleanup of a particular task
   * after task is killed.
   */
  @Test
  public void testProcessTreeCleanupOfKilledTask2() throws
      IOException {
    TaskInfo taskInfo = null;
    TaskID tID = null;
    TaskAttemptID taskAttID = null;
    TTTaskInfo [] ttTaskinfo = null;
    String pid = null;
    TTProtocol ttIns = null;
    TTClient ttClientIns = null;
    int counter = 0;

    JobConf jobConf = new JobConf(conf);
    jobConf.setJobName("Message Display");
    jobConf.setJarByClass(GenerateTaskChildProcess.class);
    jobConf.setMapperClass(GenerateTaskChildProcess.StrDisplayMapper.class);
    jobConf.setNumMapTasks(1);
    jobConf.setNumReduceTasks(0);
    cleanup(outputDir, conf);
    FileInputFormat.setInputPaths(jobConf, inputDir);
    FileOutputFormat.setOutputPath(jobConf, outputDir);
    JTClient jtClient = cluster.getJTClient();
    JobClient client = jtClient.getClient();
    JTProtocol wovenClient = cluster.getJTClient().getProxy();
    RunningJob runJob = client.submitJob(jobConf);
    JobID id = runJob.getID();
    JobInfo jInfo = wovenClient.getJobInfo(id);   
    Assert.assertNotNull("Job information is null", jInfo);

    Assert.assertTrue("Job has not been started for 1 min.",
        jtClient.isJobStarted(id));

    TaskInfo[] taskInfos = wovenClient.getTaskInfo(id);
    for (TaskInfo taskinfo : taskInfos) {
      if (!taskinfo.isSetupOrCleanup()) {
        taskInfo = taskinfo;
        break;
      }
    }

    Assert.assertTrue("Task has not been started for 1 min.",
        jtClient.isTaskStarted(taskInfo));

    tID = TaskID.downgrade(taskInfo.getTaskID());
    taskAttID = new TaskAttemptID(tID,0);
    FinishTaskControlAction action = new FinishTaskControlAction(tID);
    Collection<TTClient> ttClients = cluster.getTTClients();
    for (TTClient ttClient : ttClients) {
      TTProtocol tt = ttClient.getProxy();
      tt.sendAction(action);
      ttTaskinfo = tt.getTasks();
      for (TTTaskInfo tttInfo : ttTaskinfo) {
        if (!tttInfo.isTaskCleanupTask()) {
          pid = tttInfo.getPid();
          ttClientIns = ttClient;
          ttIns = tt;
          break;
        }
      }
      if (ttClientIns != null) {
        break;
      }
    }


    Assert.assertTrue("Map process is not alive before task kills.",
        ttIns.isProcessTreeAlive(pid));

    runJob.killTask(taskAttID, false);

    LOG.info("Waiting till the task is killed...");
    taskInfo = wovenClient.getTaskInfo(taskInfo.getTaskID());
    counter = 0;
    while (counter < 30) {
      if (taskInfo.getTaskStatus()[0].getRunState() ==
              TaskStatus.State.KILLED) {
        break;
      }
      UtilsForTests.waitFor(1000);
      taskInfo = wovenClient.getTaskInfo(taskInfo.getTaskID());
      counter ++;
    }
    runJob.killJob();
    LOG.info("Waiting till the job is completed...");
    counter = 0;
    while (counter < 60) {
      if (jInfo.getStatus().isJobComplete()) {
        break;
      }
      UtilsForTests.waitFor(1000);
      jInfo = wovenClient.getJobInfo(id);
      counter ++;
    }
    Assert.assertTrue("Job has not been completed for 1 min.",
        counter != 60);
    UtilsForTests.waitFor(2000);
    ttIns = ttClientIns.getProxy();   
    Assert.assertTrue("Map process is still alive after task has been killed.",
        !ttIns.isProcessTreeAlive(pid));
  }

  /**
   * Verifying the child process tree clean up of a task which fails due
   * to an exception.
   */
  @Test
  public void testProcessTreeCleanupOfFailedTask1() throws IOException {
    TaskInfo taskInfo = null;
    TaskID tID = null;
    TTTaskInfo [] ttTaskinfo = null;
    String pid = null;
    TTProtocol ttIns = null;
    TTClient ttClientIns = null;
    int counter = 0;

    JobConf jobConf = new JobConf(conf);
    jobConf.setJobName("Message Display");
    jobConf.setJarByClass(GenerateTaskChildProcess.class);
    jobConf.setMapperClass(GenerateTaskChildProcess.FailedMapper.class);
    jobConf.setNumMapTasks(1);
    jobConf.setNumReduceTasks(0);
    cleanup(outputDir, conf);
    FileInputFormat.setInputPaths(jobConf, inputDir);
    FileOutputFormat.setOutputPath(jobConf, outputDir);

    JTClient jtClient = cluster.getJTClient();
    JobClient client = jtClient.getClient();
    JTProtocol wovenClient = cluster.getJTClient().getProxy();
    RunningJob runJob = client.submitJob(jobConf);
    JobID id = runJob.getID();
    JobInfo jInfo = wovenClient.getJobInfo(id);
    Assert.assertNotNull("Job information is null", jInfo);

    Assert.assertTrue("Job has not been started for 1 min.",
        jtClient.isJobStarted(id));

    TaskInfo[] taskInfos = wovenClient.getTaskInfo(id);
    for (TaskInfo taskinfo : taskInfos) {
      if (!taskinfo.isSetupOrCleanup()) {
        taskInfo = taskinfo;
        break;
      }
    }

    Assert.assertTrue("Task has not been started for 1 min.",
        jtClient.isTaskStarted(taskInfo));

    tID = TaskID.downgrade(taskInfo.getTaskID());   
    FinishTaskControlAction action = new FinishTaskControlAction(tID);
   
    Collection<TTClient> ttClients = cluster.getTTClients();
    for (TTClient ttClient : ttClients) {
      TTProtocol tt = ttClient.getProxy();
      tt.sendAction(action);
      ttTaskinfo = tt.getTasks();
      for (TTTaskInfo tttInfo : ttTaskinfo) {
        if (!tttInfo.isTaskCleanupTask()) {
          pid = tttInfo.getPid();
          ttClientIns = ttClient;
          ttIns = tt;
          break;
        }
      }
      if (ttClientIns != null) {
        break;
      }
    }

    Assert.assertTrue("Map process is not alive before task fails.",
            ttIns.isProcessTreeAlive(pid));

    LOG.info("Waiting till the task is failed...");
    counter = 0;
    while (counter < 60) {
      if (taskInfo.getTaskStatus().length > 0) {
        if (taskInfo.getTaskStatus()[0].getRunState() ==
            TaskStatus.State.FAILED) {
          break;
        }
      }
      UtilsForTests.waitFor(1000);
      taskInfo = wovenClient.getTaskInfo(taskInfo.getTaskID());
      counter++;
    }

    LOG.info("Waiting till the job is completed...");
    counter = 0;
    while (counter < 60) {
      if (jInfo.getStatus().isJobComplete()) {
        break;
      }
      UtilsForTests.waitFor(1000);
      jInfo = wovenClient.getJobInfo(id);
      counter ++;
    }
    Assert.assertTrue("Job has not been completed for 1 min.",
        counter != 60);
    ttIns = ttClientIns.getProxy();
    UtilsForTests.waitFor(2000);
    Assert.assertTrue("Map process is still alive after task has been failed.",
            !ttIns.isProcessTreeAlive(pid));
  }

  /**
   * Verifying the process tree cleanup of a task after task is failed
   * by using -fail-task option.
   */
  @Test
  public void testProcessTreeCleanupOfFailedTask2() throws
      Exception {
    TaskInfo taskInfo = null;
    TaskID tID = null;
    TTTaskInfo [] ttTaskinfo = null;
    String pid = null;
    TTProtocol ttIns = null;
    TTClient ttClientIns = null;
    int counter = 0;
   
    JobConf jobConf = new JobConf(conf);
    jobConf.setJobName("Message Display");
    jobConf.setJarByClass(GenerateTaskChildProcess.class);
    jobConf.setMapperClass(GenerateTaskChildProcess.StrDisplayMapper.class);
    jobConf.setNumMapTasks(1);
    jobConf.setNumReduceTasks(0);
    cleanup(outputDir, conf);
    FileInputFormat.setInputPaths(jobConf, inputDir);
    FileOutputFormat.setOutputPath(jobConf, outputDir);

    JTClient jtClient = cluster.getJTClient();
    JobClient client = jtClient.getClient();
    JTProtocol wovenClient = cluster.getJTClient().getProxy();
    RunningJob runJob = client.submitJob(jobConf);
    JobID id = runJob.getID();
    JobInfo jInfo = wovenClient.getJobInfo(id);
    Assert.assertNotNull("Job information is null", jInfo);

    Assert.assertTrue("Job has not been started for 1 min.",
        jtClient.isJobStarted(id));

    TaskInfo[] taskInfos = wovenClient.getTaskInfo(id);
    for (TaskInfo taskinfo : taskInfos) {
      if (!taskinfo.isSetupOrCleanup()) {
        taskInfo = taskinfo;
        break;
      }
    }

    Assert.assertTrue("Task has not been started for 1 min.",
        jtClient.isTaskStarted(taskInfo));

    tID = TaskID.downgrade(taskInfo.getTaskID());
    TaskAttemptID tAttID = new TaskAttemptID(tID,0);
    FinishTaskControlAction action = new FinishTaskControlAction(tID);

    Collection<TTClient> ttClients = cluster.getTTClients();
    for (TTClient ttClient : ttClients) {
      TTProtocol tt = ttClient.getProxy();
      tt.sendAction(action);
      ttTaskinfo = tt.getTasks();
      for (TTTaskInfo tttInfo : ttTaskinfo) {
        if (!tttInfo.isTaskCleanupTask()) {
          pid = tttInfo.getPid();
          ttClientIns = ttClient;
          ttIns = tt;
          break;
        }
      }
      if (ttClientIns != null) {
        break;
      }
    }


    Assert.assertTrue("Map process is not alive before task fails.",
        ttIns.isProcessTreeAlive(pid));

    String args[] = new String[] { "-fail-task", tAttID.toString() };
    int exitCode = runTool(jobConf, client, args);
    Assert.assertEquals("Exit Code:", 0, exitCode);

    LOG.info("Waiting till the task is failed...");
    taskInfo = wovenClient.getTaskInfo(taskInfo.getTaskID());
    counter = 0;
    while (counter < 60) {
      if (taskInfo.getTaskStatus().length > 0) {
        if (taskInfo.getTaskStatus()[0].getRunState() ==
            TaskStatus.State.FAILED) {
          break;
        }
      }
      UtilsForTests.waitFor(1000);
      taskInfo = wovenClient.getTaskInfo(taskInfo.getTaskID());
      counter ++;
    }
    counter = 0;
    LOG.info("Waiting till the job is completed...");
    while (counter < 60) {
      if (jInfo.getStatus().isJobComplete()) {
        break;
      }
      UtilsForTests.waitFor(1000);
      jInfo = wovenClient.getJobInfo(id);
      counter ++;
    }

    Assert.assertTrue("Job has not been completed for 1 min",
        counter != 60);
    ttIns = ttClientIns.getProxy();
    UtilsForTests.waitFor(1000);
    Assert.assertTrue("Map process is still alive after task has been failed.",
        !ttIns.isProcessTreeAlive(pid));
  }

  private int runTool(Configuration job, Tool tool,
      String[] jobArgs) throws Exception {
    int returnStatus = ToolRunner.run(job, tool, jobArgs);
    return returnStatus;
  }

  private static void cleanup(Path dir, Configuration conf) throws
      IOException {
    FileSystem fs = dir.getFileSystem(conf);
    fs.delete(dir, true);
  }
  private static void createInput(Path inDir, Configuration conf) throws
      IOException {
    String input = "Hadoop is framework for data intensive distributed "
        + "applications.\n Hadoop enables applications"
        + " to work with thousands of nodes.";
    FileSystem fs = inDir.getFileSystem(conf);
    if (!fs.mkdirs(inDir)) {
      throw new IOException("Failed to create the input directory:"
          + inDir.toString());
    }
    fs.setPermission(inDir, new FsPermission(FsAction.ALL,
        FsAction.ALL, FsAction.ALL));
    DataOutputStream file = fs.create(new Path(inDir, "data.txt"));
    int i = 0;
    while(i < 10) {
      file.writeBytes(input);
      i++;
    }
    file.close();
  }
}
TOP

Related Classes of org.apache.hadoop.mapred.TestTaskChildsKilling

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.