Examples of JobControl
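
All of the excerpts below come from Apache Pig's MapReduce launcher and its tests, where JobControl schedules the compiled map-reduce jobs. As orientation, here is a minimal sketch of the raw org.apache.hadoop.mapred.jobcontrol lifecycle those excerpts build on; confA and confB are hypothetical, fully configured JobConf instances, and IOException handling is elided.

        // Minimal JobControl lifecycle, independent of Pig.
        Job first = new Job(confA);                // wraps a configured JobConf
        Job second = new Job(confB);
        second.addDependingJob(first);             // second runs only after first succeeds

        JobControl ctrl = new JobControl("example-group");
        ctrl.addJob(first);
        ctrl.addJob(second);

        new Thread(ctrl).start();                  // JobControl implements Runnable
        while (!ctrl.allFinished()) {
            try {
                Thread.sleep(500);                 // poll until every job succeeds or fails
            } catch (InterruptedException e) { /* ignore and keep polling */ }
        }
        ctrl.stop();                               // shut down the scheduling thread

        if (!ctrl.getFailedJobs().isEmpty()) {
            // inspect Job.getMessage() on each failed job for details
        }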


Examples of org.apache.hadoop.mapred.jobcontrol.JobControl

        ExecutionEngine exe = pc.getExecutionEngine();
        ConfigurationValidator.validatePigProperties(exe.getConfiguration());
        Configuration conf = ConfigurationUtil.toConfiguration(exe.getConfiguration());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
       
        JobControl jobControl = jcc.compile(mrPlan, "Test");
        Job job = jobControl.getWaitingJobs().get(0);
        int parallel = job.getJobConf().getNumReduceTasks();

        assertEquals(100, parallel);
       
        pc.defaultParallel = -1;       

Examples of org.apache.hadoop.mapred.jobcontrol.JobControl

        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
       
        List<Job> failedJobs = new LinkedList<Job>();
        List<Job> completeFailedJobsInThisRun = new LinkedList<Job>();
        List<Job> succJobs = new LinkedList<Job>();
        JobControl jc;
        int totalMRJobs = mrp.size();
        int numMRJobsCompl = 0;
        double lastProg = -1;
       
        //create the exception handler for the job control thread
        //and register the handler with the job control thread
        JobControlThreadExceptionHandler jctExceptionHandler = new JobControlThreadExceptionHandler();

        while((jc = jcc.compile(mrp, grpName)) != null) {
           
            List<Job> waitingJobs = jc.getWaitingJobs();
            completeFailedJobsInThisRun.clear();
           
            Thread jcThread = new Thread(jc);
            jcThread.setUncaughtExceptionHandler(jctExceptionHandler);
            jcThread.start();

            Thread.sleep(jobidDelayTime);
            String jobTrackerAdd;
            String port;
           
            try{
                for (Job job : waitingJobs){
                    JobConf jConf = job.getJobConf();
                    port = jConf.get("mapred.job.tracker.http.address");
                    port = port.substring(port.indexOf(":"));  // keep the leading ':' so jobTrackerAdd + port reads host:port
                    jobTrackerAdd = jConf.get(HExecutionEngine.JOB_TRACKER_LOCATION);
                    jobTrackerAdd = jobTrackerAdd.substring(0, jobTrackerAdd.indexOf(":"));  // host part only
                    if (job.getAssignedJobID()!=null)
                    {
                        log.info("Submitting job: "+job.getAssignedJobID()+" to execution engine.");
                        log.info("More information at: http://"+ jobTrackerAdd+port+
                                "/jobdetails.jsp?jobid="+job.getAssignedJobID());
                        log.info("To kill this job, use: kill "+job.getAssignedJobID());
                    }
                    else
                        log.info("Cannot get jobid for this job");
                }
            }
            catch(Exception e){
                /* This is extra information Pig provides to the user.
                   If an exception occurs here, the job may still complete successfully,
                   so Pig shouldn't die or give the user a confusing message.
                   Just log the information and move on. */
                log.info("Cannot get jobid for this job");
            }
           
            while(!jc.allFinished()){
                try {
                    Thread.sleep(sleepTime);
                } catch (InterruptedException e) { /* ignore and poll again */ }
                double prog = (numMRJobsCompl+calculateProgress(jc, jobClient))/totalMRJobs;
                if(prog>=(lastProg+0.01)){
                    int perCom = (int)(prog * 100);
                    if(perCom!=100)
                        log.info( perCom + "% complete");
                }
                lastProg = prog;
            }

            //check for the jobControlException first
            //if the job controller fails before launching the jobs then there are
            //no jobs to check for failure
            if(jobControlException != null) {
                if(jobControlException instanceof PigException) {
                    if(jobControlExceptionStackTrace != null) {
                        LogUtils.writeLog("Error message from job controller", jobControlExceptionStackTrace,
                                pc.getProperties().getProperty("pig.logfile"),
                                log);
                    }
                    throw jobControlException;
                } else {
                    int errCode = 2117;
                    String msg = "Unexpected error when launching map reduce job.";
                    throw new ExecException(msg, errCode, PigException.BUG, jobControlException);
                }
            }

            if (!jc.getFailedJobs().isEmpty() )
            {
                if ("true".equalsIgnoreCase(
                  pc.getProperties().getProperty("stop.on.failure","false"))) {
                    int errCode = 6017;
                    StringBuilder msg = new StringBuilder();
                   
                    for (int i=0;i<jc.getFailedJobs().size();i++) {
                        Job j = jc.getFailedJobs().get(i);
                        msg.append(getFirstLineFromMessage(j.getMessage()));
                        if (i!=jc.getFailedJobs().size()-1)
                            msg.append("\n");
                    }
                   
                    throw new ExecException(msg.toString(),
                                            errCode, PigException.REMOTE_ENVIRONMENT);
                }
                // If a failed job has only one store, the job has failed completely, so stop its dependent jobs
                for (Job job : jc.getFailedJobs())
                {
                    List<POStore> sts = jcc.getStores(job);
                    if (sts.size()==1)
                        completeFailedJobsInThisRun.add(job);
                }
                failedJobs.addAll(jc.getFailedJobs());
            }
           
            int removedMROp = jcc.updateMROpPlan(completeFailedJobsInThisRun);
           
            numMRJobsCompl += removedMROp;

            List<Job> jobs = jc.getSuccessfulJobs();
            jcc.moveResults(jobs);
            succJobs.addAll(jobs);
           
           
            stats.setJobClient(jobClient);
            stats.setJobControl(jc);
            stats.accumulateStats();
           
            jc.stop();
        }

        log.info( "100% complete");

        boolean failed = false;
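
The jobControlException consulted after the monitoring loop is never assigned inside this excerpt; it is filled in by the JobControlThreadExceptionHandler registered on the job-control thread. A plausible sketch of such a handler (the two fields mirror the excerpt; formatting the stack trace with java.io.StringWriter/PrintWriter is an assumption):

        class JobControlThreadExceptionHandler implements Thread.UncaughtExceptionHandler {
            @Override
            public void uncaughtException(Thread thread, Throwable throwable) {
                // Stash the failure so the launcher can rethrow it after jc finishes.
                StringWriter sw = new StringWriter();
                throwable.printStackTrace(new PrintWriter(sw));
                jobControlExceptionStackTrace = sw.toString();
                jobControlException = (throwable instanceof Exception)
                        ? (Exception) throwable
                        : new Exception(throwable);
            }
        }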


Examples of org.apache.hadoop.mapred.jobcontrol.JobControl

        ConfigurationValidator.validatePigProperties(exe.getConfiguration());
        Configuration conf = ConfigurationUtil.toConfiguration(exe.getConfiguration());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
       
        // Get the sort job
        JobControl jobControl = jcc.compile(mrPlan, "Test");
        jcc.updateMROpPlan(new ArrayList<Job>());
        jobControl = jcc.compile(mrPlan, "Test");
        jcc.updateMROpPlan(new ArrayList<Job>());
        jobControl = jcc.compile(mrPlan, "Test");
        Job job = jobControl.getWaitingJobs().get(0);
        int parallel = job.getJobConf().getNumReduceTasks();

        assertEquals(100, parallel);
       
        pc.defaultParallel = -1;       
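
Each call to jcc.compile(...) returns a JobControl holding only the MR jobs whose inputs are ready, and updateMROpPlan(...) retires the current wave from the plan; passing an empty job list marks the wave done without recording failures. The test therefore calls the pair twice to step past the first two jobs and reach the sort job. The same walk as a loop (sketch; wavesToSkip is a hypothetical count):

        JobControl jobControl = jcc.compile(mrPlan, "Test");
        for (int i = 0; i < wavesToSkip; i++) {
            jcc.updateMROpPlan(new ArrayList<Job>());  // retire the current wave, no failures
            jobControl = jcc.compile(mrPlan, "Test");  // compile the next wave
        }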

Examples of org.apache.hadoop.mapred.jobcontrol.JobControl

        ConfigurationValidator.validatePigProperties(exe.getConfiguration());
        Configuration conf = ConfigurationUtil.toConfiguration(exe.getConfiguration());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
       
        // Get the skew join job
        JobControl jobControl = jcc.compile(mrPlan, "Test");
        jcc.updateMROpPlan(new ArrayList<Job>());
        jobControl = jcc.compile(mrPlan, "Test");
        jcc.updateMROpPlan(new ArrayList<Job>());
        jobControl = jcc.compile(mrPlan, "Test");
        Job job = jobControl.getWaitingJobs().get(0);
        int parallel = job.getJobConf().getNumReduceTasks();

        assertEquals(100, parallel);
       
        pc.defaultParallel = -1;       

Examples of org.apache.hadoop.mapred.jobcontrol.JobControl

        if (plan.size() == 0) {
            return null;
        }

        JobControl jobCtrl = new JobControl(grpName);

        try {
            List<MapReduceOper> roots = new LinkedList<MapReduceOper>();
            roots.addAll(plan.getRoots());
            for (MapReduceOper mro: roots) {
                Job job = getJob(mro, conf, pigContext);
                jobMroMap.put(job, mro);
                jobCtrl.addJob(job);
            }
        } catch (JobCreationException jce) {
            throw jce;
        } catch(Exception e) {
            int errCode = 2017;
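
Note that compile() seeds the JobControl with the plan's root operators only; downstream MR jobs become roots in later waves, after the launcher calls updateMROpPlan() on the finished ones, as the monitoring loop in the second example above shows.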

Examples of org.apache.hadoop.mapred.jobcontrol.JobControl

          throw new RuntimeException("Invalid configuration " +
              "pig.jobcontrol.sleep=" + pigJobControlSleep +
              " should be a time in ms. default=" + defaultPigJobControlSleep, e);
        }

        JobControl jobCtrl = HadoopShims.newJobControl(grpName, timeToSleep);

        try {
            List<MapReduceOper> roots = new LinkedList<MapReduceOper>();
            roots.addAll(plan.getRoots());
            for (MapReduceOper mro: roots) {
                if(mro instanceof NativeMapReduceOper) {
                    return null;
                }
                Job job = getJob(plan, mro, conf, pigContext);
                jobMroMap.put(job, mro);
                jobCtrl.addJob(job);
            }
        } catch (JobCreationException jce) {
            throw jce;
        } catch(Exception e) {
            int errCode = 2017;
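
This excerpt opens inside a catch block, so the parse it guards is cut off. A plausible reconstruction of the preceding lines, inferred from the error message itself:

        // Reconstructed sketch of what precedes the catch shown above.
        String pigJobControlSleep = conf.get("pig.jobcontrol.sleep",
                Integer.toString(defaultPigJobControlSleep));
        int timeToSleep;
        try {
            timeToSleep = Integer.parseInt(pigJobControlSleep);
        } catch (NumberFormatException e) {
            throw new RuntimeException("Invalid configuration " +
                "pig.jobcontrol.sleep=" + pigJobControlSleep +
                " should be a time in ms. default=" + defaultPigJobControlSleep, e);
        }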

Examples of org.apache.hadoop.mapred.jobcontrol.JobControl

        ConfigurationValidator.validatePigProperties(pc.getProperties());
        Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);

        JobControl jobControl = jcc.compile(mrPlan, "Test");
        Job job = jobControl.getWaitingJobs().get(0);
        int parallel = job.getJobConf().getNumReduceTasks();

        assertEquals(100, parallel);
        Util.assertParallelValues(100, -1, -1, 100, job.getJobConf());
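
A hedged reading of Util.assertParallelValues (a Pig test helper): its four numeric arguments appear to be, in order, the default parallelism, the PARALLEL-requested value, the estimated reducer count, and the reduce-task count actually set on the JobConf, with -1 meaning "not set". That reading is consistent with every call in these excerpts.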

Examples of org.apache.hadoop.mapred.jobcontrol.JobControl

        pc.getConf().setProperty("pig.exec.reducers.max", "10");
        pc.getConf().setProperty(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(clientPort));
        ConfigurationValidator.validatePigProperties(pc.getProperties());
        conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
        JobControl jc = jcc.compile(mrPlan, "Test");
        Job job = jc.getWaitingJobs().get(0);
        long reducer = Math.min((long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0), 10);

        Util.assertParallelValues(-1, -1, reducer, reducer, job.getJobConf());

        // use the PARALLEL keyword; it overrides the estimated reducer number
        query = "a = load '/passwd';" +
                "b = group a by $0 PARALLEL 2;" +
                "store b into 'output';";
        pp = Util.buildPp(ps, query);
        mrPlan = Util.buildMRPlan(pp, pc);

        pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
        pc.getConf().setProperty("pig.exec.reducers.max", "10");
        ConfigurationValidator.validatePigProperties(pc.getProperties());
        conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        jcc = new JobControlCompiler(pc, conf);
        jc = jcc.compile(mrPlan, "Test");
        job = jc.getWaitingJobs().get(0);

        Util.assertParallelValues(-1, 2, -1, 2, job.getJobConf());

        final byte[] COLUMNFAMILY = Bytes.toBytes("pig");
        util.createTable(Bytes.toBytesBinary("test_table"), COLUMNFAMILY);

        // the estimation doesn't take effect for non-DFS inputs or files that don't exist, such as HBase
        query = "a = load 'hbase://test_table' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');" +
                "b = group a by $0 ;" +
                "store b into 'output';";
        pp = Util.buildPp(ps, query);
        mrPlan = Util.buildMRPlan(pp, pc);

        pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
        pc.getConf().setProperty("pig.exec.reducers.max", "10");

        ConfigurationValidator.validatePigProperties(pc.getProperties());
        conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        jcc = new JobControlCompiler(pc, conf);
        jc = jcc.compile(mrPlan, "Test");
        job = jc.getWaitingJobs().get(0);

        Util.assertParallelValues(-1, -1, -1, 1, job.getJobConf());

        util.deleteTable(Bytes.toBytesBinary("test_table"));
        // In HBase 0.90.1 and above we can use util.shutdownMiniHBaseCluster()
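
The expected reducer count above is min(ceil(inputBytes / bytesPerReducer), maxReducers), mirroring Pig's input-size-based reducer estimation; restated as a standalone sketch with the property values the test sets:

        // Reducer estimate the test expects: one reducer per 100 input bytes, capped at 10.
        long bytesPerReducer = 100;   // pig.exec.reducers.bytes.per.reducer
        long maxReducers = 10;        // pig.exec.reducers.max
        long inputBytes = new File("test/org/apache/pig/test/data/passwd").length();
        long estimated = Math.min((long) Math.ceil(inputBytes / (double) bytesPerReducer), maxReducers);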

Examples of org.apache.hadoop.mapred.jobcontrol.JobControl

        PhysicalPlan pp = Util.buildPp(ps, query);

        MROperPlan mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);
        Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
        JobControl jobControl = jcc.compile(mrPlan, query);

        assertEquals(2, mrPlan.size());

        // first job uses a single reducer for the sampling
        Util.assertParallelValues(-1, 1, -1, 1, jobControl.getWaitingJobs().get(0).getJobConf());

        // Simulate the first job having run so estimation kicks in.
        MapReduceOper sort = mrPlan.getLeaves().get(0);
        jcc.updateMROpPlan(jobControl.getReadyJobs());
        FileLocalizer.create(sort.getQuantFile(), pc);
        jobControl = jcc.compile(mrPlan, query);

        sort = mrPlan.getLeaves().get(0);
        long reducer = Math.min((long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0), 10);
        assertEquals(reducer, sort.getRequestedParallelism());

        // the second job estimates reducers
        Util.assertParallelValues(-1, -1, reducer, reducer, jobControl.getWaitingJobs().get(0).getJobConf());

        // use the PARALLEL keyword; it overrides the estimated reducer number
        query = "a = load '/passwd';" + "b = order a by $0 PARALLEL 2;" +
                "store b into 'output';";
        pp = Util.buildPp(ps, query);

        mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);

        assertEquals(2, mrPlan.size());

        sort = mrPlan.getLeaves().get(0);
        assertEquals(2, sort.getRequestedParallelism());

        // the estimation doesn't take effect for non-DFS inputs or files that don't exist, such as HBase
        query = "a = load 'hbase://passwd' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');" +
                "b = order a by $0 ;" +
                "store b into 'output';";
        pp = Util.buildPp(ps, query);

        mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);
        assertEquals(2, mrPlan.size());

        sort = mrPlan.getLeaves().get(0);

        // the requested parallelism will be -1 if the user sets neither default_parallel nor PARALLEL,
        // and the estimation doesn't take effect; the MR framework will eventually set it to 1.
        assertEquals(-1, sort.getRequestedParallelism());

        // test order by with three jobs (after optimization)
        query = "a = load '/passwd';" +
                "b = foreach a generate $0, $1, $2;" +
                "c = order b by $0;" +
                "store c into 'output';";
        pp = Util.buildPp(ps, query);

        mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);
        assertEquals(3, mrPlan.size());

        // Simulate the first 2 jobs having run so estimation kicks in.
        sort = mrPlan.getLeaves().get(0);
        FileLocalizer.create(sort.getQuantFile(), pc);

        jobControl = jcc.compile(mrPlan, query);
        Util.copyFromLocalToCluster(cluster, "test/org/apache/pig/test/data/passwd", ((POLoad) sort.mapPlan.getRoots().get(0)).getLFile().getFileName());

        // First job is just a foreach with projection, a map-only job, so the estimate is ignored
        Util.assertParallelValues(-1, -1, reducer, 0, jobControl.getWaitingJobs().get(0).getJobConf());

        jcc.updateMROpPlan(jobControl.getReadyJobs());
        jobControl = jcc.compile(mrPlan, query);
        jcc.updateMROpPlan(jobControl.getReadyJobs());

        // Second job is a sampler, which requests and gets 1 reducer
        Util.assertParallelValues(-1, 1, -1, 1, jobControl.getWaitingJobs().get(0).getJobConf());

        jobControl = jcc.compile(mrPlan, query);
        sort = mrPlan.getLeaves().get(0);
        assertEquals(reducer, sort.getRequestedParallelism());

        // Third job is the order-by, which uses the estimated number of reducers
        Util.assertParallelValues(-1, -1, reducer, reducer, jobControl.getWaitingJobs().get(0).getJobConf());
    }