Package org.apache.pig.backend.hadoop.executionengine.mapReduceLayer

Examples of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler
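
The snippets below share the same compile workflow: validate the Pig properties, turn them into a Hadoop Configuration, construct a JobControlCompiler, and compile an MROperPlan into a JobControl whose waiting jobs (and their JobConf settings, such as the reduce task count) can then be inspected. A minimal sketch of that shared pattern, distilled from the examples on this page (the query variable and the Util plan-building helpers are the same test utilities used below):

    // Build the physical and MapReduce plans for a Pig query (test helpers from the snippets below).
    PhysicalPlan pp = Util.buildPp(pigServer, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    // Validate and convert the Pig properties into a Hadoop Configuration.
    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());

    // Compile the MapReduce plan into a JobControl of Hadoop jobs.
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);
    JobControl jobControl = jcc.compile(mrPlan, "Test");

    // Each waiting job exposes its JobConf, e.g. the number of reduce tasks it was given.
    Job job = jobControl.getWaitingJobs().get(0);
    int parallel = job.getJobConf().getNumReduceTasks();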


    public void checkGroupNonConstWithParallelResult(PhysicalPlan pp, PigContext pc) throws Exception {
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
       
        ConfigurationValidator.validatePigProperties(pc.getProperties());
        Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
       
        JobControl jobControl = jcc.compile(mrPlan, "Test");
        Job job = jobControl.getWaitingJobs().get(0);
        int parallel = job.getJobConf().getNumReduceTasks();
       
        assertEquals("parallism", 100, parallel);
    }
View Full Code Here


                ".tar.gz", ".tar");
    }

    private JobConf compileTestJob(final PigContext pigContext, Configuration conf)
            throws JobCreationException {
        final JobControlCompiler jobControlCompiler = new JobControlCompiler(
                pigContext, conf);

        final MROperPlan plan = new MROperPlan();
        plan.add(new MapReduceOper(new OperatorKey()));

        final JobControl jobControl = jobControlCompiler.compile(plan, "test");
        final JobConf jobConf = jobControl.getWaitingJobs().get(0).getJobConf();
        return jobConf;
    }
View Full Code Here
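
Since compileTestJob builds a one-operator MROperPlan and returns the JobConf of the single compiled job, it is handy for asserting what JobControlCompiler puts into the job configuration. A small, assumed usage sketch (the value read here is illustrative only):

    // Hypothetical usage of the helper above: compile a trivial plan and inspect its JobConf.
    JobConf jobConf = compileTestJob(pigContext, conf);
    int reducers = jobConf.getNumReduceTasks();   // read any setting of interest from the compiled job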

            }
        }
       
        ConfigurationValidator.validatePigProperties(pc.getProperties());
        Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
        try {
            jcc.compile(mrPlan, "Test");
            fail("Expected JobCreationException with error code 1068");
        } catch (JobCreationException jce) {
            assertEquals(1068, jce.getErrorCode());
        }
    }
View Full Code Here

        PhysicalPlan pp = Util.buildPp(ps, query);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

        ConfigurationValidator.validatePigProperties(pc.getProperties());
        Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);

        JobControl jobControl = jcc.compile(mrPlan, "Test");
        Job job = jobControl.getWaitingJobs().get(0);
        int parallel = job.getJobConf().getNumReduceTasks();

        assertEquals(100, parallel);
        Util.assertParallelValues(100, -1, -1, 100, job.getJobConf());
View Full Code Here
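
Util.assertParallelValues is used throughout these examples to check how the reducer count was decided. Judging from the calls on this page, its arguments appear to be, in order: the expected default parallelism, the parallelism requested with PARALLEL, the estimated parallelism, the parallelism actually configured on the job, and the JobConf to verify. An annotated version of the call above (the parameter meanings are inferred from usage, not from the helper's source):

    // Parameter meanings inferred from the surrounding snippets (treat as an assumption):
    Util.assertParallelValues(
            100,                 // default_parallel in effect
            -1,                  // no PARALLEL clause in the script
            -1,                  // no reducer estimation performed
            100,                 // reduce tasks actually set on the job
            job.getJobConf());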

        pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
        pc.getConf().setProperty("pig.exec.reducers.max", "10");
        pc.getConf().setProperty(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(clientPort));
        ConfigurationValidator.validatePigProperties(pc.getProperties());
        conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
        JobControl jc = jcc.compile(mrPlan, "Test");
        Job job = jc.getWaitingJobs().get(0);
        long reducer = Math.min((long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0), 10);

        Util.assertParallelValues(-1, -1, reducer, reducer, job.getJobConf());

        // the PARALLEL keyword overrides the estimated reducer number
        query = "a = load '/passwd';" +
                "b = group a by $0 PARALLEL 2;" +
                "store b into 'output';";
        pp = Util.buildPp(ps, query);
        mrPlan = Util.buildMRPlan(pp, pc);

        pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
        pc.getConf().setProperty("pig.exec.reducers.max", "10");
        ConfigurationValidator.validatePigProperties(pc.getProperties());
        conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        jcc = new JobControlCompiler(pc, conf);
        jc = jcc.compile(mrPlan, "Test");
        job = jc.getWaitingJobs().get(0);

        Util.assertParallelValues(-1, 2, -1, 2, job.getJobConf());

        final byte[] COLUMNFAMILY = Bytes.toBytes("pig");
        util.createTable(Bytes.toBytesBinary("test_table"), COLUMNFAMILY);

        // the estimation does not take effect for non-DFS inputs or files that do not exist, such as HBase
        query = "a = load 'hbase://test_table' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');" +
                "b = group a by $0 ;" +
                "store b into 'output';";
        pp = Util.buildPp(ps, query);
        mrPlan = Util.buildMRPlan(pp, pc);

        pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
        pc.getConf().setProperty("pig.exec.reducers.max", "10");

        ConfigurationValidator.validatePigProperties(pc.getProperties());
        conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        jcc = new JobControlCompiler(pc, conf);
        jc = jcc.compile(mrPlan, "Test");
        job = jc.getWaitingJobs().get(0);

        Util.assertParallelValues(-1, -1, -1, 1, job.getJobConf());

        util.deleteTable(Bytes.toBytesBinary("test_table"));
View Full Code Here
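
The expected reducer counts in the assertions above follow the bytes-per-reducer estimate: reducers = min(ceil(inputSizeInBytes / bytesPerReducer), maxReducers), which is exactly what the reducer variable computes from the local passwd file. A worked example with a hypothetical 565-byte input and the properties set above (100 bytes per reducer, at most 10 reducers):

    // Hypothetical numbers for illustration; the real test reads the size of the local passwd file.
    long inputBytes = 565;        // assumed input size in bytes
    long bytesPerReducer = 100;   // pig.exec.reducers.bytes.per.reducer
    long maxReducers = 10;        // pig.exec.reducers.max
    long estimated = Math.min((long) Math.ceil(inputBytes / (double) bytesPerReducer), maxReducers);
    // ceil(565 / 100) = 6, capped at 10, so the job is expected to get 6 reducers.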

        PigServer ps = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
        PhysicalPlan pp = Util.buildPp(ps, query);

        MROperPlan mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);
        Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
        JobControl jobControl = jcc.compile(mrPlan, query);

        assertEquals(2, mrPlan.size());

        // first job uses a single reducer for the sampling
        Util.assertParallelValues(-1, 1, -1, 1, jobControl.getWaitingJobs().get(0).getJobConf());

        // Simulate the first job having run so estimation kicks in.
        MapReduceOper sort = mrPlan.getLeaves().get(0);
        jcc.updateMROpPlan(jobControl.getReadyJobs());
        FileLocalizer.create(sort.getQuantFile(), pc);
        jobControl = jcc.compile(mrPlan, query);

        sort = mrPlan.getLeaves().get(0);
        long reducer = Math.min((long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0), 10);
        assertEquals(reducer, sort.getRequestedParallelism());

        // the second job estimates reducers
        Util.assertParallelValues(-1, -1, reducer, reducer, jobControl.getWaitingJobs().get(0).getJobConf());

        // the PARALLEL keyword overrides the estimated reducer number
        query = "a = load '/passwd';" + "b = order a by $0 PARALLEL 2;" +
                "store b into 'output';";
        pp = Util.buildPp(ps, query);

        mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);

        assertEquals(2, mrPlan.size());

        sort = mrPlan.getLeaves().get(0);
        assertEquals(2, sort.getRequestedParallelism());

        // the estimation does not take effect for non-DFS inputs or files that do not exist, such as HBase
        query = "a = load 'hbase://passwd' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');" +
                "b = order a by $0 ;" +
                "store b into 'output';";
        pp = Util.buildPp(ps, query);

        mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);
        assertEquals(2, mrPlan.size());

        sort = mrPlan.getLeaves().get(0);

        // the requested parallelism will be -1 if the user sets neither default_parallel nor PARALLEL
        // and the estimation does not take effect; the MR framework will eventually set it to 1.
        assertEquals(-1, sort.getRequestedParallelism());

        // test order by with three jobs (after optimization)
        query = "a = load '/passwd';" +
                "b = foreach a generate $0, $1, $2;" +
                "c = order b by $0;" +
                "store c into 'output';";
        pp = Util.buildPp(ps, query);

        mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);
        assertEquals(3, mrPlan.size());

        // Simulate the first 2 jobs having run so estimation kicks in.
        sort = mrPlan.getLeaves().get(0);
        FileLocalizer.create(sort.getQuantFile(), pc);

        jobControl = jcc.compile(mrPlan, query);
        Util.copyFromLocalToCluster(cluster, "test/org/apache/pig/test/data/passwd", ((POLoad) sort.mapPlan.getRoots().get(0)).getLFile().getFileName());

        // First job is just a foreach with projection, a map-only job, so the estimate gets ignored
        Util.assertParallelValues(-1, -1, -1, 0, jobControl.getWaitingJobs().get(0).getJobConf());

        jcc.updateMROpPlan(jobControl.getReadyJobs());
        jobControl = jcc.compile(mrPlan, query);
        jcc.updateMROpPlan(jobControl.getReadyJobs());

        //Second job is a sampler, which requests and gets 1 reducer
        Util.assertParallelValues(-1, 1, -1, 1, jobControl.getWaitingJobs().get(0).getJobConf());

        jobControl = jcc.compile(mrPlan, query);
        sort = mrPlan.getLeaves().get(0);
        assertEquals(reducer, sort.getRequestedParallelism());

        //Third job is the order, which uses the estimated number of reducers
        Util.assertParallelValues(-1, -1, reducer, reducer, jobControl.getWaitingJobs().get(0).getJobConf());
View Full Code Here

      }
     
        ExecutionEngine exe = pc.getExecutionEngine();
        ConfigurationValidator.validatePigProperties(exe.getConfiguration());
        Configuration conf = ConfigurationUtil.toConfiguration(exe.getConfiguration());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
        try {
            jcc.compile(mrPlan, "Test");
            fail("Expected JobCreationException with error code 1068");
        } catch (JobCreationException jce) {
            assertEquals(1068, jce.getErrorCode());
        }
    }
View Full Code Here

        MROperPlan mrp = launcher.compile(php, pc);
               
        ConfigurationValidator.validatePigProperties(pc.getProperties());
        Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
       
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
       
        JobControl jc;
        int numMRJobsCompl = 0;
        DataBag input;
        List<Pair<PigNullableWritable, Writable>> intermediateData = new ArrayList<Pair<PigNullableWritable, Writable>>();

        Map<Job, MapReduceOper> jobToMroMap = jcc.getJobMroMap();
        HashMap<String, DataBag> output = new HashMap<String, DataBag>();
        Configuration jobConf;
        // jc is null only when mrp.size == 0
        boolean needFileInput;
        final ArrayList<OperatorKey> emptyInpTargets = new ArrayList<OperatorKey>();
        while(mrp.size() != 0) {
            jc = jcc.compile(mrp, "Illustrator");
            if(jc == null) {
                throw new ExecException("Native execution is not supported");
            }
            List<Job> jobs = jc.getWaitingJobs();
            for (Job job : jobs) {
                jobConf = job.getJobConf();
                FileLocalizer.setInitialized(false);
                ArrayList<ArrayList<OperatorKey>> inpTargets =
                    (ArrayList<ArrayList<OperatorKey>>)
                      ObjectSerializer.deserialize(jobConf.get("pig.inpTargets"));
                intermediateData.clear();
                MapReduceOper mro = jobToMroMap.get(job);
                PigSplit split = null;
                List<POStore> stores = null;
                PhysicalOperator pack = null;
                // revisit as there are new physical operators from MR compilation
                if (!mro.mapPlan.isEmpty())
                    attacher.revisit(mro.mapPlan);
                if (!mro.reducePlan.isEmpty()) {
                    attacher.revisit(mro.reducePlan);
                    pack = mro.reducePlan.getRoots().get(0);
                }
               
                List<POLoad> lds = PlanHelper.getLoads(mro.mapPlan);
                if (!mro.mapPlan.isEmpty()) {
                    stores = PlanHelper.getStores(mro.mapPlan);
                }
                if (!mro.reducePlan.isEmpty()) {
                    if (stores == null)
                        stores = PlanHelper.getStores(mro.reducePlan);
                    else
                        stores.addAll(PlanHelper.getStores(mro.reducePlan));
                }

                for (POStore store : stores) {
                    output.put(store.getSFile().getFileName(), attacher.getDataMap().get(store));
                }
              
                OutputAttacher oa = new OutputAttacher(mro.mapPlan, output);
                oa.visit();
               
                if (!mro.reducePlan.isEmpty()) {
                    oa = new OutputAttacher(mro.reducePlan, output);
                    oa.visit();
                }
                int index = 0;
                for (POLoad ld : lds) {
                    input = output.get(ld.getLFile().getFileName());
                    if (input == null && baseData != null) {
                        for (LogicalRelationalOperator lo : baseData.keySet()) {
                            if (((LOLoad) lo).getSchemaFile().equals(ld.getLFile().getFileName()))
                            {
                                 input = baseData.get(lo);
                                 break;
                            }
                        }
                    }
                    if (input != null)
                        mro.mapPlan.remove(ld);
                }
                for (POLoad ld : lds) {
                    // check newly generated data first
                    input = output.get(ld.getLFile().getFileName());
                    if (input == null && baseData != null) {
                        for (LogicalRelationalOperator lo : baseData.keySet()) {
                            if (((LOLoad) lo).getSchemaFile().equals(ld.getLFile().getFileName())) {
                                input = baseData.get(lo);
                                break;
                            }
                        }
                    }
                    needFileInput = (input == null);
                    split = new PigSplit(null, index, needFileInput ? emptyInpTargets : inpTargets.get(index), 0);
                    ++index;
                    Mapper<Text, Tuple, PigNullableWritable, Writable> map;
                   
                    if (mro.reducePlan.isEmpty()) {
                        // map-only
                        map = new PigMapOnly.Map();
                        ((PigMapBase) map).setMapPlan(mro.mapPlan);
                        Mapper<Text, Tuple, PigNullableWritable, Writable>.Context context = ((PigMapOnly.Map) map)
                          .getIllustratorContext(jobConf, input, intermediateData, split);
                        map.run(context);
                    } else {
                        if ("true".equals(jobConf.get("pig.usercomparator")))
                            map = new PigMapReduce.MapWithComparator();
                        else if (!"".equals(jobConf.get("pig.keyDistFile", "")))
                            map = new PigMapReduce.MapWithPartitionIndex();
                        else
                            map = new PigMapReduce.Map();
                        Mapper<Text, Tuple, PigNullableWritable, Writable>.Context context = ((PigMapBase) map)
                          .getIllustratorContext(jobConf, input, intermediateData, split);
                        ((PigMapBase) map).setMapPlan(mro.mapPlan);
                        map.run(context);
                    }
                }
               
                if (!mro.reducePlan.isEmpty())
                {
                    if (pack instanceof POPackage)
                        mro.reducePlan.remove(pack);
                    // reducer run
                    PigMapReduce.Reduce reduce;
                    if ("true".equals(jobConf.get("pig.usercomparator")))
                        reduce = new PigMapReduce.ReduceWithComparator();
                    else
                        reduce = new PigMapReduce.Reduce();
                    reduce.setReducePlan(mro.reducePlan);
                    Reducer<PigNullableWritable, NullableTuple, PigNullableWritable, Writable>.Context
                        context = reduce.getIllustratorContext(job, intermediateData, (POPackage) pack);
                    reduce.run(context);
                }
                for (PhysicalOperator key : mro.phyToMRMap.keySet())
                    for (PhysicalOperator value : mro.phyToMRMap.get(key))
                        phyToMRMap.put(key, value);
            }
           
           
            int removedMROp = jcc.updateMROpPlan(new LinkedList<Job>());
           
            numMRJobsCompl += removedMROp;
        }
               
        jcc.reset();
    }
View Full Code Here
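
The Illustrator snippet above also shows the general driver pattern for multi-job plans: compile what remains of the MROperPlan, process the resulting waiting jobs, call updateMROpPlan so finished operators are pruned from the plan, and repeat until the plan is empty. A stripped-down sketch of that loop (runJob is a hypothetical placeholder for whatever is done with each compiled job):

    // Minimal driver loop distilled from the snippets on this page; runJob is a placeholder.
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);
    while (mrPlan.size() != 0) {
        JobControl jc = jcc.compile(mrPlan, "MyPlan");
        if (jc == null) {
            throw new ExecException("Native execution is not supported");
        }
        for (Job job : jc.getWaitingJobs()) {
            runJob(job);                            // run or simulate the job
        }
        jcc.updateMROpPlan(jc.getReadyJobs());      // advance the plan past the jobs just handled
    }
    jcc.reset();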

        }
      }
     
        ConfigurationValidator.validatePigProperties(pc.getProperties());
        Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
        try {
            jcc.compile(mrPlan, "Test");
            fail("Expected JobCreationException with error code 1068");
        } catch (JobCreationException jce) {
            assertEquals(1068, jce.getErrorCode());
        }
    }
View Full Code Here

        PhysicalPlan pp = Util.buildPp(ps, query);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

        ConfigurationValidator.validatePigProperties(pc.getProperties());
        Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
       
        JobControl jobControl = jcc.compile(mrPlan, "Test");
        Job job = jobControl.getWaitingJobs().get(0);
        int parallel = job.getJobConf().getNumReduceTasks();

        assertEquals(100, parallel);
       
View Full Code Here
