Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators

Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore


        LogicalPlanTester planTester = new LogicalPlanTester(pc);
        Util.copyFromLocalToCluster(cluster, "test/org/apache/pig/test/data/passwd", "/passwd");
        planTester.buildPlan("a = load '/passwd';");
        LogicalPlan lp = planTester.buildPlan("b = group a by $0;");
        PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
        POStore store = GenPhyOp.dummyPigStorageOp();
        pp.addAsLeaf(store);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
              
        pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
        pc.getConf().setProperty("pig.exec.reducers.max", "10");
View Full Code Here
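
The fragment above pins down reducer estimation by setting pig.exec.reducers.bytes.per.reducer and pig.exec.reducers.max before the MR plan is compiled. When a job carries no PARALLEL clause and no default_parallel, Pig derives the reduce-task count from the total input size. A minimal sketch of that arithmetic, assuming a hypothetical totalInputBytes stand-in for the summed size of the job's input files:

        // A minimal sketch of Pig's reducer estimation arithmetic. The two
        // property names (and their usual defaults, 1GB and 999) are real;
        // totalInputBytes is a hypothetical stand-in for the summed size of
        // the job's input files.
        long bytesPerReducer = Long.parseLong(pc.getConf()
                .getProperty("pig.exec.reducers.bytes.per.reducer", "1000000000"));
        int maxReducers = Integer.parseInt(pc.getConf()
                .getProperty("pig.exec.reducers.max", "999"));

        long totalInputBytes = 950L;  // hypothetical tiny input
        int estimated = (int) Math.ceil((double) totalInputBytes / (double) bytesPerReducer);
        int reducers = Math.min(maxReducers, Math.max(estimated, 1));
        // with the properties set above: min(10, max(ceil(950 / 100), 1)) = 10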


        LogicalPlanTester planTester = new LogicalPlanTester(pc);
        Util.copyFromLocalToCluster(cluster, "test/org/apache/pig/test/data/passwd", "/passwd");
        planTester.buildPlan("a = load '/passwd';");
        LogicalPlan lp = planTester.buildPlan("b = order a by $0;");
        PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
        POStore store = GenPhyOp.dummyPigStorageOp();
        pp.addAsLeaf(store);

        MROperPlan mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);
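        // an ORDER BY compiles into two MR jobs: a sampling job that builds
        // the quantiles file, then the sort job that partitions by it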
        assertEquals(2, mrPlan.size());    
       
View Full Code Here

    }
   
    @Override
    public void visit(LOStore loStore) throws VisitorException {
        String scope = loStore.getOperatorKey().scope;
        POStore store = new POStore(new OperatorKey(scope, nodeGen
                .getNextNodeId(scope)));
        store.setSFile(loStore.getOutputFile());
        store.setInputSpec(loStore.getInputSpec());
        try {
            // create a new schema for ourselves so that when
            // we serialize we are not serializing objects that
            // contain the schema - Java serialization walks every reachable
            // reference, so a schema still wired into the plan can drag its
            // containing objects into the stream. The schema here will be serialized
            // in JobControlCompiler
            store.setSchema(new Schema(loStore.getSchema()));
        } catch (FrontendException e1) {
            int errorCode = 1060;
            String message = "Cannot resolve Store output schema";
            throw new VisitorException(message, errorCode, PigException.BUG, e1);
        }
View Full Code Here
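
The schema-copy comment in the visitor above boils down to a general property of Java serialization: writeObject walks every reachable reference, so an object that back-references its container drags the whole container into the stream. A standalone sketch of that pitfall, using hypothetical Container/Part classes rather than Pig's Schema:

    // Hypothetical demo of the serialization pitfall; none of these classes
    // are Pig's. Part keeps a back-reference to Container, so serializing
    // c.part directly also serializes the container and its bulky payload.
    import java.io.*;

    class Container implements Serializable {
        Part part = new Part(this);          // part points back at us
        byte[] bulky = new byte[1 << 20];    // state we never meant to ship
    }

    class Part implements Serializable {
        final Container owner;
        Part(Container owner) { this.owner = owner; }
        Part(Part other) { this.owner = null; }  // copy constructor detaches
    }

    public class SchemaCopyDemo {
        public static void main(String[] args) throws IOException {
            Container c = new Container();
            ByteArrayOutputStream direct = new ByteArrayOutputStream();
            ByteArrayOutputStream detached = new ByteArrayOutputStream();
            try (ObjectOutputStream oos = new ObjectOutputStream(direct)) {
                oos.writeObject(c.part);            // ~1MB: container comes along
            }
            try (ObjectOutputStream oos = new ObjectOutputStream(detached)) {
                oos.writeObject(new Part(c.part));  // a few bytes: detached copy
            }
            System.out.println(direct.size() + " vs " + detached.size());
        }
    }

Detaching a copy first is exactly what store.setSchema(new Schema(loStore.getSchema())) does before the schema is later serialized in JobControlCompiler.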

        LogicalPlanTester planTester = new LogicalPlanTester();
        planTester.buildPlan("a = load 'input';");
        LogicalPlan lp = planTester.buildPlan("b = group a by 1;");
       
        PhysicalPlan pp = Util.getNewOptimizedPhysicalPlan(lp, pc);
        POStore store = GenPhyOp.dummyPigStorageOp();
        pp.addAsLeaf(store);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

        HExecutionEngine exe = pc.getExecutionEngine();
        ConfigurationValidator.validatePigProperties(exe.getConfiguration());
View Full Code Here

        LogicalPlanTester planTester = new LogicalPlanTester();
        planTester.buildPlan("a = load 'input';");
        LogicalPlan lp = planTester.buildPlan("b = group a by $0;");
       
        PhysicalPlan pp = Util.getNewOptimizedPhysicalPlan(lp, pc);
        POStore store = GenPhyOp.dummyPigStorageOp();
        pp.addAsLeaf(store);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

        HExecutionEngine exe = pc.getExecutionEngine();
        ConfigurationValidator.validatePigProperties(exe.getConfiguration());
View Full Code Here

            nwJob.setOutputFormatClass(PigOutputFormat.class);
           
            if (mapStores.size() + reduceStores.size() == 1) { // single store case
                log.info("Setting up single store job");
               
                POStore st;
                if (reduceStores.isEmpty()) {
                    st = mapStores.get(0);
                    mro.mapPlan.remove(st);
                }
                else {
                    st = reduceStores.get(0);
                    mro.reducePlan.remove(st);
                }

                // set out filespecs
                String outputPath = st.getSFile().getFileName();
                FuncSpec outputFuncSpec = st.getSFile().getFuncSpec();
               
                conf.set("pig.streaming.log.dir",
                            new Path(outputPath, LOG_DIR).toString());
                conf.set("pig.streaming.task.output.dir", outputPath);
            }
            else { // multi store case
                log.info("Setting up multi store job");
                String tmpLocationStr = FileLocalizer
                        .getTemporaryPath(pigContext).toString();
                tmpLocation = new Path(tmpLocationStr);

                nwJob.setOutputFormatClass(PigOutputFormat.class);
               
                for (POStore sto: storeLocations) {
                    sto.setMultiStore(true);
                }
                conf.set("pig.streaming.log.dir",
                            new Path(tmpLocation, LOG_DIR).toString());
                conf.set("pig.streaming.task.output.dir", tmpLocation.toString());
            }

            // store map key type
            // this is needed when the key is null to create
            // an appropriate NullableXXXWritable object
            conf.set("pig.map.keytype", ObjectSerializer.serialize(new byte[] { mro.mapKeyType }));

            // set parent plan in all operators in map and reduce plans
            // currently the parent plan is really used only when POStream is present in the plan
            new PhyPlanSetter(mro.mapPlan).visit();
            new PhyPlanSetter(mro.reducePlan).visit();
           
            // this call modifies the ReplFiles names of POFRJoin operators
            // within the MR plans; it must be called before the plans are
            // serialized
            setupDistributedCacheForJoin(mro, pigContext, conf);

            POPackage pack = null;
            if(mro.reducePlan.isEmpty()){
                //MapOnly Job
                nwJob.setMapperClass(PigMapOnly.Map.class);
                nwJob.setNumReduceTasks(0);
                conf.set("pig.mapPlan", ObjectSerializer.serialize(mro.mapPlan));
                if(mro.isEndOfAllInputSetInMap()) {
                    // this is used in Map.close() to decide whether the
                    // pipeline needs to be rerun one more time in the close()
                    // The pipeline is rerun only if there was a stream or a POMergeJoin
                    conf.set(END_OF_INP_IN_MAP, "true");
                }
            }
            else{
                //Map Reduce Job
                //Process the POPackage operator and remove it from the reduce plan
                if(!mro.combinePlan.isEmpty()){
                    POPackage combPack = (POPackage)mro.combinePlan.getRoots().get(0);
                    mro.combinePlan.remove(combPack);
                    nwJob.setCombinerClass(PigCombiner.Combine.class);
                    conf.set("pig.combinePlan", ObjectSerializer.serialize(mro.combinePlan));
                    conf.set("pig.combine.package", ObjectSerializer.serialize(combPack));
                } else if (mro.needsDistinctCombiner()) {
                    nwJob.setCombinerClass(DistinctCombiner.Combine.class);
                    log.info("Setting identity combiner class.");
                }
                pack = (POPackage)mro.reducePlan.getRoots().get(0);
                mro.reducePlan.remove(pack);
                nwJob.setMapperClass(PigMapReduce.Map.class);
                nwJob.setReducerClass(PigMapReduce.Reduce.class);
               
                // first check the PARALLEL clause in the query, then the defaultParallel in PigContext, and finally fall back to estimation
                if (mro.requestedParallelism > 0)
                    nwJob.setNumReduceTasks(mro.requestedParallelism);
                else if (pigContext.defaultParallel > 0)
                    conf.set("mapred.reduce.tasks", "" + pigContext.defaultParallel);
                else
                    estimateNumberOfReducers(conf, lds);
               
                if (mro.customPartitioner != null)
                  nwJob.setPartitionerClass(PigContext.resolveClassName(mro.customPartitioner));

                conf.set("pig.mapPlan", ObjectSerializer.serialize(mro.mapPlan));
                if(mro.isEndOfAllInputSetInMap()) {
                    // this is used in Map.close() to decide whether the
                    // pipeline needs to be rerun one more time in the close()
                    // The pipeline is rerun only if there was a stream or merge-join.
                    conf.set(END_OF_INP_IN_MAP, "true");
                }
                conf.set("pig.reducePlan", ObjectSerializer.serialize(mro.reducePlan));
                if(mro.isEndOfAllInputSetInReduce()) {
                    // this is used in Reduce.close() to decide whether the
                    // pipeline needs to be rerun one more time in the close()
                    // The pipeline is rerun only if there was a stream
                    conf.set("pig.stream.in.reduce", "true");
                }
                conf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
                conf.set("pig.reduce.key.type", Byte.toString(pack.getKeyType()));
               
                if (mro.getUseSecondaryKey()) {
                    nwJob.setGroupingComparatorClass(PigSecondaryKeyGroupComparator.class);
                    nwJob.setPartitionerClass(SecondaryKeyPartitioner.class);
                    nwJob.setSortComparatorClass(PigSecondaryKeyComparator.class);
                    nwJob.setOutputKeyClass(NullableTuple.class);
                    conf.set("pig.secondarySortOrder",
                            ObjectSerializer.serialize(mro.getSecondarySortOrder()));

                }
                else
                {
                    Class<? extends WritableComparable> keyClass = HDataType.getWritableComparableTypes(pack.getKeyType()).getClass();
                    nwJob.setOutputKeyClass(keyClass);
                    selectComparator(mro, pack.getKeyType(), nwJob);
                }
                nwJob.setOutputValueClass(NullableTuple.class);
            }
       
            if(mro.isGlobalSort() || mro.isLimitAfterSort()){
                // Only set the quantiles file and sort partitioner if we're a
                // global sort, not for limit after sort.
                if (mro.isGlobalSort()) {
                    String symlink = addSingleFileToDistributedCache(
                            pigContext, conf, mro.getQuantFile(), "pigsample");
                    conf.set("pig.quantilesFile", symlink);
                    nwJob.setPartitionerClass(WeightedRangePartitioner.class);
                }
               
                if (mro.isUDFComparatorUsed) { 
                    boolean usercomparator = false;
                    for (String compFuncSpec : mro.UDFs) {
                        Class comparator = PigContext.resolveClassName(compFuncSpec);
                        if(ComparisonFunc.class.isAssignableFrom(comparator)) {
                            nwJob.setMapperClass(PigMapReduce.MapWithComparator.class);
                            nwJob.setReducerClass(PigMapReduce.ReduceWithComparator.class);
                            conf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
                            conf.set("pig.usercomparator", "true");
                            nwJob.setOutputKeyClass(NullableTuple.class);
                            nwJob.setSortComparatorClass(comparator);
                            usercomparator = true;
                            break;
                        }
                    }
                    if (!usercomparator) {
                        String msg = "Internal error. Can't find the UDF comparator";
                        throw new IOException (msg);
                    }
                   
                } else {
                    conf.set("pig.sortOrder",
                        ObjectSerializer.serialize(mro.getSortOrder()));
                }
            }
           
            if (mro.isSkewedJoin()) {
                String symlink = addSingleFileToDistributedCache(pigContext,
                        conf, mro.getSkewedJoinPartitionFile(), "pigdistkey");
                conf.set("pig.keyDistFile", symlink);
                nwJob.setPartitionerClass(SkewedPartitioner.class);
                nwJob.setMapperClass(PigMapReduce.MapWithPartitionIndex.class);
                nwJob.setMapOutputKeyClass(NullablePartitionWritable.class);
                nwJob.setGroupingComparatorClass(PigGroupingPartitionWritableComparator.class);
            }
           
            // unset inputs for POStore, otherwise, map/reduce plan will be unnecessarily deserialized
            for (POStore st: mapStores) { st.setInputs(null); st.setParentPlan(null);}
            for (POStore st: reduceStores) { st.setInputs(null); st.setParentPlan(null);}

            // tmp file compression setups
            if (Utils.tmpFileCompression(pigContext)) {
                conf.setBoolean("pig.tmpfilecompression", true);
                conf.set("pig.tmpfilecompression.codec", Utils.tmpFileCompressionCodec(pigContext));
View Full Code Here
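
getJob pushes the compiled plans and the map key type into the job configuration as serialized strings; Pig's task-side classes reverse the process during setup. A minimal sketch of that read-back, assuming conf is the Configuration the job was submitted with and that the enclosing method declares throws IOException:

        // Minimal sketch of the task-side counterpart to the serialization
        // above; assumes conf is the submitted job's Configuration.
        PhysicalPlan mapPlan = (PhysicalPlan) ObjectSerializer
                .deserialize(conf.get("pig.mapPlan"));
        byte[] keyTypeHolder = (byte[]) ObjectSerializer
                .deserialize(conf.get("pig.map.keytype"));
        byte mapKeyType = keyTypeHolder[0];  // picks the NullableXXXWritable for null keys
        if ("true".equals(conf.get("pig.stream.in.reduce"))) {
            // the reduce pipeline must be run once more in close()
        }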

                    fixProjectionAfterLimit(limitAdjustMROp, mr);
                    limitAdjustMROp.setLimitAfterSort(true);
                    limitAdjustMROp.setSortOrder(mr.getSortOrder());
                }
               
                POStore st = getStore();
                st.setSFile(oldSpec);
                st.setIsTmpStore(oldIsTmpStore);
                limitAdjustMROp.reducePlan.addAsLeaf(st);
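                // a global LIMIT is only correct if all data funnels through
                // a single reducer, hence the forced parallelism of 1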
                limitAdjustMROp.requestedParallelism = 1;
                limitAdjustMROp.setLimitOnly(true);
               
                List<MapReduceOper> successorList = MRPlan.getSuccessors(mr);
View Full Code Here

        JobContext jc = new JobContext(conf, new JobID());
        StoreFuncInterface sfunc = (StoreFuncInterface)PigContext.instantiateFuncFromSpec(
                storeFuncSpec);
        OutputFormat<?,?> of = sfunc.getOutputFormat();
       
        POStore store = new POStore(new OperatorKey());
        store.setSFile(new FileSpec(file, storeFuncSpec));
        PigOutputFormat.setLocation(jc, store);
        OutputCommitter oc;
        // create a simulated TaskAttemptContext
        TaskAttemptContext tac = new TaskAttemptContext(conf, new TaskAttemptID());
        PigOutputFormat.setLocation(tac, store);
View Full Code Here
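
The fragment above wires a throwaway POStore into simulated job and task contexts. A hedged continuation, driving the committer through the standard Hadoop OutputCommitter life cycle (the API calls are real Hadoop ones; that the original test proceeds exactly this way is an assumption):

        // Assumed continuation of the truncated fragment: exercise the
        // committer through a simulated task life cycle. getOutputCommitter,
        // setupJob, setupTask, needsTaskCommit and commitTask are standard
        // Hadoop OutputCommitter API.
        oc = of.getOutputCommitter(tac);
        oc.setupJob(jc);
        oc.setupTask(tac);
        // ... the store function would write its records here ...
        if (oc.needsTaskCommit(tac)) {
            oc.commitTask(tac);
        }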

    public RecordWriter<WritableComparable, Tuple> getRecordWriter(TaskAttemptContext taskattemptcontext)
                throws IOException, InterruptedException {
        setupUdfEnvAndStores(taskattemptcontext);
        if(mapStores.size() + reduceStores.size() == 1) {
            // single store case
            POStore store;
            if(mapStores.size() == 1) {
                store = mapStores.get(0);
            } else {
                store = reduceStores.get(0);
            }
            StoreFuncInterface sFunc = store.getStoreFunc();
            // set output location
            PigOutputFormat.setLocation(taskattemptcontext, store);
            // The above call should have updated the conf in the context
            // with the output location - now get the underlying RecordWriter
            RecordWriter writer = sFunc.getOutputFormat().getRecordWriter(
View Full Code Here

                        FileSpec newSpec = getTempFileSpec();
                       
                        // replace oldSpec in mro with newSpec
                        new FindStoreNameVisitor(pl, newSpec, oldSpec).visit();
                       
                        POStore newSto = getStore();
                        newSto.setSFile(oldSpec);
                        if (MRPlan.getPredecessors(mrOp)!=null &&
                                MRPlan.getPredecessors(mrOp).contains(mro))
                            MRPlan.disconnect(mro, mrOp);
                        MapReduceOper catMROp = getConcatenateJob(newSpec, mro, newSto);
                        MRPlan.connect(catMROp, mrOp);  
View Full Code Here
