Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators

Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad


        if ((System.getProperty("os.name").toUpperCase().startsWith("WINDOWS")))
            inpDir="/"+FileLocalizer.parseCygPath(inpDir, FileLocalizer.STYLE_WINDOWS);
        // copy passwd file to cluster and set that as the input location for the load
        Util.copyFromLocalToCluster(cluster, inpDir + "passwd", "passwd");
        FileSpec inpFSpec = new FileSpec("passwd", new FuncSpec(PigStorage.class.getName(), new String[]{":"}));
        POLoad ld = GenPhyOp.topLoadOp();
        ld.setLFile(inpFSpec);
        ld.setPc(pc);
       
        DataBag inpDB = DefaultBagFactory.getInstance().newDefaultBag();
        BufferedReader br = new BufferedReader(new FileReader("test/org/apache/pig/test/data/InputFiles/passwd"));
       
        for(String line = br.readLine();line!=null;line=br.readLine()){
            String[] flds = line.split(":",-1);
            Tuple t = new DefaultTuple();
            for (String fld : flds) {
                t.append((fld.compareTo("")!=0 ? new DataByteArray(fld.getBytes()) : null));
            }
            inpDB.add(t);
        }
        Tuple t=null;
        int size = 0;
        for(Result res = ld.getNext(t);res.returnStatus!=POStatus.STATUS_EOP;res=ld.getNext(t)){
            assertEquals(true, TestHelper.bagContains(inpDB, (Tuple)res.result));
            ++size;
        }
        assertEquals(true, size==inpDB.size());
    }
View Full Code Here


    private MapReduceOper getMROp(){
        return new MapReduceOper(new OperatorKey(scope,nig.getNextNodeId(scope)));
    }
   
    private POLoad getLoad(){
        POLoad ld = new POLoad(new OperatorKey(scope,nig.getNextNodeId(scope)));
        ld.setPc(pigContext);
        return ld;
    }
View Full Code Here

            leaf = leaves.get(0);

        for (MapReduceOper mmro : mergedPlans) {
            mmro.setReduceDone(true);
            FileSpec fileSpec = getTempFileSpec();
            POLoad ld = getLoad();
            ld.setLFile(fileSpec);
            POStore str = getStore();
            str.setSFile(fileSpec);
            mmro.reducePlan.addAsLeaf(str);
            mro.mapPlan.add(ld);
            if(leaf!=null)
View Full Code Here

     * @return
     * @throws IOException
     * @throws PlanException
     */
    private MapReduceOper startNew(FileSpec fSpec, MapReduceOper old) throws PlanException{
        POLoad ld = getLoad();
        ld.setLFile(fSpec);
        MapReduceOper ret = getMROp();
        ret.mapPlan.add(ld);
        MRPlan.add(ret);
        MRPlan.connect(old, ret);
        return ret;
View Full Code Here

                int errCode = 2172;
                String errMsg = "Expected physical operator at root to be POLoad. Found : "+phyOp.getClass().getCanonicalName();
                throw new MRCompilerException(errMsg,errCode,PigException.BUG);
            }
           
            POLoad loader = (POLoad)phyOp;
            LoadFunc loadFunc = (LoadFunc) PigContext.instantiateFuncFromSpec(loader.getLFile().getFuncSpec());
            try {
                if(!(loadFunc instanceof CollectableLoadFunc)){
                    throw new MRCompilerException("While using 'collected' on group; data must be loaded via loader implementing CollectableLoadFunc.");
                }
                loadFunc.setUDFContextSignature(loader.getSignature());
                ((CollectableLoadFunc)loadFunc).ensureAllKeyInstancesInSameSplit();
            } catch (MRCompilerException e){
                throw (e);
            } catch (IOException e) {
                int errCode = 2034;
View Full Code Here

           
        }
       
        private void setUpHashTable() throws IOException {
            FileSpec replFile = new FileSpec(repl,new FuncSpec(PigStorage.class.getName()+"()"));
            POLoad ld = new POLoad(new OperatorKey("Repl File Loader", 1L), replFile);
            PigContext pc = new PigContext(ExecType.MAPREDUCE,ConfigurationUtil.toProperties(PigMapReduce.sJobConf));
            try {
                pc.connect();
           
                ld.setPc(pc);
                Tuple dummyTuple = null;
                for(Result res=ld.getNext(dummyTuple);res.returnStatus!=POStatus.STATUS_EOP;res=ld.getNext(dummyTuple)){
                    Tuple tup = (Tuple) res.result;
                    LoadFunc lf = ((LoadFunc)pc.instantiateFuncFromSpec(ld.getLFile().getFuncSpec()));
                    String key = lf.getLoadCaster().bytesToCharArray(((DataByteArray)tup.get(keyField)).get());
                    Tuple csttup = TupleFactory.getInstance().newTuple(2);
                    csttup.set(0, key);
                    csttup.set(1, lf.getLoadCaster().bytesToInteger(((DataByteArray)tup.get(1)).get()));
                    DataBag vals = null;
View Full Code Here

    public void visitLOLoad(LOLoad loLoad) throws IOException {
        String scope = DEFAULT_SCOPE;
//        System.err.println("Entering Load");
        // The last parameter here is set to true as we assume all files are
        // splittable due to LoadStore Refactor
        POLoad load = new POLoad(new OperatorKey(scope, nodeGen
                .getNextNodeId(scope)));
        load.setAlias(loLoad.getAlias());
        load.setLFile(loLoad.getFileSpec());
        load.setPc(pc);
        load.setResultType(DataType.BAG);
        load.setSignature(loLoad.getAlias());
        currentPlan.add(load);
        logToPhyMap.put(loLoad, load);

        // Load is typically a root operator, but in the multiquery
        // case it might have a store as a predecessor.
View Full Code Here

        // the keys are sent in a tuple. If there is really only
        // 1 join key, it would be the first field of the tuple. If
        // there are multiple Join keys, the tuple itself represents
        // the join key
        Object firstLeftKey = (keys.size() == 1 ? keys.get(0): keys);
        POLoad ld = new POLoad(genKey(), new FileSpec(indexFile, new FuncSpec(indexFileLoadFuncSpec)));
               
        Properties props = new Properties();                                         
        props.setProperty(MapRedUtil.FILE_SYSTEM_NAME, "file:///");
        PigContext pc = new PigContext(ExecType.LOCAL, props);
        ld.setPc(pc);
        index = new LinkedList<Tuple>();
        for(Result res=ld.getNext(dummyTuple);res.returnStatus!=POStatus.STATUS_EOP;res=ld.getNext(dummyTuple))
            index.offer((Tuple) res.result);  

       
        Tuple prevIdxEntry = null;
        Tuple matchedEntry;
View Full Code Here

           
            joinOp.setupRightPipeline(rightPipelinePlan);
      rightMROpr.requestedParallelism = 1; // we need exactly one reducer for indexing job.       
           
            // At this point, we must be operating on map plan of right input and it would contain nothing else other then a POLoad.
            POLoad rightLoader = (POLoad)rightMROpr.mapPlan.getRoots().get(0);           
            LoadFunc rightLoadFunc = (LoadFunc) PigContext.instantiateFuncFromSpec(rightLoader.getLFile().getFuncSpec());
            joinOp.setSignature(rightLoader.getSignature());
            if(rightLoadFunc instanceof IndexableLoadFunc) {
                joinOp.setRightLoaderFuncSpec(rightLoader.getLFile().getFuncSpec());
                joinOp.setRightInputFileName(rightLoader.getLFile().getFileName());
               
                // we don't need the right MROper since
                // the right loader is an IndexableLoadFunc which can handle the index
                // itself
                MRPlan.remove(rightMROpr);
                if(rightMROpr == compiledInputs[0]) {
                    compiledInputs[0] = null;
                } else if(rightMROpr == compiledInputs[1]) {
                    compiledInputs[1] = null;
                }
                rightMROpr = null;
               
                // validate that the join keys in merge join are only                                                                                                                                                                             
                // simple column projections or '*' and not expression - expressions                                                                                                                                                              
                // cannot be handled when the index is built by the storage layer on the sorted                                                                                                                                                   
                // data when the sorted data (and corresponding index) is written.                                                                                                                                                                
                // So merge join will be restricted not have expressions as                                                                                                                                                                       
                // join keys     
                int numInputs = mPlan.getPredecessors(joinOp).size(); // should be 2
                for(int i = 0; i < numInputs; i++) {
                    List<PhysicalPlan> keyPlans = joinOp.getInnerPlansOf(i);
                    for (PhysicalPlan keyPlan : keyPlans) {
                        for(PhysicalOperator op : keyPlan) {
                            if(!(op instanceof POProject)) {
                                int errCode = 1106;
                                String errMsg = "Merge join is possible only for simple column or '*' join keys when using " +
                                rightLoader.getLFile().getFuncSpec() + " as the loader";
                                throw new MRCompilerException(errMsg, errCode, PigException.INPUT);
                            }
                        }
                    }
                }
            } else {
                // Replace POLoad with  indexer.
                String[] indexerArgs = new String[3];
                FileSpec origRightLoaderFileSpec = rightLoader.getLFile();
                indexerArgs[0] = origRightLoaderFileSpec.getFuncSpec().toString();
                if (! (PigContext.instantiateFuncFromSpec(indexerArgs[0]) instanceof OrderedLoadFunc)){
                    int errCode = 1104;
                    String errMsg = "Right input of merge-join must implement " +
                    "OrderedLoadFunc interface. The specified loader "
                    + indexerArgs[0] + " doesn't implement it";
                    throw new MRCompilerException(errMsg,errCode);
                }
                List<PhysicalPlan> rightInpPlans = joinOp.getInnerPlansOf(1);
                indexerArgs[1] = ObjectSerializer.serialize((Serializable)rightInpPlans);
                indexerArgs[2] = ObjectSerializer.serialize(rightPipelinePlan);
                FileSpec lFile = new FileSpec(rightLoader.getLFile().getFileName(),new FuncSpec(MergeJoinIndexer.class.getName(), indexerArgs));
                rightLoader.setLFile(lFile);
   
                // Loader of mro will return a tuple of form -
                // (keyFirst1, keyFirst2, .. , position, splitIndex) See MergeJoinIndexer
                // Now set up a POLocalRearrange which has "all" as the key and tuple fetched
                // by loader as the "value" of POLocalRearrange
View Full Code Here

                FileSpec fSpec = getTempFileSpec();
                ((POStore)mpLeaf).setSFile(fSpec);
                ((POStore)mpLeaf).setIsTmpStore(true);
                mr.setReduceDone(true);
                MapReduceOper limitAdjustMROp = getMROp();
                POLoad ld = getLoad();
                ld.setLFile(fSpec);
                limitAdjustMROp.mapPlan.add(ld);
                POLimit pLimit = new POLimit(new OperatorKey(scope,nig.getNextNodeId(scope)));
                pLimit.setLimit(mr.limit);
                limitAdjustMROp.mapPlan.addAsLeaf(pLimit);
                if (mr.isGlobalSort()) {
View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.