Package org.apache.pig.backend.hadoop.executionengine.mapReduceLayer

Examples of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.SortKeyInfo


    }

    // Build sort key structure from POLocalRearrange
    private static SortKeyInfo getSortKeyInfo(POLocalRearrange rearrange) throws ExecException {
        SortKeyInfo result = new SortKeyInfo();
        List<PhysicalPlan> plans = rearrange.getPlans();
        nextPlan: for (int i = 0; i < plans.size(); i++) {
            PhysicalPlan plan = plans.get(i);
            ColumnChainInfo columnChainInfo = new ColumnChainInfo();
            if (plan.getRoots() == null) {
                log.debug("POLocalRearrange plan is null");
                return null;
            } else if (plan.getRoots().size() != 1) {
                // POLocalRearrange plan contains more than 1 root.
                // Probably there is an Expression operator in the local
                // rearrangement plan, skip this plan
                continue nextPlan;
            } else {
                List<Integer> columns = new ArrayList<Integer>();
                columns
                        .add(rearrange.getIndex()
                                & PigNullableWritable.idxSpace);

                // The first item inside columnChainInfo is set to type Tuple.
                // This value is not actually in use, but it intends to match
                // the type of POProject in reduce side
                columnChainInfo.insert(columns, DataType.TUPLE);

                PhysicalOperator node = plan.getRoots().get(0);
                while (node != null) {
                    if (node instanceof POProject) {
                        POProject project = (POProject) node;
                        if(project.isProjectToEnd()){
                            columnChainInfo.insert(project.getStartCol(),
                                    project.getResultType());
                        }else {
                            columnChainInfo.insert(
                                    project.getColumns(), project.getResultType());
                        }

                        if (plan.getSuccessors(node) == null)
                            node = null;
                        else if (plan.getSuccessors(node).size() != 1) {
                            log.debug(node + " have more than 1 successor");
                            node = null;
                        } else
                            node = plan.getSuccessors(node).get(0);
                    } else
                        // constant, UDF, we will pass
                        continue nextPlan;
                }
            }
            // Let's assume all main key is sorted ascendant, we can further
            // optimize it to match one of the nested sort/distinct key, because we do not
            // really care about how cogroup key are sorted; But it may not be the case
            // if sometime we switch all the comparator to byte comparator, so just
            // leave it as it is for now
            result.insertColumnChainInfo(i, columnChainInfo, true);
        }
        return result;
    }
View Full Code Here


    public static SecondaryKeyOptimizerInfo applySecondaryKeySort(PhysicalPlan mapPlan, PhysicalPlan reducePlan) throws VisitorException {
        log.trace("Entering SecondaryKeyOptimizerUtil.addSecondaryKeySort");
        SecondaryKeyOptimizerInfo secKeyOptimizerInfo = new SecondaryKeyOptimizerInfo();
        List<SortKeyInfo> sortKeyInfos = new ArrayList<SortKeyInfo>();
        SortKeyInfo secondarySortKeyInfo = null;
        List<POToChange> sortsToRemove = null;
        List<POToChange> distinctsToChange = null;


        List<PhysicalOperator> mapLeaves = mapPlan.getLeaves();
        if (mapLeaves == null || mapLeaves.size() != 1) {
            log.debug("Expected map to have single leaf! Skip secondary key optimizing");
            return null;
        }
        PhysicalOperator mapLeaf = mapLeaves.get(0);

        // Figure out the main key of the map-reduce job from POLocalRearrange
        try {
            if (mapLeaf instanceof POLocalRearrange) {
                SortKeyInfo sortKeyInfo = getSortKeyInfo((POLocalRearrange) mapLeaf);
                if (sortKeyInfo == null) {
                    log.debug("Cannot get sortKeyInfo from POLocalRearrange, skip secondary key optimizing");
                    return null;
                }
                sortKeyInfos.add(sortKeyInfo);
            } else if (mapLeaf instanceof POUnion) {
                List<PhysicalOperator> preds = mapPlan
                        .getPredecessors(mapLeaf);
                for (PhysicalOperator pred : preds) {
                    if (pred instanceof POLocalRearrange) {
                        SortKeyInfo sortKeyInfo = getSortKeyInfo((POLocalRearrange) pred);
                        if (sortKeyInfo == null) {
                            log.debug("Cannot get sortKeyInfo from POLocalRearrange, skip secondary key optimizing");
                            return null;
                        }
                        sortKeyInfos.add(sortKeyInfo);
View Full Code Here

            }
        }

        // We see PODistinct, check which key it is using
        public boolean processDistinct(PODistinct distinct) throws FrontendException {
            SortKeyInfo keyInfos = new SortKeyInfo();
            try {
                keyInfos.insertColumnChainInfo(0,
                        (ColumnChainInfo) columnChainInfo.clone(), true);
            } catch (CloneNotSupportedException e) { // We implement Clonable,
                                                     // impossible to get here
            }
View Full Code Here

            return false;
        }

        // We see POSort, check which key it is using
        public boolean processSort(POSort sort) throws FrontendException{
            SortKeyInfo keyInfo = new SortKeyInfo();
            for (int i = 0; i < sort.getSortPlans().size(); i++) {
                PhysicalPlan sortPlan = sort.getSortPlans().get(i);
                ColumnChainInfo sortChainInfo = null;
                try {
                    sortChainInfo = (ColumnChainInfo) columnChainInfo.clone();
                } catch (CloneNotSupportedException e) { // We implement
                                                         // Clonable, impossible
                                                         // to get here
                }
                boolean r = false;
                try {
                    r = collectColumnChain(sortPlan, sortChainInfo);
                } catch (PlanException e) {
                    int errorCode = 2206;
                    throw new FrontendException("Error visiting POSort inner plan",
                            errorCode, e);
                }
                if (r==true) // if we saw physical operator other than project in sort plan
                {
                    return true;
                }
                keyInfo.insertColumnChainInfo(i, sortChainInfo, sort
                        .getMAscCols().get(i));
            }
            // if it is part of main key
            for (SortKeyInfo sortKeyInfo : sortKeyInfos) {
                if (sortKeyInfo.moreSpecificThan(keyInfo)) {
View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.SortKeyInfo

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.