Package com.inadco.hbl.client.impl

Source Code of com.inadco.hbl.client.impl.AggregateQueryImpl

/*
*
*  Copyright © 2010, 2011 Inadco, Inc. All rights reserved.
*     Licensed under the Apache License, Version 2.0 (the "License");
*     you may not use this file except in compliance with the License.
*     You may obtain a copy of the License at
*         http://www.apache.org/licenses/LICENSE-2.0
*     Unless required by applicable law or agreed to in writing, software
*     distributed under the License is distributed on an "AS IS" BASIS,
*     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*     See the License for the specific language governing permissions and
*     limitations under the License.
*/
package com.inadco.hbl.client.impl;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;

import org.apache.commons.lang.Validate;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.util.Bytes;

import com.inadco.hbl.api.AggregateFunctionRegistry;
import com.inadco.hbl.api.Cube;
import com.inadco.hbl.api.Cuboid;
import com.inadco.hbl.api.Dimension;
import com.inadco.hbl.api.Measure;
import com.inadco.hbl.api.Range;
import com.inadco.hbl.client.AggregateQuery;
import com.inadco.hbl.client.AggregateResultSet;
import com.inadco.hbl.client.HblException;
import com.inadco.hbl.client.HblQueryClient;
import com.inadco.hbl.client.impl.scanner.ScanSpec;

/**
* Projection query implementation.
* <P>
*
* @author dmitriy
*
*/
public class AggregateQueryImpl implements AggregateQuery {

    protected HblQueryClient            client;
    protected Cube                      cube;
    private ExecutorService             es;
    /**
     * dim name -> range slice requested
     */
    private Map<String, List<Slice>>    dimSlices       = new HashMap<String, List<Slice>>();
    private Set<String>                 measures        = new HashSet<String>();

    protected List<String>              groupDimensions = new ArrayList<String>();
    private HTablePool                  tpool;
    protected AggregateFunctionRegistry afr;
    protected boolean                   allowComplements;

    public AggregateQueryImpl(HblQueryClient client, ExecutorService es, HTablePool tpool) {
        super();
        this.es = es;
        this.tpool = tpool;
        this.client = client;
    }

    public AggregateQuery setCube(String cubeName) throws HblException {
        if (cube == null || !cube.getName().equals(cubeName)) {
            cube = client.getCube(cubeName);
            afr = cube.getAggregateFunctionRegistry();
        }
        return this;
    }

    @Override
    public AggregateQuery addMeasure(String measure) {
        Validate.notNull(measure);
        Validate.notNull(cube, "Cube is not set");
        Validate.isTrue(cube.getMeasures().containsKey(measure), "Unknown measure name");
        measures.add(measure);
        return this;
    }

    @Override
    public AggregateQuery addGroupBy(String dimName) {
        Validate.notNull(dimName);
        Validate.notNull(cube, "Cube is not set");
        Validate.isTrue(cube.getDimensions().containsKey(dimName), "no such dimension found");

        groupDimensions.add(dimName);
        return this;
    }

    @Override
    public AggregateQuery addClosedSlice(String dimension, Object leftBound, Object rightBound) {
        return addSlice(dimension, leftBound, false, rightBound, false);
    }

    @Override
    public AggregateQuery addOpenSlice(String dimension, Object leftBound, Object rightBound) {
        return addSlice(dimension, leftBound, true, rightBound, true);
    }

    @Override
    public AggregateQuery addHalfOpenSlice(String dimension, Object leftBound, Object rightBound) {
        return addSlice(dimension, leftBound, false, rightBound, true);
    }

    @Override
    public AggregateQuery addSlice(String dimension,
                                   Object leftBound,
                                   boolean leftOpen,
                                   Object rightBound,
                                   boolean rightOpen) {
        Validate.notNull(dimension);
        Validate.notNull(cube, "Cube is not set");
        Validate.isTrue(cube.getDimensions().containsKey(dimension));

        // passing null for both bounds clears any previously registered slices
        // for this dimension
        if (leftBound == null && rightBound == null) {
            dimSlices.remove(dimension);
            return this;
        }
        List<Slice> sliceSet = dimSlices.get(dimension);
        if (sliceSet == null)
            dimSlices.put(dimension, sliceSet = new ArrayList<Slice>(4));
        sliceSet.add(new Slice(leftBound, leftOpen, rightBound, rightOpen));

        return this;
    }

    /**
     * Generate the scan specifications for the current query state.
     *
     * @param dimName2GroupKeyOffsetMap
     *            optional (created internally if null); filled with the offset
     *            of each cuboid dimension within the composite cuboid key.
     * @param measureName2indexMap
     *            optional (created internally if null); filled with the index
     *            of each requested measure in the measure qualifier array.
     * @return one scan spec per combination of per-dimension key ranges
     * @throws IOException
     * @throws HblException
     */
    public List<ScanSpec> generateScanSpecs(Map<String, Integer> dimName2GroupKeyOffsetMap,
                                            Map<String, Integer> measureName2indexMap) throws IOException, HblException {
        if (dimName2GroupKeyOffsetMap == null)
            dimName2GroupKeyOffsetMap = new HashMap<String, Integer>(11);
        if (measureName2indexMap == null)
            measureName2indexMap = new HashMap<String, Integer>(11);
       
        Validate.notNull(cube, "Cube is not set");
        Cuboid cuboid = findCuboid();

        Validate.notNull(cuboid, "Unable to find a suitable cuboid for the slice query.");

        /*
         * FIXME, TODO: check slices for overlap. If slices overlap, not only
         * would we perform more scans than needed, but the results would also
         * contain duplicate counts.
         *
         * For now we just have to assume that slices do not overlap.
         */
        List<ScanSpec> scanSpecs = new ArrayList<ScanSpec>();

        List<Range> partialSpec = new ArrayList<Range>();

        int groupKeyLen = 0, curKeyLen = 0;

        for (Dimension dim : cuboid.getCuboidDimensions()) {
            String dimName = dim.getName();
            dimName2GroupKeyOffsetMap.put(dimName, curKeyLen);
            curKeyLen += dim.getKeyLen();
            if (groupDimensions.contains(dim.getName()))
                groupKeyLen = curKeyLen;
        }

        // for (int i = 0; i < numGroupKeys; i++) {
        // Dimension dim = cuboid.getCuboidDimensions().get(i);
        // dimName2GroupKeyOffsetMap.put(dim.getName(), groupKeyLen);
        // groupKeyLen += dim.getKeyLen();
        // }

        byte[][] measureQualifiers = new byte[measures.size()][];
        int mCnt = 0;
        for (String mName : measures) {
            measureName2indexMap.put(mName, mCnt);
            measureQualifiers[mCnt++] = Bytes.toBytes(mName);
        }

//        Measure[] measuresArr = new Measure[measures.size()];
//
//        int i = 0;
//        Map<String, ? extends Measure> measureMap = cube.getMeasures();
//        // we already validated measure names are valid during add()
//        for (String measure : measures)
//            measuresArr[i++] = measureMap.get(measure);

        generateScanSpecs(cuboid, scanSpecs, partialSpec, 0, groupKeyLen, SliceOperation.ADD, measureQualifiers);

        return scanSpecs;

    }

    @Override
    public AggregateResultSet execute() throws HblException {
        try {
            Map<String, Integer> dimName2GroupKeyOffsetMap = new HashMap<String, Integer>();
            Map<String, Integer> measureName2indexMap = new HashMap<String, Integer>();
            List<ScanSpec> scanSpecs = generateScanSpecs(dimName2GroupKeyOffsetMap, measureName2indexMap);

            return createResultSet(scanSpecs,
                                   es,
                                   tpool,
                                   afr,
                                   measureName2indexMap,
                                   dimName2GroupKeyOffsetMap,
                                   null,
                                   null,
                                   null);
        } catch (IOException exc) {
            throw new HblException(exc.getMessage(), exc);
        } finally {
            reset();
        }
    }

    public AggregateResultSet execute(byte[] startSplitKey, byte[] endSplitKey, String enforcedCuboidTableName)
        throws HblException {
        try {
            Map<String, Integer> dimName2GroupKeyOffsetMap = new HashMap<String, Integer>();
            Map<String, Integer> measureName2indexMap = new HashMap<String, Integer>();
            List<ScanSpec> scanSpecs = generateScanSpecs(dimName2GroupKeyOffsetMap, measureName2indexMap);

            return createResultSet(scanSpecs,
                                   es,
                                   tpool,
                                   afr,
                                   measureName2indexMap,
                                   dimName2GroupKeyOffsetMap,
                                   startSplitKey,
                                   endSplitKey,
                                   enforcedCuboidTableName);
        } catch (IOException exc) {
            throw new HblException(exc.getMessage(), exc);
        } finally {
            reset();
        }

    }

    protected void reset() {
        dimSlices.clear();
        measures.clear();
        groupDimensions.clear();
    }

    protected boolean isAllowComplements() {
        return allowComplements;
    }

    protected void setAllowComplements(boolean allowComplements) {
        this.allowComplements = allowComplements;
    }

    /**
     *
     * @param scanSpecs
     * @param es
     * @param tpool
     * @param afr
     * @param measureName2IndexMap
     * @param dimName2GroupKeyOffsetMap
     * @param startSplitKey
     *            optional: if given, enforce MR split constraints per the
     *            half-open interval [startSplitKey, endSplitKey). Note that in
     *            this case endSplitKey can still be null, which means
     *            "till the end of the table".
     * @param endSplitKey
     *            optional.
     * @param enforcedCuboidTableName
     *            optional. If passed in, ensures that the cuboid table
     *            selected is the same as the one given, thus enforcing an
     *            idempotent cuboid choice. Used only by HblInputSplit to
     *            assert idempotent optimizer processing.
     * @return a new result set over the generated scan specs
     * @throws IOException
     */
    protected AggregateResultSetImpl createResultSet(final List<ScanSpec> scanSpecs,
                                                     final ExecutorService es,
                                                     final HTablePool tpool,
                                                     final AggregateFunctionRegistry afr,
                                                     final Map<String, Integer> measureName2IndexMap,
                                                     final Map<String, Integer> dimName2GroupKeyOffsetMap,
                                                     final byte[] startSplitKey,
                                                     final byte[] endSplitKey,
                                                     final String enforcedCuboidTableName) throws IOException {
        return new AggregateResultSetImpl(
            scanSpecs,
            es,
            tpool,
            afr,
            measureName2IndexMap,
            dimName2GroupKeyOffsetMap,
            startSplitKey,
            endSplitKey,
            enforcedCuboidTableName);
    }

    /**
     * Generate the cartesian product of all individual dimension scans, also
     * flipping the scan operation between ADD and COMPLEMENT types where
     * complement ranges are encountered. For example, if one sliced dimension
     * optimizes into two ranges and another into three (the remaining
     * dimensions contributing one "all" range each), six scan specs are
     * generated.
     *
     * @param cuboid
     * @param scanHolder
     * @param partialSpec
     * @param dimIndex
     * @param groupKeyLen
     * @param so
     * @param measureQualifiers
     */
    private void generateScanSpecs(Cuboid cuboid,
                                   List<ScanSpec> scanHolder,
                                   List<Range> partialSpec,
                                   int dimIndex,
                                   int groupKeyLen,
                                   SliceOperation so,
                                   byte[][] measureQualifiers) {
        List<Dimension> dimensions = cuboid.getCuboidDimensions();
        if (dimIndex == dimensions.size()) {
            // add leaf
            scanHolder.add(new ScanSpec(
                measureQualifiers,
                groupKeyLen,
                partialSpec.toArray(new Range[dimIndex]),
                cuboid,
                so));
            return;
        }
        Dimension dim = dimensions.get(dimIndex);
        List<Slice> slices = dimSlices.get(dim.getName());
        if (slices == null) {

            // generate 'total' slice
            Range allRange = dim.allRange();

            if (partialSpec.size() == dimIndex)
                partialSpec.add(allRange);
            else
                partialSpec.set(dimIndex, allRange);

            generateScanSpecs(cuboid, scanHolder, partialSpec, dimIndex + 1, groupKeyLen, so, measureQualifiers);
        } else {
            if (slices.size() != 1)
                throw new UnsupportedOperationException(
                    "queries to multiple slices of the same dimension are not supported (yet)!");
            Slice slice = slices.iterator().next();
            Range[] ranges = dim.optimizeSliceScan(slice, allowComplements);

            Validate.notEmpty(ranges);

            for (Range r : ranges) {
                SliceOperation nextSo = so;

                /*
                 * Clarification:
                 *
                 * We introduce the slice operation (a complement or an
                 * additive operation in the plan) just so that the dimension
                 * can advise us on the most optimal combination of ranges.
                 * Note that a complement slice S as in A\S becomes a union as
                 * in A\(S\S2) = (A\S) U S2 if S2 \subset S. By similar
                 * argumentation across different dimensions, we just invert
                 * the slice operation in each subsequent dimension encountered
                 * (since each subsequent dimension always operates on a subset
                 * of the prior dimension's hyper slice).
                 */
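                /*
                 * Illustration (hypothetical plan): suppose the first
                 * dimension's slice was optimized as "whole range MINUS an
                 * excluded sub-range" (a COMPLEMENT range). Any range of a
                 * subsequent dimension scanned underneath that complement is
                 * itself being subtracted, so its own COMPLEMENT ranges
                 * contribute positively again. That is why a COMPLEMENT range
                 * toggles the operation inherited from the outer dimensions
                 * below, rather than forcing it to COMPLEMENT.
                 */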
                if (r.getSliceOperation() == SliceOperation.COMPLEMENT) {
                    if (nextSo == SliceOperation.ADD)
                        nextSo = SliceOperation.COMPLEMENT;
                    else
                        nextSo = SliceOperation.ADD;
                }
                if (partialSpec.size() == dimIndex)
                    partialSpec.add(r);
                else
                    partialSpec.set(dimIndex, r);
                generateScanSpecs(cuboid, scanHolder, partialSpec, dimIndex + 1, groupKeyLen, nextSo, measureQualifiers);
            }

        }
    }

    private Cuboid findCuboid() {

        /*
         * We need to find a cuboid with a composite key where the grouping
         * dimensions are stacked on the left, but where all of the sliced
         * dimensions are also present (in any position).
         */
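        /*
         * Example (hypothetical dimension names): for a query grouping by
         * "time" and slicing on "userType", a cuboid with path
         * [time, userType] qualifies directly; a cuboid with path
         * [userType, time] qualifies only if the userType slice is degenerate
         * (a single closed point), per the check below.
         */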

        Set<String> dimensionSubset = new HashSet<String>();
        dimensionSubset.addAll(dimSlices.keySet());
        dimensionSubset.addAll(groupDimensions);

        Cuboid cuboid = null;

        // find a suitable cuboid per above with fewest extra dimensions.
        for (Cuboid c : cube.getCuboids()) {

            List<String> cPath = c.getCuboidPath();
            // filter out cuboids that are too small (fewer dimensions than needed)
            if (cPath.size() < dimensionSubset.size())
                continue;

            // filter out those that don't contain all the dimensions we need
            if (!cPath.containsAll(dimensionSubset))
                continue;

            // now check group dimensions that must be stacked on the left.
            int cnt = groupDimensions.size();
            for (String dimName : cPath) {
                if (groupDimensions.contains(dimName)) {
                    cnt--;
                } else {
                    /*
                     * An easy but still quite effective optimization: if a
                     * slice is degenerate, its dimension can be pushed left in
                     * the cuboid without breaking the inline grouping
                     * prerequisites. Surprisingly, this is quite often the
                     * case, since degenerate slicing is common.
                     */
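                    /*
                     * A degenerate slice is a single closed point, e.g. (with
                     * a hypothetical dimension and value)
                     * addClosedSlice("country", "US", "US"): both bounds are
                     * closed and equal, so the dimension collapses to a single
                     * key value and does not disturb the left-stacked grouping
                     * order.
                     */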
                    List<Slice> slices = dimSlices.get(dimName);
                    if (slices == null || slices.size() != 1)
                        break; // clearly not degenerate

                    Slice slice = slices.get(0);
                    if (slice.isLeftOpen() || slice.isRightOpen()
                        || !slice.getLeftBound().equals(slice.getRightBound()))
                        break; // not degenerate.
                }
            }
            if (cnt > 0)
                continue;

            // found qualifying cuboid. good.
            if (cuboid == null || cPath.size() < cuboid.getCuboidPath().size())
                cuboid = c;
        }

        return cuboid;

    }

}
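
For reference, a minimal usage sketch of the builder-style API defined above. This is an illustration only: in practice the query instance would normally be obtained from HblQueryClient rather than constructed directly, the cube, measure and dimension names and the slice bounds are hypothetical placeholders, and exception handling is reduced to a throws clause. Only methods visible in this listing are used; iterating the returned AggregateResultSet is elided since that class is not part of this source.

    static AggregateResultSet exampleQuery(AggregateQueryImpl query) throws HblException {
        // hypothetical cube, measure and dimension names
        query.setCube("exampleCube")
             .addMeasure("impressionCount")
             .addGroupBy("dim1")
             // half-open slice [start, end); bound types depend on the dimension implementation
             .addHalfOpenSlice("time", 1300000000000L, 1300086400000L);
        // execute() builds scan specs against the best-fitting cuboid and then
        // resets the accumulated slices, measures and group-by dimensions
        return query.execute();
    }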