Package org.voltdb.planner

Source Code of org.voltdb.planner.ParsedSelectStmt

/* This file is part of VoltDB.
* Copyright (C) 2008-2014 VoltDB Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with VoltDB.  If not, see <http://www.gnu.org/licenses/>.
*/

package org.voltdb.planner;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.hsqldb_voltpatches.VoltXMLElement;
import org.json_voltpatches.JSONException;
import org.voltdb.VoltType;
import org.voltdb.catalog.Column;
import org.voltdb.catalog.ColumnRef;
import org.voltdb.catalog.Database;
import org.voltdb.catalog.Index;
import org.voltdb.catalog.Table;
import org.voltdb.compiler.StatementCompiler;
import org.voltdb.expressions.AbstractExpression;
import org.voltdb.expressions.AggregateExpression;
import org.voltdb.expressions.ConstantValueExpression;
import org.voltdb.expressions.ExpressionUtil;
import org.voltdb.expressions.OperatorExpression;
import org.voltdb.expressions.ParameterValueExpression;
import org.voltdb.expressions.TupleValueExpression;
import org.voltdb.planner.parseinfo.BranchNode;
import org.voltdb.planner.parseinfo.JoinNode;
import org.voltdb.planner.parseinfo.StmtSubqueryScan;
import org.voltdb.planner.parseinfo.StmtTableScan;
import org.voltdb.planner.parseinfo.StmtTargetTableScan;
import org.voltdb.plannodes.LimitPlanNode;
import org.voltdb.plannodes.NodeSchema;
import org.voltdb.plannodes.SchemaColumn;
import org.voltdb.types.ExpressionType;
import org.voltdb.types.JoinType;

public class ParsedSelectStmt extends AbstractParsedStmt {

    public static class ParsedColInfo implements Cloneable{
        public String alias = null;
        public String columnName = null;
        public String tableName = null;
        public String tableAlias = null;
        public AbstractExpression expression = null;
        public int index = 0;

        // orderby stuff
        public boolean orderBy = false;
        public boolean ascending = true;

        // groupby
        public boolean groupBy = false;

        @Override
        public boolean equals (Object obj) {
            if (obj == null) return false;
            if (obj instanceof ParsedColInfo == false) return false;
            ParsedColInfo col = (ParsedColInfo) obj;
            if ( columnName != null && columnName.equals(col.columnName) &&
                    tableName != null && tableName.equals(col.tableName) &&
                    tableAlias != null && tableAlias.equals(col.tableAlias) &&
                    expression != null && expression.equals(col.expression) )
                return true;
            return false;
        }

        // Based on implementation on equals().
        @Override
        public int hashCode() {
            int result = new HashCodeBuilder(17, 31).
                    append(columnName).append(tableName).append(tableAlias).
                    toHashCode();
            if (expression != null) {
                result += expression.hashCode();
            }
            return result;
        }

        @Override
        public ParsedColInfo clone() {
            ParsedColInfo col = null;
            try {
                col = (ParsedColInfo) super.clone();
            } catch (CloneNotSupportedException e) {
                e.printStackTrace();
            }
            col.expression = (AbstractExpression) expression.clone();
            return col;
        }
    }

    public ArrayList<ParsedColInfo> m_displayColumns = new ArrayList<ParsedColInfo>();
    public ArrayList<ParsedColInfo> m_orderColumns = new ArrayList<ParsedColInfo>();
    public AbstractExpression m_having = null;
    public ArrayList<ParsedColInfo> m_groupByColumns = new ArrayList<ParsedColInfo>();
    private boolean m_groupAndOrderByPermutationWasTested = false;
    private boolean m_groupAndOrderByPermutationResult = false;

    // It will store the final projection node schema for this plan if it is needed.
    // Calculate once, and use it everywhere else.
    private NodeSchema m_projectSchema = null;

    // It may has the consistent element order as the displayColumns
    public ArrayList<ParsedColInfo> m_aggResultColumns = new ArrayList<ParsedColInfo>();
    public Map<String, AbstractExpression> m_groupByExpressions = null;

    private ArrayList<ParsedColInfo> m_avgPushdownDisplayColumns = null;
    private ArrayList<ParsedColInfo> m_avgPushdownAggResultColumns = null;
    private ArrayList<ParsedColInfo> m_avgPushdownOrderColumns = null;
    private AbstractExpression m_avgPushdownHaving = null;
    private NodeSchema m_avgPushdownNewAggSchema;
    private boolean m_hasPartitionColumnInGroupby = false;
    private boolean m_hasAggregateDistinct = false;

    // Limit plan node information.
    private LimitPlanNode m_limitNodeTop = null;
    private LimitPlanNode m_limitNodeDist = null;
    public boolean m_limitCanPushdown;
    private long m_limit = -1;
    private long m_offset = 0;
    private long m_limitParameterId = -1;
    private long m_offsetParameterId = -1;

    private boolean m_distinct = false;
    private boolean m_hasComplexAgg = false;
    private boolean m_hasComplexGroupby = false;
    private boolean m_hasAggregateExpression = false;
    private boolean m_hasAverage = false;

    public MaterializedViewFixInfo m_mvFixInfo = new MaterializedViewFixInfo();

    private boolean m_hasLargeNumberOfTableJoins = false;
    // this list is the join order either from the user or parser if it has large number of table joins.
    private final ArrayList<JoinNode> m_joinOrderList = new ArrayList<>();

    /**
    * Class constructor
    * @param paramValues
    * @param db
    */
    public ParsedSelectStmt(String[] paramValues, Database db) {
        super(paramValues, db);
    }

    @Override
    void parse(VoltXMLElement stmtNode) {
        String node;
        if ((node = stmtNode.attributes.get("distinct")) != null)
            m_distinct = Boolean.parseBoolean(node);

        VoltXMLElement limitElement = null, offsetElement = null, havingElement = null;
        VoltXMLElement displayElement = null, orderbyElement = null, groupbyElement = null;
        for (VoltXMLElement child : stmtNode.children) {
            if (child.name.equalsIgnoreCase("limit")) {
                limitElement = child;
            } else if (child.name.equalsIgnoreCase("offset")) {
                offsetElement = child;
            } else if (child.name.equalsIgnoreCase("columns")) {
                displayElement = child;
            } else if (child.name.equalsIgnoreCase("ordercolumns")) {
                orderbyElement = child;
            } else if (child.name.equalsIgnoreCase("groupcolumns")) {
                groupbyElement = child;
            } else if (child.name.equalsIgnoreCase("having")) {
                havingElement = child;
            }
        }
        parseLimitAndOffset(limitElement, offsetElement);

        if (m_aggregationList == null) {
            m_aggregationList = new ArrayList<AbstractExpression>();
        }
        // We want to extract display first, groupBy second before processing orderBy
        // Because groupBy and orderBy need display columns to tag its columns
        // OrderBy needs groupBy columns to stop recursively finding TVEs for pass-through columns.
        assert(displayElement != null);
        parseDisplayColumns(displayElement, false);

        if (groupbyElement != null) {
            parseGroupByColumns(groupbyElement);
            insertToColumnList(m_aggResultColumns, m_groupByColumns);
        }

        if (orderbyElement != null && ! hasAOneRowResult()) {
            parseOrderColumns(orderbyElement, false);
        }

        if (havingElement != null) {
            parseHavingExpression(havingElement, false);
        }
        // At this point, we have collected all aggregations in the select statement.
        // We do not need aggregationList container in parseXMLtree
        // Make it null to prevent others adding elements to it when parsing the tree
        m_aggregationList = null;

        if (needComplexAggregation()) {
            fillUpAggResultColumns();
        } else {
            m_aggResultColumns = m_displayColumns;
        }
        placeTVEsinColumns();

        // prepare the limit plan node if it needs one.
        prepareLimitPlanNode();

        // Prepare for the AVG push-down optimization only if it might be required.
        if (mayNeedAvgPushdown()) {
            processAvgPushdownOptimization(displayElement, orderbyElement, groupbyElement, havingElement);
        }

        prepareMVBasedQueryFix();
    }

    private void processAvgPushdownOptimization (VoltXMLElement displayElement,
            VoltXMLElement orderbyElement, VoltXMLElement groupbyElement, VoltXMLElement havingElement) {

        ArrayList<ParsedColInfo> tmpDisplayColumns = m_displayColumns;
        m_displayColumns = new ArrayList<ParsedColInfo>();
        ArrayList<ParsedColInfo> tmpAggResultColumns = m_aggResultColumns;
        m_aggResultColumns = new ArrayList<ParsedColInfo>();
        ArrayList<ParsedColInfo> tmpOrderColumns = m_orderColumns;
        m_orderColumns = new ArrayList<ParsedColInfo>();
        AbstractExpression tmpHaving = m_having;


        boolean tmpHasComplexAgg = hasComplexAgg();
        NodeSchema tmpNodeSchema = m_projectSchema;

        // Make final schema output null to get a new schema when calling placeTVEsinColumns().
        m_projectSchema = null;

        m_aggregationList = new ArrayList<AbstractExpression>();
        assert(displayElement != null);
        parseDisplayColumns(displayElement, true);

        if (groupbyElement != null) {
            insertToColumnList(m_aggResultColumns, m_groupByColumns);
        }
        if (orderbyElement != null) {
            parseOrderColumns(orderbyElement, true);
        }
        if (havingElement != null) {
            parseHavingExpression(havingElement, true);
        }
        m_aggregationList = null;
        fillUpAggResultColumns();
        placeTVEsinColumns();

        // Switch them back
        m_avgPushdownDisplayColumns = m_displayColumns;
        m_avgPushdownAggResultColumns = m_aggResultColumns;
        m_avgPushdownOrderColumns = m_orderColumns;
        m_avgPushdownNewAggSchema = m_projectSchema;
        m_avgPushdownHaving = m_having;

        m_displayColumns = tmpDisplayColumns;
        m_aggResultColumns = tmpAggResultColumns;
        m_orderColumns = tmpOrderColumns;
        m_projectSchema = tmpNodeSchema;
        m_hasComplexAgg = tmpHasComplexAgg;
        m_having = tmpHaving;
    }

    /**
     * Switch the optimal set for pushing down AVG
     */
    public void switchOptimalSuiteForAvgPushdown () {
        m_displayColumns = m_avgPushdownDisplayColumns;
        m_aggResultColumns = m_avgPushdownAggResultColumns;
        m_orderColumns = m_avgPushdownOrderColumns;
        m_projectSchema = m_avgPushdownNewAggSchema;
        m_hasComplexAgg = true;
        m_having = m_avgPushdownHaving;
    }

    /**
     * Prepare for the mv based distributed query fix only if it might be required.
     */
    private void prepareMVBasedQueryFix() {

        // ENG-5386: Edge cases query returning correct answers with aggregation push down does not need reAggregation work.
        if (m_hasComplexGroupby) {
            m_mvFixInfo.setEdgeCaseQueryNoFixNeeded(false);
        }

        // Handle joined query case case.
        // MV partitioned table without partition column can only join with replicated tables.
        // For all tables in this query, the # of tables that need to be fixed should not exceed one.
        for (StmtTableScan mvTableScan: m_tableAliasMap.values()) {
            Set<SchemaColumn> mvNewScanColumns = new HashSet<SchemaColumn>();

            Collection<SchemaColumn> columns = mvTableScan.getScanColumns();
            // For a COUNT(*)-only scan, a table may have no scan columns.
            // For a joined query without processed columns from table TB, TB has no scan columns
            if (columns != null) {
                mvNewScanColumns.addAll(columns);
            }
            // ENG-5669: HAVING aggregation and order by aggregation also need to be checked.
            if (m_mvFixInfo.processMVBasedQueryFix(mvTableScan, mvNewScanColumns, m_joinTree, m_aggResultColumns, groupByColumns())) {
                break;
            }
        }
    }

    private boolean needComplexAggregation () {
        if (!hasAggregateExpression() && !isGrouped()) {
            m_hasComplexAgg = false;
            return false;
        }
        if (hasComplexAgg()) return true;

        int numDisplayCols = m_displayColumns.size();
        if (m_aggResultColumns.size() > numDisplayCols) {
            m_hasComplexAgg = true;
            return true;
        }

        for (ParsedColInfo col : m_displayColumns) {
            if (!m_aggResultColumns.contains(col)) {
                // Now Only TVEs in displayColumns are left for AggResultColumns
                if (col.expression instanceof TupleValueExpression) {
                    m_aggResultColumns.add(col);
                } else {
                    // Col must be complex expression (like: TVE + 1, TVE + AGG)
                    m_hasComplexAgg = true;
                    return true;
                }
            }
        }
        // size of aggResultColumns list should be the same as numDisplayCols
        // as it would be a substitue of DisplayCols.
        if (m_aggResultColumns.size() != numDisplayCols) {
            // Display columns have duplicated Aggs or TVEs (less than case)
            // Display columns have several pass-through columns if group by primary key (larger than case)
            m_hasComplexAgg = true;
            return true;
        }

        // Case: Groupby cols do not appear in SELECT list
        // Find duplicates
        HashSet <ParsedColInfo> tmpContainer = new HashSet<ParsedColInfo>();

        for (int i=0; i < numDisplayCols; i++) {
            ParsedColInfo icol = m_displayColumns.get(i);
            if (tmpContainer.contains(icol)) {
                m_hasComplexAgg = true;
                return true;
            } else {
                tmpContainer.add(icol);
            }
        }

        return false;
    }

    /**
     * Continue adding TVEs from DisplayColumns that are left in function needComplexAggregation().
     * After this function, aggResultColumns construction work.
     */
    private void fillUpAggResultColumns () {
        for (ParsedColInfo col: m_displayColumns) {
            if (!m_aggResultColumns.contains(col)) {
                if (col.expression instanceof TupleValueExpression) {
                    m_aggResultColumns.add(col);
                } else {
                    // Col must be complex expression (like: TVE + 1, TVE + AGG)
                    List<TupleValueExpression> tveList = new ArrayList<TupleValueExpression>();
                    findAllTVEs(col.expression, tveList);
                    insertTVEsToAggResultColumns(tveList);
                }
            }
        }
    }

    /**
     * Generate new output Schema and Place TVEs for display columns if needed.
     * Place TVEs for order by columns always.
     */
    private void placeTVEsinColumns () {
        // Build the association between the table column with its index
        Map <AbstractExpression, Integer> aggTableIndexMap = new HashMap <AbstractExpression,Integer>();
        Map <Integer, ParsedColInfo> indexToColumnMap = new HashMap <Integer, ParsedColInfo>();
        int index = 0;
        for (ParsedColInfo col: m_aggResultColumns) {
            aggTableIndexMap.put(col.expression, index);
            if ( col.alias == null) {
                // hack any unique string
                col.alias = "$$_" + col.expression.getExpressionType().symbol() + "_$$_" + index;
            }
            indexToColumnMap.put(index, col);
            index++;
        }

        // Replace TVE for display columns
        if (m_projectSchema == null) {
            m_projectSchema = new NodeSchema();
            for (ParsedColInfo col : m_displayColumns) {
                AbstractExpression expr = col.expression;
                if (hasComplexAgg()) {
                    expr = col.expression.replaceWithTVE(aggTableIndexMap, indexToColumnMap);
                }
                SchemaColumn schema_col = new SchemaColumn(col.tableName, col.tableAlias, col.columnName, col.alias, expr);
                m_projectSchema.addColumn(schema_col);
            }
        }

        // Replace TVE for order by columns
        for (ParsedColInfo orderCol : m_orderColumns) {
            AbstractExpression expr = orderCol.expression.replaceWithTVE(aggTableIndexMap, indexToColumnMap);

            if (hasComplexAgg()) {
                orderCol.expression = expr;
            } else {
                // This if case checking is to rule out cases like: select PKEY + A_INT from O1 order by PKEY + A_INT,
                // This case later needs a projection node on top of sort node to make it work.

                // Assuming the restrictions: Order by columns are (1) columns from table
                // (2) tag from display columns (3) actual expressions from display columns
                // Currently, we do not allow order by complex expressions that are not in display columns

                // If there is a complexGroupby at his point, it means that Display columns contain all the order by columns.
                // In that way, this plan does not require another projection node on top of sort node.
                if (orderCol.expression.hasAnySubexpressionOfClass(AggregateExpression.class) ||
                        hasComplexGroupby()) {
                    orderCol.expression = expr;
                }
            }
        }

        // Replace TVE for group by columns
        m_groupByExpressions = new HashMap<String, AbstractExpression>();
        for (ParsedColInfo groupbyCol: m_groupByColumns) {
            assert(aggTableIndexMap.get(groupbyCol.expression) != null);
            assert(m_groupByExpressions.get(groupbyCol.alias) == null);
            AbstractExpression expr = groupbyCol.expression.replaceWithTVE(aggTableIndexMap, indexToColumnMap);
            m_groupByExpressions.put(groupbyCol.alias,expr);
        }

        if (m_having != null) {
            m_having = m_having.replaceWithTVE(aggTableIndexMap, indexToColumnMap);
            ExpressionUtil.finalizeValueTypes(m_having);
        }

    }

    /**
     * ParseDisplayColumns and ParseOrderColumns will call this function
     * to add Aggregation expressions to aggResultColumns
     * @param aggColumns
     * @param cookedCol
     */
    private void insertAggExpressionsToAggResultColumns (List<AbstractExpression> aggColumns, ParsedColInfo cookedCol) {
        for (AbstractExpression expr: aggColumns) {
            ParsedColInfo col = new ParsedColInfo();
            col.expression = (AbstractExpression) expr.clone();
            assert(col.expression instanceof AggregateExpression);
            if (col.expression.getExpressionType() == ExpressionType.AGGREGATE_AVG) {
                m_hasAverage = true;
            }
            if (aggColumns.size() == 1 && cookedCol.expression.equals(aggColumns.get(0))) {
                col.alias = cookedCol.alias;
                col.tableName = cookedCol.tableName;
                col.tableAlias = cookedCol.tableAlias;
                col.columnName = cookedCol.columnName;
                if (!m_aggResultColumns.contains(col)) {
                    m_aggResultColumns.add(col);
                }
                return;
            }
            // Try to check complexAggs earlier
            m_hasComplexAgg = true;
            // Aggregation column use the the hacky stuff
            col.tableName = "VOLT_TEMP_TABLE";
            col.tableAlias = "VOLT_TEMP_TABLE";
            col.columnName = "";
            if (!m_aggResultColumns.contains(col)) {
                m_aggResultColumns.add(col);
            }
            ExpressionUtil.finalizeValueTypes(col.expression);
        }
    }

    private void insertTVEsToAggResultColumns (List<TupleValueExpression> colCollection) {
        // TVEs do not need to take care
        for (TupleValueExpression tve: colCollection) {
            ParsedColInfo col = new ParsedColInfo();
            col.alias = tve.getColumnAlias();
            col.columnName = tve.getColumnName();
            col.tableName = tve.getTableName();
            col.tableAlias = tve.getTableAlias();
            col.expression = tve;
            if (!m_aggResultColumns.contains(col)) {
                m_aggResultColumns.add(col);
            }
        }
    }

    // Concat elements to the XXXColumns list
    private static void insertToColumnList (List<ParsedColInfo>columnList,
            List<ParsedColInfo> newCols) {
        for (ParsedColInfo col: newCols) {
            if (!columnList.contains(col)) {
                columnList.add(col);
            }
        }
    }

    private static boolean isNewtoColumnList(List<ParsedColInfo>columnList, AbstractExpression expr) {
        boolean isNew = true;
        for (ParsedColInfo ic: columnList) {
            if (ic.expression.equals(expr)) {
                isNew = false;
                break;
            }
        }
        return isNew;
    }

    /**
     * Find all TVEs except inside of AggregationExpression
     * @param expr
     * @param tveList
     */
    private void findAllTVEs(AbstractExpression expr, List<TupleValueExpression> tveList) {
        if (!isNewtoColumnList(m_aggResultColumns, expr))
            return;
        if (expr instanceof TupleValueExpression) {
            tveList.add((TupleValueExpression) expr.clone());
            return;
        }
        if ( expr.getLeft() != null) {
            findAllTVEs(expr.getLeft(), tveList);
        }
        if (expr.getRight() != null) {
            findAllTVEs(expr.getRight(), tveList);
        }
        if (expr.getArgs() != null) {
            for (AbstractExpression ae: expr.getArgs()) {
                findAllTVEs(ae, tveList);
            }
        }
    }

    private void updateAvgExpressions () {
        List<AbstractExpression> optimalAvgAggs = new ArrayList<AbstractExpression>();
        Iterator<AbstractExpression> itr = m_aggregationList.iterator();
        while(itr.hasNext()) {
            AbstractExpression aggExpr = itr.next();
            assert(aggExpr instanceof AggregateExpression);
            if (aggExpr.getExpressionType() == ExpressionType.AGGREGATE_AVG) {
                itr.remove();

                AbstractExpression left = new AggregateExpression(ExpressionType.AGGREGATE_SUM);
                left.setLeft(aggExpr.getLeft());
                AbstractExpression right = new AggregateExpression(ExpressionType.AGGREGATE_COUNT);
                right.setLeft(aggExpr.getLeft());

                optimalAvgAggs.add(left);
                optimalAvgAggs.add(right);
            }
        }
        m_aggregationList.addAll(optimalAvgAggs);
    }

    private void parseLimitAndOffset(VoltXMLElement limitNode, VoltXMLElement offsetNode) {
        String node;
        if (limitNode != null) {
            // Parse limit
            if ((node = limitNode.attributes.get("limit_paramid")) != null)
                m_limitParameterId = Long.parseLong(node);
            else {
                assert(limitNode.children.size() == 1);
                VoltXMLElement valueNode = limitNode.children.get(0);
                String isParam = valueNode.attributes.get("isparam");
                if ((isParam != null) && (isParam.equalsIgnoreCase("true"))) {
                    m_limitParameterId = Long.parseLong(valueNode.attributes.get("id"));
                } else {
                    node = limitNode.attributes.get("limit");
                    assert(node != null);
                    m_limit = Long.parseLong(node);
                }
            }
        }
        if (offsetNode != null) {
            // Parse offset
            if ((node = offsetNode.attributes.get("offset_paramid")) != null)
                m_offsetParameterId = Long.parseLong(node);
            else {
                if (offsetNode.children.size() == 1) {
                    VoltXMLElement valueNode = offsetNode.children.get(0);
                    String isParam = valueNode.attributes.get("isparam");
                    if ((isParam != null) && (isParam.equalsIgnoreCase("true"))) {
                        m_offsetParameterId = Long.parseLong(valueNode.attributes.get("id"));
                    } else {
                        node = offsetNode.attributes.get("offset");
                        assert(node != null);
                        m_offset = Long.parseLong(node);
                    }
                }
            }
        }

        // limit and offset can't have both value and parameter
        if (m_limit != -1) assert m_limitParameterId == -1 : "Parsed value and param. limit.";
        if (m_offset != 0) assert m_offsetParameterId == -1 : "Parsed value and param. offset.";
    }

    private void parseDisplayColumns(VoltXMLElement columnsNode, boolean isDistributed) {
        for (VoltXMLElement child : columnsNode.children) {
            ParsedColInfo col = new ParsedColInfo();
            m_aggregationList.clear();
            col.expression = parseExpressionTree(child);
            if (col.expression instanceof ConstantValueExpression) {
                assert(col.expression.getValueType() != VoltType.NUMERIC);
            }
            assert(col.expression != null);
            if (isDistributed) {
                col.expression = col.expression.replaceAVG();
                updateAvgExpressions();
            }
            ExpressionUtil.finalizeValueTypes(col.expression);

            if (child.name.equals("columnref")) {
                col.columnName = child.attributes.get("column");
                col.tableName = child.attributes.get("table");
                col.tableAlias = child.attributes.get("tablealias");
                if (col.tableAlias == null) {
                    col.tableAlias = col.tableName;
                }
            }
            else
            {
                // XXX hacky, assume all non-column refs come from a temp table
                col.tableName = "VOLT_TEMP_TABLE";
                col.tableAlias = "VOLT_TEMP_TABLE";
                col.columnName = "";
            }
            col.alias = child.attributes.get("alias");
            if (col.alias == null) {
                col.alias = col.columnName;
            }
           // This index calculation is only used for sanity checking
            // materialized views (which use the parsed select statement but
            // don't go through the planner pass that does more involved
            // column index resolution).
            col.index = m_displayColumns.size();

            insertAggExpressionsToAggResultColumns(m_aggregationList, col);
            if (m_aggregationList.size() >= 1) {
                m_hasAggregateExpression = true;

                for (AbstractExpression agg: m_aggregationList) {
                    assert(agg instanceof AggregateExpression);
                    if (! m_hasAggregateDistinct &&
                            ((AggregateExpression)agg).isDistinct() ) {
                        m_hasAggregateDistinct = true;
                        break;
                    }
                }
            }

            m_displayColumns.add(col);
        }
    }

    private void parseGroupByColumns(VoltXMLElement columnsNode) {
        for (VoltXMLElement child : columnsNode.children) {
            parseGroupByColumn(child);
        }
    }

    private void parseGroupByColumn(VoltXMLElement groupByNode) {
        ParsedColInfo groupbyCol = new ParsedColInfo();
        groupbyCol.expression = parseExpressionTree(groupByNode);
        assert(groupbyCol.expression != null);
        ExpressionUtil.finalizeValueTypes(groupbyCol.expression);
        groupbyCol.groupBy = true;

        if (groupByNode.name.equals("columnref"))
        {
            groupbyCol.alias = groupByNode.attributes.get("alias");
            groupbyCol.columnName = groupByNode.attributes.get("column");
            groupbyCol.tableName = groupByNode.attributes.get("table");
            groupbyCol.tableAlias = groupByNode.attributes.get("tablealias");
            if (groupbyCol.tableAlias == null) {
                groupbyCol.tableAlias = groupbyCol.tableName;
            }

            // This col.index set up is only useful for Materialized view.
            Table tb = getTableFromDB(groupbyCol.tableName);
            if (tb != null) {
                org.voltdb.catalog.Column catalogColumn =
                        tb.getColumns().getExact(groupbyCol.columnName);
                groupbyCol.index = catalogColumn.getIndex();
            }
        }
        else
        {
            // TODO(XIN): throw a error for Materialized view when possible.
            // XXX hacky, assume all non-column refs come from a temp table
            groupbyCol.tableName = "VOLT_TEMP_TABLE";
            groupbyCol.tableAlias = "VOLT_TEMP_TABLE";
            groupbyCol.columnName = "";
            m_hasComplexGroupby = true;

            ParsedColInfo orig_col = null;
            for (int i = 0; i < m_displayColumns.size(); ++i) {
                ParsedColInfo col = m_displayColumns.get(i);
                if (col.expression.equals(groupbyCol.expression)) {
                    groupbyCol.alias = col.alias;
                    orig_col = col;
                    break;
                }
            }
            if (orig_col != null) {
                orig_col.groupBy = true;
            }
        }
        m_groupByColumns.add(groupbyCol);
    }

    private void parseOrderColumns(VoltXMLElement columnsNode, boolean isDistributed) {
        for (VoltXMLElement child : columnsNode.children) {
            parseOrderColumn(child, isDistributed);
        }
    }

    private void parseOrderColumn(VoltXMLElement orderByNode, boolean isDistributed) {
        // make sure everything is kosher
        assert(orderByNode.name.equalsIgnoreCase("orderby"));

        // get desc/asc
        String desc = orderByNode.attributes.get("desc");
        boolean descending = (desc != null) && (desc.equalsIgnoreCase("true"));

        // get the columnref or other expression inside the orderby node
        VoltXMLElement child = orderByNode.children.get(0);
        assert(child != null);

        // create the orderby column
        ParsedColInfo order_col = new ParsedColInfo();
        order_col.orderBy = true;
        order_col.ascending = !descending;
        m_aggregationList.clear();
        AbstractExpression order_exp = parseExpressionTree(child);
        assert(order_exp != null);
        if (isDistributed) {
            order_exp = order_exp.replaceAVG();
            updateAvgExpressions();
        }
        order_col.expression = order_exp;
        ExpressionUtil.finalizeValueTypes(order_col.expression);

        // Cases:
        // child could be columnref, in which case it's just a normal column.
        // Just make a ParsedColInfo object for it and the planner will do the right thing later
        if (child.name.equals("columnref")) {
            assert(order_exp instanceof TupleValueExpression);
            TupleValueExpression tve = (TupleValueExpression) order_exp;
            order_col.columnName = tve.getColumnName();
            order_col.tableName = tve.getTableName();
            order_col.tableAlias = tve.getTableAlias();
            if (order_col.tableAlias == null) {
                order_col.tableAlias = order_col.tableName;
            }

            order_col.alias = tve.getColumnAlias();
        } else {
            String alias = child.attributes.get("alias");
            order_col.alias = alias;
            order_col.tableName = "VOLT_TEMP_TABLE";
            order_col.tableAlias = "VOLT_TEMP_TABLE";
            order_col.columnName = "";
            // Replace its expression to TVE after we build the ExpressionIndexMap

            if ((child.name.equals("operation") == false) &&
                    (child.name.equals("aggregation") == false) &&
                    (child.name.equals("function") == false)) {
               throw new RuntimeException("ORDER BY parsed with strange child node type: " + child.name);
           }
        }

        // Mark the order by column if it is in displayColumns
        // The ORDER BY column MAY be identical to a simple display column, in which case,
        // tagging the actual display column as being also an order by column
        // helps later when trying to determine ORDER BY coverage (for determinism).
        ParsedColInfo orig_col = null;
        for (ParsedColInfo col : m_displayColumns) {
            if (col.alias.equals(order_col.alias) || col.expression.equals(order_exp)) {
                orig_col = col;
                break;
            }
        }
        if (orig_col != null) {
            orig_col.orderBy = true;
            orig_col.ascending = order_col.ascending;

            order_col.alias = orig_col.alias;
            order_col.columnName = orig_col.columnName;
            order_col.tableName = orig_col.tableName;
        }
        assert( ! (order_exp instanceof ConstantValueExpression));
        assert( ! (order_exp instanceof ParameterValueExpression));

        insertAggExpressionsToAggResultColumns(m_aggregationList, order_col);
        if (m_aggregationList.size() >= 1) {
            m_hasAggregateExpression = true;
        }
        // Add TVEs in ORDER BY statement if we have, stop recursive finding when we have it in AggResultColumns
        List<TupleValueExpression> tveList = new ArrayList<TupleValueExpression>();
        findAllTVEs(order_col.expression, tveList);
        insertTVEsToAggResultColumns(tveList);
        m_orderColumns.add(order_col);
    }

    private void parseHavingExpression(VoltXMLElement havingNode, boolean isDistributed) {
        m_aggregationList.clear();
        assert(havingNode.children.size() == 1);
        m_having = parseExpressionTree(havingNode.children.get(0));
        assert(m_having != null);
        if (isDistributed) {
            m_having = m_having.replaceAVG();
            updateAvgExpressions();
        }
        ExpressionUtil.finalizeValueTypes(m_having);
        if (m_aggregationList.size() >= 1) {
            m_hasAggregateExpression = true;
        }

        for (AbstractExpression expr: m_aggregationList) {
            ParsedColInfo col = new ParsedColInfo();
            col.expression = (AbstractExpression) expr.clone();
            assert(col.expression instanceof AggregateExpression);
            if (col.expression.getExpressionType() == ExpressionType.AGGREGATE_AVG) {
                m_hasAverage = true;
            }

            col.tableName = "VOLT_TEMP_TABLE";
            col.tableAlias = "VOLT_TEMP_TABLE";
            col.columnName = "";

            if (!m_aggResultColumns.contains(col)) {
                // Try to check complexAggs earlier
                m_hasComplexAgg = true;
                ExpressionUtil.finalizeValueTypes(col.expression);
                m_aggResultColumns.add(col);
            }
        }
    }

    private void prepareLimitPlanNode () {
        if (!hasLimitOrOffset()) {
            return;
        }

        int limitParamIndex = parameterCountIndexById(m_limitParameterId);
        int offsetParamIndex = parameterCountIndexById(m_offsetParameterId);;

        // The coordinator's top limit graph fragment for a MP plan.
        // If planning "order by ... limit", getNextSelectPlan()
        // will have already added an order by to the coordinator frag.
        // This is the only limit node in a SP plan
        m_limitNodeTop = new LimitPlanNode();
        m_limitNodeTop.setLimit((int) m_limit);
        m_limitNodeTop.setOffset((int) m_offset);
        m_limitNodeTop.setLimitParameterIndex(limitParamIndex);
        m_limitNodeTop.setOffsetParameterIndex(offsetParamIndex);

        // check if limit can be pushed down
        m_limitCanPushdown = !m_distinct;
        if (m_limitCanPushdown) {
            for (ParsedColInfo col : m_displayColumns) {
                AbstractExpression rootExpr = col.expression;
                if (rootExpr instanceof AggregateExpression) {
                    if (((AggregateExpression)rootExpr).isDistinct()) {
                        m_limitCanPushdown = false;
                        break;
                    }
                }
            }
        }

        if (m_limitCanPushdown) {
            m_limitNodeDist = new LimitPlanNode();
            // Offset on a pushed-down limit node makes no sense, just defaults to 0
            // -- the original offset must be factored into the pushed-down limit as a pad on the limit.
            if (m_limit != -1) {
                m_limitNodeDist.setLimit((int) (m_limit + m_offset));
            }

            if (hasLimitOrOffsetParameters()) {
                AbstractExpression left = getParameterOrConstantAsExpression(m_offsetParameterId, m_offset);
                assert (left != null);
                AbstractExpression right = getParameterOrConstantAsExpression(m_limitParameterId, m_limit);
                assert (right != null);
                OperatorExpression expr = new OperatorExpression(ExpressionType.OPERATOR_PLUS, left, right);
                expr.setValueType(VoltType.INTEGER);
                expr.setValueSize(VoltType.INTEGER.getLengthInBytesForFixedTypes());
                m_limitNodeDist.setLimitExpression(expr);
            }
            // else let the parameterized forms of offset/limit default to unused/invalid.
        }
    }

    public LimitPlanNode getLimitNodeTop() {
        return new LimitPlanNode(m_limitNodeTop);
    }

    public LimitPlanNode getLimitNodeDist() {
        return new LimitPlanNode(m_limitNodeDist);
    }

    @Override
    public String toString() {
        String retval = super.toString() + "\n";

        retval += "LIMIT " + String.valueOf(m_limit) + "\n";
        retval += "OFFSET " + String.valueOf(m_offset) + "\n";

        retval += "DISPLAY COLUMNS:\n";
        for (ParsedColInfo col : m_displayColumns) {
            retval += "\tColumn: " + col.alias + ": ";
            retval += col.expression.toString() + "\n";
        }

        retval += "ORDER COLUMNS:\n";
        for (ParsedColInfo col : m_orderColumns) {
            retval += "\tColumn: " + col.alias + ": ASC?: " + col.ascending + ": ";
            retval += col.expression.toString() + "\n";
        }

        retval += "GROUP_BY COLUMNS:\n";
        for (ParsedColInfo col : m_groupByColumns) {
            retval += "\tColumn: " + col.alias + ": ";
            retval += col.expression.toString() + "\n";
        }

        retval = retval.trim();

        return retval;
    }

    public boolean hasJoinOrder() {
        return m_joinOrder != null || m_hasLargeNumberOfTableJoins;
    }

    public ArrayList<JoinNode> getJoinOrder() {
        return m_joinOrderList;
    }

    @Override
    void postParse(String sql, String joinOrder) {
        super.postParse(sql, joinOrder);

        if (m_joinOrder != null) {
            // User indicates a join order already
            tryAddOneJoinOrder(m_joinOrder);
            return;
        }

        // prepare the join order if needed
        if (m_joinOrder == null &&
                m_tableAliasList.size() > StatementCompiler.DEFAULT_MAX_JOIN_TABLES) {
            // When there are large number of table joins, give up the all permutations.
            // By default, try the join order with the SQL query table order first.
            m_hasLargeNumberOfTableJoins = true;

            StringBuilder sb = new StringBuilder();
            String separator = "";
            for (int ii = 0; ii < m_tableAliasList.size(); ii++) {
                String tableAlias = m_tableAliasList.get(ii);
                sb.append(separator).append(tableAlias);
                separator = ",";
            }
            if (tryAddOneJoinOrder(sb.toString())) {
                return;
            }

            // The input join order is not vailid
            // Find one valid join order to run, which may not be the most efficient.
            ArrayDeque<JoinNode> joinOrderQueue =
                    SelectSubPlanAssembler.queueJoinOrders(m_joinTree, false);

            // Currently, we get one join order, but it is easy to change the hard coded number
            // to get more join orders for large table joins.
            assert(joinOrderQueue.size() == 1);
            assert(m_joinOrderList.size() == 0);
            m_joinOrderList.addAll(joinOrderQueue);
        }
    }

    private boolean tryAddOneJoinOrder(String joinOrder) {
        ArrayList<String> tableAliases = new ArrayList<String>();
        //Don't allow dups for now since self joins aren't supported
        HashSet<String> dupCheck = new HashSet<String>();
        // Calling trim() up front is important only in the case of a trailing comma.
        // It allows a trailing comma followed by whitespace as in "A,B, " to be ignored
        // like a normal trailing comma as in "A,B,". The alternatives would be to treat
        // these as different cases (strange) or to complain about both -- which could be
        // accomplished by appending an additional space to the join order here
        // instead of calling trim.
        for (String element : joinOrder.trim().split(",")) {
            String alias = element.trim().toUpperCase();
            tableAliases.add(alias);
            if (!dupCheck.add(alias)) {
                if (m_hasLargeNumberOfTableJoins) return false;

                StringBuilder sb = new StringBuilder();
                sb.append("The specified join order \"").append(joinOrder);
                sb.append("\" contains a duplicate element \"").append(alias).append("\".");
                throw new PlanningErrorException(sb.toString());
            }
        }

        //TODO: now that the table aliases list is built, the remaining validations
        // here and in isValidJoinOrder should be combined in one AbstractParsedStmt function
        // that generates a JoinNode tree or throws an exception.
        if (m_tableAliasMap.size() != tableAliases.size()) {
            if (m_hasLargeNumberOfTableJoins) return false;

            StringBuilder sb = new StringBuilder();
            sb.append("The specified join order \"");
            sb.append(joinOrder).append("\" does not contain the correct number of elements\n");
            sb.append("Expected ").append(m_tableList.size());
            sb.append(" but found ").append(tableAliases.size()).append(" elements.");
            throw new PlanningErrorException(sb.toString());
        }

        Set<String> aliasSet = m_tableAliasMap.keySet();
        Set<String> specifiedNames = new HashSet<String>(tableAliases);
        specifiedNames.removeAll(aliasSet);
        if (specifiedNames.isEmpty() == false) {
            if (m_hasLargeNumberOfTableJoins) return false;

            StringBuilder sb = new StringBuilder();
            sb.append("The specified join order \"");
            sb.append(joinOrder).append("\" contains ");
            int i = 0;
            for (String name : specifiedNames) {
                sb.append(name);
                if (++i != specifiedNames.size()) {
                    sb.append(',');
                }
            }
            sb.append(" which ");
            if (specifiedNames.size() == 1) {
                sb.append("doesn't ");
            } else {
                sb.append("don't ");
            }
            sb.append("exist in the FROM clause");
            throw new PlanningErrorException(sb.toString());
        }

        // Now check whether the specified join order is valid or not
        if ( ! isValidJoinOrder(tableAliases)) {
            if (m_hasLargeNumberOfTableJoins) return false;
            throw new PlanningErrorException("The specified join order is invalid for the given query");
        }

        // Inserted one join tree to the list
        assert(m_joinOrderList.size() > 0);
        m_joinTree = m_joinOrderList.get(0);
        return true;
    }

    /**
     * Validate the specified join order against the join tree.
     * In general, outer joins are not associative and commutative. Not all orders are valid.
     * In case of a valid join order, the initial join tree is rebuilt to match the specified order
     * @param tables list of table aliases(or tables) to join
     * @return true if the join order is valid
     */
    private boolean isValidJoinOrder(List<String> tableAliases)
    {
        assert(m_joinTree != null);

        // Split the original tree into the sub-trees having the same join type for all nodes
        List<JoinNode> subTrees = m_joinTree.extractSubTrees();

        // For a sub-tree with inner joins only any join order is valid. The only requirement is that
        // each and every table from that sub-tree constitute an uninterrupted sequence in the specified join order
        // The outer joins are associative but changing the join order precedence
        // includes moving ON clauses to preserve the initial SQL semantics. For example,
        // T1 right join T2 on T1.C1 = T2.C1 left join T3 on T2.C2=T3.C2 can be rewritten as
        // T1 right join (T2 left join T3 on T2.C2=T3.C2) on T1.C1 = T2.C1
        // At the moment, such transformations are not supported. The specified joined order must
        // match the SQL order
        int tableNameIdx = 0;
        List<JoinNode> finalSubTrees = new ArrayList<JoinNode>();
        // we need to process the sub-trees last one first because the top sub-tree is the first one on the list
        for (int i = subTrees.size() - 1; i >= 0; --i) {
            JoinNode subTree = subTrees.get(i);
            // Get all tables for the subTree
            List<JoinNode> subTableNodes = subTree.generateLeafNodesJoinOrder();
            JoinNode joinOrderSubTree;
            if ((subTree instanceof BranchNode) && ((BranchNode)subTree).getJoinType() != JoinType.INNER) {
                // add the sub-tree as is
                joinOrderSubTree = subTree;
                for (JoinNode tableNode : subTableNodes) {
                    if (tableNode.getId() >= 0) {
                        String tableAlias = tableNode.getTableAlias();
                        if ( ! tableAliases.get(tableNameIdx++).equals(tableAlias)) {
                            return false;
                        }
                    }
                }
            } else {
                // Collect all the "real" tables from the sub-tree skipping the nodes representing
                // the sub-trees with the different join type (id < 0)
                Map<String, JoinNode> nodeNameMap = new HashMap<String, JoinNode>();
                for (JoinNode tableNode : subTableNodes) {
                    if (tableNode.getId() >= 0) {
                        nodeNameMap.put(tableNode.getTableAlias(), tableNode);
                    }
                }

                // rearrange the sub tree to match the order
                List<JoinNode> joinOrderSubNodes = new ArrayList<JoinNode>();
                for (int j = 0; j < subTableNodes.size(); ++j) {
                    if (subTableNodes.get(j).getId() >= 0) {
                        assert(tableNameIdx < tableAliases.size());
                        String tableAlias = tableAliases.get(tableNameIdx);
                        if (tableAlias == null || ! nodeNameMap.containsKey(tableAlias)) {
                            return false;
                        }
                        joinOrderSubNodes.add(nodeNameMap.get(tableAlias));
                        ++tableNameIdx;
                    } else {
                        // It's dummy node
                        joinOrderSubNodes.add(subTableNodes.get(j));
                    }
                }
                joinOrderSubTree = JoinNode.reconstructJoinTreeFromTableNodes(joinOrderSubNodes);
                //Collect all the join/where conditions to reassign them later
                AbstractExpression combinedWhereExpr = subTree.getAllInnerJoinFilters();
                if (combinedWhereExpr != null) {
                    joinOrderSubTree.setWhereExpression((AbstractExpression)combinedWhereExpr.clone());
                }
                // The new tree root node id must match the original one to be able to reconnect the
                // subtrees
                joinOrderSubTree.setId(subTree.getId());
            }
            finalSubTrees.add(0, joinOrderSubTree);
        }
        // if we got there the join order is OK. Rebuild the whole tree
        JoinNode newNode = JoinNode.reconstructJoinTreeFromSubTrees(finalSubTrees);
        m_joinOrderList.add(newNode);
        return true;
    }


    public boolean hasAggregateExpression () {
        return m_hasAggregateExpression;
    }

    public boolean hasAggregateOrGroupby() {
        return m_hasAggregateExpression || isGrouped();
    }

    public NodeSchema getFinalProjectionSchema () {
        return m_projectSchema;
    }

    public boolean hasComplexGroupby() {
        return m_hasComplexGroupby;
    }

    public boolean hasComplexAgg() {
        return m_hasComplexAgg;
    }

    public boolean mayNeedAvgPushdown() {
        return m_hasAverage;
    }

    public boolean hasPartitionColumnInGroupby() {
        return m_hasPartitionColumnInGroupby;
    }

    public void setHasPartitionColumnInGroupby() {
        m_hasPartitionColumnInGroupby = true;
    }

    public boolean hasOrderByColumns() {
        return ! m_orderColumns.isEmpty();
    }

    public boolean hasDistinct() {
        return m_distinct;
    }

    public boolean hasAggregateDistinct() {
        return m_hasAggregateDistinct;
    }

    public List<ParsedColInfo> displayColumns() {
        return Collections.unmodifiableList(m_displayColumns);
    }

    public List<ParsedColInfo> groupByColumns() {
        return Collections.unmodifiableList(m_groupByColumns);
    }

    public List<ParsedColInfo> orderByColumns() {
        return Collections.unmodifiableList(m_orderColumns);
    }

    @Override
    public boolean hasLimitOrOffset() {
        if ((m_limit != -1) || (m_limitParameterId != -1) ||
            (m_offset > 0) || (m_offsetParameterId != -1)) {
            return true;
        }
        return false;
    }

    public boolean hasLimitOrOffsetParameters() {
        return m_limitParameterId != -1 || m_offsetParameterId != -1;
    }

    /// This is for use with integer-valued row count parameters, namely LIMITs and OFFSETs.
    /// It should be called (at least) once for each LIMIT or OFFSET parameter to establish that
    /// the parameter is being used in a BIGINT context.
    /// There may be limitations elsewhere that restrict limits and offsets to 31-bit unsigned values,
    /// but enforcing that at parameter passing/checking time seems a little arbitrary, so we keep
    /// the parameters at maximum width -- a 63-bit unsigned BIGINT.
    private int parameterCountIndexById(long paramId) {
        if (paramId == -1) {
            return -1;
        }
        assert(m_paramsById.containsKey(paramId));
        ParameterValueExpression pve = m_paramsById.get(paramId);
        // As a side effect, re-establish these parameters as integer-typed
        // -- this helps to catch type errors earlier in the invocation process
        // and prevents a more serious error in HSQLBackend statement reconstruction.
        // The HSQL parser originally had these correctly pegged as BIGINTs,
        // but the VoltDB code ( @see AbstractParsedStmt#parseParameters )
        // skeptically second-guesses that pending its own verification. This case is now verified.
        pve.refineValueType(VoltType.BIGINT, VoltType.BIGINT.getLengthInBytesForFixedTypes());
        return pve.getParameterIndex();
    }

    public int getLimitParameterIndex() {
        return parameterCountIndexById(m_limitParameterId);
    }

    public int getOffsetParameterIndex() {
        return parameterCountIndexById(m_offsetParameterId);
    }

    private AbstractExpression getParameterOrConstantAsExpression(long id, long value) {
        // The id was previously passed to parameterCountIndexById, so if not -1,
        // it has already been asserted to be a valid id for a parameter, and the
        // parameter's type has been refined to INTEGER.
        if (id != -1) {
            return m_paramsById.get(id);
        }
        // The limit/offset is a non-parameterized literal value that needs to be wrapped in a
        // BIGINT constant so it can be used in the addition expression for the pushed-down limit.
        ConstantValueExpression constant = new ConstantValueExpression();
        constant.setValue(Long.toString(value));
        constant.refineValueType(VoltType.BIGINT, VoltType.BIGINT.getLengthInBytesForFixedTypes());
        return constant;
    }


    @Override
    public boolean isOrderDeterministic()
    {
        if ( ! hasTopLevelScans()) {
            // This currently applies to parent queries that do all their scanning in subqueries and so
            // take on the order determinism of their subqueries. This might have to be rethought to allow
            // ordering in parent queries to effect determinism of unordered "FROM CLAUSE" subquery results.
            return true;
        }
        if (hasAOneRowResult()) {
            return true;
        }
        if ( ! hasOrderByColumns() ) {
            return false;
        }

        // The nonOrdered expression list is used as a short-cut -- if an expression has been
        // determined to be non-ordered when encountered as a GROUP BY expression,
        // it will also be non-ordered when encountered in the select list.
        ArrayList<AbstractExpression> nonOrdered = new ArrayList<AbstractExpression>();

        if (isGrouped()) {
            // Does the ordering of a statements's GROUP BY columns ensure determinism?
            // All display columns and order-by expressions are functionally dependent on the GROUP BY
            // columns even if the display column's values are not ordered or unique,
            // so ordering by ALL of the GROUP BY columns is enough to get full determinism,
            // EVEN if ordering by other (dependent) expressions,
            // regardless of the placement of non-GROUP BY expressions in the ORDER BY list.
            if (orderByColumnsDetermineAllColumns(m_groupByColumns, nonOrdered)) {
                return true;
            }
            if (orderByColumnsDetermineAllDisplayColumns(nonOrdered)) {
                return true;
            }
        } else {
            if (orderByColumnsDetermineAllDisplayColumns(nonOrdered)) {
                return true;
            }
            if (orderByColumnsCoverUniqueKeys()) {
                return true;
            }
        }
        return false;
    }

    @Override
    public boolean isOrderDeterministicInSpiteOfUnorderedSubqueries()
    {
        if (hasAOneRowResult()) {
            return true;
        }
        if ( ! hasOrderByColumns() ) {
            return false;
        }

        // This is a trivial empty container.
        // In other code paths, it would list expressions that have been pre-determined to be nonOrdered.
        ArrayList<AbstractExpression> nonOrdered = new ArrayList<AbstractExpression>();

        if (orderByColumnsDetermineAllDisplayColumns(nonOrdered)) {
            return true;
        }
        return false;
    }

    private boolean orderByColumnsCoverUniqueKeys()
    {
        // In theory, if EVERY table in the query has a uniqueness constraint
        // (primary key or other unique index) on columns that are all listed in the ORDER BY values,
        // the result is deterministic.
        // This holds regardless of whether the associated index is actually used in the selected plan,
        // so this check is plan-independent.
        HashMap<String, List<AbstractExpression> > baseTableAliases =
                new HashMap<String, List<AbstractExpression> >();
        for (ParsedColInfo col : m_orderColumns) {
            AbstractExpression expr = col.expression;
            List<AbstractExpression> baseTVEs = expr.findBaseTVEs();
            if (baseTVEs.size() != 1) {
                // Table-spanning ORDER BYs -- like ORDER BY A.X + B.Y are not helpful.
                // Neither are (nonsense) constant (table-less) expressions.
                continue;
            }
            // This loops exactly once.
            AbstractExpression baseTVE = baseTVEs.get(0);
            String nextTableAlias = ((TupleValueExpression)baseTVE).getTableAlias();
            assert(nextTableAlias != null);
            List<AbstractExpression> perTable = baseTableAliases.get(nextTableAlias);
            if (perTable == null) {
                perTable = new ArrayList<AbstractExpression>();
                baseTableAliases.put(nextTableAlias, perTable);
            }
            perTable.add(expr);
        }

        if (m_tableAliasMap.size() > baseTableAliases.size()) {
            // FIXME: This would be one of the tricky cases where the goal would be to prove that the
            // row with no ORDER BY component came from the right side of a 1-to-1 or many-to-1 join.
            return false;
        }
        boolean allScansAreDeterministic = true;
        for (Entry<String, List<AbstractExpression>> orderedAlias : baseTableAliases.entrySet()) {
            List<AbstractExpression> orderedAliasExprs = orderedAlias.getValue();
            StmtTableScan tableScan = m_tableAliasMap.get(orderedAlias.getKey());
            if (tableScan == null) {
                assert(false);
                return false;
            }

            if (tableScan instanceof StmtSubqueryScan) {
                return false; // don't yet handle FROM clause subquery, here.
            }

            Table table = ((StmtTargetTableScan)tableScan).getTargetTable();

            // This table's scans need to be proven deterministic.
            allScansAreDeterministic = false;
            // Search indexes for one that makes the order by deterministic
            for (Index index : table.getIndexes()) {
                // skip non-unique indexes
                if ( ! index.getUnique()) {
                    continue;
                }

                // get the list of expressions for the index
                List<AbstractExpression> indexExpressions = new ArrayList<AbstractExpression>();

                String jsonExpr = index.getExpressionsjson();
                // if this is a pure-column index...
                if (jsonExpr.isEmpty()) {
                    for (ColumnRef cref : index.getColumns()) {
                        Column col = cref.getColumn();
                        TupleValueExpression tve = new TupleValueExpression(table.getTypeName(),
                                                                            orderedAlias.getKey(),
                                                                            col.getName(),
                                                                            col.getName(),
                                                                            col.getIndex());
                        indexExpressions.add(tve);
                    }
                }
                // if this is a fancy expression-based index...
                else {
                    try {
                        indexExpressions = AbstractExpression.fromJSONArrayString(jsonExpr, tableScan);
                    } catch (JSONException e) {
                        e.printStackTrace();
                        assert(false);
                        continue;
                    }
                }

                // If the sort covers the index, then it's a unique sort.
                //TODO: The statement's equivalence sets would be handy here to recognize cases like
                //    WHERE B.unique_id = A.b_id
                //    ORDER BY A.unique_id, A.b_id
                if (orderedAliasExprs.containsAll(indexExpressions)) {
                    allScansAreDeterministic = true;
                    break;
                }
            }
            // ALL tables' scans need to have proved deterministic
            if ( ! allScansAreDeterministic) {
                return false;
            }
        }
        return true;
    }

    private boolean orderByColumnsDetermineAllDisplayColumns(ArrayList<AbstractExpression> nonOrdered)
    {
        ArrayList<ParsedColInfo> candidateColumns = new ArrayList<ParsedSelectStmt.ParsedColInfo>();
        for (ParsedColInfo displayCol : m_displayColumns) {
            if (displayCol.orderBy) {
                continue;
            }
            if (displayCol.groupBy) {
                AbstractExpression displayExpr = displayCol.expression;
                // Round up the usual suspects -- if there were uncooperative GROUP BY expressions,
                // they will often also be uncooperative display column expressions.
                for (AbstractExpression nonStarter : nonOrdered) {
                     if (displayExpr.equals(nonStarter)) {
                         return false;
                     }
                }
                // A GROUP BY expression that was not nonOrdered must be covered.
                continue;
            }
            candidateColumns.add(displayCol);
        }
        return orderByColumnsDetermineAllColumns(candidateColumns, null);
    }

    private boolean orderByColumnsDetermineAllColumns(ArrayList<ParsedColInfo> candidateColumns,
                                                      ArrayList<AbstractExpression> outNonOrdered) {
        HashSet<AbstractExpression> orderByExprs = null;
        ArrayList<AbstractExpression> candidateExprHardCases = null;
        // First try to get away with a brute force N by M search for exact equalities.
        for (ParsedColInfo candidateCol : candidateColumns)
        {
            if (candidateCol.orderBy) {
                continue;
            }
            AbstractExpression candidateExpr = candidateCol.expression;
            if (orderByExprs == null) {
                orderByExprs = new HashSet<AbstractExpression>();
                for (ParsedColInfo orderByCol : m_orderColumns) {
                    orderByExprs.add(orderByCol.expression);
                }
            }
            if (orderByExprs.contains(candidateExpr)) {
                continue;
            }
            if (candidateExpr instanceof TupleValueExpression) {
                // Simple column references can only be exactly equal to but not "based on" an ORDER BY.
                return false;
            }

            if (candidateExprHardCases == null) {
                candidateExprHardCases = new ArrayList<AbstractExpression>();
            }
            candidateExprHardCases.add(candidateExpr);
        }

        if (candidateExprHardCases == null) {
            return true;
        }

        // Plan B. profile the ORDER BY list and try to include/exclude the hard cases on that basis.
        HashSet<AbstractExpression> orderByTVEs = new HashSet<AbstractExpression>();
        ArrayList<AbstractExpression> orderByNonTVEs = new ArrayList<AbstractExpression>();
        ArrayList<List<AbstractExpression>> orderByNonTVEBaseTVEs = new ArrayList<List<AbstractExpression>>();
        HashSet<AbstractExpression> orderByAllBaseTVEs = new HashSet<AbstractExpression>();

        for (AbstractExpression orderByExpr : orderByExprs) {
            if (orderByExpr instanceof TupleValueExpression) {
                orderByTVEs.add(orderByExpr);
                orderByAllBaseTVEs.add(orderByExpr);
            } else {
                orderByNonTVEs.add(orderByExpr);
                List<AbstractExpression> baseTVEs = orderByExpr.findBaseTVEs();
                orderByNonTVEBaseTVEs.add(baseTVEs);
                orderByAllBaseTVEs.addAll(baseTVEs);
            }
        }

        boolean result = true;

        for (AbstractExpression candidateExpr : candidateExprHardCases)
        {
            Collection<AbstractExpression> candidateBases = candidateExpr.findBaseTVEs();
            if (orderByTVEs.containsAll(candidateBases)) {
                continue;
            }
            if (orderByAllBaseTVEs.containsAll(candidateBases) == false) {
                if (outNonOrdered == null) {
                    // Short-circuit if the remaining non-qualifying expressions are not of interest.
                    return false;
                }
                result = false;
                outNonOrdered.add(candidateExpr);
                continue;
            }
            // At this point, if the candidateExpr is a match,
            // then it is based on but not equal to one or more orderByNonTVE(s) and optionally orderByTVE(s).
            // The simplest example is like "SELECT a+(b-c) ... ORDER BY a, b-c;"
            // If it is a non-match, it is an original expression based on orderByAllBaseTVEs
            // The simplest example is like "SELECT a+b ... ORDER BY a, b-c;"
            // TODO: process REALLY HARD CASES
            // TODO: issue a warning, short-term?
            // For now, err on the side of non-determinism.
            if (outNonOrdered == null) {
                // Short-circuit if the remaining non-qualifying expressions are not of interest.
                return false;
            }
            outNonOrdered.add(candidateExpr);
            result = false;
        }
        return result;
    }

    private boolean hasAOneRowResult()
    {
        if ( ( ! isGrouped() ) && displaysAgg()) {
            return true;
        }
        return false;
    }

    private boolean hasTopLevelScans() {
        for (StmtTableScan scan : m_tableAliasMap.values()) {
            if (scan instanceof StmtTargetTableScan) {
                return true;
            }
        }
        return false;
    }

    private boolean displaysAgg() {
        for (ParsedColInfo displayCol : m_displayColumns) {
            if (displayCol.expression.hasAnySubexpressionOfClass(AggregateExpression.class)) {
                return true;
            }
        }
        return false;
    }

    public boolean isGrouped() { return ! m_groupByColumns.isEmpty(); }

    public boolean displayColumnsContainAllGroupByColumns() {
        for (ParsedColInfo groupedCol : m_groupByColumns) {
            boolean missing = true;
            for (ParsedColInfo displayCol : m_displayColumns) {
                if (displayCol.groupBy) {
                    if (groupedCol.equals(displayCol)) {
                        missing = false;
                        break;
                    }
                }
            }
            if (missing) {
                return false;
            }
        }
        return true;
    }

    boolean groupByIsAnOrderByPermutation() {
        if (m_groupAndOrderByPermutationWasTested) {
            return m_groupAndOrderByPermutationResult;
        }
        m_groupAndOrderByPermutationWasTested = true;

        int size = m_groupByColumns.size();
        if (size != m_orderColumns.size()) {
            return false;
        }
        Set<AbstractExpression> orderPrefixExprs = new HashSet<>(size);
        Set<AbstractExpression> groupExprs = new HashSet<>(size);
        int ii = 0;
        for (ParsedColInfo gb : m_groupByColumns) {
            AbstractExpression gexpr = gb.expression;
            if (gb.expression == null) {
                return false;
            }
            AbstractExpression oexpr = m_orderColumns.get(ii).expression;
            ++ii;
            // Save some cycles in the common case of matching by position.
            if (gb.expression.equals(oexpr)) {
                continue;
            }
            groupExprs.add(gexpr);
            orderPrefixExprs.add(oexpr);
        }
        m_groupAndOrderByPermutationResult = groupExprs.equals(orderPrefixExprs);
        return m_groupAndOrderByPermutationResult;
    }

    void checkPlanColumnMatch(List<SchemaColumn> columns) {
        // Sanity-check the output NodeSchema columns against the display columns
        if (m_displayColumns.size() != columns.size()) {
            throw new PlanningErrorException(
                    "Mismatched plan output cols to parsed display columns");
        }
        int ii = 0;
        for (ParsedColInfo display_col : m_displayColumns) {
            SchemaColumn sc = columns.get(ii);
            ++ii;
            boolean sameTable = false;

            if (display_col.tableAlias != null) {
                if (display_col.tableAlias.equals(sc.getTableAlias())) {
                    sameTable = true;
                }
            } else if (display_col.tableName.equals(sc.getTableName())) {
                sameTable = true;
            }
            if (sameTable) {
                if (display_col.alias != null && ! display_col.alias.equals("")) {
                    if (display_col.alias.equals(sc.getColumnAlias())) {
                        continue;
                    }
                }
                else if (display_col.columnName != null && ! display_col.columnName.equals("")) {
                    if (display_col.columnName.equals(sc.getColumnName())) {
                        continue;
                    }
                }
            }
            throw new PlanningErrorException(
                    "Mismatched plan output cols to parsed display columns");
        }
    }
}
TOP

Related Classes of org.voltdb.planner.ParsedSelectStmt

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.