Examples of org.apache.hadoop.hive.ql.plan.ColStatistics

org.apache.hadoop.hive.ql.plan.ColStatistics

              // in filter expression since it will be taken care of by the partition pruner
              if (neededCols != null && !neededCols.contains(colName)) {
                return numRows;
              }


              ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
              if (cs != null) {
                long dvs = cs.getCountDistint();
                numRows = dvs == 0 ? numRows / 2 : numRows / dvs;
                return numRows;
              }
            } else if (leaf instanceof ExprNodeColumnDesc) {
              ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf;
              colName = colDesc.getColumn();
              tabAlias = colDesc.getTabAlias();


              // if const is first argument then evaluate the result
              if (isConst) {


                // if column name is not contained in needed column list then it
                // is a partition column. We do not need to evaluate partition columns
                // in filter expression since it will be taken care of by the partition pruner
                if (neededCols != null && neededCols.indexOf(colName) == -1) {
                  return numRows;
                }


                ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
                if (cs != null) {
                  long dvs = cs.getCountDistint();
                  numRows = dvs == 0 ? numRows / 2 : numRows / dvs;
                  return numRows;
                }
              }
            }

View Full Code Here

            // for those newly added columns
            if (!colExprMap.containsKey(ci.getInternalName())) {
              String colName = ci.getInternalName();
              String tabAlias = ci.getTabAlias();
              String colType = ci.getTypeName();
              ColStatistics cs = new ColStatistics(tabAlias, colName, colType);
              cs.setCountDistint(stats.getNumRows());
              cs.setNumNulls(0);
              cs.setAvgColLen(StatsUtils.getAvgColLenOfFixedLengthTypes(colType));
              aggColStats.add(cs);
            }
          }


          // add the new aggregate column and recompute data size

View Full Code Here

          if (numAttr > 1) {
            List<Long> perAttrDVs = Lists.newArrayList();
            for (int idx = 0; idx < numAttr; idx++) {
              for (Integer i : joinKeys.keySet()) {
                String col = joinKeys.get(i).get(idx);
                ColStatistics cs = joinedColStats.get(col);
                if (cs != null) {
                  perAttrDVs.add(cs.getCountDistint());
                }
              }


              distinctVals.add(getDenominator(perAttrDVs));
              perAttrDVs.clear();
            }


            if (numAttr > numParent) {
              // To avoid denominator getting larger and aggressively reducing
              // number of rows, we will ease out denominator.
              denom = getEasedOutDenominator(distinctVals);
            } else {
              for (Long l : distinctVals) {
                denom = StatsUtils.safeMult(denom, l);
              }
            }
          } else {
            for (List<String> jkeys : joinKeys.values()) {
              for (String jk : jkeys) {
                ColStatistics cs = joinedColStats.get(jk);
                if (cs != null) {
                  distinctVals.add(cs.getCountDistint());
                }
              }
            }
            denom = getDenominator(distinctVals);
          }


          // Update NDV of joined columns to be min(V(R,y), V(S,y))
          updateJoinColumnsNDV(joinKeys, joinedColStats, numAttr);


          // column statistics from different sources are put together, and their
          // fully qualified column names are renamed based on the output schema of the join operator
          Map<String, ExprNodeDesc> colExprMap = jop.getColumnExprMap();
          RowSchema rs = jop.getSchema();
          List<ColStatistics> outColStats = Lists.newArrayList();
          Map<String, String> outInTabAlias = new HashMap<String, String>();
          for (ColumnInfo ci : rs.getSignature()) {
            String key = ci.getInternalName();
            ExprNodeDesc end = colExprMap.get(key);
            if (end instanceof ExprNodeColumnDesc) {
              String colName = ((ExprNodeColumnDesc) end).getColumn();
              String tabAlias = ((ExprNodeColumnDesc) end).getTabAlias();
              String fqColName = StatsUtils.getFullyQualifiedColumnName(tabAlias, colName);
              ColStatistics cs = joinedColStats.get(fqColName);
              String outColName = key;
              String outTabAlias = ci.getTabAlias();
              outInTabAlias.put(outTabAlias, tabAlias);
              if (cs != null) {
                cs.setColumnName(outColName);
                cs.setTableAlias(outTabAlias);
              }
              outColStats.add(cs);
            }
          }

View Full Code Here

        // in case of a join between a fact table and many dimension tables, the join key in the
        // fact table will mostly be a foreign key that has a corresponding primary key in the
        // dimension table. The selectivity of the fact table in that case will be the product of
        // the selectivities of all dimension tables (assumes the join conditions are conjunctive)
        for (Integer id : parentsWithPK) {
          ColStatistics csPK = null;
          Operator<? extends OperatorDesc> parent = parents.get(id);
          for (ColStatistics cs : parent.getStatistics().getColumnStats()) {
            if (cs.isPrimaryKey()) {
              csPK = cs;
              break;

View Full Code Here

          List<String> fqCols = StatsUtils.getFullyQualifedReducerKeyNames(keys,
              rsOp.getColumnExprMap());
          if (fqCols.size() == 1) {
            String joinCol = fqCols.get(0);
            if (rsOp.getStatistics() != null) {
              ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromFQColName(joinCol);
              if (cs != null && !cs.isPrimaryKey()) {
                if (StatsUtils.inferForeignKey(csPK, cs)) {
                  result.add(i);
                }
              }
            }

View Full Code Here

            List<String> fqCols = StatsUtils.getFullyQualifedReducerKeyNames(keys,
                rsOp.getColumnExprMap());
            if (fqCols.size() == 1) {
              String joinCol = fqCols.get(0);
              if (rsOp.getStatistics() != null) {
                ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromFQColName(joinCol);
                if (cs != null && cs.isPrimaryKey()) {
                  result.add(i);
                }
              }
            }
          }

View Full Code Here

        long minNDV = Long.MAX_VALUE;


        // find min NDV for joining columns
        for (Map.Entry<Integer, List<String>> entry : joinKeys.entrySet()) {
          String key = entry.getValue().get(joinColIdx);
          ColStatistics cs = joinedColStats.get(key);
          if (cs != null && cs.getCountDistint() < minNDV) {
            minNDV = cs.getCountDistint();
          }
        }


        // set min NDV value to both columns involved in join
        if (minNDV != Long.MAX_VALUE) {
          for (Map.Entry<Integer, List<String>> entry : joinKeys.entrySet()) {
            String key = entry.getValue().get(joinColIdx);
            ColStatistics cs = joinedColStats.get(key);
            if (cs != null) {
              cs.setCountDistint(minNDV);
            }
          }
        }


        joinColIdx++;

View Full Code Here

            List<ColStatistics> colStats = Lists.newArrayList();
            for (String key : outKeyColNames) {
              String prefixedKey = Utilities.ReduceField.KEY.toString() + "." + key;
              ExprNodeDesc end = colExprMap.get(prefixedKey);
              if (end != null) {
                ColStatistics cs = StatsUtils
                    .getColStatisticsFromExpression(conf, parentStats, end);
                if (cs != null) {
                  cs.setColumnName(prefixedKey);
                  colStats.add(cs);
                }
              }
            }


            for (String val : outValueColNames) {
              String prefixedVal = Utilities.ReduceField.VALUE.toString() + "." + val;
              ExprNodeDesc end = colExprMap.get(prefixedVal);
              if (end != null) {
                ColStatistics cs = StatsUtils
                    .getColStatisticsFromExpression(conf, parentStats, end);
                if (cs != null) {
                  cs.setColumnName(prefixedVal);
                  colStats.add(cs);
                }
              }
            }

View Full Code Here

        ExprNodeColumnDesc encd = (ExprNodeColumnDesc) pred;
        String colName = encd.getColumn();
        String tabAlias = encd.getTabAlias();
        String colType = encd.getTypeString();
        if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) {
          ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
          if (cs != null) {
            return cs.getNumTrues();
          }
        }


        // if not boolean column return half the number of rows
        return stats.getNumRows() / 2;

View Full Code Here

            ExprNodeColumnDesc encd = (ExprNodeColumnDesc) leaf;
            String colName = encd.getColumn();
            String tabAlias = encd.getTabAlias();
            String colType = encd.getTypeString();
            if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) {
              ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
              if (cs != null) {
                return cs.getNumFalses();
              }
            }
            // if not boolean column return half the number of rows
            return numRows / 2;
          }

View Full Code Here

0 1 2 3

TOP

Related Classes of org.apache.hadoop.hive.ql.plan.ColStatistics

org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable

org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc

org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory$FilterStatsRule

org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory$GroupByStatsRule

org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory$JoinStatsRule

org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory$ReduceSinkStatsRule

org.apache.hadoop.hive.ql.stats.StatsUtils

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.