Examples of StatsWork


Examples of org.apache.hadoop.hive.ql.plan.StatsWork

   */
  public static void addStatsTask(FileSinkOperator nd, MoveTask mvTask,
      Task<? extends Serializable> currTask, HiveConf hconf) {

    MoveWork mvWork = mvTask.getWork();
    StatsWork statsWork = null;
    if (mvWork.getLoadTableWork() != null) {
      statsWork = new StatsWork(mvWork.getLoadTableWork());
    } else if (mvWork.getLoadFileWork() != null) {
      statsWork = new StatsWork(mvWork.getLoadFileWork());
    }
    assert statsWork != null : "Error when genereting StatsTask";

    statsWork.setSourceTask(currTask);
    statsWork.setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));

    if (currTask.getWork() instanceof MapredWork) {
      MapredWork mrWork = (MapredWork) currTask.getWork();
      mrWork.getMapWork().setGatheringStats(true);
      if (mrWork.getReduceWork() != null) {
        mrWork.getReduceWork().setGatheringStats(true);
      }
    } else {
      TezWork work = (TezWork) currTask.getWork();
      for (BaseWork w: work.getAllWork()) {
        w.setGatheringStats(true);
      }
    }

    // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix
    // in FileSinkDesc is used for stats publishing. They should be consistent.
    statsWork.setAggKey(nd.getConf().getStatsAggPrefix());
    Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, hconf);

    // mark the MapredWork and FileSinkOperator for gathering stats
    nd.getConf().setGatherStats(true);
    nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.StatsWork

   */
  public static void addStatsTask(FileSinkOperator nd, MoveTask mvTask,
      Task<? extends Serializable> currTask, HiveConf hconf) {

    MoveWork mvWork = mvTask.getWork();
    StatsWork statsWork = null;
    if (mvWork.getLoadTableWork() != null) {
      statsWork = new StatsWork(mvWork.getLoadTableWork());
    } else if (mvWork.getLoadFileWork() != null) {
      statsWork = new StatsWork(mvWork.getLoadFileWork());
    }
    assert statsWork != null : "Error when genereting StatsTask";

    statsWork.setSourceTask(currTask);
    statsWork.setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));

    if (currTask.getWork() instanceof MapredWork) {
      MapredWork mrWork = (MapredWork) currTask.getWork();
      mrWork.getMapWork().setGatheringStats(true);
      if (mrWork.getReduceWork() != null) {
        mrWork.getReduceWork().setGatheringStats(true);
      }
    } else {
      TezWork work = (TezWork) currTask.getWork();
      for (BaseWork w: work.getAllWork()) {
        w.setGatheringStats(true);
      }
    }

    // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix
    // in FileSinkDesc is used for stats publishing. They should be consistent.
    statsWork.setAggKey(nd.getConf().getStatsAggPrefix());
    Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, hconf);

    // mark the MapredWork and FileSinkOperator for gathering stats
    nd.getConf().setGatherStats(true);
    nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.StatsWork

            conf);
        truncateTask.addDependentTask(moveTsk);

        // Recalculate the HDFS stats if auto gather stats is set
        if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
          StatsWork statDesc;
          if (oldTblPartLoc.equals(newTblPartLoc)) {
            // If we're merging to the same location, we can avoid some metastore calls
            tableSpec tablepart = new tableSpec(this.db, conf, root);
            statDesc = new StatsWork(tablepart);
          } else {
            statDesc = new StatsWork(ltd);
          }
          statDesc.setNoStatsAggregator(true);
          statDesc.setClearAggregatorStats(true);
          statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
          Task<? extends Serializable> statTask = TaskFactory.get(statDesc, conf);
          moveTsk.addDependentTask(statTask);
        }
      } catch (HiveException e) {
        throw new SemanticException(e);
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.StatsWork

      Task<MoveWork> moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false),
          conf);
      mergeTask.addDependentTask(moveTsk);

      if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
        StatsWork statDesc;
        if (oldTblPartLoc.equals(newTblPartLoc)) {
          // If we're merging to the same location, we can avoid some metastore calls
          tableSpec tablepart = new tableSpec(this.db, conf, tablePartAST);
          statDesc = new StatsWork(tablepart);
        } else {
          statDesc = new StatsWork(ltd);
        }
        statDesc.setNoStatsAggregator(true);
        statDesc.setClearAggregatorStats(true);
        statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
        Task<? extends Serializable> statTask = TaskFactory.get(statDesc, conf);
        moveTsk.addDependentTask(statTask);
      }

      rootTasks.add(mergeTask);
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.StatsWork

      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
      // The plan consists of a simple TezTask followed by a StatsTask.
      // The Tez task is just a simple TableScanOperator
     
      StatsWork statsWork = new StatsWork(parseInfo.getTableSpec());
      statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
      statsWork.setSourceTask(context.currentTask);
      statsWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
      Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseContext.getConf());
      context.currentTask.addDependentTask(statsTask);

      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
      // The plan consists of a StatsTask only.
      if (parseInfo.isNoScanAnalyzeCommand()) {
        statsTask.setParentTasks(null);
        statsWork.setNoScanAnalyzeCommand(true);
        context.rootTasks.remove(context.currentTask);
        context.rootTasks.add(statsTask);
      }

      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.StatsWork

    // Some stats like number of rows require a scan of the data
    // However, some other stats, like number of files, do not require a complete scan
    // Update the stats which do not require a complete scan.
    Task<? extends Serializable> statTask = null;
    if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
      StatsWork statDesc = new StatsWork(loadTableWork);
      statDesc.setNoStatsAggregator(true);
      statDesc.setClearAggregatorStats(true);
      statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
      statTask = TaskFactory.get(statDesc, conf);
    }

    // HIVE-3334 has been filed for load file with index auto update
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.StatsWork

            conf);
        truncateTask.addDependentTask(moveTsk);

        // Recalculate the HDFS stats if auto gather stats is set
        if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
          StatsWork statDesc;
          if (oldTblPartLoc.equals(newTblPartLoc)) {
            // If we're merging to the same location, we can avoid some metastore calls
            tableSpec tablepart = new tableSpec(this.db, conf, root);
            statDesc = new StatsWork(tablepart);
          } else {
            statDesc = new StatsWork(ltd);
          }
          statDesc.setNoStatsAggregator(true);
          statDesc.setClearAggregatorStats(true);
          statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
          Task<? extends Serializable> statTask = TaskFactory.get(statDesc, conf);
          moveTsk.addDependentTask(statTask);
        }
      } catch (HiveException e) {
        throw new SemanticException(e);
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.StatsWork

      Task<MoveWork> moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false),
          conf);
      mergeTask.addDependentTask(moveTsk);

      if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
        StatsWork statDesc;
        if (oldTblPartLoc.equals(newTblPartLoc)) {
          // If we're merging to the same location, we can avoid some metastore calls
          tableSpec tablepart = new tableSpec(this.db, conf, tablePartAST);
          statDesc = new StatsWork(tablepart);
        } else {
          statDesc = new StatsWork(ltd);
        }
        statDesc.setNoStatsAggregator(true);
        statDesc.setClearAggregatorStats(true);
        statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
        Task<? extends Serializable> statTask = TaskFactory.get(statDesc, conf);
        moveTsk.addDependentTask(statTask);
      }

      rootTasks.add(mergeTask);
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.StatsWork

   */
  private void addStatsTask(FileSinkOperator nd, MoveTask mvTask,
      Task<? extends Serializable> currTask, HiveConf hconf) {

    MoveWork mvWork = ((MoveTask) mvTask).getWork();
    StatsWork statsWork = null;
    if (mvWork.getLoadTableWork() != null) {
      statsWork = new StatsWork(mvWork.getLoadTableWork());
    } else if (mvWork.getLoadFileWork() != null) {
      statsWork = new StatsWork(mvWork.getLoadFileWork());
    }
    assert statsWork != null : "Error when genereting StatsTask";
    statsWork.setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
    MapredWork mrWork = (MapredWork) currTask.getWork();

    // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix
    // in FileSinkDesc is used for stats publishing. They should be consistent.
    statsWork.setAggKey(((FileSinkOperator) nd).getConf().getStatsAggPrefix());
    Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, hconf);

    // mark the MapredWork and FileSinkOperator for gathering stats
    nd.getConf().setGatherStats(true);
    mrWork.getMapWork().setGatheringStats(true);
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.StatsWork

    // Some stats like number of rows require a scan of the data
    // However, some other stats, like number of files, do not require a complete scan
    // Update the stats which do not require a complete scan.
    Task<? extends Serializable> statTask = null;
    if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
      StatsWork statDesc = new StatsWork(loadTableWork);
      statDesc.setNoStatsAggregator(true);
      statDesc.setClearAggregatorStats(true);
      statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
      statTask = TaskFactory.get(statDesc, conf);
    }

    // HIVE-3334 has been filed for load file with index auto update
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.