Examples of org.apache.hadoop.hive.ql.stats.StatsPublisher

org.apache.hadoop.hive.ql.stats.StatsPublisher
An interface for any possible implementation for publishing statistics.

        getConf().getTableInfo());
  }


  private void publishStats() {
    // Initializing a stats publisher
    StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);


    if (statsPublisher == null || !statsPublisher.connect(hconf)) {
      // just return, stats gathering should not block the main query
      LOG.error("StatsPublishing error: cannot connect to database");
      return;
    }


    String taskID = Utilities.getTaskIdFromFilename(Utilities.getTaskId(hconf));
    String spSpec = conf.getStaticSpec() != null ? conf.getStaticSpec() : "";


    for (String fspKey : valToPaths.keySet()) {
      FSPaths fspValue = valToPaths.get(fspKey);
      String key;


      // construct the key(fileID) to insert into the intermediate stats table
      if (fspKey == "") {
        // for non-partitioned/static partitioned table, the key for temp storage is
        // common key prefix + static partition spec +  taskID
        key = conf.getStatsAggPrefix() + spSpec + taskID ;
      } else {
        // for partitioned table, the key is
        // common key prefix + static partition spec + DynamicPartSpec + taskID
        key = conf.getStatsAggPrefix() + spSpec + fspKey + Path.SEPARATOR + taskID;
      }
      statsPublisher.publishStat(key, StatsSetupConst.ROW_COUNT, Long.toString(fspValue.stat.getNumRows()));
    }
    statsPublisher.closeConnection();
  }

View Full Code Here

    return OperatorType.TABLESCAN;
  }


  private void publishStats() {
    // Initializing a stats publisher
    StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);
    if (!statsPublisher.connect(jc)) {
      // just return, stats gathering should not block the main query.
      LOG.info("StatsPublishing error: cannot connect to database.");
      return;
    }


    String key;
    String taskID = Utilities.getTaskIdFromFilename(Utilities.getTaskId(hconf));
    if (partitionSpecs.isEmpty()) {
      // In case of a non-partitioned table, the key for temp storage is just
      // "tableName + taskID"
      key = conf.getStatsAggPrefix() + taskID;
    } else {
      // In case of a partition, the key for temp storage is
      // "tableName + partitionSpecs + taskID"
      key = conf.getStatsAggPrefix() + partitionSpecs + Path.SEPARATOR + taskID;
    }
    statsPublisher.publishStat(key, StatsSetupConst.ROW_COUNT, Long.toString(stat.getNumRows()));
    statsPublisher.closeConnection();
  }

View Full Code Here

        String statsImplementationClass = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
        StatsFactory.setImplementation(statsImplementationClass, conf);
        if (work.isNoScanAnalyzeCommand()){
          // initialize stats publishing table for noscan which has only stats task
          // the rest of MR task following stats task initializes it in ExecDriver.java
          StatsPublisher statsPublisher = StatsFactory.getStatsPublisher();
          if (!statsPublisher.init(conf)) { // creating stats table if not exists
            if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
              throw
                new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
            }
          }

View Full Code Here

      // make this client wait if job tracker is not behaving well.
      Throttle.checkJobTracker(job, LOG);


      if (work.isGatheringStats()) {
        // initialize stats publishing table
        StatsPublisher statsPublisher;
        String statsImplementationClass = HiveConf.getVar(job, HiveConf.ConfVars.HIVESTATSDBCLASS);
        if (StatsFactory.setImplementation(statsImplementationClass, job)) {
          statsPublisher = StatsFactory.getStatsPublisher();
          if (!statsPublisher.init(job)) { // creating stats table if not exists
            if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
              throw
                new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
            }
          }

View Full Code Here

      // make this client wait if job tracker is not behaving well.
      Throttle.checkJobTracker(job, LOG);


      if (mWork.isGatheringStats() || (rWork != null && rWork.isGatheringStats())) {
        // initialize stats publishing table
        StatsPublisher statsPublisher;
        String statsImplementationClass = HiveConf.getVar(job, HiveConf.ConfVars.HIVESTATSDBCLASS);
        if (StatsFactory.setImplementation(statsImplementationClass, job)) {
          statsPublisher = StatsFactory.getStatsPublisher();
          if (!statsPublisher.init(job)) { // creating stats table if not exists
            if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
              throw
                new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
            }
          }

View Full Code Here

        getConf().getTableInfo());
  }


  private void publishStats() {
    // Initializing a stats publisher
    StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);


    if (statsPublisher == null) {
      // just return, stats gathering should not block the main query
      LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
      return;
    }
    if (!statsPublisher.connect(hconf)) {
      // just return, stats gathering should not block the main query
      LOG.error("StatsPublishing error: cannot connect to database");
      return;
    }


    String taskID = Utilities.getTaskIdFromFilename(Utilities.getTaskId(hconf));
    String spSpec = conf.getStaticSpec() != null ? conf.getStaticSpec() : "";


    for (String fspKey : valToPaths.keySet()) {
      FSPaths fspValue = valToPaths.get(fspKey);
      String key;


      // construct the key(fileID) to insert into the intermediate stats table
      if (fspKey == "") {
        // for non-partitioned/static partitioned table, the key for temp storage is
        // common key prefix + static partition spec + taskID
        key = conf.getStatsAggPrefix() + spSpec + taskID;
      } else {
        // for partitioned table, the key is
        // common key prefix + static partition spec + DynamicPartSpec + taskID
        key = conf.getStatsAggPrefix() + spSpec + fspKey + Path.SEPARATOR + taskID;
      }
      Map<String, String> statsToPublish = new HashMap<String, String>();
      for (String statType : fspValue.stat.getStoredStats()) {
        statsToPublish.put(statType, Long.toString(fspValue.stat.getStat(statType)));
      }
      statsPublisher.publishStat(key, statsToPublish);
    }
    statsPublisher.closeConnection();
  }

View Full Code Here

      // make this client wait if job tracker is not behaving well.
      Throttle.checkJobTracker(job, LOG);


      if (work.isGatheringStats()) {
        // initialize stats publishing table
        StatsPublisher statsPublisher;
        String statsImplementationClass = HiveConf.getVar(job, HiveConf.ConfVars.HIVESTATSDBCLASS);
        if (StatsFactory.setImplementation(statsImplementationClass, job)) {
          statsPublisher = StatsFactory.getStatsPublisher();
          statsPublisher.init(job); // creating stats table if not exists
        }
      }


      // Finally SUBMIT the JOB!
      rj = jc.submitJob(job);

View Full Code Here

    return OperatorType.TABLESCAN;
  }


  private void publishStats() {
    // Initializing a stats publisher
    StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);
    if (!statsPublisher.connect(jc)) {
      // just return, stats gathering should not block the main query.
      LOG.info("StatsPublishing error: cannot connect to database.");
      return;
    }


    String key;
    String taskID = Utilities.getTaskIdFromFilename(Utilities.getTaskId(hconf));
    Map<String, String> statsToPublish = new HashMap<String, String>();


    for (String pspecs : stats.keySet()) {
      statsToPublish.clear();
      if (pspecs.isEmpty()) {
        // In case of a non-partitioned table, the key for temp storage is just
        // "tableName + taskID"
        key = conf.getStatsAggPrefix() + taskID;
      } else {
        // In case of a partition, the key for temp storage is
        // "tableName + partitionSpecs + taskID"
        key = conf.getStatsAggPrefix() + pspecs + Path.SEPARATOR + taskID;
      }
      for(String statType : stats.get(pspecs).getStoredStats()) {
        statsToPublish.put(statType, Long.toString(stats.get(pspecs).getStat(statType)));
      }
      statsPublisher.publishStat(key, statsToPublish);
      LOG.info("publishing : " + key + " : " + statsToPublish.toString());
    }
    statsPublisher.closeConnection();
  }

View Full Code Here


  private void publishStats() throws HiveException {
    boolean isStatsReliable = conf.isStatsReliable();


    // Initializing a stats publisher
    StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);


    if (statsPublisher == null) {
      // just return, stats gathering should not block the main query
      LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
      if (isStatsReliable) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
      }
      return;
    }


    if (!statsPublisher.connect(hconf)) {
      // just return, stats gathering should not block the main query
      LOG.error("StatsPublishing error: cannot connect to database");
      if (isStatsReliable) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg());
      }
      return;
    }


    String taskID = Utilities.getTaskIdFromFilename(Utilities.getTaskId(hconf));
    String spSpec = conf.getStaticSpec();


    int maxKeyLength = conf.getMaxStatsKeyPrefixLength();
    boolean taskIndependent = statsPublisher instanceof StatsCollectionTaskIndependent;


    for (Map.Entry<String, FSPaths> entry : valToPaths.entrySet()) {
      String fspKey = entry.getKey();     // DP/LB
      FSPaths fspValue = entry.getValue();


      // for bucketed tables, hive.optimize.sort.dynamic.partition optimization
      // adds the taskId to the fspKey.
      if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) {
        taskID = Utilities.getTaskIdFromFilename(fspKey);
        // if length of (prefix/ds=__HIVE_DEFAULT_PARTITION__/000000_0) is greater than max key prefix
        // and if (prefix/ds=10/000000_0) is less than max key prefix, then former will get hashed
        // to a smaller prefix (MD5hash/000000_0) and later will stored as such in staging stats table.
        // When stats gets aggregated in StatsTask only the keys that starts with "prefix" will be fetched.
        // Now that (prefix/ds=__HIVE_DEFAULT_PARTITION__) is hashed to a smaller prefix it will
        // not be retrieved from staging table and hence not aggregated. To avoid this issue
        // we will remove the taskId from the key which is redundant anyway.
        fspKey = fspKey.split(taskID)[0];
      }


      // split[0] = DP, split[1] = LB
      String[] split = splitKey(fspKey);
      String dpSpec = split[0];
      String lbSpec = split[1];


      String prefix;
      String postfix=null;
      if (taskIndependent) {
        // key = "database.table/SP/DP/"LB/
        prefix = conf.getTableInfo().getTableName();
      } else {
        // key = "prefix/SP/DP/"LB/taskID/
        prefix = conf.getStatsAggPrefix();
        postfix = Utilities.join(lbSpec, taskID);
      }
      prefix = Utilities.join(prefix, spSpec, dpSpec);
      prefix = Utilities.getHashedStatsPrefix(prefix, maxKeyLength);


      String key = Utilities.join(prefix, postfix);


      Map<String, String> statsToPublish = new HashMap<String, String>();
      for (String statType : fspValue.stat.getStoredStats()) {
        statsToPublish.put(statType, Long.toString(fspValue.stat.getStat(statType)));
      }
      if (!statsPublisher.publishStat(key, statsToPublish)) {
        // The original exception is lost.
        // Not changing the interface to maintain backward compatibility
        if (isStatsReliable) {
          throw new HiveException(ErrorMsg.STATSPUBLISHER_PUBLISHING_ERROR.getErrorCodedMsg());
        }
      }
    }
    if (!statsPublisher.closeConnection()) {
      // The original exception is lost.
      // Not changing the interface to maintain backward compatibility
      if (isStatsReliable) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_CLOSING_ERROR.getErrorCodedMsg());
      }

View Full Code Here

      throw new HiveException(ErrorMsg.GENERIC_ERROR.getErrorCodedMsg());
    }


    // initialize stats publisher if necessary
    if (work.isGatheringStats()) {
      StatsPublisher statsPublisher;
      StatsFactory factory = StatsFactory.newFactory(conf);
      if (factory != null) {
        statsPublisher = factory.getStatsPublisher();
        if (!statsPublisher.init(conf)) { // creating stats table if not exists
          if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
            throw
              new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
          }
        }

View Full Code Here

0 1 2 3 4 5 6

TOP

Related Classes of org.apache.hadoop.hive.ql.stats.StatsPublisher

org.apache.hadoop.hive.ql.exec.ExecDriver

org.apache.hadoop.hive.ql.exec.FileSinkOperator

org.apache.hadoop.hive.ql.exec.mr.ExecDriver

org.apache.hadoop.hive.ql.exec.StatsTask

org.apache.hadoop.hive.ql.exec.TableScanOperator

org.apache.hadoop.hive.ql.exec.TestStatsPublisherEnhanced

org.apache.hadoop.hive.ql.exec.tez.DagUtils

org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanMapper

org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanTask

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.