Package org.apache.hadoop.hive.ql.stats

Examples of org.apache.hadoop.hive.ql.stats.StatsPublisher


  public void testStatsPublisherOneStat() throws Throwable {
    try {
      System.out.println("StatsPublisher - one stat published per key - aggregating matching key");

      // instantiate stats publisher
      StatsPublisher statsPublisher = Utilities.getStatsPublisher((JobConf) conf);
      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = factory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf, null));

      // publish stats
      fillStatMap("200", "1000");
      assertTrue(statsPublisher.publishStat("file_00000", stats));
      fillStatMap("400", "3000");
      assertTrue(statsPublisher.publishStat("file_00001", stats));


      // aggregate existing stats
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("200", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("1000", usize0);

      String rows1 = statsAggregator.aggregateStats("file_00001", StatsSetupConst.ROW_COUNT);
      assertEquals("400", rows1);
      String usize1 = statsAggregator.aggregateStats("file_00001",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("3000", usize1);

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out
          .println("StatsPublisher - one stat published per key - aggregating matching key - OK");
    } catch (Throwable e) {
View Full Code Here


  public void testStatsPublisher() throws Throwable {
    try {
      System.out.println("StatsPublisher - basic functionality");

      // instantiate stats publisher
      StatsPublisher statsPublisher = Utilities.getStatsPublisher(
          (JobConf) conf);
      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = factory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf, null));
      // statsAggregator.cleanUp("file_0000");
      // assertTrue(statsAggregator.connect(conf));

      // publish stats
      fillStatMap("200", "1000");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("300", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      fillStatMap("400", "3000");
      assertTrue(statsPublisher.publishStat("file_00001_a", stats));
      fillStatMap("500", "4000");
      assertTrue(statsPublisher.publishStat("file_00001_b", stats));

      // aggregate existing stats
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("500", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("3000", usize0);

      String rows1 = statsAggregator.aggregateStats("file_00001", StatsSetupConst.ROW_COUNT);
      assertEquals("900", rows1);
      String usize1 = statsAggregator.aggregateStats("file_00001",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("7000", usize1);

      // aggregate non-existent stats
      String rowsX = statsAggregator.aggregateStats("file_00002", StatsSetupConst.ROW_COUNT);
      assertEquals("0", rowsX);
      String usizeX = statsAggregator.aggregateStats("file_00002",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("0", usizeX);

      assertTrue(statsAggregator.cleanUp("file_0000"));

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out.println("StatsPublisher - basic functionality - OK");
    } catch (Throwable e) {
      e.printStackTrace();
View Full Code Here

  public void testStatsPublisherMultipleUpdates() throws Throwable {
    try {
      System.out.println("StatsPublisher - multiple updates");

      // instantiate stats publisher
      StatsPublisher statsPublisher = Utilities.getStatsPublisher((JobConf) conf);
      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = factory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf, null));

      // publish stats
      fillStatMap("200", "1000");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("300", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      fillStatMap("400", "3000");
      assertTrue(statsPublisher.publishStat("file_00001_a", stats));
      fillStatMap("500", "4000");
      assertTrue(statsPublisher.publishStat("file_00001_b", stats));

      // update which should not take any effect
      fillStatMap("190", "1000");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("290", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      // update that should take effect
      fillStatMap("500", "5000");
      assertTrue(statsPublisher.publishStat("file_00001_a", stats));
      fillStatMap("600", "6000");
      assertTrue(statsPublisher.publishStat("file_00001_b", stats));

      // aggregate existing stats
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("500", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("3000", usize0);

      String rows1 = statsAggregator.aggregateStats("file_00001", StatsSetupConst.ROW_COUNT);
      assertEquals("1100", rows1);
      String usize1 = statsAggregator.aggregateStats("file_00001",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("11000", usize1);

      assertTrue(statsAggregator.cleanUp("file_0000"));

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out.println("StatsPublisher - multiple updates - OK");
    } catch (Throwable e) {
      e.printStackTrace();
View Full Code Here

    try {
      System.out
          .println("StatsPublisher - (multiple updates + publishing subset of supported statistics)");

      // instantiate stats publisher
      StatsPublisher statsPublisher = Utilities.getStatsPublisher((JobConf) conf);
      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = factory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf, null));

      // publish stats
      fillStatMap("200", "");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("300", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));


      // aggregate existing stats
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("500", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("2000", usize0);

      // update which should not take any effect - plus the map published is a supset of supported
      // stats
      fillStatMap("190", "");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("290", "");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      // nothing changed
      rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("500", rows0);
      usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("2000", usize0);

      fillStatMap("500", "");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("500", "");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      // changed + the rawDataSize size was overwriten !!!
      rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("1000", rows0);
      usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("0", usize0);

      assertTrue(statsAggregator.cleanUp("file_0000"));

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out
          .println("StatsPublisher - (multiple updates + publishing subset of supported statistics) - OK");
    } catch (Throwable e) {
View Full Code Here

  public void testStatsAggregatorCleanUp() throws Throwable {
    try {
      System.out.println("StatsAggregator - clean-up");

      // instantiate stats publisher
      StatsPublisher statsPublisher = Utilities.getStatsPublisher((JobConf) conf);
      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = factory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf, null));

      // publish stats
      fillStatMap("200", "1000");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("300", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      fillStatMap("400", "3000");
      assertTrue(statsPublisher.publishStat("file_00001_a", stats));
      fillStatMap("500", "4000");
      assertTrue(statsPublisher.publishStat("file_00001_b", stats));

      // cleanUp
      assertTrue(statsAggregator.cleanUp("file_00000"));

      // now clean-up just for one key
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("0", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("0", usize0);

      // this should still be in the table
      String rows1 = statsAggregator.aggregateStats("file_00001", StatsSetupConst.ROW_COUNT);
      assertEquals("900", rows1);
      String usize1 = statsAggregator.aggregateStats("file_00001",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("7000", usize1);

      assertTrue(statsAggregator.cleanUp("file_0000"));

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out.println("StatsAggregator - clean-up - OK");
    } catch (Throwable e) {
      e.printStackTrace();
View Full Code Here

    if (factory == null) {
      throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
    }
    // initialize stats publishing table for noscan which has only stats task
    // the rest of MR task following stats task initializes it in ExecDriver.java
    StatsPublisher statsPublisher = factory.getStatsPublisher();
    if (!statsPublisher.init(conf)) { // creating stats table if not exists
      throw new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
    }
    Task sourceTask = getWork().getSourceTask();
    if (sourceTask == null) {
      throw new HiveException(ErrorMsg.STATSAGGREGATOR_SOURCETASK_NULL.getErrorCodedMsg());
View Full Code Here

  private void publishStats() throws HiveException {
    boolean isStatsReliable = conf.isStatsReliable();

    // Initializing a stats publisher
    StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);
    if (!statsPublisher.connect(jc)) {
      // just return, stats gathering should not block the main query.
      LOG.info("StatsPublishing error: cannot connect to database.");
      if (isStatsReliable) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg());
      }
      return;
    }

    String taskID = Utilities.getTaskIdFromFilename(Utilities.getTaskId(hconf));
    Map<String, String> statsToPublish = new HashMap<String, String>();

    for (String pspecs : stats.keySet()) {
      statsToPublish.clear();
      String prefix = Utilities.join(conf.getStatsAggPrefix(), pspecs);

      int maxKeyLength = conf.getMaxStatsKeyPrefixLength();
      String key = Utilities.getHashedStatsPrefix(prefix, maxKeyLength);
      if (!(statsPublisher instanceof StatsCollectionTaskIndependent)) {
        // stats publisher except counter or fs type needs postfix 'taskID'
        key = Utilities.join(prefix, taskID);
      }
      for(String statType : stats.get(pspecs).getStoredStats()) {
        statsToPublish.put(statType, Long.toString(stats.get(pspecs).getStat(statType)));
      }
      if (!statsPublisher.publishStat(key, statsToPublish)) {
        if (isStatsReliable) {
          throw new HiveException(ErrorMsg.STATSPUBLISHER_PUBLISHING_ERROR.getErrorCodedMsg());
        }
      }
      LOG.info("publishing : " + key + " : " + statsToPublish.toString());
    }
    if (!statsPublisher.closeConnection()) {
      if (isStatsReliable) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_CLOSING_ERROR.getErrorCodedMsg());
      }
    }
  }
View Full Code Here

      throw new HiveException(ErrorMsg.GENERIC_ERROR.getErrorCodedMsg());
    }

    // initialize stats publisher if necessary
    if (work.isGatheringStats()) {
      StatsPublisher statsPublisher;
      StatsFactory factory = StatsFactory.newFactory(conf);
      if (factory != null) {
        statsPublisher = factory.getStatsPublisher();
        if (!statsPublisher.init(conf)) { // creating stats table if not exists
          if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
            throw
              new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
          }
        }
View Full Code Here

      // make this client wait if job trcker is not behaving well.
      Throttle.checkJobTracker(job, LOG);

      if (work.isGatheringStats()) {
        // initialize stats publishing table
        StatsPublisher statsPublisher;
        StatsFactory factory = StatsFactory.newFactory(job);
        if (factory != null) {
          statsPublisher = factory.getStatsPublisher();
          if (!statsPublisher.init(job)) { // creating stats table if not exists
            if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
              throw
                new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
            }
          }
View Full Code Here

   *
   * @throws HiveException
   */
  private void publishStats() throws HiveException {
    // Initializing a stats publisher
    StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);

    if (statsPublisher == null) {
      // just return, stats gathering should not block the main query
      LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
      throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
    }

    if (!statsPublisher.connect(jc)) {
      // should fail since stats gathering is main purpose of the job
      LOG.error("StatsPublishing error: cannot connect to database");
      throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg());
    }

    int maxPrefixLength = StatsFactory.getMaxPrefixLength(jc);
    // construct key used to store stats in intermediate db
    String key = Utilities.getHashedStatsPrefix(statsAggKeyPrefix, maxPrefixLength);
    if (!(statsPublisher instanceof CounterStatsPublisher)) {
      String taskID = Utilities.getTaskIdFromFilename(Utilities.getTaskId(jc));
      key = Utilities.join(key, taskID);
    }

    // construct statistics to be stored
    Map<String, String> statsToPublish = new HashMap<String, String>();
    statsToPublish.put(StatsSetupConst.RAW_DATA_SIZE, Long.toString(uncompressedFileSize));
    statsToPublish.put(StatsSetupConst.ROW_COUNT, Long.toString(rowNo));

    if (!statsPublisher.publishStat(key, statsToPublish)) {
      // The original exception is lost.
      // Not changing the interface to maintain backward compatibility
      throw new HiveException(ErrorMsg.STATSPUBLISHER_PUBLISHING_ERROR.getErrorCodedMsg());
    }

    if (!statsPublisher.closeConnection()) {
      // The original exception is lost.
      // Not changing the interface to maintain backward compatibility
      throw new HiveException(ErrorMsg.STATSPUBLISHER_CLOSING_ERROR.getErrorCodedMsg());
    }
  }
View Full Code Here

TOP

Related Classes of org.apache.hadoop.hive.ql.stats.StatsPublisher

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.