Examples of StatsAggregator


Examples of org.apache.hadoop.hive.ql.stats.StatsAggregator

      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = StatsFactory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf));

      // publish stats
      fillStatMap("200", "");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("300", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));


      // aggregate existing stats
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("500", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("2000", usize0);

      // update which should not take any effect - plus the map published is a supset of supported
      // stats
      fillStatMap("190", "");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("290", "");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      // nothing changed
      rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("500", rows0);
      usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("2000", usize0);

      fillStatMap("500", "");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("500", "");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      // changed + the rawDataSize size was overwriten !!!
      rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("1000", rows0);
      usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("0", usize0);

      assertTrue(statsAggregator.cleanUp("file_0000"));

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out
          .println("StatsPublisher - (multiple updates + publishing subset of supported statistics) - OK");
    } catch (Throwable e) {
      e.printStackTrace();
View Full Code Here

Examples of org.apache.hadoop.hive.ql.stats.StatsAggregator

      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = StatsFactory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf));

      // publish stats
      fillStatMap("200", "1000");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("300", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      fillStatMap("400", "3000");
      assertTrue(statsPublisher.publishStat("file_00001_a", stats));
      fillStatMap("500", "4000");
      assertTrue(statsPublisher.publishStat("file_00001_b", stats));

      // cleanUp
      assertTrue(statsAggregator.cleanUp("file_00000"));

      // now clean-up just for one key
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("0", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("0", usize0);

      // this should still be in the table
      String rows1 = statsAggregator.aggregateStats("file_00001", StatsSetupConst.ROW_COUNT);
      assertEquals("900", rows1);
      String usize1 = statsAggregator.aggregateStats("file_00001",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("7000", usize1);

      assertTrue(statsAggregator.cleanUp("file_0000"));

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out.println("StatsAggregator - clean-up - OK");
    } catch (Throwable e) {
      e.printStackTrace();
      throw e;
View Full Code Here

Examples of org.apache.hadoop.hive.ql.stats.StatsAggregator

      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = factory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf, null));

      // publish stats
      fillStatMap("200", "1000");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("300", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      fillStatMap("400", "3000");
      assertTrue(statsPublisher.publishStat("file_00001_a", stats));
      fillStatMap("500", "4000");
      assertTrue(statsPublisher.publishStat("file_00001_b", stats));

      // cleanUp
      assertTrue(statsAggregator.cleanUp("file_00000"));

      // now clean-up just for one key
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("0", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("0", usize0);

      // this should still be in the table
      String rows1 = statsAggregator.aggregateStats("file_00001", StatsSetupConst.ROW_COUNT);
      assertEquals("900", rows1);
      String usize1 = statsAggregator.aggregateStats("file_00001",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("7000", usize1);

      assertTrue(statsAggregator.cleanUp("file_0000"));

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out.println("StatsAggregator - clean-up - OK");
    } catch (Throwable e) {
      e.printStackTrace();
      throw e;
View Full Code Here

Examples of org.apache.hadoop.hive.ql.stats.StatsAggregator

    return "STATS";
  }

  private int aggregateStats() {

    StatsAggregator statsAggregator = null;
    int ret = 0;

    try {
      // Stats setup:
      Warehouse wh = new Warehouse(conf);
      if (!getWork().getNoStatsAggregator() && !getWork().isNoScanAnalyzeCommand()) {
        try {
          statsAggregator = createStatsAggregator(conf);
        } catch (HiveException e) {
          if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
            throw e;
          }
          console.printError(ErrorMsg.STATS_SKIPPING_BY_ERROR.getErrorCodedMsg(e.toString()));
        }
      }

      List<Partition> partitions = getPartitionsList();
      boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC);

      String tableFullName = table.getDbName() + "." + table.getTableName();

      int maxPrefixLength = StatsFactory.getMaxPrefixLength(conf);

      // "counter" or "fs" type does not need to collect stats per task
      boolean taskIndependent = statsAggregator instanceof StatsCollectionTaskIndependent;
      if (partitions == null) {
        org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
        Map<String, String> parameters = tTable.getParameters();
        // non-partitioned tables:
        if (!existStats(parameters) && atomic) {
          return 0;
        }

        // The collectable stats for the aggregator needs to be cleared.
        // For eg. if a file is being loaded, the old number of rows are not valid
        if (work.isClearAggregatorStats()) {
          clearStats(parameters);
        }

        if (statsAggregator != null) {
          String prefix = getAggregationPrefix(taskIndependent, table, null);
          updateStats(statsAggregator, parameters, prefix, maxPrefixLength, atomic);
        }

        updateQuickStats(wh, parameters, tTable.getSd());

        // write table stats to metastore
        parameters.put(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK, StatsSetupConst.TRUE);

        db.alterTable(tableFullName, new Table(tTable));

        console.printInfo("Table " + tableFullName + " stats: [" + toString(parameters) + ']');
      } else {
        // Partitioned table:
        // Need to get the old stats of the partition
        // and update the table stats based on the old and new stats.
        List<Partition> updates = new ArrayList<Partition>();
        for (Partition partn : partitions) {
          //
          // get the old partition stats
          //
          org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
          Map<String, String> parameters = tPart.getParameters();
          if (!existStats(parameters) && atomic) {
            continue;
          }

          // The collectable stats for the aggregator needs to be cleared.
          // For eg. if a file is being loaded, the old number of rows are not valid
          if (work.isClearAggregatorStats()) {
            clearStats(parameters);
          }

          if (statsAggregator != null) {
            String prefix = getAggregationPrefix(taskIndependent, table, partn);
            updateStats(statsAggregator, parameters, prefix, maxPrefixLength, atomic);
          }

          updateQuickStats(wh, parameters, tPart.getSd());

          parameters.put(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK, StatsSetupConst.TRUE);
          updates.add(new Partition(table, tPart));

          console.printInfo("Partition " + tableFullName + partn.getSpec() +
              " stats: [" + toString(parameters) + ']');
        }
        if (!updates.isEmpty()) {
          db.alterPartitions(tableFullName, updates);
        }
      }

    } catch (Exception e) {
      console.printInfo("[Warning] could not update stats.",
          "Failed with exception " + e.getMessage() + "\n"
              + StringUtils.stringifyException(e));

      // Fail the query if the stats are supposed to be reliable
      if (work.isStatsReliable()) {
        ret = 1;
      }
    } finally {
      if (statsAggregator != null) {
        statsAggregator.closeConnection();
      }
    }
    // The return value of 0 indicates success,
    // anything else indicates failure
    return ret;
View Full Code Here

Examples of org.apache.hadoop.hive.ql.stats.StatsAggregator

    Task sourceTask = getWork().getSourceTask();
    if (sourceTask == null) {
      throw new HiveException(ErrorMsg.STATSAGGREGATOR_SOURCETASK_NULL.getErrorCodedMsg());
    }
    // manufacture a StatsAggregator
    StatsAggregator statsAggregator = factory.getStatsAggregator();
    if (!statsAggregator.connect(conf, sourceTask)) {
      throw new HiveException(ErrorMsg.STATSAGGREGATOR_CONNECTION_ERROR.getErrorCodedMsg(statsImpl));
    }
    return statsAggregator;
  }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.stats.StatsAggregator

    stats = new HashMap<String, String>();
  }

  @Override
  protected void tearDown() {
    StatsAggregator sa = factory.getStatsAggregator();
    assertNotNull(sa);
    assertTrue(sa.connect(conf, null));
    assertTrue(sa.cleanUp("file_0"));
    assertTrue(sa.closeConnection());
  }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.stats.StatsAggregator

      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = factory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf, null));

      // publish stats
      fillStatMap("200", "1000");
      assertTrue(statsPublisher.publishStat("file_00000", stats));
      fillStatMap("400", "3000");
      assertTrue(statsPublisher.publishStat("file_00001", stats));


      // aggregate existing stats
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("200", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("1000", usize0);

      String rows1 = statsAggregator.aggregateStats("file_00001", StatsSetupConst.ROW_COUNT);
      assertEquals("400", rows1);
      String usize1 = statsAggregator.aggregateStats("file_00001",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("3000", usize1);

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out
          .println("StatsPublisher - one stat published per key - aggregating matching key - OK");
    } catch (Throwable e) {
      e.printStackTrace();
View Full Code Here

Examples of org.apache.hadoop.hive.ql.stats.StatsAggregator

      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = factory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf, null));
      // statsAggregator.cleanUp("file_0000");
      // assertTrue(statsAggregator.connect(conf));

      // publish stats
      fillStatMap("200", "1000");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("300", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      fillStatMap("400", "3000");
      assertTrue(statsPublisher.publishStat("file_00001_a", stats));
      fillStatMap("500", "4000");
      assertTrue(statsPublisher.publishStat("file_00001_b", stats));

      // aggregate existing stats
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("500", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("3000", usize0);

      String rows1 = statsAggregator.aggregateStats("file_00001", StatsSetupConst.ROW_COUNT);
      assertEquals("900", rows1);
      String usize1 = statsAggregator.aggregateStats("file_00001",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("7000", usize1);

      // aggregate non-existent stats
      String rowsX = statsAggregator.aggregateStats("file_00002", StatsSetupConst.ROW_COUNT);
      assertEquals("0", rowsX);
      String usizeX = statsAggregator.aggregateStats("file_00002",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("0", usizeX);

      assertTrue(statsAggregator.cleanUp("file_0000"));

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out.println("StatsPublisher - basic functionality - OK");
    } catch (Throwable e) {
      e.printStackTrace();
      throw e;
View Full Code Here

Examples of org.apache.hadoop.hive.ql.stats.StatsAggregator

      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = factory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf, null));

      // publish stats
      fillStatMap("200", "1000");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("300", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      fillStatMap("400", "3000");
      assertTrue(statsPublisher.publishStat("file_00001_a", stats));
      fillStatMap("500", "4000");
      assertTrue(statsPublisher.publishStat("file_00001_b", stats));

      // update which should not take any effect
      fillStatMap("190", "1000");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("290", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      // update that should take effect
      fillStatMap("500", "5000");
      assertTrue(statsPublisher.publishStat("file_00001_a", stats));
      fillStatMap("600", "6000");
      assertTrue(statsPublisher.publishStat("file_00001_b", stats));

      // aggregate existing stats
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("500", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("3000", usize0);

      String rows1 = statsAggregator.aggregateStats("file_00001", StatsSetupConst.ROW_COUNT);
      assertEquals("1100", rows1);
      String usize1 = statsAggregator.aggregateStats("file_00001",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("11000", usize1);

      assertTrue(statsAggregator.cleanUp("file_0000"));

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out.println("StatsPublisher - multiple updates - OK");
    } catch (Throwable e) {
      e.printStackTrace();
      throw e;
View Full Code Here

Examples of org.apache.hadoop.hive.ql.stats.StatsAggregator

      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = factory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf, null));

      // publish stats
      fillStatMap("200", "");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("300", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));


      // aggregate existing stats
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("500", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("2000", usize0);

      // update which should not take any effect - plus the map published is a supset of supported
      // stats
      fillStatMap("190", "");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("290", "");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      // nothing changed
      rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("500", rows0);
      usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("2000", usize0);

      fillStatMap("500", "");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("500", "");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      // changed + the rawDataSize size was overwriten !!!
      rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("1000", rows0);
      usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("0", usize0);

      assertTrue(statsAggregator.cleanUp("file_0000"));

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out
          .println("StatsPublisher - (multiple updates + publishing subset of supported statistics) - OK");
    } catch (Throwable e) {
      e.printStackTrace();
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.