Package org.apache.hadoop.hive.ql.metadata

Examples of org.apache.hadoop.hive.ql.metadata.Table


    return 0;
  }

  private HiveLockObject getHiveObject(String tabName,
                                       Map<String, String> partSpec) throws HiveException {
    Table tbl = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tabName);
    if (tbl == null) {
      throw new HiveException("Table " + tabName + " does not exist");
    }

    HiveLockObject obj = null;
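
The snippet above resolves a Table from the default database and treats a null result as an error. A minimal sketch of that lookup pattern as a standalone helper (the helper name is hypothetical, the Hive handle is obtained via Hive.get(conf), and the usual org.apache.hadoop.hive.ql.metadata and metastore imports are assumed):

  // Hypothetical helper; assumes a HiveConf named conf is supplied by the caller.
  private Table lookupDefaultDbTable(HiveConf conf, String tabName) throws HiveException {
    Hive db = Hive.get(conf);  // metastore-backed Hive handle
    Table tbl = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tabName);
    if (tbl == null) {
      throw new HiveException("Table " + tabName + " does not exist");
    }
    return tbl;
  }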


    // get the tables for the desired pattern - populate the output stream
    List<Table> tbls = new ArrayList<Table>();
    Map<String, String> part = showTblStatus.getPartSpec();
    Partition par = null;
    if (part != null) {
      Table tbl = db.getTable(showTblStatus.getDbName(), showTblStatus.getPattern());
      par = db.getPartition(tbl, part, false);
      if (par == null) {
        throw new HiveException("Partition " + part + " for table "
            + showTblStatus.getPattern() + " does not exist.");
      }
      tbls.add(tbl);
    } else {
      LOG.info("pattern: " + showTblStatus.getPattern());
      List<String> tblStr = db.getTablesForDb(showTblStatus.getDbName(),
          showTblStatus.getPattern());
      SortedSet<String> sortedTbls = new TreeSet<String>(tblStr);
      Iterator<String> iterTbls = sortedTbls.iterator();
      while (iterTbls.hasNext()) {
        // create a row per table name
        String tblName = iterTbls.next();
        Table tbl = db.getTable(showTblStatus.getDbName(), tblName);
        tbls.add(tbl);
      }
      LOG.info("results : " + tblStr.size());
    }

    // write the results in the file
    try {
      Path resFile = new Path(showTblStatus.getResFile());
      FileSystem fs = resFile.getFileSystem(conf);
      DataOutput outStream = fs.create(resFile);

      Iterator<Table> iterTables = tbls.iterator();
      while (iterTables.hasNext()) {
        // create a row per table name
        Table tbl = iterTables.next();
        String tableName = tbl.getTableName();
        String tblLoc = null;
        String inputFormattCls = null;
        String outputFormattCls = null;
        if (part != null) {
          if (par != null) {
            tblLoc = par.getDataLocation().toString();
            inputFormattCls = par.getInputFormatClass().getName();
            outputFormattCls = par.getOutputFormatClass().getName();
          }
        } else {
          tblLoc = tbl.getDataLocation().toString();
          inputFormattCls = tbl.getInputFormatClass().getName();
          outputFormattCls = tbl.getOutputFormatClass().getName();
        }

        String owner = tbl.getOwner();
        List<FieldSchema> cols = tbl.getCols();
        String ddlCols = MetaStoreUtils.getDDLFromFieldSchema("columns", cols);
        boolean isPartitioned = tbl.isPartitioned();
        String partitionCols = "";
        if (isPartitioned) {
          partitionCols = MetaStoreUtils.getDDLFromFieldSchema(
              "partition_columns", tbl.getPartCols());
        }

        outStream.writeBytes("tableName:" + tableName);
        outStream.write(terminator);
        outStream.writeBytes("owner:" + owner);
        outStream.write(terminator);
        outStream.writeBytes("location:" + tblLoc);
        outStream.write(terminator);
        outStream.writeBytes("inputformat:" + inputFormattCls);
        outStream.write(terminator);
        outStream.writeBytes("outputformat:" + outputFormattCls);
        outStream.write(terminator);
        outStream.writeBytes("columns:" + ddlCols);
        outStream.write(terminator);
        outStream.writeBytes("partitioned:" + isPartitioned);
        outStream.write(terminator);
        outStream.writeBytes("partitionColumns:" + partitionCols);
        outStream.write(terminator);
        // output file system information
        Path tablLoc = tbl.getPath();
        List<Path> locations = new ArrayList<Path>();
        if (isPartitioned) {
          if (par == null) {
            for (Partition curPart : db.getPartitions(tbl)) {
              locations.add(new Path(curPart.getTPartition().getSd()
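
The status output above is assembled entirely from Table accessors. A minimal sketch that prints the same fields for a single table (the helper name is hypothetical; printing to System.out stands in for the result file used above):

  // Hypothetical helper mirroring the SHOW TABLE EXTENDED status fields above.
  private void printTableStatus(Table tbl) {
    System.out.println("tableName: " + tbl.getTableName());
    System.out.println("owner: " + tbl.getOwner());
    System.out.println("location: " + tbl.getDataLocation().toString());
    System.out.println("inputformat: " + tbl.getInputFormatClass().getName());
    System.out.println("outputformat: " + tbl.getOutputFormatClass().getName());
    System.out.println("columns: "
        + MetaStoreUtils.getDDLFromFieldSchema("columns", tbl.getCols()));
    System.out.println("partitioned: " + tbl.isPartitioned());
    if (tbl.isPartitioned()) {
      System.out.println("partitionColumns: "
          + MetaStoreUtils.getDDLFromFieldSchema("partition_columns", tbl.getPartCols()));
    }
  }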

    String colPath = descTbl.getTableName();
    String tableName = colPath.substring(0,
        colPath.indexOf('.') == -1 ? colPath.length() : colPath.indexOf('.'));

    // describe the table - populate the output stream
    Table tbl = db.getTable(db.getCurrentDatabase(), tableName, false);
    Partition part = null;
    try {
      Path resFile = new Path(descTbl.getResFile());
      if (tbl == null) {
        FileSystem fs = resFile.getFileSystem(conf);
        // the result file must be created for writing, not opened for reading
        DataOutput outStream = fs.create(resFile);
        String errMsg = "Table " + tableName + " does not exist";
        outStream.write(errMsg.getBytes("UTF-8"));
        ((FSDataOutputStream) outStream).close();
        return 0;
      }
      if (descTbl.getPartSpec() != null) {
        part = db.getPartition(tbl, descTbl.getPartSpec(), false);
        if (part == null) {
          FileSystem fs = resFile.getFileSystem(conf);
          // the result file must be created for writing, not opened for reading
          DataOutput outStream = fs.create(resFile);
          String errMsg = "Partition " + descTbl.getPartSpec() + " for table "
              + tableName + " does not exist";
          outStream.write(errMsg.getBytes("UTF-8"));
          ((FSDataOutputStream) outStream).close();
          return 0;
        }
        tbl = part.getTable();
      }
    } catch (FileNotFoundException e) {
      LOG.info("describe table: " + stringifyException(e));
      return 1;
    } catch (IOException e) {
      LOG.info("describe table: " + stringifyException(e));
      return 1;
    }

    try {

      LOG.info("DDLTask: got data for " + tbl.getTableName());

      Path resFile = new Path(descTbl.getResFile());
      FileSystem fs = resFile.getFileSystem(conf);
      DataOutput outStream = fs.create(resFile);

      if (colPath.equals(tableName)) {
        if (!descTbl.isFormatted()) {
          List<FieldSchema> cols = tbl.getCols();
          if (tableName.equals(colPath)) {
            cols.addAll(tbl.getPartCols());
          }
          outStream.writeBytes(MetaDataFormatUtils.displayColsUnformatted(cols));
        } else {
          outStream.writeBytes(MetaDataFormatUtils.getAllColumnsInformation(tbl));
        }
      } else {
        List<FieldSchema> cols = Hive.getFieldsFromDeserializer(colPath, tbl.getDeserializer());
        if (descTbl.isFormatted()) {
          outStream.writeBytes(MetaDataFormatUtils.getAllColumnsInformation(cols));
        } else {
          outStream.writeBytes(MetaDataFormatUtils.displayColsUnformatted(cols));
        }
      }

      if (tableName.equals(colPath)) {

        if (descTbl.isFormatted()) {
          if (part != null) {
            outStream.writeBytes(MetaDataFormatUtils.getPartitionInformation(part));
          } else {
            outStream.writeBytes(MetaDataFormatUtils.getTableInformation(tbl));
          }
        }

        // if extended desc table then show the complete details of the table
        if (descTbl.isExt()) {
          // add empty line
          outStream.write(terminator);
          if (part != null) {
            // show partition information
            outStream.writeBytes("Detailed Partition Information");
            outStream.write(separator);
            outStream.writeBytes(part.getTPartition().toString());
            outStream.write(separator);
            // comment column is empty
            outStream.write(terminator);
          } else {
            // show table information
            outStream.writeBytes("Detailed Table Information");
            outStream.write(separator);
            outStream.writeBytes(tbl.getTTable().toString());
            outStream.write(separator);
            outStream.write(terminator);
          }
        }
      }

      LOG.info("DDLTask: written data for " + tbl.getTableName());
      ((FSDataOutputStream) outStream).close();

    } catch (FileNotFoundException e) {
      LOG.info("describe table: " + stringifyException(e));
      return 1;
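
For DESCRIBE-style output the example leans on MetaDataFormatUtils rather than formatting columns by hand. A minimal sketch of those three output shapes (the helper name is hypothetical; System.out replaces the result file):

  // Hypothetical helper mirroring DESCRIBE, DESCRIBE FORMATTED, and DESCRIBE EXTENDED above.
  private void describeTable(Table tbl, boolean formatted, boolean extended) {
    if (formatted) {
      System.out.print(MetaDataFormatUtils.getAllColumnsInformation(tbl));
      System.out.print(MetaDataFormatUtils.getTableInformation(tbl));
    } else {
      System.out.print(MetaDataFormatUtils.displayColsUnformatted(tbl.getCols()));
    }
    if (extended) {
      // raw Thrift-level metadata, as written by DESCRIBE EXTENDED above
      System.out.println(tbl.getTTable().toString());
    }
  }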

   * @throws HiveException
   *           Throws this exception if an unexpected error occurs.
   */
  private int alterTable(Hive db, AlterTableDesc alterTbl) throws HiveException {
    // alter the table
    Table tbl = db.getTable(alterTbl.getOldName());

    Partition part = null;
    if (alterTbl.getPartSpec() != null) {
      part = db.getPartition(tbl, alterTbl.getPartSpec(), false);
      if (part == null) {
        console.printError("Partition : " + alterTbl.getPartSpec().toString()
            + " does not exist.");
        return 1;
      }
    }

    validateAlterTableType(tbl, alterTbl.getOp());

    if (tbl.isView()) {
      if (!alterTbl.getExpectView()) {
        throw new HiveException("Cannot alter a view with ALTER TABLE");
      }
    } else {
      if (alterTbl.getExpectView()) {
        throw new HiveException("Cannot alter a base table with ALTER VIEW");
      }
    }

    Table oldTbl = tbl.copy();

    if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAME) {
      tbl.setTableName(alterTbl.getNewName());
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCOLS) {
      List<FieldSchema> newCols = alterTbl.getNewCols();
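
The ALTER TABLE path copies the Table before mutating it, so the pre-alter and post-alter definitions can both be handed to the metastore. A minimal rename sketch built from the same calls (the helper name and table names are hypothetical):

  // Hypothetical rename helper; oldName and newName are supplied by the caller.
  private void renameTable(Hive db, String oldName, String newName) throws HiveException {
    Table tbl = db.getTable(oldName);
    Table oldTbl = tbl.copy();          // keep the pre-alter definition, as the example does
    tbl.setTableName(newName);
    db.alterTable(oldTbl.getTableName(), tbl);
  }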

   */
  private int dropTable(Hive db, DropTableDesc dropTbl) throws HiveException {
    // We need to fetch the table before it is dropped so that it can be
    // passed to the post-execution hook.
    Table tbl = null;
    try {
      tbl = db.getTable(dropTbl.getTableName());
    } catch (InvalidTableException e) {
      // drop table is idempotent
    }

    if (tbl != null) {
      if (tbl.isView()) {
        if (!dropTbl.getExpectView()) {
          throw new HiveException("Cannot drop a view with DROP TABLE");
        }
      } else {
        if (dropTbl.getExpectView()) {
          throw new HiveException("Cannot drop a base table with DROP VIEW");
        }
      }
    }

    if (dropTbl.getPartSpecs() == null) {
      if (tbl != null && !tbl.canDrop()) {
        throw new HiveException("Table " + tbl.getTableName() +
            " is protected from being dropped");
      }

      // We should check that all the partitions of the table can be dropped
      if (tbl != null && tbl.isPartitioned()) {
        List<Partition> listPartitions = db.getPartitions(tbl);
        for (Partition p : listPartitions) {
          if (!p.canDrop()) {
            throw new HiveException("Table " + tbl.getTableName() +
                " Partition " + p.getName() +
                " is protected from being dropped");
          }
        }
      }

      // drop the table
      db.dropTable(db.getCurrentDatabase(), dropTbl.getTableName());
      if (tbl != null) {
        work.getOutputs().add(new WriteEntity(tbl));
      }
    } else {
      // get all partitions of the table
      List<String> partitionNames =
        db.getPartitionNames(db.getCurrentDatabase(), dropTbl.getTableName(), (short) -1);
      Set<Map<String, String>> partitions = new HashSet<Map<String, String>>();
      for (String partitionName : partitionNames) {
        try {
          partitions.add(Warehouse.makeSpecFromName(partitionName));
        } catch (MetaException e) {
          LOG.warn("Unrecognized partition name from metastore: " + partitionName);
        }
      }
      // drop partitions in the list
      List<Partition> partsToDelete = new ArrayList<Partition>();
      for (Map<String, String> partSpec : dropTbl.getPartSpecs()) {
        Iterator<Map<String, String>> it = partitions.iterator();
        while (it.hasNext()) {
          Map<String, String> part = it.next();
          // test if partSpec matches part
          boolean match = true;
          for (Map.Entry<String, String> item : partSpec.entrySet()) {
            if (!item.getValue().equals(part.get(item.getKey()))) {
              match = false;
              break;
            }
          }
          if (match) {
            Partition p = db.getPartition(tbl, part, false);
            if (!p.canDrop()) {
              throw new HiveException("Table " + tbl.getTableName() +
                  " Partition " + p.getName() +
                  " is protected from being dropped");
            }

            partsToDelete.add(p);
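
Before a drop, the example verifies that neither the table nor any of its partitions is drop-protected. A condensed sketch of that guard (the helper name is hypothetical):

  // Hypothetical guard used before dropping a table, condensed from the example above.
  private void dropTableIfAllowed(Hive db, Table tbl) throws HiveException {
    if (!tbl.canDrop()) {
      throw new HiveException("Table " + tbl.getTableName()
          + " is protected from being dropped");
    }
    if (tbl.isPartitioned()) {
      for (Partition p : db.getPartitions(tbl)) {
        if (!p.canDrop()) {
          throw new HiveException("Partition " + p.getName()
              + " is protected from being dropped");
        }
      }
    }
    db.dropTable(db.getCurrentDatabase(), tbl.getTableName());
  }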

      parameters.put(StatsSetupConst.NUM_PARTITIONS, Integer.toString(tblStats.getNumPartitions()));
      parameters.put(StatsSetupConst.NUM_FILES, Integer.toString(tblStats.getNumFiles()));
      parameters.put(StatsSetupConst.TOTAL_SIZE, Long.toString(tblStats.getSize()));
      tTable.setParameters(parameters);

      db.alterTable(table.getTableName(), new Table(tTable));

      console.printInfo("Table " + table.getTableName() + " stats: [" + tblStats.toString() + ']');

      return 0;
    }
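
Statistics are persisted by editing the parameter map on the underlying Thrift table and writing it back with alterTable. A minimal sketch of that round trip (helper name and stat values are hypothetical; java.util.Map is assumed to be imported and the Thrift parameter map is assumed to be already initialized):

  // Hypothetical stats update; values are placeholders passed in by the caller.
  private void persistBasicStats(Hive db, Table tbl, int numFiles, long totalSize)
      throws HiveException {
    org.apache.hadoop.hive.metastore.api.Table tTable = tbl.getTTable();
    Map<String, String> parameters = tTable.getParameters();
    parameters.put(StatsSetupConst.NUM_FILES, Integer.toString(numFiles));
    parameters.put(StatsSetupConst.TOTAL_SIZE, Long.toString(totalSize));
    tTable.setParameters(parameters);
    db.alterTable(tbl.getTableName(), new Table(tTable));  // wrap and write back, as above
  }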

      // We have to materialize the table alias list since we might
      // modify it in the middle for view rewrite.
      List<String> tabAliases = new ArrayList<String>(qb.getTabAliases());
      for (String alias : tabAliases) {
        String tab_name = qb.getTabNameForAlias(alias);
        Table tab = null;
        try {
          tab = db.getTable(tab_name);
        } catch (InvalidTableException ite) {
          throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(qb
              .getParseInfo().getSrcForAlias(alias)));
        }

        // We check offline of the table, as if people only select from an
        // non-existing partition of an offline table, the partition won't
        // be added to inputs and validate() won't have the information to
        // check the table's offline status.
        // TODO: Modify the code to remove the checking here and consolidate
        // it in validate()
        //
        if (tab.isOffline()) {
          throw new SemanticException(ErrorMsg.OFFLINE_TABLE_OR_PARTITION.
              getMsg("Table " + qb.getParseInfo().getSrcForAlias(alias)));
        }

        if (tab.isView()) {
          replaceViewReferenceWithDefinition(qb, tab, tab_name, alias);
          continue;
        }

        if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) {
          throw new SemanticException(ErrorMsg.INVALID_INPUT_FORMAT_TYPE
              .getMsg(qb.getParseInfo().getSrcForAlias(alias)));
        }

        qb.getMetaData().setSrcForAlias(alias, tab);
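
Before a table is accepted as a query source, the analyzer checks its offline flag, expands views, and verifies the input format class. A condensed sketch of those checks (the helper name is hypothetical, org.apache.hadoop.mapred.InputFormat is assumed, and HiveException stands in for the SemanticException the analyzer actually throws):

  // Hypothetical source-table validation, condensed from the analyzer checks above.
  private void validateSourceTable(Table tab) throws HiveException {
    if (tab.isOffline()) {
      throw new HiveException("Table " + tab.getTableName() + " is offline");
    }
    if (tab.isView()) {
      // a view has no storage of its own; its definition must be expanded instead
      return;
    }
    if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) {
      throw new HiveException("Unsupported input format: "
          + tab.getInputFormatClass().getName());
    }
  }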

    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    QBMetaData qbm = qb.getMetaData();
    Integer dest_type = qbm.getDestTypeForAlias(dest);

    Table dest_tab = null;     // destination table if any
    Partition dest_part = null;// destination partition if any
    String queryTmpdir = null; // the intermediate destination directory
    Path dest_path = null; // the final destination directory
    TableDesc table_desc = null;
    int currentTableId = 0;
    boolean isLocal = false;
    SortBucketRSCtx rsCtx = new SortBucketRSCtx();
    DynamicPartitionCtx dpCtx = null;
    LoadTableDesc ltd = null;
    boolean holdDDLTime = checkHoldDDLTime(qb);

    switch (dest_type.intValue()) {
    case QBMetaData.DEST_TABLE: {

      dest_tab = qbm.getDestTableForAlias(dest);

      Map<String, String> partSpec = qbm.getPartSpecForAlias(dest);
      dest_path = dest_tab.getPath();

      // check for partition
      List<FieldSchema> parts = dest_tab.getPartitionKeys();
      if (parts != null && parts.size() > 0) { // table is partitioned
        if (partSpec== null || partSpec.size() == 0) { // user did NOT specify partition
          throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg());
        }
        // the HOLD_DDLTIME hint should not be used with dynamic partitions since the
        // newly generated partitions should always update their DDLTIME
        if (holdDDLTime) {
          throw new SemanticException(ErrorMsg.HOLD_DDLTIME_ON_NONEXIST_PARTITIONS.getMsg());
        }
        dpCtx = qbm.getDPCtx(dest);
        if (dpCtx == null) {
          Utilities.validatePartSpec(dest_tab, partSpec);
          dpCtx = new DynamicPartitionCtx(dest_tab, partSpec,
              conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME),
              conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE));
          qbm.setDPCtx(dest, dpCtx);
        }

        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING)) { // allow DP
          if (dpCtx.getNumDPCols() > 0 &&
              (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPFILES) ||
               HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPREDFILES)) &&
               !Utilities.supportCombineFileInputFormat()) {
            // Do not support merge for Hadoop versions (pre-0.20) that do not
            // support CombineHiveInputFormat
            HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPFILES, false);
            HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPREDFILES, false);
          }
          // turn on hive.task.progress to update # of partitions created to the JT
          HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEJOBPROGRESS, true);

        } else { // QBMetaData.DEST_PARTITION captures the all-SP case
          throw new SemanticException(ErrorMsg.DYNAMIC_PARTITION_DISABLED.getMsg());
        }
        if (dpCtx.getSPPath() != null) {
          dest_path = new Path(dest_tab.getPath(), dpCtx.getSPPath());
        }
        if ((dest_tab.getNumBuckets() > 0) &&
            (conf.getBoolVar(HiveConf.ConfVars.HIVEENFORCEBUCKETING))) {
          dpCtx.setNumBuckets(dest_tab.getNumBuckets());
        }
      }

      boolean isNonNativeTable = dest_tab.isNonNative();
      if (isNonNativeTable) {
        queryTmpdir = dest_path.toUri().getPath();
      } else {
        queryTmpdir = ctx.getExternalTmpFileURI(dest_path.toUri());
      }
      if (dpCtx != null) {
        // set the root of the temporary path that the dynamic partition columns will populate
        dpCtx.setRootPath(queryTmpdir);
      }
      // this table_desc does not contain the partitioning columns
      table_desc = Utilities.getTableDesc(dest_tab);

      // Add sorting/bucketing if needed
      input = genBucketingSortingDest(dest, input, qb, table_desc, dest_tab, rsCtx);

      idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getTableName());
      currentTableId = destTableId;
      destTableId++;

      // Create the work for moving the table
      // NOTE: specify Dynamic partitions in dest_tab for WriteEntity
      if (!isNonNativeTable) {
        ltd = new LoadTableDesc(queryTmpdir, ctx.getExternalTmpFileURI(dest_path.toUri()),
            table_desc, dpCtx);
        if (holdDDLTime) {
          LOG.info("this query will not update transient_lastDdlTime!");
          ltd.setHoldDDLTime(true);
        }
        loadTableWork.add(ltd);
      }

      // Only register the whole table for the post-exec hook here if no DP is present;
      // in the DP case, we register the WriteEntity in MoveTask once the list of
      // dynamically created partitions is known.
      if ((dpCtx == null || dpCtx.getNumDPCols() == 0) &&
          !outputs.add(new WriteEntity(dest_tab))) {
        throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES
            .getMsg(dest_tab.getTableName()));
      }
      if ((dpCtx != null) && (dpCtx.getNumDPCols() >= 0)) {
        // No static partition specified
        if (dpCtx.getNumSPCols() == 0) {
          outputs.add(new WriteEntity(dest_tab, false));
        }
        // only part of the partition spec was specified.
        // Create a DummyPartition in this case: since the metastore does not store
        // partial partitions currently, we need to store dummy partitions.
        else {
          try {
            String ppath = dpCtx.getSPPath();
            ppath = ppath.substring(0, ppath.length()-1);
            DummyPartition p =
              new DummyPartition(dest_tab,
                                 dest_tab.getDbName() + "@" + dest_tab.getTableName() + "@" + ppath);
            outputs.add(new WriteEntity(p, false));
          } catch (HiveException e) {
            throw new SemanticException(e.getMessage());
          }
        }
      }

      break;
    }
    case QBMetaData.DEST_PARTITION: {

      dest_part = qbm.getDestPartitionForAlias(dest);
      dest_tab = dest_part.getTable();
      Path tabPath = dest_tab.getPath();
      Path partPath = dest_part.getPartitionPath();

      // if the table is in a different dfs than the partition,
      // replace the partition's dfs with the table's dfs.
      dest_path = new Path(tabPath.toUri().getScheme(), tabPath.toUri()
          .getAuthority(), partPath.toUri().getPath());

      if ("har".equalsIgnoreCase(dest_path.toUri().getScheme())) {
        throw new SemanticException(ErrorMsg.OVERWRITE_ARCHIVED_PART
            .getMsg());
      }
      queryTmpdir = ctx.getExternalTmpFileURI(dest_path.toUri());
      table_desc = Utilities.getTableDesc(dest_tab);

      // Add sorting/bucketing if needed
      input = genBucketingSortingDest(dest, input, qb, table_desc, dest_tab, rsCtx);

      idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getTableName());
      currentTableId = destTableId;
      destTableId++;

      ltd = new LoadTableDesc(queryTmpdir, ctx.getExternalTmpFileURI(dest_path.toUri()),
          table_desc, dest_part.getSpec());
      if (holdDDLTime) {
        try {
          Partition part = db.getPartition(dest_tab, dest_part.getSpec(), false);
          if (part == null) {
            throw new SemanticException(ErrorMsg.HOLD_DDLTIME_ON_NONEXIST_PARTITIONS.getMsg());
          }
        } catch (HiveException e) {
          throw new SemanticException(e);
        }
        LOG.info("this query will not update transient_lastDdlTime!");
        ltd.setHoldDDLTime(true);
      }
      loadTableWork.add(ltd);
      if (!outputs.add(new WriteEntity(dest_part))) {
        throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES
            .getMsg(dest_tab.getTableName() + "@" + dest_part.getName()));
      }
      break;
    }
    case QBMetaData.DEST_LOCAL_FILE:
      isLocal = true;
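
When a table is an INSERT destination, the planner reads its physical layout straight from the Table object. A minimal sketch of those lookups, using only accessors seen above (the helper name is hypothetical; a LOG field plus Path, List, and FieldSchema imports are assumed):

  // Hypothetical inspection of an INSERT destination table.
  private void inspectDestination(Table dest_tab) {
    Path destPath = dest_tab.getPath();                        // final destination directory
    List<FieldSchema> partKeys = dest_tab.getPartitionKeys();  // empty when unpartitioned
    boolean partitioned = partKeys != null && !partKeys.isEmpty();
    boolean nonNative = dest_tab.isNonNative();                // storage-handler-backed table
    int numBuckets = dest_tab.getNumBuckets();                 // only > 0 for bucketed tables
    LOG.info("dest " + dest_tab.getDbName() + "@" + dest_tab.getTableName()
        + " path=" + destPath + " partitioned=" + partitioned
        + " nonNative=" + nonNative + " buckets=" + numBuckets);
  }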

  @SuppressWarnings("nls")
  private Operator genTablePlan(String alias, QB qb) throws SemanticException {

    String alias_id = (qb.getId() == null ? alias : qb.getId() + ":" + alias);
    Table tab = qb.getMetaData().getSrcForAlias(alias);
    RowResolver rwsch;

    // is the table already present
    Operator<? extends Serializable> top = topOps.get(alias_id);
    Operator<? extends Serializable> dummySel = topSelOps.get(alias_id);
    if (dummySel != null) {
      top = dummySel;
    }

    if (top == null) {
      rwsch = new RowResolver();
      try {
        StructObjectInspector rowObjectInspector = (StructObjectInspector) tab
            .getDeserializer().getObjectInspector();
        List<? extends StructField> fields = rowObjectInspector
            .getAllStructFieldRefs();
        for (int i = 0; i < fields.size(); i++) {
          rwsch.put(alias, fields.get(i).getFieldName(), new ColumnInfo(fields
              .get(i).getFieldName(), TypeInfoUtils
              .getTypeInfoFromObjectInspector(fields.get(i)
              .getFieldObjectInspector()), alias, false));
        }
      } catch (SerDeException e) {
        throw new RuntimeException(e);
      }
      // Hack!! - refactor once the metadata APIs with types are ready
      // Finally add the partitioning columns
      for (FieldSchema part_col : tab.getPartCols()) {
        LOG.trace("Adding partition col: " + part_col);
        // TODO: use the right type by calling part_col.getType() instead of
        // String.class
        rwsch.put(alias, part_col.getName(), new ColumnInfo(part_col.getName(),
            TypeInfoFactory.stringTypeInfo, alias, true));
      }

      // put all virtual columns in the RowResolver.
      Iterator<VirtualColumn> vcs = VirtualColumn.registry.values().iterator();
      // use a list for easy customization
      List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
      while (vcs.hasNext()) {
        VirtualColumn vc = vcs.next();
        rwsch.put(alias, vc.getName(), new ColumnInfo(vc.getName(),
            vc.getTypeInfo(), alias, true, vc.getIsHidden()));
        vcList.add(vc);
      }

      // Create the root of the operator tree
      TableScanDesc tsDesc = new TableScanDesc(alias, vcList);
      setupStats(tsDesc, qb.getParseInfo(), tab, alias);

      top = putOpInsertMap(OperatorFactory.get(tsDesc,
          new RowSchema(rwsch.getColumnInfos())), rwsch);

      // Add this to the list of top operators - we always start from a table
      // scan
      topOps.put(alias_id, top);

      // Add a mapping from the table scan operator to Table
      topToTable.put((TableScanOperator) top, tab);
    } else {
      rwsch = opParseCtx.get(top).getRowResolver();
      top.setChildOperators(null);
    }

    // check if this table is sampled and needs more than input pruning
    Operator<? extends Serializable> tableOp = top;
    TableSample ts = qb.getParseInfo().getTabSample(alias);
    if (ts != null) {
      int num = ts.getNumerator();
      int den = ts.getDenominator();
      ArrayList<ASTNode> sampleExprs = ts.getExprs();

      // TODO: Do the type checking of the expressions
      List<String> tabBucketCols = tab.getBucketCols();
      int numBuckets = tab.getNumBuckets();

      // If there are no sample cols and no bucket cols then throw an error
      if (tabBucketCols.size() == 0 && sampleExprs.size() == 0) {
        throw new SemanticException(ErrorMsg.NON_BUCKETED_TABLE.getMsg() + " "
            + tab.getTableName());
      }

      if (num > den) {
        throw new SemanticException(
            ErrorMsg.BUCKETED_NUMBERATOR_BIGGER_DENOMINATOR.getMsg() + " "
            + tab.getTableName());
      }

      // check if a predicate is needed
      // predicate is needed if either input pruning is not enough
      // or if input pruning is not possible

      // check if the sample columns are the same as the table bucket columns
      boolean colsEqual = true;
      if ((sampleExprs.size() != tabBucketCols.size())
          && (sampleExprs.size() != 0)) {
        colsEqual = false;
      }

      for (int i = 0; i < sampleExprs.size() && colsEqual; i++) {
        boolean colFound = false;
        for (int j = 0; j < tabBucketCols.size() && !colFound; j++) {
          if (sampleExprs.get(i).getToken().getType() != HiveParser.TOK_TABLE_OR_COL) {
            break;
          }

          if (((ASTNode) sampleExprs.get(i).getChild(0)).getText()
              .equalsIgnoreCase(tabBucketCols.get(j))) {
            colFound = true;
          }
        }
        colsEqual = (colsEqual && colFound);
      }

      // Check if input can be pruned
      ts.setInputPruning((sampleExprs == null || sampleExprs.size() == 0 || colsEqual));

      // check if input pruning is enough
      if ((sampleExprs == null || sampleExprs.size() == 0 || colsEqual)
          && (num == den || (den % numBuckets == 0 || numBuckets % den == 0))) {

        // input pruning is enough; add the filter for the optimizer to use it
        // later
        LOG.info("No need for sample filter");
        ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols,
            colsEqual, alias, rwsch, qb.getMetaData(), null);
        tableOp = OperatorFactory.getAndMakeChild(new FilterDesc(
            samplePredicate, true, new sampleDesc(ts.getNumerator(), ts
            .getDenominator(), tabBucketCols, true)),
            new RowSchema(rwsch.getColumnInfos()), top);
      } else {
        // need to add filter
        // create tableOp to be filterDesc and set as child to 'top'
        LOG.info("Need sample filter");
        ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols,
            colsEqual, alias, rwsch, qb.getMetaData(), null);
        tableOp = OperatorFactory.getAndMakeChild(new FilterDesc(
            samplePredicate, true),
            new RowSchema(rwsch.getColumnInfos()), top);
      }
    } else {
      boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE);
      if (testMode) {
        String tabName = tab.getTableName();

        // has the user explicitly asked not to sample this table
        String unSampleTblList = conf
            .getVar(HiveConf.ConfVars.HIVETESTMODENOSAMPLE);
        String[] unSampleTbls = unSampleTblList.split(",");
        boolean unsample = false;
        for (String unSampleTbl : unSampleTbls) {
          if (tabName.equalsIgnoreCase(unSampleTbl)) {
            unsample = true;
          }
        }

        if (!unsample) {
          int numBuckets = tab.getNumBuckets();

          // If the input table is bucketed, choose the first bucket
          if (numBuckets > 0) {
            TableSample tsSample = new TableSample(1, numBuckets);
            tsSample.setInputPruning(true);
            qb.getParseInfo().setTabSample(alias, tsSample);
            ExprNodeDesc samplePred = genSamplePredicate(tsSample, tab
                .getBucketCols(), true, alias, rwsch, qb.getMetaData(), null);
            tableOp = OperatorFactory
                .getAndMakeChild(new FilterDesc(samplePred, true,
                new sampleDesc(tsSample.getNumerator(), tsSample
                .getDenominator(), tab.getBucketCols(), true)),
                new RowSchema(rwsch.getColumnInfos()), top);
            LOG.info("No need for sample filter");
          } else {
            // The table is not bucketed, add a dummy filter :: rand()
            int freq = conf.getIntVar(HiveConf.ConfVars.HIVETESTMODESAMPLEFREQ);
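
TABLESAMPLE handling depends only on the table's bucketing metadata. A simplified pre-check sketch (the helper name is hypothetical, a LOG field is assumed, and this is stricter than the original, which rejects the table only when both the bucket columns and the sample expressions are empty):

  // Hypothetical pre-check for TABLESAMPLE, simplified from the analyzer logic above.
  private void checkBucketedForSampling(Table tab) throws HiveException {
    List<String> bucketCols = tab.getBucketCols();
    int numBuckets = tab.getNumBuckets();
    if (bucketCols == null || bucketCols.isEmpty()) {
      throw new HiveException("Table " + tab.getTableName()
          + " is not bucketed; sampling needs bucket columns or sample expressions");
    }
    LOG.info("sampling " + tab.getTableName() + " over " + numBuckets
        + " buckets on " + bucketCols);
  }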

        && qbParseInfo.getDestToSortBy().isEmpty()) {
      boolean noMapRed = false;

      Iterator<Map.Entry<String, Table>> iter = qb.getMetaData()
          .getAliasToTable().entrySet().iterator();
      Table tab = (iter.next()).getValue();
      if (!tab.isPartitioned()) {
        if (qbParseInfo.getDestToWhereExpr().isEmpty()) {
          fetch = new FetchWork(tab.getPath().toString(), Utilities
              .getTableDesc(tab), qb.getParseInfo().getOuterQueryLimit());
          noMapRed = true;
          inputs.add(new ReadEntity(tab));
        }
      } else {
