Examples of FetchWork

Examples of org.apache.hadoop.hive.ql.plan.FetchWork

    prop.setProperty(Constants.SERIALIZATION_NULL_FORMAT, " ");
    String[] colTypes = schema.split("#");
    prop.setProperty("columns", colTypes[0]);
    prop.setProperty("columns.types", colTypes[1]);


    FetchWork fetch = new FetchWork(ctx.getResFile().toString(), new TableDesc(
        LazySimpleSerDe.class, TextInputFormat.class,
        IgnoreKeyTextOutputFormat.class, prop), -1);
    fetch.setSerializationNullFormat(" ");
    return (FetchTask) TaskFactory.get(fetch, conf);
  }

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.FetchWork

        Byte small_alias = tags[j];
        Operator<? extends Serializable> tblScan_op2 = parentOps[j];
        localPlan.getAliasToWork().put(small_alias.toString(), tblScan_op2);
        Path tblDir = new Path(smallTblDirs.get(small_alias));
        localPlan.getAliasToFetchWork().put(small_alias.toString(),
            new FetchWork(tblDir.toString(), tableDescList.get(small_alias)));
      }


      newPlan.setMapLocalWork(localPlan);


      // construct a map join and set it as the child operator of tblScan_op

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.FetchWork

      assert localPlan.getAliasToFetchWork().get(alias_id) == null;
      localPlan.getAliasToWork().put(alias_id, topOp);
      if (tblDir == null) {
        localPlan.getAliasToFetchWork().put(
            alias_id,
            new FetchWork(FetchWork.convertPathToStringArray(partDir), partDesc));
      } else {
        localPlan.getAliasToFetchWork().put(alias_id,
            new FetchWork(tblDir.toString(), tblDesc));
      }
      plan.setMapLocalWork(localPlan);
    }
  }

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.FetchWork

      }


      assert localPlan.getAliasToWork().get(alias) == null;
      assert localPlan.getAliasToFetchWork().get(alias) == null;
      localPlan.getAliasToWork().put(alias, topOp);
      localPlan.getAliasToFetchWork().put(alias, new FetchWork(alias, tt_desc));
      plan.setMapLocalWork(localPlan);
    }
  }

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.FetchWork

    }
  }


  @SuppressWarnings("nls")
  private void genMapRedTasks(QB qb) throws SemanticException {
    FetchWork fetch = null;
    List<Task<? extends Serializable>> mvTask = new ArrayList<Task<? extends Serializable>>();
    FetchTask fetchTask = null;


    QBParseInfo qbParseInfo = qb.getParseInfo();


    // Does this query need reduce job
    if (qb.isSelectStarQuery() && qbParseInfo.getDestToClusterBy().isEmpty()
        && qbParseInfo.getDestToDistributeBy().isEmpty()
        && qbParseInfo.getDestToOrderBy().isEmpty()
        && qbParseInfo.getDestToSortBy().isEmpty()) {
      boolean noMapRed = false;


      Iterator<Map.Entry<String, Table>> iter = qb.getMetaData()
          .getAliasToTable().entrySet().iterator();
      Table tab = (iter.next()).getValue();
      if (!tab.isPartitioned()) {
        if (qbParseInfo.getDestToWhereExpr().isEmpty()) {
          fetch = new FetchWork(tab.getPath().toString(), Utilities
              .getTableDesc(tab), qb.getParseInfo().getOuterQueryLimit());
          noMapRed = true;
          inputs.add(new ReadEntity(tab));
        }
      } else {


        if (topOps.size() == 1) {
          TableScanOperator ts = (TableScanOperator) topOps.values().toArray()[0];


          // check if the pruner only contains partition columns
          if (PartitionPruner.onlyContainsPartnCols(topToTable.get(ts),
              opToPartPruner.get(ts))) {


            PrunedPartitionList partsList = null;
            try {
              partsList = opToPartList.get(ts);
              if (partsList == null) {
                partsList = PartitionPruner.prune(topToTable.get(ts),
                    opToPartPruner.get(ts), conf, (String) topOps.keySet()
                    .toArray()[0], prunedPartitions);
                opToPartList.put(ts, partsList);
              }
            } catch (HiveException e) {
              // Has to use full name to make sure it does not conflict with
              // org.apache.commons.lang.StringUtils
              LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
              throw new SemanticException(e.getMessage(), e);
            }


            // If there is any unknown partition, create a map-reduce job for
            // the filter to prune correctly
            if ((partsList.getUnknownPartns().size() == 0)) {
              List<String> listP = new ArrayList<String>();
              List<PartitionDesc> partP = new ArrayList<PartitionDesc>();


              Set<Partition> parts = partsList.getConfirmedPartns();
              Iterator<Partition> iterParts = parts.iterator();
              while (iterParts.hasNext()) {
                Partition part = iterParts.next();


                listP.add(part.getPartitionPath().toString());
                try {
                  partP.add(Utilities.getPartitionDesc(part));
                } catch (HiveException e) {
                  throw new SemanticException(e.getMessage(), e);
                }
                inputs.add(new ReadEntity(part));
              }


              fetch = new FetchWork(listP, partP, qb.getParseInfo()
                  .getOuterQueryLimit());
              noMapRed = true;
            }
          }
        }
      }


      if (noMapRed) {
        if (fetch.getTblDesc() != null) {
          PlanUtils.configureTableJobPropertiesForStorageHandler(
            fetch.getTblDesc());
        }
        fetchTask = (FetchTask) TaskFactory.get(fetch, conf);
        setFetchTask(fetchTask);


        // remove root tasks if any
        rootTasks.clear();
        return;
      }
    }


    // In case of a select, use a fetch task instead of a move task
    if (qb.getIsQuery()) {
      if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) {
        throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg());
      }
      String cols = loadFileWork.get(0).getColumns();
      String colTypes = loadFileWork.get(0).getColumnTypes();


      String resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
      TableDesc resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat);


      fetch = new FetchWork(new Path(loadFileWork.get(0).getSourceDir()).toString(),
          resultTab, qb.getParseInfo().getOuterQueryLimit());


      fetchTask = (FetchTask) TaskFactory.get(fetch, conf);
      setFetchTask(fetchTask);
    } else {

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.FetchWork

    conf.addResource("hive-site.xml");
    ArrayList<String> results = new ArrayList<String>();
    ArrayList<String> temp = new ArrayList<String>();
    Hive hive = Hive.get(conf);
    org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table);
    FetchWork work;
    if (!tbl.getPartCols().isEmpty()) {
      List<Partition> partitions = hive.getPartitions(tbl);
      List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
      List<String> partLocs = new ArrayList<String>();
      for (Partition part : partitions) {
        partLocs.add(part.getLocation());
        partDesc.add(Utilities.getPartitionDesc(part));
      }
      work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl));
      work.setLimit(100);
    } else {
      work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl));
    }
    FetchTask task = new FetchTask();
    task.setWork(work);
    task.initialize(conf, null, null);
    task.fetch(temp);

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.FetchWork

    conf.addResource("hive-site.xml");
    ArrayList<String> results = new ArrayList<String>();
    ArrayList<String> temp = new ArrayList<String>();
    Hive hive = Hive.get(conf);
    org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table);
    FetchWork work;
    if (!tbl.getPartCols().isEmpty()) {
      List<Partition> partitions = hive.getPartitions(tbl);
      List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
      List<String> partLocs = new ArrayList<String>();
      for (Partition part : partitions) {
        partLocs.add(part.getLocation());
        partDesc.add(Utilities.getPartitionDesc(part));
      }
      work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl));
      work.setLimit(100);
    } else {
      work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl));
    }
    FetchTask task = new FetchTask();
    task.setWork(work);
    task.initialize(conf, null, null);
    task.fetch(temp);

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.FetchWork

      if (resultTab == null) {
        String resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
        resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat);
      }


      FetchWork fetch = new FetchWork(new Path(loadFileDesc.getSourceDir()).toString(),
          resultTab, qb.getParseInfo().getOuterQueryLimit());
      fetch.setSource(pCtx.getFetchSource());
      fetch.setSink(pCtx.getFetchSink());


      pCtx.setFetchTask((FetchTask) TaskFactory.get(fetch, conf));


      // For the FetchTask, the limit optimization requires we fetch all the rows
      // in memory and count how many rows we get. It's not practical if the

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.FetchWork

  private void genColumnStatsTask(QB qb, List<LoadTableDesc> loadTableWork,
      List<LoadFileDesc> loadFileWork, List<Task<? extends Serializable>> rootTasks) {
    QBParseInfo qbParseInfo = qb.getParseInfo();
    ColumnStatsTask cStatsTask = null;
    ColumnStatsWork cStatsWork = null;
    FetchWork fetch = null;
    String tableName = qbParseInfo.getTableName();
    String partName = qbParseInfo.getPartName();
    List<String> colName = qbParseInfo.getColName();
    List<String> colType = qbParseInfo.getColType();
    boolean isTblLevel = qbParseInfo.isTblLvl();


    String cols = loadFileWork.get(0).getColumns();
    String colTypes = loadFileWork.get(0).getColumnTypes();


    String resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
    TableDesc resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat);


    fetch = new FetchWork(new Path(loadFileWork.get(0).getSourceDir()).toString(),
        resultTab, qb.getParseInfo().getOuterQueryLimit());


    ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tableName, partName,
        colName, colType, isTblLevel);
    cStatsWork = new ColumnStatsWork(fetch, cStatsDesc);

View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.FetchWork

      for (String path : emptyPath) {
        pathToAliases.remove(path);
      }


      // create fetch work
      FetchWork fetchWork = null;
      List<String> partDir = new ArrayList<String>();
      List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();


      for (String tablePath : pathSet) {
        PartitionDesc partitionDesc = newWork.getMapWork().getPathToPartitionInfo().get(tablePath);
        // create fetchwork for non partitioned table
        if (partitionDesc.getPartSpec() == null || partitionDesc.getPartSpec().size() == 0) {
          fetchWork = new FetchWork(tablePath, partitionDesc.getTableDesc());
          break;
        }
        // if table is partitioned,add partDir and partitionDesc
        partDir.add(tablePath);
        partDesc.add(partitionDesc);
      }
      // create fetchwork for partitioned table
      if (fetchWork == null) {
        TableDesc table = newWork.getMapWork().getAliasToPartnInfo().get(alias).getTableDesc();
        fetchWork = new FetchWork(partDir, partDesc, table);
      }
      // set alias to fetch work
      newLocalWork.getAliasToFetchWork().put(alias, fetchWork);
    }
    // remove small table ailias from aliasToWork;Avoid concurrent modification

View Full Code Here

0 1 2 3 4 5

TOP

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.