Examples of FetchTask
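
FetchTask is the Hive execution task that returns query results directly from files, without launching a MapReduce job: the compiler wraps a FetchWork (what to read, plus an optional row limit) in a FetchTask, and callers repeatedly fetch batches of serialized rows from it. The snippets below show both sides of that contract. As a quick orientation, here is a minimal standalone sketch of the pattern the final example on this page uses; the table handle, the configuration, and the helper class name are assumptions for illustration, not code taken from any of the examples.

import java.util.ArrayList;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.FetchWork;

// Minimal sketch, assuming an unpartitioned table `tbl` and a configured
// HiveConf `conf`; it mirrors the constructor and lifecycle calls used in the
// examples below rather than prescribing a canonical API.
public class FetchTaskSketch {
  public static ArrayList<String> fetchRows(Table tbl, HiveConf conf) throws Exception {
    // Describe what to read: the table's data location plus its TableDesc
    // (input format, deserializer, properties).
    FetchWork work = new FetchWork(tbl.getDataLocation(), Utilities.getTableDesc(tbl));
    work.setLimit(100);                // cap the number of rows returned

    FetchTask task = new FetchTask();
    task.setWork(work);
    task.initialize(conf, null, null); // no QueryPlan or DriverContext needed here

    ArrayList<String> rows = new ArrayList<String>();
    task.fetch(rows);                  // appends up to the limit, one serialized row per entry
    return rows;
  }
}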


Examples of org.apache.hadoop.hive.ql.exec.FetchTask

      BaseSemanticAnalyzer sem = plan.getPlan();
      if (!sem.getFetchTaskInit()) {
        sem.setFetchTaskInit(true);
        sem.getFetchTask().initialize(conf);
      }
      FetchTask ft = (FetchTask) sem.getFetchTask();
      ft.setMaxRows(maxRows);
      return ft.fetch(res);
    }

    if (resStream == null)
      resStream = ctx.getStream();
    if (resStream == null)

Examples of org.apache.hadoop.hive.ql.exec.FetchTask

      // can't get any info without a plan
    } else if (sem.getResultSchema() != null) {
      List<FieldSchema> lst = sem.getResultSchema();
      schema = new Schema(lst, null);
    } else if (sem.getFetchTask() != null) {
      FetchTask ft = sem.getFetchTask();
      TableDesc td = ft.getTblDesc();
      // Partitioned tables don't have tableDesc set on the FetchTask. Instead
      // they have a list of PartitionDesc objects, each with a table desc.
      // Try to fetch the desc for the first partition and use its
      // deserializer.
      if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null) {
        if (ft.getWork().getPartDesc().size() > 0) {
          td = ft.getWork().getPartDesc().get(0).getTableDesc();
        }
      }

      if (td == null) {
        LOG.info("No returning schema.");

Examples of org.apache.hadoop.hive.ql.exec.FetchTask

    }
  }

  public boolean getResults(ArrayList<String> res) throws IOException, CommandNeedRetryException {
    if (plan != null && plan.getFetchTask() != null) {
      FetchTask ft = plan.getFetchTask();
      ft.setMaxRows(maxRows);
      return ft.fetch(res);
    }

    if (resStream == null) {
      resStream = ctx.getStream();
    }
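
The getResults() method above simply forwards FetchTask.fetch(), whose boolean return signals whether any rows were fetched, so callers typically drain the results in a loop, clearing the buffer between batches. A sketch of that client-side pattern follows; the Driver setup, query string, and printing are assumptions for illustration.

import java.util.ArrayList;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Driver;

// Consumer-side sketch: keep calling getResults() until it returns false.
public class DrainResultsSketch {
  public static void printAll(HiveConf conf, String query) throws Exception {
    Driver driver = new Driver(conf);
    driver.run(query);                 // for a SELECT, the plan now carries a FetchTask

    ArrayList<String> res = new ArrayList<String>();
    while (driver.getResults(res)) {   // false once the FetchTask is exhausted
      for (String row : res) {
        System.out.println(row);       // one serialized result row per entry
      }
      res.clear();                     // reuse the buffer for the next batch
    }
    driver.close();                    // close() also clears the FetchTask (see the next example)
  }
}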

Examples of org.apache.hadoop.hive.ql.exec.FetchTask


  public int close() {
    try {
      if (plan != null) {
        FetchTask fetchTask = plan.getFetchTask();
        if (null != fetchTask) {
          try {
            fetchTask.clearFetch();
          } catch (Exception e) {
            LOG.debug(" Exception while clearing the Fetch task ", e);
          }
        }
      }

Examples of org.apache.hadoop.hive.ql.exec.FetchTask

  @SuppressWarnings("nls")
  private void genMapRedTasks(QB qb) throws SemanticException {
    FetchWork fetch = null;
    List<Task<? extends Serializable>> mvTask = new ArrayList<Task<? extends Serializable>>();
    FetchTask fetchTask = null;

    QBParseInfo qbParseInfo = qb.getParseInfo();

    // Does this query need a reduce job?
    if (qb.isSelectStarQuery() && qbParseInfo.getDestToClusterBy().isEmpty()
        && qbParseInfo.getDestToDistributeBy().isEmpty()
        && qbParseInfo.getDestToOrderBy().isEmpty()
        && qbParseInfo.getDestToSortBy().isEmpty()) {
      boolean noMapRed = false;

      Iterator<Map.Entry<String, Table>> iter = qb.getMetaData()
          .getAliasToTable().entrySet().iterator();
      Table tab = (iter.next()).getValue();
      if (!tab.isPartitioned()) {
        if (qbParseInfo.getDestToWhereExpr().isEmpty()) {
          fetch = new FetchWork(tab.getPath().toString(), Utilities
              .getTableDesc(tab), qb.getParseInfo().getOuterQueryLimit());
          noMapRed = true;
          inputs.add(new ReadEntity(tab));
        }
      } else {

        if (topOps.size() == 1) {
          TableScanOperator ts = (TableScanOperator) topOps.values().toArray()[0];

          // check if the pruner only contains partition columns
          if (PartitionPruner.onlyContainsPartnCols(topToTable.get(ts),
              opToPartPruner.get(ts))) {

            PrunedPartitionList partsList = null;
            try {
              partsList = opToPartList.get(ts);
              if (partsList == null) {
                partsList = PartitionPruner.prune(topToTable.get(ts),
                    opToPartPruner.get(ts), conf, (String) topOps.keySet()
                    .toArray()[0], prunedPartitions);
                opToPartList.put(ts, partsList);
              }
            } catch (HiveException e) {
              // Has to use full name to make sure it does not conflict with
              // org.apache.commons.lang.StringUtils
              LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
              throw new SemanticException(e.getMessage(), e);
            }

            // If there is any unknown partition, create a map-reduce job for
            // the filter to prune correctly
            if ((partsList.getUnknownPartns().size() == 0)) {
              List<String> listP = new ArrayList<String>();
              List<PartitionDesc> partP = new ArrayList<PartitionDesc>();

              Set<Partition> parts = partsList.getConfirmedPartns();
              Iterator<Partition> iterParts = parts.iterator();
              while (iterParts.hasNext()) {
                Partition part = iterParts.next();

                listP.add(part.getPartitionPath().toString());
                try {
                  partP.add(Utilities.getPartitionDesc(part));
                } catch (HiveException e) {
                  throw new SemanticException(e.getMessage(), e);
                }
                inputs.add(new ReadEntity(part));
              }

              fetch = new FetchWork(listP, partP, qb.getParseInfo()
                  .getOuterQueryLimit());
              noMapRed = true;
            }
          }
        }
      }

      if (noMapRed) {
        if (fetch.getTblDesc() != null) {
          PlanUtils.configureTableJobPropertiesForStorageHandler(
            fetch.getTblDesc());
        }
        fetchTask = (FetchTask) TaskFactory.get(fetch, conf);
        setFetchTask(fetchTask);

        // remove root tasks if any
        rootTasks.clear();
        return;
      }
    }

    // Determine whether the query qualifies for the reduce-input-size
    // optimization for LIMIT. The query only qualifies when there is exactly
    // one top operator, no transform or UDTF, and no block sampling.
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVELIMITOPTENABLE)
        && ctx.getTryCount() == 0 && topOps.size() == 1
        && !globalLimitCtx.ifHasTransformOrUDTF() &&
        nameToSplitSample.isEmpty()) {

      // Here we recursively check:
      // 1. whether there is exactly one LIMIT in the query
      // 2. whether there is no aggregation, group-by, distinct, sort by,
      //    distribute by, or table sampling in any of the sub-queries.
      // The query only qualifies if both conditions are satisfied.
      //
      // Example qualified queries:
      //    CREATE TABLE ... AS SELECT col1, col2 FROM tbl LIMIT ..
      //    INSERT OVERWRITE TABLE ... SELECT col1, hash(col2), split(col1)
      //                               FROM ... LIMIT...
      //    SELECT * FROM (SELECT col1 AS col2 FROM (SELECT * FROM ...) t1 LIMIT ...) t2
      //
      Integer tempGlobalLimit = checkQbpForGlobalLimit(qb);

      // query qualifies for the optimization
      if (tempGlobalLimit != null && tempGlobalLimit != 0)  {
        TableScanOperator ts = (TableScanOperator) topOps.values().toArray()[0];
        Table tab = topToTable.get(ts);

        if (!tab.isPartitioned()) {
          if (qbParseInfo.getDestToWhereExpr().isEmpty()) {
            globalLimitCtx.enableOpt(tempGlobalLimit);
          }
        } else {
          // check if the pruner only contains partition columns
          if (PartitionPruner.onlyContainsPartnCols(tab,
              opToPartPruner.get(ts))) {

            PrunedPartitionList partsList = null;
            try {
              partsList = opToPartList.get(ts);
              if (partsList == null) {
                partsList = PartitionPruner.prune(tab,
                    opToPartPruner.get(ts), conf, (String) topOps.keySet()
                    .toArray()[0], prunedPartitions);
                opToPartList.put(ts, partsList);
              }
            } catch (HiveException e) {
              // Has to use full name to make sure it does not conflict with
              // org.apache.commons.lang.StringUtils
              LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
              throw new SemanticException(e.getMessage(), e);
            }

            // If there is any unknown partition, create a map-reduce job for
            // the filter to prune correctly
            if ((partsList.getUnknownPartns().size() == 0)) {
              globalLimitCtx.enableOpt(tempGlobalLimit);
            }
          }
        }
        if (globalLimitCtx.isEnable()) {
          LOG.info("Qualify the optimize that reduces input size for 'limit' for limit "
              + globalLimitCtx.getGlobalLimit());
        }
      }
    }

    // In case of a select, use a fetch task instead of a move task
    if (qb.getIsQuery()) {
      if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) {
        throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg());
      }
      String cols = loadFileWork.get(0).getColumns();
      String colTypes = loadFileWork.get(0).getColumnTypes();

      String resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
      TableDesc resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat);

      fetch = new FetchWork(new Path(loadFileWork.get(0).getSourceDir()).toString(),
          resultTab, qb.getParseInfo().getOuterQueryLimit());

      fetchTask = (FetchTask) TaskFactory.get(fetch, conf);
      setFetchTask(fetchTask);

      // For the FetchTask, the limit optimization requires that we fetch all
      // the rows in memory and count how many rows we get. It's not practical
      // if the limit factor is too big.
      int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
      if (globalLimitCtx.isEnable() && globalLimitCtx.getGlobalLimit() > fetchLimit) {
        LOG.info("For FetchTask, LIMIT " + globalLimitCtx.getGlobalLimit() + " > " + fetchLimit
            + ". Doesn't qualify for the limit optimization.");
        globalLimitCtx.disableOpt();
      }

    } else {
      for (LoadTableDesc ltd : loadTableWork) {
        Task<MoveWork> tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false),
            conf);
        mvTask.add(tsk);
        // Check to see if we are making any indexes stale and auto-update them if configured
        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
          IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, getInputs(), conf);
          try {
            List<Task<? extends Serializable>> indexUpdateTasks = indexUpdater.generateUpdateTasks();
            for (Task<? extends Serializable> updateTask : indexUpdateTasks) {
              tsk.addDependentTask(updateTask);
            }
          } catch (HiveException e) {
            console.printInfo("WARNING: could not auto-update stale indexes, indexes are not in sync");
          }
        }
      }


      boolean oneLoadFile = true;
      for (LoadFileDesc lfd : loadFileWork) {
        if (qb.isCTAS()) {
          assert (oneLoadFile); // should not have more than 1 load file for CTAS
          // Make the move task's destination directory the table's destination.
          String location = qb.getTableDesc().getLocation();
          if (location == null) {
            // get the table's default location
            Table dumpTable;
            Path targetPath;
            try {
              dumpTable = db.newTable(qb.getTableDesc().getTableName());
              if (!db.databaseExists(dumpTable.getDbName())) {
                throw new SemanticException("ERROR: The database " + dumpTable.getDbName()
                    + " does not exist.");
              }
              Warehouse wh = new Warehouse(conf);
              targetPath = wh.getTablePath(db.getDatabase(dumpTable.getDbName()), dumpTable
                  .getTableName());
            } catch (HiveException e) {
              throw new SemanticException(e);
            } catch (MetaException e) {
              throw new SemanticException(e);
            }

            location = targetPath.toString();
          }
          lfd.setTargetDir(location);

          oneLoadFile = false;
        }
        mvTask.add(TaskFactory.get(new MoveWork(null, null, null, lfd, false),
            conf));
      }
    }

    // generate map reduce plans
    ParseContext tempParseContext = getParseContext();
    GenMRProcContext procCtx = new GenMRProcContext(
        conf,
        new HashMap<Operator<? extends Serializable>, Task<? extends Serializable>>(),
        new ArrayList<Operator<? extends Serializable>>(), tempParseContext,
        mvTask, rootTasks,
        new LinkedHashMap<Operator<? extends Serializable>, GenMapRedCtx>(),
        inputs, outputs);

    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack.
    // The dispatcher generates the plan from the operator tree
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R1", "TS%"), new GenMRTableScan1());
    opRules.put(new RuleRegExp("R2", "TS%.*RS%"), new GenMRRedSink1());
    opRules.put(new RuleRegExp("R3", "RS%.*RS%"), new GenMRRedSink2());
    opRules.put(new RuleRegExp("R4", "FS%"), new GenMRFileSink1());
    opRules.put(new RuleRegExp("R5", "UNION%"), new GenMRUnion1());
    opRules.put(new RuleRegExp("R6", "UNION%.*RS%"), new GenMRRedSink3());
    opRules.put(new RuleRegExp("R6", "MAPJOIN%.*RS%"), new GenMRRedSink4());
    opRules.put(new RuleRegExp("R7", "TS%.*MAPJOIN%"), MapJoinFactory.getTableScanMapJoin());
    opRules.put(new RuleRegExp("R8", "RS%.*MAPJOIN%"), MapJoinFactory.getReduceSinkMapJoin());
    opRules.put(new RuleRegExp("R9", "UNION%.*MAPJOIN%"), MapJoinFactory.getUnionMapJoin());
    opRules.put(new RuleRegExp("R10", "MAPJOIN%.*MAPJOIN%"), MapJoinFactory.getMapJoinMapJoin());
    opRules.put(new RuleRegExp("R11", "MAPJOIN%SEL%"), MapJoinFactory.getMapJoin());

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules,
        procCtx);

    GraphWalker ogw = new GenMapRedWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(topOps.values());
    ogw.startWalking(topNodes, null);

    // reduce sink does not have any kids - since the plan by now has been
    // broken up into multiple tasks, iterate over all tasks.
    // For each task, go over all operators recursively.
    for (Task<? extends Serializable> rootTask : rootTasks) {
      breakTaskTree(rootTask);
    }

    // For each task, set the key descriptor for the reducer
    for (Task<? extends Serializable> rootTask : rootTasks) {
      setKeyDescTaskTree(rootTask);
    }

    PhysicalContext physicalContext = new PhysicalContext(conf,
        getParseContext(), ctx, rootTasks, fetchTask);
    PhysicalOptimizer physicalOptimizer = new PhysicalOptimizer(
        physicalContext, conf);
    physicalOptimizer.optimize();

    // For each operator, generate the counters if needed
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEJOBPROGRESS)) {
      for (Task<? extends Serializable> rootTask : rootTasks) {
        generateCountersTask(rootTask);
      }
    }

    decideExecMode(rootTasks, ctx, globalLimitCtx);

    if (qb.isCTAS()) {
      // generate a DDL task and make it a dependent task of the leaf
      CreateTableDesc crtTblDesc = qb.getTableDesc();

      validateCreateTable(crtTblDesc);

      // Clear the output for CTAS since we don't need the output from the
      // MapredWork; the DDLWork at the tail of the chain will have the output.
      getOutputs().clear();

      Task<? extends Serializable> crtTblTask = TaskFactory.get(new DDLWork(
          getInputs(), getOutputs(), crtTblDesc), conf);

      // find all leaf tasks and make the DDLTask as a dependent task of all of
      // them
      HashSet<Task<? extends Serializable>> leaves = new HashSet<Task<? extends Serializable>>();
      getLeafTasks(rootTasks, leaves);
      assert (leaves.size() > 0);
      for (Task<? extends Serializable> task : leaves) {
        if (task instanceof StatsTask) {
          // StatsTask requires the table to already exist
          for (Task<? extends Serializable> parentOfStatsTask : task.getParentTasks()){
            parentOfStatsTask.addDependentTask(crtTblTask);
          }
          for (Task<? extends Serializable> parentOfCrtTblTask : crtTblTask.getParentTasks()){
            parentOfCrtTblTask.removeDependentTask(task);
          }
          crtTblTask.addDependentTask(task);
        } else {
          task.addDependentTask(crtTblTask);
        }
      }
    }

    if (globalLimitCtx.isEnable() && fetchTask != null) {
      int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
      LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit());
      fetchTask.getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit());
    }

    if (globalLimitCtx.isEnable() && globalLimitCtx.getLastReduceLimitDesc() != null) {
      LOG.info("set least row check for LimitDesc: " + globalLimitCtx.getGlobalLimit());
      globalLimitCtx.getLastReduceLimitDesc().setLeastRows(globalLimitCtx.getGlobalLimit());

Examples of org.apache.hadoop.hive.ql.exec.FetchTask

  @SuppressWarnings("nls")
  private void genMapRedTasks(QB qb) throws SemanticException {
    FetchWork fetch = null;
    List<Task<? extends Serializable>> mvTask = new ArrayList<Task<? extends Serializable>>();
    FetchTask fetchTask = null;

    QBParseInfo qbParseInfo = qb.getParseInfo();

    // Does this query need a reduce job?
    if (qb.isSelectStarQuery() && qbParseInfo.getDestToClusterBy().isEmpty()
        && qbParseInfo.getDestToDistributeBy().isEmpty()
        && qbParseInfo.getDestToOrderBy().isEmpty()
        && qbParseInfo.getDestToSortBy().isEmpty()) {
      boolean noMapRed = false;

      Iterator<Map.Entry<String, Table>> iter = qb.getMetaData()
          .getAliasToTable().entrySet().iterator();
      Table tab = (iter.next()).getValue();
      if (!tab.isPartitioned()) {
        if (qbParseInfo.getDestToWhereExpr().isEmpty()) {
          fetch = new FetchWork(tab.getPath().toString(), Utilities
              .getTableDesc(tab), qb.getParseInfo().getOuterQueryLimit());
          noMapRed = true;
          inputs.add(new ReadEntity(tab));
        }
      } else {

        if (topOps.size() == 1) {
          TableScanOperator ts = (TableScanOperator) topOps.values().toArray()[0];

          // check if the pruner only contains partition columns
          if (PartitionPruner.onlyContainsPartnCols(topToTable.get(ts),
              opToPartPruner.get(ts))) {

            PrunedPartitionList partsList = null;
            try {
              partsList = opToPartList.get(ts);
              if (partsList == null) {
                partsList = PartitionPruner.prune(topToTable.get(ts),
                    opToPartPruner.get(ts), conf, (String) topOps.keySet()
                    .toArray()[0], prunedPartitions);
                opToPartList.put(ts, partsList);
              }
            } catch (HiveException e) {
              // Has to use full name to make sure it does not conflict with
              // org.apache.commons.lang.StringUtils
              LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
              throw new SemanticException(e.getMessage(), e);
            }

            // If there is any unknown partition, create a map-reduce job for
            // the filter to prune correctly
            if ((partsList.getUnknownPartns().size() == 0)) {
              List<String> listP = new ArrayList<String>();
              List<PartitionDesc> partP = new ArrayList<PartitionDesc>();

              Set<Partition> parts = partsList.getConfirmedPartns();
              Iterator<Partition> iterParts = parts.iterator();
              while (iterParts.hasNext()) {
                Partition part = iterParts.next();

                listP.add(part.getPartitionPath().toString());
                try {
                  partP.add(Utilities.getPartitionDesc(part));
                } catch (HiveException e) {
                  throw new SemanticException(e.getMessage(), e);
                }
                inputs.add(new ReadEntity(part));
              }

              fetch = new FetchWork(listP, partP, qb.getParseInfo()
                  .getOuterQueryLimit());
              noMapRed = true;
            }
          }
        }
      }

      if (noMapRed) {
        if (fetch.getTblDesc() != null) {
          PlanUtils.configureInputJobPropertiesForStorageHandler(
            fetch.getTblDesc());
        } else if ((fetch.getPartDesc() != null) && (!fetch.getPartDesc().isEmpty())) {
          PartitionDesc pd0 = fetch.getPartDesc().get(0);
          TableDesc td = pd0.getTableDesc();
          if ((td != null) && (td.getProperties() != null)
              && td.getProperties().containsKey(
                  org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_STORAGE)) {
            PlanUtils.configureInputJobPropertiesForStorageHandler(td);
          }
        }
        fetchTask = (FetchTask) TaskFactory.get(fetch, conf);
        setFetchTask(fetchTask);

        // remove root tasks if any
        rootTasks.clear();
        return;
      }
    }

    // Determine whether the query qualifies for the reduce-input-size
    // optimization for LIMIT. The query only qualifies when there is exactly
    // one top operator, no transform or UDTF, and no block sampling.
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVELIMITOPTENABLE)
        && ctx.getTryCount() == 0 && topOps.size() == 1
        && !globalLimitCtx.ifHasTransformOrUDTF() &&
        nameToSplitSample.isEmpty()) {

      // Here we recursively check:
      // 1. whether there is exactly one LIMIT in the query
      // 2. whether there is no aggregation, group-by, distinct, sort by,
      //    distribute by, or table sampling in any of the sub-queries.
      // The query only qualifies if both conditions are satisfied.
      //
      // Example qualified queries:
      //    CREATE TABLE ... AS SELECT col1, col2 FROM tbl LIMIT ..
      //    INSERT OVERWRITE TABLE ... SELECT col1, hash(col2), split(col1)
      //                               FROM ... LIMIT...
      //    SELECT * FROM (SELECT col1 AS col2 FROM (SELECT * FROM ...) t1 LIMIT ...) t2
      //
      Integer tempGlobalLimit = checkQbpForGlobalLimit(qb);

      // query qualifies for the optimization
      if (tempGlobalLimit != null && tempGlobalLimit != 0)  {
        TableScanOperator ts = (TableScanOperator) topOps.values().toArray()[0];
        Table tab = topToTable.get(ts);

        if (!tab.isPartitioned()) {
          if (qbParseInfo.getDestToWhereExpr().isEmpty()) {
            globalLimitCtx.enableOpt(tempGlobalLimit);
          }
        } else {
          // check if the pruner only contains partition columns
          if (PartitionPruner.onlyContainsPartnCols(tab,
              opToPartPruner.get(ts))) {

            PrunedPartitionList partsList = null;
            try {
              partsList = opToPartList.get(ts);
              if (partsList == null) {
                partsList = PartitionPruner.prune(tab,
                    opToPartPruner.get(ts), conf, (String) topOps.keySet()
                    .toArray()[0], prunedPartitions);
                opToPartList.put(ts, partsList);
              }
            } catch (HiveException e) {
              // Has to use full name to make sure it does not conflict with
              // org.apache.commons.lang.StringUtils
              LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
              throw new SemanticException(e.getMessage(), e);
            }

            // If there is any unknown partition, create a map-reduce job for
            // the filter to prune correctly
            if ((partsList.getUnknownPartns().size() == 0)) {
              globalLimitCtx.enableOpt(tempGlobalLimit);
            }
          }
        }
        if (globalLimitCtx.isEnable()) {
          LOG.info("Qualify the optimize that reduces input size for 'limit' for limit "
              + globalLimitCtx.getGlobalLimit());
        }
      }
    }

    // In case of a select, use a fetch task instead of a move task
    if (qb.getIsQuery()) {
      if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) {
        throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg());
      }
      String cols = loadFileWork.get(0).getColumns();
      String colTypes = loadFileWork.get(0).getColumnTypes();

      String resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
      TableDesc resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat);

      fetch = new FetchWork(new Path(loadFileWork.get(0).getSourceDir()).toString(),
          resultTab, qb.getParseInfo().getOuterQueryLimit());

      fetchTask = (FetchTask) TaskFactory.get(fetch, conf);
      setFetchTask(fetchTask);

      // For the FetchTask, the limit optimization requires that we fetch all
      // the rows in memory and count how many rows we get. It's not practical
      // if the limit factor is too big.
      int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
      if (globalLimitCtx.isEnable() && globalLimitCtx.getGlobalLimit() > fetchLimit) {
        LOG.info("For FetchTask, LIMIT " + globalLimitCtx.getGlobalLimit() + " > " + fetchLimit
            + ". Doesn't qualify for the limit optimization.");
        globalLimitCtx.disableOpt();
      }

    } else {
      for (LoadTableDesc ltd : loadTableWork) {
        Task<MoveWork> tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false),
            conf);
        mvTask.add(tsk);
        // Check to see if we are making any indexes stale and auto-update them if configured
        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
          IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, getInputs(), conf);
          try {
            List<Task<? extends Serializable>> indexUpdateTasks = indexUpdater.generateUpdateTasks();
            for (Task<? extends Serializable> updateTask : indexUpdateTasks) {
              tsk.addDependentTask(updateTask);
            }
          } catch (HiveException e) {
            console.printInfo("WARNING: could not auto-update stale indexes, indexes are not in sync");
          }
        }
      }


      boolean oneLoadFile = true;
      for (LoadFileDesc lfd : loadFileWork) {
        if (qb.isCTAS()) {
          assert (oneLoadFile); // should not have more than 1 load file for CTAS
          // Make the move task's destination directory the table's destination.
          String location = qb.getTableDesc().getLocation();
          if (location == null) {
            // get the table's default location
            Table dumpTable;
            Path targetPath;
            try {
              dumpTable = db.newTable(qb.getTableDesc().getTableName());
              if (!db.databaseExists(dumpTable.getDbName())) {
                throw new SemanticException("ERROR: The database " + dumpTable.getDbName()
                    + " does not exist.");
              }
              Warehouse wh = new Warehouse(conf);
              targetPath = wh.getTablePath(db.getDatabase(dumpTable.getDbName()), dumpTable
                  .getTableName());
            } catch (HiveException e) {
              throw new SemanticException(e);
            } catch (MetaException e) {
              throw new SemanticException(e);
            }

            location = targetPath.toString();
          }
          lfd.setTargetDir(location);

          oneLoadFile = false;
        }
        mvTask.add(TaskFactory.get(new MoveWork(null, null, null, lfd, false),
            conf));
      }
    }

    // generate map reduce plans
    ParseContext tempParseContext = getParseContext();
    GenMRProcContext procCtx = new GenMRProcContext(
        conf,
        new HashMap<Operator<? extends Serializable>, Task<? extends Serializable>>(),
        new ArrayList<Operator<? extends Serializable>>(), tempParseContext,
        mvTask, rootTasks,
        new LinkedHashMap<Operator<? extends Serializable>, GenMapRedCtx>(),
        inputs, outputs);

    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack.
    // The dispatcher generates the plan from the operator tree
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R1", "TS%"), new GenMRTableScan1());
    opRules.put(new RuleRegExp("R2", "TS%.*RS%"), new GenMRRedSink1());
    opRules.put(new RuleRegExp("R3", "RS%.*RS%"), new GenMRRedSink2());
    opRules.put(new RuleRegExp("R4", "FS%"), new GenMRFileSink1());
    opRules.put(new RuleRegExp("R5", "UNION%"), new GenMRUnion1());
    opRules.put(new RuleRegExp("R6", "UNION%.*RS%"), new GenMRRedSink3());
    opRules.put(new RuleRegExp("R6", "MAPJOIN%.*RS%"), new GenMRRedSink4());
    opRules.put(new RuleRegExp("R7", "TS%.*MAPJOIN%"), MapJoinFactory.getTableScanMapJoin());
    opRules.put(new RuleRegExp("R8", "RS%.*MAPJOIN%"), MapJoinFactory.getReduceSinkMapJoin());
    opRules.put(new RuleRegExp("R9", "UNION%.*MAPJOIN%"), MapJoinFactory.getUnionMapJoin());
    opRules.put(new RuleRegExp("R10", "MAPJOIN%.*MAPJOIN%"), MapJoinFactory.getMapJoinMapJoin());
    opRules.put(new RuleRegExp("R11", "MAPJOIN%SEL%"), MapJoinFactory.getMapJoin());

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules,
        procCtx);

    GraphWalker ogw = new GenMapRedWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(topOps.values());
    ogw.startWalking(topNodes, null);

    // reduce sink does not have any kids - since the plan by now has been
    // broken up into multiple tasks, iterate over all tasks.
    // For each task, go over all operators recursively.
    for (Task<? extends Serializable> rootTask : rootTasks) {
      breakTaskTree(rootTask);
    }

    // For each task, set the key descriptor for the reducer
    for (Task<? extends Serializable> rootTask : rootTasks) {
      setKeyDescTaskTree(rootTask);
    }

    PhysicalContext physicalContext = new PhysicalContext(conf,
        getParseContext(), ctx, rootTasks, fetchTask);
    PhysicalOptimizer physicalOptimizer = new PhysicalOptimizer(
        physicalContext, conf);
    physicalOptimizer.optimize();

    // For each operator, generate the counters if needed
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEJOBPROGRESS)) {
      for (Task<? extends Serializable> rootTask : rootTasks) {
        generateCountersTask(rootTask);
      }
    }

    decideExecMode(rootTasks, ctx, globalLimitCtx);

    if (qb.isCTAS()) {
      // generate a DDL task and make it a dependent task of the leaf
      CreateTableDesc crtTblDesc = qb.getTableDesc();

      validateCreateTable(crtTblDesc);

      // Clear the output for CTAS since we don't need the output from the
      // MapredWork; the DDLWork at the tail of the chain will have the output.
      getOutputs().clear();

      Task<? extends Serializable> crtTblTask = TaskFactory.get(new DDLWork(
          getInputs(), getOutputs(), crtTblDesc), conf);

      // find all leaf tasks and make the DDLTask as a dependent task of all of
      // them
      HashSet<Task<? extends Serializable>> leaves = new HashSet<Task<? extends Serializable>>();
      getLeafTasks(rootTasks, leaves);
      assert (leaves.size() > 0);
      for (Task<? extends Serializable> task : leaves) {
        if (task instanceof StatsTask) {
          // StatsTask requires the table to already exist
          for (Task<? extends Serializable> parentOfStatsTask : task.getParentTasks()){
            parentOfStatsTask.addDependentTask(crtTblTask);
          }
          for (Task<? extends Serializable> parentOfCrtTblTask : crtTblTask.getParentTasks()){
            parentOfCrtTblTask.removeDependentTask(task);
          }
          crtTblTask.addDependentTask(task);
        } else {
          task.addDependentTask(crtTblTask);
        }
      }
    }

    if (globalLimitCtx.isEnable() && fetchTask != null) {
      int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
      LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit());
      fetchTask.getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit());
    }

    if (globalLimitCtx.isEnable() && globalLimitCtx.getLastReduceLimitDesc() != null) {
      LOG.info("set least row check for LimitDesc: " + globalLimitCtx.getGlobalLimit());
      globalLimitCtx.getLastReduceLimitDesc().setLeastRows(globalLimitCtx.getGlobalLimit());

Examples of org.apache.hadoop.hive.ql.exec.FetchTask

      work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl));
      work.setLimit(100);
    } else {
      work = new FetchWork(tbl.getDataLocation(), Utilities.getTableDesc(tbl));
    }
    FetchTask task = new FetchTask();
    task.setWork(work);
    task.initialize(conf, null, null);
    task.fetch(temp);
    for (String str : temp) {
      results.add(str.replace("\t", ","));
    }
    return results;
  }