Examples of FetchTask

Examples of org.apache.hadoop.hive.ql.exec.FetchTask

      // can't get any info without a plan
    } else if (sem.getResultSchema() != null) {
      List<FieldSchema> lst = sem.getResultSchema();
      schema = new Schema(lst, null);
    } else if (sem.getFetchTask() != null) {
      FetchTask ft = sem.getFetchTask();
      TableDesc td = ft.getTblDesc();
      // partitioned tables don't have a tableDesc set on the FetchTask.
      // Instead they have a list of PartitionDesc objects, each with a table
      // desc. Let's try to fetch the desc for the first partition and use
      // its deserializer.
      if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null) {
        if (ft.getWork().getPartDesc().size() > 0) {
          td = ft.getWork().getPartDesc().get(0).getTableDesc();
        }
      }

      if (td == null) {
        LOG.info("No returning schema.");

Examples of org.apache.hadoop.hive.ql.exec.FetchTask

    }
  }

  public boolean getResults(ArrayList<String> res) throws IOException, CommandNeedRetryException {
    if (plan != null && plan.getFetchTask() != null) {
      FetchTask ft = plan.getFetchTask();
      ft.setMaxRows(maxRows);
      return ft.fetch(res);
    }

    if (resStream == null) {
      resStream = ctx.getStream();
    }
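
As a usage sketch, the batching contract of getResults (it appends up to maxRows rows per call and returns false once the plan's FetchTask is drained) looks roughly like this from the caller's side. This assumes an initialized Hive session and an existing table named src; the class name FetchLoop is hypothetical.

import java.util.ArrayList;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.session.SessionState;

public class FetchLoop {
  public static void main(String[] args) throws Exception {
    HiveConf conf = new HiveConf(FetchLoop.class);
    SessionState.start(new SessionState(conf));

    Driver driver = new Driver(conf);
    driver.run("SELECT * FROM src"); // assumes the table src exists

    ArrayList<String> batch = new ArrayList<String>();
    while (driver.getResults(batch)) { // false once the fetch is drained
      for (String row : batch) {
        System.out.println(row);
      }
      batch.clear(); // getResults appends; reset between batches
    }
    driver.close(); // releases the FetchTask (see the close() example below)
  }
}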

Examples of org.apache.hadoop.hive.ql.exec.FetchTask


  public int close() {
    try {
      if (plan != null) {
        FetchTask fetchTask = plan.getFetchTask();
        if (null != fetchTask) {
          try {
            fetchTask.clearFetch();
          } catch (Exception e) {
            LOG.debug(" Exception while clearing the Fetch task ", e);
          }
        }
      }
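
Since close() is what ultimately calls clearFetch(), callers should pair it with fetching the way they would any other resource release. A hedged sketch of the corresponding try/finally discipline (the class name and query string are placeholders):

import java.util.ArrayList;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Driver;

public class FetchAndClose {
  public static void runOnce(HiveConf conf) throws Exception {
    Driver driver = new Driver(conf);
    try {
      driver.run("SELECT count(1) FROM src"); // placeholder query
      ArrayList<String> res = new ArrayList<String>();
      driver.getResults(res);
    } finally {
      driver.close(); // clears the FetchTask even if fetching threw
    }
  }
}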

Examples of org.apache.hadoop.hive.ql.exec.FetchTask

      // no CTAS or insert, not analyze command, and single sourced.
      String alias = (String) pctx.getTopOps().keySet().toArray()[0];
      Operator topOp = (Operator) pctx.getTopOps().values().toArray()[0];
      if (topOp instanceof TableScanOperator) {
        try {
          FetchTask fetchTask = optimize(pctx, alias, (TableScanOperator) topOp);
          if (fetchTask != null) {
            pctx.setFetchTask(fetchTask);
          }
        } catch (HiveException e) {
          // Has to use full name to make sure it does not conflict with

Examples of org.apache.hadoop.hive.ql.exec.FetchTask

    boolean aggressive = "more".equals(mode);
    FetchData fetch = checkTree(aggressive, pctx, alias, source);
    if (fetch != null) {
      int limit = pctx.getQB().getParseInfo().getOuterQueryLimit();
      FetchWork fetchWork = fetch.convertToWork();
      FetchTask fetchTask = (FetchTask) TaskFactory.get(fetchWork, pctx.getConf());
      fetchWork.setSink(fetch.completed(pctx, fetchWork));
      fetchWork.setSource(source);
      fetchWork.setLimit(limit);
      return fetchTask;
    }
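
Note that the optimizer never instantiates FetchTask directly; TaskFactory.get binds a task id and the FetchWork together. A minimal sketch of that step in isolation, assuming only the TaskFactory.get overload used above (the class and method names are hypothetical):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.plan.FetchWork;

public final class FetchTasks {
  private FetchTasks() {}

  // Wrap a prepared FetchWork into a FetchTask, applying the outer LIMIT.
  public static FetchTask toFetchTask(FetchWork work, HiveConf conf, int limit) {
    work.setLimit(limit); // mirrors fetchWork.setLimit(limit) above
    return (FetchTask) TaskFactory.get(work, conf);
  }
}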

Examples of org.apache.hadoop.hive.ql.exec.FetchTask

            work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl));
            work.setLimit(100);
        } else {
            work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl));
        }
        FetchTask task = new FetchTask();
        task.setWork(work);
        task.initialize(conf, null, null);
        task.fetch(temp);
        for (String str : temp) {
            results.add(str.replace("\t", ","));
        }
        return results;
    }
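
The snippet above takes the direct-construction route instead of TaskFactory: build the FetchWork by hand, cap it with a limit, initialize, and drain the rows into a list. A hedged, self-contained variant for an unpartitioned table, assuming the same String-location FetchWork constructor and three-argument initialize shown above (the class TableSampler and method fetchCsv are hypothetical):

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.FetchWork;

public final class TableSampler {
  private TableSampler() {}

  // Fetch up to `limit` rows from an unpartitioned table as CSV lines.
  public static List<String> fetchCsv(HiveConf conf, Table tbl, int limit)
      throws Exception {
    FetchWork work = new FetchWork(tbl.getDataLocation().toString(),
        Utilities.getTableDesc(tbl));
    work.setLimit(limit);

    FetchTask task = new FetchTask();
    task.setWork(work);
    task.initialize(conf, null, null);

    ArrayList<String> raw = new ArrayList<String>();
    task.fetch(raw);

    List<String> rows = new ArrayList<String>(raw.size());
    for (String r : raw) {
      rows.add(r.replace("\t", ",")); // Hive emits tab-separated text rows
    }
    return rows;
  }
}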

Examples of org.apache.hadoop.hive.ql.exec.FetchTask

      TableDesc resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat);

      FetchWork fetch = new FetchWork(new Path(loadFileWork.get(0).getSourceDir()).toString(),
          resultTab, qb.getParseInfo().getOuterQueryLimit());

      FetchTask fetchTask = (FetchTask) TaskFactory.get(fetch, conf);
      setFetchTask(fetchTask);

      // For the FetchTask, the limit optimization requires that we fetch all
      // the rows into memory and count them. That is not practical if the
      // limit is too large.
      int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
      if (globalLimitCtx.isEnable() && globalLimitCtx.getGlobalLimit() > fetchLimit) {
        LOG.info("For FetchTask, LIMIT " + globalLimitCtx.getGlobalLimit() + " > " + fetchLimit
            + ". Doesn't qualify limit optimiztion.");
        globalLimitCtx.disableOpt();
      }
    } else if (!isCStats) {
      for (LoadTableDesc ltd : loadTableWork) {
        Task<MoveWork> tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false),
            conf);
        mvTask.add(tsk);
        // Check whether we are making any indexes stale, and auto-update them if configured to
        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
          IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, getInputs(), conf);
          try {
            List<Task<? extends Serializable>> indexUpdateTasks = indexUpdater.generateUpdateTasks();
            for (Task<? extends Serializable> updateTask : indexUpdateTasks) {
              tsk.addDependentTask(updateTask);
            }
          } catch (HiveException e) {
            console.printInfo("WARNING: could not auto-update stale indexes, indexes are not in of sync");
          }
        }
      }

      boolean oneLoadFile = true;
      for (LoadFileDesc lfd : loadFileWork) {
        if (qb.isCTAS()) {
          assert (oneLoadFile); // should not have more than 1 load file for CTAS
          // make the movetask's destination directory the table's destination.
          String location = qb.getTableDesc().getLocation();
          if (location == null) {
            // get the table's default location
            Table dumpTable;
            Path targetPath;
            try {
              dumpTable = db.newTable(qb.getTableDesc().getTableName());
              if (!db.databaseExists(dumpTable.getDbName())) {
                throw new SemanticException("ERROR: The database " + dumpTable.getDbName()
                    + " does not exist.");
              }
              Warehouse wh = new Warehouse(conf);
              targetPath = wh.getTablePath(db.getDatabase(dumpTable.getDbName()), dumpTable
                  .getTableName());
            } catch (HiveException e) {
              throw new SemanticException(e);
            } catch (MetaException e) {
              throw new SemanticException(e);
            }

            location = targetPath.toString();
          }
          lfd.setTargetDir(location);

          oneLoadFile = false;
        }
        mvTask.add(TaskFactory.get(new MoveWork(null, null, null, lfd, false),
            conf));
      }
    }

    // generate map reduce plans
    ParseContext tempParseContext = getParseContext();
    GenMRProcContext procCtx = new GenMRProcContext(
        conf,
        new HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>>(),
        new ArrayList<Operator<? extends OperatorDesc>>(), tempParseContext,
        mvTask, rootTasks,
        new LinkedHashMap<Operator<? extends OperatorDesc>, GenMapRedCtx>(),
        inputs, outputs);

    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack.
    // The dispatcher generates the plan from the operator tree
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R1",
      TableScanOperator.getOperatorName() + "%"),
      new GenMRTableScan1());
    opRules.put(new RuleRegExp("R2",
      TableScanOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"),
      new GenMRRedSink1());
    opRules.put(new RuleRegExp("R3",
      ReduceSinkOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"),
      new GenMRRedSink2());
    opRules.put(new RuleRegExp("R4",
      FileSinkOperator.getOperatorName() + "%"),
      new GenMRFileSink1());
    opRules.put(new RuleRegExp("R5",
      UnionOperator.getOperatorName() + "%"),
      new GenMRUnion1());
    opRules.put(new RuleRegExp("R6",
      UnionOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"),
      new GenMRRedSink3());
    opRules.put(new RuleRegExp("R7",
      MapJoinOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"),
      new GenMRRedSink4());
    opRules.put(new RuleRegExp("R8",
      TableScanOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"),
      MapJoinFactory.getTableScanMapJoin());
    opRules.put(new RuleRegExp("R9",
      ReduceSinkOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"),
      MapJoinFactory.getReduceSinkMapJoin());
    opRules.put(new RuleRegExp("R10",
      UnionOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"),
      MapJoinFactory.getUnionMapJoin());
    opRules.put(new RuleRegExp("R11",
      MapJoinOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"),
      MapJoinFactory.getMapJoinMapJoin());
    opRules.put(new RuleRegExp("R12",
      MapJoinOperator.getOperatorName() + "%" + SelectOperator.getOperatorName() + "%"),
      MapJoinFactory.getMapJoin());

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules,
        procCtx);

    GraphWalker ogw = new GenMapRedWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(topOps.values());
    ogw.startWalking(topNodes, null);
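
    // At this point the walk has populated rootTasks with the generated
    // map-reduce task DAG; the remaining steps post-process that DAG.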

    /* If the query was the result of analyze table column compute statistics rewrite, create
     * a column stats task instead of a fetch task to persist stats to the metastore.
     */
    if (isCStats) {
      genColumnStatsTask(qb);
    }

    // Reduce sinks do not have any children, since the plan has by now been
    // broken up into multiple tasks. Iterate over all tasks, and for each
    // task go over all operators recursively.
    for (Task<? extends Serializable> rootTask : rootTasks) {
      breakTaskTree(rootTask);
    }

    // For each task, set the key descriptor for the reducer
    for (Task<? extends Serializable> rootTask : rootTasks) {
      setKeyDescTaskTree(rootTask);
    }

    // If a task contains an operator which instructs that
    // BucketizedHiveInputFormat be used, set that input format on the task
    for (Task<? extends Serializable> rootTask : rootTasks) {
      setInputFormat(rootTask);
    }

    PhysicalContext physicalContext = new PhysicalContext(conf,
        getParseContext(), ctx, rootTasks, fetchTask);
    PhysicalOptimizer physicalOptimizer = new PhysicalOptimizer(
        physicalContext, conf);
    physicalOptimizer.optimize();

    // For each operator, generate the counters if needed
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEJOBPROGRESS)) {
      for (Task<? extends Serializable> rootTask : rootTasks) {
        generateCountersTask(rootTask);
      }
    }

    decideExecMode(rootTasks, ctx, globalLimitCtx);

    if (qb.isCTAS()) {
      // generate a DDL task and make it a dependent task of the leaf
      CreateTableDesc crtTblDesc = qb.getTableDesc();

      crtTblDesc.validate();

      // Clear the output for CTAS since we don't need the output from the
      // mapredWork; the DDLWork at the tail of the chain will have the output
      getOutputs().clear();

      Task<? extends Serializable> crtTblTask = TaskFactory.get(new DDLWork(
          getInputs(), getOutputs(), crtTblDesc), conf);

      // find all leaf tasks and make the DDLTask as a dependent task of all of
      // them
      HashSet<Task<? extends Serializable>> leaves = new HashSet<Task<? extends Serializable>>();
      getLeafTasks(rootTasks, leaves);
      assert (leaves.size() > 0);
      for (Task<? extends Serializable> task : leaves) {
        if (task instanceof StatsTask){
          // StatsTask requires the table to already exist
          for (Task<? extends Serializable> parentOfStatsTask : task.getParentTasks()){
            parentOfStatsTask.addDependentTask(crtTblTask);
          }
          for (Task<? extends Serializable> parentOfCrtTblTask : crtTblTask.getParentTasks()){
            parentOfCrtTblTask.removeDependentTask(task);
          }
          crtTblTask.addDependentTask(task);
        } else {
          task.addDependentTask(crtTblTask);
        }
      }
    }

    if (globalLimitCtx.isEnable() && fetchTask != null) {
      LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit());
      fetchTask.getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit());
    }

    if (globalLimitCtx.isEnable() && globalLimitCtx.getLastReduceLimitDesc() != null) {
      LOG.info("set least row check for LimitDesc: " + globalLimitCtx.getGlobalLimit());
      globalLimitCtx.getLastReduceLimitDesc().setLeastRows(globalLimitCtx.getGlobalLimit());
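
One detail worth pulling out of this long excerpt is the fetch-side limit guard near the top: the limit optimization buffers every fetched row in memory in order to count rows, so it is abandoned whenever the query's LIMIT exceeds a configured cap. A minimal sketch of that predicate, assuming only the HiveConf.getIntVar call and HIVELIMITOPTMAXFETCH variable used above (the class and method names are hypothetical):

import org.apache.hadoop.hive.conf.HiveConf;

public final class LimitOptGuard {
  private LimitOptGuard() {}

  // True when the global LIMIT is small enough to fetch entirely in memory.
  public static boolean qualifiesForLimitOpt(HiveConf conf, int globalLimit) {
    int fetchLimit =
        HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
    return globalLimit <= fetchLimit;
  }
}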

Examples of org.apache.hadoop.hive.ql.exec.FetchTask

    }
  }

  public boolean getResults(ArrayList<String> res) throws IOException {
    if (plan != null && plan.getFetchTask() != null) {
      FetchTask ft = plan.getFetchTask();
      ft.setMaxRows(maxRows);
      return ft.fetch(res);
    }

    if (resStream == null) {
      resStream = ctx.getStream();
    }

Examples of org.apache.hadoop.hive.ql.exec.FetchTask

        if (!sem.getFetchTaskInit()) {
          sem.setFetchTaskInit(true);
          sem.getFetchTask().initialize(conf);
        }
        FetchTask ft = (FetchTask) sem.getFetchTask();

        tableDesc td = ft.getTblDesc();
        // partitioned tables don't have a tableDesc set on the FetchTask. Instead
        // they have a list of PartitionDesc objects, each with a table desc. Let's
        // try to fetch the desc for the first partition and use its deserializer.
        if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null) {
          if (ft.getWork().getPartDesc().size() > 0) {
            td = ft.getWork().getPartDesc().get(0).getTableDesc();
          }
        }

        if (td == null) {
          throw new Exception("No table description found for fetch task: " + ft);