Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.MapredWork
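The snippets below are collected from the Hive source. Before diving in, here is a minimal sketch of the shape they all rely on: a MapredWork couples one MapWork with an optional ReduceWork. The class and method names come from the snippets themselves; the values and the helper name are hypothetical.

import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.ReduceWork;

// Minimal sketch: a MapredWork is a container for a MapWork plus an optional ReduceWork.
public class MapredWorkSketch {
  public static MapredWork buildPlan(boolean mapOnly) {
    MapredWork mrWork = new MapredWork();

    MapWork mapWork = new MapWork();
    mrWork.setMapWork(mapWork);              // every plan has a map side

    if (!mapOnly) {
      ReduceWork reduceWork = new ReduceWork();
      reduceWork.setNumReduceTasks(1);       // hypothetical reducer count
      mrWork.setReduceWork(reduceWork);      // omitted for map-only jobs (e.g. file merges)
    }
    return mrWork;
  }
}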


    } else if (mvWork.getLoadFileWork() != null) {
      statsWork = new StatsWork(mvWork.getLoadFileWork());
    }
    assert statsWork != null : "Error when generating StatsTask";
    statsWork.setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
    MapredWork mrWork = (MapredWork) currTask.getWork();

    // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix
    // in FileSinkDesc is used for stats publishing. They should be consistent.
    statsWork.setAggKey(((FileSinkOperator) nd).getConf().getStatsAggPrefix());
    Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, hconf);

    // mark the MapredWork and FileSinkOperator for gathering stats
    nd.getConf().setGatherStats(true);
    mrWork.getMapWork().setGatheringStats(true);
    if (mrWork.getReduceWork() != null) {
      mrWork.getReduceWork().setGatheringStats(true);
    }
    nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
    nd.getConf().setMaxStatsKeyPrefixLength(
        hconf.getIntVar(ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH));
    // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName());
View Full Code Here
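The snippet above marks both halves of the plan for statistics gathering so that the FileSinkOperator output can later be aggregated by the StatsTask. A condensed sketch of just those flags, assuming mrWork is the current task's MapredWork and the usual org.apache.hadoop.hive.ql.plan imports:

// Condensed sketch of the stats flags set above.
static void markForStatsGathering(MapredWork mrWork) {
  mrWork.getMapWork().setGatheringStats(true);
  if (mrWork.getReduceWork() != null) {
    mrWork.getReduceWork().setGatheringStats(true);
  }
}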


        throw new SemanticException(msg);
      }

    } else {
      cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc);
      work = new MapredWork();
      ((MapredWork)work).setMapWork(cplan);
      // use CombineHiveInputFormat for map-only merging
    }
    cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat");
    // NOTE: we should gather stats in MR1 rather than MR2 at merge job since we don't
View Full Code Here

    String inputDir = fsDesc.getFinalDirName();
    TableDesc tblDesc = fsDesc.getTableInfo();
    aliases.add(inputDir); // dummy alias: just use the input path

    // constructing the default MapredWork
    MapredWork cMrPlan = GenMapRedUtils.getMapRedWorkFromConf(conf);
    MapWork cplan = cMrPlan.getMapWork();
    cplan.getPathToAliases().put(inputDir, aliases);
    cplan.getPathToPartitionInfo().put(inputDir, new PartitionDesc(tblDesc, null));
    cplan.getAliasToWork().put(inputDir, topOp);
    cplan.setMapperCannotSpanPartns(true);
View Full Code Here
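The two snippets above build the map-only merge job: a default MapredWork is taken from the configuration, its MapWork is pointed at the input directory under a dummy alias, and CombineHiveInputFormat is forced. A condensed sketch of that wiring; the HiveConf parameter type and the helper name are assumptions, while the calls themselves appear in the snippets:

// Condensed sketch of the map-only merge plan built above.
static MapredWork buildMergePlan(HiveConf conf, String inputDir, TableDesc tblDesc,
    Operator<? extends OperatorDesc> topOp) {
  MapredWork cMrPlan = GenMapRedUtils.getMapRedWorkFromConf(conf);
  MapWork cplan = cMrPlan.getMapWork();

  ArrayList<String> aliases = new ArrayList<String>();
  aliases.add(inputDir);                                   // dummy alias: just the input path

  cplan.getPathToAliases().put(inputDir, aliases);
  cplan.getPathToPartitionInfo().put(inputDir, new PartitionDesc(tblDesc, null));
  cplan.getAliasToWork().put(inputDir, topOp);
  cplan.setMapperCannotSpanPartns(true);
  cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat");

  MapredWork work = new MapredWork();
  work.setMapWork(cplan);                                  // map-only: no ReduceWork attached
  return work;
}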

    Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);
    Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx =
        opProcCtx.getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
    MapredWork plan = (MapredWork) currTask.getWork();
    HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap =
        opProcCtx.getOpTaskMap();
    Operator<? extends OperatorDesc> currTopOp = opProcCtx.getCurrTopOp();

    opTaskMap.put(reducer, currTask);
    plan.setReduceWork(new ReduceWork());
    plan.getReduceWork().setReducer(reducer);
    ReduceSinkDesc desc = op.getConf();

    plan.getReduceWork().setNumReduceTasks(desc.getNumReducers());

    if (needsTagging(plan.getReduceWork())) {
      plan.getReduceWork().setNeedsTagging(true);
    }

    assert currTopOp != null;
    String currAliasId = opProcCtx.getCurrAliasId();
View Full Code Here
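Both the snippet above and the initUnionPlan example that follows attach the reduce side in the same way. Condensed into one helper (the name is hypothetical; reducer and desc come from a ReduceSinkOperator, as above):

// Condensed sketch of the shared pattern: hang a ReduceWork off an existing MapredWork
// and point it at the reducer operator fed by the ReduceSinkOperator.
static void attachReduceWork(MapredWork plan,
    Operator<? extends OperatorDesc> reducer, ReduceSinkDesc desc) {
  ReduceWork reduceWork = new ReduceWork();
  reduceWork.setReducer(reducer);
  reduceWork.setNumReduceTasks(desc.getNumReducers());
  plan.setReduceWork(reduceWork);
}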

  public static void initUnionPlan(ReduceSinkOperator op, UnionOperator currUnionOp,
      GenMRProcContext opProcCtx,
      Task<? extends Serializable> unionTask) throws SemanticException {
    Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);

    MapredWork plan = (MapredWork) unionTask.getWork();
    HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap =
        opProcCtx.getOpTaskMap();

    opTaskMap.put(reducer, unionTask);

    plan.setReduceWork(new ReduceWork());
    plan.getReduceWork().setReducer(reducer);
    ReduceSinkDesc desc = op.getConf();

    plan.getReduceWork().setNumReduceTasks(desc.getNumReducers());

    if (needsTagging(plan.getReduceWork())) {
      plan.getReduceWork().setNeedsTagging(true);
    }

    initUnionPlan(opProcCtx, currUnionOp, unionTask, false);
  }
View Full Code Here

    }

    try {
      addInputPaths(job, work);

      MapredWork mrWork = new MapredWork();
      mrWork.setMapWork(work);
      Utilities.setMapRedWork(job, mrWork, ctx.getMRTmpFileURI());

      // remove the pwd from the conf file so that the job tracker doesn't
      // show it in the logs
      String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
View Full Code Here

        work.getAggKey());

    try {
      addInputPaths(job, work);

      MapredWork mrWork = new MapredWork();
      mrWork.setMapWork(work);
      Utilities.setMapRedWork(job, mrWork, ctx.getMRTmpFileURI());

      // remove the pwd from the conf file so that the job tracker doesn't
      // show it in the logs
      String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
View Full Code Here

            }

            // If it's a standard map reduce task, check what, if anything, it inferred about
            // the directory this move task is moving
            if (task instanceof MapRedTask) {
              MapredWork work = (MapredWork)task.getWork();
              MapWork mapWork = work.getMapWork();
              bucketCols = mapWork.getBucketedColsByDirectory().get(path);
              sortCols = mapWork.getSortedColsByDirectory().get(path);
              if (work.getReduceWork() != null) {
                numBuckets = work.getReduceWork().getNumReduceTasks();
              }

              if (bucketCols != null || sortCols != null) {
                // This must be a final map reduce task (the task containing the file sink
                // operator that writes the final output)
                assert work.isFinalMapRed();
              }
              break;
            }

            // If it's a move task, get the path the files were moved from, this is what any
            // preceding map reduce task inferred information about, and moving does not invalidate
            // those assumptions
            // This can happen when a conditional merge is added before the final MoveTask, but the
            // condition for merging is not met, see GenMRFileSink1.
            if (task instanceof MoveTask) {
              if (((MoveTask)task).getWork().getLoadFileWork() != null) {
                path = ((MoveTask)task).getWork().getLoadFileWork().getSourceDir();
              }
            }
          }
          // deal with dynamic partitions
          DynamicPartitionCtx dpCtx = tbd.getDPCtx();
          if (dpCtx != null && dpCtx.getNumDPCols() > 0) { // dynamic partitions

            List<LinkedHashMap<String, String>> dps = Utilities.getFullDPSpecs(conf, dpCtx);

            // publish DP columns to its subscribers
            if (dps != null && dps.size() > 0) {
              pushFeed(FeedType.DYNAMIC_PARTITIONS, dps);
            }

            // load the list of DP partitions and return the list of partition specs
            // TODO: In a follow-up to HIVE-1361, we should refactor loadDynamicPartitions
            // to use Utilities.getFullDPSpecs() to get the list of full partSpecs.
            // After that check the number of DPs created to not exceed the limit and
            // iterate over it and call loadPartition() here.
            // The reason we don't do inside HIVE-1361 is the latter is large and we
            // want to isolate any potential issue it may introduce.
            ArrayList<LinkedHashMap<String, String>> dp =
              db.loadDynamicPartitions(
                  new Path(tbd.getSourceDir()),
                  tbd.getTable().getTableName(),
                  tbd.getPartitionSpec(),
                  tbd.getReplace(),
                  dpCtx.getNumDPCols(),
                  tbd.getHoldDDLTime(),
                  isSkewedStoredAsDirs(tbd));

            if (dp.size() == 0 && conf.getBoolVar(HiveConf.ConfVars.HIVE_ERROR_ON_EMPTY_PARTITION)) {
              throw new HiveException("This query creates no partitions." +
                  " To turn off this error, set hive.error.on.empty.partition=false.");
            }

            // for each partition spec, get the partition
            // and put it to WriteEntity for post-exec hook
            for (LinkedHashMap<String, String> partSpec: dp) {
              Partition partn = db.getPartition(table, partSpec, false);

              if (bucketCols != null || sortCols != null) {
                updatePartitionBucketSortColumns(table, partn, bucketCols, numBuckets, sortCols);
              }

              WriteEntity enty = new WriteEntity(partn, true);
              if (work.getOutputs() != null) {
                work.getOutputs().add(enty);
              }
              // Need to update the queryPlan's output as well so that post-exec hook get executed.
              // This is only needed for dynamic partitioning since for SP the WriteEntity is
              // constructed at compile time and the queryPlan already contains that.
              // For DP, WriteEntity creation is deferred at this stage so we need to update
              // queryPlan here.
              if (queryPlan.getOutputs() == null) {
                queryPlan.setOutputs(new HashSet<WriteEntity>());
              }
              queryPlan.getOutputs().add(enty);

              // update columnar lineage for each partition
              dc = new DataContainer(table.getTTable(), partn.getTPartition());

              if (SessionState.get() != null) {
                SessionState.get().getLineageState().setLineage(tbd.getSourceDir(), dc,
                    table.getCols());
              }

              console.printInfo("\tLoading partition " + partSpec);
            }
            dc = null; // reset data container to prevent it being added again.
          } else { // static partitions
            List<String> partVals = MetaStoreUtils.getPvals(table.getPartCols(),
                tbd.getPartitionSpec());
            db.validatePartitionNameCharacters(partVals);
            db.loadPartition(new Path(tbd.getSourceDir()), tbd.getTable().getTableName(),
                tbd.getPartitionSpec(), tbd.getReplace(), tbd.getHoldDDLTime(),
                tbd.getInheritTableSpecs(), isSkewedStoredAsDirs(tbd));
            Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false);

            if (bucketCols != null || sortCols != null) {
              updatePartitionBucketSortColumns(table, partn, bucketCols, numBuckets, sortCols);
            }

            dc = new DataContainer(table.getTTable(), partn.getTPartition());
            // add this partition to post-execution hook
            if (work.getOutputs() != null) {
              work.getOutputs().add(new WriteEntity(partn, true));
            }
          }
        }
        if (SessionState.get() != null && dc != null) {
          SessionState.get().getLineageState().setLineage(tbd.getSourceDir(), dc,
View Full Code Here
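Of the long MoveTask snippet above, the MapredWork-specific part is reading back what the final map-reduce task inferred: bucket and sort columns per output directory, and the reducer count as the bucket count. A minimal sketch of the latter lookup, assuming work is that task's MapredWork:

// Minimal sketch: the reducer count of the final map-reduce task, if present, is what the
// MoveTask above uses as the number of buckets. Returns null for map-only plans.
static Integer inferredBucketCount(MapredWork work) {
  return (work.getReduceWork() != null) ? work.getReduceWork().getNumReduceTasks() : null;
}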

      String hiveConfArgs = generateCmdLine(conf, ctx);

      // write out the plan to a local file
      Path planPath = new Path(ctx.getLocalTmpFileURI(), "plan.xml");
      OutputStream out = FileSystem.getLocal(conf).create(planPath);
      MapredWork plan = getWork();
      LOG.info("Generating plan file " + planPath.toString());
      Utilities.serializePlan(plan, out);

      String isSilent = "true".equalsIgnoreCase(System
          .getProperty("test.silent")) ? "-nolog" : "";
View Full Code Here

      MapredLocalWork plan = (MapredLocalWork) Utilities.deserializePlan(pathData);
      MapredLocalTask ed = new MapredLocalTask(plan, conf, isSilent);
      ret = ed.executeFromChildJVM(new DriverContext());

    } else {
      MapredWork plan = (MapredWork) Utilities.deserializePlan(pathData);
      ExecDriver ed = new ExecDriver(plan, conf, isSilent);
      ret = ed.execute(new DriverContext());
    }

    if (ret != 0) {
View Full Code Here
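The last two snippets show the hand-off between the driver and the child JVM: the driver serializes the MapredWork to a plan file, and ExecDriver deserializes and executes it. A rough sketch of that round trip; the stream types, method names and the Configuration/JobConf parameters are assumptions beyond what the snippets show:

// Rough sketch of the plan hand-off. Assumes the usual Hive/Hadoop imports and that
// Utilities.deserializePlan accepts a stream over the plan file, as implied above.
static void writePlan(MapredWork plan, Configuration conf, Path planPath) throws IOException {
  OutputStream out = FileSystem.getLocal(conf).create(planPath);
  Utilities.serializePlan(plan, out);          // the plan travels to the child JVM as a file
}

static int runPlan(InputStream planStream, JobConf conf, boolean isSilent) throws Exception {
  MapredWork plan = (MapredWork) Utilities.deserializePlan(planStream);
  ExecDriver ed = new ExecDriver(plan, conf, isSilent);
  return ed.execute(new DriverContext());      // non-zero return signals failure, as above
}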
