Package org.apache.hadoop.hive.ql.parse

Examples of org.apache.hadoop.hive.ql.parse.ParseContext

The snippets below are taken from the Hive query compiler (GenMapRedUtils and several of the GenMR NodeProcessor implementations). They show the recurring pattern: each routine first pulls the ParseContext out of its GenMRProcContext, then uses it to reach the HiveConf, the query Context, the partition and sample pruners, and the union/map-join bookkeeping while the map-reduce plan is being generated.


  /**
   * Split the current plan by creating a temporary destination.
   *
   * @param op the reduce sink operator encountered
   * @param opProcCtx processing context
   */
  public static void splitPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx)
    throws SemanticException {
    // Generate a new task
    mapredWork cplan = getMapRedWork();
    ParseContext parseCtx = opProcCtx.getParseCtx();
    Task<? extends Serializable> redTask = TaskFactory.get(cplan, parseCtx.getConf());
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);

    // Add the reducer
    cplan.setReducer(reducer);
    reduceSinkDesc desc = (reduceSinkDesc)op.getConf();
View Full Code Here
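The move above is easier to see without the Hive types: a fresh plan and task are created, and the reduce sink's only child becomes the reducer of the new task. Below is a minimal, self-contained sketch of that split using hypothetical stand-in classes (MiniOperator, MiniPlan, MiniTask), not the real Hive API:

import java.util.ArrayList;
import java.util.List;

class MiniOperator {
  final String name;
  final List<MiniOperator> children = new ArrayList<>();
  MiniOperator(String name) { this.name = name; }
}

class MiniPlan {
  MiniOperator reducer;   // operator subtree executed on the reduce side
}

class MiniTask {
  final MiniPlan work = new MiniPlan();
}

public class SplitPlanSketch {
  // Mirrors splitPlan: build a new plan/task, then attach the reducer.
  static MiniTask splitAt(MiniOperator reduceSink) {
    MiniTask redTask = new MiniTask();               // cf. TaskFactory.get(cplan, conf)
    MiniOperator reducer = reduceSink.children.get(0);
    redTask.work.reducer = reducer;                  // cf. cplan.setReducer(reducer)
    return redTask;
  }

  public static void main(String[] args) {
    MiniOperator rs = new MiniOperator("RS");
    rs.children.add(new MiniOperator("GBY"));
    System.out.println(splitAt(rs).work.reducer.name);  // prints GBY
  }
}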


  /**
   * Set the map work for the given alias in the plan.
   *
   * @param alias_id current alias
   * @param topOp the top operator of the stack
   * @param plan current map-reduce plan
   * @param local whether the alias belongs to map-reduce work or local work
   * @param opProcCtx processing context
   */
  public static void setTaskPlan(String alias_id, Operator<? extends Serializable> topOp,
      mapredWork plan, boolean local, GenMRProcContext opProcCtx)
    throws SemanticException {
    ParseContext parseCtx = opProcCtx.getParseCtx();
    Set<ReadEntity> inputs = opProcCtx.getInputs();

    ArrayList<Path> partDir = new ArrayList<Path>();
    ArrayList<partitionDesc> partDesc = new ArrayList<partitionDesc>();

    Path       tblDir  = null;
    tableDesc  tblDesc = null;

    PrunedPartitionList partsList = null;

    try {
      partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp),
                                        parseCtx.getOpToPartPruner().get(topOp),
                                        opProcCtx.getConf(), alias_id, parseCtx.getPrunedPartitions());
    } catch (SemanticException e) {
      throw e;
    } catch (HiveException e) {
      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
      throw new SemanticException(e.getMessage(), e);
    }

    // Generate the map work for this alias_id
    Set<Partition> parts = null;
    // pass both confirmed and unknown partitions through the map-reduce framework

    parts = partsList.getConfirmedPartns();
    parts.addAll(partsList.getUnknownPartns());
    partitionDesc aliasPartnDesc = null;
    try {
      if (parts.isEmpty()) {
        if (!partsList.getDeniedPartns().isEmpty())
          aliasPartnDesc = Utilities.getPartitionDesc(partsList.getDeniedPartns()
              .iterator().next());
      } else {
        aliasPartnDesc = Utilities.getPartitionDesc(parts.iterator().next());
      }
    } catch (HiveException e) {
      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
      throw new SemanticException(e.getMessage(), e);
    }

    // The table does not have any partitions
    if (aliasPartnDesc == null)
      aliasPartnDesc = new partitionDesc(Utilities.getTableDesc(parseCtx.getTopToTable().get(topOp)), null);

    plan.getAliasToPartnInfo().put(alias_id, aliasPartnDesc);

    for (Partition part : parts) {
      if (part.getTable().isPartitioned())
        inputs.add(new ReadEntity(part));
      else
        inputs.add(new ReadEntity(part.getTable()));

      // Later, the properties will have to come from the partition rather than
      // the table, in order to support versioning.
      Path[] paths;
      sampleDesc sampleDescr = parseCtx.getOpToSamplePruner().get(topOp);

      if (sampleDescr != null) {
        paths = SamplePruner.prune(part, sampleDescr);
      }
      else {
View Full Code Here
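Stripped to its bookkeeping, the partition handling in setTaskPlan does three things: send confirmed plus unknown partitions through the job, take a descriptor from the first available partition, and fall back first to a denied partition (for its schema only) and then to the table itself. A compact stand-alone sketch under those assumptions, with strings standing in for Hive's Partition and partitionDesc types:

import java.util.LinkedHashSet;
import java.util.Set;

public class PartitionDescSketch {
  // Hypothetical stand-in: strings play the role of Partition / partitionDesc.
  static String pickDescriptor(Set<String> confirmed, Set<String> unknown,
                               Set<String> denied, String tableDesc) {
    Set<String> parts = new LinkedHashSet<>(confirmed);
    parts.addAll(unknown);              // cf. parts.addAll(partsList.getUnknownPartns())
    if (!parts.isEmpty()) {
      return parts.iterator().next();   // descriptor from any surviving partition
    }
    if (!denied.isEmpty()) {
      return denied.iterator().next();  // denied partition used for its schema only
    }
    return tableDesc;                   // unpartitioned table (or everything pruned)
  }

  public static void main(String[] args) {
    System.out.println(pickDescriptor(Set.of(), Set.of(), Set.of("ds=old"), "tbl"));
  }
}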

                                 GenMRProcContext opProcCtx, boolean setReducer,
                                 boolean local, int posn) throws SemanticException {
    mapredWork plan = (mapredWork) childTask.getWork();
    Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();

    ParseContext parseCtx = opProcCtx.getParseCtx();
    parentTask.addDependentTask(childTask);

    // A root task cannot depend on any other task, so childTask can no longer
    // be a root task; drop it from the root task list if present.
    List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();
    rootTasks.remove(childTask);

    // generate the temporary file
    Context baseCtx = parseCtx.getContext();
    String taskTmpDir = baseCtx.getMRTmpFileURI();

    Operator<? extends Serializable> parent = op.getParentOperators().get(posn);
    tableDesc tt_desc =
      PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));

    // Create a file sink operator for this file name
    boolean compressIntermediate = parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE);
    fileSinkDesc desc = new fileSinkDesc(taskTmpDir, tt_desc, compressIntermediate);
    if (compressIntermediate) {
      desc.setCompressCodec(parseCtx.getConf().getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC));
      desc.setCompressType(parseCtx.getConf().getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE));
    }
    Operator<? extends Serializable> fs_op = putOpInsertMap(OperatorFactory.get(desc, parent.getSchema()), null, parseCtx);

    // replace the reduce child with this operator
    List<Operator<? extends Serializable>> childOpList = parent.getChildOperators();
View Full Code Here
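The snippet above wires two tasks together through a temporary file: allocate a tmp path, build a file sink description (compressed if COMPRESSINTERMEDIATE is set), and splice the sink in as the new child. A small illustrative sketch of the descriptor-building step, using a made-up SinkDesc class rather than Hive's fileSinkDesc:

public class IntermediateSinkSketch {
  // Made-up stand-in for Hive's fileSinkDesc.
  static final class SinkDesc {
    final String dir;
    final boolean compressed;
    String codec;
    SinkDesc(String dir, boolean compressed) { this.dir = dir; this.compressed = compressed; }
  }

  static SinkDesc makeIntermediateSink(String taskTmpDir, boolean compressIntermediate,
                                       String codecFromConf) {
    SinkDesc desc = new SinkDesc(taskTmpDir, compressIntermediate);
    if (compressIntermediate) {         // cf. HiveConf.ConfVars.COMPRESSINTERMEDIATE*
      desc.codec = codecFromConf;
    }
    return desc;
  }

  public static void main(String[] args) {
    SinkDesc d = makeIntermediateSink("/tmp/hive-mr-000", true, "SomeCodec");
    System.out.println(d.dir + " compressed=" + d.compressed + " codec=" + d.codec);
  }
}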

    opProcCtx.setCurrAliasId(currAliasId);
    opProcCtx.setCurrTask(childTask);
  }

  public static void mergeMapJoinUnion(UnionOperator union, GenMRProcContext ctx, int pos)
      throws SemanticException {
    ParseContext parseCtx = ctx.getParseCtx();
    UnionProcContext uCtx = parseCtx.getUCtx();

    UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union);
    assert uPrsCtx != null;

    Task<? extends Serializable> currTask = ctx.getCurrTask();

    GenMRUnionCtx uCtxTask = ctx.getUnionTask(union);
    Task<? extends Serializable> uTask = null;

    Operator<? extends Serializable> parent = union.getParentOperators().get(pos);
    mapredWork uPlan = null;

    // union is encountered for the first time
    if (uCtxTask == null) {
      uCtxTask = new GenMRUnionCtx();
      uPlan = GenMapRedUtils.getMapRedWork();
      uTask = TaskFactory.get(uPlan, parseCtx.getConf());
      uCtxTask.setUTask(uTask);
      ctx.setUnionTask(union, uCtxTask);
    }
    else {
      uTask = uCtxTask.getUTask();
View Full Code Here
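The first-visit branch above is a memoization pattern: the per-union context and its task are created once and reused on every later branch of the union. A minimal stand-alone version of the same idea using a map, with hypothetical stand-in types:

import java.util.HashMap;
import java.util.Map;

public class UnionTaskMemoSketch {
  // Hypothetical stand-in for GenMRUnionCtx and its task.
  static final class UnionCtx {
    final String task;
    UnionCtx(String task) { this.task = task; }
  }

  private final Map<String, UnionCtx> unionTasks = new HashMap<>();
  private int nextId = 0;

  // cf. ctx.getUnionTask(union) == null ? create-and-register : reuse
  UnionCtx taskFor(String unionOp) {
    return unionTasks.computeIfAbsent(unionOp, u -> new UnionCtx("task-" + nextId++));
  }

  public static void main(String[] args) {
    UnionTaskMemoSketch s = new UnionTaskMemoSketch();
    System.out.println(s.taskFor("U1") == s.taskFor("U1"));  // true: one task per union
  }
}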

        Object... nodeOutputs) throws SemanticException {
      MapJoinOperator mapJoin = (MapJoinOperator)nd;
      GenMRProcContext opProcCtx = (GenMRProcContext)procCtx;
     
      mapredWork cplan = GenMapRedUtils.getMapRedWork();
      ParseContext parseCtx = opProcCtx.getParseCtx();
      Task<? extends Serializable> redTask  = TaskFactory.get(cplan, parseCtx.getConf());
      Task<? extends Serializable> currTask = opProcCtx.getCurrTask();

      // find the branch on which this processor was invoked
      int pos = getPositionParent(mapJoin, stack);
      boolean local = (pos != ((mapJoinDesc) mapJoin.getConf()).getPosBigTable());
View Full Code Here
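The local flag computed at the end of this snippet only asks whether the branch being processed is the big table of the map join; every other input is small enough to be shipped as a local side table. A one-method stand-alone restatement:

public class BigTablePosSketch {
  static boolean isLocalBranch(int branchPos, int posBigTable) {
    return branchPos != posBigTable;  // only the big table streams through the mapper
  }

  public static void main(String[] args) {
    System.out.println(isLocalBranch(0, 1));  // true: branch 0 is a small, local table
  }
}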

      SelectOperator  sel     = (SelectOperator)nd;
      MapJoinOperator mapJoin = (MapJoinOperator)sel.getParentOperators().get(0);
      assert sel.getParentOperators().size() == 1;
     
      GenMRProcContext ctx = (GenMRProcContext)procCtx;
      ParseContext parseCtx = ctx.getParseCtx();
     
      // is the mapjoin followed by a reducer
      List<MapJoinOperator> listMapJoinOps = parseCtx.getListMapJoinOpsNoReducer();
     
      if (listMapJoinOps.contains(mapJoin)) {
        ctx.setCurrAliasId(null);
        ctx.setCurrTopOp(null);
        Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();
        mapCurrCtx.put((Operator<? extends Serializable>)nd, new GenMapRedCtx(ctx.getCurrTask(), null, null));
        return null;
      }

      ctx.setCurrMapJoinOp(mapJoin);
     
      Task<? extends Serializable> currTask = ctx.getCurrTask();
      GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin);
      if (mjCtx == null) {
        mjCtx = new GenMRMapJoinCtx();
        ctx.setMapJoinCtx(mapJoin, mjCtx);
      }
     
      mapredWork mjPlan = GenMapRedUtils.getMapRedWork();
      Task<? extends Serializable> mjTask = TaskFactory.get(mjPlan, parseCtx.getConf());
     
      tableDesc tt_desc =
        PlanUtils.getIntermediateFileTableDesc(
            PlanUtils.getFieldSchemasFromRowSchema(mapJoin.getSchema(), "temporarycol"));
     
      // generate the temporary file
      Context baseCtx = parseCtx.getContext();
      String taskTmpDir = baseCtx.getMRTmpFileURI();
     
      // Add the path to alias mapping
      mjCtx.setTaskTmpDir(taskTmpDir);
      mjCtx.setTTDesc(tt_desc);
      mjCtx.setRootMapJoinOp(sel);
     
      sel.setParentOperators(null);
     
      // Create a file sink operator for this file name
      Operator<? extends Serializable> fs_op =
        OperatorFactory.get
        (new fileSinkDesc(taskTmpDir, tt_desc,
                          parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE)),
         mapJoin.getSchema());
     
      assert mapJoin.getChildOperators().size() == 1;
      mapJoin.getChildOperators().set(0, fs_op);
     
View Full Code Here
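The tree surgery above detaches the select from its parent and repoints the map join's single child slot at a new file sink, so the map-join task ends at a file that a follow-up task will read. A stand-alone sketch with a made-up Op class in place of Hive's Operator:

import java.util.ArrayList;
import java.util.List;

public class TreeSpliceSketch {
  // Made-up Op class standing in for Hive's Operator.
  static final class Op {
    final String name;
    List<Op> parents = new ArrayList<>();
    final List<Op> children = new ArrayList<>();
    Op(String name) { this.name = name; }
  }

  public static void main(String[] args) {
    Op mapJoin = new Op("MAPJOIN");
    Op sel = new Op("SEL");
    mapJoin.children.add(sel);
    sel.parents.add(mapJoin);

    Op fileSink = new Op("FS[/tmp/hive-mr-001]");
    sel.parents = null;                 // cf. sel.setParentOperators(null)
    mapJoin.children.set(0, fileSink);  // cf. mapJoin.getChildOperators().set(0, fs_op)

    System.out.println(mapJoin.children.get(0).name);  // FS[/tmp/hive-mr-001]
  }
}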

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      MapJoinOperator mapJoin = (MapJoinOperator)nd;
      GenMRProcContext ctx = (GenMRProcContext)procCtx;

      ParseContext parseCtx = ctx.getParseCtx();
      MapJoinOperator oldMapJoin = ctx.getCurrMapJoinOp();
      assert oldMapJoin != null;
      GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin);
      if (mjCtx != null)
        mjCtx.setOldMapJoin(oldMapJoin);
View Full Code Here

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      GenMRProcContext ctx = (GenMRProcContext)procCtx;

      ParseContext parseCtx = ctx.getParseCtx();
      UnionProcContext uCtx = parseCtx.getUCtx();

      // union was map only - no special processing needed
      if (uCtx.isMapOnlySubq())
        return (new TableScanMapJoin()).process(nd, stack, procCtx, nodeOutputs);
     
View Full Code Here
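The dispatch above says: if every branch of the union was map-only, the union needs no special handling and the generic table-scan/map-join processor can be reused as-is. A minimal stand-alone version of that delegation, with stand-in types:

interface Processor {
  String process(String node);
}

public class DelegationSketch {
  static final Processor GENERIC = node -> "generic(" + node + ")";

  static String processUnionMapJoin(String node, boolean mapOnlySubq) {
    if (mapOnlySubq) {
      return GENERIC.process(node);  // cf. new TableScanMapJoin().process(...)
    }
    return "union-specific(" + node + ")";
  }

  public static void main(String[] args) {
    System.out.println(processUnionMapJoin("MAPJOIN_1", true));
  }
}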

  /**
   * Union encountered: break the operator tree if any sub-query involves a
   * map-reduce job.
   *
   * @param nd the union operator encountered
   * @param opProcCtx context
   */
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
    UnionOperator union = (UnionOperator)nd;
    GenMRProcContext ctx = (GenMRProcContext)opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    UnionProcContext uCtx = parseCtx.getUCtx();

    // Map-only subqueries can be optimized in the future to not write to a file
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();

    // The plan needs to be broken only if one of the sub-queries involves a map-reduce job
    if (uCtx.isMapOnlySubq()) {
      UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union);
      if ((uPrsCtx != null) && (uPrsCtx.getMapJoinQuery())) {
        GenMapRedUtils.mergeMapJoinUnion(union, ctx, UnionProcFactory.getPositionParent(union, stack));
      }
      else
        mapCurrCtx.put((Operator<? extends Serializable>)nd, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), ctx.getCurrAliasId()));
      return null;
    }

    ctx.setCurrUnionOp(union);

    UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union);
    assert uPrsCtx != null;

    Task<? extends Serializable> currTask = ctx.getCurrTask();
    int pos = UnionProcFactory.getPositionParent(union, stack);

    // is the current task a root task
    if (uPrsCtx.getRootTask(pos) && (!ctx.getRootTasks().contains(currTask)))
      ctx.getRootTasks().add(currTask);

    GenMRUnionCtx uCtxTask = ctx.getUnionTask(union);
    Task<? extends Serializable> uTask = null;

    Operator<? extends Serializable> parent = union.getParentOperators().get(pos);
    mapredWork uPlan = null;

    // union is encountered for the first time
    if (uCtxTask == null) {
      uCtxTask = new GenMRUnionCtx();
      uPlan = GenMapRedUtils.getMapRedWork();
      uTask = TaskFactory.get(uPlan, parseCtx.getConf());
      uCtxTask.setUTask(uTask);
      ctx.setUnionTask(union, uCtxTask);
    }
    else {
      uTask = uCtxTask.getUTask();
      uPlan = (mapredWork)uTask.getWork();
    }

    // If there is a mapjoin at position 'pos'
    if (uPrsCtx.getMapJoinSubq(pos)) {
      MapJoinOperator mjOp = ctx.getCurrMapJoinOp();
      assert mjOp != null;
      GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mjOp);
      assert mjCtx != null;
      mapredWork plan = (mapredWork) currTask.getWork();

      String taskTmpDir = mjCtx.getTaskTmpDir();
      tableDesc tt_desc = mjCtx.getTTDesc();
      assert plan.getPathToAliases().get(taskTmpDir) == null;
      plan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
      plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
      plan.getPathToPartitionInfo().put(taskTmpDir, new partitionDesc(tt_desc, null));
      plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp());
    }

    tableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(
          PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));

    // generate the temporary file
    Context baseCtx = parseCtx.getContext();
    String taskTmpDir = baseCtx.getMRTmpFileURI();

    // Add the path to alias mapping
    uCtxTask.addTaskTmpDir(taskTmpDir);
    uCtxTask.addTTDesc(tt_desc);

    // The union task is empty. The files created for all the inputs are assembled in the
    // union context and later used to initialize the union plan

    // Create a file sink operator for this file name
    Operator<? extends Serializable> fs_op =
      OperatorFactory.get
      (new fileSinkDesc(taskTmpDir, tt_desc,
                        parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE)),
       parent.getSchema());

    assert parent.getChildOperators().size() == 1;
    parent.getChildOperators().set(0, fs_op);

View Full Code Here
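Three maps do the real work when a plan learns to read an intermediate file: pathToAliases, pathToPartitionInfo, and aliasToWork, with the tmp dir doubling as its own alias. A stand-alone sketch of that registration, using strings in place of Hive's descriptor and operator types:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PlanRegistrationSketch {
  // Strings stand in for Hive's partitionDesc and Operator types.
  final Map<String, List<String>> pathToAliases = new HashMap<>();
  final Map<String, String> pathToPartitionInfo = new HashMap<>();
  final Map<String, String> aliasToWork = new HashMap<>();

  void addIntermediateInput(String taskTmpDir, String ttDesc, String rootOp) {
    // cf. plan.getPathToAliases().put(taskTmpDir, ...): the tmp dir is its own alias
    pathToAliases.computeIfAbsent(taskTmpDir, k -> new ArrayList<>()).add(taskTmpDir);
    pathToPartitionInfo.put(taskTmpDir, ttDesc);  // cf. new partitionDesc(tt_desc, null)
    aliasToWork.put(taskTmpDir, rootOp);          // operator tree rooted at this input
  }

  public static void main(String[] args) {
    PlanRegistrationSketch plan = new PlanRegistrationSketch();
    plan.addIntermediateInput("/tmp/hive-mr-002", "intermediate-desc", "SEL");
    System.out.println(plan.pathToAliases);  // {/tmp/hive-mr-002=[/tmp/hive-mr-002]}
  }
}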

  /**
   * File sink encountered: decide whether the output files need to be merged.
   *
   * @param nd the file sink operator encountered
   * @param opProcCtx context
   */
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
    GenMRProcContext ctx = (GenMRProcContext)opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    boolean chDir = false;
    Task<? extends Serializable> currTask = ctx.getCurrTask();

    // Has the user enabled merging of files for map-only jobs or for all jobs?
    if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) {
      List<Task<? extends Serializable>> mvTasks = ctx.getMvTask();

      // In case of unions or map-joins, it is possible that the file has already been seen.
      // So, no need to attempt to merge the files again.
      if ((ctx.getSeenFileSinkOps() == null) ||
          (!ctx.getSeenFileSinkOps().contains((FileSinkOperator)nd)))  {
       
        // No need to merge if the move is to the local file system
        MoveTask mvTask = (MoveTask) findMoveTask(mvTasks, (FileSinkOperator) nd);
        if ((mvTask != null) && !mvTask.isLocal()) {
          // There are separate configuration parameters to control whether to merge for a map-only job
          // or for a map-reduce job
          if ((parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES) &&
              (((mapredWork)currTask.getWork()).getReducer() == null)) ||
              (parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES) &&
              (((mapredWork)currTask.getWork()).getReducer() != null)))
            chDir = true;
        }
      }
    }
View Full Code Here
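The merge decision above reduces to a small predicate: never merge when the move targets the local file system, and otherwise pick the flag matching the job shape (HIVEMERGEMAPFILES for map-only jobs, HIVEMERGEMAPREDFILES when there is a reducer). A stand-alone restatement:

public class MergeDecisionSketch {
  static boolean shouldMerge(boolean moveIsLocal, boolean hasReducer,
                             boolean mergeMapFiles, boolean mergeMapRedFiles) {
    if (moveIsLocal) {
      return false;  // never merge when the destination is the local file system
    }
    // cf. HIVEMERGEMAPFILES (map-only) vs. HIVEMERGEMAPREDFILES (map-reduce)
    return hasReducer ? mergeMapRedFiles : mergeMapFiles;
  }

  public static void main(String[] args) {
    System.out.println(shouldMerge(false, true, false, true));  // true
  }
}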
