Examples of org.apache.hadoop.hive.ql.parse.ParseContext

org.apache.hadoop.hive.ql.parse.ParseContext
Parse Context: The current parse context. This is passed to the optimizer which then transforms the operator tree using the parse context. All the optimizations are performed sequentially and then the new parse context populated. Note that since the parse context contains the operator tree, it can be easily retrieved by the next optimization step or finally for task generation after the plan has been completely optimized.

    if (chDir) {
      dest = fsOp.getConf().getFinalDirName();


      // generate the temporary file
      // it must be on the same file system as the current destination
      ParseContext parseCtx = ctx.getParseCtx();
      Context baseCtx = parseCtx.getContext();
      String tmpDir = baseCtx.getExternalTmpFileURI((new Path(dest)).toUri());


      FileSinkDesc fileSinkDesc = fsOp.getConf();
      // Change all the linked file sink descriptors
      if (fileSinkDesc.isLinkedFileSink()) {

View Full Code Here

   * @param nd the file sink operator encountered
   * @param opProcCtx context
   */
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
    GenMRProcContext ctx = (GenMRProcContext)opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    boolean chDir = false;
    Task<? extends Serializable> currTask = ctx.getCurrTask();


    // Has the user enabled merging of files for map-only jobs or for all jobs
    if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) 
    {
      List<Task<? extends Serializable>> mvTasks = ctx.getMvTask();


      // In case of unions or map-joins, it is possible that the file has already been seen.
      // So, no need to attempt to merge the files again.
      if ((ctx.getSeenFileSinkOps() == null) ||
          (!ctx.getSeenFileSinkOps().contains((FileSinkOperator)nd)))  {
        
        // no need of merging if the move is to a local file system
        MoveTask mvTask = (MoveTask)findMoveTask(mvTasks, (FileSinkOperator)nd);
        if ((mvTask != null) && !mvTask.isLocal())
        {
          // There are separate configuration parameters to control whether to merge for a map-only job
          // or for a map-reduce job
          if ((parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES) &&
              (((mapredWork)currTask.getWork()).getReducer() == null)) ||
              parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES))
            chDir = true;
        }
      }
    }

View Full Code Here

    
    reduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(new ArrayList<exprNodeDesc>(), valueCols, 
                                                        outputColumns, false, -1, -1, -1); 
    ReduceSinkOperator rsOp = (ReduceSinkOperator)OperatorFactory.getAndMakeChild(rsDesc, fsRS, ts_op);
    mapredWork cplan = GenMapRedUtils.getMapRedWork();
    ParseContext parseCtx = ctx.getParseCtx();


    Task<? extends Serializable> mergeTask = TaskFactory.get(cplan, parseCtx.getConf());
    fileSinkDesc fsConf = fsOp.getConf();
    
    // Add the extract operator to get the value fields
    RowResolver out_rwsch = new RowResolver();
    RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRR();
    Integer pos = Integer.valueOf(0);
    for(ColumnInfo colInfo: interim_rwsch.getColumnInfos()) {
      String [] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
      out_rwsch.put(info[0], info[1],
                    new ColumnInfo(pos.toString(), colInfo.getType(), info[0], 
                          colInfo.getIsPartitionCol()));
      pos = Integer.valueOf(pos.intValue() + 1);
    }


    Operator extract = 
      OperatorFactory.getAndMakeChild(
        new extractDesc(new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, 
                                Utilities.ReduceField.VALUE.toString(), "", false)),
        new RowSchema(out_rwsch.getColumnInfos()));
    
    tableDesc ts = (tableDesc)fsConf.getTableInfo().clone();
    fsConf.getTableInfo().getProperties().remove(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);
    FileSinkOperator newOutput = 
      (FileSinkOperator)OperatorFactory.getAndMakeChild(
         new fileSinkDesc(finalName, ts, 
                          parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT)),
         fsRS, extract);


    cplan.setReducer(extract);
    ArrayList<String> aliases = new ArrayList<String>();
    aliases.add(fsConf.getDirName());

View Full Code Here


    if (chDir) {
      dest = fsOp.getConf().getDirName();


      // generate the temporary file
      ParseContext parseCtx = ctx.getParseCtx();
      Context baseCtx = parseCtx.getContext();
      String tmpDir = baseCtx.getMRTmpFileURI();
      
      fsOp.getConf().setDirName(tmpDir);
    }

View Full Code Here

        Object... nodeOutputs) throws SemanticException {
      MapJoinOperator mapJoin = (MapJoinOperator)nd;
      GenMRProcContext opProcCtx = (GenMRProcContext)procCtx;
      
      mapredWork cplan = GenMapRedUtils.getMapRedWork();
      ParseContext parseCtx = opProcCtx.getParseCtx();
      Task<? extends Serializable> redTask  = TaskFactory.get(cplan, parseCtx.getConf());
      Task<? extends Serializable> currTask = opProcCtx.getCurrTask();


      // find the branch on which this processor was invoked
      int pos = getPositionParent(mapJoin, stack);
      boolean local = (pos == ((mapJoinDesc)mapJoin.getConf()).getPosBigTable()) ? false : true;

View Full Code Here

      SelectOperator  sel     = (SelectOperator)nd;
      MapJoinOperator mapJoin = (MapJoinOperator)sel.getParentOperators().get(0);
      assert sel.getParentOperators().size() == 1;
      
      GenMRProcContext ctx = (GenMRProcContext)procCtx;
      ParseContext parseCtx = ctx.getParseCtx();
      
      // is the mapjoin followed by a reducer
      List<MapJoinOperator> listMapJoinOps = parseCtx.getListMapJoinOpsNoReducer();
      
      if (listMapJoinOps.contains(mapJoin)) {
        ctx.setCurrAliasId(null);
        ctx.setCurrTopOp(null);
        Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();
        mapCurrCtx.put((Operator<? extends Serializable>)nd, new GenMapRedCtx(ctx.getCurrTask(), null, null));
        return null;
      }


      ctx.setCurrMapJoinOp(mapJoin);
      
      Task<? extends Serializable> currTask = ctx.getCurrTask();
      GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin);
      if (mjCtx == null) {
        mjCtx = new GenMRMapJoinCtx();
        ctx.setMapJoinCtx(mapJoin, mjCtx);
      }
      
      mapredWork mjPlan = GenMapRedUtils.getMapRedWork();
      Task<? extends Serializable> mjTask = TaskFactory.get(mjPlan, parseCtx.getConf());
      
      tableDesc tt_desc = 
        PlanUtils.getIntermediateFileTableDesc(PlanUtils.sortFieldSchemas(
            PlanUtils.getFieldSchemasFromRowSchema(mapJoin.getSchema(), "temporarycol"))); 
      
      // generate the temporary file
      Context baseCtx = parseCtx.getContext();
      String taskTmpDir = baseCtx.getMRTmpFileURI();
      
      // Add the path to alias mapping
      mjCtx.setTaskTmpDir(taskTmpDir);
      mjCtx.setTTDesc(tt_desc);
      mjCtx.setRootMapJoinOp(sel);
      
      sel.setParentOperators(null);
      
      // Create a file sink operator for this file name
      Operator<? extends Serializable> fs_op =
        OperatorFactory.get
        (new fileSinkDesc(taskTmpDir, tt_desc,
                          parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE)),
         mapJoin.getSchema());
      
      assert mapJoin.getChildOperators().size() == 1;
      mapJoin.getChildOperators().set(0, fs_op);

View Full Code Here

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      MapJoinOperator mapJoin = (MapJoinOperator)nd;
      GenMRProcContext ctx = (GenMRProcContext)procCtx;


      ParseContext parseCtx = ctx.getParseCtx();
      MapJoinOperator oldMapJoin = ctx.getCurrMapJoinOp();
      assert oldMapJoin != null;
      GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin);
      if (mjCtx != null)
        mjCtx.setOldMapJoin(oldMapJoin);

View Full Code Here

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      GenMRProcContext ctx = (GenMRProcContext)procCtx;


      ParseContext parseCtx = ctx.getParseCtx();
      UnionProcContext uCtx = parseCtx.getUCtx();


      // union was map only - no special processing needed
      if (uCtx.isMapOnlySubq())
        return (new TableScanMapJoin()).process(nd, stack, procCtx, nodeOutputs);

View Full Code Here

   */
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
    ReduceSinkOperator op = (ReduceSinkOperator)nd;
    GenMRProcContext ctx = (GenMRProcContext)opProcCtx;


    ParseContext parseCtx = ctx.getParseCtx();
    UnionProcContext uCtx = parseCtx.getUCtx();


    // union was map only - no special processing needed
    if (uCtx.isMapOnlySubq())
      return (new GenMRRedSink1()).process(nd, stack, opProcCtx, nodeOutputs);

View Full Code Here

   * @param opProcCtx context
   */
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
    UnionOperator union = (UnionOperator)nd;
    GenMRProcContext ctx = (GenMRProcContext)opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    UnionProcContext uCtx = parseCtx.getUCtx();


    // Map-only subqueries can be optimized in future to not write to a file in future
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();


    // The plan needs to be broken only if one of the sub-queries involve a map-reduce job
    if (uCtx.isMapOnlySubq()) {
      mapCurrCtx.put((Operator<? extends Serializable>)nd, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), ctx.getCurrAliasId()));
      return null;
    }


    ctx.setCurrUnionOp(union);


    UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union);
    assert uPrsCtx != null;


    Task<? extends Serializable> currTask = ctx.getCurrTask();
    int pos = UnionProcFactory.getPositionParent(union, stack);


    // is the current task a root task
    if (uPrsCtx.getRootTask(pos) && (!ctx.getRootTasks().contains(currTask)))
      ctx.getRootTasks().add(currTask);
    
    GenMRUnionCtx uCtxTask = ctx.getUnionTask(union);
    Task<? extends Serializable> uTask = null;


    Operator<? extends Serializable> parent = union.getParentOperators().get(pos);   
    mapredWork uPlan = null;


    // union is encountered for the first time
    if (uCtxTask == null) {
      uCtxTask = new GenMRUnionCtx();
      uPlan = GenMapRedUtils.getMapRedWork();
      uTask = TaskFactory.get(uPlan, parseCtx.getConf());
      uCtxTask.setUTask(uTask);
      ctx.setUnionTask(union, uCtxTask);
    }
    else {
      uTask = uCtxTask.getUTask();
      uPlan = (mapredWork)uTask.getWork();
    }


    tableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(
          PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol")); 
    
    // generate the temporary file
    Context baseCtx = parseCtx.getContext();
    String taskTmpDir = baseCtx.getMRTmpFileURI();
    
    // Add the path to alias mapping
    uCtxTask.addTaskTmpDir(taskTmpDir);
    uCtxTask.addTTDesc(tt_desc);


    // The union task is empty. The files created for all the inputs are assembled in the
    // union context and later used to initialize the union plan
    
    // Create a file sink operator for this file name
    Operator<? extends Serializable> fs_op =
      OperatorFactory.get
      (new fileSinkDesc(taskTmpDir, tt_desc,
                        parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE)),
       parent.getSchema());
    
    assert parent.getChildOperators().size() == 1;
    parent.getChildOperators().set(0, fs_op);

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.hadoop.hive.ql.parse.ParseContext

org.apache.hadoop.hive.ql.Driver

org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin

org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization

org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils

org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1

org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3

org.apache.hadoop.hive.ql.optimizer.GenMRRedSink4

org.apache.hadoop.hive.ql.optimizer.GenMRTableScan1

org.apache.hadoop.hive.ql.optimizer.GenMRUnion1

org.apache.hadoop.hive.ql.optimizer.index.RewriteParseContextGenerator

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.