Package org.apache.drill.exec.physical.impl.join

Source Code of org.apache.drill.exec.physical.impl.join.MergeJoinBatch

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.physical.impl.join;

import static org.apache.drill.exec.compile.sig.GeneratorMapping.GM;

import java.io.IOException;

import org.apache.drill.common.expression.ErrorCollector;
import org.apache.drill.common.expression.ErrorCollectorImpl;
import org.apache.drill.common.expression.ExpressionPosition;
import org.apache.drill.common.expression.FunctionCall;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.logical.data.Join;
import org.apache.drill.common.logical.data.JoinCondition;
import org.apache.drill.exec.compile.sig.MappingSet;
import org.apache.drill.exec.exception.ClassTransformationException;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.expr.CodeGenerator;
import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
import org.apache.drill.exec.expr.HoldingContainerExpression;
import org.apache.drill.exec.expr.TypeHelper;
import org.apache.drill.exec.expr.fn.impl.ComparatorFunctions;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.physical.config.MergeJoinPOP;
import org.apache.drill.exec.physical.impl.filter.ReturnValueExpression;
import org.apache.drill.exec.physical.impl.join.JoinWorker.JoinOutcome;
import org.apache.drill.exec.record.AbstractRecordBatch;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.RecordBatch;
import org.apache.drill.exec.record.TypedFieldId;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.allocator.VectorAllocator;

import com.google.common.collect.ImmutableList;
import com.sun.codemodel.JClass;
import com.sun.codemodel.JExpr;
import com.sun.codemodel.JMod;
import com.sun.codemodel.JType;
import com.sun.codemodel.JVar;

/**
* A merge join combining two incoming in-order batches.
*/
public class MergeJoinBatch extends AbstractRecordBatch<MergeJoinPOP> {

  // Class-wide logger.
  static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(MergeJoinBatch.class);

  // Mapping sets selected via cg.setMappingSet(...) in generateNewWorker() while the JoinWorker
  // code is generated. The first two constructor arguments are the names of the read and write
  // index variables (see the getValueReadIndex()/getValueWriteIndex() calls in generateNewWorker()).
  // The GM(...) arguments name generated methods -- presumably (setup method, eval method);
  // confirm against GeneratorMapping.

  // Used for member declarations and setup code.
  // NOTE(review): the index names here are the literal string "null", not a null reference.
  public static final MappingSet SETUP_MAPPING =
      new MappingSet("null", "null",
                     GM("doSetup", "doSetup", null, null),
                     GM("doSetup", "doSetup", null, null));
  // Used for doCopyLeft: reads at "leftIndex", writes at "outIndex".
  public static final MappingSet COPY_LEFT_MAPPING =
      new MappingSet("leftIndex", "outIndex",
                     GM("doSetup", "doCopyLeft", null, null),
                     GM("doSetup", "doCopyLeft", null, null));
  // Used for doCopyRight: reads at "rightIndex", writes at "outIndex".
  public static final MappingSet COPY_RIGHT_MAPPING =
      new MappingSet("rightIndex", "outIndex",
                     GM("doSetup", "doCopyRight", null, null),
                     GM("doSetup", "doCopyRight", null, null));
  // Used for the left-hand key read in doCompare (index "leftIndex").
  public static final MappingSet COMPARE_MAPPING =
      new MappingSet("leftIndex", "rightIndex",
                     GM("doSetup", "doCompare", null, null),
                     GM("doSetup", "doCompare", null, null));
  // Used for the right-hand key read in doCompare (index "rightIndex").
  public static final MappingSet COMPARE_RIGHT_MAPPING =
      new MappingSet("rightIndex", "null",
                     GM("doSetup", "doCompare", null, null),
                     GM("doSetup", "doCompare", null, null));
  // Used for the current left key read in doCompareNextLeftKey (index "leftIndex").
  public static final MappingSet COMPARE_LEFT_MAPPING =
      new MappingSet("leftIndex", "null",
                     GM("doSetup", "doCompareNextLeftKey", null, null),
                     GM("doSetup", "doCompareNextLeftKey", null, null));
  // Used for the following left key read in doCompareNextLeftKey (index "nextLeftIndex").
  public static final MappingSet COMPARE_NEXT_LEFT_MAPPING =
      new MappingSet("nextLeftIndex", "null",
                     GM("doSetup", "doCompareNextLeftKey", null, null),
                     GM("doSetup", "doCompareNextLeftKey", null, null));


  // Left incoming batch.
  private final RecordBatch left;
  // Right incoming batch.
  private final RecordBatch right;
  // Join state shared with the generated worker; built from both inputs and this batch.
  private final JoinStatus status;
  // The single join condition (first entry of the operator's condition list).
  private final JoinCondition condition;
  // Join type taken from the operator config; not read anywhere in this class.
  private final Join.JoinType joinType;
  // Generated join implementation; null until next() creates it, and reset to null on a schema change.
  private JoinWorker worker;
  // Public and reassigned by resetBatchBuilder(); addRightToBatchBuilder() feeds the right batch into it.
  public MergeJoinBatchBuilder batchBuilder;
 
  protected MergeJoinBatch(MergeJoinPOP popConfig, FragmentContext context, RecordBatch left, RecordBatch right) {
    super(popConfig, context);
    this.left = left;
    this.right = right;
    this.status = new JoinStatus(left, right, this);
    this.batchBuilder = new MergeJoinBatchBuilder(context, status);
    this.joinType = popConfig.getJoinType();
    this.condition = popConfig.getConditions().get(0);
    // currently only one join condition is supported
    assert popConfig.getConditions().size() == 1;
  }

  @Override
  public int getRecordCount() {
    return status.getOutPosition();
  }

  @Override
  public IterOutcome next() {
   
    // we do this in the here instead of the constructor because don't necessary want to start consuming on construction.
    status.ensureInitial();
   
    // loop so we can start over again if we find a new batch was created.
    while(true){

      // if the previous outcome was a change in schema or we sent a batch, we have to set up a new batch.
      if (status.getOutcome() == JoinOutcome.BATCH_RETURNED ||
          status.getOutcome() == JoinOutcome.SCHEMA_CHANGED)
        allocateBatch();

      // reset the output position to zero after our parent iterates this RecordBatch
      if (status.getOutcome() == JoinOutcome.BATCH_RETURNED ||
          status.getOutcome() == JoinOutcome.SCHEMA_CHANGED ||
          status.getOutcome() == JoinOutcome.NO_MORE_DATA)
        status.resetOutputPos();

      boolean first = false;
      if(worker == null){
        try {
          logger.debug("Creating New Worker");
          this.worker = generateNewWorker();
          first = true;
        } catch (ClassTransformationException | IOException | SchemaChangeException e) {
          context.fail(new SchemaChangeException(e));
          kill();
          return IterOutcome.STOP;
        }
      }

      // join until we have a complete outgoing batch
      worker.doJoin(status);

      // get the outcome of the join.
      switch(status.getOutcome()){
      case BATCH_RETURNED:
        // only return new schema if new worker has been setup.
        logger.debug("BATCH RETURNED; returning {}", (first ? "OK_NEW_SCHEMA" : "OK"));
        return first ? IterOutcome.OK_NEW_SCHEMA : IterOutcome.OK;
      case FAILURE:
        kill();
        return IterOutcome.STOP;
      case NO_MORE_DATA:
        logger.debug("NO MORE DATA; returning {}", (status.getOutPosition() > 0 ? (first ? "OK_NEW_SCHEMA" : "OK") : "NONE"));
        return status.getOutPosition() > 0 ? (first ? IterOutcome.OK_NEW_SCHEMA : IterOutcome.OK): IterOutcome.NONE;
      case SCHEMA_CHANGED:
        worker = null;
        if(status.getOutPosition() > 0){
          // if we have current data, let's return that.
          logger.debug("SCHEMA CHANGED; returning {} ", (first ? "OK_NEW_SCHEMA" : "OK"));
          return first ? IterOutcome.OK_NEW_SCHEMA : IterOutcome.OK;
        }else{
          // loop again to rebuild worker.
          continue;
        }
      case WAITING:
        return IterOutcome.NOT_YET;
      default:
        throw new IllegalStateException();
      }
    }
  }

  public void resetBatchBuilder() {
    batchBuilder = new MergeJoinBatchBuilder(context, status);
  }

  public void addRightToBatchBuilder() {
    batchBuilder.add(right);
  }

  @Override
  protected void killIncoming() {
    left.kill();
    right.kill();
  }

  private JoinWorker generateNewWorker() throws ClassTransformationException, IOException, SchemaChangeException{

    final CodeGenerator<JoinWorker> cg = new CodeGenerator<>(JoinWorker.TEMPLATE_DEFINITION, context.getFunctionRegistry());
    final ErrorCollector collector = new ErrorCollectorImpl();
    final LogicalExpression leftFieldExpr = condition.getLeft();
    final LogicalExpression rightFieldExpr = condition.getRight();

    // Generate members and initialization code
    /////////////////////////////////////////

    // declare and assign JoinStatus member
    cg.setMappingSet(SETUP_MAPPING);
    JClass joinStatusClass = cg.getModel().ref(JoinStatus.class);
    JVar joinStatus = cg.clazz.field(JMod.NONE, joinStatusClass, "status");
    cg.getSetupBlock().assign(JExpr._this().ref(joinStatus), JExpr.direct("status"));

    // declare and assign outgoing VectorContainer member
    JClass vectorContainerClass = cg.getModel().ref(VectorContainer.class);
    JVar outgoingVectorContainer = cg.clazz.field(JMod.NONE, vectorContainerClass, "outgoing");
    cg.getSetupBlock().assign(JExpr._this().ref(outgoingVectorContainer), JExpr.direct("outgoing"));

    // declare and assign incoming left RecordBatch member
    JClass recordBatchClass = cg.getModel().ref(RecordBatch.class);
    JVar incomingLeftRecordBatch = cg.clazz.field(JMod.NONE, recordBatchClass, "incomingLeft");
    cg.getSetupBlock().assign(JExpr._this().ref(incomingLeftRecordBatch), joinStatus.ref("left"));

    // declare and assign incoming right RecordBatch member
    JVar incomingRightRecordBatch = cg.clazz.field(JMod.NONE, recordBatchClass, "incomingRight");
    cg.getSetupBlock().assign(JExpr._this().ref(incomingRightRecordBatch), joinStatus.ref("right"));

    // declare 'incoming' member so VVReadExpr generated code can point to the left or right batch
    JVar incomingRecordBatch = cg.clazz.field(JMod.NONE, recordBatchClass, "incoming");

    // materialize value vector readers from join expression
    final LogicalExpression materializedLeftExpr = ExpressionTreeMaterializer.materialize(leftFieldExpr, left, collector);
    if (collector.hasErrors())
      throw new ClassTransformationException(String.format(
          "Failure while trying to materialize incoming left field.  Errors:\n %s.", collector.toErrorString()));

    final LogicalExpression materializedRightExpr = ExpressionTreeMaterializer.materialize(rightFieldExpr, right, collector);
    if (collector.hasErrors())
      throw new ClassTransformationException(String.format(
          "Failure while trying to materialize incoming right field.  Errors:\n %s.", collector.toErrorString()));


    // generate compare()
    ////////////////////////
    cg.setMappingSet(COMPARE_MAPPING);
    cg.getSetupBlock().assign(JExpr._this().ref(incomingRecordBatch), JExpr._this().ref(incomingLeftRecordBatch));
    CodeGenerator.HoldingContainer compareLeftExprHolder = cg.addExpr(materializedLeftExpr, false);
    cg.setMappingSet(COMPARE_RIGHT_MAPPING);
    cg.getSetupBlock().assign(JExpr._this().ref(incomingRecordBatch), JExpr._this().ref(incomingRightRecordBatch));
    CodeGenerator.HoldingContainer compareRightExprHolder = cg.addExpr(materializedRightExpr, false);

    if (compareLeftExprHolder.isOptional() && compareRightExprHolder.isOptional()) {
      // handle null == null
      cg.getEvalBlock()._if(compareLeftExprHolder.getIsSet().eq(JExpr.lit(0))
          .cand(compareRightExprHolder.getIsSet().eq(JExpr.lit(0))))
          ._then()
          ._return(JExpr.lit(0));
 
      // handle null == !null
      cg.getEvalBlock()._if(compareLeftExprHolder.getIsSet().eq(JExpr.lit(0))
          .cor(compareRightExprHolder.getIsSet().eq(JExpr.lit(0))))
          ._then()
          ._return(JExpr.lit(1));

    } else if (compareLeftExprHolder.isOptional()) {
      // handle null == required (null is less than any value)
      cg.getEvalBlock()._if(compareLeftExprHolder.getIsSet().eq(JExpr.lit(0)))
          ._then()
          ._return(JExpr.lit(-1));

    } else if (compareRightExprHolder.isOptional()) {
      // handle required == null (null is less than any value)
      cg.getEvalBlock()._if(compareRightExprHolder.getIsSet().eq(JExpr.lit(0)))
          ._then()
          ._return(JExpr.lit(1));
    }

    FunctionCall f = new FunctionCall(ComparatorFunctions.COMPARE_TO, ImmutableList.of((LogicalExpression) new HoldingContainerExpression(compareLeftExprHolder), (LogicalExpressionnew HoldingContainerExpression(compareRightExprHolder)), ExpressionPosition.UNKNOWN);
    cg.addExpr(new ReturnValueExpression(f, false), false);
//   
//    // equality
//    cg.getEvalBlock()._if(compareLeftExprHolder.getValue().eq(compareRightExprHolder.getValue()))
//                     ._then()
//                       ._return(JExpr.lit(0));
//    // less than
//    cg.getEvalBlock()._if(compareLeftExprHolder.getValue().lt(compareRightExprHolder.getValue()))
//                     ._then()
//                       ._return(JExpr.lit(-1));
//    // greater than
//    cg.getEvalBlock()._return(JExpr.lit(1));


    // generate compareNextLeftKey()
    ////////////////////////////////
    cg.setMappingSet(COMPARE_LEFT_MAPPING);
    cg.getSetupBlock().assign(JExpr._this().ref(incomingRecordBatch), JExpr._this().ref(incomingLeftRecordBatch));

    // int nextLeftIndex = leftIndex + 1;
    cg.getEvalBlock().decl(JType.parse(cg.getModel(), "int"), "nextLeftIndex", JExpr.direct("leftIndex").plus(JExpr.lit(1)));

    // check if the next key is in this batch
    cg.getEvalBlock()._if(joinStatus.invoke("isNextLeftPositionInCurrentBatch").eq(JExpr.lit(false)))
                     ._then()
                       ._return(JExpr.lit(-1));

    // generate VV read expressions
    CodeGenerator.HoldingContainer compareThisLeftExprHolder = cg.addExpr(materializedLeftExpr, false);
    cg.setMappingSet(COMPARE_NEXT_LEFT_MAPPING); // change mapping from 'leftIndex' to 'nextLeftIndex'
    CodeGenerator.HoldingContainer compareNextLeftExprHolder = cg.addExpr(materializedLeftExpr, false);

    if (compareThisLeftExprHolder.isOptional()) {
      // handle null == null
      cg.getEvalBlock()._if(compareThisLeftExprHolder.getIsSet().eq(JExpr.lit(0))
                            .cand(compareNextLeftExprHolder.getIsSet().eq(JExpr.lit(0))))
                       ._then()
                         ._return(JExpr.lit(0));
 
      // handle null == !null
      cg.getEvalBlock()._if(compareThisLeftExprHolder.getIsSet().eq(JExpr.lit(0))
                            .cor(compareNextLeftExprHolder.getIsSet().eq(JExpr.lit(0))))
                       ._then()
                         ._return(JExpr.lit(1));
    }

    // check value equality
    cg.getEvalBlock()._if(compareThisLeftExprHolder.getValue().eq(compareNextLeftExprHolder.getValue()))
                     ._then()
                       ._return(JExpr.lit(0));

    // no match if reached
    cg.getEvalBlock()._return(JExpr.lit(1));


    // generate copyLeft()
    //////////////////////
    cg.setMappingSet(COPY_LEFT_MAPPING);
    int vectorId = 0;
    for (VectorWrapper<?> vw : left) {
      JVar vvIn = cg.declareVectorValueSetupAndMember("incomingLeft",
                                                      new TypedFieldId(vw.getField().getType(), vectorId));
      JVar vvOut = cg.declareVectorValueSetupAndMember("outgoing",
                                                       new TypedFieldId(vw.getField().getType(),vectorId));
      // todo: check result of copyFromSafe and grow allocation
      cg.getEvalBlock().add(vvOut.invoke("copyFromSafe")
                                   .arg(COPY_LEFT_MAPPING.getValueReadIndex())
                                   .arg(COPY_LEFT_MAPPING.getValueWriteIndex())
                                   .arg(vvIn));
      ++vectorId;
    }
    cg.getEvalBlock()._return(JExpr.lit(true));

    // generate copyRight()
    ///////////////////////
    cg.setMappingSet(COPY_RIGHT_MAPPING);

    int rightVectorBase = vectorId;
    for (VectorWrapper<?> vw : right) {
      JVar vvIn = cg.declareVectorValueSetupAndMember("incomingRight",
                                                      new TypedFieldId(vw.getField().getType(), vectorId - rightVectorBase));
      JVar vvOut = cg.declareVectorValueSetupAndMember("outgoing",
                                                       new TypedFieldId(vw.getField().getType(),vectorId));
      // todo: check result of copyFromSafe and grow allocation
      cg.getEvalBlock().add(vvOut.invoke("copyFromSafe")
          .arg(COPY_RIGHT_MAPPING.getValueReadIndex())
          .arg(COPY_RIGHT_MAPPING.getValueWriteIndex())
          .arg(vvIn));
      ++vectorId;
    }
    cg.getEvalBlock()._return(JExpr.lit(true));

    JoinWorker w = context.getImplementationClass(cg);
    w.setupJoin(context, status, this.container);
    return w;
  }

  private void allocateBatch() {
    // allocate new batch space.
    container.clear();

    // add fields from both batches
    for (VectorWrapper<?> w : left) {
      ValueVector outgoingVector = TypeHelper.getNewVector(w.getField(), context.getAllocator());
      VectorAllocator.getAllocator(outgoingVector, (int) Math.ceil(w.getValueVector().getBufferSize() / left.getRecordCount())).alloc(left.getRecordCount() * 16);
      container.add(outgoingVector);
    }

    for (VectorWrapper<?> w : right) {
      ValueVector outgoingVector = TypeHelper.getNewVector(w.getField(), context.getAllocator());
      VectorAllocator.getAllocator(outgoingVector, (int) Math.ceil(w.getValueVector().getBufferSize() / right.getRecordCount())).alloc(right.getRecordCount() * 16);
      container.add(outgoingVector);
    }

    container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    logger.debug("Built joined schema: {}", container.getSchema());
  }

}
TOP

Related Classes of org.apache.drill.exec.physical.impl.join.MergeJoinBatch

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.