Package eu.stratosphere.api.java.operators

Source Code of eu.stratosphere.api.java.operators.CoGroupOperatorSets$CoGroupOperatorSetsPredicate$CoGroupOperatorWithoutFunction

/***********************************************************************************************************************
*
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.api.java.operators;

import java.security.InvalidParameterException;

import eu.stratosphere.api.common.InvalidProgramException;
import eu.stratosphere.api.common.functions.GenericCoGrouper;
import eu.stratosphere.api.common.functions.GenericMap;
import eu.stratosphere.api.common.operators.BinaryOperatorInformation;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.UnaryOperatorInformation;
import eu.stratosphere.api.common.operators.base.CoGroupOperatorBase;
import eu.stratosphere.api.common.operators.base.MapOperatorBase;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.functions.CoGroupFunction;
import eu.stratosphere.api.java.functions.KeySelector;
import eu.stratosphere.api.java.operators.translation.KeyExtractingMapper;
import eu.stratosphere.api.java.operators.translation.PlanUnwrappingCoGroupOperator;
import eu.stratosphere.api.java.operators.translation.TupleKeyExtractingMapper;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.api.java.typeutils.TupleTypeInfo;
import eu.stratosphere.api.java.typeutils.TypeExtractor;
import eu.stratosphere.types.TypeInformation;

/**
* A {@link DataSet} that is the result of a CoGroup transformation.
*
* @param <I1> The type of the first input DataSet of the CoGroup transformation.
* @param <I2> The type of the second input DataSet of the CoGroup transformation.
* @param <OUT> The type of the result of the CoGroup transformation.
*
* @see DataSet
*/
public class CoGroupOperator<I1, I2, OUT> extends TwoInputUdfOperator<I1, I2, OUT, CoGroupOperator<I1, I2, OUT>> {

  private final CoGroupFunction<I1, I2, OUT> function;

  private final Keys<I1> keys1;
  private final Keys<I2> keys2;


  protected CoGroupOperator(DataSet<I1> input1, DataSet<I2> input2,
              Keys<I1> keys1, Keys<I2> keys2,
              CoGroupFunction<I1, I2, OUT> function,
              TypeInformation<OUT> returnType)
  {
    super(input1, input2, returnType);

    this.function = function;

    if (keys1 == null || keys2 == null) {
      throw new NullPointerException();
    }

    this.keys1 = keys1;
    this.keys2 = keys2;
   
    extractSemanticAnnotationsFromUdf(function.getClass());
  }

  protected Keys<I1> getKeys1() {
    return this.keys1;
  }

  protected Keys<I2> getKeys2() {
    return this.keys2;
  }

  @Override
  protected eu.stratosphere.api.common.operators.base.CoGroupOperatorBase<?, ?, OUT, ?> translateToDataFlow(Operator<I1> input1, Operator<I2> input2) {
   
    String name = getName() != null ? getName() : function.getClass().getName();

    if (keys1 instanceof Keys.SelectorFunctionKeys
        && keys2 instanceof Keys.SelectorFunctionKeys
        && keys1.areCompatibale(keys2)) {

      @SuppressWarnings("unchecked")
      Keys.SelectorFunctionKeys<I1, ?> selectorKeys1 = (Keys.SelectorFunctionKeys<I1, ?>) keys1;
      @SuppressWarnings("unchecked")
      Keys.SelectorFunctionKeys<I2, ?> selectorKeys2 = (Keys.SelectorFunctionKeys<I2, ?>) keys2;

      PlanUnwrappingCoGroupOperator<I1, I2, OUT, ?> po =
          translateSelectorFunctionCoGroup(selectorKeys1, selectorKeys2, function,
          getInput1Type(), getInput2Type(), getResultType(), name, input1, input2);

      // set dop
      po.setDegreeOfParallelism(this.getParallelism());

      return po;

    }
    else if (keys1 instanceof Keys.FieldPositionKeys
        && keys2 instanceof Keys.FieldPositionKeys
        && keys1.areCompatibale(keys2)
      ) {

      int[] logicalKeyPositions1 = keys1.computeLogicalKeyPositions();
      int[] logicalKeyPositions2 = keys2.computeLogicalKeyPositions();
     
      CoGroupOperatorBase<I1, I2, OUT, GenericCoGrouper<I1, I2, OUT>> po =
          new CoGroupOperatorBase<I1, I2, OUT, GenericCoGrouper<I1, I2, OUT>>(
              function, new BinaryOperatorInformation<I1, I2, OUT>(getInput1Type(), getInput2Type(), getResultType()),
              logicalKeyPositions1, logicalKeyPositions2, name);
     
      // set inputs
      po.setFirstInput(input1);
      po.setSecondInput(input2);

      // set dop
      po.setDegreeOfParallelism(this.getParallelism());

      return po;

    }
    else if (keys1 instanceof Keys.FieldPositionKeys
        && keys2 instanceof Keys.SelectorFunctionKeys
        && keys1.areCompatibale(keys2)
      ) {

      int[] logicalKeyPositions1 = keys1.computeLogicalKeyPositions();

      @SuppressWarnings("unchecked")
      Keys.SelectorFunctionKeys<I2, ?> selectorKeys2 = (Keys.SelectorFunctionKeys<I2, ?>) keys2;

      PlanUnwrappingCoGroupOperator<I1, I2, OUT, ?> po =
          translateSelectorFunctionCoGroupRight(logicalKeyPositions1, selectorKeys2, function,
          getInput1Type(), getInput2Type(), getResultType(), name, input1, input2);

      // set dop
      po.setDegreeOfParallelism(this.getParallelism());

      return po;
    }
    else if (keys1 instanceof Keys.SelectorFunctionKeys
        && keys2 instanceof Keys.FieldPositionKeys
        && keys1.areCompatibale(keys2)
      ) {

      @SuppressWarnings("unchecked")
      Keys.SelectorFunctionKeys<I1, ?> selectorKeys1 = (Keys.SelectorFunctionKeys<I1, ?>) keys1;

      int[] logicalKeyPositions2 = keys2.computeLogicalKeyPositions();

      PlanUnwrappingCoGroupOperator<I1, I2, OUT, ?> po =
          translateSelectorFunctionCoGroupLeft(selectorKeys1, logicalKeyPositions2, function,
          getInput1Type(), getInput2Type(), getResultType(), name, input1, input2);

      // set dop
      po.setDegreeOfParallelism(this.getParallelism());

      return po;
    }
    else {
      throw new UnsupportedOperationException("Unrecognized or incompatible key types.");
    }
  }


  private static <I1, I2, K, OUT> PlanUnwrappingCoGroupOperator<I1, I2, OUT, K> translateSelectorFunctionCoGroup(
      Keys.SelectorFunctionKeys<I1, ?> rawKeys1, Keys.SelectorFunctionKeys<I2, ?> rawKeys2,
      CoGroupFunction<I1, I2, OUT> function,
      TypeInformation<I1> inputType1, TypeInformation<I2> inputType2, TypeInformation<OUT> outputType, String name,
      Operator<I1> input1, Operator<I2> input2)
  {
    @SuppressWarnings("unchecked")
    final Keys.SelectorFunctionKeys<I1, K> keys1 = (Keys.SelectorFunctionKeys<I1, K>) rawKeys1;
    @SuppressWarnings("unchecked")
    final Keys.SelectorFunctionKeys<I2, K> keys2 = (Keys.SelectorFunctionKeys<I2, K>) rawKeys2;

    final TypeInformation<Tuple2<K, I1>> typeInfoWithKey1 = new TupleTypeInfo<Tuple2<K, I1>>(keys1.getKeyType(), inputType1);
    final TypeInformation<Tuple2<K, I2>> typeInfoWithKey2 = new TupleTypeInfo<Tuple2<K, I2>>(keys2.getKeyType(), inputType2);

    final KeyExtractingMapper<I1, K> extractor1 = new KeyExtractingMapper<I1, K>(keys1.getKeyExtractor());
    final KeyExtractingMapper<I2, K> extractor2 = new KeyExtractingMapper<I2, K>(keys2.getKeyExtractor());
   
    final MapOperatorBase<I1, Tuple2<K, I1>, GenericMap<I1, Tuple2<K, I1>>> keyMapper1 =
        new MapOperatorBase<I1, Tuple2<K, I1>, GenericMap<I1, Tuple2<K, I1>>>(extractor1, new UnaryOperatorInformation<I1, Tuple2<K, I1>>(inputType1, typeInfoWithKey1), "Key Extractor 1");
    final MapOperatorBase<I2, Tuple2<K, I2>, GenericMap<I2, Tuple2<K, I2>>> keyMapper2 =
        new MapOperatorBase<I2, Tuple2<K, I2>, GenericMap<I2, Tuple2<K, I2>>>(extractor2, new UnaryOperatorInformation<I2, Tuple2<K, I2>>(inputType2, typeInfoWithKey2), "Key Extractor 2");
    final PlanUnwrappingCoGroupOperator<I1, I2, OUT, K> cogroup = new PlanUnwrappingCoGroupOperator<I1, I2, OUT, K>(function, keys1, keys2, name, outputType, typeInfoWithKey1, typeInfoWithKey2);

    cogroup.setFirstInput(keyMapper1);
    cogroup.setSecondInput(keyMapper2);

    keyMapper1.setInput(input1);
    keyMapper2.setInput(input2);
    // set dop
    keyMapper1.setDegreeOfParallelism(input1.getDegreeOfParallelism());
    keyMapper2.setDegreeOfParallelism(input2.getDegreeOfParallelism());

    return cogroup;
  }

  private static <I1, I2, K, OUT> PlanUnwrappingCoGroupOperator<I1, I2, OUT, K> translateSelectorFunctionCoGroupRight(
      int[] logicalKeyPositions1, Keys.SelectorFunctionKeys<I2, ?> rawKeys2,
      CoGroupFunction<I1, I2, OUT> function,
      TypeInformation<I1> inputType1, TypeInformation<I2> inputType2, TypeInformation<OUT> outputType, String name,
      Operator<I1> input1, Operator<I2> input2)
  {
    if(!inputType1.isTupleType()) {
      throw new InvalidParameterException("Should not happen.");
    }

    @SuppressWarnings("unchecked")
    final Keys.SelectorFunctionKeys<I2, K> keys2 = (Keys.SelectorFunctionKeys<I2, K>) rawKeys2;

    final TypeInformation<Tuple2<K, I1>> typeInfoWithKey1 = new TupleTypeInfo<Tuple2<K, I1>>(keys2.getKeyType(), inputType1); // assume same key, checked by Key.areCompatibale() before
    final TypeInformation<Tuple2<K, I2>> typeInfoWithKey2 = new TupleTypeInfo<Tuple2<K, I2>>(keys2.getKeyType(), inputType2);

    final TupleKeyExtractingMapper<I1, K> extractor1 = new TupleKeyExtractingMapper<I1, K>(logicalKeyPositions1[0]);
    final KeyExtractingMapper<I2, K> extractor2 = new KeyExtractingMapper<I2, K>(keys2.getKeyExtractor());

    final MapOperatorBase<I1, Tuple2<K, I1>, GenericMap<I1, Tuple2<K, I1>>> keyMapper1 =
        new MapOperatorBase<I1, Tuple2<K, I1>, GenericMap<I1, Tuple2<K, I1>>>(extractor1, new UnaryOperatorInformation<I1, Tuple2<K, I1>>(inputType1, typeInfoWithKey1), "Key Extractor 1");
    final MapOperatorBase<I2, Tuple2<K, I2>, GenericMap<I2, Tuple2<K, I2>>> keyMapper2 =
        new MapOperatorBase<I2, Tuple2<K, I2>, GenericMap<I2, Tuple2<K, I2>>>(extractor2, new UnaryOperatorInformation<I2, Tuple2<K, I2>>(inputType2, typeInfoWithKey2), "Key Extractor 2");
   
    final PlanUnwrappingCoGroupOperator<I1, I2, OUT, K> cogroup = new PlanUnwrappingCoGroupOperator<I1, I2, OUT, K>(function, logicalKeyPositions1, keys2, name, outputType, typeInfoWithKey1, typeInfoWithKey2);

    cogroup.setFirstInput(keyMapper1);
    cogroup.setSecondInput(keyMapper2);

    keyMapper1.setInput(input1);
    keyMapper2.setInput(input2);
    // set dop
    keyMapper1.setDegreeOfParallelism(input1.getDegreeOfParallelism());
    keyMapper2.setDegreeOfParallelism(input2.getDegreeOfParallelism());

    return cogroup;
  }

  private static <I1, I2, K, OUT> PlanUnwrappingCoGroupOperator<I1, I2, OUT, K> translateSelectorFunctionCoGroupLeft(
      Keys.SelectorFunctionKeys<I1, ?> rawKeys1, int[] logicalKeyPositions2,
      CoGroupFunction<I1, I2, OUT> function,
      TypeInformation<I1> inputType1, TypeInformation<I2> inputType2, TypeInformation<OUT> outputType, String name,
      Operator<I1> input1, Operator<I2> input2)
  {
    if(!inputType2.isTupleType()) {
      throw new InvalidParameterException("Should not happen.");
    }

    @SuppressWarnings("unchecked")
    final Keys.SelectorFunctionKeys<I1, K> keys1 = (Keys.SelectorFunctionKeys<I1, K>) rawKeys1;

    final TypeInformation<Tuple2<K, I1>> typeInfoWithKey1 = new TupleTypeInfo<Tuple2<K, I1>>(keys1.getKeyType(), inputType1); // assume same key, checked by Key.areCompatibale() before
    final TypeInformation<Tuple2<K, I2>> typeInfoWithKey2 = new TupleTypeInfo<Tuple2<K, I2>>(keys1.getKeyType(), inputType2);

    final KeyExtractingMapper<I1, K> extractor1 = new KeyExtractingMapper<I1, K>(keys1.getKeyExtractor());
    final TupleKeyExtractingMapper<I2, K> extractor2 = new TupleKeyExtractingMapper<I2, K>(logicalKeyPositions2[0]);

    final MapOperatorBase<I1, Tuple2<K, I1>, GenericMap<I1, Tuple2<K, I1>>> keyMapper1 =
        new MapOperatorBase<I1, Tuple2<K, I1>, GenericMap<I1, Tuple2<K, I1>>>(extractor1, new UnaryOperatorInformation<I1, Tuple2<K, I1>>(inputType1, typeInfoWithKey1), "Key Extractor 1");
    final MapOperatorBase<I2, Tuple2<K, I2>, GenericMap<I2, Tuple2<K, I2>>> keyMapper2 =
        new MapOperatorBase<I2, Tuple2<K, I2>, GenericMap<I2, Tuple2<K, I2>>>(extractor2, new UnaryOperatorInformation<I2, Tuple2<K, I2>>(inputType2, typeInfoWithKey2), "Key Extractor 2");
   
    final PlanUnwrappingCoGroupOperator<I1, I2, OUT, K> cogroup = new PlanUnwrappingCoGroupOperator<I1, I2, OUT, K>(function, keys1, logicalKeyPositions2, name, outputType, typeInfoWithKey1, typeInfoWithKey2);

    cogroup.setFirstInput(keyMapper1);
    cogroup.setSecondInput(keyMapper2);

    keyMapper1.setInput(input1);
    keyMapper2.setInput(input2);
    // set dop
    keyMapper1.setDegreeOfParallelism(input1.getDegreeOfParallelism());
    keyMapper2.setDegreeOfParallelism(input2.getDegreeOfParallelism());

    return cogroup;
  }

  // --------------------------------------------------------------------------------------------
  // Builder classes for incremental construction
  // --------------------------------------------------------------------------------------------

  /**
   * Intermediate step of a CoGroup transformation. <br/>
   * To continue the CoGroup transformation, select the grouping key of the first input {@link DataSet} by calling
   * {@link CoGroupOperatorSets#where(int...)} or {@link CoGroupOperatorSets#where(KeySelector)}.
   *
   * @param <I1> The type of the first input DataSet of the CoGroup transformation.
   * @param <I2> The type of the second input DataSet of the CoGroup transformation.
   */
  public static final class CoGroupOperatorSets<I1, I2> {

    private final DataSet<I1> input1;
    private final DataSet<I2> input2;

    public CoGroupOperatorSets(DataSet<I1> input1, DataSet<I2> input2) {
      if (input1 == null || input2 == null) {
        throw new NullPointerException();
      }

      this.input1 = input1;
      this.input2 = input2;
    }

    /**
     * Continues a CoGroup transformation. <br/>
     * Defines the {@link Tuple} fields of the first co-grouped {@link DataSet} that should be used as grouping keys.<br/>
     * <b>Note: Fields can only be selected as grouping keys on Tuple DataSets.</b><br/>
     *
     * @param fields The indexes of the Tuple fields of the first co-grouped DataSets that should be used as keys.
     * @return An incomplete CoGroup transformation.
     *           Call {@link CoGroupOperatorSetsPredicate#equalTo()} to continue the CoGroup.
     *
     * @see Tuple
     * @see DataSet
     */
    public CoGroupOperatorSetsPredicate where(int... fields) {
      return new CoGroupOperatorSetsPredicate(new Keys.FieldPositionKeys<I1>(fields, input1.getType()));
    }

    /**
     * Continues a CoGroup transformation and defines a {@link KeySelector} function for the first co-grouped {@link DataSet}.</br>
     * The KeySelector function is called for each element of the first DataSet and extracts a single
     * key value on which the DataSet is grouped. </br>
     *
     * @param keySelector The KeySelector function which extracts the key values from the DataSet on which it is grouped.
     * @return An incomplete CoGroup transformation.
     *           Call {@link CoGroupOperatorSetsPredicate#equalTo()} to continue the CoGroup.
     *
     * @see KeySelector
     * @see DataSet
     */
    public <K> CoGroupOperatorSetsPredicate where(KeySelector<I1, K> keyExtractor) {
      return new CoGroupOperatorSetsPredicate(new Keys.SelectorFunctionKeys<I1, K>(keyExtractor, input1.getType()));
    }

    // ----------------------------------------------------------------------------------------

    /**
     * Intermediate step of a CoGroup transformation. <br/>
     * To continue the CoGroup transformation, select the grouping key of the second input {@link DataSet} by calling
     * {@link CoGroupOperatorSetsPredicate#equalTo(int...)} or {@link CoGroupOperatorSetsPredicate#equalTo(KeySelector)}.
     *
     */
    public final class CoGroupOperatorSetsPredicate {

      private final Keys<I1> keys1;

      private CoGroupOperatorSetsPredicate(Keys<I1> keys1) {
        if (keys1 == null) {
          throw new NullPointerException();
        }

        if (keys1.isEmpty()) {
          throw new InvalidProgramException("The join keys must not be empty.");
        }

        this.keys1 = keys1;
      }

      /**
       * Continues a CoGroup transformation and defines the {@link Tuple} fields of the second co-grouped
       * {@link DataSet} that should be used as grouping keys.<br/>
       * <b>Note: Fields can only be selected as grouping keys on Tuple DataSets.</b><br/>
       *
       * @param fields The indexes of the Tuple fields of the second co-grouped DataSet that should be used as keys.
       * @return An incomplete CoGroup transformation.
       *           Call {@link CoGroupOperatorWithoutFunction#with(CoGroupFunction))} to finalize the CoGroup transformation.
       */
      public CoGroupOperatorWithoutFunction equalTo(int... fields) {
        return createCoGroupOperator(new Keys.FieldPositionKeys<I2>(fields, input2.getType()));

      }

      /**
       * Continues a CoGroup transformation and defines a {@link KeySelector} function for the second co-grouped {@link DataSet}.</br>
       * The KeySelector function is called for each element of the second DataSet and extracts a single
       * key value on which the DataSet is grouped. </br>
       *
       * @param keySelector The KeySelector function which extracts the key values from the second DataSet on which it is grouped.
       * @return An incomplete CoGroup transformation.
       *           Call {@link CoGroupOperatorWithoutFunction#with(CoGroupFunction))} to finalize the CoGroup transformation.
       */
      public <K> CoGroupOperatorWithoutFunction equalTo(KeySelector<I2, K> keyExtractor) {
        return createCoGroupOperator(new Keys.SelectorFunctionKeys<I2, K>(keyExtractor, input2.getType()));
      }

      /**
       * Intermediate step of a CoGroup transformation. <br/>
       * To continue the CoGroup transformation, provide a {@link CoGroupFunction} by calling
       * {@link CoGroupOperatorWithoutFunction#with(CoGroupFunction))}.
       *
       */
      private CoGroupOperatorWithoutFunction createCoGroupOperator(Keys<I2> keys2) {
        if (keys2 == null) {
          throw new NullPointerException();
        }

        if (keys2.isEmpty()) {
          throw new InvalidProgramException("The join keys must not be empty.");
        }

        if (!keys1.areCompatibale(keys2)) {
          throw new InvalidProgramException("The pair of join keys are not compatible with each other.");
        }

        return new CoGroupOperatorWithoutFunction(keys2);
      }

      public final class CoGroupOperatorWithoutFunction {
        private final Keys<I2> keys2;

        private CoGroupOperatorWithoutFunction(Keys<I2> keys2) {
          if (keys2 == null) {
            throw new NullPointerException();
          }

          if (keys2.isEmpty()) {
            throw new InvalidProgramException("The join keys must not be empty.");
          }

          this.keys2 = keys2;
        }

        /**
         * Finalizes a CoGroup transformation by applying a {@link CoGroupFunction} to groups of elements with identical keys.<br/>
         * Each CoGroupFunction call returns an arbitrary number of keys.
         *
         * @param function The CoGroupFunction that is called for all groups of elements with identical keys.
         * @return An CoGroupOperator that represents the co-grouped result DataSet.
         *
         * @see CoGroupFunction
         * @see DataSet
         */
        public <R> CoGroupOperator<I1, I2, R> with(CoGroupFunction<I1, I2, R> function) {
          if (function == null) {
            throw new NullPointerException("CoGroup function must not be null.");
          }
          TypeInformation<R> returnType = TypeExtractor.getCoGroupReturnTypes(function, input1.getType(), input2.getType());
          return new CoGroupOperator<I1, I2, R>(input1, input2, keys1, keys2, function, returnType);
        }
      }
    }
  }
}
TOP

Related Classes of eu.stratosphere.api.java.operators.CoGroupOperatorSets$CoGroupOperatorSetsPredicate$CoGroupOperatorWithoutFunction

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.