/***********************************************************************************************************************
*
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.api.java.operators;
import java.security.InvalidParameterException;
import eu.stratosphere.api.common.InvalidProgramException;
import eu.stratosphere.api.common.functions.GenericCoGrouper;
import eu.stratosphere.api.common.functions.GenericMap;
import eu.stratosphere.api.common.operators.BinaryOperatorInformation;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.UnaryOperatorInformation;
import eu.stratosphere.api.common.operators.base.CoGroupOperatorBase;
import eu.stratosphere.api.common.operators.base.MapOperatorBase;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.functions.CoGroupFunction;
import eu.stratosphere.api.java.functions.KeySelector;
import eu.stratosphere.api.java.operators.translation.KeyExtractingMapper;
import eu.stratosphere.api.java.operators.translation.PlanUnwrappingCoGroupOperator;
import eu.stratosphere.api.java.operators.translation.TupleKeyExtractingMapper;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.api.java.typeutils.TupleTypeInfo;
import eu.stratosphere.api.java.typeutils.TypeExtractor;
import eu.stratosphere.types.TypeInformation;
/**
* A {@link DataSet} that is the result of a CoGroup transformation.
*
* @param <I1> The type of the first input DataSet of the CoGroup transformation.
* @param <I2> The type of the second input DataSet of the CoGroup transformation.
* @param <OUT> The type of the result of the CoGroup transformation.
*
* @see DataSet
*/
public class CoGroupOperator<I1, I2, OUT> extends TwoInputUdfOperator<I1, I2, OUT, CoGroupOperator<I1, I2, OUT>> {
private final CoGroupFunction<I1, I2, OUT> function;
private final Keys<I1> keys1;
private final Keys<I2> keys2;
/**
 * Creates a CoGroup operator over the two given input DataSets.
 *
 * @param input1 The first input DataSet.
 * @param input2 The second input DataSet.
 * @param keys1 The grouping keys of the first input; must not be null.
 * @param keys2 The grouping keys of the second input; must not be null.
 * @param function The CoGroupFunction applied by this operator; must not be null.
 * @param returnType The type information of the elements produced by this operator.
 * @throws NullPointerException If the function or one of the key definitions is null.
 */
protected CoGroupOperator(DataSet<I1> input1, DataSet<I2> input2,
        Keys<I1> keys1, Keys<I2> keys2,
        CoGroupFunction<I1, I2, OUT> function,
        TypeInformation<OUT> returnType)
{
    super(input1, input2, returnType);

    // Fail with a descriptive message rather than an anonymous NullPointerException
    // (a null function would otherwise only surface at function.getClass() below).
    if (function == null) {
        throw new NullPointerException("CoGroup function must not be null.");
    }
    if (keys1 == null || keys2 == null) {
        throw new NullPointerException("CoGroup keys must not be null.");
    }

    this.function = function;
    this.keys1 = keys1;
    this.keys2 = keys2;

    extractSemanticAnnotationsFromUdf(function.getClass());
}
/**
 * Gets the grouping keys defined for the first input.
 *
 * @return The keys of the first input that this operator was constructed with.
 */
protected Keys<I1> getKeys1() {
    return keys1;
}
/**
 * Gets the grouping keys defined for the second input.
 *
 * @return The keys of the second input that this operator was constructed with.
 */
protected Keys<I2> getKeys2() {
    return keys2;
}
/**
 * Translates this CoGroup into an operator of the common API. The translation strategy depends on how
 * the keys of the two inputs were defined:
 * <ul>
 *   <li>field positions on both sides: a plain {@link CoGroupOperatorBase} on the logical key positions,</li>
 *   <li>a key selector function on at least one side: a {@link PlanUnwrappingCoGroupOperator}.</li>
 * </ul>
 * The returned operator always gets this operator's parallelism as its degree of parallelism.
 *
 * @param input1 The already translated first input.
 * @param input2 The already translated second input.
 * @return The common API operator that represents this CoGroup.
 * @throws UnsupportedOperationException If the key definitions are of an unknown kind or are not
 *         compatible with each other.
 */
@Override
protected eu.stratosphere.api.common.operators.base.CoGroupOperatorBase<?, ?, OUT, ?> translateToDataFlow(Operator<I1> input1, Operator<I2> input2) {
    // fall back to the UDF class name if the user did not name the operator
    final String name;
    if (getName() != null) {
        name = getName();
    } else {
        name = function.getClass().getName();
    }

    final boolean selector1 = keys1 instanceof Keys.SelectorFunctionKeys;
    final boolean positions1 = keys1 instanceof Keys.FieldPositionKeys;
    final boolean selector2 = keys2 instanceof Keys.SelectorFunctionKeys;
    final boolean positions2 = keys2 instanceof Keys.FieldPositionKeys;

    // the compatibility check is only consulted once both key kinds are recognized
    final boolean recognized = (selector1 || positions1) && (selector2 || positions2);
    if (!recognized || !keys1.areCompatibale(keys2)) {
        throw new UnsupportedOperationException("Unrecognized or incompatible key types.");
    }

    final CoGroupOperatorBase<?, ?, OUT, ?> translated;

    if (selector1 && selector2) {
        @SuppressWarnings("unchecked")
        Keys.SelectorFunctionKeys<I1, ?> firstSelector = (Keys.SelectorFunctionKeys<I1, ?>) keys1;
        @SuppressWarnings("unchecked")
        Keys.SelectorFunctionKeys<I2, ?> secondSelector = (Keys.SelectorFunctionKeys<I2, ?>) keys2;

        translated = translateSelectorFunctionCoGroup(firstSelector, secondSelector, function,
                getInput1Type(), getInput2Type(), getResultType(), name, input1, input2);
    }
    else if (positions1 && positions2) {
        int[] firstPositions = keys1.computeLogicalKeyPositions();
        int[] secondPositions = keys2.computeLogicalKeyPositions();

        BinaryOperatorInformation<I1, I2, OUT> operatorInfo =
                new BinaryOperatorInformation<I1, I2, OUT>(getInput1Type(), getInput2Type(), getResultType());
        CoGroupOperatorBase<I1, I2, OUT, GenericCoGrouper<I1, I2, OUT>> plainCoGroup =
                new CoGroupOperatorBase<I1, I2, OUT, GenericCoGrouper<I1, I2, OUT>>(
                        function, operatorInfo, firstPositions, secondPositions, name);

        // no key extraction needed: wire the inputs directly
        plainCoGroup.setFirstInput(input1);
        plainCoGroup.setSecondInput(input2);
        translated = plainCoGroup;
    }
    else if (positions1 && selector2) {
        int[] firstPositions = keys1.computeLogicalKeyPositions();
        @SuppressWarnings("unchecked")
        Keys.SelectorFunctionKeys<I2, ?> secondSelector = (Keys.SelectorFunctionKeys<I2, ?>) keys2;

        translated = translateSelectorFunctionCoGroupRight(firstPositions, secondSelector, function,
                getInput1Type(), getInput2Type(), getResultType(), name, input1, input2);
    }
    else {
        // remaining combination: key selector on the first input, field positions on the second
        @SuppressWarnings("unchecked")
        Keys.SelectorFunctionKeys<I1, ?> firstSelector = (Keys.SelectorFunctionKeys<I1, ?>) keys1;
        int[] secondPositions = keys2.computeLogicalKeyPositions();

        translated = translateSelectorFunctionCoGroupLeft(firstSelector, secondPositions, function,
                getInput1Type(), getInput2Type(), getResultType(), name, input1, input2);
    }

    // set dop
    translated.setDegreeOfParallelism(this.getParallelism());
    return translated;
}
/**
 * Builds the CoGroup translation for the case where both inputs are keyed by key selector functions.
 * In front of each input a map operator is placed that wraps every element into a
 * {@code Tuple2<K, element>} type; the returned {@link PlanUnwrappingCoGroupOperator} consumes these mappers.
 * Each mapper inherits the degree of parallelism of the input it reads from.
 *
 * @param rawKeys1 The key selector keys of the first input.
 * @param rawKeys2 The key selector keys of the second input.
 * @param function The user's CoGroupFunction.
 * @param inputType1 The element type of the first input.
 * @param inputType2 The element type of the second input.
 * @param outputType The element type of the result.
 * @param name The name given to the CoGroup operator.
 * @param input1 The translated first input.
 * @param input2 The translated second input.
 * @return The CoGroup operator, with both key extracting mappers already wired in.
 */
private static <I1, I2, K, OUT> PlanUnwrappingCoGroupOperator<I1, I2, OUT, K> translateSelectorFunctionCoGroup(
        Keys.SelectorFunctionKeys<I1, ?> rawKeys1, Keys.SelectorFunctionKeys<I2, ?> rawKeys2,
        CoGroupFunction<I1, I2, OUT> function,
        TypeInformation<I1> inputType1, TypeInformation<I2> inputType2, TypeInformation<OUT> outputType, String name,
        Operator<I1> input1, Operator<I2> input2)
{
    @SuppressWarnings("unchecked")
    final Keys.SelectorFunctionKeys<I1, K> typedKeys1 = (Keys.SelectorFunctionKeys<I1, K>) rawKeys1;
    @SuppressWarnings("unchecked")
    final Keys.SelectorFunctionKeys<I2, K> typedKeys2 = (Keys.SelectorFunctionKeys<I2, K>) rawKeys2;

    // types of the (key, element) pairs produced by the two mappers
    final TypeInformation<Tuple2<K, I1>> keyedType1 = new TupleTypeInfo<Tuple2<K, I1>>(typedKeys1.getKeyType(), inputType1);
    final TypeInformation<Tuple2<K, I2>> keyedType2 = new TupleTypeInfo<Tuple2<K, I2>>(typedKeys2.getKeyType(), inputType2);

    final KeyExtractingMapper<I1, K> keyUdf1 = new KeyExtractingMapper<I1, K>(typedKeys1.getKeyExtractor());
    final KeyExtractingMapper<I2, K> keyUdf2 = new KeyExtractingMapper<I2, K>(typedKeys2.getKeyExtractor());

    final UnaryOperatorInformation<I1, Tuple2<K, I1>> mapperInfo1 =
            new UnaryOperatorInformation<I1, Tuple2<K, I1>>(inputType1, keyedType1);
    final MapOperatorBase<I1, Tuple2<K, I1>, GenericMap<I1, Tuple2<K, I1>>> mapper1 =
            new MapOperatorBase<I1, Tuple2<K, I1>, GenericMap<I1, Tuple2<K, I1>>>(keyUdf1, mapperInfo1, "Key Extractor 1");

    final UnaryOperatorInformation<I2, Tuple2<K, I2>> mapperInfo2 =
            new UnaryOperatorInformation<I2, Tuple2<K, I2>>(inputType2, keyedType2);
    final MapOperatorBase<I2, Tuple2<K, I2>, GenericMap<I2, Tuple2<K, I2>>> mapper2 =
            new MapOperatorBase<I2, Tuple2<K, I2>, GenericMap<I2, Tuple2<K, I2>>>(keyUdf2, mapperInfo2, "Key Extractor 2");

    final PlanUnwrappingCoGroupOperator<I1, I2, OUT, K> result =
            new PlanUnwrappingCoGroupOperator<I1, I2, OUT, K>(function, typedKeys1, typedKeys2, name, outputType, keyedType1, keyedType2);

    // first side: input -> mapper (same dop as its input) -> cogroup
    mapper1.setInput(input1);
    mapper1.setDegreeOfParallelism(input1.getDegreeOfParallelism());
    result.setFirstInput(mapper1);

    // second side: input -> mapper (same dop as its input) -> cogroup
    mapper2.setInput(input2);
    mapper2.setDegreeOfParallelism(input2.getDegreeOfParallelism());
    result.setSecondInput(mapper2);

    return result;
}
/**
 * Builds the CoGroup translation for the case where the first input is keyed by field positions and the
 * second input by a key selector function.
 * A map operator is placed in front of each input that wraps every element into a
 * {@code Tuple2<K, element>} type. For the first input, only the first logical key position is handed to
 * the tuple key extractor. The key type of both sides is taken from the second input's keys, relying on
 * the compatibility check done by the caller.
 *
 * @param logicalKeyPositions1 The logical key positions of the first input.
 * @param rawKeys2 The key selector keys of the second input.
 * @param function The user's CoGroupFunction.
 * @param inputType1 The element type of the first input; must be a tuple type.
 * @param inputType2 The element type of the second input.
 * @param outputType The element type of the result.
 * @param name The name given to the CoGroup operator.
 * @param input1 The translated first input.
 * @param input2 The translated second input.
 * @return The CoGroup operator, with both key extracting mappers already wired in.
 * @throws InvalidParameterException If the first input is not of a tuple type.
 */
private static <I1, I2, K, OUT> PlanUnwrappingCoGroupOperator<I1, I2, OUT, K> translateSelectorFunctionCoGroupRight(
        int[] logicalKeyPositions1, Keys.SelectorFunctionKeys<I2, ?> rawKeys2,
        CoGroupFunction<I1, I2, OUT> function,
        TypeInformation<I1> inputType1, TypeInformation<I2> inputType2, TypeInformation<OUT> outputType, String name,
        Operator<I1> input1, Operator<I2> input2)
{
    final boolean firstInputIsTuple = inputType1.isTupleType();
    if (!firstInputIsTuple) {
        throw new InvalidParameterException("Should not happen.");
    }

    @SuppressWarnings("unchecked")
    final Keys.SelectorFunctionKeys<I2, K> typedKeys2 = (Keys.SelectorFunctionKeys<I2, K>) rawKeys2;

    // both sides use the key type of the selector; equality of key types was checked by Keys.areCompatibale() before
    final TypeInformation<Tuple2<K, I1>> keyedType1 = new TupleTypeInfo<Tuple2<K, I1>>(typedKeys2.getKeyType(), inputType1);
    final TypeInformation<Tuple2<K, I2>> keyedType2 = new TupleTypeInfo<Tuple2<K, I2>>(typedKeys2.getKeyType(), inputType2);

    final TupleKeyExtractingMapper<I1, K> keyUdf1 = new TupleKeyExtractingMapper<I1, K>(logicalKeyPositions1[0]);
    final KeyExtractingMapper<I2, K> keyUdf2 = new KeyExtractingMapper<I2, K>(typedKeys2.getKeyExtractor());

    final UnaryOperatorInformation<I1, Tuple2<K, I1>> mapperInfo1 =
            new UnaryOperatorInformation<I1, Tuple2<K, I1>>(inputType1, keyedType1);
    final MapOperatorBase<I1, Tuple2<K, I1>, GenericMap<I1, Tuple2<K, I1>>> mapper1 =
            new MapOperatorBase<I1, Tuple2<K, I1>, GenericMap<I1, Tuple2<K, I1>>>(keyUdf1, mapperInfo1, "Key Extractor 1");

    final UnaryOperatorInformation<I2, Tuple2<K, I2>> mapperInfo2 =
            new UnaryOperatorInformation<I2, Tuple2<K, I2>>(inputType2, keyedType2);
    final MapOperatorBase<I2, Tuple2<K, I2>, GenericMap<I2, Tuple2<K, I2>>> mapper2 =
            new MapOperatorBase<I2, Tuple2<K, I2>, GenericMap<I2, Tuple2<K, I2>>>(keyUdf2, mapperInfo2, "Key Extractor 2");

    final PlanUnwrappingCoGroupOperator<I1, I2, OUT, K> result =
            new PlanUnwrappingCoGroupOperator<I1, I2, OUT, K>(function, logicalKeyPositions1, typedKeys2, name, outputType, keyedType1, keyedType2);

    // first side: input -> mapper (same dop as its input) -> cogroup
    mapper1.setInput(input1);
    mapper1.setDegreeOfParallelism(input1.getDegreeOfParallelism());
    result.setFirstInput(mapper1);

    // second side: input -> mapper (same dop as its input) -> cogroup
    mapper2.setInput(input2);
    mapper2.setDegreeOfParallelism(input2.getDegreeOfParallelism());
    result.setSecondInput(mapper2);

    return result;
}
/**
 * Builds the CoGroup translation for the case where the first input is keyed by a key selector function
 * and the second input by field positions.
 * A map operator is placed in front of each input that wraps every element into a
 * {@code Tuple2<K, element>} type. For the second input, only the first logical key position is handed to
 * the tuple key extractor. The key type of both sides is taken from the first input's keys, relying on
 * the compatibility check done by the caller.
 *
 * @param rawKeys1 The key selector keys of the first input.
 * @param logicalKeyPositions2 The logical key positions of the second input.
 * @param function The user's CoGroupFunction.
 * @param inputType1 The element type of the first input.
 * @param inputType2 The element type of the second input; must be a tuple type.
 * @param outputType The element type of the result.
 * @param name The name given to the CoGroup operator.
 * @param input1 The translated first input.
 * @param input2 The translated second input.
 * @return The CoGroup operator, with both key extracting mappers already wired in.
 * @throws InvalidParameterException If the second input is not of a tuple type.
 */
private static <I1, I2, K, OUT> PlanUnwrappingCoGroupOperator<I1, I2, OUT, K> translateSelectorFunctionCoGroupLeft(
        Keys.SelectorFunctionKeys<I1, ?> rawKeys1, int[] logicalKeyPositions2,
        CoGroupFunction<I1, I2, OUT> function,
        TypeInformation<I1> inputType1, TypeInformation<I2> inputType2, TypeInformation<OUT> outputType, String name,
        Operator<I1> input1, Operator<I2> input2)
{
    final boolean secondInputIsTuple = inputType2.isTupleType();
    if (!secondInputIsTuple) {
        throw new InvalidParameterException("Should not happen.");
    }

    @SuppressWarnings("unchecked")
    final Keys.SelectorFunctionKeys<I1, K> typedKeys1 = (Keys.SelectorFunctionKeys<I1, K>) rawKeys1;

    // both sides use the key type of the selector; equality of key types was checked by Keys.areCompatibale() before
    final TypeInformation<Tuple2<K, I1>> keyedType1 = new TupleTypeInfo<Tuple2<K, I1>>(typedKeys1.getKeyType(), inputType1);
    final TypeInformation<Tuple2<K, I2>> keyedType2 = new TupleTypeInfo<Tuple2<K, I2>>(typedKeys1.getKeyType(), inputType2);

    final KeyExtractingMapper<I1, K> keyUdf1 = new KeyExtractingMapper<I1, K>(typedKeys1.getKeyExtractor());
    final TupleKeyExtractingMapper<I2, K> keyUdf2 = new TupleKeyExtractingMapper<I2, K>(logicalKeyPositions2[0]);

    final UnaryOperatorInformation<I1, Tuple2<K, I1>> mapperInfo1 =
            new UnaryOperatorInformation<I1, Tuple2<K, I1>>(inputType1, keyedType1);
    final MapOperatorBase<I1, Tuple2<K, I1>, GenericMap<I1, Tuple2<K, I1>>> mapper1 =
            new MapOperatorBase<I1, Tuple2<K, I1>, GenericMap<I1, Tuple2<K, I1>>>(keyUdf1, mapperInfo1, "Key Extractor 1");

    final UnaryOperatorInformation<I2, Tuple2<K, I2>> mapperInfo2 =
            new UnaryOperatorInformation<I2, Tuple2<K, I2>>(inputType2, keyedType2);
    final MapOperatorBase<I2, Tuple2<K, I2>, GenericMap<I2, Tuple2<K, I2>>> mapper2 =
            new MapOperatorBase<I2, Tuple2<K, I2>, GenericMap<I2, Tuple2<K, I2>>>(keyUdf2, mapperInfo2, "Key Extractor 2");

    final PlanUnwrappingCoGroupOperator<I1, I2, OUT, K> result =
            new PlanUnwrappingCoGroupOperator<I1, I2, OUT, K>(function, typedKeys1, logicalKeyPositions2, name, outputType, keyedType1, keyedType2);

    // first side: input -> mapper (same dop as its input) -> cogroup
    mapper1.setInput(input1);
    mapper1.setDegreeOfParallelism(input1.getDegreeOfParallelism());
    result.setFirstInput(mapper1);

    // second side: input -> mapper (same dop as its input) -> cogroup
    mapper2.setInput(input2);
    mapper2.setDegreeOfParallelism(input2.getDegreeOfParallelism());
    result.setSecondInput(mapper2);

    return result;
}
// --------------------------------------------------------------------------------------------
// Builder classes for incremental construction
// --------------------------------------------------------------------------------------------
/**
* Intermediate step of a CoGroup transformation. <br/>
* To continue the CoGroup transformation, select the grouping key of the first input {@link DataSet} by calling
* {@link CoGroupOperatorSets#where(int...)} or {@link CoGroupOperatorSets#where(KeySelector)}.
*
* @param <I1> The type of the first input DataSet of the CoGroup transformation.
* @param <I2> The type of the second input DataSet of the CoGroup transformation.
*/
public static final class CoGroupOperatorSets<I1, I2> {
private final DataSet<I1> input1;
private final DataSet<I2> input2;
/**
 * Starts the incremental definition of a CoGroup transformation on two DataSets.
 *
 * @param input1 The first input DataSet; must not be null.
 * @param input2 The second input DataSet; must not be null.
 * @throws NullPointerException If one of the inputs is null.
 */
public CoGroupOperatorSets(DataSet<I1> input1, DataSet<I2> input2) {
    if (input1 == null) {
        throw new NullPointerException();
    }
    if (input2 == null) {
        throw new NullPointerException();
    }

    this.input1 = input1;
    this.input2 = input2;
}
/**
 * Continues a CoGroup transformation. <br/>
 * Selects the {@link Tuple} fields of the first co-grouped {@link DataSet} that are used as grouping keys.<br/>
 * <b>Note: Fields can only be selected as grouping keys on Tuple DataSets.</b><br/>
 *
 * @param fields The indexes of the Tuple fields of the first co-grouped DataSet that should be used as keys.
 * @return An incomplete CoGroup transformation.
 *           Call {@link CoGroupOperatorSetsPredicate#equalTo(int...)} or
 *           {@link CoGroupOperatorSetsPredicate#equalTo(KeySelector)} to continue the CoGroup.
 *
 * @see Tuple
 * @see DataSet
 */
public CoGroupOperatorSetsPredicate where(int... fields) {
    final Keys<I1> firstInputKeys = new Keys.FieldPositionKeys<I1>(fields, input1.getType());
    return new CoGroupOperatorSetsPredicate(firstInputKeys);
}
/**
 * Continues a CoGroup transformation and defines a {@link KeySelector} function for the first co-grouped {@link DataSet}.<br/>
 * The KeySelector function is called for each element of the first DataSet and extracts a single
 * key value on which the DataSet is grouped. <br/>
 *
 * @param keyExtractor The KeySelector function which extracts the key values from the first DataSet on which it is grouped.
 * @return An incomplete CoGroup transformation.
 *           Call {@link CoGroupOperatorSetsPredicate#equalTo(int...)} or
 *           {@link CoGroupOperatorSetsPredicate#equalTo(KeySelector)} to continue the CoGroup.
 *
 * @see KeySelector
 * @see DataSet
 */
public <K> CoGroupOperatorSetsPredicate where(KeySelector<I1, K> keyExtractor) {
    final Keys<I1> firstInputKeys = new Keys.SelectorFunctionKeys<I1, K>(keyExtractor, input1.getType());
    return new CoGroupOperatorSetsPredicate(firstInputKeys);
}
// ----------------------------------------------------------------------------------------
/**
* Intermediate step of a CoGroup transformation. <br/>
* To continue the CoGroup transformation, select the grouping key of the second input {@link DataSet} by calling
* {@link CoGroupOperatorSetsPredicate#equalTo(int...)} or {@link CoGroupOperatorSetsPredicate#equalTo(KeySelector)}.
*
*/
public final class CoGroupOperatorSetsPredicate {
private final Keys<I1> keys1;
/**
 * Creates the builder step that holds the keys selected for the first input.
 *
 * @param keys1 The grouping keys of the first input; must be neither null nor empty.
 * @throws NullPointerException If the keys are null.
 * @throws InvalidProgramException If the keys are empty.
 */
private CoGroupOperatorSetsPredicate(Keys<I1> keys1) {
    if (keys1 == null) {
        throw new NullPointerException();
    }
    if (keys1.isEmpty()) {
        // this builder defines a CoGroup, so the message must not talk about a join
        throw new InvalidProgramException("The co-group keys must not be empty.");
    }
    this.keys1 = keys1;
}
/**
 * Continues a CoGroup transformation and selects the {@link Tuple} fields of the second co-grouped
 * {@link DataSet} that are used as grouping keys.<br/>
 * <b>Note: Fields can only be selected as grouping keys on Tuple DataSets.</b><br/>
 *
 * @param fields The indexes of the Tuple fields of the second co-grouped DataSet that should be used as keys.
 * @return An incomplete CoGroup transformation.
 *           Call {@link CoGroupOperatorWithoutFunction#with(CoGroupFunction)} to finalize the CoGroup transformation.
 */
public CoGroupOperatorWithoutFunction equalTo(int... fields) {
    final Keys<I2> secondInputKeys = new Keys.FieldPositionKeys<I2>(fields, input2.getType());
    return createCoGroupOperator(secondInputKeys);
}
/**
 * Continues a CoGroup transformation and defines a {@link KeySelector} function for the second co-grouped {@link DataSet}.<br/>
 * The KeySelector function is called for each element of the second DataSet and extracts a single
 * key value on which the DataSet is grouped. <br/>
 *
 * @param keyExtractor The KeySelector function which extracts the key values from the second DataSet on which it is grouped.
 * @return An incomplete CoGroup transformation.
 *           Call {@link CoGroupOperatorWithoutFunction#with(CoGroupFunction)} to finalize the CoGroup transformation.
 */
public <K> CoGroupOperatorWithoutFunction equalTo(KeySelector<I2, K> keyExtractor) {
    final Keys<I2> secondInputKeys = new Keys.SelectorFunctionKeys<I2, K>(keyExtractor, input2.getType());
    return createCoGroupOperator(secondInputKeys);
}
/**
 * Validates the keys selected for the second input against the keys of the first input and creates
 * the next builder step, on which the {@link CoGroupFunction} is supplied.
 *
 * @param keys2 The grouping keys of the second input; must be neither null nor empty.
 * @return The builder step that accepts the CoGroupFunction.
 * @throws NullPointerException If the keys are null.
 * @throws InvalidProgramException If the keys are empty, or not compatible with the keys of the first input.
 */
private CoGroupOperatorWithoutFunction createCoGroupOperator(Keys<I2> keys2) {
    if (keys2 == null) {
        throw new NullPointerException();
    }
    // this builder defines a CoGroup, so the messages must not talk about a join
    if (keys2.isEmpty()) {
        throw new InvalidProgramException("The co-group keys must not be empty.");
    }
    if (!keys1.areCompatibale(keys2)) {
        throw new InvalidProgramException("The pair of co-group keys are not compatible with each other.");
    }
    return new CoGroupOperatorWithoutFunction(keys2);
}
/**
 * Intermediate step of a CoGroup transformation. <br/>
 * To continue the CoGroup transformation, provide a {@link CoGroupFunction} by calling
 * {@link CoGroupOperatorWithoutFunction#with(CoGroupFunction)}.
 *
 */
public final class CoGroupOperatorWithoutFunction {

    private final Keys<I2> keys2;

    /**
     * Creates the builder step that holds the keys selected for the second input.
     *
     * @param keys2 The grouping keys of the second input; must be neither null nor empty.
     * @throws NullPointerException If the keys are null.
     * @throws InvalidProgramException If the keys are empty.
     */
    private CoGroupOperatorWithoutFunction(Keys<I2> keys2) {
        if (keys2 == null) {
            throw new NullPointerException();
        }
        if (keys2.isEmpty()) {
            // this builder defines a CoGroup, so the message must not talk about a join
            throw new InvalidProgramException("The co-group keys must not be empty.");
        }
        this.keys2 = keys2;
    }

    /**
     * Finalizes a CoGroup transformation by applying a {@link CoGroupFunction} to groups of elements with identical keys.<br/>
     * Each CoGroupFunction call may return an arbitrary number of result elements.
     * The result type is determined from the function and both input types via
     * {@link TypeExtractor#getCoGroupReturnTypes}.
     *
     * @param function The CoGroupFunction that is called for all groups of elements with identical keys.
     * @return A CoGroupOperator that represents the co-grouped result DataSet.
     * @throws NullPointerException If the function is null.
     *
     * @see CoGroupFunction
     * @see DataSet
     */
    public <R> CoGroupOperator<I1, I2, R> with(CoGroupFunction<I1, I2, R> function) {
        if (function == null) {
            throw new NullPointerException("CoGroup function must not be null.");
        }
        TypeInformation<R> returnType = TypeExtractor.getCoGroupReturnTypes(function, input1.getType(), input2.getType());
        return new CoGroupOperator<I1, I2, R>(input1, input2, keys1, keys2, function, returnType);
    }
}
}
}
}