Package cascading.pipe

Source Code of cascading.pipe.Splice

/*
* Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package cascading.pipe;

import java.lang.reflect.Type;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import cascading.flow.FlowElement;
import cascading.flow.planner.DeclaresResults;
import cascading.flow.planner.Scope;
import cascading.pipe.joiner.BufferJoin;
import cascading.pipe.joiner.InnerJoin;
import cascading.pipe.joiner.Joiner;
import cascading.tuple.Fields;
import cascading.tuple.FieldsResolverException;
import cascading.tuple.TupleException;
import cascading.tuple.coerce.Coercions;
import cascading.tuple.type.CoercibleType;
import cascading.util.Util;

import static java.util.Arrays.asList;

/**
* The base class for {@link GroupBy}, {@link CoGroup}, {@link Merge}, and {@link HashJoin}. This class should not be used directly.
*
* @see GroupBy
* @see CoGroup
* @see Merge
* @see HashJoin
*/
public class Splice extends Pipe
  {
  /**
   * Enum Kind names the four kinds of splice. The kind of an instance is assigned by setKind() from its
   * runtime type: GroupBy, CoGroup and Merge map to the constant of the same name, anything else is a Join.
   */
  static enum Kind
    {
      GroupBy, CoGroup, Merge, Join
    }

  /** Field kind, assigned by setKind() from the runtime type of this instance */
  private Kind kind;
  /** Field spliceName, when null getName() derives a name from the input pipe names and caches it here */
  private String spliceName;
  /** Field pipes, the input pipes in the order they were added */
  private final List<Pipe> pipes = new ArrayList<Pipe>();
  /** Field keyFieldsMap, the key (grouping) fields keyed by input pipe name */
  protected final Map<String, Fields> keyFieldsMap = new LinkedHashMap<String, Fields>(); // keep order
  /** Field sortFieldsMap, the sort fields keyed by input pipe name */
  protected Map<String, Fields> sortFieldsMap = new LinkedHashMap<String, Fields>(); // keep order
  /** Field reverseOrder, true if sorting is reversed */
  private boolean reverseOrder = false;
  /** Field declaredFields */
  protected Fields declaredFields;
  /** Field resultGroupFields */
  protected Fields resultGroupFields;
  /** Field numSelfJoins, the number of self joins, zero when this is not a self join */
  private int numSelfJoins = 0;
  /** Field joiner, a null joiner is replaced by an InnerJoin in verifyCoGrouper() */
  private Joiner joiner;

  /** Field pipePos, lazily built by getPipePos(), maps each input pipe name to its position in pipes */
  private transient Map<String, Integer> pipePos;

  /**
   * Constructor Splice creates a new Splice instance joining two pipes.
   * Delegates with a null resultGroupFields and a null joiner.
   *
   * @param lhs            the left hand side pipe
   * @param lhsGroupFields the key fields of the lhs pipe
   * @param rhs            the right hand side pipe
   * @param rhsGroupFields the key fields of the rhs pipe
   * @param declaredFields the declared fields of this Splice
   */
  protected Splice( Pipe lhs, Fields lhsGroupFields, Pipe rhs, Fields rhsGroupFields, Fields declaredFields )
    {
    this( lhs, lhsGroupFields, rhs, rhsGroupFields, declaredFields, null, null );
    }

  /**
   * Constructor Splice creates a new Splice instance joining two pipes.
   * Delegates with a null joiner.
   *
   * @param lhs               the left hand side pipe
   * @param lhsGroupFields    the key fields of the lhs pipe
   * @param rhs               the right hand side pipe
   * @param rhsGroupFields    the key fields of the rhs pipe
   * @param declaredFields    the declared fields of this Splice
   * @param resultGroupFields the result group fields, may be null
   */
  protected Splice( Pipe lhs, Fields lhsGroupFields, Pipe rhs, Fields rhsGroupFields, Fields declaredFields, Fields resultGroupFields )
    {
    this( lhs, lhsGroupFields, rhs, rhsGroupFields, declaredFields, resultGroupFields, null );
    }

  /**
   * Constructor Splice creates a new Splice instance joining two pipes.
   * The two pipes and their key fields are wrapped into arrays and handed to the array based constructor
   * that takes no resultGroupFields.
   *
   * @param lhs            the left hand side pipe
   * @param lhsGroupFields the key fields of the lhs pipe
   * @param rhs            the right hand side pipe
   * @param rhsGroupFields the key fields of the rhs pipe
   * @param declaredFields the declared fields of this Splice
   * @param joiner         of type Joiner, may be null
   */
  protected Splice( Pipe lhs, Fields lhsGroupFields, Pipe rhs, Fields rhsGroupFields, Fields declaredFields, Joiner joiner )
    {
    this( Pipe.pipes( lhs, rhs ), Fields.fields( lhsGroupFields, rhsGroupFields ), declaredFields, joiner );
    }

  /**
   * Constructor Splice creates a new Splice instance joining two pipes.
   * The two pipes and their key fields are wrapped into arrays and handed to the array based constructor.
   *
   * @param lhs               the left hand side pipe
   * @param lhsGroupFields    the key fields of the lhs pipe
   * @param rhs               the right hand side pipe
   * @param rhsGroupFields    the key fields of the rhs pipe
   * @param declaredFields    the declared fields of this Splice
   * @param resultGroupFields the result group fields, may be null
   * @param joiner            of type Joiner, may be null
   */
  protected Splice( Pipe lhs, Fields lhsGroupFields, Pipe rhs, Fields rhsGroupFields, Fields declaredFields, Fields resultGroupFields, Joiner joiner )
    {
    this( Pipe.pipes( lhs, rhs ), Fields.fields( lhsGroupFields, rhsGroupFields ), declaredFields, resultGroupFields, joiner );
    }

  /**
   * Constructor Splice creates a new Splice instance joining two pipes.
   * Delegates with a null declaredFields.
   *
   * @param lhs            the left hand side pipe
   * @param lhsGroupFields the key fields of the lhs pipe
   * @param rhs            the right hand side pipe
   * @param rhsGroupFields the key fields of the rhs pipe
   * @param joiner         of type Joiner, may be null
   */
  protected Splice( Pipe lhs, Fields lhsGroupFields, Pipe rhs, Fields rhsGroupFields, Joiner joiner )
    {
    this( lhs, lhsGroupFields, rhs, rhsGroupFields, null, joiner );
    }

  /**
   * Constructor Splice creates a new Splice instance joining two pipes.
   * The two pipes and their key fields are wrapped into arrays and handed to the
   * (Pipe[], Fields[]) constructor.
   *
   * @param lhs            the left hand side pipe
   * @param lhsGroupFields the key fields of the lhs pipe
   * @param rhs            the right hand side pipe
   * @param rhsGroupFields the key fields of the rhs pipe
   */
  protected Splice( Pipe lhs, Fields lhsGroupFields, Pipe rhs, Fields rhsGroupFields )
    {
    this( Pipe.pipes( lhs, rhs ), Fields.fields( lhsGroupFields, rhsGroupFields ) );
    }

  /**
   * Constructor Splice creates a new Splice instance over the given pipes without any key fields.
   * Delegates with a null Fields[], the cast selects the (Pipe[], Fields[]) constructor.
   *
   * @param pipes of type Pipe...
   */
  protected Splice( Pipe... pipes )
    {
    this( pipes, (Fields[]) null );
    }

  /**
   * Constructor Splice creates a new Splice instance over the given pipes.
   * Delegates with a null spliceName, declaredFields and resultGroupFields.
   *
   * @param pipes       the input pipes
   * @param groupFields the key fields, indexed like pipes
   */
  protected Splice( Pipe[] pipes, Fields[] groupFields )
    {
    this( null, pipes, groupFields, null, null );
    }


  /**
   * Constructor Splice creates a new named Splice instance over the given pipes.
   * Delegates with a null declaredFields and resultGroupFields.
   *
   * @param spliceName  the name of this Splice, may be null
   * @param pipes       the input pipes
   * @param groupFields the key fields, indexed like pipes
   */
  protected Splice( String spliceName, Pipe[] pipes, Fields[] groupFields )
    {
    this( spliceName, pipes, groupFields, null, null );
    }

  /**
   * Constructor Splice creates a new named Splice instance over the given pipes.
   * Delegates with a null resultGroupFields.
   *
   * @param spliceName     the name of this Splice, may be null
   * @param pipes          the input pipes
   * @param groupFields    the key fields, indexed like pipes
   * @param declaredFields the declared fields of this Splice
   */
  protected Splice( String spliceName, Pipe[] pipes, Fields[] groupFields, Fields declaredFields )
    {
    this( spliceName, pipes, groupFields, declaredFields, null );
    }

  /**
   * Constructor Splice creates a new named Splice instance over the given pipes.
   * Delegates with a null joiner.
   *
   * @param spliceName        the name of this Splice, may be null
   * @param pipes             the input pipes
   * @param groupFields       the key fields, indexed like pipes
   * @param declaredFields    the declared fields of this Splice
   * @param resultGroupFields the result group fields, may be null
   */
  protected Splice( String spliceName, Pipe[] pipes, Fields[] groupFields, Fields declaredFields, Fields resultGroupFields )
    {
    this( spliceName, pipes, groupFields, declaredFields, resultGroupFields, null );
    }

  /**
   * Constructor Splice creates a new Splice instance over the given pipes.
   * Delegates with a null spliceName and a null resultGroupFields.
   *
   * @param pipes          the input pipes
   * @param groupFields    the key fields, indexed like pipes
   * @param declaredFields the declared fields of this Splice
   * @param joiner         of type Joiner, may be null
   */
  protected Splice( Pipe[] pipes, Fields[] groupFields, Fields declaredFields, Joiner joiner )
    {
    this( null, pipes, groupFields, declaredFields, null, joiner );
    }

  /**
   * Constructor Splice creates a new Splice instance over the given pipes.
   * Delegates with a null spliceName.
   *
   * @param pipes             the input pipes
   * @param groupFields       the key fields, indexed like pipes
   * @param declaredFields    the declared fields of this Splice
   * @param resultGroupFields the result group fields, may be null
   * @param joiner            of type Joiner, may be null
   */
  protected Splice( Pipe[] pipes, Fields[] groupFields, Fields declaredFields, Fields resultGroupFields, Joiner joiner )
    {
    this( null, pipes, groupFields, declaredFields, resultGroupFields, joiner );
    }

  /**
   * Constructor Splice creates a new Splice instance over the given pipes. The other Fields[] based
   * constructors of this class end up here.
   * <p/>
   * If more than one pipe is given and all of them resolve to a single distinct pipe, the instance is set up
   * as a self join: only the first pipe is registered and numSelfJoins becomes {@code pipes.length - 1}.
   * Otherwise every pipe is registered with its own key fields, or with {@link Fields#FIRST} when groupFields
   * is null or empty.
   *
   * @param spliceName        the name of this Splice, may be null
   * @param pipes             the input pipes, each must have a name
   * @param groupFields       the key fields, indexed like pipes
   * @param declaredFields    the declared fields of this Splice
   * @param resultGroupFields the result group fields, when not null its size must equal the key field size
   *                          multiplied by the number of pipes
   * @param joiner            of type Joiner, a null joiner is replaced by an InnerJoin
   * @throws IllegalArgumentException if a pipe has no name, pipe names are not unique, the key fields differ
   *                                  in size (or are not identical for a self join), resultGroupFields has
   *                                  the wrong size, or the joiner is rejected by verifyCoGrouper()
   */
  protected Splice( String spliceName, Pipe[] pipes, Fields[] groupFields, Fields declaredFields, Fields resultGroupFields, Joiner joiner )
    {
    setKind();
    this.spliceName = spliceName;

    // number of distinct pipes after resolving through Pipe.resolvePreviousAll
    int uniques = new HashSet<Pipe>( asList( Pipe.resolvePreviousAll( pipes ) ) ).size();

    // several pipes given, but all are the same pipe: treat as a self join
    if( pipes.length > 1 && uniques == 1 )
      {
      // NOTE(review): groupFields is dereferenced without a null check here, unlike the branch below -- confirm
      // callers never pass null key fields for a self join
      if( new HashSet<Fields>( asList( groupFields ) ).size() != 1 )
        throw new IllegalArgumentException( "all groupFields must be identical" );

      addPipe( pipes[ 0 ] );
      this.numSelfJoins = pipes.length - 1;
      this.keyFieldsMap.put( pipes[ 0 ].getName(), groupFields[ 0 ] );

      // one set of key fields per joined stream is expected
      if( resultGroupFields != null && groupFields[ 0 ].size() * pipes.length != resultGroupFields.size() )
        throw new IllegalArgumentException( "resultGroupFields and cogroup joined fields must be same size" );
      }
    else
      {
      // size of the previously seen key fields, -1 until the first key fields have been seen
      int last = -1;
      for( int i = 0; i < pipes.length; i++ )
        {
        addPipe( pipes[ i ] );

        // no key fields given, fall back to the first field of each pipe
        if( groupFields == null || groupFields.length == 0 )
          {
          addGroupFields( pipes[ i ], Fields.FIRST );
          continue;
          }

        if( last != -1 && last != groupFields[ i ].size() )
          throw new IllegalArgumentException( "all groupFields must be same size" );

        last = groupFields[ i ].size();
        addGroupFields( pipes[ i ], groupFields[ i ] );
        }

      // NOTE(review): when no groupFields were given, last is still -1 here, so a non-null resultGroupFields
      // appears to always be rejected -- confirm this is intended
      if( resultGroupFields != null && last * pipes.length != resultGroupFields.size() )
        throw new IllegalArgumentException( "resultGroupFields and cogroup resulting joined fields must be same size" );
      }

    this.declaredFields = declaredFields;
    this.resultGroupFields = resultGroupFields;
    this.joiner = joiner;

    // must run after the kind, key fields and joiner are set, it reads all three
    verifyCoGrouper();
    }

  /**
   * Constructor Splice creates a new named Splice instance joining two pipes.
   * Delegates to the unnamed constructor with the same remaining arguments, then assigns the name.
   *
   * @param spliceName     the name of this Splice, may be null
   * @param lhs            the left hand side pipe
   * @param lhsGroupFields the key fields of the lhs pipe
   * @param rhs            the right hand side pipe
   * @param rhsGroupFields the key fields of the rhs pipe
   * @param declaredFields the declared fields of this Splice
   */
  protected Splice( String spliceName, Pipe lhs, Fields lhsGroupFields, Pipe rhs, Fields rhsGroupFields, Fields declaredFields )
    {
    this( lhs, lhsGroupFields, rhs, rhsGroupFields, declaredFields );
    this.spliceName = spliceName;
    }

  /**
   * Constructor Splice creates a new named Splice instance joining two pipes.
   * Delegates to the unnamed constructor with the same remaining arguments, then assigns the name.
   *
   * @param spliceName        the name of this Splice, may be null
   * @param lhs               the left hand side pipe
   * @param lhsGroupFields    the key fields of the lhs pipe
   * @param rhs               the right hand side pipe
   * @param rhsGroupFields    the key fields of the rhs pipe
   * @param declaredFields    the declared fields of this Splice
   * @param resultGroupFields the result group fields, may be null
   */
  protected Splice( String spliceName, Pipe lhs, Fields lhsGroupFields, Pipe rhs, Fields rhsGroupFields, Fields declaredFields, Fields resultGroupFields )
    {
    this( lhs, lhsGroupFields, rhs, rhsGroupFields, declaredFields, resultGroupFields );
    this.spliceName = spliceName;
    }

  /**
   * Constructor Splice creates a new named Splice instance joining two pipes.
   * Delegates to the unnamed constructor with the same remaining arguments, then assigns the name.
   *
   * @param spliceName     the name of this Splice, may be null
   * @param lhs            the left hand side pipe
   * @param lhsGroupFields the key fields of the lhs pipe
   * @param rhs            the right hand side pipe
   * @param rhsGroupFields the key fields of the rhs pipe
   * @param declaredFields the declared fields of this Splice
   * @param joiner         of type Joiner, may be null
   */
  protected Splice( String spliceName, Pipe lhs, Fields lhsGroupFields, Pipe rhs, Fields rhsGroupFields, Fields declaredFields, Joiner joiner )
    {
    this( lhs, lhsGroupFields, rhs, rhsGroupFields, declaredFields, joiner );
    this.spliceName = spliceName;
    }

  /**
   * Constructor Splice creates a new named Splice instance joining two pipes.
   * Delegates to the unnamed constructor with the same remaining arguments, then assigns the name.
   *
   * @param spliceName        the name of this Splice, may be null
   * @param lhs               the left hand side pipe
   * @param lhsGroupFields    the key fields of the lhs pipe
   * @param rhs               the right hand side pipe
   * @param rhsGroupFields    the key fields of the rhs pipe
   * @param declaredFields    the declared fields of this Splice
   * @param resultGroupFields the result group fields, may be null
   * @param joiner            of type Joiner, may be null
   */
  protected Splice( String spliceName, Pipe lhs, Fields lhsGroupFields, Pipe rhs, Fields rhsGroupFields, Fields declaredFields, Fields resultGroupFields, Joiner joiner )
    {
    this( lhs, lhsGroupFields, rhs, rhsGroupFields, declaredFields, resultGroupFields, joiner );
    this.spliceName = spliceName;
    }

  /**
   * Constructor Splice creates a new named Splice instance joining two pipes.
   * Delegates to the unnamed constructor with the same remaining arguments, then assigns the name.
   *
   * @param spliceName     the name of this Splice, may be null
   * @param lhs            the left hand side pipe
   * @param lhsGroupFields the key fields of the lhs pipe
   * @param rhs            the right hand side pipe
   * @param rhsGroupFields the key fields of the rhs pipe
   * @param joiner         of type Joiner, may be null
   */
  protected Splice( String spliceName, Pipe lhs, Fields lhsGroupFields, Pipe rhs, Fields rhsGroupFields, Joiner joiner )
    {
    this( lhs, lhsGroupFields, rhs, rhsGroupFields, joiner );
    this.spliceName = spliceName;
    }

  /**
   * Constructor Splice creates a new named Splice instance joining two pipes.
   * Delegates to the unnamed constructor with the same remaining arguments, then assigns the name.
   *
   * @param spliceName     the name of this Splice, may be null
   * @param lhs            the left hand side pipe
   * @param lhsGroupFields the key fields of the lhs pipe
   * @param rhs            the right hand side pipe
   * @param rhsGroupFields the key fields of the rhs pipe
   */
  protected Splice( String spliceName, Pipe lhs, Fields lhsGroupFields, Pipe rhs, Fields rhsGroupFields )
    {
    this( lhs, lhsGroupFields, rhs, rhsGroupFields );
    this.spliceName = spliceName;
    }

  /**
   * Constructor Splice creates a new named Splice instance over the given pipes without any key fields.
   * Delegates to the unnamed varargs constructor, then assigns the name.
   *
   * @param spliceName the name of this Splice, may be null
   * @param pipes      of type Pipe...
   */
  protected Splice( String spliceName, Pipe... pipes )
    {
    this( pipes );
    this.spliceName = spliceName;
    }

  /**
   * Constructor Splice creates a new Splice instance joining a pipe with itself.
   * Delegates to the (pipe, groupFields, numSelfJoins) constructor, then assigns the declared fields.
   *
   * @param pipe           the pipe to join with itself
   * @param groupFields    the key fields of the pipe
   * @param numSelfJoins   the number of self joins
   * @param declaredFields the declared fields of this Splice
   */
  protected Splice( Pipe pipe, Fields groupFields, int numSelfJoins, Fields declaredFields )
    {
    this( pipe, groupFields, numSelfJoins );
    this.declaredFields = declaredFields;
    }

  /**
   * Constructor Splice creates a new Splice instance.
   *
   * @param pipe              of type Pipe
   * @param groupFields       of type Fields
   * @param numSelfJoins      of type int
   * @param declaredFields    of type Fields
   * @param resultGroupFields of type Fields
   */
  protected Splice( Pipe pipe, Fields groupFields, int numSelfJoins, Fields declaredFields, Fields resultGroupFields )
    {
    this( pipe, groupFields, numSelfJoins );
    this.declaredFields = declaredFields;
    this.resultGroupFields = resultGroupFields;

    if( resultGroupFields != null && groupFields.size() * numSelfJoins != resultGroupFields.size() )
      throw new IllegalArgumentException( "resultGroupFields and cogroup resulting join fields must be same size" );
    }

  /**
   * Constructor Splice creates a new Splice instance joining a pipe with itself.
   * Delegates to the constructor without a joiner, then assigns the given joiner and verifies it.
   *
   * @param pipe           the pipe to join with itself
   * @param groupFields    the key fields of the pipe
   * @param numSelfJoins   the number of self joins
   * @param declaredFields the declared fields of this Splice
   * @param joiner         of type Joiner, may be null
   */
  protected Splice( Pipe pipe, Fields groupFields, int numSelfJoins, Fields declaredFields, Joiner joiner )
    {
    this( pipe, groupFields, numSelfJoins, declaredFields );
    this.joiner = joiner;

    // the delegated constructor was verified with a null joiner, verify again with the joiner actually given
    verifyCoGrouper();
    }

  /**
   * Constructor Splice creates a new Splice instance joining a pipe with itself.
   * Delegates to the constructor without a joiner, then assigns the given joiner and verifies it.
   *
   * @param pipe              the pipe to join with itself
   * @param groupFields       the key fields of the pipe
   * @param numSelfJoins      the number of self joins
   * @param declaredFields    the declared fields of this Splice
   * @param resultGroupFields the result group fields, may be null
   * @param joiner            of type Joiner, may be null
   */
  protected Splice( Pipe pipe, Fields groupFields, int numSelfJoins, Fields declaredFields, Fields resultGroupFields, Joiner joiner )
    {
    this( pipe, groupFields, numSelfJoins, declaredFields, resultGroupFields );
    this.joiner = joiner;

    // the delegated constructor was verified with a null joiner, verify again with the joiner actually given
    verifyCoGrouper();
    }

  /**
   * Constructor Splice creates a new Splice instance.
   *
   * @param pipe         of type Pipe
   * @param groupFields  of type Fields
   * @param numSelfJoins of type int
   * @param joiner       of type CoGrouper
   */
  protected Splice( Pipe pipe, Fields groupFields, int numSelfJoins, Joiner joiner )
    {
    setKind();
    addPipe( pipe );
    this.keyFieldsMap.put( pipe.getName(), groupFields );
    this.numSelfJoins = numSelfJoins;
    this.joiner = joiner;

    verifyCoGrouper();
    }

  /**
   * Constructor Splice creates a new Splice instance joining a pipe with itself.
   * Delegates with a null joiner, the cast selects the constructor taking a Joiner.
   *
   * @param pipe         the pipe to join with itself
   * @param groupFields  the key fields of the pipe
   * @param numSelfJoins the number of self joins
   */
  protected Splice( Pipe pipe, Fields groupFields, int numSelfJoins )
    {
    this( pipe, groupFields, numSelfJoins, (Joiner) null );
    }

  /**
   * Constructor Splice creates a new named Splice instance joining a pipe with itself.
   * Delegates to the unnamed constructor with the same remaining arguments, then assigns the name.
   *
   * @param spliceName     the name of this Splice, may be null
   * @param pipe           the pipe to join with itself
   * @param groupFields    the key fields of the pipe
   * @param numSelfJoins   the number of self joins
   * @param declaredFields the declared fields of this Splice
   */
  protected Splice( String spliceName, Pipe pipe, Fields groupFields, int numSelfJoins, Fields declaredFields )
    {
    this( pipe, groupFields, numSelfJoins, declaredFields );
    this.spliceName = spliceName;
    }

  /**
   * Constructor Splice creates a new named Splice instance joining a pipe with itself.
   * Delegates to the unnamed constructor with the same remaining arguments, then assigns the name.
   *
   * @param spliceName        the name of this Splice, may be null
   * @param pipe              the pipe to join with itself
   * @param groupFields       the key fields of the pipe
   * @param numSelfJoins      the number of self joins
   * @param declaredFields    the declared fields of this Splice
   * @param resultGroupFields the result group fields, may be null
   */
  protected Splice( String spliceName, Pipe pipe, Fields groupFields, int numSelfJoins, Fields declaredFields, Fields resultGroupFields )
    {
    this( pipe, groupFields, numSelfJoins, declaredFields, resultGroupFields );
    this.spliceName = spliceName;
    }

  /**
   * Constructor Splice creates a new named Splice instance joining a pipe with itself.
   * Delegates to the unnamed constructor with the same remaining arguments, then assigns the name.
   *
   * @param spliceName     the name of this Splice, may be null
   * @param pipe           the pipe to join with itself
   * @param groupFields    the key fields of the pipe
   * @param numSelfJoins   the number of self joins
   * @param declaredFields the declared fields of this Splice
   * @param joiner         of type Joiner, may be null
   */
  protected Splice( String spliceName, Pipe pipe, Fields groupFields, int numSelfJoins, Fields declaredFields, Joiner joiner )
    {
    this( pipe, groupFields, numSelfJoins, declaredFields, joiner );
    this.spliceName = spliceName;
    }

  /**
   * Constructor Splice creates a new named Splice instance joining a pipe with itself.
   * Delegates to the unnamed constructor with the same remaining arguments, then assigns the name.
   *
   * @param spliceName        the name of this Splice, may be null
   * @param pipe              the pipe to join with itself
   * @param groupFields       the key fields of the pipe
   * @param numSelfJoins      the number of self joins
   * @param declaredFields    the declared fields of this Splice
   * @param resultGroupFields the result group fields, may be null
   * @param joiner            of type Joiner, may be null
   */
  protected Splice( String spliceName, Pipe pipe, Fields groupFields, int numSelfJoins, Fields declaredFields, Fields resultGroupFields, Joiner joiner )
    {
    this( pipe, groupFields, numSelfJoins, declaredFields, resultGroupFields, joiner );
    this.spliceName = spliceName;
    }

  /**
   * Constructor Splice creates a new named Splice instance joining a pipe with itself.
   * Delegates to the unnamed constructor with the same remaining arguments, then assigns the name.
   *
   * @param spliceName   the name of this Splice, may be null
   * @param pipe         the pipe to join with itself
   * @param groupFields  the key fields of the pipe
   * @param numSelfJoins the number of self joins
   * @param joiner       of type Joiner, may be null
   */
  protected Splice( String spliceName, Pipe pipe, Fields groupFields, int numSelfJoins, Joiner joiner )
    {
    this( pipe, groupFields, numSelfJoins, joiner );
    this.spliceName = spliceName;
    }

  /**
   * Constructor Splice creates a new named Splice instance joining a pipe with itself.
   * Delegates to the unnamed constructor with the same remaining arguments, then assigns the name.
   *
   * @param spliceName   the name of this Splice, may be null
   * @param pipe         the pipe to join with itself
   * @param groupFields  the key fields of the pipe
   * @param numSelfJoins the number of self joins
   */
  protected Splice( String spliceName, Pipe pipe, Fields groupFields, int numSelfJoins )
    {
    this( pipe, groupFields, numSelfJoins );
    this.spliceName = spliceName;
    }

  ////////////
  // GROUPBY
  ////////////

  /**
   * Constructor Splice creates a new Splice instance where grouping occurs on {@link Fields#ALL} fields.
   * Delegates with a null spliceName, no sort fields and reverseOrder set to false.
   *
   * @param pipe the input pipe
   */
  protected Splice( Pipe pipe )
    {
    this( null, pipe, Fields.ALL, null, false );
    }

  /**
   * Constructor Splice creates a new Splice instance grouping a single pipe.
   * Delegates with a null spliceName, no sort fields and reverseOrder set to false.
   *
   * @param pipe        the input pipe
   * @param groupFields the key fields
   */
  protected Splice( Pipe pipe, Fields groupFields )
    {
    this( null, pipe, groupFields, null, false );
    }

  /**
   * Constructor Splice creates a new named Splice instance grouping a single pipe.
   * Delegates with no sort fields and reverseOrder set to false.
   *
   * @param spliceName  the name of this Splice, may be null
   * @param pipe        the input pipe
   * @param groupFields the key fields
   */
  protected Splice( String spliceName, Pipe pipe, Fields groupFields )
    {
    this( spliceName, pipe, groupFields, null, false );
    }

  /**
   * Constructor Splice creates a new Splice instance grouping and sorting a single pipe.
   * Delegates with a null spliceName and reverseOrder set to false.
   *
   * @param pipe        the input pipe
   * @param groupFields the key fields
   * @param sortFields  the sort fields, may be null
   */
  protected Splice( Pipe pipe, Fields groupFields, Fields sortFields )
    {
    this( null, pipe, groupFields, sortFields, false );
    }

  /**
   * Constructor Splice creates a new named Splice instance grouping and sorting a single pipe.
   * Delegates with reverseOrder set to false.
   *
   * @param spliceName  the name of this Splice, may be null
   * @param pipe        the input pipe
   * @param groupFields the key fields
   * @param sortFields  the sort fields, may be null
   */
  protected Splice( String spliceName, Pipe pipe, Fields groupFields, Fields sortFields )
    {
    this( spliceName, pipe, groupFields, sortFields, false );
    }

  /**
   * Constructor Splice creates a new Splice instance grouping and sorting a single pipe.
   * Delegates with a null spliceName.
   *
   * @param pipe         the input pipe
   * @param groupFields  the key fields
   * @param sortFields   the sort fields, may be null
   * @param reverseOrder true if sorting is reversed
   */
  protected Splice( Pipe pipe, Fields groupFields, Fields sortFields, boolean reverseOrder )
    {
    this( null, pipe, groupFields, sortFields, reverseOrder );
    }

  /**
   * Constructor Splice creates a new named Splice instance grouping and sorting a single pipe.
   * The pipe is wrapped into an array and handed to the Pipe[] based constructor.
   *
   * @param spliceName   the name of this Splice, may be null
   * @param pipe         the input pipe
   * @param groupFields  the key fields
   * @param sortFields   the sort fields, may be null
   * @param reverseOrder true if sorting is reversed
   */
  protected Splice( String spliceName, Pipe pipe, Fields groupFields, Fields sortFields, boolean reverseOrder )
    {
    this( spliceName, Pipe.pipes( pipe ), groupFields, sortFields, reverseOrder );
    }

  /**
   * Constructor Splice creates a new Splice instance grouping the given pipes on the same key fields.
   * Delegates with a null spliceName, no sort fields and reverseOrder set to false.
   *
   * @param pipes       of type Pipe[]
   * @param groupFields the key fields applied to every pipe
   */
  protected Splice( Pipe[] pipes, Fields groupFields )
    {
    this( null, pipes, groupFields, null, false );
    }

  /**
   * Constructor Splice creates a new named Splice instance grouping the given pipes on the same key fields.
   * Delegates with no sort fields and reverseOrder set to false.
   *
   * @param spliceName  the name of this Splice, may be null
   * @param pipes       of type Pipe[]
   * @param groupFields the key fields applied to every pipe
   */
  protected Splice( String spliceName, Pipe[] pipes, Fields groupFields )
    {
    this( spliceName, pipes, groupFields, null, false );
    }

  /**
   * Constructor Splice creates a new Splice instance grouping and sorting the given pipes.
   * Delegates with a null spliceName and reverseOrder set to false.
   *
   * @param pipes       of type Pipe[]
   * @param groupFields the key fields applied to every pipe
   * @param sortFields  the sort fields applied to every pipe, may be null
   */
  protected Splice( Pipe[] pipes, Fields groupFields, Fields sortFields )
    {
    this( null, pipes, groupFields, sortFields, false );
    }

  /**
   * Constructor Splice creates a new named Splice instance grouping and sorting the given pipes.
   * Delegates with reverseOrder set to false.
   *
   * @param spliceName  the name of this Splice, may be null
   * @param pipe        of type Pipe[], the input pipes
   * @param groupFields the key fields applied to every pipe
   * @param sortFields  the sort fields applied to every pipe, may be null
   */
  protected Splice( String spliceName, Pipe[] pipe, Fields groupFields, Fields sortFields )
    {
    this( spliceName, pipe, groupFields, sortFields, false );
    }

  /**
   * Constructor Splice creates a new Splice instance grouping and sorting the given pipes.
   * Delegates with a null spliceName.
   *
   * @param pipes        of type Pipe[]
   * @param groupFields  the key fields applied to every pipe
   * @param sortFields   the sort fields applied to every pipe, may be null
   * @param reverseOrder true if sorting is reversed
   */
  protected Splice( Pipe[] pipes, Fields groupFields, Fields sortFields, boolean reverseOrder )
    {
    this( null, pipes, groupFields, sortFields, reverseOrder );
    }

  /**
   * Constructor Splice creates a new Splice instance.
   *
   * @param spliceName   of type String
   * @param pipes        of type Pipe[]
   * @param groupFields  of type Fields
   * @param sortFields   of type Fields
   * @param reverseOrder of type boolean
   */
  protected Splice( String spliceName, Pipe[] pipes, Fields groupFields, Fields sortFields, boolean reverseOrder )
    {
    setKind();
    this.spliceName = spliceName;

    for( Pipe pipe : pipes )
      {
      addPipe( pipe );
      this.keyFieldsMap.put( pipe.getName(), groupFields );

      if( sortFields != null )
        this.sortFieldsMap.put( pipe.getName(), sortFields );
      }

    this.reverseOrder = reverseOrder;
    this.joiner = new InnerJoin();
    }

  private void verifyCoGrouper()
    {
    if( isJoin() && joiner instanceof BufferJoin )
      throw new IllegalArgumentException( "invalid joiner, may not use BufferJoiner in a HashJoin" );

    if( joiner == null )
      {
      joiner = new InnerJoin();
      return;
      }

    if( joiner.numJoins() == -1 )
      return;

    int joins = Math.max( numSelfJoins, keyFieldsMap.size() - 1 ); // joining two streams is one join

    if( joins != joiner.numJoins() )
      throw new IllegalArgumentException( "invalid joiner, only accepts " + joiner.numJoins() + " joins, there are: " + joins );
    }

  private void setKind()
    {
    if( this instanceof GroupBy )
      kind = Kind.GroupBy;
    else if( this instanceof CoGroup )
      kind = Kind.CoGroup;
    else if( this instanceof Merge )
      kind = Kind.Merge;
    else
      kind = Kind.Join;
    }

  /**
   * Method getDeclaredFields returns the declaredFields of this Splice object.
   *
   * @return the declaredFields (type Fields) of this Splice object.
   */
  public Fields getDeclaredFields()
    {
    return declaredFields;
    }

  private void addPipe( Pipe pipe )
    {
    if( pipe.getName() == null )
      throw new IllegalArgumentException( "each input pipe must have a name" );

    pipes.add( pipe ); // allow same pipe
    }

  private void addGroupFields( Pipe pipe, Fields fields )
    {
    if( keyFieldsMap.containsKey( pipe.getName() ) )
      throw new IllegalArgumentException( "each input pipe branch must be uniquely named" );

    keyFieldsMap.put( pipe.getName(), fields );
    }

  @Override
  public String getName()
    {
    if( spliceName != null )
      return spliceName;

    StringBuffer buffer = new StringBuffer();

    for( Pipe pipe : pipes )
      {
      if( buffer.length() != 0 )
        {
        if( isGroupBy() || isMerge() )
          buffer.append( "+" );
        else if( isCoGroup() || isJoin() )
          buffer.append( "*" ); // more semantically correct
        }

      buffer.append( pipe.getName() );
      }

    spliceName = buffer.toString();

    return spliceName;
    }

  @Override
  public Pipe[] getPrevious()
    {
    return pipes.toArray( new Pipe[ pipes.size() ] );
    }

  /**
   * Method getGroupingSelectors returns the groupingSelectors of this Splice object.
   *
   * @return the groupingSelectors (type Map<String, Fields>) of this Splice object.
   */
  public Map<String, Fields> getKeySelectors()
    {
    return keyFieldsMap;
    }

  /**
   * Method getSortingSelectors returns the sortingSelectors of this Splice object.
   *
   * @return the sortingSelectors (type Map<String, Fields>) of this Splice object.
   */
  public Map<String, Fields> getSortingSelectors()
    {
    return sortFieldsMap;
    }

  /**
   * Method isSorted returns true if this Splice instance is sorting values other than the group fields.
   *
   * @return the sorted (type boolean) of this Splice object.
   */
  public boolean isSorted()
    {
    return !sortFieldsMap.isEmpty();
    }

  /**
   * Method isSortReversed returns true if sorting is reversed.
   *
   * @return the sortReversed (type boolean) of this Splice object.
   */
  public boolean isSortReversed()
    {
    return reverseOrder;
    }

  public synchronized Map<String, Integer> getPipePos()
    {
    if( pipePos != null )
      return pipePos;

    pipePos = new HashMap<String, Integer>();

    int pos = 0;
    for( Object pipe : pipes )
      pipePos.put( ( (Pipe) pipe ).getName(), pos++ );

    return pipePos;
    }

  public Joiner getJoiner()
    {
    return joiner;
    }

  /**
   * Method isGroupBy returns true if this Splice instance will perform a GroupBy operation.
   *
   * @return the groupBy (type boolean) of this Splice object.
   */
  public final boolean isGroupBy()
    {
    return kind == Kind.GroupBy;
    }

  public final boolean isCoGroup()
    {
    return kind == Kind.CoGroup;
    }

  public final boolean isMerge()
    {
    return kind == Kind.Merge;
    }

  public final boolean isJoin()
    {
    return kind == Kind.Join;
    }

  public int getNumSelfJoins()
    {
    return numSelfJoins;
    }

  public boolean isSelfJoin()
    {
    return numSelfJoins != 0;
    }

  // FIELDS

  @Override
  public Scope outgoingScopeFor( Set<Scope> incomingScopes )
    {
    Map<String, Fields> groupingSelectors = resolveGroupingSelectors( incomingScopes );
    Map<String, Fields> sortingSelectors = resolveSortingSelectors( incomingScopes );
    Fields declared = resolveDeclared( incomingScopes );

    Fields outGroupingFields = resultGroupFields;

    if( outGroupingFields == null && isCoGroup() )
      outGroupingFields = createJoinFields( incomingScopes, groupingSelectors, declared );

    // for Group, the outgoing fields are the same as those declared
    Scope.Kind kind = getScopeKind();

    return new Scope( getName(), declared, outGroupingFields, groupingSelectors, sortingSelectors, declared, kind );
    }

  private Scope.Kind getScopeKind()
    {
    switch( kind )
      {
      case GroupBy:
        return Scope.Kind.GROUPBY;
      case CoGroup:
        return Scope.Kind.COGROUP;
      case Merge:
        return Scope.Kind.MERGE;
      case Join:
        return Scope.Kind.HASHJOIN;
      }

    throw new IllegalStateException( "unknown kind: " + kind );
    }

  private Fields createJoinFields( Set<Scope> incomingScopes, Map<String, Fields> groupingSelectors, Fields declared )
    {
    if( declared.isNone() )
      declared = Fields.UNKNOWN;

    Map<String, Fields> incomingFields = new HashMap<String, Fields>();

    for( Scope scope : incomingScopes )
      incomingFields.put( scope.getName(), scope.getIncomingSpliceFields() );

    Fields outGroupingFields = Fields.NONE;

    int offset = 0;
    for( Pipe pipe : pipes ) // need to retain order of pipes
      {
      String pipeName = pipe.getName();
      Fields pipeGroupingSelector = groupingSelectors.get( pipeName );
      Fields incomingField = incomingFields.get( pipeName );

      if( !pipeGroupingSelector.isNone() )
        {
        Fields offsetFields = incomingField.selectPos( pipeGroupingSelector, offset );
        Fields resolvedSelect = declared.select( offsetFields );

        outGroupingFields = outGroupingFields.append( resolvedSelect );
        }

      offset += incomingField.size();
      }

    return outGroupingFields;
    }

  Map<String, Fields> resolveGroupingSelectors( Set<Scope> incomingScopes )
    {
    try
      {
      Map<String, Fields> groupingSelectors = getKeySelectors();
      Map<String, Fields> groupingFields = resolveSelectorsAgainstIncoming( incomingScopes, groupingSelectors, "grouping" );

      if( !verifySameSize( groupingFields ) )
        throw new OperatorException( this, "all grouping fields must be same size: " + toString() );

      verifySameTypes( groupingSelectors, groupingFields );

      return groupingFields;
      }
    catch( FieldsResolverException exception )
      {
      throw new OperatorException( this, OperatorException.Kind.grouping, exception.getSourceFields(), exception.getSelectorFields(), exception );
      }
    catch( RuntimeException exception )
      {
      throw new OperatorException( this, "could not resolve grouping selector in: " + this, exception );
      }
    }

  private boolean verifySameTypes( Map<String, Fields> groupingSelectors, Map<String, Fields> groupingFields )
    {
    // create array of field positions with comparators from the grouping selectors
    // unsure which side has the comparators declared so make a union
    boolean[] hasComparator = new boolean[ groupingFields.values().iterator().next().size() ];

    for( Map.Entry<String, Fields> entry : groupingSelectors.entrySet() )
      {
      Comparator[] comparatorsArray = entry.getValue().getComparators();

      for( int i = 0; i < comparatorsArray.length; i++ )
        hasComparator[ i ] = hasComparator[ i ] || comparatorsArray[ i ] != null;
      }

    // compare all the rhs fields with the lhs (lhs and rhs are arbitrary here)
    Iterator<Fields> iterator = groupingFields.values().iterator();
    Fields lhsFields = iterator.next();
    Type[] lhsTypes = lhsFields.getTypes();

    // if types are null, no basis for comparison
    if( lhsTypes == null )
      return true;

    while( iterator.hasNext() )
      {
      Fields rhsFields = iterator.next();
      Type[] rhsTypes = rhsFields.getTypes();

      // if types are null, no basis for comparison
      if( rhsTypes == null )
        return true;

      for( int i = 0; i < lhsTypes.length; i++ )
        {
        if( hasComparator[ i ] )
          continue;

        Type lhs = lhsTypes[ i ];
        Type rhs = rhsTypes[ i ];

        lhs = getCanonicalType( lhs );
        rhs = getCanonicalType( rhs );

        if( lhs.equals( rhs ) )
          continue;

        Fields lhsError = new Fields( lhsFields.get( i ), lhsFields.getType( i ) );
        Fields rhsError = new Fields( rhsFields.get( i ), rhsFields.getType( i ) );

        throw new OperatorException( this, "grouping fields must declare same types:" + lhsError.printVerbose() + " not same as " + rhsError.printVerbose() );
        }
      }

    return true;
    }

  private Type getCanonicalType( Type type )
    {
    if( type instanceof CoercibleType )
      type = ( (CoercibleType) type ).getCanonicalType();

    // if one side is primitive, normalize to its primitive wrapper type
    if( type instanceof Class )
      type = Coercions.asNonPrimitive( (Class) type );

    return type;
    }

  private boolean verifySameSize( Map<String, Fields> groupingFields )
    {
    Iterator<Fields> iterator = groupingFields.values().iterator();
    int size = iterator.next().size();

    while( iterator.hasNext() )
      {
      Fields groupingField = iterator.next();

      if( groupingField.size() != size )
        return false;

      size = groupingField.size();
      }

    return true;
    }

  private Map<String, Fields> resolveSelectorsAgainstIncoming( Set<Scope> incomingScopes, Map<String, Fields> selectors, String type )
    {
    Map<String, Fields> resolvedFields = new HashMap<String, Fields>();

    for( Scope incomingScope : incomingScopes )
      {
      Fields selector = selectors.get( incomingScope.getName() );

      if( selector == null )
        throw new OperatorException( this, "no " + type + " selector found for: " + incomingScope.getName() );

      Fields incomingFields;

      if( selector.isNone() )
        incomingFields = Fields.NONE;
      else if( selector.isAll() )
        incomingFields = incomingScope.getIncomingSpliceFields();
      else if( selector.isGroup() )
        incomingFields = incomingScope.getOutGroupingFields();
      else if( selector.isValues() )
        incomingFields = incomingScope.getOutValuesFields().subtract( incomingScope.getOutGroupingFields() );
      else
        incomingFields = incomingScope.getIncomingSpliceFields().select( selector );

      resolvedFields.put( incomingScope.getName(), incomingFields );
      }

    return resolvedFields;
    }

  Map<String, Fields> resolveSortingSelectors( Set<Scope> incomingScopes )
    {
    try
      {
      if( getSortingSelectors().isEmpty() )
        return null;

      return resolveSelectorsAgainstIncoming( incomingScopes, getSortingSelectors(), "sorting" );
      }
    catch( FieldsResolverException exception )
      {
      throw new OperatorException( this, OperatorException.Kind.sorting, exception.getSourceFields(), exception.getSelectorFields(), exception );
      }
    catch( RuntimeException exception )
      {
      throw new OperatorException( this, "could not resolve sorting selector in: " + this, exception );
      }
    }

  @Override
  public Fields resolveIncomingOperationPassThroughFields( Scope incomingScope )
    {
    return incomingScope.getIncomingSpliceFields();
    }

  /**
   * Resolves the fields this Splice declares for its outgoing tuple stream.
   *
   * Three cases are handled, in this order:
   * <ol>
   * <li>the join declared fields are Fields.NONE: Fields.NONE is returned, which is only accepted for a CoGroup</li>
   * <li>the join declared fields are otherwise non-null: their size is checked against the incoming
   * splice fields (skipped if any of those is unknown) and the incoming field types are applied by position</li>
   * <li>there are no join declared fields: a GroupBy or Merge requires all incoming splice fields to satisfy
   * equalsFields and returns them; any other kind appends the incoming splice fields in pipe order</li>
   * </ol>
   *
   * @param incomingScopes the scopes arriving at this Splice
   * @return the resolved declared fields
   * @throws OperatorException if the declared fields cannot be resolved; an OperatorException raised inside is
   *                           rethrown untouched, any other RuntimeException is wrapped in one
   */
  Fields resolveDeclared( Set<Scope> incomingScopes )
    {
    try
      {
      Fields declaredFields = getJoinDeclaredFields();

      // Fields.NONE is a flag to the CoGroup the following Buffer will use the JoinerClosure directly
      if( declaredFields != null && declaredFields.isNone() )
        {
        // not an OperatorException, so the outer RuntimeException handler below wraps it in one
        if( !isCoGroup() )
          throw new IllegalArgumentException( "Fields.NONE may only be declared as the join fields when using a CoGroup" );

        return Fields.NONE;
        }

      if( declaredFields != null ) // null for GroupBy
        {
        if( incomingScopes.size() != pipes.size() && isSelfJoin() )
          throw new OperatorException( this, "self joins without intermediate operators are not permitted, see 'numSelfJoins' constructor or identity function" );

        // total number of incoming fields across all branches, and whether any branch is unknown
        int size = 0;
        boolean foundUnknown = false;

        List<Fields> appendableFields = getOrderedResolvedFields( incomingScopes );

        for( Fields fields : appendableFields )
          {
          foundUnknown = foundUnknown || fields.isUnknown();
          size += fields.size();
          }

        // we must relax field checking in the face of unknown fields
        // the expected size is the incoming size multiplied by ( numSelfJoins + 1 )
        if( !foundUnknown && declaredFields.size() != size * ( numSelfJoins + 1 ) )
          {
          if( isSelfJoin() )
            throw new OperatorException( this, "declared grouped fields not same size as grouped values, declared: " + declaredFields.printVerbose() + " != size: " + size * ( numSelfJoins + 1 ) );
          else
            throw new OperatorException( this, "declared grouped fields not same size as grouped values, declared: " + declaredFields.printVerbose() + " resolved: " + Util.print( appendableFields, "" ) );
          }

        // carry the incoming field types over to the declared fields, i tracks the declared position
        int i = 0;
        for( Fields appendableField : appendableFields )
          {
          Type[] types = appendableField.getTypes();

          // no types on this branch, skip over its positions
          if( types == null )
            {
            i += appendableField.size();
            continue;
            }

          for( Type type : types )
            {
            if( type != null )
              declaredFields = declaredFields.applyType( i, type );

            i++;
            }
          }

        return declaredFields;
        }

      // support merge or cogrouping here
      if( isGroupBy() || isMerge() )
        {
        // the first incoming scope is the reference every other scope is compared against
        Iterator<Scope> iterator = incomingScopes.iterator();
        Fields commonFields = iterator.next().getIncomingSpliceFields();

        while( iterator.hasNext() )
          {
          Scope incomingScope = iterator.next();
          Fields fields = incomingScope.getIncomingSpliceFields();

          if( !commonFields.equalsFields( fields ) )
            throw new OperatorException( this, "merged streams must declare the same field names, in the same order, expected: " + commonFields.printVerbose() + " found: " + fields.printVerbose() );
          }

        return commonFields;
        }
      else
        {
        List<Fields> appendableFields = getOrderedResolvedFields( incomingScopes );
        Fields appendedFields = new Fields();

        try
          {
          // will fail on name collisions
          for( Fields appendableField : appendableFields )
            appendedFields = appendedFields.append( appendableField );
          }
        catch( TupleException exception )
          {
          // list every incoming field set in the message so the duplicate can be located
          String fields = "";

          for( Fields appendableField : appendableFields )
            fields += appendableField.print();

          throw new OperatorException( this, "found duplicate field names in joined tuple stream: " + fields, exception );
          }

        return appendedFields;
        }
      }
    catch( OperatorException exception )
      {
      // already carries the operator context, pass it through without re-wrapping
      throw exception;
      }
    catch( RuntimeException exception )
      {
      throw new OperatorException( this, "could not resolve declared fields in: " + this, exception );
      }
    }

  public Fields getJoinDeclaredFields()
    {
    Fields declaredFields = getDeclaredFields();

    if( !( joiner instanceof DeclaresResults ) )
      return declaredFields;

    if( declaredFields == null && ( (DeclaresResults) joiner ).getFieldDeclaration() != null )
      declaredFields = ( (DeclaresResults) joiner ).getFieldDeclaration();

    return declaredFields;
    }

  private List<Fields> getOrderedResolvedFields( Set<Scope> incomingScopes )
    {
    Map<String, Scope> scopesMap = new HashMap<String, Scope>();

    for( Scope incomingScope : incomingScopes )
      scopesMap.put( incomingScope.getName(), incomingScope );

    List<Fields> appendableFields = new ArrayList<Fields>();

    for( Pipe pipe : pipes )
      appendableFields.add( scopesMap.get( pipe.getName() ).getIncomingSpliceFields() );
    return appendableFields;
    }

  @Override
  public boolean isEquivalentTo( FlowElement element )
    {
    boolean equivalentTo = super.isEquivalentTo( element );

    if( !equivalentTo )
      return equivalentTo;

    Splice splice = (Splice) element;

    if( !keyFieldsMap.equals( splice.keyFieldsMap ) )
      return false;

    if( !pipes.equals( splice.pipes ) )
      return false;

    return true;
    }

  // OBJECT OVERRIDES

  @Override
  @SuppressWarnings({"RedundantIfStatement"})
  public boolean equals( Object object )
    {
    if( this == object )
      return true;
    if( object == null || getClass() != object.getClass() )
      return false;
    if( !super.equals( object ) )
      return false;

    Splice splice = (Splice) object;

    if( spliceName != null ? !spliceName.equals( splice.spliceName ) : splice.spliceName != null )
      return false;
    if( keyFieldsMap != null ? !keyFieldsMap.equals( splice.keyFieldsMap ) : splice.keyFieldsMap != null )
      return false;
    if( pipes != null ? !pipes.equals( splice.pipes ) : splice.pipes != null )
      return false;

    return true;
    }

  @Override
  public int hashCode()
    {
    int result = super.hashCode();
    result = 31 * result + ( pipes != null ? pipes.hashCode() : 0 );
    result = 31 * result + ( keyFieldsMap != null ? keyFieldsMap.hashCode() : 0 );
    result = 31 * result + ( spliceName != null ? spliceName.hashCode() : 0 );
    return result;
    }

  @Override
  public String toString()
    {
    StringBuilder buffer = new StringBuilder( super.toString() );

    buffer.append( "[by:" );

    for( String name : keyFieldsMap.keySet() )
      {
      if( keyFieldsMap.size() > 1 )
        buffer.append( " " ).append( name ).append( ":" );

      buffer.append( keyFieldsMap.get( name ).printVerbose() );
      }

    if( isSelfJoin() )
      buffer.append( "[numSelfJoins:" ).append( numSelfJoins ).append( "]" );

    buffer.append( "]" );

    return buffer.toString();
    }

  @Override
  protected void printInternal( StringBuffer buffer, Scope scope )
    {
    super.printInternal( buffer, scope );
    Map<String, Fields> map = scope.getKeySelectors();

    if( map != null )
      {
      buffer.append( "[by:" );

      // important to retain incoming pipe order
      for( Map.Entry<String, Fields> entry : keyFieldsMap.entrySet() )
        {
        String name = entry.getKey();

        if( map.size() > 1 )
          buffer.append( name ).append( ":" );

        Fields keys = map.get( name );

        // if keys null, this is likely an edge contracted map
        if( keys == null )
          buffer.append( "<unavailable>" );
        else
          buffer.append( keys.print() ); // get resolved keys
        }

      if( isSelfJoin() )
        buffer.append( "[numSelfJoins:" ).append( numSelfJoins ).append( "]" );

      buffer.append( "]" );
      }
    }
  }
TOP

Related Classes of cascading.pipe.Splice

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact: coftware#gmail.com.