// Package cascading.lingual.optiq
//
// Source Code of cascading.lingual.optiq.ProgramUtil

/*
* Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package cascading.lingual.optiq;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;

import net.hydromatic.linq4j.Ord;
import org.eigenbase.reltype.RelDataType;
import org.eigenbase.reltype.RelDataTypeField;
import org.eigenbase.reltype.RelRecordType;
import org.eigenbase.rex.RexBuilder;
import org.eigenbase.rex.RexLiteral;
import org.eigenbase.rex.RexLocalRef;
import org.eigenbase.rex.RexNode;
import org.eigenbase.rex.RexProgram;
import org.eigenbase.rex.RexProgramBuilder;
import org.eigenbase.util.Pair;
import org.eigenbase.util.Permutation;

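/**
* Static helpers for inspecting a {@link RexProgram} (functions, constants, renames, retains)
* and for splitting it into a chain of simpler programs, each of which can be implemented
* in a single Cascading operation.
*/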
class ProgramUtil
  {
  private static boolean hasFunctions( RexProgram program )
    {
    List<RexLocalRef> projects = program.getProjectList();
    RelDataType inputRowType = program.getInputRowType();
    int fieldCount = inputRowType.getFieldCount();

    for( RexLocalRef project : projects )
      {
      if( !program.isConstant( project ) && project.getIndex() >= fieldCount )
        return true;
      }

    return false;
    }

  private static boolean hasConstants( RexProgram program )
    {
    List<RexLocalRef> projects = program.getProjectList();

    for( RexLocalRef project : projects )
      {
      if( program.isConstant( project ) )
        return true;
      }

    return false;
    }

  private static boolean isOnlyRename( RexProgram program )
    {
    final List<String> inputFieldNames = program.getInputRowType().getFieldNames();
    final List<String> outputFieldNames = program.getOutputRowType().getFieldNames();

    return inputFieldNames.size() == outputFieldNames.size()
      && unique( inputFieldNames )
      && unique( outputFieldNames )
      && !inputFieldNames.equals( outputFieldNames );
    }

  /**
   * Returns whether a program returns a subset of its input fields. Fields
   * are in the same order, and no input field is output more than once.
   * There is no condition or non-trivial expressions.
   */
  private static boolean isRetain( RexProgram program, boolean allowRename )
    {
    if( program.getCondition() != null )
      return false;

    if( program.getExprList().size() > program.getInputRowType().getFieldCount() )
      return false;

    final HashSet<Integer> used = new HashSet<Integer>();

    for( Pair<RexLocalRef, String> pair : program.getNamedProjects() )
      {
      final int index = pair.left.getIndex();

      if( !used.add( index ) )
        return false; // field used more than once

      if( !allowRename && !program.getInputRowType().getFieldNames().get( index ).equals( pair.right ) )
        return false; // field projected with different name
      }

    return true;
    }

  private static boolean isComplex( RexProgram program )
    {
    final List<String> inputFieldNames = program.getInputRowType().getFieldNames();
    final List<String> outputFieldNames = program.getOutputRowType().getFieldNames();

    return !( inputFieldNames.size() == outputFieldNames.size()
      && unique( inputFieldNames )
      && unique( outputFieldNames ) );
    }

  public static RelDataType getOutputConstantsRowType( RexProgram program )
    {
    RelDataType outputRowType = program.getOutputRowType();
    List<RelDataTypeField> fields = new ArrayList<RelDataTypeField>();
    List<RexLocalRef> projectList = program.getProjectList();

    for( int i = 0; i < projectList.size(); i++ )
      {
      RexLocalRef ref = projectList.get( i );

      if( !program.isConstant( ref ) )
        continue;

      fields.add( outputRowType.getFieldList().get( i ) );
      }

    return new RelRecordType( fields );
    }

  public static List<RexLiteral> getOutputConstantsLiterals( RexProgram program )
    {
    List<RexNode> exprList = program.getExprList();
    List<RexLiteral> literals = new ArrayList<RexLiteral>();

    List<RexLocalRef> projectList = program.getProjectList();

    for( RexLocalRef ref : projectList )
      {
      if( !program.isConstant( ref ) )
        continue;

      literals.add( (RexLiteral) exprList.get( ref.getIndex() ) );
      }

    return literals;
    }

  public static List<Object> asValues2( List<RexLiteral> literals )
    {
    List<Object> values = new ArrayList<Object>();

    for( RexLiteral literal : literals )
      values.add( literal.getValue2() );

    return values;
    }

  static <E> boolean unique( List<E> elements )
    {
    return new HashSet<E>( elements ).size() == elements.size();
    }

  public static Analyzed analyze( RexProgram program )
    {
    return new Analyzed(
      program,
      hasFunctions( program ),
      hasConstants( program ),
      isComplex( program ),
      isOnlyRename( program ),
      program.getPermutation() );
    }

  static class Analyzed
    {
    private final RexProgram program;
    public final boolean hasFunctions;
    public final boolean hasConstants;
    public final boolean isComplex;
    public final boolean isOnlyRename;
    public final Permutation permutation;

    public Analyzed(
      RexProgram program,
      boolean hasFunctions,
      boolean hasConstants,
      boolean isComplex,
      boolean isOnlyRename,
      Permutation permutation )
      {
      this.program = program;
      this.hasFunctions = hasFunctions;
      this.hasConstants = hasConstants;
      this.isComplex = isComplex;
      this.isOnlyRename = isOnlyRename;
      this.permutation = permutation;
      }

    public boolean isFilter()
      {
      return program.getCondition() != null;
      }

    public boolean isRetainWithRename()
      {
      return ProgramUtil.isRetain( program, true );
      }

    public boolean isIdentity()
      {
      if( program.getCondition() != null || !program.getInputRowType().equals( program.getOutputRowType() ) )
        return false;

      for( int i = 0; i < program.getProjectList().size(); i++ )
        {
        RexLocalRef ref = program.getProjectList().get( i );

        if( ref.getIndex() != i )
          return false;
        }

      return true;
      }
    }

  /*
  if is non-trivial permutation
    add Rename
  else
    if has filter
      add ScriptFilter        # addFilter
    if is complex
      add ScriptTupleFunction # addFunction
    else
      if has functions
        add function
      if has constants
        add Insert (addConstants)
      if is rename
        if duplicate fields
          add Discard
        add Rename
  add Retain
  add Debug
   */

  enum Op
    {
      FILTER,
      FUNCTION,
      CONSTANT,
      DISCARD,
      RENAME,
      RETAIN
    }

  /**
   * A program decomposed into several programs, each of which can be
   * implemented in a single Cascading operation.
   */
  public static class Split
    {
    public final List<Pair<Op, RexProgram>> list;

    public Split( List<Pair<Op, RexProgram>> list )
      {
      this.list = list;

      /*
      assert filter == null
             || analyze( filter ).isPureFilter();
      assert function == null
             || analyze( function ).isPureFunction();
      assert constant == null
             || analyze( constant ).isPureConstant();
      assert discard == null
             || analyze( discard ).isPureDiscard();
      assert rename == null
             || analyze( rename ).isPureRename();
      assert retain == null
             || analyze( retain ).isPureRetain();
*/
      Pair<Op, RexProgram> previous = null;

      for( Pair<Op, RexProgram> pair : list )
        {
        if( previous != null )
          assert previous.right.getOutputRowType().equals( pair.right.getInputRowType() );

        previous = pair;
        }
      }

    public static Split of( RexProgram program, RexBuilder rexBuilder )
      {
      final RexProgram program0 = program; // for debug
      program = normalize( program, rexBuilder );
      final List<Pair<Op, RexProgram>> list = new ArrayList<Pair<Op, RexProgram>>();

      // Holds the previous link in the chain. The initial identity program is
      // not used, but is a convenient place to hold the row type.
      RexProgram previous = RexProgram.createIdentity( program.getInputRowType() );

      for( int count = 0; program != null; count++ )
        {
        // We rely on unique field names everywhere. Otherwise all bets are off.
        if( !unique( program.getInputRowType().getFieldNames() ) )
          throw new AssertionError();

        if( !unique( program.getOutputRowType().getFieldNames() ) )
          throw new AssertionError();

        if( program.equals( previous ) )
          break;

        // We should need no more than one call to each kind of operator (RENAME, RETAIN, etc.) so if
        // we're not done now, we'll never be.
        if( count > 10 )
          throw new AssertionError( "program cannot be simplified after " + count + " iterations:" + program );

        final Analyzed analyze = ProgramUtil.analyze( program );

        if( analyze.isIdentity() )
          break;

        if( analyze.permutation != null && !analyze.permutation.isIdentity() )
          {
          if( analyze.hasConstants || analyze.hasFunctions )
            throw new IllegalStateException( "permutation projection has constant and function transforms" );

          final RexProgramBuilder builder = new RexProgramBuilder( previous.getOutputRowType(), rexBuilder );

          for( int i = 0; i < analyze.permutation.getTargetCount(); i++ )
            {
            final int target = analyze.permutation.getTarget( i );
            builder.addProject( target, null );
            }

          previous = builder.getProgram();
          list.add( Pair.of( Op.RENAME, previous ) );
          break;
          }

        if( analyze.isFilter() )
          {
          // Build a program that has a condition (a possibly complex expression)
          // but projects all inputs.
          final RexProgramBuilder builder = new RexProgramBuilder( previous.getOutputRowType(), rexBuilder );
          builder.addIdentity();
          builder.addCondition( program.gatherExpr( program.getCondition() ) );
          previous = builder.getProgram();
          list.add( Pair.of( Op.FILTER, previous ) );

          // Remove condition from the remaining program.
          final RexProgramBuilder builder2 = RexProgramBuilder.forProgram( program, rexBuilder, false );
          builder2.clearCondition();
          program = builder2.getProgram( true );
          continue;
          }

        // TODO: remove "|| analyze.hasConstants" and generate a CONSTANTS slice
        if( analyze.isComplex || analyze.hasFunctions || analyze.hasConstants )
          {
          previous = program;
          list.add( Pair.of( Op.FUNCTION, previous ) );
          break;
          }

        if( analyze.hasConstants )
          {
          final RexProgramBuilder builder = new RexProgramBuilder( previous.getOutputRowType(), rexBuilder );

          if( true )
            throw new AssertionError(); // TODO:

          previous = builder.getProgram();
          list.add( Pair.of( Op.CONSTANT, previous ) );
          continue;
          }

        if( analyze.isOnlyRename )
          {
          // Create a program that projects all of its inputs, in order, but with different names.
          final RexProgramBuilder builder = new RexProgramBuilder( previous.getOutputRowType(), rexBuilder );
          final List<String> outputFieldNames = new ArrayList<String>( program.getInputRowType().getFieldNames() );

          for( Ord<String> name : Ord.zip( program.getOutputRowType().getFieldNames() ) )
            {
            final int source = program.getSourceField( name.i );

            if( source >= 0 )
              outputFieldNames.set( source, name.e );
            }

          for( int i = 0; i < outputFieldNames.size(); i++ )
            builder.addProject( i, outputFieldNames.get( i ) );

          previous = builder.getProgram();
          list.add( Pair.of( Op.RENAME, previous ) );

          // We're done. Remaining program would be the identity.
          break;
          }

        if( analyze.isRetainWithRename() )
          {
          final RexProgramBuilder builder = new RexProgramBuilder( previous.getOutputRowType(), rexBuilder );
          builder.addIdentity();
          builder.clearProjects();

          for( RexLocalRef pair : program.getProjectList() )
            {
            final int index = pair.getIndex();
            builder.addProject( index, program.getInputRowType().getFieldNames().get( index ) );
            }

          previous = builder.getProgram();
          list.add( Pair.of( Op.RETAIN, previous ) );

          // There may or may not be renames left.
          program = RexProgram.createIdentity( previous.getOutputRowType(), program.getOutputRowType() );

          continue;
          }

        throw new AssertionError( "program cannot be simplified: " + program );
        }

      return new Split( list );
      }
    }

  private static RexProgram normalize( RexProgram program, RexBuilder rexBuilder )
    {
    return RexProgramBuilder.forProgram( program, rexBuilder, true ).getProgram( false );
    }
  }
// TOP
//
// Related Classes of cascading.lingual.optiq.ProgramUtil
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.