Package com.ebay.erl.mobius.core

Source Code of com.ebay.erl.mobius.core.JoinOnConfigure$EQ

package com.ebay.erl.mobius.core;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

import com.ebay.erl.mobius.core.builder.Dataset;
import com.ebay.erl.mobius.core.mapred.AbstractMobiusMapper;
import com.ebay.erl.mobius.core.model.Column;
import com.ebay.erl.mobius.core.model.Tuple;
import com.ebay.erl.mobius.core.model.WriteImpl;
import com.ebay.erl.mobius.util.SerializableUtil;
import com.ebay.erl.mobius.util.Util;

/**
* Specify the join relationship in a Mobius join job.
* <p>
*
* A join relationship defines the columns to be
* used as the join keys of the participating
* datasets.
* <p>
*
* Use {@link MobiusJob#innerJoin(Dataset...)},
* {@link MobiusJob#leftOuterJoin(Dataset, Dataset, Object)} or
* {@link MobiusJob#rightOuterJoin(Dataset, Dataset, Object)} to
* obtain an instance of this class to create a join job.
*
*
*
* This product is licensed under the Apache License,  Version 2.0,
* available at http://www.apache.org/licenses/LICENSE-2.0.
*
* This product contains portions derived from Apache hadoop which is
* licensed under the Apache License, Version 2.0, available at
* http://hadoop.apache.org.
*
* © 2007 – 2012 eBay Inc., Evan Chiu, Woody Zhou, Neel Sundaresan
*/
@SuppressWarnings("deprecation")
public class JoinOnConfigure
  private Configuration jobConf;
 
  private Dataset[] datasets;
 
  // hide the constructor to default level
  JoinOnConfigure(Configuration jobConf, Dataset... datasets)
  {
    if( datasets==null||datasets.length<=1 )
    {
      throw new IllegalArgumentException("Join must be performed with at least two or more dataset.");
    }
   
    this.jobConf  = jobConf;
    this.datasets  = datasets;
  }
 
 
  JoinOnConfigure(Object nullReplacement, Configuration jobConf, Dataset... datasets)
    throws IOException
  {
    this(jobConf, datasets);
   
    // validate the type of nullReplacement
    byte type = Tuple.getType(nullReplacement);
    this.jobConf.setInt(ConfigureConstants.NULL_REPLACEMENT_TYPE, type);
   
    if( nullReplacement!=null )
    {     
      ByteArrayOutputStream buffer  = new ByteArrayOutputStream();
      DataOutputStream out       = new DataOutputStream(buffer);
      WriteImpl writer        = new WriteImpl(out);
      writer.setValue(nullReplacement);
      writer.handle(type);
      out.flush();
      out.close();
     
      byte[] binary = buffer.toByteArray();
      String base64 = SerializableUtil.serializeToBase64(binary);
     
      this.jobConf.set(ConfigureConstants.NULL_REPLACEMENT, base64);     
    }
  }
 
 
  /**
   * Specify the joining columns from the dataset.
   * <p>
   *
   * Where there are more than one {@link EQ} in the
   * argument, they will be concatenated together
   * with AND.
   * <p>
   *
   * Mobius only supports equal-join, ex: dataset1.column1=dataset2.column1.
   */
  public Persistable on(EQ... eqs)
    throws IOException
  {
    if( eqs==null||eqs.length==0 )
    {
      throw new IllegalArgumentException ("Please set at least one join key");
    }   
   
    Set<Column> keyColumns = new HashSet<Column>();
   
    for( EQ anEQ:eqs )
    {
      for( Column aColumn:anEQ.columns )
      {
        this.setJoinKey (aColumn);
        keyColumns.add(aColumn);
      }
    }
   
    this.jobConf.set(ConfigureConstants.ALL_GROUP_KEY_COLUMNS, SerializableUtil.serializeToBase64(keyColumns.toArray(new Column[0])));
    StringBuffer involvedDSName = new StringBuffer();
    for( int i=0;i<this.datasets.length;i++ )
    {
      involvedDSName.append(this.datasets[i].getName());
      if( i<this.datasets.length-1 )
        involvedDSName.append(", ");
    }
    boolean isOuterJoin = this.jobConf.getBoolean(ConfigureConstants.IS_OUTER_JOIN, false);
   
    this.jobConf.set("mapred.job.name", (isOuterJoin?"Outer Join ":"Inner Join ")+involvedDSName.toString()+" On "+Arrays.toString(eqs));
   
    return new Persistable(new JobConf(this.jobConf), this.datasets);
  }
 
  /**
   * set the $datasetID.key.columns so the {@link AbstractMobiusMapper} can
   * emit the correct key to perform join.
   *
   * @param aColumn
   */
  private void setJoinKey(Column aColumn)
    throws IOException
  {
    // represent property name, $datasetID.key.columns
    String joinKeyPropertyName = null;
   
    for( byte assignedDatasetID=0;assignedDatasetID<this.datasets.length;assignedDatasetID++ )
    {
      Dataset aDataset = this.datasets[assignedDatasetID];
      if( aColumn.getDataset().equals(aDataset) )
      {
        JobSetup.validateColumns(aDataset, aColumn);       
        Configuration aJobConf  = aDataset.createJobConf(assignedDatasetID);
        this.jobConf      = Util.merge(this.jobConf, aJobConf);       
        joinKeyPropertyName    = assignedDatasetID+".key.columns";
        break;
      }
    }
   
    if ( joinKeyPropertyName==null )
    {
      throw new IllegalArgumentException (aColumn.getDataset() + " doesn't in the selected join dataset.");
    }
   
    if( this.jobConf.get(joinKeyPropertyName, "").isEmpty() )
    {
      this.jobConf.set (joinKeyPropertyName, aColumn.getInputColumnName ());
    }
    else
    {
      this.jobConf.set (joinKeyPropertyName, this.jobConf.get (joinKeyPropertyName) + "," + aColumn.getInputColumnName ());
    }

  }
 
 
  /**
   * Specify the equal relationship
   * of columns from different datasets.
   * <p>
   *
   * Each {@link EQ} is used in a join job
   * as one of the join conditions.
   */
  public static class EQ
  {
    private Column[] columns;
   
    /**
     * Build a equal join condition from columns
     * in the participated datasets in a join job.
     *
     *
     * @param columns  Columns from different datasets
     * to be used to build equal relationship.
     */
    public EQ(Column... columns)
    {
      // validation, lengh of columns
      // must >=2, and all from different
      // dataset
     
      if( columns==null  || columns.length<2 )
        throw new IllegalArgumentException("Please specify at least two columns to form a equal relation.");
     
      // make sure columns all from different dataset
      Set<Dataset> ds = new HashSet<Dataset>();
      for( Column aColumn:columns )
        ds.add(aColumn.getDataset());
     
      if( ds.size()!=columns.length )
        throw new IllegalArgumentException("The specified columns must from different datasets.");
     
      this.columns = columns;
    }
   
    public String toString()
    {
      StringBuffer str = new StringBuffer();
      for( int i=0;i<this.columns.length;i++ )
      {
        str.append(this.columns[i].getInputColumnName());
        if( i<this.columns.length-1 )
          str.append(" = ");
      }
      return str.toString();
    }
  }
}
TOP

Related Classes of com.ebay.erl.mobius.core.JoinOnConfigure$EQ

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.