Package com.ebay.erl.mobius.core.function

Source Code of com.ebay.erl.mobius.core.function.Top

package com.ebay.erl.mobius.core.function;

import java.util.Arrays;
import java.util.Comparator;
import java.util.PriorityQueue;

import com.ebay.erl.mobius.core.builder.Dataset;
import com.ebay.erl.mobius.core.collection.BigTupleList;
import com.ebay.erl.mobius.core.function.base.GroupFunction;
import com.ebay.erl.mobius.core.model.Column;
import com.ebay.erl.mobius.core.model.Tuple;
import com.ebay.erl.mobius.core.model.TupleColumnComparator;
import com.ebay.erl.mobius.core.sort.Sorter;
import com.ebay.erl.mobius.util.Util;


/**
* Returns up to <code>topX</code> {@link Tuple}
* in a group.
* <p>
*
* Natural ordering is used by default. Users can override
* the ordering by providing a customized comparator in
* {@link #Top(Dataset, String[], int, Class)}.
* <p>
*
* Top X is defined as: tuples are sorted by natural ordering, customized
* comparator ({@link #Top(Dataset, String[], int, Class)}), or
* user provided sorters ( {@link #Top(Dataset, String[], Sorter[], int)}).
* When tuples are sorted, it pick the first X elements in the sorted
* result.<br>
* For example, a tuple list with one single column and their values are
* [2, 7, -1, 3, 10, 2, 4], then if people chose to use natural ordering,
* and want top 3 element, and the top 3 elements would be {-1, 2, 2}.  If
* user chose to use <code>Sorter(Ordering.DESC)</code>, then top 3 are
* {4, 7, 10}.
* <p>
*
* This product is licensed under the Apache License,  Version 2.0,
* available at http://www.apache.org/licenses/LICENSE-2.0.
*
* This product contains portions derived from Apache hadoop which is
* licensed under the Apache License, Version 2.0, available at
* http://hadoop.apache.org.
*
* © 2007 – 2012 eBay Inc., Evan Chiu, Woody Zhou, Neel Sundaresan
*
*/
public class Top extends GroupFunction
{
  private static final long serialVersionUID = 9006860239763386840L;
 
  private transient Comparator<Tuple> comparator;
 
  private int topX;
 
  private String comparatorClassName;
 
  private transient PriorityQueue<Tuple> maxHeap;
 
  private Sorter[] sorters;
 
 
  /**
   * Create a {@link Top} operation that emit up to
   * <code>topX</code> rows from the given dataset
   * <code>ds</code>.
   * <p>
   *
   * The ordering is natural ordering by comparing the
   * value of <code>inputColumns</code>, in sequence.
   */
  public Top(Dataset ds, String[] inputColumns, int topX)
  {
    this(ds, inputColumns, topX, null);
  }
 
 
  /**
   * Create a {@link Top} operation that emit up to
   * <code>topX</code> rows from the given dataset
   * <code>ds</code>.
   * <p>
   *
   * The ordering is defined by the specified <code>comparator</code>,
   * note that, the comparator can only access columns in
   * <code>inputColumns</code>.
   */
  public Top(Dataset ds, String[] inputColumns, int topX, Class<? extends Comparator<Tuple>> comparator)
  {
    super(getColumns(ds, inputColumns));
    this.comparatorClassName = comparator.getCanonicalName();
    this.topX = topX;
  }
 
  /**
   * Create a {@link Top} operation that emit up to
   * <code>topX</code> rows from the given dataset
   * <code>ds</code>.
   * <p>
   *
   * The ordering is defined by the specified <code>sorters</code>,
   * note that, the <code>sorters</code> can only access columns in
   * <code>inputColumns</code>.
   */
  public Top(Dataset ds, String[] inputColumns, Sorter[] sorters, int topX)
  {
    super(getColumns(ds, inputColumns));
   
    if( sorters==null || sorters.length==0 )
    {
      throw new IllegalArgumentException("sorters cannot null nor empty.");
    }
   
    // make sure the "sorters" only use the columns in the
    // <code>inputColumns</code>
    for(Sorter aSorter:sorters )
    {
      boolean withinSchema = false;
      for( String aColumn: inputColumns)
      {
        if( aSorter.getColumn().equalsIgnoreCase(aColumn) )
        {
          withinSchema = true;
          break;
        }
      }
     
      if( !withinSchema )
      {
        throw new IllegalArgumentException("A sorter uses column["+aSorter.getColumn()+"] " +
            "that is not in the input columns:"+Arrays.toString(inputColumns));
      }
    }
   
    this.topX = topX;
    this.sorters = sorters;
  }

  /**
   * Put the tuple into a min heap, once the heap
   * size is greater than the specified {@link #topX},
   * the smallest element (head of the heap) is poll
   * out from the heap.
   */
  @Override
  public void consume(Tuple tuple)
  { 
    if( this.maxHeap==null )
    {
      this.maxHeap = new PriorityQueue<Tuple>(this.topX, this.getComparator());
    }
   
    this.maxHeap.add(tuple);
   
    while ( this.maxHeap.size()>topX )
    {
      this.maxHeap.poll();// remove the smallest
    }
  }
 
 
  @Override
  public BigTupleList getResult()
  {
    BigTupleList result = new BigTupleList(this.reporter);
   
    Tuple[] tuples = this.maxHeap.toArray(new Tuple[this.maxHeap.size()]);
    Arrays.sort(tuples, this.getComparator());
   
    for( int i=tuples.length-1;i>=0;i-- )
    {
      Tuple currentTuple = tuples[i];
      Tuple t = new Tuple();
      for( int j=0;j<this.getOutputSchema().length;j++ )   
      {
        String inName  = this.inputs[j].getInputColumnName();
        String outName  = this.getOutputSchema()[j];
       
        t.insert(outName, currentTuple.get(inName));
      }
      result.add(t);
    }
    return result;
  }
 
  @Override
  public void reset()
  {
    super.reset();
    if ( this.maxHeap!=null )
      this.maxHeap.clear();
  }
 
 
 
 
 
  @SuppressWarnings("unchecked")
  private Comparator<Tuple> getComparator()
  {
    if( this.comparator!=null )
      return this.comparator;
   
    Comparator<Tuple> comp = null;
   
    if(this.comparator==null && comparatorClassName!=null)
    {
      try
      {
        comp = (Comparator<Tuple>)Util.getClass(this.comparatorClassName).newInstance();       
      }catch(Throwable e)
      {
        throw new RuntimeException("Cannot create instance of comparator:"+this.comparatorClassName, e);
      }
    }
    else if ( this.comparator==null && comparatorClassName==null )
    {
      if( this.sorters==null)
      {
        // user doesn't specified sorters nor customized comparator,
        // use the default comparator (Tuple).
        comp = new Tuple();
      }
      else
      {
        // user has specified sorters, create a comparator that
        // use sorters to compare.
        comp = new Comparator<Tuple>(){
       
          TupleColumnComparator comparator = new TupleColumnComparator();
         
          @Override
          public int compare(Tuple t1, Tuple t2)
          {
            int result = comparator.compareKey(t1, t2, sorters, getConf());
            return result;
          }
        };
      }
    }
   
    final Comparator<Tuple> finalComp = comp;
   
    this.comparator = new Comparator(){
      @Override
      public int compare(Object o1, Object o2)
      {
        // reverse order as use max heap to get top x
        return - finalComp.compare((Tuple)o1, (Tuple)o2);
      }
    };
   
    return this.comparator;
  }
 
 
  private static Column[] getColumns(Dataset ds, String[] columns)
  {
    Column[] result = new Column[columns.length];
   
    for( int i=0;i<columns.length;i++ )
    {
      result[i] = new Column(ds, columns[i]);
    }
    return result;
  }
 
}
TOP

Related Classes of com.ebay.erl.mobius.core.function.Top

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.