Package brickhouse.udf.sketch

Source Code of brickhouse.udf.sketch.EstimatedReachUDF

package brickhouse.udf.sketch;
/**
* Copyright 2012 Klout, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**/


import java.util.List;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.log4j.Logger;

import brickhouse.analytics.uniques.SketchSet;

/**
*  Interpret a list of strings as a sketch_set
*  and return an estimated reach number
*
*/
@Description(name="estimated_reach",
    value = "_FUNC_(x) - Estimate reach from a  sketch set of Strings. "
)
public class EstimatedReachUDF extends GenericUDF {
  private static final Logger LOG = Logger.getLogger( EstimatedReachUDF.class);
 
  private ListObjectInspector listInspector;
  private PrimitiveObjectInspector elemInspector;
  private PrimitiveCategory elemCategory;
  private IntObjectInspector lengthInspector;
 

  @Override
  public Object evaluate(DeferredObject[] arg0) throws HiveException {
    Object listObj = arg0[0].get();
    int maxItems = SketchSet.DEFAULT_MAX_ITEMS;
    if( arg0.length > 1) {
      maxItems = lengthInspector.get( arg0[1].get());
    }
   
    int listLen = listInspector.getListLength( listObj);
    if( listLen < maxItems ) {
      return (long)listLen;
    }
    if( listLen > maxItems) {
      LOG.warn( "estimated_reach: List length " + listLen + " is greater than sketch set Max items " + maxItems);
    }
    Object uninspMax = listInspector.getListElement( listObj, maxItems -1);
    switch( this.elemCategory) {
    case STRING :
      StringObjectInspector strInspector = (StringObjectInspector) elemInspector;
      String lastItem = strInspector.getPrimitiveJavaObject(uninspMax);
      double reach = SketchSet.EstimatedReach( lastItem, maxItems);
      if( reach > listLen)
         return (long)(reach);
      else
         return (long)listLen;
    case LONG :
      LongObjectInspector longInspector = (LongObjectInspector) elemInspector;
      long lastHash = longInspector.get(uninspMax);
      double reachHash = SketchSet.EstimatedReach( lastHash, maxItems);
      if( reachHash > listLen)
         return (long)(reachHash);
      else
         return (long)listLen;
     default:
       /// should not happen
      throw new HiveException("Unexpected category type");
    }
  }

  @Override
  public String getDisplayString(String[] arg0) {
    StringBuilder sb = new StringBuilder("estimated_reach( ");
    for(int i=0; i<arg0.length - 1; ++i) {
      sb.append( arg0[i]);
      sb.append(" , ");
    }
    sb.append(arg0[arg0.length -1 ]);
    sb.append(" )");
    return sb.toString();
  }

  @Override
  public ObjectInspector initialize(ObjectInspector[] arg0)
      throws UDFArgumentException {
    if( arg0.length != 1 && arg0.length != 2 ) {
      throw new UDFArgumentException("estimated_reach takes an array of strings or an array of hashes, and an optional sketch size");
    }
    if( arg0[0].getCategory() != Category.LIST) {
      throw new UDFArgumentException("estimated_reach takes an array of strings or an array of hashes, and an optional sketch size");
    }
    this.listInspector = (ListObjectInspector) arg0[0];
    if(listInspector.getListElementObjectInspector().getCategory() != Category.PRIMITIVE) {
      throw new UDFArgumentException("estimated_reach takes an array of strings or an array of hashes, and an optional sketch size");
    }
    this.elemInspector = (PrimitiveObjectInspector) listInspector.getListElementObjectInspector();
    LOG.info(" Element category is " + this.elemInspector.getCategory());
    this.elemCategory = this.elemInspector.getPrimitiveCategory();
    if(this.elemCategory != PrimitiveCategory.STRING
        && this.elemCategory != PrimitiveCategory.LONG) {
      throw new UDFArgumentException("estimated_reach takes an array of strings or an array of hashes, and an optional sketch size");
    }
    if( arg0.length > 1) {
      if( !(arg0[1] instanceof IntObjectInspector)) {
        throw new UDFArgumentException("estimated_reach takes an array of strings or an array of hashes, and an optional sketch size");
       
      }
      this.lengthInspector = (IntObjectInspector) arg0[1];
    }
   
    return PrimitiveObjectInspectorFactory.javaLongObjectInspector;
  }
 

}
TOP

Related Classes of brickhouse.udf.sketch.EstimatedReachUDF

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.