Package brickhouse.udf.hll

Source Code of brickhouse.udf.hll.CombineHyperLogLogUDF

package brickhouse.udf.hll;
/**
* Copyright 2012,2013 Klout, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**/


import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.log4j.Logger;

import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus;
import com.clearspring.analytics.stream.cardinality.ICardinality;

/**
* Combine two HyperLogLog++ structures together.
*
*/
@Description(name="combine_hyperloglog",
    value = "_FUNC_(x) - Combined two  HyperLogLog++ binary blobs. "
)
public class CombineHyperLogLogUDF extends GenericUDF {
  private static final Logger LOG = Logger.getLogger( CombineHyperLogLogUDF.class);
 
  private BinaryObjectInspector binary1Inspector;
  private BinaryObjectInspector binary2Inspector;
 

  @Override
  public Object evaluate(DeferredObject[] arg0) throws HiveException {
    try {
      Object blobObj1 = arg0[0].get();
      Object blobObj2 = arg0[1].get();
   
      byte[] bref1 = this.binary1Inspector.getPrimitiveJavaObject(blobObj1);
      byte[] bref2 = this.binary2Inspector.getPrimitiveJavaObject(blobObj2);
   
      if(bref1 != null && bref2 != null) {
        HyperLogLogPlus hll1 = HyperLogLogPlus.Builder.build( bref1 );
        HyperLogLogPlus hll2 = HyperLogLogPlus.Builder.build( bref2 );
     
        ICardinality merged = hll1.merge(hll2);
      return merged.getBytes();
      } else {
        return null;
      }
     
    } catch(Exception e) {
      LOG.error("Error", e);
      throw new HiveException(e);
    }
   
  }

  @Override
  public String getDisplayString(String[] arg0) {
    StringBuilder sb = new StringBuilder("combine_hyperloglog( ");
    for(int i=0; i<arg0.length - 1; ++i) {
      sb.append( arg0[i]);
      sb.append(" , ");
    }
    sb.append(arg0[arg0.length -1 ]);
    sb.append(" )");
    return sb.toString();
  }

  @Override
  public ObjectInspector initialize(ObjectInspector[] arg0)
      throws UDFArgumentException {
    if( arg0.length != 2 ) {
      throw new UDFArgumentException("combine_hyperloglog takes a pair of binary objects which were created with the hyperloglog UDAF");
    }
    if( arg0[0].getCategory() != Category.PRIMITIVE) {
      throw new UDFArgumentException("combine_hyperloglog takes a pair of binary objects which were created with the hyperloglog UDAF");
    }
    PrimitiveObjectInspector primInsp = (PrimitiveObjectInspector) arg0[0];
    if( primInsp.getPrimitiveCategory() != PrimitiveCategory.BINARY) {
      throw new UDFArgumentException("hll_est_cardinality takes a binary object which was created with the hyperloglog UDAF");
    }
    this.binary1Inspector = (BinaryObjectInspector) arg0[0];
    this.binary2Inspector = (BinaryObjectInspector) arg0[1];
   
    return PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector;
  }
 

}
TOP

Related Classes of brickhouse.udf.hll.CombineHyperLogLogUDF

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.