// Package brickhouse.hbase
//
// Source Code of brickhouse.hbase.BatchPutUDAF$BatchPutUDAFEvaluator$PutBuffer

package brickhouse.hbase;
/**
* Copyright 2012 Klout, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**/

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.log4j.Logger;

/**
 *   Perform bulk Puts into HBase from an aggregate function call.
 *
 */

@Description(name="hbase_batch_put",
value = "_FUNC_(config_map, key, value) - Perform batch HBase updates of a table "
)
public class BatchPutUDAF extends AbstractGenericUDAFResolver {
  private static final Logger LOG = Logger.getLogger( BatchPutUDAF.class);
 


  @Override
  public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
      throws SemanticException {
    for(int i=0; i<parameters.length; ++i) {
      LOG.info(" BATCH PUT PARAMETERS : " + i  + " -- " + parameters[i].getTypeName() + " cat = " + parameters[i].getCategory());
      System.out.println(" BATCH PUT PARAMETERS : " + i  + " -- " + parameters[i].getTypeName() + " cat = " + parameters[i].getCategory());
    }
   
    return new BatchPutUDAFEvaluator();
  }
 
  public static class BatchPutUDAFEvaluator extends GenericUDAFEvaluator {
    public class PutBuffer implements AggregationBuffer{
      public List<Put> putList;

      public PutBuffer() {
      }

      public void reset() { putList = new ArrayList<Put>(); }

      public void addKeyValue( String key, String val) throws HiveException{
        Put thePut = new Put(key.getBytes());
        thePut.add( getFamily(), getQualifier(), val.getBytes());
        thePut.setWriteToWAL(false);
        putList.add( thePut);
      }
    }
 
 
    private byte[] getFamily() {
      String famStr = configMap.get( HTableFactory.FAMILY_TAG);
      return famStr.getBytes();
    }
   
    private byte[] getQualifier() {
      String famStr = configMap.get( HTableFactory.QUALIFIER_TAG);
      return famStr.getBytes();
    }
 

    private int batchSize = 10000;
    private int numPutRecords = 0;
   
    public static final String BATCH_SIZE_TAG = "batch_size";
   
    // For PARTIAL1 and COMPLETE: ObjectInspectors for original data
    private PrimitiveObjectInspector inputKeyOI;
    private PrimitiveObjectInspector inputValOI;
    // For PARTIAL2 and FINAL: ObjectInspectors for partial aggregations (list
    // of objs)
    private StandardListObjectInspector listKVOI;
    private Map<String,String> configMap;
   
    private HTable table;



    public ObjectInspector init(Mode m, ObjectInspector[] parameters)
        throws HiveException {
      super.init(m, parameters);
      // init output object inspectors
      ///  input will be key, value and batch size
      LOG.info(" Init mode = " + m );
      System.out.println(" Init mode = " + m );
      System.out.println(" parameters =  = " + parameters + " Length = " + parameters.length );
      configMap = new HashMap<String,String>();
          for( int k=0; k< parameters.length; ++k) {
            LOG.info( "Param " + k + " is " + parameters[k]);
            System.out.println( "Param " + k + " is " + parameters[k]);
          }
         
      if (m == Mode.PARTIAL1 || m == Mode.COMPLETE ) {
        configMap = HTableFactory.getConfigFromConstMapInspector(parameters[0]);
        HTableFactory.checkConfig( configMap);
       
       
        inputKeyOI = (PrimitiveObjectInspector) parameters[1];
        inputValOI = (PrimitiveObjectInspector) parameters[2];
       
       
        try {
          LOG.info(" Initializing HTable ");
          table = HTableFactory.getHTable( configMap);
         
          if(configMap.containsKey(BATCH_SIZE_TAG)) {
            batchSize = Integer.parseInt( configMap.get( BATCH_SIZE_TAG));
          }
        } catch (IOException e) {
          throw new HiveException(e);
        }
      } else {
        listKVOI = (StandardListObjectInspector) parameters[0];
       
      }
     
      if( m == Mode.PARTIAL1 || m  == Mode.PARTIAL2) {
         return ObjectInspectorFactory
            .getStandardListObjectInspector(
                ObjectInspectorFactory.getStandardListObjectInspector(
                    PrimitiveObjectInspectorFactory.javaStringObjectInspector ) );
      } else {
        /// Otherwise return a message
        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
      }
    }

    @Override
    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
      PutBuffer buff= new PutBuffer();
      reset(buff);
      return buff;
    }

    @Override
    public void iterate(AggregationBuffer agg, Object[] parameters)
        throws HiveException {
      String key = getByteString( parameters[1], inputKeyOI);
      String val = getByteString( parameters[2], inputValOI);
     
      PutBuffer kvBuff = (PutBuffer) agg;
      kvBuff.addKeyValue( key,val);

      if(kvBuff.putList.size() >= batchSize) {
        batchUpdate( kvBuff, false);
      }
    }
   
   
    /**
     *
     * @param obj
     * @param objInsp
     * @return
     */
    private String getByteString( Object obj, PrimitiveObjectInspector objInsp) {
        switch( objInsp.getPrimitiveCategory() ) {
        case STRING :
            StringObjectInspector strInspector = (StringObjectInspector) objInsp;
            return strInspector.getPrimitiveJavaObject(obj);
        case BINARY :
            BinaryObjectInspector binInspector = (BinaryObjectInspector) objInsp;
            return new String(binInspector.getPrimitiveJavaObject( obj));
        /// XXX TODO interpret other types, like ints or doubled
         default :
            return null;
        }
    }
   
    protected void batchUpdate( PutBuffer  kvBuff, boolean flushCommits) throws HiveException {
      try {
       
        HTable htable = HTableFactory.getHTable(configMap);
       
        htable.put( kvBuff.putList);
        if(flushCommits)
           htable.flushCommits();
        numPutRecords += kvBuff.putList.size();
        if(kvBuff.putList.size() > 0)
          LOG.info(" Doing Batch Put " + kvBuff.putList.size() + " records; Total put records = " + numPutRecords + " ; Start = " + (new String(kvBuff.putList.get(0).getRow()))  + " ; End = " + ( new String( kvBuff.putList.get( kvBuff.putList.size()-1).getRow())));
        else
          LOG.info( " Doing Batch Put with ZERO 0 records");
        kvBuff.putList.clear();
       
       
      } catch (IOException e) {
        throw new HiveException(e);
      }
    }

    @Override
    public void merge(AggregationBuffer agg, Object partial)
        throws HiveException {
      PutBuffer myagg = (PutBuffer) agg;
      List<Object> partialResult = (List<Object>)this.listKVOI.getList(partial);
      ListObjectInspector subListOI = (ListObjectInspector) listKVOI.getListElementObjectInspector();
   
      List first = subListOI.getList( partialResult.get(0));
      String tableName = ((StringObjectInspector)(subListOI.getListElementObjectInspector())).getPrimitiveJavaObject(first.get(0));
      configMap.put( HTableFactory.TABLE_NAME_TAG, tableName);
      String zookeeper = ((StringObjectInspector)(subListOI.getListElementObjectInspector())).getPrimitiveJavaObject(first.get(1));
      configMap.put( HTableFactory.ZOOKEEPER_QUORUM_TAG, zookeeper);
      String family = ((StringObjectInspector)(subListOI.getListElementObjectInspector())).getPrimitiveJavaObject(first.get(2));
      configMap.put( HTableFactory.FAMILY_TAG, family);
      String qualifier = ((StringObjectInspector)(subListOI.getListElementObjectInspector())).getPrimitiveJavaObject(first.get(3));
      configMap.put( HTableFactory.QUALIFIER_TAG, qualifier);
      //// Include arbitrary configurations, by adding strings of the form k=v
      for(int j=4; j < first.size(); ++j ) {
        String kvStr =  ((StringObjectInspector)(subListOI.getListElementObjectInspector())).getPrimitiveJavaObject(first.get(j));
        String[] kvArr = kvStr.split("=");
        if(kvArr.length == 2 ) {
          configMap.put( kvArr[0], kvArr[1]);
        }
      }
     
      for(int i=1; i< partialResult.size(); ++i) {
       
         List kvList = subListOI.getList( partialResult.get(i));
         String key = ((StringObjectInspector)(subListOI.getListElementObjectInspector())).getPrimitiveJavaObject(kvList.get(0));
         String val = ((StringObjectInspector)(subListOI.getListElementObjectInspector())).getPrimitiveJavaObject(kvList.get(1));
        
         myagg.addKeyValue( key, val);
        
      }
     
      if(myagg.putList.size() >= batchSize) {
        batchUpdate( myagg, false);
      }
    }

    @Override
    public void reset(AggregationBuffer buff) throws HiveException {
      PutBuffer putBuffer = (PutBuffer) buff;
      putBuffer.reset();
    }

    @Override
    public Object terminate(AggregationBuffer agg) throws HiveException {
      PutBuffer myagg = (PutBuffer) agg;
      batchUpdate( myagg, true);
      return "Finished Batch updates ; Num Puts = " + numPutRecords ;

    }


    @Override
    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
      PutBuffer myagg = (PutBuffer) agg;
     
     
      ArrayList<List<String>> ret = new ArrayList<List<String>>();
      ArrayList tname = new ArrayList<String>();
      tname.add( configMap.get( HTableFactory.TABLE_NAME_TAG));
      tname.add( configMap.get( HTableFactory.ZOOKEEPER_QUORUM_TAG));
      tname.add( configMap.get( HTableFactory.FAMILY_TAG) );
      tname.add( configMap.get( HTableFactory.QUALIFIER_TAG ));
     
      for( Entry<String,String> entry : configMap.entrySet() ) {
        if(!entry.getKey().equals( HTableFactory.TABLE_NAME_TAG)
            && !entry.getKey().equals( HTableFactory.ZOOKEEPER_QUORUM_TAG )
            && !entry.getKey().equals( HTableFactory.FAMILY_TAG )
            && !entry.getKey().equals( HTableFactory.QUALIFIER_TAG ) ) {
         
          tname.add( entry.getKey() + "=" + entry.getValue());
        }
      }
      ret.add( tname);
     
      for(Put thePut : myagg.putList) {
        ArrayList<String> kvList = new ArrayList<String>();
        kvList.add( new String(thePut.getRow() )  );
          Map<byte[],List<KeyValue>> familyMap = thePut.getFamilyMap();
          for( List<KeyValue> innerList : familyMap.values() ) {
            for(KeyValue kv : innerList) {
              kvList.add( new String( kv.getValue() ));
            }
          }
          ret.add( kvList);
      }
     
      return ret;
    }
  }



}
// TOP
//
// Related Classes of brickhouse.hbase.BatchPutUDAF$BatchPutUDAFEvaluator$PutBuffer
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.