Package org.olap4cloud.impl

Source Code of org.olap4cloud.impl.CubeScanMR$CubeScanMRReducer

package org.olap4cloud.impl;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.List;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.OutputLogFilter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.log4j.Logger;
import org.olap4cloud.client.CubeDescriptor;
import org.olap4cloud.client.CubeQueryResult;
import org.olap4cloud.client.CubeQueryResultRow;
import org.olap4cloud.util.DataUtils;
import org.olap4cloud.util.LogUtils;

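/**
 * Runs a cube scan as a MapReduce job over the HBase cube data table: the
 * mapper extracts the requested measure columns, the combiner pre-aggregates
 * per group-by key, and the reducer writes one tab-separated line per group,
 * which scan() parses back into a CubeQueryResult.
 */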
public class CubeScanMR {

  static Logger logger = Logger.getLogger(CubeScanMR.class);
 
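  /**
   * Configures and submits the scan job. The serialized CubeScan and
   * CubeDescriptor are handed to the tasks through the job configuration,
   * and the job output is written to a timestamped directory that is then
   * parsed by getCubeQueryResult().
   */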
  public static CubeQueryResult scan(CubeScan scan, CubeDescriptor cubeDescriptor) throws Exception {
    String methodName = "CubeScanMR.scan() ";
    Job job = new Job();
    job.setJarByClass(CubeScanMR.class);
    TableMapReduceUtil.initTableMapperJob(cubeDescriptor.getCubeDataTable(), scan.getHBaseScan()
        , CubeScanMRMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
    job.setInputFormatClass(CubeScanTableInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setReducerClass(CubeScanMRReducer.class);
    job.setCombinerClass(CubeScanMRCombiner.class);
    String outPath = OLAPEngineConstants.MR_OUT_DIRECTORY_PREFIX + System.currentTimeMillis();
    FileOutputFormat.setOutputPath(job, new Path(outPath));
    job.getConfiguration().set(OLAPEngineConstants.JOB_CONF_PROP_CUBE_DESCRIPTOR
        , DataUtils.objectToString(cubeDescriptor));
    job.getConfiguration().set(OLAPEngineConstants.JOB_CONF_PROP_CUBE_SCAN
        , DataUtils.objectToString(scan));
    job.getConfiguration().setInt("mapred.map.tasks", 10000);
    if(!job.waitForCompletion(true))
      throw new Exception("cube scan MapReduce job failed");
    return getCubeQueryResult(outPath, scan);
  }
 
 
 
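  /**
   * Reads every output file of the job and parses each tab-separated line
   * into a CubeQueryResultRow: first one long per group-by dimension, then
   * one double per requested aggregate, in the order the reducer wrote them.
   */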
  private static CubeQueryResult getCubeQueryResult(String outPath,
      CubeScan scan) throws Exception {
    CubeQueryResult cubeQueryResult = new CubeQueryResult();
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path[] fileList = FileUtil.stat2Paths(fs.listStatus(new Path(outPath), new OutputLogFilter()));
    for(Path path: fileList) {
      FSDataInputStream fsin = fs.open(path);
      BufferedReader reader = new BufferedReader(new InputStreamReader(fsin));
      String s;
      while((s = reader.readLine()) != null) {
        StringTokenizer st = new StringTokenizer(s, "\t", false);
        CubeQueryResultRow row = new CubeQueryResultRow();
        for(int i = 0; i < scan.getGroupBy().length; i ++)
          row.getGroupBy().add(Long.parseLong(st.nextToken()));
        for(int i = 0; i < scan.getCubeScanAggregates().size(); i ++)
          row.getValues().add(Double.parseDouble(st.nextToken()));
        cubeQueryResult.getRows().add(row);
      }
      reader.close();
      fsin.close();
    }
    return cubeQueryResult;
  }



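  /**
   * For each HBase row that passes the CubeScanFilter, emits a key packed
   * from the group-by dimensions (8 bytes each, copied straight out of the
   * row key) and a value packing the raw 8-byte doubles of the requested
   * measure columns. With no group-by dimensions, every row maps to a single
   * one-byte key so the whole scan aggregates into one group.
   */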
  public static class CubeScanMRMapper extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable>{
   
    CubeScan cubeScan = null;
   
    CubeScanFilter cubeScanFilter = null;
   
    byte outValues[];
   
    ImmutableBytesWritable outValuesWritable = new ImmutableBytesWritable();
   
    ImmutableBytesWritable outKeyWritable = new ImmutableBytesWritable();
   
    byte outKey[];
   
    int groupBy[];
   
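    /**
     * Deserializes the CubeScan from the job configuration, builds the row
     * filter, and allocates the reusable output key and value buffers.
     */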
    @Override
    protected void setup(Mapper<ImmutableBytesWritable,Result,ImmutableBytesWritable
        ,ImmutableBytesWritable>.Context context) throws IOException ,InterruptedException {
      try {
        cubeScan = (CubeScan)DataUtils.stringToObject(context.getConfiguration()
            .get(OLAPEngineConstants.JOB_CONF_PROP_CUBE_SCAN));
        cubeScanFilter = new CubeScanFilter(cubeScan);
        outValues = new byte[cubeScan.getColumns().size() * 8];
        groupBy = cubeScan.getGroupBy();
        if(groupBy.length == 0) {
          outKey = new byte[1];
          outKey[0] = 0;
        } else
          outKey = new byte[groupBy.length * 8];
      } catch (Exception e) {
        logger.error(e.getMessage(), e);
        throw new InterruptedException(e.getMessage());
      }
    }
   
    @Override
    protected void map(ImmutableBytesWritable key, Result value,
        Context context) throws IOException, InterruptedException {
      String methodName = "CubeScanMRMapper.map() ";
      byte inKey[] = key.get();
      if(cubeScanFilter.filterRowKey(inKey, 0, -1))
        return;
      if(logger.isDebugEnabled())
        logger.debug(methodName + "process key: " + LogUtils.describeKey(inKey));
      int n = cubeScan.getColumns().size();
      for(int i = 0; i < n; i ++) {
        Pair<byte[], byte[]> column = cubeScan.getColumns().get(i);
        byte val[] = value.getValue(column.getFirst(), column.getSecond());
        if(logger.isDebugEnabled()) logger.debug(methodName + " map measure: " + Bytes.toDouble(val));
        Bytes.putBytes(outValues, i * 8, val, 0, 8);
      }
      for(int i = 0; i < groupBy.length; i ++)
        Bytes.putBytes(outKey, i * 8, inKey, groupBy[i] * 8, 8);
      outKeyWritable.set(outKey);
      outValuesWritable.set(outValues);
      context.write(outKeyWritable, outValuesWritable);
    }
  }
 
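  /**
   * Combiner: pre-aggregates the mapper output for a key with each
   * aggregate's combine() step and re-emits the partial results as packed
   * 8-byte doubles, cutting the data shuffled to the reducers.
   */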
  public static class CubeScanMRCombiner extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable, ImmutableBytesWritable
  , ImmutableBytesWritable> {
   
    CubeScan cubeScan = null;
   
    byte outValues[];
   
    ImmutableBytesWritable outValuesWritable = new ImmutableBytesWritable();
   
    List<CubeScanAggregate> aggregates = null;
   
    double inValues[];
   
    int inN = 0;
   
    int outN = 0;
   
    @Override
    protected void setup(org.apache.hadoop.mapreduce.Reducer<ImmutableBytesWritable,ImmutableBytesWritable
        ,ImmutableBytesWritable,ImmutableBytesWritable>.Context context) throws IOException ,InterruptedException {
      try {
        cubeScan = (CubeScan)DataUtils.stringToObject(context.getConfiguration()
            .get(OLAPEngineConstants.JOB_CONF_PROP_CUBE_SCAN));
        outValues = new byte[cubeScan.getCubeScanAggregates().size() * 8];
        inValues = new double[cubeScan.getColumns().size()];
        inN = inValues.length;
        aggregates = cubeScan.getCubeScanAggregates();
        outN = aggregates.size();
      } catch (Exception e) {
        logger.error(e.getMessage(), e);
        throw new InterruptedException(e.getMessage());
      }     
    }
       
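    /**
     * Resets the aggregates, folds every packed value for this key into
     * them via combine(), and emits one packed partial result per aggregate.
     */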
    @Override
    protected void reduce(ImmutableBytesWritable inKey,
        Iterable<ImmutableBytesWritable> inValIter,
        org.apache.hadoop.mapreduce.Reducer<ImmutableBytesWritable, ImmutableBytesWritable
        , ImmutableBytesWritable
        , ImmutableBytesWritable>.Context context)
        throws IOException, InterruptedException {
      String methodName = "CubeScanMRCombiner.reduce() ";
      if(logger.isDebugEnabled()) logger.debug(methodName + "reduce key: " + inKey.get());
      for(CubeScanAggregate aggregate: aggregates)
        aggregate.reset();
      for(ImmutableBytesWritable inVal: inValIter) {
        byte buf[] = inVal.get();
        for(int i = 0; i < inN; i ++)
          inValues[i] = Bytes.toDouble(buf, i * 8);
        for(CubeScanAggregate aggregate: aggregates)
          aggregate.combine(inValues[aggregate.getColumnNuber()]);
        if(logger.isDebugEnabled()) logger.debug(methodName + " combined values: " + LogUtils.describe(inValues));
      }
      for(int i = 0; i < outN; i ++)
        Bytes.putDouble(outValues, i * 8, aggregates.get(i).getResult());
      outValuesWritable.set(outValues);
      context.write(inKey, outValuesWritable);
    }
   
  }
 
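  /**
   * Reducer: applies each aggregate's reduce() step to the combiner's
   * partial results and writes a tab-separated text line per group-by key,
   * in exactly the format getCubeQueryResult() parses. Note that it reads
   * one double per aggregate rather than one per scanned column, so it
   * relies on the combiner having run on every mapper output.
   */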
  public static class CubeScanMRReducer extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable
    , Text, Text> {
   
    CubeScan cubeScan = null;
   
    List<CubeScanAggregate> aggregates;
   
    double inValues[];
   
    int inN;
   
    Text outVal = new Text();
   
    Text outKey = new Text();
   
    StringBuilder outValSB = new StringBuilder();
   
    StringBuilder outKeySB = new StringBuilder();
   
    int groupBy[];

    @Override
    protected void setup(org.apache.hadoop.mapreduce.Reducer<ImmutableBytesWritable,ImmutableBytesWritable
        ,Text,Text>
      .Context context) throws IOException ,InterruptedException {
      try {
        cubeScan = (CubeScan)DataUtils.stringToObject(context.getConfiguration()
          .get(OLAPEngineConstants.JOB_CONF_PROP_CUBE_SCAN));
        aggregates = cubeScan.getCubeScanAggregates();
        inN = aggregates.size();
        inValues = new double[inN];
        groupBy = cubeScan.getGroupBy();
      } catch (Exception e) {
        logger.error(e.getMessage(), e);
        throw new InterruptedException(e.getMessage());
      }
    }
   
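    /**
     * Folds all partial results for this key into the aggregates, then
     * formats the group-by values (as longs) and the aggregate results (as
     * doubles) into tab-separated output columns.
     */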
    @Override
    protected void reduce(ImmutableBytesWritable inKey,
        Iterable<ImmutableBytesWritable> inVals,
        org.apache.hadoop.mapreduce.Reducer<ImmutableBytesWritable, ImmutableBytesWritable
        , Text
        , Text>.Context context)
        throws IOException, InterruptedException {
      String methodName = "CubeScanMRReducer.reduce() ";
      for(CubeScanAggregate aggregate: aggregates)
        aggregate.reset();
      for(ImmutableBytesWritable inWritable: inVals) {
        byte buf[] = inWritable.get();
        for(int i = 0; i < inN; i ++)
          aggregates.get(i).reduce(Bytes.toDouble(buf, i * 8));
      }
      outKeySB.setLength(0);
      byte inKeyVal[] = inKey.get();
      for(int i = 0; i < groupBy.length; i ++)
        outKeySB.append(Bytes.toLong(inKeyVal, i * 8)).append("\t");
      outValSB.setLength(0);
      for(CubeScanAggregate aggregate: aggregates)
        outValSB.append(aggregate.getResult() + "\t");
      outKey.set(outKeySB.toString());
      outVal.set(outValSB.toString());
      context.write(outKey, outVal);
    }
   
  }
}
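
Example usage: a minimal sketch of invoking the scan and reading the result,
assuming a CubeScan and CubeDescriptor have already been constructed (their
construction APIs are not part of this listing):

    // Submit the MapReduce scan and iterate over the aggregated rows.
    CubeQueryResult result = CubeScanMR.scan(scan, cubeDescriptor);
    for(CubeQueryResultRow row: result.getRows()) {
      // Group-by dimension values (longs), in scan.getGroupBy() order,
      // followed by one double per requested aggregate.
      System.out.println("group: " + row.getGroupBy()
          + " values: " + row.getValues());
    }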