Package com.datasalt.pangool.tuplemr.mapred

Source Code of com.datasalt.pangool.tuplemr.mapred.SortComparator$Offsets

/**
* Copyright [2012] [Datasalt Systems S.L.]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.datasalt.pangool.tuplemr.mapred;

import static org.apache.hadoop.io.WritableComparator.compareBytes;
import static org.apache.hadoop.io.WritableComparator.readDouble;
import static org.apache.hadoop.io.WritableComparator.readFloat;
import static org.apache.hadoop.io.WritableComparator.readVInt;
import static org.apache.hadoop.io.WritableComparator.readVLong;

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.io.WritableUtils;

import com.datasalt.pangool.PangoolRuntimeException;
import com.datasalt.pangool.io.ITuple;
import com.datasalt.pangool.io.Schema;
import com.datasalt.pangool.io.Schema.Field;
import com.datasalt.pangool.io.Schema.Field.FieldSerializer;
import com.datasalt.pangool.io.Schema.Field.Type;
import com.datasalt.pangool.io.Utf8;
import com.datasalt.pangool.tuplemr.Criteria;
import com.datasalt.pangool.tuplemr.Criteria.Order;
import com.datasalt.pangool.tuplemr.Criteria.SortElement;
import com.datasalt.pangool.tuplemr.SerializationInfo;
import com.datasalt.pangool.tuplemr.TupleMRConfig;
import com.datasalt.pangool.tuplemr.TupleMRConfigBuilder;
import com.datasalt.pangool.tuplemr.serialization.TupleSerialization;

/**
* Tuple-based MapRed job binary comparator. It decodes the binary serialization
* performed by {@link TupleSerialization}.
*
* Used to sort tuples according to
* {@link TupleMRConfigBuilder#setOrderBy(com.datasalt.pangool.tuplemr.OrderBy)}
*/
@SuppressWarnings("rawtypes")
public class SortComparator implements RawComparator<ITuple>, Configurable {

  // Hadoop configuration handed to setConf(Configuration).
  protected Configuration conf;
  // Job description read from the configuration; assigned once in setTupleMRConf.
  protected TupleMRConfig tupleMRConf;
  // Serialization metadata taken from tupleMRConf (schemas, index translations, serializers).
  protected SerializationInfo serInfo;

  // Comparator used for Type.OBJECT fields that have no custom comparator;
  // it receives the configuration in setConf.
  protected final SerializerComparator serializerComparator = new SerializerComparator();

  /**
   * Mutable pair of read positions, one per byte buffer being compared.
   * The binary compare methods update both fields as they walk the
   * serialized tuples.
   */
  private static final class Offsets {
    // Current read position inside the first buffer.
    protected int offset1 = 0;
    // Current read position inside the second buffer.
    protected int offset2 = 0;
  }

  // Single Offsets instance reused by every binary comparison.
  // NOTE(review): shared mutable scratch state — presumably one comparator
  // instance must not be used from several threads at once; confirm.
  protected Offsets offsets = new Offsets();
  // True when the job declares two or more intermediate schemas (see setTupleMRConf).
  protected boolean isMultipleSources;

  public TupleMRConfig getConfig() {
    return tupleMRConf;
  }

  public SortComparator() {
  }

  /**
   * Never called in MapRed jobs. Just for completion and test purposes
   */
  @Override
  public int compare(ITuple w1, ITuple w2) {
    if(isMultipleSources) {
      int schemaId1 = tupleMRConf.getSchemaIdByName(w1.getSchema().getName());
      int schemaId2 = tupleMRConf.getSchemaIdByName(w2.getSchema().getName());
      int[] indexes1 = serInfo.getCommonSchemaIndexTranslation(schemaId1);
      int[] indexes2 = serInfo.getCommonSchemaIndexTranslation(schemaId2);
      Criteria c = tupleMRConf.getCommonCriteria();
      int comparison = compare(serInfo.getCommonSchema(), c, w1, indexes1, w2, indexes2,serInfo.getCommonSchemaSerializers());
      if(comparison != 0) {
        return comparison;
      } else if(schemaId1 != schemaId2) {
        int r = schemaId1 - schemaId2;
        return (tupleMRConf.getSchemasOrder() == Order.ASC) ? r : -r;
      }
      int schemaId = schemaId1;
      c = tupleMRConf.getSpecificOrderBys().get(schemaId);
      if(c != null) {
        int[] indexes = serInfo.getSpecificSchemaIndexTranslation(schemaId);
        return compare(serInfo.getSpecificSchema(schemaId), c, w1, indexes, w2, indexes,serInfo.getSpecificSchemaSerializers().get(schemaId));
      } else {
        return 0;
      }
    } else {
      int[] indexes = serInfo.getCommonSchemaIndexTranslation(0);
      Criteria c = tupleMRConf.getCommonCriteria();
      return compare(serInfo.getCommonSchema(), c, w1, indexes, w2, indexes,serInfo.getCommonSchemaSerializers());
    }

  }


  public int compare(Schema schema, Criteria c, ITuple w1, int[] index1, ITuple w2,
      int[] index2,FieldSerializer[] serializers) {
    for(int i = 0; i < c.getElements().size(); i++) {
      Field field = schema.getField(i);
      SortElement e = c.getElements().get(i);
      Object o1 = w1.get(index1[i]);
      Object o2 = w2.get(index2[i]);
      FieldSerializer serializer = (serializers == null) ? null : serializers[i];
      int comparison = compareObjects(o1, o2, e.getCustomComparator(), field.getType(),serializer);
      if(comparison != 0) {
        return(e.getOrder() == Order.ASC ? comparison : -comparison);
      }
    }
    return 0;
  }

  /**
   * Compares two objects. Uses the given custom comparator if present. If the
   * type is {@link Type#OBJECT} and no raw comparator is present, then a serializer
   * comparator is used.
   */
  @SuppressWarnings({ "unchecked" })
  public int compareObjects(Object elem1, Object elem2, RawComparator comparator,
      Type type,FieldSerializer serializer) {
    // If custom, just use custom.
    if(comparator != null) {
      return comparator.compare(elem1, elem2);
    }

    if(type == Type.OBJECT) {
      return serializerComparator.compare(elem1,serializer, elem2,serializer);
    } else {
      return compareObjects(elem1,elem2);
    }
  }
 
  public static int compareObjects(Object element1,Object element2){
    if(element1 == null) {
      return (element2 == null) ? 0 : -1;
    } else if(element2 == null) {
      return 1;
    } else {
      if(element1 instanceof String) {
        element1 = new Utf8((String) element1);
      }
      if(element2 instanceof String) {
        element2 = new Utf8((String) element2);
      }
      if(element1 instanceof byte[]){
        byte[] buffer1 = (byte[])element1;
        if (element2 instanceof byte[]){
          byte[] buffer2 = (byte[])element2;
          return compareBytes(buffer1,0,buffer1.length,buffer2,0,buffer2.length);
        } else if (element2 instanceof ByteBuffer){
          ByteBuffer buffer2 = (ByteBuffer)element2;
          int start2 = buffer2.arrayOffset() + buffer2.position();
          int len2 = buffer2.limit() - buffer2.position();
          return compareBytes(buffer1,0,buffer1.length,buffer2.array(),start2,len2);
        } else {
          throw new PangoolRuntimeException("Can't compare byte[] with " + element2.getClass());
        }
      } else if(element1 instanceof ByteBuffer){
        ByteBuffer buffer1 = (ByteBuffer)element1;
        int pos1 = buffer1.position();
        int start1 = buffer1.arrayOffset() + pos1;
        int len1 = buffer1.limit() - pos1;
        if (element2 instanceof byte[]){
          byte[] buffer2 =(byte[]) element2;
          return compareBytes(buffer1.array(),start1,len1,buffer2,0,buffer2.length);
        } else if (element2 instanceof ByteBuffer){
          ByteBuffer buffer2 = (ByteBuffer)element2;
          int pos2 = buffer2.position();
          int start2 = buffer2.arrayOffset() + pos2;
          int len2 = buffer2.limit() - pos2;
          return compareBytes(buffer1.array(),start1,len1,buffer2.array(),start2,len2);
        } else {
          throw new PangoolRuntimeException("Can't compare byte[] with " + element2.getClass());
        }
      }  else if(element1 instanceof Comparable) {
          return ((Comparable) element1).compareTo(element2);
      } else if(element2 instanceof Comparable) {
          return -((Comparable) element2).compareTo(element1);
      } else {
        throw new PangoolRuntimeException("Not comparable elements:" + element1.getClass() + " with object " + element2.getClass());
      }
    }
  }
 

  @Override
  public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
    try {
      return (isMultipleSources) ? compareMultipleSources(b1, s1, l1, b2, s2, l2)
          : compareOneSource(b1, s1, l1, b2, s2, l2);
    } catch(IOException e) {
      throw new RuntimeException(e);
    }
  }

  protected int compareMultipleSources(byte[] b1, int s1, int l1, byte[] b2, int s2,
      int l2) throws IOException {
    Schema commonSchema = serInfo.getCommonSchema();
    Criteria commonOrder = tupleMRConf.getCommonCriteria();

    int comparison = compare(b1, s1, b2, s2, commonSchema, commonOrder, offsets);
    if(comparison != 0) {
      return comparison;
    }

    int schemaId1 = readVInt(b1, offsets.offset1);
    int schemaId2 = readVInt(b2, offsets.offset2);
    if(schemaId1 != schemaId2) {
      int r = schemaId1 - schemaId2;
      return (tupleMRConf.getSchemasOrder() == Order.ASC) ? r : -r;
    }

    int vintSize = WritableUtils.decodeVIntSize(b1[offsets.offset1]);
    offsets.offset1 += vintSize;
    offsets.offset2 += vintSize;

    // sources are the same
    Criteria criteria = tupleMRConf.getSpecificOrderBys().get(schemaId1);
    if(criteria == null) {
      return 0;
    }

    Schema specificSchema = serInfo.getSpecificSchema(schemaId1);
    return compare(b1, offsets.offset1, b2, offsets.offset2, specificSchema, criteria,
        offsets);

  }

  private int compareOneSource(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2)
      throws IOException {
    Schema commonSchema = serInfo.getCommonSchema();
    Criteria commonOrder = tupleMRConf.getCommonCriteria();
    return compare(b1, s1, b2, s2, commonSchema, commonOrder, offsets);
  }

  protected int compare(byte[] b1, int s1, byte[] b2, int s2, Schema schema,
      Criteria criteria, Offsets o) throws IOException {
    o.offset1 = s1;
    o.offset2 = s2;
    for(int depth = 0; depth < criteria.getElements().size(); depth++) {
      Field field = schema.getField(depth);
      Field.Type type = field.getType();
      SortElement sortElement = criteria.getElements().get(depth);
      Order sort = sortElement.getOrder();
      RawComparator comparator = sortElement.getCustomComparator();

      if(comparator != null) {
        //custom comparator for OBJECT
        int length1 = WritableComparator.readVInt(b1, o.offset1);
        int length2 = WritableComparator.readVInt(b2, o.offset2);
        o.offset1 += WritableUtils.decodeVIntSize(b1[o.offset1]);
        o.offset2 += WritableUtils.decodeVIntSize(b2[o.offset2]);
        int comparison = comparator.compare(b1, o.offset1, length1, b2,
            o.offset2, length2);
        o.offset1 += length1;
        o.offset2 += length2;
        if(comparison != 0) {
          return (sort == Order.ASC) ? comparison : -comparison;
        }
      } else {
        //not custom comparator
        switch(type){
        case INT:
        case ENUM:{
          int value1 = readVInt(b1, o.offset1);
          int value2 = readVInt(b2, o.offset2);
          if(value1 > value2) {
            return (sort == Order.ASC) ? 1 : -1;
          } else if(value1 < value2) {
            return (sort == Order.ASC) ? -1 : 1;
          }
          int vintSize = WritableUtils.decodeVIntSize(b1[o.offset1]);
          o.offset1 += vintSize;
          o.offset2 += vintSize;
        break;
        case LONG:{
          long value1 = readVLong(b1, o.offset1);
          long value2 = readVLong(b2, o.offset2);
          if(value1 > value2) {
            return (sort == Order.ASC) ? 1 : -1;
          } else if(value1 < value2) {
            return (sort == Order.ASC) ? -1 : 1;
          }
          int vIntSize = WritableUtils.decodeVIntSize(b1[o.offset1]);
          o.offset1 += vIntSize;
          o.offset2 += vIntSize;
        } break;
        case FLOAT:{
          float value1 = readFloat(b1, o.offset1);
          float value2 = readFloat(b2, o.offset2);
          if(value1 > value2) {
            return (sort == Order.ASC) ? 1 : -1;
          } else if(value1 < value2) {
            return (sort == Order.ASC) ? -1 : 1;
          }
          o.offset1 += Float.SIZE / 8;
          o.offset2 += Float.SIZE / 8;
        } break;
        case DOUBLE:{
          double value1 = readDouble(b1, o.offset1);
          double value2 = readDouble(b2, o.offset2);
          if(value1 > value2) {
            return (sort == Order.ASC) ? 1 : -1;
          } else if(value1 < value2) {
            return (sort == Order.ASC) ? -1 : 1;
          }
          o.offset1 += Double.SIZE / 8;
          o.offset2 += Double.SIZE / 8;
        } break;
        case BOOLEAN:{
          byte value1 = b1[o.offset1++];
          byte value2 = b2[o.offset2++];
          if(value1 > value2) {
            return (sort == Order.ASC) ? 1 : -1;
          } else if(value1 < value2) {
            return (sort == Order.ASC) ? -1 : 1;
          }
          } break;
        case STRING:
        case OBJECT:
        case BYTES:{
          int length1 = readVInt(b1, o.offset1);
          int length2 = readVInt(b2, o.offset2);
          o.offset1 += WritableUtils.decodeVIntSize(b1[o.offset1]);
          o.offset2 += WritableUtils.decodeVIntSize(b2[o.offset2]);
          int comparison = compareBytes(b1, o.offset1, length1, b2, o.offset2, length2);
          o.offset1 += length1;
          o.offset2 += length2;
          if(comparison != 0) {
            return (sort == Order.ASC) ? comparison : (-comparison);
          }
        } break;
        default:
          throw new IOException("Not supported comparison for type:" + type);
        }
      }
    }
    return 0; // equals
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  @Override
  public void setConf(Configuration conf) {
    try {
      if(conf != null) {
        this.conf = conf;
        setTupleMRConf(TupleMRConfig.get(conf));
        TupleMRConfigBuilder.initializeComparators(conf, this.tupleMRConf);
        serializerComparator.setConf(conf);
      }
    } catch(Exception e) {
      throw new RuntimeException(e);
    }
  }

  private void setTupleMRConf(TupleMRConfig config) {
    if(this.tupleMRConf != null) {
      throw new RuntimeException("TupleMR config is already set");
    }
    this.tupleMRConf = config;
    this.serInfo = tupleMRConf.getSerializationInfo();
    this.isMultipleSources = tupleMRConf.getNumIntermediateSchemas() >= 2;
  }

}
TOP

Related Classes of com.datasalt.pangool.tuplemr.mapred.SortComparator$Offsets

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.