Package org.apache.pig.backend.hadoop.executionengine.mapReduceLayer

Source Code of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigBytesRawComparator

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.backend.hadoop.executionengine.mapReduceLayer;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapred.JobConf;
import org.apache.pig.data.BinInterSedes;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.io.NullableBytesWritable;
import org.apache.pig.impl.util.ObjectSerializer;

public class PigBytesRawComparator extends WritableComparator implements Configurable {

    private final Log mLog = LogFactory.getLog(getClass());
    private boolean[] mAsc;
    private WritableComparator mWrappedComp;

    public PigBytesRawComparator() {
        super(NullableBytesWritable.class);
        mWrappedComp = new BinInterSedes.BinInterSedesTupleRawComparator();
    }

    public void setConf(Configuration conf) {
        try {
            mAsc = (boolean[])ObjectSerializer.deserialize(conf.get(
                "pig.sortOrder"));
        } catch (IOException ioe) {
            mLog.error("Unable to deserialize pig.sortOrder " +
                ioe.getMessage());
            throw new RuntimeException(ioe);
        }
        if (mAsc == null) {
            mAsc = new boolean[1];
            mAsc[0] = true;
        }
        ((BinInterSedes.BinInterSedesTupleRawComparator)mWrappedComp).setConf(conf);
    }

    public Configuration getConf() {
        return null;
    }

    /**
     * Compare two NullableBytesWritables as raw bytes.
     * If both are null, then the indices are compared.
     * If neither are null
     *    and both are bytearrays, then direct Writable.compareBytes is used.
     *    For non-bytearrays, we use BinInterSedesTupleRawComparator.
     * If either is null, null one is defined to be less.
     */
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        int rc = 0;

        // If either are null, handle differently.
        if (b1[s1] == 0 && b2[s2] == 0) {
            // Checking if both of them are ByteArrays.
            // b1[s1+1] is always TUPLE_1 so skipping.
            // b1[s1+2] contains the datatype followed by a datasize.
            // No need to read the actual size since it is given from l1&l2.
            // We just need to skip those bytes here.
            // This shortcut is placed here for performances purposes.(PIG-2975)
            boolean dataByteArraysCompare=true;
            int offset1,offset2,length1,length2;
            switch(b1[s1 + 2]) {
              case BinInterSedes.TINYBYTEARRAY:
                // skipping mNull, TUPLE_1, TINYBYTEARRAY(1byte), size(1byte)
                offset1 = s1 + 4;
                length1 = l1 - 4;
                break;
              case BinInterSedes.SMALLBYTEARRAY:
                // skipping mNull, TUPLE_1, SMALLBYTEARRAY(1byte), size(2byte)
                offset1 = s1 + 5;
                length1 = l1 - 5;
                break;
              case BinInterSedes.BYTEARRAY:
                // skipping mNull, TUPLE_1, BYTEARRAY(1byte), size(4byte)
                offset1 = s1 + 7;
                length1 = l1 - 7;
                break;
              default:
                offset1 = length1 = 0;
                dataByteArraysCompare = false;
            }
            switch(b2[s2 + 2]) {
              case BinInterSedes.TINYBYTEARRAY:
                offset2 = s2 + 4;
                length2 = l2 - 4;
                break;
              case BinInterSedes.SMALLBYTEARRAY:
                offset2 = s2 + 5;
                length2 = l2 - 5;
                break;
              case BinInterSedes.BYTEARRAY:
                offset2 = s2 + 7;
                length2 = l2 - 7;
                break;
              default:
                offset2 = length2 = 0;
                dataByteArraysCompare=false;
            }
            if( dataByteArraysCompare ) {
              rc = WritableComparator.compareBytes(b1, offset1, length1, b2, offset2, length2);
            } else {
              // Subtract 2, one for null byte and one for index byte. Also, do not reverse the sign
              // of rc when mAsc[0] is false because BinInterSedesTupleRawComparator.compare() already
              // takes that into account.
              return mWrappedComp.compare(b1, s1 + 1, l1 - 2, b2, s2 + 1, l2 - 2);
            }
        } else {
            // For sorting purposes two nulls are equal.
            if (b1[s1] != 0 && b2[s2] != 0) rc = 0;
            else if (b1[s1] != 0) rc = -1;
            else rc = 1;
        }
        if (!mAsc[0]) rc *= -1;
        return rc;
    }

    public int compare(Object o1, Object o2) {
        NullableBytesWritable nbw1 = (NullableBytesWritable)o1;
        NullableBytesWritable nbw2 = (NullableBytesWritable)o2;
        int rc = 0;

        // If either are null, handle differently.
        if (!nbw1.isNull() && !nbw2.isNull()) {
            rc = DataType.compare(nbw1.getValueAsPigType(), nbw2.getValueAsPigType());
        } else {
            // For sorting purposes two nulls are equal.
            if (nbw1.isNull() && nbw2.isNull()) rc = 0;
            else if (nbw1.isNull()) rc = -1;
            else rc = 1;
        }
        if (!mAsc[0]) rc *= -1;
        return rc;
    }

}
TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigBytesRawComparator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.