import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicLongArray;

/**
 * This data structure maps byte sequences to ordinals.  This is a hash table.  The <code>pointersAndOrdinals</code> AtomicLongArray contains
 * keys, and the <code>ByteDataBuffer</code> contains values.  Each key has two components.  The high 28 bits in the key represent the ordinal.
 * The low 36 bits represent the pointer to the start position of the byte sequence in the ByteDataBuffer.  Each byte sequence is preceded by
 * a variable-length integer (see {@link VarInt}), indicating the length of the sequence.<p/>
 *
 * This implementation is extremely fast.  Even though it would be technically correct and clearer,
 * using a separate int[] array for the pointers, and an AtomicIntegerArray for the ordinals as keys
 * was measured as two orders of magnitude slower.
 *
 * @author dkoszewnik
 */
public class ByteArrayOrdinalMap {

    private final static long EMPTY_BUCKET_VALUE = -1L;

    /// IMPORTANT: Thread safety:  We need volatile access semantics to the individual elements in the
    /// pointersAndOrdinals array.  This only works in JVMs 1.5 or later (JSR 133).
    /// Ordinal is the high 28 bits.  Pointer to byte data is the low 36 bits.
    private AtomicLongArray pointersAndOrdinals;
    private final ByteDataBuffer byteData;
    private final FreeOrdinalTracker freeOrdinalTracker;
    private int size;
    private int sizeBeforeGrow;

    private long pointersByOrdinal[];

    public ByteArrayOrdinalMap() {

    public ByteArrayOrdinalMap(int bufferSize) {
        this.freeOrdinalTracker = new FreeOrdinalTracker();
        this.byteData = new ByteDataBuffer(bufferSize);
        this.pointersAndOrdinals = emptyKeyArray(256);
        this.sizeBeforeGrow = 179; /// 70% load factor
        this.size = 0;

    private ByteArrayOrdinalMap(long keys[], ByteDataBuffer byteData, FreeOrdinalTracker freeOrdinalTracker, int keyArraySize) {
        this.freeOrdinalTracker = freeOrdinalTracker;
        this.byteData = byteData;
        AtomicLongArray pointersAndOrdinals = emptyKeyArray(keyArraySize);
        populateNewHashArray(pointersAndOrdinals, keys);
        this.pointersAndOrdinals = pointersAndOrdinals;
        this.size = keys.length;
        this.sizeBeforeGrow = keyArraySize * 7 / 10; /// 70% load factor


     * Add a sequence of bytes to this map.  If the sequence of bytes has already been added to this map, return the originally assigned ordinal.
     * If the sequence of bytes has not been added to this map, assign and return a new ordinal.  This operation is thread-safe.
    public int getOrAssignOrdinal(ByteDataBuffer serializedRepresentation) {
        int hash = SegmentedByteArrayHasher.hashCode(serializedRepresentation);

        int modBitmask = pointersAndOrdinals.length() - 1;
        int bucket = hash & modBitmask;
        long key = pointersAndOrdinals.get(bucket);

        /// linear probing to resolve collisions.
        while(key != EMPTY_BUCKET_VALUE) {
            if(compare(serializedRepresentation, key)) {
                return (int)(key >> 36);

            bucket = (bucket + 1) & modBitmask;
            key = pointersAndOrdinals.get(bucket);

        return assignOrdinal(serializedRepresentation, hash);

    /// acquire the lock before writing.
    private synchronized int assignOrdinal(ByteDataBuffer serializedRepresentation, int hash) {
        if(size > sizeBeforeGrow)

        /// check to make sure that after acquiring the lock, the element still does not exist.
        /// this operation is akin to double-checked locking which is 'fixed' with the JSR 133 memory model in JVM >= 1.5.
        int modBitmask = pointersAndOrdinals.length() - 1;
        int bucket = hash & modBitmask;
        long key = pointersAndOrdinals.get(bucket);

        while(key != EMPTY_BUCKET_VALUE) {
            if(compare(serializedRepresentation, key)) {
                return (int)(key >> 36);

            bucket = (bucket + 1) & modBitmask;
            key = pointersAndOrdinals.get(bucket);

        /// the ordinal for this object still does not exist in the list, even after the lock has been acquired.
        /// it is up to this thread to add it at the current bucket position.
        int ordinal = freeOrdinalTracker.getFreeOrdinal();
        long pointer = byteData.length();

        VarInt.writeVInt(byteData, (int)serializedRepresentation.length());

        key = ((long)ordinal << 36) | pointer;


        /// this set on the AtomicLongArray has volatile semantics (i.e. behaves like a monitor release).
        /// Any other thread reading this element in the AtomicLongArray will have visibility to all memory writes this thread has made up to this point.
        /// This means the entire byte sequence is guaranteed to be visible to any thread which reads the pointer to that data.
        pointersAndOrdinals.set(bucket, key);

        return ordinal;

     * Assign a predefined ordinal to a serialized representation.<p/>
     * This is intended for use in the client-side heap-safe double snapshot load.
    public void put(ByteDataBuffer serializedRepresentation, int ordinal) {
        if(size > sizeBeforeGrow)

        int hash = SegmentedByteArrayHasher.hashCode(serializedRepresentation);

        int modBitmask = pointersAndOrdinals.length() - 1;
        int bucket = hash & modBitmask;
        long key = pointersAndOrdinals.get(bucket);

        while(key != EMPTY_BUCKET_VALUE) {
            if(compare(serializedRepresentation, key))

            bucket = (bucket + 1) & modBitmask;
            key = pointersAndOrdinals.get(bucket);

        long pointer = byteData.length();

        VarInt.writeVInt(byteData, (int)serializedRepresentation.length());

        key = ((long)ordinal << 36) | pointer;


        pointersAndOrdinals.set(bucket, key);

     * Returns the ordinal for a previously added byte sequence.  If this byte sequence has not been added to the map, then -1 is returned.<p/>
     * This is intended for use in the client-side heap-safe double snapshot load.
     * @param serializedRepresentation
     * @return The ordinal for this serialized representation, or -1.
    public int get(ByteDataBuffer serializedRepresentation) {
        int hash = SegmentedByteArrayHasher.hashCode(serializedRepresentation);

        int modBitmask = pointersAndOrdinals.length() - 1;
        int bucket = hash & modBitmask;
        long key = pointersAndOrdinals.get(bucket);

        /// linear probing to resolve collisions.
        while(key != EMPTY_BUCKET_VALUE) {
            if(compare(serializedRepresentation, key)) {
                return (int)(key >> 36);

            bucket = (bucket + 1) & modBitmask;
            key = pointersAndOrdinals.get(bucket);

        return -1;

     * Remove all entries from this map, but reuse the existing arrays when populating the map next time.
     * This is intended for use in the client-side heap-safe double snapshot load.
    public void clear() {
        for(int i=0;i<pointersAndOrdinals.length();i++) {
            pointersAndOrdinals.set(i, EMPTY_BUCKET_VALUE);
        size = 0;

     * Create an array mapping the ordinals to pointers, so that they can be easily looked up
     * when writing to blob streams.
     * @return the maximum length, in bytes, of any byte sequence in this map.
    public int prepareForWrite() {
        int maxOrdinal = 0;
        int maxLength = 0;

        for(int i=0;i<pointersAndOrdinals.length();i++) {
            long key = pointersAndOrdinals.get(i);
            if(key != EMPTY_BUCKET_VALUE) {
                int ordinal = (int)(key >> 36);
                if(ordinal > maxOrdinal)
                    maxOrdinal = ordinal;

        pointersByOrdinal = new long[maxOrdinal + 1];
        Arrays.fill(pointersByOrdinal, -1);

        for(int i=0;i<pointersAndOrdinals.length();i++) {
            long key = pointersAndOrdinals.get(i);
            if(key != EMPTY_BUCKET_VALUE) {
                int ordinal = (int)(key >> 36);
                pointersByOrdinal[ordinal] = key & 0xFFFFFFFFFL;

                int dataLength = VarInt.readVInt(byteData.getUnderlyingArray(), pointersByOrdinal[ordinal]);
                if(dataLength > maxLength)
                    maxLength = dataLength;

        return maxLength;

     * Reclaim space in the byte array used in the previous cycle, but not referenced in this cycle.<p/>
     * This is achieved by shifting all used byte sequences down in the byte array, then updating
     * the key array to reflect the new pointers and exclude the removed entries.  This is also where ordinals
     * which are unused are returned to the pool.<p/>
     * @param usedOrdinals a bit set representing the ordinals which are currently referenced by any image.
    public void compact(ThreadSafeBitSet usedOrdinals) {
        long populatedReverseKeys[] = new long[size];

        int counter = 0;

        for(int i=0;i<pointersAndOrdinals.length();i++) {
            long key = pointersAndOrdinals.get(i);
            if(key != EMPTY_BUCKET_VALUE) {
                populatedReverseKeys[counter++] = key << 28 | key >>> 36;


        SegmentedByteArray arr = byteData.getUnderlyingArray();
        long currentCopyPointer = 0;

        for(int i=0;i<populatedReverseKeys.length;i++) {
            int ordinal = (int)(populatedReverseKeys[i] & 0xFFFFFFF);

            if(usedOrdinals.get(ordinal)) {
                long pointer = populatedReverseKeys[i] >> 28;
                int length = VarInt.readVInt(arr, pointer);
                length += VarInt.sizeOfVInt(length);

                if(currentCopyPointer != pointer)
                    arr.copy(arr, pointer, currentCopyPointer, length);

                populatedReverseKeys[i] = populatedReverseKeys[i] << 36 | currentCopyPointer;

                currentCopyPointer += length;
            } else {
                populatedReverseKeys[i] = EMPTY_BUCKET_VALUE;


        for(int i=0;i<pointersAndOrdinals.length();i++) {
            pointersAndOrdinals.set(i, EMPTY_BUCKET_VALUE);

        populateNewHashArray(pointersAndOrdinals, populatedReverseKeys);
        size = usedOrdinals.cardinality();

        pointersByOrdinal = null;

     * Write the byte sequence of an object specified by an ordinal to the OutputStream.
     * @throws IOException
    public void writeSerializedObject(OutputStream out, int ordinal) throws IOException {
        long pointer = pointersByOrdinal[ordinal] & 0xFFFFFFFFFL;
        int length = VarInt.readVInt(byteData.getUnderlyingArray(), pointer);
        pointer += VarInt.sizeOfVInt(length);

        byteData.getUnderlyingArray().writeTo(out, pointer, length);

    public boolean isReadyForWriting() {
        return pointersByOrdinal != null;

    public boolean isReadyForAddingObjects() {
        return pointersByOrdinal == null;

    public long getDataSize() {
        return byteData.length();

     * Fill a deserialization state from the serialized data which exists in this ByteArrayOrdinalMap
     * @param copyTo
    void fillDeserializationStateFromData(final FastBlobTypeDeserializationState<?> fill) {
        SimultaneousExecutor executor = new SimultaneousExecutor(1);
        final int numThreads = executor.getMaximumPoolSize();

        fill.ensureCapacity(maxOrdinal() + 1);

        for(int i=0;i<numThreads;i++) {
            final int threadNumber = i;
            executor.execute(new Runnable() {
                public void run() {
                    FastBlobDeserializationRecord rec = new FastBlobDeserializationRecord(fill.getSchema(), byteData.getUnderlyingArray());
                    for(int i=threadNumber;i<pointersAndOrdinals.length();i += numThreads) {
                        long pointerAndOrdinal = pointersAndOrdinals.get(i);
                        if(pointerAndOrdinal != EMPTY_BUCKET_VALUE) {
                            long pointer = pointerAndOrdinal & 0xFFFFFFFFFL;
                            int ordinal = (int)(pointerAndOrdinal >> 36);

                            int sizeOfData = VarInt.readVInt(byteData.getUnderlyingArray(), pointer);
                            pointer += VarInt.sizeOfVInt(sizeOfData);


                            fill.add(ordinal, rec);


     * Copy all of the data from this ByteArrayOrdinalMap to the provided FastBlobTypeSerializationState.
     * Image memberships for each ordinal are determined via the provided array of ThreadSafeBitSets.
     * @param destState
     * @param imageMemberships
     * @param stateOrdinalMappers
    void copySerializedObjectData(final FastBlobTypeSerializationState<?> destState, final ThreadSafeBitSet imageMemberships[],
            final OrdinalMapping ordinalMapping) {

        final StateOrdinalMapping stateOrdinalMapping = ordinalMapping.createStateOrdinalMapping(destState.getName(), maxOrdinal());

        SimultaneousExecutor executor = new SimultaneousExecutor(8);
        final int numThreads = executor.getMaximumPoolSize();

        for(int i=0;i<numThreads;i++) {
            final int threadNumber = i;
            executor.submit( new Runnable() {
                public void run() {
                    final ByteDataBuffer mappedBuffer = new ByteDataBuffer();
                    final FastBlobDeserializationRecord rec = new FastBlobDeserializationRecord(destState.getSchema(), byteData.getUnderlyingArray());
                    final boolean imageMembershipsFlags[] = new boolean[imageMemberships.length];
                    final OrdinalRemapper remapper = new OrdinalRemapper(ordinalMapping);

                    for(int j = threadNumber;j < pointersAndOrdinals.length();j += numThreads) {
                        long pointerAndOrdinal = pointersAndOrdinals.get(j);
                        if(pointerAndOrdinal != EMPTY_BUCKET_VALUE) {
                            long pointer = pointerAndOrdinal & 0xFFFFFFFFFL;
                            int ordinal = (int)(pointerAndOrdinal >> 36);

                            for(int imageIndex=0;imageIndex<imageMemberships.length;imageIndex++) {
                                imageMembershipsFlags[imageIndex] = imageMemberships[imageIndex].get(ordinal);

                            int sizeOfData = VarInt.readVInt(byteData.getUnderlyingArray(), pointer);
                            pointer += VarInt.sizeOfVInt(sizeOfData);

                            remapper.remapOrdinals(rec, mappedBuffer);

                            int newOrdinal = destState.addData(mappedBuffer, FastBlobImageUtils.toLong(imageMembershipsFlags));
                            stateOrdinalMapping.setMappedOrdinal(ordinal, newOrdinal);



    public int maxOrdinal() {
        int maxOrdinal = 0;
        for(int i=0;i<pointersAndOrdinals.length();i++) {
            int ordinal = (int)(pointersAndOrdinals.get(i) >> 36);
            if(ordinal > maxOrdinal)
                maxOrdinal = ordinal;
        return maxOrdinal;

     * Compare the byte sequence contained in the supplied ByteDataBuffer with the
     * sequence contained in the map pointed to by the specified key, byte by byte.
    private boolean compare(ByteDataBuffer serializedRepresentation, long key) {
        long position = key & 0xFFFFFFFFFL;

        int sizeOfData = VarInt.readVInt(byteData.getUnderlyingArray(), position);

        if(sizeOfData != serializedRepresentation.length())
            return false;

        position += VarInt.sizeOfVInt(sizeOfData);

        for(int i=0;i<sizeOfData;i++) {
            if(serializedRepresentation.get(i) != byteData.get(position++))
                return false;

        return true;

     * Grow the key array.  All of the values in the current array must be re-hashed and added to the new array.
    private void growKeyArray() {
        AtomicLongArray newKeys = emptyKeyArray(pointersAndOrdinals.length() * 2);

        long valuesToAdd[] = new long[size];

        int counter = 0;

        /// do not iterate over these values in the same order in which they appear in the hashed array.
        /// if we do so, we cause large clusters of collisions to appear (because we resolve collisions with linear probing).
        for(int i=0;i<pointersAndOrdinals.length();i++) {
            long key = pointersAndOrdinals.get(i);
            if(key != EMPTY_BUCKET_VALUE) {
                valuesToAdd[counter++] = key;


        populateNewHashArray(newKeys, valuesToAdd);

        /// 70% load factor
        sizeBeforeGrow = (newKeys.length() * 7) / 10;
        pointersAndOrdinals = newKeys;

     * Hash all of the existing values specified by the keys in the supplied long array
     * into the supplied AtomicLongArray.
    private void populateNewHashArray(AtomicLongArray newKeys, long[] valuesToAdd) {
        int modBitmask = newKeys.length() - 1;

        for(int i=0;i<valuesToAdd.length;i++) {
            if(valuesToAdd[i] != EMPTY_BUCKET_VALUE) {
                int hash = rehashPreviouslyAddedData(valuesToAdd[i]);
                int bucket = hash & modBitmask;
                while(newKeys.get(bucket) != EMPTY_BUCKET_VALUE)
                    bucket = (bucket + 1) & modBitmask;
                newKeys.set(bucket, valuesToAdd[i]);

     * Get the hash code for the byte array pointed to by the specified key.
    private int rehashPreviouslyAddedData(long key) {
        long position = key & 0xFFFFFFFFFL;

        int sizeOfData = VarInt.readVInt(byteData.getUnderlyingArray(), position);
        position += VarInt.sizeOfVInt(sizeOfData);

        return SegmentedByteArrayHasher.hashCode(byteData.getUnderlyingArray(), position, sizeOfData);

     * Create an AtomicLongArray of the specified size, each value in the array will be EMPTY_BUCKET_VALUE
    private AtomicLongArray emptyKeyArray(int size) {
        AtomicLongArray arr = new AtomicLongArray(size);
        for(int i=0;i<arr.length();i++) {
            arr.set(i, EMPTY_BUCKET_VALUE);
        return arr;

     * This is used to store the server's SerializationState, so that it may resume the delta chain after a new server is brought back up.
     * @param os
     * @throws IOException
    public void serializeTo(OutputStream os) throws IOException {
        /// indicate which state this ByteArrayOrdinalMap was in.
        int isPreparedForWrite = pointersByOrdinal != null ? 1 : 0;

        /// write the hashed key array size
        VarInt.writeVInt(os, pointersAndOrdinals.length());

        /// write the keys in sorted ordinal order to the stream
        long keys[] = new long[size];

        int counter = 0;

        for(int i=0;i<pointersAndOrdinals.length();i++) {
            long key = pointersAndOrdinals.get(i);
            if(key != EMPTY_BUCKET_VALUE) {
                keys[counter++] = key;


        VarInt.writeVInt(os, keys.length);

        for(int i=0;i<keys.length;i++) {
            VarInt.writeVInt(os, (int)(keys[i] >> 36));
            VarInt.writeVLong(os, keys[i] & 0xFFFFFFFFFL);

        /// write the byte data to the stream
        VarInt.writeVLong(os, byteData.length());

        for(long i=0;i<byteData.length();i++) {
            os.write(byteData.get(i) & 0xFF);

        /// write the freeOrdinalTracker to the stream

     * This is used to restore the server's SerializationState, so that it may resume the delta chain after a new server is brought back up.
     * @throws IOException
    public static ByteArrayOrdinalMap deserializeFrom(InputStream is) throws IOException {
        boolean wasPreparedForWrite = == 1;

        int hashedKeyArraySize = VarInt.readVInt(is);

        long keys[] = new long[VarInt.readVInt(is)];

        for(int i=0;i<keys.length;i++) {
            keys[i] = (VarInt.readVLong(is) << 36) | VarInt.readVLong(is);

        ByteDataBuffer byteData = new ByteDataBuffer(262144);

        long byteDataSize = VarInt.readVLong(is);

        for(long i=0;i<byteDataSize;i++) {

        FreeOrdinalTracker freeOrdinalTracker = FreeOrdinalTracker.deserializeFrom(is);

        ByteArrayOrdinalMap deserializedMap = new ByteArrayOrdinalMap(keys, byteData, freeOrdinalTracker, hashedKeyArraySize);


        return deserializedMap;

    public ByteDataBuffer getByteData() {
        return byteData;

    public AtomicLongArray getPointersAndOrdinals() {
        return pointersAndOrdinals;

    public static boolean isPointerAndOrdinalEmpty(long pointerAndOrdinal) {
        return pointerAndOrdinal == EMPTY_BUCKET_VALUE;

    public static long getPointer(long pointerAndOrdinal) {
        return pointerAndOrdinal & 0xFFFFFFFFFL;

    public static int getOrdinal(long pointerAndOrdinal) {
        return (int)(pointerAndOrdinal >> 36);

