Package org.apache.vxquery.datamodel.builders.nodes

Source Code of org.apache.vxquery.datamodel.builders.nodes.DictionaryBuilder

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.vxquery.datamodel.builders.nodes;

import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.vxquery.util.GrowableIntArray;

import edu.uci.ics.hyracks.data.std.algorithms.BinarySearchAlgorithm;
import edu.uci.ics.hyracks.data.std.collections.api.IValueReferenceVector;
import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
import edu.uci.ics.hyracks.data.std.util.ByteArrayAccessibleOutputStream;

public class DictionaryBuilder {
    private final GrowableIntArray stringEndOffsets;

    private final GrowableIntArray sortedSlotIndexes;

    private final ByteArrayAccessibleOutputStream dataBuffer;

    private final DataOutput dataBufferOut;

    private final ByteArrayAccessibleOutputStream tempStringData;

    private final DataOutput tempOut;

    private final UTF8StringPointable tempStringPointable;

    private final IValueReferenceVector sortedStringsVector = new IValueReferenceVector() {
        @Override
        public int getStart(int index) {
            int slot = sortedSlotIndexes.getArray()[index];
            return slot == 0 ? 0 : stringEndOffsets.getArray()[slot - 1];
        }

        @Override
        public int getSize() {
            return stringEndOffsets.getSize();
        }

        @Override
        public int getLength(int index) {
            return UTF8StringPointable.getUTFLength(dataBuffer.getByteArray(), getStart(index)) + 2;
        }

        @Override
        public byte[] getBytes(int index) {
            return dataBuffer.getByteArray();
        }
    };

    private final BinarySearchAlgorithm binSearch = new BinarySearchAlgorithm();

    public DictionaryBuilder() {
        stringEndOffsets = new GrowableIntArray();
        sortedSlotIndexes = new GrowableIntArray();
        dataBuffer = new ByteArrayAccessibleOutputStream();
        dataBufferOut = new DataOutputStream(dataBuffer);
        tempStringData = new ByteArrayAccessibleOutputStream();
        tempOut = new DataOutputStream(tempStringData);
        tempStringPointable = (UTF8StringPointable) UTF8StringPointable.FACTORY.createPointable();
    }

    public void write(ArrayBackedValueStorage abvs) throws IOException {
        DataOutput out = abvs.getDataOutput();
        int sizeOffset = abvs.getLength();
        out.writeInt(0);
        int entryCount = stringEndOffsets.getSize();
        out.writeInt(entryCount);
        int[] entryOffsets = stringEndOffsets.getArray();
        for (int i = 0; i < entryCount; ++i) {
            out.writeInt(entryOffsets[i]);
        }
        int[] sortedOffsets = sortedSlotIndexes.getArray();
        for (int i = 0; i < entryCount; ++i) {
            out.writeInt(sortedOffsets[i]);
        }
        out.write(dataBuffer.getByteArray(), 0, dataBuffer.size());
        IntegerPointable.setInteger(abvs.getByteArray(), sizeOffset, abvs.getLength() - sizeOffset);
    }

    public int lookup(String str) {
        tempStringData.reset();
        try {
            tempOut.writeUTF(str);
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
        tempStringPointable.set(tempStringData.getByteArray(), 0, tempStringData.size());
        return lookup(tempStringPointable);
    }

    public int lookup(UTF8StringPointable str) {
        boolean found = binSearch.find(sortedStringsVector, str);
        int index = binSearch.getIndex();
        if (found) {
            return sortedSlotIndexes.getArray()[index];
        }
        dataBuffer.write(str.getByteArray(), str.getStartOffset(), str.getLength());
        int slotIndex = stringEndOffsets.getSize();
        try {
            dataBufferOut.writeInt(slotIndex);
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
        stringEndOffsets.append(dataBuffer.size());
        sortedSlotIndexes.insert(index, slotIndex);
        return slotIndex;
    }
}
TOP

Related Classes of org.apache.vxquery.datamodel.builders.nodes.DictionaryBuilder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.