// Source Code of org.lilyproject.repository.impl.RecordDecoder

/*
 * Copyright 2012 NGDATA nv
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.lilyproject.repository.impl;


import java.util.ArrayList;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;


import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.lilyproject.bytes.impl.DataInputImpl;
import org.lilyproject.repository.api.FieldType;
import org.lilyproject.repository.api.FieldTypes;
import org.lilyproject.repository.api.IdGenerator;
import org.lilyproject.repository.api.IdRecord;
import org.lilyproject.repository.api.Metadata;
import org.lilyproject.repository.api.QName;
import org.lilyproject.repository.api.Record;
import org.lilyproject.repository.api.RecordFactory;
import org.lilyproject.repository.api.RecordId;
import org.lilyproject.repository.api.RecordType;
import org.lilyproject.repository.api.RepositoryException;
import org.lilyproject.repository.api.SchemaId;
import org.lilyproject.repository.api.Scope;
import org.lilyproject.repository.api.TypeManager;
import org.lilyproject.repository.api.ValueType;
import org.lilyproject.repository.impl.id.SchemaIdImpl;
import org.lilyproject.util.Pair;
import org.lilyproject.util.hbase.LilyHBaseSchema;


import static org.lilyproject.util.hbase.LilyHBaseSchema.RecordCf;
import static org.lilyproject.util.hbase.LilyHBaseSchema.RecordColumn;


/**
 * Methods related to decoding HBase Result objects into Lily Record objects.
 *
 * <p>The methods in this class assume they are supplied with non-deleted records, thus where the
 * {@link LilyHBaseSchema.RecordColumn#DELETED} flag is false.</p>
 */
public class RecordDecoder {
    public static final Map<Scope, byte[]> RECORD_TYPE_ID_QUALIFIERS = new EnumMap<Scope, byte[]>(Scope.class);
    public static final Map<Scope, byte[]> RECORD_TYPE_VERSION_QUALIFIERS = new EnumMap<Scope, byte[]>(Scope.class);


    static {
        RECORD_TYPE_ID_QUALIFIERS.put(Scope.NON_VERSIONED, RecordColumn.NON_VERSIONED_RT_ID.bytes);
        RECORD_TYPE_ID_QUALIFIERS.put(Scope.VERSIONED, RecordColumn.VERSIONED_RT_ID.bytes);
        RECORD_TYPE_ID_QUALIFIERS.put(Scope.VERSIONED_MUTABLE, RecordColumn.VERSIONED_MUTABLE_RT_ID.bytes);


        RECORD_TYPE_VERSION_QUALIFIERS.put(Scope.NON_VERSIONED, RecordColumn.NON_VERSIONED_RT_VERSION.bytes);
        RECORD_TYPE_VERSION_QUALIFIERS.put(Scope.VERSIONED, RecordColumn.VERSIONED_RT_VERSION.bytes);
        RECORD_TYPE_VERSION_QUALIFIERS.put(Scope.VERSIONED_MUTABLE, RecordColumn.VERSIONED_MUTABLE_RT_VERSION.bytes);
    }


    public static final List<byte[]> SYSTEM_FIELDS = new ArrayList<byte[]>();


    static {
        SYSTEM_FIELDS.add(RecordColumn.OCC.bytes);
        SYSTEM_FIELDS.add(RecordColumn.DELETED.bytes);
        SYSTEM_FIELDS.add(RecordColumn.VERSION.bytes);
        SYSTEM_FIELDS.add(RecordColumn.NON_VERSIONED_RT_ID.bytes);
        SYSTEM_FIELDS.add(RecordColumn.NON_VERSIONED_RT_VERSION.bytes);
        SYSTEM_FIELDS.add(RecordColumn.VERSIONED_RT_ID.bytes);
        SYSTEM_FIELDS.add(RecordColumn.VERSIONED_RT_VERSION.bytes);
        SYSTEM_FIELDS.add(RecordColumn.VERSIONED_MUTABLE_RT_ID.bytes);
        SYSTEM_FIELDS.add(RecordColumn.VERSIONED_MUTABLE_RT_VERSION.bytes);
    }


    private TypeManager typeManager;
    private IdGenerator idGenerator;
    private RecordFactory recordFactory;


    public RecordDecoder(TypeManager typeManager, IdGenerator idGenerator, RecordFactory recordFactory) {
        this.typeManager = typeManager;
        this.idGenerator = idGenerator;
        this.recordFactory = recordFactory;
    }


    /**
     * Decode the given HBase result into a Lily Record assuming it contains the state of the latest version of
     * the record (if the record has any versions, otherwise it only contains non-versioned fields).
     */
    public Record decodeRecord(Result result) throws InterruptedException, RepositoryException {
        Long latestVersion = getLatestVersion(result);
        RecordId recordId = idGenerator.fromBytes(result.getRow());
        return decodeRecord(recordId, latestVersion, null, result, typeManager.getFieldTypesSnapshot());
    }


    /**
     * Gets the requested version of the record (fields and recordTypes) from the Result object.
     */
    public Record decodeRecord(RecordId recordId, Long requestedVersion, ReadContext readContext,
                               Result result, FieldTypes fieldTypes) throws InterruptedException, RepositoryException {
        Record record = recordFactory.newRecord(recordId);
        record.setVersion(requestedVersion);


        // If the version is null, this means the record has no version an thus only contains non-versioned fields (if any)
        // All non-versioned fields are stored at version 1, so we extract the fields at version 1
        Long versionToRead = (requestedVersion == null) ? 1L : requestedVersion;


        // Get a map of all fields with their values for each (cell-)version
        NavigableMap<byte[], NavigableMap<Long, byte[]>> mapWithVersions = result.getMap().get(RecordCf.DATA.bytes);
        if (mapWithVersions != null) {
            // Iterate over all columns
            for (Map.Entry<byte[], NavigableMap<Long, byte[]>> columnWithAllVersions : mapWithVersions.entrySet()) {
                // Check if the retrieved column is from a data field, and not a system field
                byte[] key = columnWithAllVersions.getKey();
                if (key[0] == LilyHBaseSchema.RecordColumn.DATA_PREFIX) {
                    NavigableMap<Long, byte[]> allValueVersions = columnWithAllVersions.getValue();
                    // Get the entry for the version (can be a cell with a lower version number if the field was not changed)
                    Map.Entry<Long, byte[]> ceilingEntry = allValueVersions.ceilingEntry(versionToRead);
                    if (ceilingEntry != null) {
                        // Extract and decode the value of the field
                        ExtractedField field =
                                extractField(key, ceilingEntry.getValue(), readContext, fieldTypes);
                        if (field != null) {
                            record.setField(field.type.getName(), field.value);
                            if (field.metadata != null) {
                                record.setMetadata(field.type.getName(), field.metadata);
                            }
                        }
                    }
                }
            }
        }


        for (Scope scope : Scope.values()) {
            Pair<SchemaId, Long> recordTypePair =  requestedVersion == null ? extractLatestRecordType(scope, result) :
                extractVersionRecordType(scope, result, requestedVersion);
            if (recordTypePair != null) {
                // We read the last version of the record type, though it might seem more logical to read
                // the exact version stored in the record. However, this doesn't make any difference, since
                // we just need the name of the record type, which is the same for all versions. The reason
                // why reading the last version is preferred is because at the time of this writing, only
                // the last version of record types is cached, and this makes an enormous difference in
                // record read or scan speed.
                QName recordTypeName =
                        typeManager.getRecordTypeById(recordTypePair.getV1(), null).getName();
                record.setRecordType(scope, recordTypeName, recordTypePair.getV2());
                if (readContext != null) {
                    readContext.setRecordTypeId(scope, recordTypePair.getV1());
                }
            }
        }


        return record;
    }


    /**
     * Version of #decodeRecord() which returns an {@link IdRecord} instance rather than a {@link Record} instance.
     */
    public IdRecord decodeRecordWithIds(Result result) throws InterruptedException, RepositoryException {
        Long latestVersion = getLatestVersion(result);
        RecordId recordId = idGenerator.fromBytes(result.getRow());
        return decodeRecordWithIds(recordId, latestVersion, result, typeManager.getFieldTypesSnapshot());
    }


    /**
     * Version of #decodeRecord() which returns an {@link IdRecord} instance rather than a {@link Record} instance.
     */
    public IdRecord decodeRecordWithIds(RecordId recordId, Long requestedVersion, Result result, FieldTypes fieldTypes)
            throws InterruptedException, RepositoryException {
        final ReadContext readContext = new ReadContext();
        final Record record = decodeRecord(recordId, requestedVersion, readContext, result, fieldTypes);


        Map<SchemaId, QName> idToQNameMapping = new HashMap<SchemaId, QName>();
        for (FieldType fieldType : readContext.getFieldTypes().values()) {
            idToQNameMapping.put(fieldType.getId(), fieldType.getName());
        }


        Map<Scope, SchemaId> recordTypeIds = new EnumMap<Scope, SchemaId>(Scope.class);
        for (Map.Entry<Scope, SchemaId> entry : readContext.getRecordTypes().entrySet()) {
            recordTypeIds.put(entry.getKey(), entry.getValue());
        }


        return new IdRecordImpl(record, idToQNameMapping, recordTypeIds);
    }


    /**
     * Gets the requested version of the record (fields and recordTypes) from the Result object.
     * This method is optimized for reading multiple versions.
     */
    public List<Record> decodeRecords(RecordId recordId, List<Long> requestedVersions, Result result,
                                      FieldTypes fieldTypes) throws InterruptedException, RepositoryException {
        Map<Long, Record> records = new HashMap<Long, Record>(requestedVersions.size());
        Map<Long, Set<Scope>> scopes = new HashMap<Long, Set<Scope>>(requestedVersions.size());
        for (Long requestedVersion : requestedVersions) {
            Record record = recordFactory.newRecord(recordId);
            record.setVersion(requestedVersion);
            records.put(requestedVersion, record);
            scopes.put(requestedVersion, EnumSet.noneOf(Scope.class));
        }


        // Get a map of all fields with their values for each (cell-)version
        NavigableMap<byte[], NavigableMap<Long, byte[]>> mapWithVersions = result.getMap().get(RecordCf.DATA.bytes);
        if (mapWithVersions != null) {


            // Iterate over all columns
            for (Map.Entry<byte[], NavigableMap<Long, byte[]>> columnWithAllVersions : mapWithVersions.entrySet()) {


                // Check if the retrieved column is from a data field, and not a system field
                byte[] key = columnWithAllVersions.getKey();
                if (key[0] == RecordColumn.DATA_PREFIX) {
                    NavigableMap<Long, byte[]> allValueVersions = columnWithAllVersions.getValue();


                    // Keep the last decoded field value, to avoid decoding the same value again and again if unchanged
                    // between versions (sparse storage). Note that lastDecodedField can be null, in case of a field
                    // deletion marker
                    Long lastDecodedFieldVersion = null;
                    ExtractedField lastDecodedField = null;
                    for (Long versionToRead : requestedVersions) {
                        Record record = records.get(versionToRead);
                        // Get the entry for the version (can be a cell with a lower version number if the field was
                        // not changed)
                        Map.Entry<Long, byte[]> ceilingEntry = allValueVersions.ceilingEntry(versionToRead);
                        if (ceilingEntry != null) {
                            if (lastDecodedFieldVersion == null ||
                                    !lastDecodedFieldVersion.equals(ceilingEntry.getKey())) {
                                // Not yet decoded, do it now
                                lastDecodedFieldVersion = ceilingEntry.getKey();
                                lastDecodedField = extractField(key, ceilingEntry.getValue(), null, fieldTypes);
                            }
                            if (lastDecodedField != null) {
                                record.setField(lastDecodedField.type.getName(), lastDecodedField.value);
                                scopes.get(versionToRead).add(lastDecodedField.type.getScope());
                                if (lastDecodedField.metadata != null) {
                                    record.setMetadata(lastDecodedField.type.getName(), lastDecodedField.metadata);
                                }
                            }
                        }
                    }
                }
            }
        }


        // Add the record types to the records
        for (Map.Entry<Long, Record> recordEntry : records.entrySet()) {
            Set<Scope> scopesForVersion = scopes.get(recordEntry.getKey());


            // We're only adding the record types if any fields were read.
            if (!scopesForVersion.isEmpty()) {
                // Get the record type for each scope


                // At least the non-versioned record type should be read since that is also the record type of the whole record
                scopesForVersion.add(Scope.NON_VERSIONED);
                for (Scope scope : scopesForVersion) {
                    Pair<SchemaId, Long> recordTypePair = extractVersionRecordType(scope, result, recordEntry.getKey());
                    if (recordTypePair != null) {
                        RecordType recordType =
                                typeManager.getRecordTypeById(recordTypePair.getV1(), recordTypePair.getV2());
                        recordEntry.getValue().setRecordType(scope, recordType.getName(), recordType.getVersion());
                    }
                }
            }
        }


        return new ArrayList<Record>(records.values());
    }


    private static class ExtractedField {
        FieldType type;
        Object value;
        Metadata metadata;


        ExtractedField(FieldType type, Object value, Metadata metadata) {
            this.type = type;
            this.value = value;
            this.metadata = metadata;
        }
    }


    private ExtractedField extractField(byte[] key, byte[] prefixedValue, ReadContext context,
                                                 FieldTypes fieldTypes)
            throws RepositoryException, InterruptedException {
        byte flags = prefixedValue[0];
        if (FieldFlags.isDeletedField(flags)) {
            return null;
        }
        FieldType fieldType = fieldTypes.getFieldType(new SchemaIdImpl(Bytes.tail(key, key.length - 1)));
        if (context != null) {
            context.addFieldType(fieldType);
        }
        ValueType valueType = fieldType.getValueType();


        Metadata metadata = null;
        int metadataSpace = 0; // space taken up by metadata (= metadata itself + length suffix)
        int metadataEncodingVersion = FieldFlags.getFieldMetadataVersion(flags);
        if (metadataEncodingVersion == 0) {
            // there is no metadata
        } else if (metadataEncodingVersion == 1) {
            int metadataSize = Bytes.toInt(prefixedValue, prefixedValue.length - Bytes.SIZEOF_INT, Bytes.SIZEOF_INT);
            metadataSpace = metadataSize + Bytes.SIZEOF_INT;
            metadata = MetadataSerDeser.read(
                    new DataInputImpl(prefixedValue, prefixedValue.length - metadataSpace, metadataSize));
        } else {
            throw new RuntimeException("Unsupported field metadata encoding version: " + metadataEncodingVersion);
        }


        Object value = valueType.read(new DataInputImpl(prefixedValue, FieldFlags.SIZE_OF_FIELD_FLAGS,
                prefixedValue.length - FieldFlags.SIZE_OF_FIELD_FLAGS - metadataSpace));


        return new ExtractedField(fieldType, value, metadata);
    }


    /**
     * Extracts the latest record type for a specific scope from the Result.
     */
    private Pair<SchemaId, Long> extractLatestRecordType(Scope scope, Result result) {
        byte[] idBytes = getLatest(result, RecordCf.DATA.bytes, RECORD_TYPE_ID_QUALIFIERS.get(scope));
        byte[] versionBytes = getLatest(result, RecordCf.DATA.bytes, RECORD_TYPE_VERSION_QUALIFIERS.get(scope));
        if ((idBytes == null || idBytes.length == 0) || (versionBytes == null || versionBytes.length == 0)) {
            return null; // No record type was found
        }
        return new Pair<SchemaId, Long>(new SchemaIdImpl(idBytes), Bytes.toLong(versionBytes));
    }


    /**
     * Gets the latest value for a family/qualifier from a Result object, using its
     * getMap(). Most of the time this will be more efficient than using
     * Result.getValue() since the map will need to built anyway when reading the
     * record.
     */
    public byte[] getLatest(Result result, byte[] family, byte[] qualifier) {
        NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> map = result.getMap();
        if (map == null) {
            return null;
        }


        NavigableMap<byte[], NavigableMap<Long, byte[]>> qualifiers = map.get(family);
        if (qualifiers == null) {
            return null;
        }


        NavigableMap<Long, byte[]> timestamps = qualifiers.get(qualifier);
        if (timestamps == null) {
            return null;
        }


        Map.Entry<Long, byte[]> entry = timestamps.lastEntry();
        return entry == null ? null : entry.getValue();
    }


    /**
     * Collects the FieldType and RecordType objects used during reading a record.
     * Allows to reliably map QName's to types even if these would have been renamed during the reading
     * operation.
     */
    public static class ReadContext {
        private Map<SchemaId, FieldType> fieldTypes = new HashMap<SchemaId, FieldType>();
        private Map<Scope, SchemaId> recordTypes = new EnumMap<Scope, SchemaId>(Scope.class);


        public void addFieldType(FieldType fieldType) {
            fieldTypes.put(fieldType.getId(), fieldType);
        }


        public void setRecordTypeId(Scope scope, SchemaId recordTypeId) {
            recordTypes.put(scope, recordTypeId);
        }


        public Map<Scope, SchemaId> getRecordTypes() {
            return recordTypes;
        }


        public Map<SchemaId, FieldType> getFieldTypes() {
            return fieldTypes;
        }
    }


    /**
     * Extracts the record type for a specific version and a specific scope
     */
    public Pair<SchemaId, Long> extractVersionRecordType(Scope scope, Result result, Long version) {
        NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> allVersionsMap = result.getMap();
        NavigableMap<byte[], NavigableMap<Long, byte[]>> allColumnsAllVersionsMap = allVersionsMap
                .get(RecordCf.DATA.bytes);


        byte[] recordTypeIdColumnName = RECORD_TYPE_ID_QUALIFIERS.get(scope);
        byte[] recordTypeVersionColumnName = RECORD_TYPE_VERSION_QUALIFIERS.get(scope);
        // Get recordTypeId
        Map.Entry<Long, byte[]> idCeilingEntry = null;
        NavigableMap<Long, byte[]> recordTypeIdMap = allColumnsAllVersionsMap.get(recordTypeIdColumnName);
        if (recordTypeIdMap != null) {
            idCeilingEntry = recordTypeIdMap.ceilingEntry(version);
        }
        SchemaId recordTypeId;
        if (idCeilingEntry == null) {
            return null; // No record type was found
        }
        recordTypeId = new SchemaIdImpl(idCeilingEntry.getValue());


        // Get recordTypeVersion
        Long recordTypeVersion;
        Map.Entry<Long, byte[]> versionCeilingEntry = null;
        NavigableMap<Long, byte[]> recordTypeVersionMap = allColumnsAllVersionsMap.get(recordTypeVersionColumnName);
        if (recordTypeVersionMap != null) {
            versionCeilingEntry = recordTypeVersionMap.ceilingEntry(version);
        }
        if (versionCeilingEntry == null) {
            return null; // No record type was found, we should never get here: if there is an id there should also be a version
        }
        recordTypeVersion = Bytes.toLong(versionCeilingEntry.getValue());
        return new Pair<SchemaId, Long>(recordTypeId, recordTypeVersion);
    }


    // Fixme consider returning  Optional<Long>
    public Long getLatestVersion(Result result) {
        byte[] latestVersionBytes = getLatest(result, RecordCf.DATA.bytes, LilyHBaseSchema.RecordColumn.VERSION.bytes);
        return latestVersionBytes != null ? Bytes.toLong(latestVersionBytes) : null;
    }


    public static void addSystemColumnsToGet(Get get) {
        for (byte[] field : SYSTEM_FIELDS) {
            get.addColumn(RecordCf.DATA.bytes, field);
        }
    }


    public static void addSystemColumnsToScan(Scan scan) {
        for (byte[] field : SYSTEM_FIELDS) {
            scan.addColumn(RecordCf.DATA.bytes, field);
        }
    }
}
// Source Code of org.lilyproject.repository.impl.RecordDecoder
//
// Related Classes of org.lilyproject.repository.impl.RecordDecoder