/**
* (c) Copyright 2012 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kiji.schema.layout;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.google.common.base.Joiner;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import com.google.common.collect.ImmutableBiMap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.avro.Schema;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.kiji.annotations.ApiAudience;
import org.kiji.annotations.ApiStability;
import org.kiji.schema.InternalKijiError;
import org.kiji.schema.KijiColumnName;
import org.kiji.schema.KijiSchemaTable;
import org.kiji.schema.NoSuchColumnException;
import org.kiji.schema.avro.CellSchema;
import org.kiji.schema.avro.ColumnDesc;
import org.kiji.schema.avro.ColumnNameTranslator;
import org.kiji.schema.avro.FamilyDesc;
import org.kiji.schema.avro.LocalityGroupDesc;
import org.kiji.schema.avro.RowKeyComponent;
import org.kiji.schema.avro.RowKeyEncoding;
import org.kiji.schema.avro.RowKeyFormat;
import org.kiji.schema.avro.RowKeyFormat2;
import org.kiji.schema.avro.SchemaStorage;
import org.kiji.schema.avro.SchemaType;
import org.kiji.schema.avro.TableLayoutDesc;
import org.kiji.schema.impl.Versions;
import org.kiji.schema.layout.KijiTableLayout.LocalityGroupLayout.FamilyLayout;
import org.kiji.schema.layout.KijiTableLayout.LocalityGroupLayout.FamilyLayout.ColumnLayout;
import org.kiji.schema.layout.impl.ColumnId;
import org.kiji.schema.util.FromJson;
import org.kiji.schema.util.Hasher;
import org.kiji.schema.util.KijiNameValidator;
import org.kiji.schema.util.ProtocolVersion;
import org.kiji.schema.util.ResourceUtils;
import org.kiji.schema.util.ToJson;
/**
* Layout of a Kiji table.
*
* <p>
* Kiji uses the term <i>layout</i> to describe the structure of a table.
* Kiji does not use the term <i>schema</i> to avoid confusion with Avro schemas or XML schemas.
* </p>
*
* <p>
* KijiTableLayout wraps a layout descriptor represented as a
* {@link org.kiji.schema.avro.TableLayoutDesc TableLayoutDesc} Avro record.
* KijiTableLayout provides strict validation and accessors to navigate through the layout.
* </p>
*
* <p>
* KijiTableLayouts can be created via one of two methods: from a concrete layout with
* {@link #newLayout(TableLayoutDesc)}, or as a layout update from a preexisting
* KijiTableLayout, with {@link #createUpdatedLayout(TableLayoutDesc,KijiTableLayout)}.
* For the format requirements of layout descriptors for these methods, see the
* "Layout descriptors" section below.
* </p>
*
* <h1>Overall structure</h1>
* <p>At the top-level, a table contains:
* <ul>
* <li>the table name and description;</li>
* <li>how row keys are encoded;</li>
* <li>the table locality groups.</li>
* </ul>
* </p>
*
* <p>Each locality group has:
* <ul>
* <li>a primary name, unique within the table, a description and some name aliases;</li>
* <li>whether the data is to be stored in memory or on disk;</li>
* <li>data retention lifetime;</li>
* <li>maximum number of versions to keep;</li>
* <li>type of compression;</li>
* <li>column families stored in this locality group</li>
* </ul>
* </p>
*
* <p>Each column family has:
* <ul>
* <li>a primary name, globally unique within the table,
* a description and some name aliases;</li>
* <li>for map-type families, the Avro schema of the cell values;</li>
* <li>for group-type families, the collection of columns in the group.</li>
* </ul>
* </p>
*
* <p>Each column in a group-type family has:
* <ul>
* <li>a primary name, unique within the family, a description and some name aliases;</li>
* <li>an Avro schema.</li>
* </ul>
* </p>
*
* <h1>Layout descriptors</h1>
*
* Layout descriptors are represented using
* {@link org.kiji.schema.avro.TableLayoutDesc TableLayoutDesc} Avro records.
* Layout descriptors come in two flavors: <i>concrete layouts</i> and <i>layout updates</i>.
*
* <h2><i>Concrete layout descriptors</i></h2>
* A concrete layout descriptor is an absolute, standalone description of a table layout, which
* does not reference or build upon any previous version of the table layout. Column IDs have
* been assigned to all locality groups, families and columns.
*
* <p> Names of tables, locality groups, families and column qualifiers must be valid identifiers.
* Name validation occurs in {@link org.kiji.schema.util.KijiNameValidator KijiNameValidator}.
*
* <h3>Validation rules</h3>
*
* <ul>
* <li> Table names, locality group names, family names, and column names in a group-type family
* must be valid identifiers (no punctuation or symbols).
* Note: map-type family qualifiers are free-form, but never appear in a table layout.
* <li> Locality group names and aliases must be unique within the table.
* <li> Family names and aliases must be unique within the table.
* <li> Group-type family qualifiers must be unique within the family.
* </ul>
*
* <h2><i>Layout update descriptors</i></h2>
* A table layout update descriptor builds on a reference table layout, and describes layout
* modification to apply on the reference layout.
* The reference table layout is specified by writing the ID of the reference layout
* ({@link TableLayoutDesc#layout_id}) into the {@link TableLayoutDesc#reference_layout}.
* This mechanism prevents race conditions when updating the layout of a table.
* The first layout of a newly created table has no reference layout.
*
* <p>During a layout update, the user may delete or declare new locality groups, families and/or
* columns, or modify existing entities, by specifying the new layout. Update validation rules
* are enforced to ensure compatibility (see Validation rules for updates below).
*
* <p>Entities may also be renamed, as long as uniqueness requirements are met.
* Primary name updates must be explicitly annotated by setting the {@code renamedFrom} field of
* the entity being renamed.
* The name of a table cannot be changed.
*
* <p>For example, suppose the reference layout contained one family {@code Info}, containing a
* column {@code Name}, and the user wishes to add a new {@code Address} column to the
* {@code Info} family.
* To perform this update, the user would create a layout update by starting with the existing
* layout, setting the {@code reference_layout} field to the {@code layout_id} of the
* current layout, and adding a new {@link ColumnDesc} record describing the {@code Address}
* column to the {@code columns} field of the {@link FamilyDesc} for the {@code Info} family.
*
* <p>The result of applying a layout update on top of a concrete reference layout is a new
* concrete layout.
*
* <h3> Validation rules for updates </h3>
*
* <p> Updates are subject to the same restrictions as concrete layout descriptors.
* In addition:</p>
*
* <ul>
* <li> The type of a family (map-type or group-type) cannot be changed.
* <li> A family cannot be moved into a different locality group.
* <li> The encoding of Kiji cells (hash, UID, final) cannot be modified.
* <li> The schema of a Kiji cell can only be changed to a schema that is compatible with
* all the former schemas of the column. Schema compatibility requires that the new schema
* allows decoding all former schemas associated to the column or the map-type family.
* </ul>
*
* <h1>Row keys encoding</h1>
*
* A row in a Kiji table is identified by its Kiji row key. Kiji row keys are converted into HBase
* row keys according to the row key encoding specified in the table layout:
* <ul>
* <li> Raw encoding: the user has direct control over the encoding of row keys in the HBase
* table. In other words, the HBase row key is exactly the Kiji row key. These are used
* when the user would like to use arrays of bytes as row keys.
* </li>
* <li> Hashed: Deprecated! The HBase row key is computed as a hash of a single String or
* byte array component.
* </li>
* <li> Hash-prefixed: the HBase row key is computed as the concatenation of a hash of a
* single String or byte array component, followed by that component itself.
* </li>
* <li> Formatted: the row key is comprised of one or more components. Each component can be
* a string, a number or a hash of another component. The user will specify the size
* of this hash. The user also specifies the actual order of the components in the key.
* </li>
* </ul>
*
* Hashing spreads the rows evenly across all the regions in the table. Specifying the size
* of the hash gives the user fine-grained control over how the data is distributed.
*
* <h1>Cell schema</h1>
*
* Kiji cells are encoded according to a schema specified via
* {@link org.kiji.schema.avro.CellSchema CellSchema} Avro records.
* Kiji provides various cell encoding schemes:
* <ul>
* <li> Hash: each Kiji cell is encoded as a hash of the Avro schema, followed by the binary
* encoding of the Avro value.
* </li>
* <li> UID: each Kiji cell is encoded as the unique ID of the Avro schema, followed by the
* binary encoding of the Avro value.
* </li>
* <li> Final: each Kiji cell is encoded as the binary encoding of the Avro value.
* </li>
* </ul>
* See {@link org.kiji.schema.impl.AvroCellEncoder KijiCellEncoder}
* and {@link org.kiji.schema.impl.AvroCellDecoder KijiCellDecoder}
* for more implementation details.
*
* <h1>Column IDs</h1>
*
* Kiji allows the column names to be represented on HBase in multiple modes via
* {@link org.kiji.schema.avro.ColumnNameTranslator ColumnNameTranslator} Avro enumeration.
* By default we use the shortened Kiji column name translation due to space efficiency.
* Depending on compatibility requirements with other HBase tools it may be desirable to use the
* IDENTITY or HBASE_NATIVE column name translators.
*
* <h2>SHORT Kiji column name translation:</h2>
* For storage efficiency purposes, Kiji family and column names are translated into short
* HBase column names by default.
* This translation happens in
* {@link org.kiji.schema.layout.impl.hbase.ShortColumnNameTranslator ShortColumnNameTranslator}
* and relies on
* {@link org.kiji.schema.layout.impl.ColumnId ColumnId}.
* Column IDs are assigned automatically by KijiTableLayout.
* The user may specify column IDs manually. KijiTableLayout checks the consistency of column IDs.
*
* <p>Column IDs cannot be changed (a column ID change is equivalent to deleting the existing column
* and then re-creating it as a new empty column).
*
* <h2>IDENTITY Kiji column name translation:</h2>
* For compatibility with other HBase tools, Kiji family and column names can be written to HBase
* directly.
* This translation happens in
* {@link org.kiji.schema.layout.impl.hbase.IdentityColumnNameTranslator}
* In this mode:
* <ul>
* <li>Kiji locality groups are translated into HBase families.</li>
* <li>Kiji column families and qualifiers are combined to form the HBase
* qualifier("family:qualifier").</li>
* </ul>
*
* <h2>HBASE_NATIVE Kiji column name translation:</h2>
* For compatibility with existing HBase tables, the notion of a Kiji locality group can be
* ignored, mapping Kiji family and column names directly to their HBase equivalents.
* This translation happens in
* {@link org.kiji.schema.layout.impl.hbase.HBaseNativeColumnNameTranslator}
* In this mode:
* <ul>
* <li>Kiji locality groups and column families are translated into HBase families.</li>
* <li>Additionally, Kiji locality groups must match the Kiji column families. This has the
* side effect of requiring a one to one mapping between the Kiji locality groups and column
* families.</li>
* <li>Kiji column qualifiers are combined to form the HBase qualifier.</li>
* </ul>
*
*/
@ApiAudience.Public
@ApiStability.Evolving
public final class KijiTableLayout {
private static final Logger LOG = LoggerFactory.getLogger(KijiTableLayout.class);
// ProtocolVersions specifying when different features were added to layout functionality.
/** All layout versions must use the format 'layout-x.y' to specify what version they use. */
private static final String LAYOUT_PROTOCOL_NAME = "layout";
/**
 * Reports the newest layout version this build of Kiji understands.
 *
 * @return the maximum layout version recognized by this version of Kiji.
 */
public static ProtocolVersion getMaxSupportedLayoutVersion() {
  final ProtocolVersion newestSupported = Versions.MAX_LAYOUT_VERSION;
  return newestSupported;
}
/**
 * Reports the oldest layout version this build of Kiji understands.
 *
 * @return the minimum layout version recognized by this version of Kiji.
 */
public static ProtocolVersion getMinSupportedLayoutVersion() {
  final ProtocolVersion oldestSupported = Versions.MIN_LAYOUT_VERSION;
  return oldestSupported;
}
/** Concrete layout of a locality group. */
@ApiAudience.Public
public final class LocalityGroupLayout {
/** Concrete layout of a family. */
@ApiAudience.Public
public final class FamilyLayout {
/** Concrete layout of a column. */
@ApiAudience.Public
public final class ColumnLayout {
/** Column layout descriptor. */
private final ColumnDesc mDesc;
/** Column name and aliases. */
private final Set<String> mNames;
/** Column ID. */
private ColumnId mId = null;
/**
 * Creates the layout of a single column from its Avro descriptor.
 *
 * <p>The primary name and every alias are validated, the column ID is reconciled with the
 * optional reference layout (the descriptor is updated in place with the reference ID), and
 * the cell schema is validated against the reference schema, when there is one.</p>
 *
 * @param desc Column descriptor. Must not be null.
 * @param reference Optional reference layout, or null.
 * @throws InvalidLayoutException if the layout is invalid or inconsistent.
 */
private ColumnLayout(ColumnDesc desc, ColumnLayout reference)
    throws InvalidLayoutException {
  mDesc = Preconditions.checkNotNull(desc);

  // Collect the primary name and the aliases into one immutable set.
  final String primaryName = desc.getName();
  final Set<String> allNames = new HashSet<String>();
  allNames.add(primaryName);
  allNames.addAll(desc.getAliases());
  mNames = ImmutableSet.copyOf(allNames);

  if (!isValidName(primaryName)) {
    throw new InvalidLayoutException(
        String.format("Invalid column name: '%s'.", primaryName));
  }
  for (String candidate : mNames) {
    if (!isValidAlias(candidate)) {
      throw new InvalidLayoutException(
          String.format("Invalid column alias: '%s'.", candidate));
    }
  }

  // Only a strictly positive ID in the descriptor counts as an explicitly assigned ID.
  final int declaredId = desc.getId();
  if (declaredId > 0) {
    mId = new ColumnId(declaredId);
  }

  CellSchema referenceSchema = null;
  if (null != reference) {
    final ColumnId referenceId = reference.getId();
    if ((null != mId) && !mId.equals(referenceId)) {
      throw new InvalidLayoutException(String.format(
          "Descriptor for column '%s' has ID %s but reference ID is %s.",
          getName(), mId, referenceId));
    }
    // The reference ID always wins and is written back into the descriptor.
    mId = referenceId;
    desc.setId(mId.getId());
    referenceSchema = reference.getDesc().getColumnSchema();
  }

  // Force validation of schema:
  validateCellSchema(mLayoutVersion, mDesc.getColumnSchema(), referenceSchema);
}
/**
 * Returns a fresh copy of the Avro descriptor backing this column.
 *
 * @return A copy of the Avro descriptor for this column.
 */
public ColumnDesc getDesc() {
  final ColumnDesc.Builder copier = ColumnDesc.newBuilder(mDesc);
  return copier.build();
}
/**
 * Returns the primary name of this column, as recorded in its descriptor.
 *
 * @return the primary name for the column.
 */
public String getName() {
  final String primaryName = mDesc.getName();
  return primaryName;
}
/**
 * Returns every name this column is known by.
 *
 * @return the immutable set containing the primary name and the aliases for the column.
 */
public Set<String> getNames() {
  final Set<String> namesAndAliases = mNames;
  return namesAndAliases;
}
/**
 * Returns the ID of this column.
 *
 * @return the ID associated to this column, or null if no ID has been assigned yet.
 */
public ColumnId getId() {
  final ColumnId columnId = mId;
  return columnId;
}
/**
 * Assigns the ID of this column and records it in the column descriptor.
 *
 * <p>The ID must be at least 1 and may only be assigned once.</p>
 *
 * @param cid the ID of the column.
 * @return this column.
 */
private ColumnLayout setId(ColumnId cid) {
  final int rawId = cid.getId();
  Preconditions.checkArgument(rawId >= 1);
  Preconditions.checkState(mId == null);
  mId = cid;
  mDesc.setId(rawId);
  return this;
}
/**
 * Returns the enclosing family layout.
 *
 * @return the family this column belongs to.
 */
public FamilyLayout getFamily() {
  final FamilyLayout enclosingFamily = FamilyLayout.this;
  return enclosingFamily;
}
} // class ColumnLayout
// -------------------------------------------------------------------------------------------
/** Family layout descriptor. */
private final FamilyDesc mDesc;
/** Family name and aliases. */
private final Set<String> mNames;
/** Columns in the family. */
private final ImmutableList<ColumnLayout> mColumns;
/** Map column qualifier name or alias to column layout. */
private final ImmutableMap<String, ColumnLayout> mColumnMap;
/** Bidirectional mapping between column IDs and column names (no aliases). */
private final BiMap<ColumnId, String> mColumnIdNameMap;
/** Family ID. */
private ColumnId mId = null;
// CSOFF: MethodLengthCheck
/**
* Builds a new family layout instance.
*
* <p>The family descriptor is validated and normalized in place:
* <ul>
* <li>its column list is replaced by a mutable copy, from which deleted columns are removed;</li>
* <li>the {@code renamedFrom} field of every column descriptor is cleared;</li>
* <li>the family ID (and, through the column layouts, the column IDs) is written back
* into the descriptor.</li>
* </ul>
* When a reference family layout is given, IDs are inherited from it, the family type
* (map-type vs group-type) must be unchanged, and every column of the reference must be
* accounted for by the descriptor (kept, renamed or explicitly deleted).
* Columns that still have no ID afterwards receive the smallest positive IDs not already in use.
* </p>
*
* @param familyDesc Descriptor of the family. Must not be null; modified in place.
* @param reference Optional reference family layout, or null.
* @throws InvalidLayoutException if the layout is invalid or inconsistent.
* @throws IllegalStateException if two columns of the family end up with the same ID
* (raised through {@code Preconditions.checkState}).
*/
private FamilyLayout(FamilyDesc familyDesc, FamilyLayout reference)
throws InvalidLayoutException {
mDesc = Preconditions.checkNotNull(familyDesc);
// Ensure the array of columns is mutable:
// (deleted columns are removed from it through the iterator further below)
mDesc.setColumns(Lists.newArrayList(mDesc.getColumns()));
// A family is either map-type (has a map schema) or group-type (has columns), never both:
if (!mDesc.getColumns().isEmpty() && (null != mDesc.getMapSchema())) {
throw new InvalidLayoutException(String.format(
"Invalid family '%s' with both map-type and columns",
getName()));
}
// All the recognized names for this family (primary name and aliases):
final Set<String> familyNames = Sets.newHashSet();
familyNames.add(familyDesc.getName());
familyNames.addAll(familyDesc.getAliases());
mNames = ImmutableSet.copyOf(familyNames);
if (!isValidName(familyDesc.getName())) {
throw new InvalidLayoutException(String.format(
"Invalid family name: '%s'.", familyDesc.getName()));
}
// Note: mNames includes the primary name, so it is checked as an alias too.
for (String name : mNames) {
if (!isValidAlias(name)) {
throw new InvalidLayoutException(String.format(
"Invalid family alias: '%s'.", name));
}
}
// Only a strictly positive ID in the descriptor counts as an explicitly assigned ID:
if (familyDesc.getId() > 0) {
mId = new ColumnId(familyDesc.getId());
}
if (reference != null) {
// An explicit ID must agree with the reference; the reference ID is then adopted
// and written back into the descriptor.
if ((mId != null) && !mId.equals(reference.getId())) {
throw new InvalidLayoutException(String.format(
"Descriptor for family '%s' has ID %s but reference ID is %s.",
getName(), mId, reference.getId()));
}
mId = reference.getId();
familyDesc.setId(mId.getId());
// Cannot change family type (group-type vs map-type):
if (reference.isMapType() != this.isMapType()) {
throw new InvalidLayoutException(String.format(
"Invalid layout update for family '%s' from reference type %s to type %s.",
getName(),
reference.isMapType() ? "map" : "group",
this.isMapType() ? "map" : "group"));
}
}
if (this.isMapType()) {
// Force validation of schema:
final CellSchema referenceSchema =
(null != reference) ? reference.getDesc().getMapSchema() : null;
validateCellSchema(mLayoutVersion, mDesc.getMapSchema(), referenceSchema);
}
// Build columns:
/**
* Map of columns from the reference layout.
* Entries are removed as they are processed and linked to column descriptors.
* At the end of the process, this map must be empty.
*/
final BiMap<String, ColumnId> refCIdMap = (reference != null)
? HashBiMap.create(reference.getColumnIdNameMap().inverse())
: HashBiMap.<String, ColumnId>create();
final List<ColumnLayout> columns = Lists.newArrayList();
final Map<String, ColumnLayout> columnMap = Maps.newHashMap();
/** Map of columns in the new layout. */
final BiMap<ColumnId, String> idMap = HashBiMap.create();
/** Columns with no ID assigned yet. */
final List<ColumnLayout> unassigned = Lists.newArrayList();
// An explicit iterator is used so that deleted column descriptors can be removed in place.
final Iterator<ColumnDesc> itColumnDesc = familyDesc.getColumns().iterator();
while (itColumnDesc.hasNext()) {
final ColumnDesc columnDesc = itColumnDesc.next();
// Name under which this column is looked up in the reference layout:
final boolean isRename = (columnDesc.getRenamedFrom() != null);
final String refCName = isRename ? columnDesc.getRenamedFrom() : columnDesc.getName();
// The rename annotation is consumed here and cleared from the descriptor:
columnDesc.setRenamedFrom(null);
if (isRename && (reference == null)) {
throw new InvalidLayoutException(String.format(
"Invalid renaming: cannot find reference family for column '%s:%s'.",
getName(), refCName));
}
final ColumnLayout refCLayout =
(reference != null) ? reference.getColumnMap().get(refCName) : null;
if (isRename && (refCLayout == null)) {
throw new InvalidLayoutException(String.format(
"Invalid renaming: cannot find column '%s:%s' in reference family.",
getName(), refCName));
}
// Mark the reference column as accounted for (null if there is no such reference column):
final ColumnId refCId = refCIdMap.remove(refCName);
if (columnDesc.getDelete()) {
if (refCId == null) {
throw new InvalidLayoutException(String.format(
"Deleted column '%s:%s' does not exist in reference layout.",
mDesc.getName(), refCName));
}
// Deleted columns are dropped from the descriptor and get no layout:
itColumnDesc.remove();
continue;
}
final ColumnLayout cLayout = new ColumnLayout(columnDesc, refCLayout);
columns.add(cLayout);
// Register the column under its primary name and all its aliases:
for (String columnName : cLayout.getNames()) {
if (null != columnMap.put(columnName, cLayout)) {
throw new InvalidLayoutException(String.format(
"Family '%s' contains duplicate column qualifier '%s'.",
getName(), columnName));
}
}
if (cLayout.getId() != null) {
final String previous = idMap.put(cLayout.getId(), cLayout.getName());
Preconditions.checkState(previous == null,
String.format("Duplicate column ID '%s' associated to '%s' and '%s'.",
cLayout.getId(), cLayout.getName(), previous));
} else {
unassigned.add(cLayout);
}
}
// Any reference column left over was neither kept, renamed nor explicitly deleted:
if (!refCIdMap.isEmpty()) {
throw new InvalidLayoutException(String.format(
"Descriptor for family '%s' is missing columns: %s.",
getName(), Joiner.on(",").join(refCIdMap.keySet())));
}
mColumns = ImmutableList.copyOf(columns);
mColumnMap = ImmutableMap.copyOf(columnMap);
// Assign IDs to columns, build ID maps
// Candidate IDs are tried in increasing order starting from 1, skipping IDs already in use.
int nextColumnId = 1;
for (ColumnLayout column : unassigned) {
Preconditions.checkState(column.getId() == null);
while (true) {
final ColumnId columnId = new ColumnId(nextColumnId);
nextColumnId += 1;
if (!idMap.containsKey(columnId)) {
column.setId(columnId);
idMap.put(columnId, column.getName());
break;
}
}
}
mColumnIdNameMap = ImmutableBiMap.copyOf(idMap);
}
/**
 * Returns a fresh copy of the Avro descriptor backing this family.
 *
 * @return a copy of the Avro descriptor for this family.
 */
public FamilyDesc getDesc() {
  final FamilyDesc.Builder copier = FamilyDesc.newBuilder(mDesc);
  return copier.build();
}
/**
 * Returns the primary name of this family, as recorded in its descriptor.
 *
 * @return the primary name for the family.
 */
public String getName() {
  final String primaryName = mDesc.getName();
  return primaryName;
}
/**
 * Returns every name this family is known by.
 *
 * @return the immutable set containing the family primary name and its aliases.
 */
public Set<String> getNames() {
  final Set<String> namesAndAliases = mNames;
  return namesAndAliases;
}
/**
 * Returns the ID of this family.
 *
 * @return the column ID assigned to this family, or null if no ID has been assigned yet.
 */
public ColumnId getId() {
  final ColumnId familyId = mId;
  return familyId;
}
/**
 * Assigns the ID of this family and records it in the family descriptor.
 *
 * <p>The ID must be at least 1 and may only be assigned once.</p>
 *
 * @param cid the ID of the family.
 * @return this family.
 */
private FamilyLayout setId(ColumnId cid) {
  final int rawId = cid.getId();
  Preconditions.checkArgument(rawId >= 1);
  Preconditions.checkState(mId == null);
  mId = cid;
  mDesc.setId(rawId);
  return this;
}
/**
 * Returns the layouts of the columns declared in this family.
 *
 * @return the columns in this family, as an immutable collection.
 */
public Collection<ColumnLayout> getColumns() {
  final Collection<ColumnLayout> familyColumns = mColumns;
  return familyColumns;
}
/**
 * Returns the lookup table of column layouts for this family.
 *
 * <p>Each column is registered under its primary name and under every one of its aliases.</p>
 *
 * @return the immutable mapping from column names and aliases to column layouts.
 */
public Map<String, ColumnLayout> getColumnMap() {
  final Map<String, ColumnLayout> columnsByName = mColumnMap;
  return columnsByName;
}
/**
 * Returns the two-way association between column IDs and column primary names.
 *
 * @return the bidirectional mapping between column names (no aliases) and IDs.
 */
public BiMap<ColumnId, String> getColumnIdNameMap() {
  final BiMap<ColumnId, String> idsToNames = mColumnIdNameMap;
  return idsToNames;
}
/**
 * Returns the enclosing locality group layout.
 *
 * @return the locality group this family belongs to.
 */
public LocalityGroupLayout getLocalityGroup() {
  final LocalityGroupLayout enclosingGroup = LocalityGroupLayout.this;
  return enclosingGroup;
}
/**
 * Tells whether this family is group-type, i.e. its descriptor carries no map schema.
 *
 * @return whether this is a group-type family.
 */
public boolean isGroupType() {
  final boolean hasNoMapSchema = (null == mDesc.getMapSchema());
  return hasNoMapSchema;
}
/**
 * Tells whether this family is map-type, i.e. its descriptor carries a map schema.
 *
 * @return whether this is a map-type family.
 */
public boolean isMapType() {
  return mDesc.getMapSchema() != null;
}
} // class FamilyLayout
// -------------------------------------------------------------------------------------------
/** Locality group descriptor. */
private final LocalityGroupDesc mDesc;
/** Locality group name and aliases. */
private final ImmutableSet<String> mNames;
/** Families in the locality group. */
private final ImmutableList<FamilyLayout> mFamilies;
/** Map family name or alias to family layout. */
private final ImmutableMap<String, FamilyLayout> mFamilyMap;
/** Bidirectional mapping between family IDs and family names (no alias). */
private final BiMap<ColumnId, String> mFamilyIdNameBiMap;
/** Locality group ID. */
private ColumnId mId = null;
/**
* Constructs a locality group layout.
*
* <p>The locality group descriptor is validated and normalized in place:
* <ul>
* <li>its family list is replaced by a mutable copy, from which deleted families are removed;</li>
* <li>the {@code renamedFrom} field of every family descriptor is cleared;</li>
* <li>the locality group ID (and, through the family layouts, the family IDs) is written
* back into the descriptor.</li>
* </ul>
* The TTL and the maximum number of versions must both be strictly positive.
* When a reference locality group is given, IDs are inherited from it and every family of the
* reference must be accounted for by the descriptor (kept, renamed or explicitly deleted).
* Families that still have no ID afterwards receive the smallest positive IDs not already in use.
* </p>
*
* @param lgDesc Locality group descriptor. Must not be null; modified in place.
* @param reference Optional reference locality group, or null.
* @throws InvalidLayoutException if the layout is invalid or inconsistent.
* @throws IllegalStateException if a family name or alias, or a family ID, is used twice within
* the locality group (raised through {@code Preconditions.checkState}).
*/
private LocalityGroupLayout(LocalityGroupDesc lgDesc, LocalityGroupLayout reference)
throws InvalidLayoutException {
mDesc = Preconditions.checkNotNull(lgDesc);
// Ensure the array of families is mutable:
// (deleted families are removed from it through the iterator further below)
mDesc.setFamilies(Lists.newArrayList(mDesc.getFamilies()));
// All the recognized names for this locality group:
final Set<String> names = Sets.newHashSet();
names.add(lgDesc.getName());
names.addAll(lgDesc.getAliases());
mNames = ImmutableSet.copyOf(names);
if (!isValidName(lgDesc.getName())) {
throw new InvalidLayoutException(String.format(
"Invalid locality group name: '%s'.", lgDesc.getName()));
}
// Note: mNames includes the primary name, so it is checked as an alias too.
for (String name : mNames) {
if (!isValidAlias(name)) {
throw new InvalidLayoutException(String.format(
"Invalid locality group alias: '%s'.", name));
}
}
// Only a strictly positive ID in the descriptor counts as an explicitly assigned ID:
if (lgDesc.getId() > 0) {
mId = new ColumnId(lgDesc.getId());
}
if (mDesc.getTtlSeconds() <= 0) {
throw new InvalidLayoutException(String.format(
"Invalid TTL seconds for locality group '%s': TTL must be positive, got %d.",
getName(), mDesc.getTtlSeconds()));
}
if (mDesc.getMaxVersions() <= 0) {
throw new InvalidLayoutException(String.format(
"Invalid max versions for locality group '%s': max versions must be positive, got %d.",
getName(), mDesc.getMaxVersions()));
}
if (reference != null) {
// An explicit ID must agree with the reference; the reference ID is then adopted
// and written back into the descriptor.
if ((mId != null) && !mId.equals(reference.getId())) {
throw new InvalidLayoutException(String.format(
"Descriptor for locality group '%s' has ID %s but reference ID is %s.",
getName(), mId, reference.getId()));
}
mId = reference.getId();
lgDesc.setId(mId.getId());
}
// Build families:
/**
* Map of the family IDs from the reference layout.
* Entries are removed as they are linked to the families in the descriptor.
* Eventually, this map must become empty.
*/
final BiMap<String, ColumnId> refFIdMap = (reference != null)
? HashBiMap.create(reference.getFamilyIdNameMap().inverse())
: HashBiMap.<String, ColumnId>create();
final List<FamilyLayout> families = Lists.newArrayList();
final Map<String, FamilyLayout> familyMap = Maps.newHashMap();
/** Map of families in the new layout. */
final BiMap<ColumnId, String> idMap = HashBiMap.create();
/** Families with no ID assigned yet. */
final List<FamilyLayout> unassigned = Lists.newArrayList();
// An explicit iterator is used so that deleted family descriptors can be removed in place.
final Iterator<FamilyDesc> itFamilyDesc = lgDesc.getFamilies().iterator();
while (itFamilyDesc.hasNext()) {
final FamilyDesc familyDesc = itFamilyDesc.next();
// Name under which this family is looked up in the reference locality group:
final boolean isRename = (familyDesc.getRenamedFrom() != null);
final String refFName = isRename ? familyDesc.getRenamedFrom() : familyDesc.getName();
// The rename annotation is consumed here and cleared from the descriptor:
familyDesc.setRenamedFrom(null);
if (isRename && (reference == null)) {
throw new InvalidLayoutException(String.format(
"Invalid rename: no reference locality group '%s' for family '%s'.",
getName(), refFName));
}
final FamilyLayout refFLayout =
(reference != null) ? reference.getFamilyMap().get(refFName) : null;
if (isRename && (refFLayout == null)) {
throw new InvalidLayoutException(String.format(
"Invalid rename: cannot find reference family '%s' in locality group '%s'.",
refFName, getName()));
}
// Mark the reference family as accounted for (null if there is no such reference family):
final ColumnId refFId = refFIdMap.remove(refFName);
if (familyDesc.getDelete()) {
if (refFId == null) {
throw new InvalidLayoutException(String.format(
"Deleted family '%s' unknown in reference locality group '%s'.",
refFName, getName()));
}
// Deleted families are dropped from the descriptor and get no layout:
itFamilyDesc.remove();
continue;
}
final FamilyLayout fLayout = new FamilyLayout(familyDesc, refFLayout);
families.add(fLayout);
// Register the family under its primary name and all its aliases:
for (String familyName : fLayout.getNames()) {
Preconditions.checkState(familyMap.put(familyName, fLayout) == null,
"Duplicate family name: " + familyName);
}
if (fLayout.getId() != null) {
final String previous = idMap.put(fLayout.getId(), fLayout.getName());
Preconditions.checkState(previous == null,
String.format("Duplicate family ID '%s' associated to '%s' and '%s'.",
fLayout.getId(), fLayout.getName(), previous));
} else {
unassigned.add(fLayout);
}
}
// Any reference family left over was neither kept, renamed nor explicitly deleted:
if (!refFIdMap.isEmpty()) {
throw new InvalidLayoutException(String.format(
"Descriptor for locality group '%s' is missing families: %s",
lgDesc.getName(), Joiner.on(",").join(refFIdMap.keySet())));
}
mFamilies = ImmutableList.copyOf(families);
mFamilyMap = ImmutableMap.copyOf(familyMap);
// Assign IDs to families:
// Candidate IDs are tried in increasing order starting from 1, skipping IDs already in use.
int nextFamilyId = 1;
for (FamilyLayout fLayout : unassigned) {
Preconditions.checkState(fLayout.getId() == null);
while (true) {
final ColumnId fId = new ColumnId(nextFamilyId);
nextFamilyId += 1;
if (!idMap.containsKey(fId)) {
fLayout.setId(fId);
idMap.put(fId, fLayout.getName());
break;
}
}
}
mFamilyIdNameBiMap = ImmutableBiMap.copyOf(idMap);
}
/**
 * Returns the enclosing table layout.
 *
 * @return the table layout this locality group belongs to.
 */
public KijiTableLayout getTableLayout() {
  final KijiTableLayout enclosingTable = KijiTableLayout.this;
  return enclosingTable;
}
/**
 * Returns a fresh copy of the Avro descriptor backing this locality group.
 *
 * @return a copy of the Avro descriptor for this locality group.
 */
public LocalityGroupDesc getDesc() {
  final LocalityGroupDesc.Builder copier = LocalityGroupDesc.newBuilder(mDesc);
  return copier.build();
}
/**
 * Returns the primary name of this locality group, as recorded in its descriptor.
 *
 * @return the locality group primary name.
 */
public String getName() {
  final String primaryName = mDesc.getName();
  return primaryName;
}
/**
 * Returns every name this locality group is known by.
 *
 * @return the immutable set containing the locality group primary name and its aliases.
 */
public Set<String> getNames() {
  final Set<String> namesAndAliases = mNames;
  return namesAndAliases;
}
/**
 * Returns the ID of this locality group.
 *
 * @return the ID associated to this locality group, or null if no ID has been assigned yet.
 */
public ColumnId getId() {
  final ColumnId localityGroupId = mId;
  return localityGroupId;
}
/**
 * Assigns the ID of the locality group and records it in the locality group descriptor.
 *
 * <p>The ID must be at least 1 and may only be assigned once.</p>
 *
 * @param cid the ID of the locality group.
 * @return this locality group.
 */
private LocalityGroupLayout setId(ColumnId cid) {
  final int rawId = cid.getId();
  Preconditions.checkArgument(rawId >= 1);
  Preconditions.checkState(mId == null);
  mId = cid;
  mDesc.setId(rawId);
  return this;
}
/**
 * Returns the layouts of the families declared in this locality group.
 *
 * @return the families in this locality group, in no particular order.
 */
public Collection<FamilyLayout> getFamilies() {
  final Collection<FamilyLayout> groupFamilies = mFamilies;
  return groupFamilies;
}
/**
 * Returns the lookup table of family layouts for this locality group.
 *
 * @return the immutable mapping from family names and aliases to family layouts.
 */
public Map<String, FamilyLayout> getFamilyMap() {
  final Map<String, FamilyLayout> familiesByName = mFamilyMap;
  return familiesByName;
}
/**
 * Returns the two-way association between family IDs and family primary names.
 *
 * @return the bidirectional mapping between family names (no alias) and IDs.
 */
public BiMap<ColumnId, String> getFamilyIdNameMap() {
  final BiMap<ColumnId, String> idsToNames = mFamilyIdNameBiMap;
  return idsToNames;
}
} // class LocalityGroupLayout
// -----------------------------------------------------------------------------------------------
/** Avro record describing the table layout absolutely (no reference layout required). */
private final TableLayoutDesc mDesc;
/** Version of this table layout. */
private final ProtocolVersion mLayoutVersion;
/** Locality groups in the table, in no particular order. */
private final ImmutableList<LocalityGroupLayout> mLocalityGroups;
/** Map locality group name or alias to locality group layout. */
private final ImmutableMap<String, LocalityGroupLayout> mLocalityGroupMap;
/** Families in the table, in no particular order. */
private final ImmutableList<FamilyLayout> mFamilies;
/** Map family names and aliases to family layout. */
private final ImmutableMap<String, FamilyLayout> mFamilyMap;
/** Bidirectional map between locality group names (no alias) and IDs. */
private final ImmutableBiMap<ColumnId, String> mLocalityGroupIdNameMap;
/** All primary column names in the table (including names for map-type families). */
private final ImmutableSet<KijiColumnName> mColumnNames;
/**
* Optional schema table that allows resolution of Avro schemas.
* The schema table is injected in the CellSpec instances created from this layout.
*/
private KijiSchemaTable mSchemaTable;
/**
 * Ensures a row key format (version 1) specified in a layout file is sane.
 *
 * <p>Only the RAW, HASH and HASH_PREFIX encodings are accepted. For HASH and HASH_PREFIX,
 * the hash size must be neither negative nor greater than {@code Hasher.HASH_SIZE_BYTES}.</p>
 *
 * @param format The RowKeyFormat created from the layout file for a table.
 * @throws InvalidLayoutException If the format is invalid.
 */
private void isValidRowKeyFormat1(RowKeyFormat format) throws InvalidLayoutException {
  final RowKeyEncoding rowKeyEncoding = format.getEncoding();
  final boolean isHashEncoding =
      (rowKeyEncoding == RowKeyEncoding.HASH)
      || (rowKeyEncoding == RowKeyEncoding.HASH_PREFIX);

  if ((rowKeyEncoding != RowKeyEncoding.RAW) && !isHashEncoding) {
    // The trailing space keeps the two concatenated fragments from running together.
    throw new InvalidLayoutException("RowKeyFormat only supports encodings "
        + "of type RAW, HASH and HASH_PREFIX. Use RowKeyFormat2 instead");
  }

  if (isHashEncoding) {
    final int hashSize = format.getHashSize();
    // NOTE(review): the message advertises a lower bound of 1 but a hash size of 0 is accepted
    // here; the check is left unchanged since existing layouts may rely on it -- confirm intent.
    if (hashSize < 0 || hashSize > Hasher.HASH_SIZE_BYTES) {
      throw new InvalidLayoutException("HASH or HASH_PREFIX row key formats require hash size "
          + "to be between 1 and " + Hasher.HASH_SIZE_BYTES);
    }
  }
}
/**
 * Ensures a row key format (version 2) specified in a layout file is sane.
 *
 * <p> Only RAW and FORMATTED encodings are accepted. For RAW, no other field is examined.
 * For FORMATTED, the checks are performed in this order: at least one component, a non-null
 * salt, a nullable start index and a range scan start index within [1, number of components],
 * no range scan when key materialization is suppressed, valid and unique component names,
 * and a hash size within [1, {@code Hasher.HASH_SIZE_BYTES}]. </p>
 *
 * @param format The RowKeyFormat2 created from the layout file for a table.
 * @throws InvalidLayoutException If the format is invalid.
 */
private void isValidRowKeyFormat2(RowKeyFormat2 format) throws InvalidLayoutException {
  // RowKeyFormat2 can only contain RAW or FORMATTED encoding types.
  if (format.getEncoding() != RowKeyEncoding.RAW
      && format.getEncoding() != RowKeyEncoding.FORMATTED) {
    throw new InvalidLayoutException("RowKeyFormat2 only supports RAW or FORMATTED encoding. "
        + "Found " + format.getEncoding().name());
  }

  // For RAW encoding, ignore the rest of the fields.
  if (format.getEncoding() == RowKeyEncoding.RAW) {
    return;
  }

  // At least one primitive component.
  if (format.getComponents().size() <= 0) {
    throw new InvalidLayoutException("At least 1 component is required in row key format.");
  }

  if (format.getSalt() == null) {
    // SCHEMA-489. The Avro decoder should replace this with a non-null HashSpec object,
    // but check here for paranoia.
    throw new InvalidLayoutException(
        "Null values for RowKeyFormat2.salt are only allowed for RAW encoding.");
  }

  // Nullable index cannot be the first element or anything greater
  // than the components length (number of components).
  if (format.getNullableStartIndex() <= 0
      || format.getNullableStartIndex() > format.getComponents().size()) {
    throw new InvalidLayoutException("Invalid index for nullable component. The second component"
        + " onwards can be set to null.");
  }

  // Range scan index cannot be the first element or anything greater
  // than the components length (number of components).
  if (format.getRangeScanStartIndex() <= 0
      || format.getRangeScanStartIndex() > format.getComponents().size()) {
    throw new InvalidLayoutException("Invalid range scan index. Range scans are supported "
        + "starting with the second component.");
  }

  // If suppress_key_materialization is true, range scans are impossible, as the
  // key components will not be stored.
  if (format.getSalt().getSuppressKeyMaterialization()
      && format.getRangeScanStartIndex() != format.getComponents().size()) {
    throw new InvalidLayoutException("Range scans are not supported if "
        + "suppress_key_materialization is true. Please set range_scan_start_index "
        + "to components.size");
  }

  // Collect the component names; a size mismatch afterwards reveals repeated names.
  final Set<String> nameset = new HashSet<String>();
  for (RowKeyComponent component : format.getComponents()) {
    // Ensure names are valid "[a-zA-Z_][a-zA-Z0-9_]*".
    if (!isValidName(component.getName())) {
      throw new InvalidLayoutException("Names should begin with a letter followed by a "
          + "combination of letters, numbers and underscores.");
    }
    nameset.add(component.getName());
  }

  // Repeated component names.
  if (nameset.size() != format.getComponents().size()) {
    throw new InvalidLayoutException("Component name already used.");
  }

  // Hash size invalid.
  if (format.getSalt().getHashSize() <= 0
      || format.getSalt().getHashSize() > Hasher.HASH_SIZE_BYTES) {
    throw new InvalidLayoutException("Valid hash sizes are between 1 and "
        + Hasher.HASH_SIZE_BYTES);
  }
}
/**
 * Parses a layout version string into the ProtocolVersion the layout effectively uses.
 *
 * <p> The deprecated version kiji-1.0 is normalized into layout-1.0.0; every other
 * version is returned as parsed. </p>
 *
 * @param version Layout version string.
 * @return the effective layout ProtocolVersion.
 */
private static ProtocolVersion computeLayoutVersion(String version) {
  final ProtocolVersion parsed = ProtocolVersion.parse(version);
  // Deprecated "kiji-1.0" is compatible with "layout-1.0.0".
  final boolean isDeprecatedKiji =
      Objects.equal(parsed, Versions.LAYOUT_KIJI_1_0_0_DEPRECATED);
  return isDeprecatedKiji ? Versions.LAYOUT_1_0_0 : parsed;
}
// CSOFF: MethodLengthCheck
/**
 * Constructs a KijiTableLayout from an Avro descriptor and an optional reference layout.
 *
 * <p> The descriptor is copied, then validated in this order: layout version and the
 * version-gated features, table name, consistency with the reference layout (table name and
 * row key format), layout ID, row key format, and finally every locality group along with
 * its families and columns. Locality groups flagged for deletion are dropped from the
 * descriptor copy. Locality groups that end up without an ID are assigned the smallest
 * unused positive IDs once all descriptors have been processed. </p>
 *
 * @param desc Avro layout descriptor (relative to the reference layout).
 * @param reference Optional reference layout, or null.
 * @throws InvalidLayoutException if the descriptor is invalid or inconsistent wrt reference.
 */
private KijiTableLayout(TableLayoutDesc desc, KijiTableLayout reference)
    throws InvalidLayoutException {
  // Deep-copy the descriptor to prevent mutating a parameter:
  mDesc = TableLayoutDesc.newBuilder(Preconditions.checkNotNull(desc)).build();

  // Ensure the array of locality groups is mutable:
  mDesc.setLocalityGroups(Lists.newArrayList(mDesc.getLocalityGroups()));

  // Check that the version specified in the layout matches the features used.
  // Any compatibility checks belong in this section.
  mLayoutVersion = computeLayoutVersion(mDesc.getVersion());

  if (!Objects.equal(LAYOUT_PROTOCOL_NAME, mLayoutVersion.getProtocolName())) {
    final String exceptionMessage;
    if (Objects.equal(
        Versions.LAYOUT_KIJI_1_0_0_DEPRECATED.getProtocolName(),
        mLayoutVersion.getProtocolName())) {
      // Warn the user if they tried a version number like 'kiji-0.9' or 'kiji-1.1'.
      exceptionMessage =
          String.format("Deprecated layout version protocol '%s' only valid for version '%s',"
          + " but received version '%s'. You should specify a layout version protocol"
          + " as '%s-x.y', not '%s-x.y'.",
          Versions.LAYOUT_KIJI_1_0_0_DEPRECATED.getProtocolName(),
          Versions.LAYOUT_KIJI_1_0_0_DEPRECATED,
          mLayoutVersion,
          LAYOUT_PROTOCOL_NAME,
          Versions.LAYOUT_KIJI_1_0_0_DEPRECATED.getProtocolName());
    } else {
      exceptionMessage = String.format("Invalid version protocol: '%s'. Expected '%s'.",
          mLayoutVersion.getProtocolName(),
          LAYOUT_PROTOCOL_NAME);
    }
    throw new InvalidLayoutException(exceptionMessage);
  }

  if (Versions.MAX_LAYOUT_VERSION.compareTo(mLayoutVersion) < 0) {
    throw new InvalidLayoutException("The maximum layout version we support is "
        + Versions.MAX_LAYOUT_VERSION + "; this layout requires " + mLayoutVersion);
  } else if (Versions.MIN_LAYOUT_VERSION.compareTo(mLayoutVersion) > 0) {
    throw new InvalidLayoutException("The minimum layout version we support is "
        + Versions.MIN_LAYOUT_VERSION + "; this layout requires " + mLayoutVersion);
  }

  // max_filesize and memstore_flushsize were introduced in version 1.2.
  if (Versions.BLOCK_SIZE_LAYOUT_VERSION.compareTo(mLayoutVersion) > 0) {
    if (mDesc.getMaxFilesize() != null) {
      // Cannot use max_filesize if this is the case.
      throw new InvalidLayoutException(
          "Support for specifying max_filesize begins with layout version "
          + Versions.BLOCK_SIZE_LAYOUT_VERSION.toString());
    }

    if (mDesc.getMemstoreFlushsize() != null) {
      // Cannot use memstore_flushsize if this is the case.
      throw new InvalidLayoutException(
          "Support for specifying memstore_flushsize begins with layout version "
          + Versions.BLOCK_SIZE_LAYOUT_VERSION);
    }
  } else {
    if (mDesc.getMaxFilesize() != null && mDesc.getMaxFilesize() <= 0) {
      throw new InvalidLayoutException("max_filesize must be greater than 0");
    }

    if (mDesc.getMemstoreFlushsize() != null && mDesc.getMemstoreFlushsize() <= 0) {
      throw new InvalidLayoutException("memstore_flushsize must be greater than 0");
    }
  }

  // Ability to configure column name translation was introduced in version 1.5
  if (Versions.CONFIGURE_COLUMN_NAME_TRANSLATION_VERSION.compareTo(mLayoutVersion) > 0) {
    if (mDesc.getColumnNameTranslator() != ColumnNameTranslator.SHORT) {
      throw new InvalidLayoutException(
          "Support for specifying non-short column name translators begins with layout version "
          + Versions.CONFIGURE_COLUMN_NAME_TRANSLATION_VERSION);
    }
  }

  // Composite keys and RowKeyFormat2 were introduced in version 1.1.
  if (Versions.RKF2_LAYOUT_VERSION.compareTo(mLayoutVersion) > 0
      && mDesc.getKeysFormat() instanceof RowKeyFormat2) {
    // Cannot use RowKeyFormat2 if this is the case.
    throw new InvalidLayoutException(
        "Support for specifying keys_format as a RowKeyFormat2 begins with layout version "
        + Versions.RKF2_LAYOUT_VERSION);
  }

  if (!isValidName(getName())) {
    throw new InvalidLayoutException(String.format("Invalid table name: '%s'.", getName()));
  }

  // A layout update may change neither the table name nor the row key format:
  if (reference != null) {
    if (!getName().equals(reference.getName())) {
      throw new InvalidLayoutException(String.format(
          "Invalid layout update: layout name '%s' does not match reference layout name '%s'.",
          getName(), reference.getName()));
    }

    if (!mDesc.getKeysFormat().equals(reference.getDesc().getKeysFormat())) {
      throw new InvalidLayoutException(String.format(
          "Invalid layout update from reference row keys format '%s' to row keys format '%s'.",
          reference.getDesc().getKeysFormat(), mDesc.getKeysFormat()));
    }
  }

  // Layout ID: when unspecified, use the reference layout ID + 1 (or 1 without reference).
  if (mDesc.getLayoutId() == null) {
    try {
      final long refLayoutId =
          (reference == null) ? 0 : Long.parseLong(reference.getDesc().getLayoutId());
      final long layoutId = refLayoutId + 1;
      mDesc.setLayoutId(Long.toString(layoutId));
    } catch (NumberFormatException nfe) {
      // Only reachable when a reference layout exists, since nothing is parsed otherwise.
      throw new InvalidLayoutException(String.format(
          "Reference layout for table '%s' has an invalid layout ID: '%s'",
          getName(), reference.getDesc().getLayoutId()));
    }
  }

  if (mDesc.getKeysFormat() instanceof RowKeyFormat) {
    isValidRowKeyFormat1((RowKeyFormat) mDesc.getKeysFormat());
  } else if (mDesc.getKeysFormat() instanceof RowKeyFormat2) {
    // Check validity of row key format.
    isValidRowKeyFormat2((RowKeyFormat2) mDesc.getKeysFormat());
  }

  // Build localities:

  // Reference map from locality group name to locality group ID.
  // Entries are removed as we process locality group descriptors in the new layout.
  // At the end of the process, this map must be empty.
  final BiMap<String, ColumnId> refLGIdMap = (reference == null)
      ? HashBiMap.<String, ColumnId>create()
      : HashBiMap.create(reference.mLocalityGroupIdNameMap.inverse());

  // Map of locality groups in the new layout.
  final List<LocalityGroupLayout> localityGroups = Lists.newArrayList();
  final Map<String, LocalityGroupLayout> lgMap = Maps.newHashMap();
  final BiMap<ColumnId, String> idMap = HashBiMap.create();

  // Locality groups with no ID assigned yet.
  final List<LocalityGroupLayout> unassigned = Lists.newArrayList();

  // All the families in the table.
  final List<FamilyLayout> families = Lists.newArrayList();

  // Map from family name or alias to family layout.
  final Map<String, FamilyLayout> familyMap = Maps.newHashMap();

  // All primary column names (including map-type families).
  final Set<KijiColumnName> columnNames = Sets.newTreeSet();
  // Every column name and alias seen so far, used to detect duplicates.
  final Map<KijiColumnName, ColumnLayout> columnMap = Maps.newHashMap();

  // An explicit iterator is required: deleted locality groups are removed while iterating.
  final Iterator<LocalityGroupDesc> itLGDesc = mDesc.getLocalityGroups().iterator();
  while (itLGDesc.hasNext()) {
    final LocalityGroupDesc lgDesc = itLGDesc.next();
    final boolean isRename = (lgDesc.getRenamedFrom() != null);
    final String refLGName = isRename ? lgDesc.getRenamedFrom() : lgDesc.getName();
    // The rename directive has been captured above; clear it from the descriptor copy.
    lgDesc.setRenamedFrom(null);
    if (isRename && (reference == null)) {
      throw new InvalidLayoutException(String.format(
          "Invalid rename: no reference table layout for locality group '%s'.", refLGName));
    }
    final LocalityGroupLayout refLGLayout =
        (reference != null) ? reference.mLocalityGroupMap.get(refLGName) : null;
    if (isRename && (refLGLayout == null)) {
      throw new InvalidLayoutException(String.format(
          "Invalid rename: cannot find reference locality group '%s'.", refLGName));
    }

    final ColumnId refLGId = refLGIdMap.remove(refLGName);

    if (lgDesc.getDelete()) {
      // This locality group is deleted:
      if (refLGId == null) {
        throw new InvalidLayoutException(String.format(
            "Attempting to delete locality group '%s' unknown in reference layout.",
            refLGName));
      }
      itLGDesc.remove();
      continue;
    }

    // BloomType, block_size were introduced in version 1.2.
    if (Versions.BLOCK_SIZE_LAYOUT_VERSION.compareTo(mLayoutVersion) > 0) {
      if (lgDesc.getBlockSize() != null) {
        // Cannot use block_size if this is the case.
        throw new InvalidLayoutException(
            "Support for specifying block_size begins with layout version "
            + Versions.BLOCK_SIZE_LAYOUT_VERSION);
      }
      if (lgDesc.getBloomType() != null) {
        // Cannot use bloom_type if this is the case.
        throw new InvalidLayoutException(
            "Support for specifying bloom_type begins with layout version "
            + Versions.BLOCK_SIZE_LAYOUT_VERSION);
      }
    } else {
      if (lgDesc.getBlockSize() != null && lgDesc.getBlockSize() <= 0) {
        throw new InvalidLayoutException("block_size must be greater than 0");
      }
    }

    final LocalityGroupLayout lgLayout = new LocalityGroupLayout(lgDesc, refLGLayout);
    localityGroups.add(lgLayout);
    for (String lgName : lgLayout.getNames()) {
      Preconditions.checkState(lgMap.put(lgName, lgLayout) == null,
          "Duplicate locality group name: " + lgName);
    }

    if (lgLayout.getId() != null) {
      final String previous = idMap.put(lgLayout.getId(), lgLayout.getName());
      Preconditions.checkState(previous == null,
          String.format("Duplicate locality group ID '%s' associated to '%s' and '%s'.",
              lgLayout.getId(), lgLayout.getName(), previous));
    } else {
      unassigned.add(lgLayout);
    }

    families.addAll(lgLayout.getFamilies());
    for (FamilyLayout familyLayout : lgLayout.getFamilies()) {
      for (String familyName : familyLayout.getNames()) {
        if (null != familyMap.put(familyName, familyLayout)) {
          throw new InvalidLayoutException(String.format(
              "Layout for table '%s' contains duplicate family name '%s'.",
              getName(), familyName));
        }
      }
      if (familyLayout.isMapType()) {
        // A map-type family is registered as a column name without qualifier.
        Preconditions.checkState(
            columnNames.add(KijiColumnName.create(familyLayout.getName(), null)));
      }

      for (ColumnLayout columnLayout : familyLayout.getColumns()) {
        for (String columnName : columnLayout.getNames()) {
          final KijiColumnName column =
              KijiColumnName.create(familyLayout.getName(), columnName);
          if (null != columnMap.put(column, columnLayout)) {
            throw new InvalidLayoutException(String.format(
                "Layout for table '%s' contains duplicate column '%s'.",
                getName(), column));
          }
        }
        Preconditions.checkState(columnNames.add(
            KijiColumnName.create(familyLayout.getName(), columnLayout.getName())));
      }
    }
  }

  // Every locality group from the reference layout must have been processed above:
  if (!refLGIdMap.isEmpty()) {
    throw new InvalidLayoutException(String.format(
        "Missing descriptor(s) for locality group(s): %s.",
        Joiner.on(",").join(refLGIdMap.keySet())));
  }

  mLocalityGroups = ImmutableList.copyOf(localityGroups);
  mLocalityGroupMap = ImmutableMap.copyOf(lgMap);

  mFamilies = ImmutableList.copyOf(families);
  mFamilyMap = ImmutableMap.copyOf(familyMap);

  mColumnNames = ImmutableSet.copyOf(columnNames);

  // Assign IDs to locality groups, picking the smallest IDs not already in use:
  int nextColumnId = 1;
  for (LocalityGroupLayout localityGroup : unassigned) {
    Preconditions.checkState(localityGroup.getId() == null);
    while (true) {
      final ColumnId columnId = new ColumnId(nextColumnId);
      nextColumnId += 1;
      if (!idMap.containsKey(columnId)) {
        localityGroup.setId(columnId);
        idMap.put(columnId, localityGroup.getName());
        break;
      }
    }
  }

  mLocalityGroupIdNameMap = ImmutableBiMap.copyOf(idMap);
}
// CSON: MethodLengthCheck
/**
 * Returns the Avro descriptor for this table layout.
 *
 * <p> A new record is built from the internal descriptor on every call. </p>
 *
 * @return the Avro descriptor for this table layout.
 */
public TableLayoutDesc getDesc() {
  final TableLayoutDesc copy = TableLayoutDesc.newBuilder(mDesc).build();
  return copy;
}
/**
 * Returns the table name, as recorded in the layout descriptor.
 *
 * @return the table name.
 */
public String getName() {
return mDesc.getName();
}
/**
 * Returns the locality groups in the table, in no particular order.
 *
 * @return the locality groups in the table, in no particular order.
 */
public Collection<LocalityGroupLayout> getLocalityGroups() {
return mLocalityGroups;
}
/**
 * Returns the bidirectional mapping between locality group IDs and primary names.
 *
 * @return the bidirectional mapping between locality group names (no alias) and IDs.
 */
public BiMap<ColumnId, String> getLocalityGroupIdNameMap() {
return mLocalityGroupIdNameMap;
}
/**
 * Returns the locality group layouts indexed by locality group name and alias.
 *
 * @return the map from locality group names and aliases to layouts.
 */
public Map<String, LocalityGroupLayout> getLocalityGroupMap() {
return mLocalityGroupMap;
}
/**
 * Returns the map from family names and aliases to family layouts.
 *
 * @return the map from family names and aliases to family layouts.
 */
public Map<String, FamilyLayout> getFamilyMap() {
return mFamilyMap;
}
/**
 * Returns all the families in the table, in no particular order.
 *
 * @return all the families in the table, in no particular order.
 */
public Collection<FamilyLayout> getFamilies() {
return mFamilies;
}
/**
 * Returns all the primary column names in the table, including map-type families.
 *
 * <p> Column aliases are not part of this set. </p>
 *
 * @return all the primary column names in the table, including map-type families.
 */
public Set<KijiColumnName> getColumnNames() {
return mColumnNames;
}
/**
 * Reports the raw specification record for the specified column.
 *
 * <p> For a map-type family, the family's map schema is reported and the qualifier, if any,
 * is ignored. For a group-type family, the column name must be fully qualified. </p>
 *
 * <p> Note: in most cases, you should use {@link #getCellSpec(KijiColumnName)}. </p>
 *
 * @param columnName Column to report the raw specification record of.
 * @return a copy of the raw specification record for the specified column.
 * @throws NoSuchColumnException if the column does not exist.
 * @throws IllegalArgumentException if the column name designates an entire group-type family.
 */
public CellSchema getCellSchema(KijiColumnName columnName) throws NoSuchColumnException {
  final FamilyLayout fLayout = mFamilyMap.get(columnName.getFamily());
  if (fLayout == null) {
    throw new NoSuchColumnException(String.format(
        "Table '%s' has no family '%s'.", getName(), columnName.getFamily()));
  }
  if (fLayout.isMapType()) {
    return CellSchema.newBuilder(fLayout.getDesc().getMapSchema()).build();
  }

  // Group-type family:
  // The template overload only formats the message when the check actually fails.
  Preconditions.checkArgument(columnName.isFullyQualified(),
      "Cannot get CellFormat for entire group-type family: '%s'.", columnName);
  final FamilyLayout.ColumnLayout cLayout =
      fLayout.getColumnMap().get(columnName.getQualifier());
  if (cLayout == null) {
    throw new NoSuchColumnException(String.format(
        "Table '%s' has no column '%s'.", getName(), columnName));
  }
  return CellSchema.newBuilder(cLayout.getDesc().getColumnSchema()).build();
}
/**
 * Reports the Avro schema of the specified column.
 *
 * <p> This method will return null in most cases on table with layout validation enabled. </p>
 *
 * @param columnName Column name.
 * @return the Avro schema of the column.
 * @throws InvalidLayoutException if the layout is invalid.
 * @throws NoSuchColumnException if the column does not exist.
 * @deprecated With schema validation and layout 1.3, there is no single Avro schema associated
 *     with a column anymore.
 *     Use {@link #getCellSpec(KijiColumnName)} to obtain further details about a column.
 */
@Deprecated
public Schema getSchema(KijiColumnName columnName)
    throws InvalidLayoutException, NoSuchColumnException {
  final CellSchema cellSchema = getCellSchema(columnName);
  return CellSpec.readAvroSchema(cellSchema);
}
/**
 * Reports the schema storage (cell format) declared for the specified column.
 *
 * @param column Column name.
 * @return the cell format for the column.
 * @throws NoSuchColumnException if the column does not exist.
 * @deprecated Use {@link #getCellSpec(KijiColumnName)} to obtain further details about a column.
 */
@Deprecated
public SchemaStorage getCellFormat(KijiColumnName column) throws NoSuchColumnException {
  final CellSchema cellSchema = getCellSchema(column);
  return cellSchema.getStorage();
}
/**
 * Reports the specification of the specified column.
 *
 * <p> The schema table bound to this layout (possibly null) is set on the returned
 * specification. </p>
 *
 * @param column Column to report the specification of.
 * @return the specification for the specified column.
 * @throws IOException on I/O error.
 */
public CellSpec getCellSpec(KijiColumnName column) throws IOException {
  final CellSchema cellSchema = getCellSchema(column);
  return CellSpec.fromCellSchema(cellSchema).setSchemaTable(mSchemaTable);
}
/**
 * Reports whether a column exists.
 *
 * <p> Any column of an existing map-type family exists, whatever its qualifier. For a
 * group-type family, an unqualified name designates the family itself, which exists;
 * a qualified name exists only if the qualifier is a known column name or alias. </p>
 *
 * @param column Column name.
 * @return whether the specified column exists.
 */
public boolean exists(KijiColumnName column) {
  final FamilyLayout family = mFamilyMap.get(column.getFamily());
  if (family == null) {
    // Unknown family.
    return false;
  }
  // Map-type families accept any qualifier; an unqualified name refers to the family itself.
  if (family.isMapType() || !column.isFullyQualified()) {
    return true;
  }
  // Group-type family with a qualifier: the qualifier must be declared.
  return family.getColumnMap().containsKey(column.getQualifier());
}
/**
 * Compares this layout with another object.
 *
 * <p> Two layouts are equal when their Avro descriptors are equal; the bound schema table
 * does not take part in the comparison. The internal descriptors are compared directly,
 * which avoids copying both descriptors on every call. </p>
 *
 * @param other Object to compare this layout with.
 * @return whether {@code other} is a KijiTableLayout with an equal descriptor.
 */
@Override
public boolean equals(Object other) {
  if (this == other) {
    return true;
  }
  if (!(other instanceof KijiTableLayout)) {
    return false;
  }
  final KijiTableLayout otherLayout = (KijiTableLayout) other;
  return mDesc.equals(otherLayout.mDesc);
}
/**
 * Computes the hash code of this layout from its Avro descriptor.
 *
 * <p> The internal descriptor is hashed directly, which avoids copying it on every call. </p>
 *
 * @return the hash code of the layout descriptor.
 */
@Override
public int hashCode() {
  return mDesc.hashCode();
}
/**
 * Renders the layout descriptor as JSON.
 *
 * @return the JSON representation of the layout descriptor.
 */
@Override
public String toString() {
  final String json;
  try {
    json = ToJson.toJsonString(mDesc);
  } catch (IOException ioe) {
    // toString() cannot declare checked exceptions.
    throw new RuntimeException(ioe);
  }
  return json;
}
/**
 * Binds this table layout to the specified schema table.
 *
 * <p> Once set, the schema table bound to a table layout cannot be modified. </p>
 *
 * @param schemaTable Avro schema table to bind this layout to.
 * @return this layout.
 * @throws IllegalStateException if a schema table is already bound to this layout.
 */
public KijiTableLayout setSchemaTable(KijiSchemaTable schemaTable) {
  final boolean isUnbound = (mSchemaTable == null);
  Preconditions.checkState(isUnbound);
  mSchemaTable = schemaTable;
  return this;
}
/**
 * Returns the schema table bound to this table layout. Null means unbound.
 *
 * @return the schema table bound to this table layout, or null if none has been bound.
 */
public KijiSchemaTable getSchemaTable() {
return mSchemaTable;
}
// -----------------------------------------------------------------------------------------------
/**
 * Validates a name (table name, locality group name, family name, or column name).
 *
 * <p> Delegates to {@code KijiNameValidator.isValidLayoutName}. </p>
 *
 * @param name The name to validate.
 * @return whether the name is valid.
 */
private static boolean isValidName(String name) {
return KijiNameValidator.isValidLayoutName(name);
}
/**
 * Validates an alias (table name, locality group name, family name, or column name).
 *
 * <p> Delegates to {@code KijiNameValidator.isValidAlias}. </p>
 *
 * @param alias The alias to validate.
 * @return whether the alias is valid.
 */
private static boolean isValidAlias(String alias) {
return KijiNameValidator.isValidAlias(alias);
}
/**
 * Validates a cell schema descriptor.
 *
 * <p> Checks, in order: that the cell type is supported by the layout version, that the
 * Avro schema can be read, that final storage is only used with inline and counter cells,
 * that counters use final storage, and, when a reference is given, that the update
 * is compatible with it. </p>
 *
 * Ignores failures due to specific Avro record classes not being present on the classpath.
 *
 * @param layoutVersion Version of the new table layout.
 * @param schema New cell schema descriptor.
 * @param reference Reference cell schema descriptor, or null.
 * @throws InvalidLayoutException if the cell schema descriptor is invalid
 *     or incompatible with the reference cell schema.
 */
private static void validateCellSchema(
    ProtocolVersion layoutVersion,
    CellSchema schema,
    CellSchema reference)
    throws InvalidLayoutException {
  switch (schema.getType()) {
    case INLINE:
    case CLASS:
    case COUNTER:
      // Nothing to validate
      break;
    case AVRO:
      requireLayoutVersionForCellType(
          layoutVersion, Versions.LAYOUT_VALIDATION_VERSION, schema);
      break;
    case RAW_BYTES:
      requireLayoutVersionForCellType(
          layoutVersion, Versions.RAW_BYTES_CELL_ENCODING_VERSION, schema);
      break;
    case PROTOBUF:
      requireLayoutVersionForCellType(
          layoutVersion, Versions.PROTOBUF_CELL_ENCODING_VERSION, schema);
      break;
    default:
      throw new InternalKijiError("Unhandled cell type: " + schema);
  }

  // Validate Avro schema through loading and parsing:
  try {
    CellSpec.readAvroSchema(schema);
  } catch (SchemaClassNotFoundException scnfe) {
    // A record class missing from the classpath is not a layout error.
    LOG.debug(String.format("Avro schema class '%s' not found.", schema.getValue()));
  }

  final boolean isCounter = (schema.getType() == SchemaType.COUNTER);
  final boolean isFinal = (schema.getStorage() == SchemaStorage.FINAL);

  // Final schema storage is only valid with counters and inline schema:
  if (isFinal) {
    switch (schema.getType()) {
      case INLINE:
      case COUNTER:
        break;
      default:
        throw new InvalidLayoutException(String.format(
            "Invalid final column schema: %s.", schema));
    }
  }

  // Counters require schema storage final:
  if (isCounter && !isFinal) {
    throw new InvalidLayoutException(String.format(
        "Invalid counter schema, storage must be final: %s.", schema));
  }

  if (reference == null) {
    // No reference schema: nothing else to check.
    return;
  }

  // Schema storage cannot change:
  if (schema.getStorage() != reference.getStorage()) {
    throw new InvalidLayoutException(String.format(
        "Cell schema storage cannot be modified from %s to %s.",
        reference, schema));
  }

  // Final schema cannot change:
  if ((reference.getStorage() == SchemaStorage.FINAL)
      && !Objects.equal(schema.getValue(), reference.getValue())) {
    throw new InvalidLayoutException(String.format(
        "Final column schema cannot be modified from %s to %s.",
        reference, schema));
  }

  // Counter is forever:
  if ((reference.getType() == SchemaType.COUNTER) ^ isCounter) {
    throw new InvalidLayoutException(String.format(
        "Column schema cannot be modified from %s to %s.",
        reference, schema));
  }
}

/**
 * Ensures the layout version is recent enough for the type of the given cell schema.
 *
 * @param layoutVersion Version of the table layout being validated.
 * @param minVersion Earliest layout version that supports the cell type.
 * @param schema Cell schema descriptor whose type requires {@code minVersion}.
 * @throws InvalidLayoutException if the layout version is older than {@code minVersion}.
 */
private static void requireLayoutVersionForCellType(
    ProtocolVersion layoutVersion,
    ProtocolVersion minVersion,
    CellSchema schema)
    throws InvalidLayoutException {
  if (layoutVersion.compareTo(minVersion) < 0) {
    throw new InvalidLayoutException(String.format(
        "Cell type %s requires table layout version >= %s, "
        + "got version %s in cell specification %s.",
        schema.getType(),
        minVersion,
        layoutVersion,
        schema));
  }
}
/**
 * Loads a table layout from the specified resource as JSON.
 *
 * <p> The resource is looked up through {@code KijiTableLayout.class.getResourceAsStream}. </p>
 *
 * @param resource Path of the resource containing the JSON layout description.
 * @return the parsed table layout.
 * @throws IOException on I/O error, or if the resource cannot be found.
 */
public static KijiTableLayout createFromEffectiveJsonResource(String resource)
    throws IOException {
  final InputStream istream = KijiTableLayout.class.getResourceAsStream(resource);
  if (istream == null) {
    // getResourceAsStream() reports a missing resource by returning null; fail here with
    // a message naming the resource rather than handing a null stream to the JSON loader.
    throw new IOException(String.format("Unable to find layout resource '%s'.", resource));
  }
  return createFromEffectiveJson(istream);
}
/**
 * Loads a table layout from the specified JSON text.
 *
 * <p> The layout is built without reference layout. The input stream is handed to
 * {@code ResourceUtils.closeOrLog} before this method returns, whether it succeeds
 * or fails. </p>
 *
 * @param istream Input stream containing the JSON text.
 * @return the parsed table layout.
 * @throws IOException on I/O error.
 */
public static KijiTableLayout createFromEffectiveJson(InputStream istream) throws IOException {
  try {
    return new KijiTableLayout(readTableLayoutDescFromJSON(istream), null);
  } finally {
    ResourceUtils.closeOrLog(istream);
  }
}
/**
 * Creates and returns a new KijiTableLayout instance as specified by an Avro TableLayoutDesc
 * description record. No reference layout is used.
 *
 * @param layout The Avro TableLayoutDesc descriptor record that describes the actual layout
 *     of the table.
 * @return A new table layout.
 * @throws InvalidLayoutException If the descriptor is invalid.
 */
public static KijiTableLayout newLayout(TableLayoutDesc layout) throws InvalidLayoutException {
return new KijiTableLayout(layout, null);
}
/**
 * Creates and returns a new KijiTableLayout instance as specified by composing updates described
 * by a {@link org.kiji.schema.avro.TableLayoutDesc TableLayoutDesc} with the original
 * KijiTableLayout. See {@link org.kiji.schema.layout.KijiTableLayout KijiTableLayout}
 * for what can and cannot be updated.
 *
 * <p> The old layout is used as the reference layout the update is validated against. </p>
 *
 * @param updateLayoutDesc The Avro TableLayoutDesc descriptor record that describes
 *     the layout update.
 * @param oldLayout The old table layout.
 * @return A new table layout.
 * @throws InvalidLayoutException If the descriptor is invalid or inconsistent wrt reference.
 */
public static KijiTableLayout createUpdatedLayout(
TableLayoutDesc updateLayoutDesc, KijiTableLayout oldLayout) throws InvalidLayoutException {
return new KijiTableLayout(updateLayoutDesc, oldLayout);
}
/**
 * Reads a table layout descriptor from its JSON serialized form.
 *
 * <p> The stream content is decoded as UTF-8, independently of the platform default charset.
 * This method does not close the input stream. </p>
 *
 * @param istream JSON input stream.
 * @return the decoded table layout descriptor.
 * @throws IOException on I/O error.
 */
public static TableLayoutDesc readTableLayoutDescFromJSON(InputStream istream)
    throws IOException {
  // JSON text is UTF-8: do not rely on the platform default charset to decode it.
  final String json = IOUtils.toString(istream, "UTF-8");
  final TableLayoutDesc desc =
      (TableLayoutDesc) FromJson.fromJsonString(json, TableLayoutDesc.SCHEMA$);
  return desc;
}
/**
 * Finds the encoding of the row key given the format.
 *
 * @param rowKeyFormat Format of row keys of type RowKeyFormat or RowKeyFormat2.
 * @return The specific row key encoding, e.g. RAW, HASH, etc.
 * @throws RuntimeException if the format is neither a RowKeyFormat nor a RowKeyFormat2.
 */
public static RowKeyEncoding getEncoding(Object rowKeyFormat) {
  if (rowKeyFormat instanceof RowKeyFormat) {
    final RowKeyFormat format1 = (RowKeyFormat) rowKeyFormat;
    return format1.getEncoding();
  }
  if (rowKeyFormat instanceof RowKeyFormat2) {
    final RowKeyFormat2 format2 = (RowKeyFormat2) rowKeyFormat;
    return format2.getEncoding();
  }
  throw new RuntimeException("Unsupported Row Key Format");
}
/**
 * Gets the hash size for a given row key format.
 *
 * <p> For a RowKeyFormat, this is the hash size of the format itself; for a RowKeyFormat2,
 * this is the hash size of its salt. </p>
 *
 * @param rowKeyFormat Format of row keys of type RowKeyFormat or RowKeyFormat2.
 * @return The size of the hash prefix.
 * @throws RuntimeException if the format is neither a RowKeyFormat nor a RowKeyFormat2,
 *     or if it is a RowKeyFormat2 with a null salt.
 */
public static int getHashSize(Object rowKeyFormat) {
  if (rowKeyFormat instanceof RowKeyFormat) {
    return ((RowKeyFormat) rowKeyFormat).getHashSize();
  }
  if (!(rowKeyFormat instanceof RowKeyFormat2)) {
    throw new RuntimeException("Unsupported Row Key Format");
  }
  final RowKeyFormat2 format2 = (RowKeyFormat2) rowKeyFormat;
  if (null == format2.getSalt()) {
    throw new RuntimeException("This RowKeyFormat2 instance does not specify salt/hashing.");
  }
  return format2.getSalt().getHashSize();
}
}