Source Code of co.cask.cdap.api.dataset.lib.IndexedObjectStore

/*
 * Copyright © 2014 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */


package co.cask.cdap.api.dataset.lib;


import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.dataset.table.Row;
import co.cask.cdap.api.dataset.table.Table;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;


import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;


/**
 * An ObjectStore Dataset extension that supports access to objects via indices; lookups by the index will return
 * all the objects stored in the object store that have the index value.
 *
 * The dataset uses two tables: an object store, to store the actual data, and a second table for the index.
 *
 * @param <T> the type of objects in the store
 */
public class IndexedObjectStore<T> extends AbstractDataset {
  //NOTE: cannot use byte[0] as empty value because byte[0] is treated as null
  private static final byte[] EMPTY_VALUE = new byte[1];
  //KEY_PREFIX is used to prefix primary key when it stores PrimaryKey -> Categories mapping.
  private static final byte[] KEY_PREFIX = Bytes.toBytes("_fk");


  //IndexedObjectStore stores the following mappings
  // 1. ObjectStore
  //    (primaryKey to Object)
  // 2. Index table
  //    (secondaryKeys to primaryKey mapping)
  //    (prefixedPrimaryKey to secondaryKeys)
  private final ObjectStore<T> objectStore;
  private final Table index;


  /**
   * Constructs the IndexedObjectStore with name and type.
   *
   * @param name name of the dataset
   * @param objectStore dataset to use as the objectStore
   * @param index dataset to use as the index
   */
  public IndexedObjectStore(String name, ObjectStore<T> objectStore, Table index) {
    super(name, objectStore, index);
    this.objectStore = objectStore;
    this.index = index;
  }


  /**
   * See {@link ObjectStore#read(byte[])}.
   */
  public T read(byte[] key) {
    return objectStore.read(key);
  }


  /**
   * See {@link ObjectStore#read(String)}.
   */
  public T read(String key) {
    return objectStore.read(key);
  }


  /**
   * Read all the objects from the objectStore for a given index. Returns all the objects that match the secondaryKey.
   * Returns an empty list if no values are found. Never returns null.
   *
   * @param secondaryKey for the lookup.
   * @return List of Objects matching the secondaryKey.
   */
  public List<T> readAllByIndex(byte[] secondaryKey) {
    ImmutableList.Builder<T> resultList = ImmutableList.builder();
    //Lookup the secondaryKey and get all the keys in primary
    //Each row with secondaryKey as rowKey contains column named as the primary key
    // of every object that can be looked up using the secondaryKey
    Row row = index.get(secondaryKey);


    // if the index has no match, return nothing
    if (!row.isEmpty()) {
      for (byte[] column : row.getColumns().keySet()) {
        T obj = objectStore.read(column);
        resultList.add(obj);
      }
    }
    return resultList.build();
  }


  private List<byte[]> secondaryKeysToDelete(Set<byte[]> existingSecondaryKeys, Set<byte[]> newSecondaryKeys) {
    List<byte[]> secondaryKeysToDelete = Lists.newArrayList();
    if (existingSecondaryKeys.size() > 0) {
      for (byte[] secondaryKey : existingSecondaryKeys) {
        // If it is not in newSecondaryKeys then it needs to be deleted.
        if (!newSecondaryKeys.contains(secondaryKey)) {
          secondaryKeysToDelete.add(secondaryKey);
        }
      }
    }
    return secondaryKeysToDelete;
  }


  private List<byte[]> secondaryKeysToAdd(Set<byte[]> existingSecondaryKeys, Set<byte[]> newSecondaryKeys) {
    List<byte[]> secondaryKeysToAdd = Lists.newArrayList();
    if (existingSecondaryKeys.size() > 0) {
      for (byte[] secondaryKey : newSecondaryKeys) {
        // If it is not in existingSecondaryKeys then it needs to be added
        // else it exists already.
        if (!existingSecondaryKeys.contains(secondaryKey)) {
          secondaryKeysToAdd.add(secondaryKey);
        }
      }
    } else {
      //all the newValues should be added
      secondaryKeysToAdd.addAll(newSecondaryKeys);
    }


    return secondaryKeysToAdd;
  }


  /**
   * Writes to the dataset, deleting any existing secondaryKey corresponding to the key and updates the indexTable with
   * the secondaryKey that is passed.
   *
   * @param key key for storing the object
   * @param object object to be stored
   * @param secondaryKeys indices that can be used to lookup the object
   */
  public void write(byte[] key, T object, byte[][] secondaryKeys) {
    writeToObjectStore(key, object);


    //Update the secondaryKeys
    //logic:
    //  - Get existing secondary keys
    //  - Compute diff between existing secondary keys and new secondary keys
    //  - Remove the secondaryKeys that are removed
    //  - Add the new keys that are added
    Row row = index.get(getPrefixedPrimaryKey(key));
    Set<byte[]> existingSecondaryKeys = Sets.newTreeSet(new Bytes.ByteArrayComparator());


    if (!row.isEmpty()) {
      existingSecondaryKeys = row.getColumns().keySet();
    }


    Set<byte[]> newSecondaryKeys = new TreeSet<byte[]>(new Bytes.ByteArrayComparator());
    newSecondaryKeys.addAll(Arrays.asList(secondaryKeys));


    List<byte[]> secondaryKeysDeleted = secondaryKeysToDelete(existingSecondaryKeys, newSecondaryKeys);
    if (secondaryKeysDeleted.size() > 0) {
      deleteSecondaryKeys(key, secondaryKeysDeleted.toArray(new byte[secondaryKeysDeleted.size()][]));
    }


    List<byte[]> secondaryKeysAdded =  secondaryKeysToAdd(existingSecondaryKeys, newSecondaryKeys);


    //for each key store the secondaryKey. This will be used while deleting old index values.
    if (secondaryKeysAdded.size() > 0) {
      byte[][] fooValues = new byte[secondaryKeysAdded.size()][];
      Arrays.fill(fooValues, EMPTY_VALUE);
      index.put(getPrefixedPrimaryKey(key),
                 secondaryKeysAdded.toArray(new byte[secondaryKeysAdded.size()][]),
                 fooValues);
    }


    for (byte[] secondaryKey : secondaryKeysAdded) {
      //update the index.
      index.put(secondaryKey, key, EMPTY_VALUE);
    }
  }


  private void writeToObjectStore(byte[] key, T object) {
    objectStore.write(key, object);
  }


  public void write(byte[] key, T object) {
    Row row = index.get(getPrefixedPrimaryKey(key));
    if (!row.isEmpty()) {
      Set<byte[]> columnsToDelete = row.getColumns().keySet();
      deleteSecondaryKeys(key, columnsToDelete.toArray(new byte[columnsToDelete.size()][]));
    }
    writeToObjectStore(key, object);
  }




  private void deleteSecondaryKeys(byte[] key, byte[][] columns) {
    //Delete the key to secondaryKey mapping
    index.delete(getPrefixedPrimaryKey(key), columns);


    // delete secondaryKey to key mapping
    for (byte[] col : columns) {
      index.delete(col, key);
    }
  }


  private byte[] getPrefixedPrimaryKey(byte[] key) {
    return Bytes.add(KEY_PREFIX, key);
  }


  /**
   * Deletes an index that is no longer needed. After deleting the index, lookups using the index value will no
   * longer return the object.
   *
   * @param key key for the object
   * @param secondaryKey index to be pruned
   */
  public void pruneIndex(byte[] key, byte[] secondaryKey) {
    this.index.delete(secondaryKey, key);
    this.index.delete(getPrefixedPrimaryKey(key), secondaryKey);
  }


  /**
   * Updates the index value for an existing key. This will not delete the older secondaryKeys.
   *
   * @param key key for the object
   * @param secondaryKey index to be pruned
   */
  public void updateIndex(byte[] key, byte[] secondaryKey) {
    this.index.put(secondaryKey, key, EMPTY_VALUE);
    this.index.put(getPrefixedPrimaryKey(key), secondaryKey, EMPTY_VALUE);
  }
}
Source Code of co.cask.cdap.api.dataset.lib.IndexedObjectStore

Related Classes of co.cask.cdap.api.dataset.lib.IndexedObjectStore