Package uk.ac.open.kmi.smartproducts.sesame.sail

Source Code of uk.ac.open.kmi.smartproducts.sesame.sail.TripleStore$TripleIndex

/*
* Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2009.
*
* Licensed under the Aduna BSD-style license.
*/
package uk.ac.open.kmi.smartproducts.sesame.sail;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.StringTokenizer;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import info.aduna.io.ByteArrayUtil;

import org.openrdf.sail.SailException;
import uk.ac.open.kmi.smartproducts.sesame.sail.TxnStatusFile.TxnStatus;
import uk.ac.open.kmi.smartproducts.sesame.sail.btree.BTree;
import uk.ac.open.kmi.smartproducts.sesame.sail.btree.RecordComparator;
import uk.ac.open.kmi.smartproducts.sesame.sail.btree.RecordIterator;

/**
* File-based indexed storage and retrieval of RDF statements. TripleStore
* stores statements in the form of four integer IDs. Each ID represents an RDF
* value that is stored in a {@link ValueStore}. The four IDs refer to the
* statement's subject, predicate, object and context. The ID <tt>0</tt> is used
* to represent the "null" context and doesn't map to an actual RDF value.
*
* @author Arjohn Kampman
*/
class TripleStore {

  /*-----------*
   * Constants *
   *-----------*/

  /**
   * The default triple indexes.
   */
  private static final String DEFAULT_INDEXES = "spoc,posc";

  /**
   * The file name for the properties file.
   */
  private static final String PROPERTIES_FILE = "triples.prop";

  /**
   * The key used to store the triple store version in the properties file.
   */
  private static final String VERSION_KEY = "version";

  /**
   * The key used to store the triple indexes specification that specifies
   * which triple indexes exist.
   */
  private static final String INDEXES_KEY = "triple-indexes";

  /**
   * The version number for the current triple store.
   * <ul>
   * <li>version 0: The first version which used a single spo-index. This
   * version did not have a properties file yet.
   * <li>version 1: Introduces configurable triple indexes and the properties
   * file.
   * <li>version 10: Introduces a context field, essentially making this a quad
   * store.
   * <li>version 10a: Introduces transaction flags, this is backwards
   * compatible with version 10.
   * </ul>
   */
  private static final int SCHEME_VERSION = 10;

  // 17 bytes are used to represent a triple:
  // byte 0-3 : subject
  // byte 4-7 : predicate
  // byte 8-11: object
  // byte 12-15: context
  // byte 16: additional flag(s)
  /** The length, in bytes, of a single triple record. */
  static final int RECORD_LENGTH = 17;

  /** Byte offset of the subject ID within a record. */
  static final int SUBJ_IDX = 0;

  /** Byte offset of the predicate ID within a record. */
  static final int PRED_IDX = 4;

  /** Byte offset of the object ID within a record. */
  static final int OBJ_IDX = 8;

  /** Byte offset of the context ID within a record. */
  static final int CONTEXT_IDX = 12;

  /** Byte offset of the flags byte within a record. */
  static final int FLAG_IDX = 16;

  /**
   * Bit field indicating that a statement has been explicitly added (instead
   * of being inferred).
   */
  static final byte EXPLICIT_FLAG = (byte)0x1; // 0000 0001

  /**
   * Bit field indicating that a statement has been added in a (currently
   * active) transaction.
   */
  static final byte ADDED_FLAG = (byte)0x2; // 0000 0010

  /**
   * Bit field indicating that a statement has been removed in a (currently
   * active) transaction.
   */
  static final byte REMOVED_FLAG = (byte)0x4; // 0000 0100

  /**
   * Bit field indicating that the explicit flag has been toggled (from true to
   * false, or vice versa) in a (currently active) transaction.
   */
  static final byte TOGGLE_EXPLICIT_FLAG = (byte)0x8; // 0000 1000

  /*-----------*
   * Variables *
   *-----------*/

  /**
   * Logger for this triple store.
   */
  private final Logger logger = LoggerFactory.getLogger(this.getClass());

  /**
   * The directory that is used to store the index files.
   */
  private final File dir;

  /**
   * Object containing meta-data for the triple store. This includes the data
   * format version and the specification of the triple indexes that exist.
   */
  private final Properties properties;

  /**
   * The list of triple indexes that are used to store and retrieve triples.
   */
  private final List<TripleIndex> indexes = new ArrayList<TripleIndex>();

  /**
   * Flag that is handed to the B-Tree of every triple index on creation.
   */
  private final boolean forceSync;

  /**
   * File in which the status of the current transaction is recorded; it is
   * consulted on start-up to detect transactions that were not completed.
   */
  private final TxnStatusFile txnStatusFile;

  /**
   * Cache of the records that were updated in the current transaction. It is
   * created by the first call to {@link #startTransaction()}, cleared by
   * {@link #commit()} and {@link #rollback()} and discarded by
   * {@link #close()}; it is <tt>null</tt> until a transaction has been started.
   */
  private volatile RecordCache updatedTriplesCache;

  /*--------------*
   * Constructors *
   *--------------*/

  /**
   * Creates a triple store in the supplied directory with the <tt>forceSync</tt>
   * option disabled.
   *
   * @param dir
   *        The directory that holds (or will hold) the index files.
   * @param indexSpecStr
   *        The requested triple indexes, e.g. "spoc,posc"; may be
   *        <tt>null</tt> or empty.
   * @throws IOException
   *         If an I/O error occurred.
   * @throws SailException
   *         If the index specification or the stored meta-data is invalid.
   */
  public TripleStore(File dir, String indexSpecStr)
    throws IOException, SailException
  {
    this(dir, indexSpecStr, false);
  }

  /**
   * Creates or opens a triple store in the supplied directory.
   * <p>
   * When the directory does not contain a properties file yet, the store is
   * treated as new and the requested (or default) indexes are created.
   * Otherwise the stored meta-data is loaded and verified, the existing
   * indexes are opened, any uncompleted transaction is completed or rolled
   * back, and the set of indexes is changed when it differs from the
   * requested set. Finally, the properties file is (re)written when the
   * version or index specification it contains is out of date.
   *
   * @param dir
   *        The directory that holds (or will hold) the index files.
   * @param indexSpecStr
   *        The requested triple indexes, e.g. "spoc,posc"; when
   *        <tt>null</tt> or empty, the default indexes (new store) or the
   *        existing indexes (existing store) are used.
   * @param forceSync
   *        Flag that is passed on to the B-Tree of every index.
   * @throws IOException
   *         If an I/O error occurred.
   * @throws SailException
   *         If the index specification or the stored meta-data is invalid.
   */
  public TripleStore(File dir, String indexSpecStr, boolean forceSync)
    throws IOException, SailException
  {
    this.dir = dir;
    this.forceSync = forceSync;
    this.txnStatusFile = new TxnStatusFile(dir);

    File propFile = new File(dir, PROPERTIES_FILE);

    if (!propFile.exists()) {
      // newly created native store
      properties = new Properties();

      Set<String> indexSpecs = parseIndexSpecList(indexSpecStr);

      if (indexSpecs.isEmpty()) {
        logger.debug("No indexes specified, using default indexes: {}", DEFAULT_INDEXES);
        indexSpecStr = DEFAULT_INDEXES;
        indexSpecs = parseIndexSpecList(indexSpecStr);
      }

      initIndexes(indexSpecs);
    }
    else {
      // Read triple properties file and check format version number
      properties = loadProperties(propFile);
      checkVersion();

      // Initialize existing indexes
      Set<String> indexSpecs = getIndexSpecs();
      initIndexes(indexSpecs);

      // Check transaction status; recovery must run on the existing indexes
      // and therefore precedes any change to the set of indexes
      TxnStatus txnStatus = txnStatusFile.getTxnStatus();
      if (txnStatus == TxnStatus.NONE) {
        logger.trace("No uncompleted transactions found");
      }
      else {
        processUncompletedTransaction(txnStatus);
      }

      // Compare the existing indexes with the requested indexes
      Set<String> reqIndexSpecs = parseIndexSpecList(indexSpecStr);

      if (reqIndexSpecs.isEmpty()) {
        // No indexes specified, use the existing ones
        indexSpecStr = properties.getProperty(INDEXES_KEY);
      }
      else if (!reqIndexSpecs.equals(indexSpecs)) {
        // Set of indexes needs to be changed
        reindex(indexSpecs, reqIndexSpecs);
      }
    }

    // At this point indexSpecStr describes the indexes that are in use
    if (!String.valueOf(SCHEME_VERSION).equals(properties.getProperty(VERSION_KEY))
        || !indexSpecStr.equals(properties.getProperty(INDEXES_KEY)))
    {
      // Store up-to-date properties
      properties.setProperty(VERSION_KEY, String.valueOf(SCHEME_VERSION));
      properties.setProperty(INDEXES_KEY, indexSpecStr);
      storeProperties(propFile);
    }
  }

  /*---------*
   * Methods *
   *---------*/

  private void checkVersion()
    throws SailException
  {
    // Check version number
    String versionStr = properties.getProperty(VERSION_KEY);
    if (versionStr == null) {
      logger.warn("{} missing in TripleStore's properties file", VERSION_KEY);
    }
    else {
      try {
        int version = Integer.parseInt(versionStr);
        if (version < 10) {
          throw new SailException("Directory contains incompatible triple data");
        }
        else if (version > SCHEME_VERSION) {
          throw new SailException("Directory contains data that uses a newer data format");
        }
      }
      catch (NumberFormatException e) {
        logger.warn("Malformed version number in TripleStore's properties file");
      }
    }
  }

  private Set<String> getIndexSpecs()
    throws SailException
  {
    String indexesStr = properties.getProperty(INDEXES_KEY);

    if (indexesStr == null) {
      throw new SailException(INDEXES_KEY + " missing in TripleStore's properties file");
    }

    Set<String> indexSpecs = parseIndexSpecList(indexesStr);

    if (indexSpecs.isEmpty()) {
      throw new SailException("No " + INDEXES_KEY + " found in TripleStore's properties file");
    }

    return indexSpecs;
  }

  /**
   * Parses a comma/whitespace-separated list of index specifications. Index
   * specifications are required to consists of 4 characters: 's', 'p', 'o' and
   * 'c'.
   *
   * @param indexSpecStr
   *        A string like "spoc, pocs, cosp".
   * @return A Set containing the parsed index specifications.
   */
  private Set<String> parseIndexSpecList(String indexSpecStr)
    throws SailException
  {
    Set<String> indexes = new HashSet<String>();

    if (indexSpecStr != null) {
      StringTokenizer tok = new StringTokenizer(indexSpecStr, ", \t");
      while (tok.hasMoreTokens()) {
        String index = tok.nextToken().toLowerCase();

        // sanity checks
        if (index.length() != 4 || index.indexOf('s') == -1 || index.indexOf('p') == -1
            || index.indexOf('o') == -1 || index.indexOf('c') == -1)
        {
          throw new SailException("invalid value '" + index + "' in index specification: "
              + indexSpecStr);
        }

        indexes.add(index);
      }
    }

    return indexes;
  }

  /**
   * Creates a {@link TripleIndex} for each of the supplied index
   * specifications and appends it to the list of indexes.
   *
   * @param indexSpecs
   *        The field sequences (e.g. "spoc") of the indexes to initialize.
   * @throws IOException
   *         If an index could not be initialized.
   */
  private void initIndexes(Set<String> indexSpecs)
    throws IOException
  {
    for (String fieldSeq : indexSpecs) {
      logger.trace("Initializing index '{}'...", fieldSeq);
      indexes.add(new TripleIndex(fieldSeq));
    }
  }

  private void processUncompletedTransaction(TxnStatus txnStatus)
    throws IOException
  {
    switch (txnStatus) {
      case COMMITTING:
        logger.info("Detected uncompleted commit, trying to complete");
        try {
          commit();
          logger.info("Uncompleted commit completed successfully");
        }
        catch (IOException e) {
          logger.error("Failed to restore from uncompleted commit", e);
          throw e;
        }
        break;
      case ROLLING_BACK:
        logger.info("Detected uncompleted rollback, trying to complete");
        try {
          rollback();
          logger.info("Uncompleted rollback completed successfully");
        }
        catch (IOException e) {
          logger.error("Failed to restore from uncompleted rollback", e);
          throw e;
        }
        break;
      case ACTIVE:
        logger.info("Detected unfinished transaction, trying to roll back");
        try {
          rollback();
          logger.info("Unfinished transaction rolled back successfully");
        }
        catch (IOException e) {
          logger.error("Failed to roll back unfinished transaction", e);
          throw e;
        }
        break;
      case UNKNOWN:
        logger.info("Read invalid or unknown transaction status, trying to roll back");
        try {
          rollback();
          logger.info("Successfully performed a rollback for invalid or unknown transaction status");
        }
        catch (IOException e) {
          logger.error("Failed to perform rollback for invalid or unknown transaction status", e);
          throw e;
        }
        break;
    }
  }

  private void reindex(Set<String> currentIndexSpecs, Set<String> newIndexSpecs)
    throws IOException, SailException
  {
    Map<String, TripleIndex> currentIndexes = new HashMap<String, TripleIndex>();
    for (TripleIndex index : indexes) {
      currentIndexes.put(new String(index.getFieldSeq()), index);
    }

    // Determine the set of newly added indexes and initialize these using an
    // existing index as source
    Set<String> addedIndexSpecs = new HashSet<String>(newIndexSpecs);
    addedIndexSpecs.removeAll(currentIndexSpecs);

    if (!addedIndexSpecs.isEmpty()) {
      TripleIndex sourceIndex = indexes.get(0);

      for (String fieldSeq : addedIndexSpecs) {
        logger.debug("Initializing new index '{}'...", fieldSeq);

        TripleIndex addedIndex = new TripleIndex(fieldSeq);
        BTree addedBTree = addedIndex.getBTree();

        RecordIterator sourceIter = sourceIndex.getBTree().iterateAll();
        try {
          byte[] value = null;
          while ((value = sourceIter.next()) != null) {
            addedBTree.insert(value);
          }
        }
        finally {
          sourceIter.close();
        }

        currentIndexes.put(fieldSeq, addedIndex);
      }

      logger.debug("New index(es) initialized");
    }

    // Determine the set of removed indexes
    Set<String> removedIndexSpecs = new HashSet<String>(currentIndexSpecs);
    removedIndexSpecs.removeAll(newIndexSpecs);

    // Delete files for removed indexes
    for (String fieldSeq : removedIndexSpecs) {
      TripleIndex removedIndex = currentIndexes.remove(fieldSeq);

      boolean deleted = removedIndex.getBTree().delete();

      if (deleted) {
        logger.debug("Deleted file(s) for removed {} index", fieldSeq);
      }
      else {
        logger.warn("Unable to delete file(s) for removed {} index", fieldSeq);
      }
    }

    // Update the indexes variable, using the specified index order
    indexes.clear();
    for (String fieldSeq : newIndexSpecs) {
      indexes.add(currentIndexes.remove(fieldSeq));
    }
  }

  /**
   * Gets the index specification string as recorded in the properties.
   *
   * @return The value stored under {@link #INDEXES_KEY}, or <tt>null</tt> if
   *         no such property has been set.
   */
  private String getCurrentIndexSpecStr() {
    return properties.getProperty(INDEXES_KEY);
  }

  public void close()
    throws IOException
  {
    for (TripleIndex index : indexes) {
      index.getBTree().close();
    }
   
    // Should have been removed upon commit() or rollback(), but just to be sure
    if (updatedTriplesCache != null) {
      updatedTriplesCache.discard();
      updatedTriplesCache = null;
    }
  }

  /**
   * Gets the committed triples that match the supplied pattern, i.e. all
   * matching triples except those flagged as added in a transaction.
   *
   * @param subj
   *        The subject for the pattern, or <tt>-1</tt> for a wildcard.
   * @param pred
   *        The predicate for the pattern, or <tt>-1</tt> for a wildcard.
   * @param obj
   *        The object for the pattern, or <tt>-1</tt> for a wildcard.
   * @param context
   *        The context for the pattern, or <tt>-1</tt> for a wildcard.
   * @return An iterator over the matching records.
   * @throws IOException
   *         If an I/O error occurred.
   */
  public RecordIterator getTriples(int subj, int pred, int obj, int context)
    throws IOException
  {
    // Return all triples except those that were added but not yet committed
    return getTriples(subj, pred, obj, context, 0, ADDED_FLAG);
  }

  public RecordIterator getTriples(int subj, int pred, int obj, int context, boolean readTransaction)
    throws IOException
  {
    if (readTransaction) {
      // Don't read removed statements
      return getTriples(subj, pred, obj, context, 0, TripleStore.REMOVED_FLAG);
    }
    else {
      // Don't read added statements
      return getTriples(subj, pred, obj, context, 0, TripleStore.ADDED_FLAG);
    }
  }

  /**
   * If an index exists by context - use it, otherwise return null.
   *
   * @param readTransaction
   * @return All triples sorted by context or null if no context index exists
   * @throws IOException
   */
  public RecordIterator getAllTriplesSortedByContext(boolean readTransaction)
    throws IOException
  {
    if (readTransaction) {
      // Don't read removed statements
      return getAllTriplesSortedByContext(0, TripleStore.REMOVED_FLAG);
    }
    else {
      // Don't read added statements
      return getAllTriplesSortedByContext(0, TripleStore.ADDED_FLAG);
    }
  }

  public RecordIterator getTriples(int subj, int pred, int obj, int context, boolean explicit,
      boolean readTransaction)
    throws IOException
  {
    int flags = 0;
    int flagsMask = 0;

    if (readTransaction) {
      flagsMask |= TripleStore.REMOVED_FLAG;
      // 'explicit' is handled through an ExplicitStatementFilter
    }
    else {
      flagsMask |= TripleStore.ADDED_FLAG;

      if (explicit) {
        flags |= TripleStore.EXPLICIT_FLAG;
        flagsMask |= TripleStore.EXPLICIT_FLAG;
      }
    }

    RecordIterator btreeIter = getTriples(subj, pred, obj, context, flags, flagsMask);

    if (readTransaction && explicit) {
      // Filter implicit statements from the result
      btreeIter = new ExplicitStatementFilter(btreeIter);
    }

    return btreeIter;
  }

  /*-------------------------------------*
   * Inner class ExplicitStatementFilter *
   *-------------------------------------*/

  private static class ExplicitStatementFilter implements RecordIterator {

    private final RecordIterator wrappedIter;

    public ExplicitStatementFilter(RecordIterator wrappedIter) {
      this.wrappedIter = wrappedIter;
    }

    public byte[] next()
      throws IOException
    {
      byte[] result;

      while ((result = wrappedIter.next()) != null) {
        byte flags = result[TripleStore.FLAG_IDX];
        boolean explicit = (flags & TripleStore.EXPLICIT_FLAG) != 0;
        boolean toggled = (flags & TripleStore.TOGGLE_EXPLICIT_FLAG) != 0;

        if (explicit != toggled) {
          // Statement is either explicit and hasn't been toggled, or vice
          // versa
          break;
        }
      }

      return result;
    }

    public void set(byte[] value)
      throws IOException
    {
      wrappedIter.set(value);
    }

    public void close()
      throws IOException
    {
      wrappedIter.close();
    }
  } // end inner class ExplicitStatementFilter

  private RecordIterator getTriples(int subj, int pred, int obj, int context, int flags, int flagsMask)
    throws IOException
  {
    TripleIndex index = getBestIndex(subj, pred, obj, context);
    boolean doRangeSearch = index.getPatternScore(subj, pred, obj, context) > 0;
    return getTriplesUsingIndex(subj, pred, obj, context, flags, flagsMask, index, doRangeSearch);
  }

  private RecordIterator getAllTriplesSortedByContext(int flags, int flagsMask)
    throws IOException
  {
    for (TripleIndex index : indexes) {
      if (index.getFieldSeq()[0] == 'c') {
        // found a context-first index
        return getTriplesUsingIndex(-1, -1, -1, -1, flags, flagsMask, index, false);
      }
    }

    return null;
  }

  private RecordIterator getTriplesUsingIndex(int subj, int pred, int obj, int context, int flags,
      int flagsMask, TripleIndex index, boolean rangeSearch)
  {
    byte[] searchKey = getSearchKey(subj, pred, obj, context, flags);
    byte[] searchMask = getSearchMask(subj, pred, obj, context, flagsMask);

    if (rangeSearch) {
      // Use ranged search
      byte[] minValue = getMinValue(subj, pred, obj, context);
      byte[] maxValue = getMaxValue(subj, pred, obj, context);

      return index.getBTree().iterateRangedValues(searchKey, searchMask, minValue, maxValue);
    }
    else {
      // Use sequential scan
      return index.getBTree().iterateValues(searchKey, searchMask);
    }
  }

  protected double cardinality(int subj, int pred, int obj, int context)
    throws IOException
  {
    TripleIndex index = getBestIndex(subj, pred, obj, context);
    BTree btree = index.btree;

    double rangeSize;

    if (index.getPatternScore(subj, pred, obj, context) == 0) {
      rangeSize = btree.getValueCountEstimate();
    }
    else {
      byte[] minValue = getMinValue(subj, pred, obj, context);
      byte[] maxValue = getMaxValue(subj, pred, obj, context);
      rangeSize = btree.getValueCountEstimate(minValue, maxValue);
    }

    return rangeSize;
  }

  protected TripleIndex getBestIndex(int subj, int pred, int obj, int context) {
    int bestScore = -1;
    TripleIndex bestIndex = null;

    for (TripleIndex index : indexes) {
      int score = index.getPatternScore(subj, pred, obj, context);
      if (score > bestScore) {
        bestScore = score;
        bestIndex = index;
      }
    }

    return bestIndex;
  }

  /**
   * Clears the B-Tree of every triple index.
   *
   * @throws IOException
   *         If an I/O error occurred.
   */
  public void clear()
    throws IOException
  {
    for (TripleIndex index : indexes) {
      index.getBTree().clear();
    }
  }

  /**
   * Stores the supplied triple as an explicit statement.
   *
   * @return <tt>true</tt> if the statement was added, <tt>false</tt>
   *         otherwise.
   * @throws IOException
   *         If an I/O error occurred.
   */
  public boolean storeTriple(int subj, int pred, int obj, int context)
    throws IOException
  {
    return storeTriple(subj, pred, obj, context, true);
  }

  /**
   * Stores the supplied triple in all indexes, flagged as part of the current
   * transaction. When a record for the triple already exists, only its flags
   * are updated. Every record that is written is also registered in the cache
   * of updated triples, which is only created by {@link #startTransaction()}.
   *
   * @param subj
   *        The ID of the statement's subject.
   * @param pred
   *        The ID of the statement's predicate.
   * @param obj
   *        The ID of the statement's object.
   * @param context
   *        The ID of the statement's context.
   * @param explicit
   *        <tt>true</tt> to store the statement as explicit, <tt>false</tt>
   *        to store it as inferred.
   * @return <tt>true</tt> if no record existed for the triple yet or if the
   *         existing record was flagged as removed, <tt>false</tt> otherwise.
   * @throws IOException
   *         If an I/O error occurred.
   */
  public boolean storeTriple(int subj, int pred, int obj, int context, boolean explicit)
    throws IOException
  {
    boolean stAdded = false;

    // Look up any existing record in the first index; the flags are built up
    // from scratch in 'data'
    byte[] data = getData(subj, pred, obj, context, 0);
    byte[] storedData = indexes.get(0).getBTree().get(data);

    if (storedData == null) {
      // Statement does not yet exist
      data[FLAG_IDX] |= ADDED_FLAG;
      if (explicit) {
        data[FLAG_IDX] |= EXPLICIT_FLAG;
      }

      stAdded = true;
    }
    else {
      // Statement already exists, only modify its flags, see txn-flags.txt
      // for a description of the flag transformations
      byte flags = storedData[FLAG_IDX];
      boolean wasExplicit = (flags & EXPLICIT_FLAG) != 0;
      boolean wasAdded = (flags & ADDED_FLAG) != 0;
      boolean wasRemoved = (flags & REMOVED_FLAG) != 0;
      boolean wasToggled = (flags & TOGGLE_EXPLICIT_FLAG) != 0;

      if (wasAdded) {
        // Statement has been added in the current transaction and is
        // invisible to other connections, we can simply modify its flags
        data[FLAG_IDX] |= ADDED_FLAG;
        if (explicit || wasExplicit) {
          data[FLAG_IDX] |= EXPLICIT_FLAG;
        }
      }
      else {
        // Committed statement, must keep explicit flag the same
        if (wasExplicit) {
          data[FLAG_IDX] |= EXPLICIT_FLAG;
        }

        if (explicit) {
          if (!wasExplicit) {
            // Make inferred statement explicit
            data[FLAG_IDX] |= TOGGLE_EXPLICIT_FLAG;
          }
        }
        else {
          if (wasRemoved) {
            if (wasExplicit) {
              // Re-add removed explicit statement as inferred
              data[FLAG_IDX] |= TOGGLE_EXPLICIT_FLAG;
            }
          }
          else if (wasToggled) {
            // Keep the pending toggle of the explicit flag
            data[FLAG_IDX] |= TOGGLE_EXPLICIT_FLAG;
          }
        }
      }

      // Statement is new if it was removed before; note that the REMOVED flag
      // is never copied to 'data', so re-adding undoes the removal
      stAdded = wasRemoved;
    }

    // Only write when the record is new or its flags have changed
    if (storedData == null || !Arrays.equals(data, storedData)) {
      for (TripleIndex index : indexes) {
        index.getBTree().insert(data);
      }

      updatedTriplesCache.storeRecord(data);
    }

    return stAdded;
  }

  /**
   * Flags all triples that match the supplied pattern as removed, regardless
   * of their current flags.
   *
   * @param subj
   *        The subject for the pattern, or <tt>-1</tt> for a wildcard.
   * @param pred
   *        The predicate for the pattern, or <tt>-1</tt> for a wildcard.
   * @param obj
   *        The object for the pattern, or <tt>-1</tt> for a wildcard.
   * @param context
   *        The context for the pattern, or <tt>-1</tt> for a wildcard.
   * @return The number of triples that were removed.
   * @throws IOException
   *         If an I/O error occurred.
   */
  public int removeTriples(int subj, int pred, int obj, int context)
    throws IOException
  {
    // An empty flags mask means that the flags are not part of the search
    RecordIterator iter = getTriples(subj, pred, obj, context, 0, 0);
    return removeTriples(iter);
  }

  /**
   * Flags the explicit or the inferred triples that match the supplied
   * pattern as removed.
   *
   * @param subj
   *        The subject for the pattern, or <tt>-1</tt> for a wildcard.
   * @param pred
   *        The predicate for the pattern, or <tt>-1</tt> for a wildcard.
   * @param obj
   *        The object for the pattern, or <tt>-1</tt> for a wildcard.
   * @param context
   *        The context for the pattern, or <tt>-1</tt> for a wildcard.
   * @param explicit
   *        Flag indicating whether explicit or inferred statements should be
   *        removed; <tt>true</tt> removes explicit statements that match the
   *        pattern, <tt>false</tt> removes inferred statements that match the
   *        pattern.
   * @return The number of triples that were removed.
   * @throws IOException
   *         If an I/O error occurred.
   */
  public int removeTriples(int subj, int pred, int obj, int context, boolean explicit)
    throws IOException
  {
    // Only the explicit bit takes part in the search; it must match 'flags'
    byte flags = explicit ? EXPLICIT_FLAG : 0;
    RecordIterator iter = getTriples(subj, pred, obj, context, flags, EXPLICIT_FLAG);
    return removeTriples(iter);
  }

  private int removeTriples(RecordIterator iter)
    throws IOException
  {
    byte[] data = iter.next();

    if (data == null) {
      // no triples to remove
      return 0;
    }

    int count = 0;

    // Store the values that need to be removed in a tmp file and then
    // iterate over this file to set the REMOVED flag
    RecordCache removedTriplesCache = new SequentialRecordCache(dir, RECORD_LENGTH);
    try {
      while (data != null) {
        if ((data[FLAG_IDX] & REMOVED_FLAG) == 0) {
          data[FLAG_IDX] |= REMOVED_FLAG;
          removedTriplesCache.storeRecord(data);
        }
        data = iter.next();
      }
      iter.close();

      count = (int)removedTriplesCache.getRecordCount();
      updatedTriplesCache.storeRecords(removedTriplesCache);

      // Set the REMOVED flag by overwriting the affected records
      for (TripleIndex index : indexes) {
        BTree btree = index.getBTree();

        RecordIterator recIter = removedTriplesCache.getRecords();
        try {
          while ((data = recIter.next()) != null) {
            btree.insert(data);
          }
        }
        finally {
          recIter.close();
        }
      }
    }
    finally {
      removedTriplesCache.discard();
    }

    return count;
  }

  /**
   * Starts a transaction: records the ACTIVE transaction status and prepares
   * the cache of updated triples. The cache is created on the first call and
   * reused, with an adjusted maximum size, on subsequent calls.
   *
   * @throws IOException
   *         If an I/O error occurred.
   */
  public void startTransaction()
    throws IOException
  {
    txnStatusFile.setTxnStatus(TxnStatus.ACTIVE);

    // Create a record cache for storing updated triples with a maximum of
    // some 10% of the number of triples
    long maxRecords = indexes.get(0).getBTree().getValueCountEstimate() / 10L;
    if (updatedTriplesCache == null) {
      updatedTriplesCache = new SortedRecordCache(dir, RECORD_LENGTH, maxRecords, new TripleComparator(
          "spoc"));
    }
    else {
      assert updatedTriplesCache.getRecordCount() == 0L : "updatedTripleCache should have been cleared upon commit or rollback";
      updatedTriplesCache.setMaxRecords(maxRecords);
    }
  }

  /**
   * Commits the current transaction. For every index, records flagged as
   * removed are deleted and the transaction flags of added and toggled
   * records are resolved. The transaction status is set to COMMITTING for
   * the duration of the operation and reset to NONE after the indexes have
   * been synced.
   * <p>
   * This method is also used to complete a commit that was interrupted; in
   * that case there is no cache of updated triples and every index is
   * scanned completely.
   *
   * @throws IOException
   *         If an I/O error occurred.
   */
  public void commit()
    throws IOException
  {
    txnStatusFile.setTxnStatus(TxnStatus.COMMITTING);

    // updatedTriplesCache will be null when recovering from a crashed commit
    boolean validCache = updatedTriplesCache != null && updatedTriplesCache.isValid();

    for (TripleIndex index : indexes) {
      BTree btree = index.getBTree();

      RecordIterator iter;
      if (validCache) {
        // Use the cached set of updated triples
        iter = updatedTriplesCache.getRecords();
      }
      else {
        // Cache is absent or invalid; iterate over all triples instead
        iter = btree.iterateAll();
      }

      try {
        byte[] data;
        while ((data = iter.next()) != null) {
          byte flags = data[FLAG_IDX];
          boolean wasAdded = (flags & ADDED_FLAG) != 0;
          boolean wasRemoved = (flags & REMOVED_FLAG) != 0;
          boolean wasToggled = (flags & TOGGLE_EXPLICIT_FLAG) != 0;

          if (wasRemoved) {
            // Removal takes precedence over the other transaction flags
            btree.remove(data);
          }
          else if (wasAdded || wasToggled) {
            if (wasToggled) {
              // Apply the pending toggle by flipping the explicit bit
              data[FLAG_IDX] ^= EXPLICIT_FLAG;
            }
            if (wasAdded) {
              // The bit is known to be set, so XOR clears it
              data[FLAG_IDX] ^= ADDED_FLAG;
            }

            if (validCache) {
              // We're iterating the cache
              btree.insert(data);
            }
            else {
              // We're iterating the BTree itself
              iter.set(data);
            }
          }
        }
      }
      finally {
        iter.close();
      }
    }

    if (updatedTriplesCache != null) {
      updatedTriplesCache.clear();
    }

    sync();

    // Only mark the transaction as finished after the indexes were synced
    txnStatusFile.setTxnStatus(TxnStatus.NONE);
    // checkAllCommitted();
  }

  private void checkAllCommitted()
    throws IOException
  {
    for (TripleIndex index : indexes) {
      System.out.println("Checking " + index + " index");
      BTree btree = index.getBTree();
      RecordIterator iter = btree.iterateAll();
      try {
        for (byte[] data = iter.next(); data != null; data = iter.next()) {
          byte flags = data[FLAG_IDX];
          boolean wasAdded = (flags & ADDED_FLAG) != 0;
          boolean wasRemoved = (flags & REMOVED_FLAG) != 0;
          boolean wasToggled = (flags & TOGGLE_EXPLICIT_FLAG) != 0;
          if (wasAdded || wasRemoved || wasToggled) {
            System.out.println("unexpected triple: " + ByteArrayUtil.toHexString(data));
          }
        }
      }
      finally {
        iter.close();
      }
    }
  }

  /**
   * Rolls back the current transaction. For every index, records flagged as
   * added are deleted and the transaction flags of removed and toggled
   * records are cleared. The transaction status is set to ROLLING_BACK for
   * the duration of the operation and reset to NONE after the indexes have
   * been synced.
   * <p>
   * This method is also used for recovery after a crash; in that case there
   * is no cache of updated triples and every index is scanned completely.
   *
   * @throws IOException
   *         If an I/O error occurred.
   */
  public void rollback()
    throws IOException
  {
    txnStatusFile.setTxnStatus(TxnStatus.ROLLING_BACK);

    // updatedTriplesCache will be null when recovering from a crash
    boolean validCache = updatedTriplesCache != null && updatedTriplesCache.isValid();

    // Mask that clears all transaction flags and keeps the remaining bits
    byte txnFlagsMask = ~(ADDED_FLAG | REMOVED_FLAG | TOGGLE_EXPLICIT_FLAG);

    for (TripleIndex index : indexes) {
      BTree btree = index.getBTree();

      RecordIterator iter;
      if (validCache) {
        // Use the cached set of updated triples
        iter = updatedTriplesCache.getRecords();
      }
      else {
        // Cache is absent or invalid; iterate over all triples instead
        iter = btree.iterateAll();
      }

      try {
        byte[] data = null;
        while ((data = iter.next()) != null) {
          byte flags = data[FLAG_IDX];
          boolean wasAdded = (flags & ADDED_FLAG) != 0;
          boolean wasRemoved = (flags & REMOVED_FLAG) != 0;
          boolean wasToggled = (flags & TOGGLE_EXPLICIT_FLAG) != 0;

          if (wasAdded) {
            // The statement did not exist before the transaction
            btree.remove(data);
          }
          else {
            if (wasRemoved || wasToggled) {
              // Restore the record to its state before the transaction
              data[FLAG_IDX] &= txnFlagsMask;

              if (validCache) {
                // We're iterating the cache
                btree.insert(data);
              }
              else {
                // We're iterating the BTree itself
                iter.set(data);
              }
            }
          }
        }
      }
      finally {
        iter.close();
      }
    }

    if (updatedTriplesCache != null) {
      updatedTriplesCache.clear();
    }

    sync();

    // Only mark the transaction as finished after the indexes were synced
    txnStatusFile.setTxnStatus(TxnStatus.NONE);
  }

  /**
   * Syncs the B-Tree of every triple index.
   *
   * @throws IOException
   *         If an I/O error occurred.
   */
  protected void sync()
    throws IOException
  {
    for (TripleIndex index : indexes) {
      index.getBTree().sync();
    }
  }

  /**
   * Creates a record for the supplied IDs and flags: the four IDs are written
   * at their respective offsets, followed by the flags, which are truncated
   * to a single byte.
   *
   * @return A new array of {@link #RECORD_LENGTH} bytes.
   */
  private byte[] getData(int subj, int pred, int obj, int context, int flags) {
    byte[] data = new byte[RECORD_LENGTH];

    ByteArrayUtil.putInt(subj, data, SUBJ_IDX);
    ByteArrayUtil.putInt(pred, data, PRED_IDX);
    ByteArrayUtil.putInt(obj, data, OBJ_IDX);
    ByteArrayUtil.putInt(context, data, CONTEXT_IDX);
    data[FLAG_IDX] = (byte)flags;

    return data;
  }

  /**
   * Creates the search key for the supplied pattern and flags. The key has
   * the same layout as a regular record; wildcard IDs (<tt>-1</tt>) are
   * written as-is and are expected to be excluded by the search mask.
   */
  private byte[] getSearchKey(int subj, int pred, int obj, int context, int flags) {
    return getData(subj, pred, obj, context, flags);
  }

  private byte[] getSearchMask(int subj, int pred, int obj, int context, int flags) {
    byte[] mask = new byte[RECORD_LENGTH];

    if (subj != -1) {
      ByteArrayUtil.putInt(0xffffffff, mask, SUBJ_IDX);
    }
    if (pred != -1) {
      ByteArrayUtil.putInt(0xffffffff, mask, PRED_IDX);
    }
    if (obj != -1) {
      ByteArrayUtil.putInt(0xffffffff, mask, OBJ_IDX);
    }
    if (context != -1) {
      ByteArrayUtil.putInt(0xffffffff, mask, CONTEXT_IDX);
    }
    mask[FLAG_IDX] = (byte)flags;

    return mask;
  }

  private byte[] getMinValue(int subj, int pred, int obj, int context) {
    byte[] minValue = new byte[RECORD_LENGTH];

    ByteArrayUtil.putInt((subj == -1 ? 0x00000000 : subj), minValue, SUBJ_IDX);
    ByteArrayUtil.putInt((pred == -1 ? 0x00000000 : pred), minValue, PRED_IDX);
    ByteArrayUtil.putInt((obj == -1 ? 0x00000000 : obj), minValue, OBJ_IDX);
    ByteArrayUtil.putInt((context == -1 ? 0x00000000 : context), minValue, CONTEXT_IDX);
    minValue[FLAG_IDX] = (byte)0;

    return minValue;
  }

  private byte[] getMaxValue(int subj, int pred, int obj, int context) {
    byte[] maxValue = new byte[RECORD_LENGTH];

    ByteArrayUtil.putInt((subj == -1 ? 0xffffffff : subj), maxValue, SUBJ_IDX);
    ByteArrayUtil.putInt((pred == -1 ? 0xffffffff : pred), maxValue, PRED_IDX);
    ByteArrayUtil.putInt((obj == -1 ? 0xffffffff : obj), maxValue, OBJ_IDX);
    ByteArrayUtil.putInt((context == -1 ? 0xffffffff : context), maxValue, CONTEXT_IDX);
    maxValue[FLAG_IDX] = (byte)0xff;

    return maxValue;
  }

  private Properties loadProperties(File propFile)
    throws IOException
  {
    InputStream in = new FileInputStream(propFile);
    try {
      Properties properties = new Properties();
      properties.load(in);
      return properties;
    }
    finally {
      in.close();
    }
  }

  /**
   * Writes the meta-data of the triple store to the supplied file, replacing
   * any existing contents.
   *
   * @param propFile
   *        The properties file to write.
   * @throws IOException
   *         If the file could not be opened or written.
   */
  private void storeProperties(File propFile)
    throws IOException
  {
    OutputStream out = new FileOutputStream(propFile);
    try {
      properties.store(out, "triple indexes meta-data, DO NOT EDIT!");
    }
    finally {
      out.close();
    }
  }

  /*-------------------------*
   * Inner class TripleIndex *
   *-------------------------*/

  private class TripleIndex {

    private final TripleComparator tripleComparator;

    private final BTree btree;

    public TripleIndex(String fieldSeq)
      throws IOException
    {
      tripleComparator = new TripleComparator(fieldSeq);
      btree = new BTree(dir, getFilenamePrefix(fieldSeq), 2048, RECORD_LENGTH, tripleComparator, forceSync);
    }

    private String getFilenamePrefix(String fieldSeq) {
      return "triples-" + fieldSeq;
    }

    public char[] getFieldSeq() {
      return tripleComparator.getFieldSeq();
    }

    public BTree getBTree() {
      return btree;
    }

    /**
     * Determines the 'score' of this index on the supplied pattern of
     * subject, predicate, object and context IDs. The higher the score, the
     * better the index is suited for matching the pattern. Lowest score is 0,
     * which means that the index will perform a sequential scan.
     */
    public int getPatternScore(int subj, int pred, int obj, int context) {
      int score = 0;

      for (char field : tripleComparator.getFieldSeq()) {
        switch (field) {
          case 's':
            if (subj >= 0) {
              score++;
            }
            else {
              return score;
            }
            break;
          case 'p':
            if (pred >= 0) {
              score++;
            }
            else {
              return score;
            }
            break;
          case 'o':
            if (obj >= 0) {
              score++;
            }
            else {
              return score;
            }
            break;
          case 'c':
            if (context >= 0) {
              score++;
            }
            else {
              return score;
            }
            break;
          default:
            throw new RuntimeException("invalid character '" + field + "' in field sequence: "
                + new String(tripleComparator.getFieldSeq()));
        }
      }

      return score;
    }

    @Override
    public String toString() {
      return new String(getFieldSeq());
    }
  }

  /*------------------------------*
   * Inner class TripleComparator *
   *------------------------------*/

  /**
   * A RecordComparator that can be used to create indexes with a configurable
   * order of the subject, predicate, object and context fields.
   */
  private static class TripleComparator implements RecordComparator {

    private final char[] fieldSeq;

    public TripleComparator(String fieldSeq) {
      this.fieldSeq = fieldSeq.toCharArray();
    }

    public char[] getFieldSeq() {
      return fieldSeq;
    }

    public final int compareBTreeValues(byte[] key, byte[] data, int offset, int length) {
      for (char field : fieldSeq) {
        int fieldIdx = 0;

        switch (field) {
          case 's':
            fieldIdx = SUBJ_IDX;
            break;
          case 'p':
            fieldIdx = PRED_IDX;
            break;
          case 'o':
            fieldIdx = OBJ_IDX;
            break;
          case 'c':
            fieldIdx = CONTEXT_IDX;
            break;
          default:
            throw new IllegalArgumentException("invalid character '" + field + "' in field sequence: "
                + new String(fieldSeq));
        }

        int diff = ByteArrayUtil.compareRegion(key, fieldIdx, data, offset + fieldIdx, 4);

        if (diff != 0) {
          return diff;
        }
      }

      return 0;
    }
  }
}
TOP

Related Classes of uk.ac.open.kmi.smartproducts.sesame.sail.TripleStore$TripleIndex

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.