// Source Code of org.apache.derby.iapi.store.access.BackingStoreHashtable

/*


   Derby - Class org.apache.derby.iapi.store.access.BackingStoreHashtable


   Copyright 1999, 2004 The Apache Software Foundation or its licensors, as applicable.


   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at


      http://www.apache.org/licenses/LICENSE-2.0


   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


 */


package org.apache.derby.iapi.store.access;


import org.apache.derby.iapi.services.sanity.SanityManager;


import org.apache.derby.iapi.services.io.Storable;


import org.apache.derby.iapi.error.StandardException; 


import org.apache.derby.iapi.types.CloneableObject;
import org.apache.derby.iapi.types.DataValueDescriptor;


import org.apache.derby.iapi.services.cache.ClassSize;


import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Properties; 
import java.util.Vector;
import java.util.NoSuchElementException;


/**
A BackingStoreHashtable is a utility class which will store a set of rows into
an in-memory hash table, or overflow the hash table to a temporary on-disk
structure.
<p>
All rows must contain the same number of columns, and the column at position
N of all the rows must have the same format id.  If the BackingStoreHashtable needs to be
overflowed to disk, then an arbitrary row will be chosen and used as a template
for creating the underlying overflow container.


<p>
The hash table will be built logically as follows (actual implementation
may differ).  The important points are that the hash value is the standard
java hash value on row[key_column_numbers[0]], if key_column_numbers.length is 1,
or on row[key_column_numbers[0, 1, ...]] if key_column_numbers.length > 1,
and that duplicate detection is done by the standard java duplicate detection
provided by java.util.Hashtable.
<p>
import java.util.Hashtable;


hash_table = new Hashtable();


Object[] row;
boolean  needsToClone = rowSource.needsToClone();


while((row = rowSource.getNextRowFromRowSource()) != null)
{
    if (needsToClone)
        row = clone_row_from_row(row);


  Object key = KeyHasher.buildHashKey(row, key_column_numbers);


    if ((duplicate_value = 
        hash_table.put(key, row)) != null)
    {
        Vector row_vec;


        // inserted a duplicate
        if ((duplicate_value instanceof Vector))
        {
            row_vec = (Vector) duplicate_value;
        }
        else
        {
            // allocate vector to hold duplicates
            row_vec = new Vector(2);


            // insert original row into vector
            row_vec.addElement(duplicate_value);


            // put the vector as the data rather than the row
            hash_table.put(key, row_vec);
        }
        
        // insert new row into vector
        row_vec.addElement(row);
    }
}


**/


public class BackingStoreHashtable
{


    /**************************************************************************
     * Fields of the class
     **************************************************************************
     */
    // Transaction controller handed to the DiskHashtable when rows overflow
    // to disk.
    private TransactionController tc;
    // In-memory table.  A value is either a single row (Object[]) or, when
    // duplicates are kept, a Vector of the rows sharing that key.
    private Hashtable   hash_table;
    // Positions, within each row, of the columns that make up the hash key.
    private int[]       key_column_numbers;
    // If false, rows with an already-present key are collected in a Vector.
    private boolean     remove_duplicates;
    // If true, rows with a null in any key column are skipped.
  private boolean    skipNullKeyColumns;
    // Set by setAuxillaryRuntimeStats(), copied out by getAllRuntimeStats().
    private Properties  auxillary_runtimestats;
    // Source drained by the constructor; may be null.
  private RowSource  row_source;
    /* If max_inmemory_rowcnt > 0 then use that to decide when to spill to disk.
     * Otherwise compute max_inmemory_size based on the JVM memory size when the BackingStoreHashtable
     * is constructed and use that to decide when to spill to disk.
     *
     * Note that max_inmemory_size is used as a remaining budget: it starts at
     * Long.MAX_VALUE (row-count mode) or totalMemory()/100 (size mode) and, in
     * size mode, doSpaceAccounting() subtracts the estimated size of every row
     * it accounts for.  Spilling starts once it is no longer positive.
     * inmemory_rowcnt is only ever incremented (by doSpaceAccounting()).
     */
    private long max_inmemory_rowcnt;
    private long inmemory_rowcnt;
    private long max_inmemory_size;
    // Passed through to the DiskHashtable constructor when spilling starts.
    private boolean keepAfterCommit;


    /**
     * The estimated number of bytes used by Vector(0).  Charged against the
     * memory budget the first time a key gets a duplicate row.
     */
    private final static int vectorSize = ClassSize.estimateBaseFromCatalog(java.util.Vector.class);

    // Null until the first row is spilled; once it is non-null every new row
    // goes to disk (see spillToDisk()).
    private DiskHashtable diskHashtable;


    /**************************************************************************
     * Constructors for This class:
     **************************************************************************
     */
    /**
     * No-argument constructor, private so that it cannot be called from
     * outside this class.  It leaves every field at its default value.
     */
    private BackingStoreHashtable(){}


    /**
     * Create the BackingStoreHashtable from a row source.
     * <p>
     * This routine drains the RowSource.  The performance characteristics
     * depends on the number of rows inserted and the parameters to the 
     * constructor.  
     * <p>
     * If the number of rows is <= "max_inmemory_rowcnt", then the rows are
     * inserted into a java.util.Hashtable.  In this case no 
     * TransactionController is necessary, a "null" tc is valid.
     * <p>
     * If the number of rows is > "max_inmemory_rowcnt", then the rows will
     * be all placed in some sort of Access temporary file on disk.  This 
     * case requires a valid TransactionController.
     *
     * @param tc                An open TransactionController to be used if the
     *                          hash table needs to overflow to disk.
     *
     * @param row_source        RowSource to read rows from.
     *
     * @param key_column_numbers The column numbers of the columns in the
     *                          scan result row to be the key to the Hashtable.
     *                          "0" is the first column in the scan result
     *                          row (which may be different than the first
     *                          row in the table of the scan).
     *
     * @param remove_duplicates Should the Hashtable automatically remove
     *                          duplicates, or should it create the Vector of
     *                          duplicates?
     *
     * @param estimated_rowcnt  The estimated number of rows in the hash table.
     *                          Pass in -1 if there is no estimate.
     *
     * @param max_inmemory_rowcnt
     *                          The maximum number of rows to insert into the 
     *                          inmemory Hash table before overflowing to disk.
     *                          Pass in -1 if there is no maximum.
     *
     * @param initialCapacity   If not "-1" used to initialize the java 
     *                          Hashtable.
     *
     * @param loadFactor        If not "-1" used to initialize the java 
     *                          Hashtable.
   *
   * @param skipNullKeyColumns  Skip rows with a null key column, if true.
     *
     * @param keepAfterCommit If true the hash table is kept after a commit,
     *                        if false the hash table is dropped on the next commit.
     *
     *
   * @exception  StandardException  Standard exception policy.
     **/
    public BackingStoreHashtable(
    TransactionController   tc,
    RowSource               row_source,
    int[]                   key_column_numbers,
    boolean                 remove_duplicates,
    long                    estimated_rowcnt,
    long                    max_inmemory_rowcnt,
    int                     initialCapacity,
    float                   loadFactor,
  boolean          skipNullKeyColumns,
    boolean                 keepAfterCommit)
        throws StandardException
    {
        this.key_column_numbers    = key_column_numbers;
        this.remove_duplicates    = remove_duplicates;
    this.row_source         = row_source;
    this.skipNullKeyColumns     = skipNullKeyColumns;
        this.max_inmemory_rowcnt = max_inmemory_rowcnt;
        if( max_inmemory_rowcnt > 0)
            max_inmemory_size = Long.MAX_VALUE;
        else
            max_inmemory_size = Runtime.getRuntime().totalMemory()/100;
        this.tc = tc;
        this.keepAfterCommit = keepAfterCommit;


        Object[] row;


        // use passed in capacity and loadfactor if not -1, you must specify
        // capacity if you want to specify loadfactor.
        if (initialCapacity != -1)
        {
            hash_table = 
                ((loadFactor == -1) ? 
                     new Hashtable(initialCapacity) : 
                     new Hashtable(initialCapacity, loadFactor));
        }
        else
        {
            /* We want to create the hash table based on the estimated row
             * count if a) we have an estimated row count (i.e. it's greater
             * than zero) and b) we think we can create a hash table to
             * hold the estimated row count without running out of memory.
             * The check for "b" is required because, for deeply nested
             * queries and/or queries with a high number of tables in
             * their FROM lists, the optimizer can end up calculating
             * some very high row count estimates--even up to the point of
             * Double.POSITIVE_INFINITY.  In that case attempts to
             * create a Hashtable of size estimated_rowcnt can cause
             * OutOfMemory errors when we try to create the Hashtable.
             * So as a "red flag" for that kind of situation, we check to
             * see if the estimated row count is greater than the max
             * in-memory size for this table.  Unit-wise this comparison
             * is relatively meaningless: rows vs bytes.  But if our
             * estimated row count is greater than the max number of
             * in-memory bytes that we're allowed to consume, then
             * it's very likely that creating a Hashtable with a capacity
             * of estimated_rowcnt will lead to memory problems.  So in
             * that particular case we leave hash_table null here and
             * initialize it further below, using the estimated in-memory
             * size of the first row to figure out what a reasonable size
             * for the Hashtable might be.
             */
            hash_table = 
                (((estimated_rowcnt <= 0) || (row_source == null)) ?
                     new Hashtable() :
                     (estimated_rowcnt < max_inmemory_size) ?
                         new Hashtable((int) estimated_rowcnt) :
                         null);
        }


        if (row_source != null)
        {
            boolean needsToClone = row_source.needsToClone();


            while ((row = getNextRowFromRowSource()) != null)
            {
                // If we haven't initialized the hash_table yet then that's
                // because a Hashtable with capacity estimated_rowcnt would
                // probably cause memory problems.  So look at the first row
                // that we found and use that to create the hash table with
                // an initial capacity such that, if it was completely full,
                // it would still satisfy the max_inmemory condition.  Note
                // that this isn't a hard limit--the hash table can grow if
                // needed.
                if (hash_table == null)
                {
                    // Check to see how much memory we think the first row
                    // is going to take, and then use that to set the initial
                    // capacity of the Hashtable.
                    double rowUsage = getEstimatedMemUsage(row);
                    hash_table = new Hashtable((int)(max_inmemory_size / rowUsage));
                }


                if (needsToClone)
                {
                    row = cloneRow(row);
                }


                Object key = 
                    KeyHasher.buildHashKey(row, key_column_numbers);


                add_row_to_hash_table(hash_table, key, row);
            }
        }


        // In the (unlikely) event that we received a "red flag" estimated_rowcnt
        // that is too big (see comments above), it's possible that, if row_source
        // was null or else didn't have any rows, hash_table could still be null
        // at this point.  So we initialize it to an empty hashtable (representing
        // an empty result set) so that calls to other methods on this
        // BackingStoreHashtable (ex. "size()") will have a working hash_table
        // on which to operate.
        if (hash_table == null)
            hash_table = new Hashtable();
    }


    /**************************************************************************
     * Private/Protected methods of This class:
     **************************************************************************
     */


  /**
   * Call method to either get next row or next row with non-null
   * key columns.
   *
     *
   * @exception  StandardException  Standard exception policy.
   */
  private Object[] getNextRowFromRowSource()
    throws StandardException
  {
    Object[] row = row_source.getNextRowFromRowSource();


    if (skipNullKeyColumns)
    {
      while (row != null)
      {
        // Are any key columns null?
        int index = 0;
        for ( ; index < key_column_numbers.length; index++)
        {
          if (SanityManager.DEBUG)
          {
            if (! (row[key_column_numbers[index]] instanceof Storable))
            {
              SanityManager.THROWASSERT(
                "row[key_column_numbers[index]] expected to be Storable, not " +
                row[key_column_numbers[index]].getClass().getName());
            }
          }
          Storable storable = (Storable) row[key_column_numbers[index]];
          if (storable.isNull())
          {
            break;
          }
        }
        // No null key columns
        if (index == key_column_numbers.length)
        {
          return row;
        }
        // 1 or more null key columns
        row = row_source.getNextRowFromRowSource();
      }
    }
    return row;
  }


    /**
     * Return a cloned copy of the row.
     *
   * @return The cloned row row to use.
     *
   * @exception  StandardException  Standard exception policy.
     **/
    static Object[] cloneRow(Object[] old_row)
        throws StandardException
    {
        Object[] new_row = new DataValueDescriptor[old_row.length];


    // the only difference between getClone and cloneObject is cloneObject does
    // not objectify a stream.  We use getClone here.  Beetle 4896.
        for (int i = 0; i < old_row.length; i++)
        {
            if( old_row[i] != null)
                new_row[i] = ((DataValueDescriptor) old_row[i]).getClone();
        }


        return(new_row);
    }


    /**
     * Do the work to add one row to the hash table.
     * <p>
     *
     * @param row               Row to add to the hash table.
     * @param hash_table        The java HashTable to load into.
     *
   * @exception  StandardException  Standard exception policy.
     **/
    private void add_row_to_hash_table(
    Hashtable   hash_table,
    Object      key,
    Object[]    row)
    throws StandardException
    {
        if( spillToDisk( hash_table, key, row))
            return;
        
        Object  duplicate_value = null;


        if ((duplicate_value = hash_table.put(key, row)) == null)
            doSpaceAccounting( row, false);
        else
        {
            if (!remove_duplicates)
            {
                Vector row_vec;


                // inserted a duplicate
                if ((duplicate_value instanceof Vector))
                {
                    doSpaceAccounting( row, false);
                    row_vec = (Vector) duplicate_value;
                }
                else
                {
                    // allocate vector to hold duplicates
                    row_vec = new Vector(2);


                    // insert original row into vector
                    row_vec.addElement(duplicate_value);
                    doSpaceAccounting( row, true);
                }


                // insert new row into vector
                row_vec.addElement(row);


                // store vector of rows back into hash table,
                // overwriting the duplicate key that was 
                // inserted.
                hash_table.put(key, row_vec);
            }
        }


        row = null;
    }


    private void doSpaceAccounting( Object[] row,
                                    boolean firstDuplicate)
    {
        inmemory_rowcnt++;
        if( max_inmemory_rowcnt <= 0)
        {
            max_inmemory_size -= getEstimatedMemUsage(row);
            if( firstDuplicate)
                max_inmemory_size -= vectorSize;
        }
    } // end of doSpaceAccounting


    /**
     * Determine whether a new row should be spilled to disk and, if so, do it.
     *
     * @param hash_table The in-memory hash table
     * @param key The row's key
     * @param row
     *
     * @return true if the row was spilled to disk, false if not
     *
     * @exception  StandardException  Standard exception policy.
     */
    private boolean spillToDisk( Hashtable   hash_table,
                                 Object      key,
                                 Object[]    row)
    throws StandardException
    {
        // Once we have started spilling all new rows will go to disk, even if we have freed up some
        // memory by moving duplicates to disk. This simplifies handling of duplicates and accounting.
        if( diskHashtable == null)
        {
            if( max_inmemory_rowcnt > 0)
            {
                if( inmemory_rowcnt < max_inmemory_rowcnt)
                    return false; // Do not spill
            }
            else if( max_inmemory_size > 0)
                return false;
            // Want to start spilling
            if( ! (row instanceof DataValueDescriptor[]))
            {
                if( SanityManager.DEBUG)
                    SanityManager.THROWASSERT( "BackingStoreHashtable row is not DataValueDescriptor[]");
                // Do not know how to put it on disk
                return false;
            }
            diskHashtable = new DiskHashtable( tc,
                                               (DataValueDescriptor[]) row,
                                               key_column_numbers,
                                               remove_duplicates,
                                               keepAfterCommit);
        }
        
        Object duplicateValue = hash_table.get( key);
        if( duplicateValue != null)
        {
            if( remove_duplicates)
                return true; // a degenerate case of spilling
            // If we are keeping duplicates then move all the duplicates from memory to disk
            // This simplifies finding duplicates: they are either all in memory or all on disk.
            if( duplicateValue instanceof Vector)
            {
                Vector duplicateVec = (Vector) duplicateValue;
                for( int i = duplicateVec.size() - 1; i >= 0; i--)
                {
                    Object[] dupRow = (Object[]) duplicateVec.elementAt(i);
                    diskHashtable.put( key, dupRow);
                }
            }
            else
                diskHashtable.put( key, (Object []) duplicateValue);
            hash_table.remove( key);
        }
        diskHashtable.put( key, row);
        return true;
    } // end of spillToDisk


    /**
     * Take a row and return an estimate as to how much memory that
     * row will consume.
     * 
     * @param row The row for which we want to know the memory usage.
     * @return A guess as to how much memory the current row will
     *  use.
     */
    private long getEstimatedMemUsage(Object [] row)
    {
        long rowMem = 0;
        for( int i = 0; i < row.length; i++)
        {
            if (row[i] instanceof DataValueDescriptor)
                rowMem += ((DataValueDescriptor) row[i]).estimateMemoryUsage();
            rowMem += ClassSize.refSize;
        }


        rowMem += ClassSize.refSize;
        return rowMem;
    }


    /**************************************************************************
     * Public Methods of This class:
     **************************************************************************
     */


    /**
     * Close the BackingStoreHashtable.
     * <p>
     * Perform any necessary cleanup after finishing with the hashtable.  Will
     * deallocate/dereference objects as necessary.  If the table has gone
     * to disk this will drop any on disk files used to support the hash table.
     * <p>
     *
   * @exception  StandardException  Standard exception policy.
     **/
    public void close() 
    throws StandardException
    {
        hash_table = null;
        if( diskHashtable != null)
        {
            diskHashtable.close();
            diskHashtable = null;
        }
        return;
    }


    /**
     * Return an Enumeration that can be used to scan entire table.
     * <p>
     * RESOLVE - is it worth it to support this routine when we have a
     *           disk overflow hash table?
     *
   * @return The Enumeration.
     *
   * @exception  StandardException  Standard exception policy.
     **/
    public Enumeration elements()
        throws StandardException
    {
        if( diskHashtable == null)
            return(hash_table.elements());
        return new BackingStoreHashtableEnumeration();
    }


    /**
     * get data associated with given key.
     * <p>
     * There are 2 different types of objects returned from this routine.
     * <p>
   * In both cases, the key value is either the object stored in 
     * row[key_column_numbers[0]], if key_column_numbers.length is 1, 
     * otherwise it is a KeyHasher containing
   * the objects stored in row[key_column_numbers[0, 1, ...]].
     * For every qualifying unique row value an entry is placed into the 
     * Hashtable.
     * <p>
     * For row values with duplicates, the value of the data is a Vector of
     * rows.
     * <p>
     * The caller will have to call "instanceof" on the data value
     * object if duplicates are expected, to determine if the data value
     * of the Hashtable entry is a row or is a Vector of rows.
     * <p>
     * The BackingStoreHashtable "owns" the objects returned from the get()
     * routine.  They remain valid until the next access to the 
     * BackingStoreHashtable.  If the client needs to keep references to these
     * objects, it should clone copies of the objects.  A valid 
     * BackingStoreHashtable can place all rows into a disk based conglomerate,
     * declare a row buffer and then reuse that row buffer for every get()
     * call.
     *
   * @return The value to which the key is mapped in this hashtable; 
     *         null if the key is not mapped to any value in this hashtable.
     *
     * @param key    The key to hash on.
     *
   * @exception  StandardException  Standard exception policy.
     **/
    public Object get(Object key)
    throws StandardException
    {
        Object obj = hash_table.get(key);
        if( diskHashtable == null || obj != null)
            return obj;
        return diskHashtable.get( key);
    }


    /**
     * Return runtime stats to caller by adding them to prop.
     * <p>
     *
     * @param prop   The set of properties to append to.
     *
   * @exception  StandardException  Standard exception policy.
     **/
    public void getAllRuntimeStats(Properties   prop)
    throws StandardException
    {
        if (auxillary_runtimestats != null)
            org.apache.derby.iapi.util.PropertyUtil.copyProperties(auxillary_runtimestats, prop);
    }


    /**
     * remove a row from the hash table.
     * <p>
     * a remove of a duplicate removes the entire duplicate list.
     *
     * @param key          The key of the row to remove.
     *
   * @exception  StandardException  Standard exception policy.
     **/
    public Object remove(
    Object      key)
    throws StandardException
    {
        Object obj = hash_table.remove(key);
        if( obj != null || diskHashtable == null)
            return obj;
        return diskHashtable.remove(key);
    }


    /**
     * Set the auxillary runtime stats.
     * <p>
     * getRuntimeStats() will return both the auxillary stats and any
     * BackingStoreHashtable() specific stats.  Note that each call to
     * setAuxillaryRuntimeStats() overwrites the Property set that was
     * set previously.
     *
     * @param prop   The set of properties to append from.
     *
   * @exception  StandardException  Standard exception policy.
     **/
    public void setAuxillaryRuntimeStats(Properties   prop)
    throws StandardException
    {
        auxillary_runtimestats = prop;
    }


    /**
     * Put a row into the hash table.
     * <p>
     * The in memory hash table will need to keep a reference to the row
     * after the put call has returned.  If "needsToClone" is true then the
     * hash table will make a copy of the row and put that, else if 
     * "needsToClone" is false then the hash table will keep a reference to
     * the row passed in and no copy will be made.
     * <p>
     * If rouine returns false, then no reference is kept to the duplicate
     * row which was rejected (thus allowing caller to reuse the object).
     *
     * @param needsToClone does this routine have to make a copy of the row,
     *                     in order to keep a reference to it after return?
     * @param row          The row to insert into the table.
     *
   * @return true if row was inserted into the hash table.  Returns
     *              false if the BackingStoreHashtable is eliminating 
     *              duplicates, and the row being inserted is a duplicate,
   *        or if we are skipping rows with 1 or more null key columns
   *        and we find a null key column.
     *
   * @exception  StandardException  Standard exception policy.
     **/
    public boolean put(
    boolean     needsToClone,
    Object[]    row)
    throws StandardException
    {
    // Are any key columns null?
    if (skipNullKeyColumns)
    {
      int index = 0;
      for ( ; index < key_column_numbers.length; index++)
      {
        if (SanityManager.DEBUG)
        {
          if (! (row[key_column_numbers[index]] instanceof Storable))
          {
            SanityManager.THROWASSERT(
              "row[key_column_numbers[index]] expected to be Storable, not " +
              row[key_column_numbers[index]].getClass().getName());
          }
        }
        Storable storable = (Storable) row[key_column_numbers[index]];
        if (storable.isNull())
        {
          return false;
        }
      }
    }


        if (needsToClone)
        {
            row = cloneRow(row);
        }


        Object key = KeyHasher.buildHashKey(row, key_column_numbers);


        if ((remove_duplicates) && (get(key) != null))
        {
            return(false);
        }
        else
        {
            add_row_to_hash_table(hash_table, key, row);
            return(true);
        }
    }


    /**
     * Return number of unique rows in the hash table.
     * <p>
     *
   * @return The number of unique rows in the hash table.
     *
   * @exception  StandardException  Standard exception policy.
     **/
    public int size()
    throws StandardException
    {
        if( diskHashtable == null)
            return(hash_table.size());
        return hash_table.size() + diskHashtable.size();
    }


    private class BackingStoreHashtableEnumeration implements Enumeration
    {
        private Enumeration memoryEnumeration;
        private Enumeration diskEnumeration;


        BackingStoreHashtableEnumeration()
        {
            memoryEnumeration = hash_table.elements();
            if( diskHashtable != null)
            {
                try
                {
                    diskEnumeration = diskHashtable.elements();
                }
                catch( StandardException se)
                {
                    diskEnumeration = null;
                }
            }
        }
        
        public boolean hasMoreElements()
        {
            if( memoryEnumeration != null)
            {
                if( memoryEnumeration.hasMoreElements())
                    return true;
                memoryEnumeration = null;
            }
            if( diskEnumeration == null)
                return false;
            return diskEnumeration.hasMoreElements();
        }


        public Object nextElement() throws NoSuchElementException
        {
            if( memoryEnumeration != null)
            {
                if( memoryEnumeration.hasMoreElements())
                    return memoryEnumeration.nextElement();
                memoryEnumeration = null;
            }
            return diskEnumeration.nextElement();
        }
    } // end of class BackingStoreHashtableEnumeration
}
// Source Code of org.apache.derby.iapi.store.access.BackingStoreHashtable

// Related Classes of org.apache.derby.iapi.store.access.BackingStoreHashtable