Source Code of org.apache.cassandra.db.index.SecondaryIndexManager

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.db.index;


import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.*;
import java.util.concurrent.*;


import org.apache.cassandra.config.ColumnDefinition;
import org.apache.cassandra.config.ConfigurationException;
import org.apache.cassandra.db.*;
import org.apache.cassandra.db.compaction.CompactionManager;
import org.apache.cassandra.db.filter.IFilter;
import org.apache.cassandra.dht.AbstractBounds;
import org.apache.cassandra.dht.LocalToken;
import org.apache.cassandra.io.sstable.ReducingKeyIterator;
import org.apache.cassandra.io.sstable.SSTableReader;
import org.apache.cassandra.thrift.IndexClause;
import org.apache.cassandra.thrift.IndexExpression;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Manages all the indexes associated with a given CFS
 * Different types of indexes can be created across the same CF
 */
public class SecondaryIndexManager
{
    private static final Logger logger = LoggerFactory.getLogger(SecondaryIndexManager.class);


    /**
     * Organizes the indexes by column name
     */
    private final ConcurrentNavigableMap<ByteBuffer, SecondaryIndex> indexesByColumn;
    
    
    /**
     * Keeps a single instance of a SecondaryIndex for many columns when the index type
     * has isRowLevelIndex() == true
     * 
     * This allows updates to happen to an entire row at once
     */
    private final Map<Class<? extends SecondaryIndex>,SecondaryIndex> rowLevelIndexMap;
    


    /**
     * The underlying column family containing the source data for these indexes
     */
    public final ColumnFamilyStore baseCfs;


    public SecondaryIndexManager(ColumnFamilyStore baseCfs)
    {
        indexesByColumn = new ConcurrentSkipListMap<ByteBuffer, SecondaryIndex>();
        rowLevelIndexMap = new HashMap<Class<? extends SecondaryIndex>, SecondaryIndex>();
        
        this.baseCfs = baseCfs;
    }


    /**
     * Drops and adds new indexes associated with the underlying CF
     * @throws IOException 
     */
    public void reload() throws IOException
    {
        // figure out what needs to be added and dropped.
        // future: if/when we have modifiable settings for secondary indexes,
        // they'll need to be handled here.
        Collection<ByteBuffer> indexedColumnNames = getIndexedColumns();
        for (ByteBuffer indexedColumn : indexedColumnNames)
        {
            ColumnDefinition def = baseCfs.metadata.getColumn_metadata().get(indexedColumn);
            if (def == null || def.getIndexType() == null)
                removeIndexedColumn(indexedColumn);
        }


        for (ColumnDefinition cdef : baseCfs.metadata.getColumn_metadata().values())
            if (cdef.getIndexType() != null && !indexedColumnNames.contains(cdef.name))
                addIndexedColumn(cdef);
    }
    
    
    /**
     * Does a full rebuild of the indexes specified by columns from the sstables.
     * Does nothing if columns is empty.
     * @param sstables the data to build from
     * @param columns the list of columns to index
     * @throws IOException 
     */
    public void maybeBuildSecondaryIndexes(Collection<SSTableReader> sstables, SortedSet<ByteBuffer> columns) throws IOException
    {
        if (columns.isEmpty())
            return;


        logger.info(String.format("Submitting index build of %s for data in %s",
                                  baseCfs.metadata.comparator.getString(columns), StringUtils.join(sstables, ", ")));


        SecondaryIndexBuilder builder = new SecondaryIndexBuilder(baseCfs, columns, new ReducingKeyIterator(sstables));
        Future<?> future = CompactionManager.instance.submitIndexBuild(builder);
        try
        {
            future.get();
            flushIndexes();
        }
        catch (InterruptedException e)
        {
            throw new AssertionError(e);
        }
        catch (ExecutionException e)
        {
            throw new RuntimeException(e);
        }


        logger.info("Index build of " + baseCfs.metadata.comparator.getString(columns) + " complete");
    }


    /**
     * @return the list of indexed columns
     */
    public SortedSet<ByteBuffer> getIndexedColumns()
    {
        return indexesByColumn.keySet();
    }


    /**
     * Removes a existing index
     * @param column the indexed column to remove
     * @throws IOException 
     */
    public void removeIndexedColumn(ByteBuffer column) throws IOException
    {
        SecondaryIndex index = indexesByColumn.remove(column);
        
        if (index == null)
            return;
        
        SystemTable.setIndexRemoved(baseCfs.metadata.ksName, index.getNameForSystemTable(column));
        
        index.removeIndex(column);
        
        // Remove this column from from row level index map
        if (index instanceof PerRowSecondaryIndex)
        {
            index.removeColumnDef(column);
            
            //If now columns left on this CF remove from row level lookup
            if (index.getColumnDefs().isEmpty())
                rowLevelIndexMap.remove(index.getClass());
        }
        
    }


    /**
     * Adds and builds a index for a column
     * @param cdef the column definition holding the index data
     * @return a future which the caller can optionally block on signaling the index is built
     */
    public synchronized Future<?> addIndexedColumn(ColumnDefinition cdef)
    {
        
        if (indexesByColumn.containsKey(cdef.name))
            return null;
        
        assert cdef.getIndexType() != null;
        logger.info("Creating new index : {}",cdef);
        
        SecondaryIndex index;
        try
        {
            index = SecondaryIndex.createInstance(baseCfs, cdef);
        } catch (ConfigurationException e)
        {
            throw new RuntimeException(e);
        }      
        
        // Keep a single instance of the index per-cf for row level indexes
        // since we want all columns to be under the index
        if (index instanceof PerRowSecondaryIndex)
        {
            SecondaryIndex currentIndex = rowLevelIndexMap.get(index.getClass());
            
            if (currentIndex == null)
            {
                rowLevelIndexMap.put(index.getClass(), index);
                index.init();
            }
            else
            {
                index = currentIndex;
                index.addColumnDef(cdef);
            }         
        }
        else
        {
            index.init();
        }
             
        // link in indexedColumns. this means that writes will add new data to
        // the index immediately,
        // so we don't have to lock everything while we do the build. it's up to
        // the operator to wait
        // until the index is actually built before using in queries.
        indexesByColumn.put(cdef.name, index);
        
        // if we're just linking in the index to indexedColumns on an
        // already-built index post-restart, we're done
        if (index.isIndexBuilt(cdef.name))
            return null;


        return index.buildIndexAsync();
    }


    /**
     * 
     * @param column the name of indexes column
     * @return the index
     */
    public SecondaryIndex getIndexForColumn(ByteBuffer column)
    {
        return indexesByColumn.get(column);
    }


    /**
     * Remove all index MBeans
     */
    public void unregisterMBeans()
    {
        for (Map.Entry<ByteBuffer, SecondaryIndex> entry : indexesByColumn.entrySet())
            entry.getValue().unregisterMbean();
    }
    
    /**
     * Remove all underlying index data
     * @throws IOException 
     */
    public void removeAllIndexes() throws IOException
    {
        for (Map.Entry<ByteBuffer, SecondaryIndex> entry : indexesByColumn.entrySet())
            entry.getValue().removeIndex(entry.getKey());
    }
    
    /**
     * Rename all underlying index files
     * @param newCfName the new index Name
     */
    public void renameIndexes(String newCfName) throws IOException
    {
        for (Map.Entry<ByteBuffer, SecondaryIndex> entry : indexesByColumn.entrySet())
            entry.getValue().renameIndex(newCfName);
    }
    
    /**
     * Flush all indexes to disk
     * @throws ExecutionException
     * @throws InterruptedException
     */
    public void flushIndexes() throws IOException
    {
        for (Map.Entry<ByteBuffer, SecondaryIndex> entry : indexesByColumn.entrySet())
            entry.getValue().forceBlockingFlush();
    }
    
    /**
     * Returns the decoratedKey for a column value
     * @param name column name
     * @param value column value
     * @return decorated key
     */
    public DecoratedKey<LocalToken> getIndexKeyFor(ByteBuffer name, ByteBuffer value)
    {
        return new DecoratedKey<LocalToken>(new LocalToken(baseCfs.metadata.getColumnDefinition(name).getValidator(), value), value);
    }
    
    
    /**
     * @return all built indexes (ready to use)
     */
    public List<String> getBuiltIndexes()
    {
        List<String> indexList = new ArrayList<String>();
        
        for (Map.Entry<ByteBuffer, SecondaryIndex> entry : indexesByColumn.entrySet())
        {
            SecondaryIndex index = entry.getValue();
            
            if (index.isIndexBuilt(entry.getKey())) 
            {
                indexList.add(entry.getValue().getIndexName());
            }
        }
        
        return indexList;
    }
    
    /**
     * @return all CFS from indexes which use a backing CFS internally (KEYS)
     */
    public Collection<ColumnFamilyStore> getIndexesBackedByCfs()
    {
        ArrayList<ColumnFamilyStore> cfsList = new ArrayList<ColumnFamilyStore>();


        for (Map.Entry<ByteBuffer, SecondaryIndex> entry : indexesByColumn.entrySet())
        {
            ColumnFamilyStore cfs = entry.getValue().getUnderlyingCfs();
            
            if (cfs != null)
                cfsList.add(cfs);        
        }
        
        return cfsList;
    }
        
   
    /**
     * Removes obsolete index entries and creates new ones for the given row key
     * and mutated columns.
     * 
     * For columns whos underlying index type has the isRowLevelIndex() flag set to true this function will
     * call the 
     * 
     * @param rowKey the row key
     * @param cf the current rows data
     * @param mutatedIndexedColumns the set of columns that were changed or added
     * @param oldIndexedColumns the columns what were deleted
     * @throws IOException 
     */
    public void applyIndexUpdates(ByteBuffer rowKey,
                                  ColumnFamily cf,
                                  SortedSet<ByteBuffer> mutatedIndexedColumns,
                                  ColumnFamily oldIndexedColumns) throws IOException
    {
        
        // Identify the columns with PerRowSecondaryIndexes
        // we need to make sure this is only called once
        Set<Class<? extends SecondaryIndex>> appliedRowLevelIndexes = null;       
      
        // remove the old index entries
        if (oldIndexedColumns != null)
        {
            for (ByteBuffer columnName : oldIndexedColumns.getColumnNames())
            {
                
                IColumn column = oldIndexedColumns.getColumn(columnName);
                
                if (column == null)
                    continue;
                
                //this was previously deleted so should not be in index
                if (column.isMarkedForDelete())
                    continue;              
                
                SecondaryIndex index = getIndexForColumn(columnName);
                assert index != null;               


                //Update entire row if we encounter a row level index
                if (index instanceof PerRowSecondaryIndex)
                {
                    if (appliedRowLevelIndexes == null)
                        appliedRowLevelIndexes = new HashSet<Class<? extends SecondaryIndex>>();
                    else
                        if (appliedRowLevelIndexes.add(index.getClass()))
                            ((PerRowSecondaryIndex)index).applyIndexUpdates(rowKey, cf, mutatedIndexedColumns, oldIndexedColumns);
                }
                else
                {
                    DecoratedKey<LocalToken> valueKey = getIndexKeyFor(columnName, column.value());


                    ((PerColumnSecondaryIndex)index).deleteColumn(valueKey, rowKey, column);
                }
            }
        }
        
        //insert new columns
        for (ByteBuffer columnName : mutatedIndexedColumns)
        {
            IColumn column = cf.getColumn(columnName);
            if (column == null || column.isMarkedForDelete())
                continue; // null column == row deletion


            SecondaryIndex index = getIndexForColumn(columnName);
            assert index != null;


            //Update entire row if we encounter a row level index
            if (index instanceof PerRowSecondaryIndex)
            {
                if (appliedRowLevelIndexes == null)
                    appliedRowLevelIndexes = new HashSet<Class<? extends SecondaryIndex>>();
                else
                    if (appliedRowLevelIndexes.add(index.getClass()))
                        ((PerRowSecondaryIndex)index).applyIndexUpdates(rowKey, cf, mutatedIndexedColumns, oldIndexedColumns);
            }
            else
            {
                DecoratedKey<LocalToken> valueKey = getIndexKeyFor(columnName, column.value());
                
                ((PerColumnSecondaryIndex)index).insertColumn(valueKey, rowKey, column);         
            }
        }
    }
    
    /**
     * Delete all columns from all indexes for this row
     * @param key the row key
     * @param indexedColumnsInRow all column names in row
     */
    public void deleteFromIndexes(DecoratedKey<?> key, List<IColumn> indexedColumnsInRow) throws IOException
    {
        
        // Identify the columns with isRowLevelIndex == true 
        // we need to make sure this is only called once
        Set<Class<? extends SecondaryIndex>> cleanedRowLevelIndexes = null;       
        
        for (IColumn column : indexedColumnsInRow)
        {
            SecondaryIndex index = indexesByColumn.get(column.name());
            
            if (index == null)
                continue;
            
            //Update entire row if we encounter a row level index
            if (index instanceof PerRowSecondaryIndex)
            {
                if (cleanedRowLevelIndexes == null)
                    cleanedRowLevelIndexes = new HashSet<Class<? extends SecondaryIndex>>();
                else
                    if (cleanedRowLevelIndexes.add(index.getClass()))
                        ((PerRowSecondaryIndex)index).deleteFromIndex(key, indexedColumnsInRow);             
            }
            else
            {
                DecoratedKey<LocalToken> valueKey = getIndexKeyFor(column.name(), column.value());


                ((PerColumnSecondaryIndex)index).deleteColumn(valueKey, key.key, column);
            }
        }       
    }
    
    
    /**
     * Get a list of IndexSearchers from the union of expression index types
     * @param clause the query clause
     * @return the searchers to needed to query the index
     */
    private List<SecondaryIndexSearcher> getIndexSearchersForQuery(IndexClause clause)
    {
        List<SecondaryIndexSearcher> indexSearchers = new ArrayList<SecondaryIndexSearcher>();
        
        Map<String, Set<ByteBuffer>> groupByIndexType = new HashMap<String, Set<ByteBuffer>>();
 
        
        //Group columns by type
        for (IndexExpression ix : clause.expressions)
        {
            SecondaryIndex index = getIndexForColumn(ix.column_name);
            
            if (index == null)
                continue;
            
            Set<ByteBuffer> columns = groupByIndexType.get(index.getClass().getCanonicalName());
            
            if (columns == null)
            {
                columns = new HashSet<ByteBuffer>();
                groupByIndexType.put(index.getClass().getCanonicalName(), columns);
            }
            
            columns.add(ix.column_name);        
        }
        
        //create searcher per type
        for (Map.Entry<String, Set<ByteBuffer>> entry : groupByIndexType.entrySet())
        {
            indexSearchers.add( getIndexForColumn(entry.getValue().iterator().next()).createSecondaryIndexSearcher(entry.getValue()) );
        }
        
        return indexSearchers;
    }
    
    /**
     * Performs a search across a number of column indexes
     * TODO: add support for querying across index types
     * 
     * @param clause the index query clause
     * @param range the row range to restrict to
     * @param dataFilter the column range to restrict to
     * @return found indexed rows
     */
    public List<Row> search(IndexClause clause, AbstractBounds range, IFilter dataFilter)
    {
        List<SecondaryIndexSearcher> indexSearchers = getIndexSearchersForQuery(clause);
               
        if (indexSearchers.isEmpty())
            return Collections.emptyList();
       
        //We currently don't support searching across multiple index types
        if (indexSearchers.size() > 1)
            throw new RuntimeException("Unable to search across multiple secondary index types");
        
        
        return indexSearchers.get(0).search(clause, range, dataFilter);
    }
}
Source Code of org.apache.cassandra.db.index.SecondaryIndexManager

Related Classes of org.apache.cassandra.db.index.SecondaryIndexManager