Package org.apache.cassandra.db

Source Code of org.apache.cassandra.db.ColumnIndexer

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.cassandra.db;

import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.io.DataOutputBuffer;
import org.apache.cassandra.io.IndexHelper;
import org.apache.cassandra.utils.BloomFilter;


/**
* Helps create an index for a column family, based on the serialized size of its columns.
* Author : Karthik Ranganathan ( kranganathan@facebook.com )
*/

public class ColumnIndexer
{
  /**
   * Given a column family this, function creates an in-memory structure that represents the
   * column index for the column family, and subsequently writes it to disk.
   * @param columnFamily Column family to create index for
   * @param dos data output stream
   * @throws IOException
   */
    public static void serialize(ColumnFamily columnFamily, DataOutputStream dos) throws IOException
  {
        Collection<IColumn> columns = columnFamily.getAllColumns();
        BloomFilter bf = createColumnBloomFilter(columns);                   
        /* Write out the bloom filter. */
        DataOutputBuffer bufOut = new DataOutputBuffer();
        BloomFilter.serializer().serialize(bf, bufOut);
        /* write the length of the serialized bloom filter. */
        dos.writeInt(bufOut.getLength());
        /* write out the serialized bytes. */
        dos.write(bufOut.getData(), 0, bufOut.getLength());

        /* Do the indexing */
        TypeInfo typeInfo = DatabaseDescriptor.getTypeInfo(columnFamily.name());       
        doIndexing(typeInfo, columns, dos);       
  }
   
    /**
     * Create a bloom filter that contains the subcolumns and the columns that
     * make up this Column Family.
     * @param columns columns of the ColumnFamily
     * @return BloomFilter with the summarized information.
     */
    private static BloomFilter createColumnBloomFilter(Collection<IColumn> columns)
    {
        int columnCount = 0;
        for ( IColumn column : columns )
        {
            columnCount += column.getObjectCount();
        }
       
        BloomFilter bf = new BloomFilter(columnCount, 4);
        for ( IColumn column : columns )
        {
            bf.add(column.name());
            /* If this is SuperColumn type Column Family we need to get the subColumns too. */
            if ( column instanceof SuperColumn )
            {
                Collection<IColumn> subColumns = column.getSubColumns();
                for ( IColumn subColumn : subColumns )
                {
                    bf.add(subColumn.name());
                }
            }
        }
        return bf;
    }
   
    private static IndexHelper.ColumnIndexInfo getColumnIndexInfo(TypeInfo typeInfo, IColumn column)
    {
        IndexHelper.ColumnIndexInfo cIndexInfo = null;
       
        if ( column instanceof SuperColumn )
        {
            cIndexInfo = IndexHelper.ColumnIndexFactory.instance(TypeInfo.STRING);           
            cIndexInfo.set(column.name());
        }
        else
        {
            cIndexInfo = IndexHelper.ColumnIndexFactory.instance(typeInfo);                       
            switch(typeInfo)
            {
                case STRING:
                    cIndexInfo.set(column.name());                       
                    break;
                   
                case LONG:
                    cIndexInfo.set(column.timestamp());                       
                    break;
            }
        }
       
        return cIndexInfo;
    }

    /**
     * Given the collection of columns in the Column Family,
     * the name index is generated and written into the provided
     * stream
     * @param columns for whom the name index needs to be generated
     * @param bf bloom filter that summarizes the columns that make
     *           up the column family.
     * @param dos stream into which the serialized name index needs
     *            to be written.
     * @throws IOException
     */
    private static void doIndexing(TypeInfo typeInfo, Collection<IColumn> columns, DataOutputStream dos) throws IOException
    {
        /* we are going to write column indexes */
        int numColumns = 0;
        int position = 0;
        int indexSizeInBytes = 0;
        int sizeSummarized = 0;
       
        /*
         * Maintains a list of KeyPositionInfo objects for the columns in this
         * column family. The key is the column name and the position is the
         * relative offset of that column name from the start of the list.
         * We do this so that we don't read all the columns into memory.
        */
       
        List<IndexHelper.ColumnIndexInfo> columnIndexList = new ArrayList<IndexHelper.ColumnIndexInfo>();       
       
        /* column offsets at the right thresholds into the index map. */
        for ( IColumn column : columns )
        {
            /* if we hit the column index size that we have to index after, go ahead and index it */
            if(position - sizeSummarized >= DatabaseDescriptor.getColumnIndexSize())
            {     
                /*
                 * ColumnSort applies only to columns. So in case of
                 * SuperColumn always use the name indexing scheme for
                 * the SuperColumns. We will fix this later.
                 */
                IndexHelper.ColumnIndexInfo cIndexInfo = getColumnIndexInfo(typeInfo, column);               
                cIndexInfo.position(position);
                cIndexInfo.count(numColumns);               
                columnIndexList.add(cIndexInfo);
                /*
                 * we will be writing this object as a UTF8 string and two ints,
                 * so calculate the size accordingly. Note that we store the string
                 * as UTF-8 encoded, so when we calculate the length, it should be
                 * converted to UTF-8.
                 */
                indexSizeInBytes += cIndexInfo.size();
                sizeSummarized = position;
                numColumns = 0;
            }
            position += column.serializedSize();
            ++numColumns;
        }
        /* write the column index list */
        IndexHelper.serialize(indexSizeInBytes, columnIndexList, dos);
    }
}
TOP

Related Classes of org.apache.cassandra.db.ColumnIndexer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.