Source Code of org.apache.poi.hssf.record.SSTRecord

/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2003 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache POI" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache POI", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */


package org.apache.poi.hssf.record;


import org.apache.poi.util.BinaryTree;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianConsts;


import java.util.Iterator;
import java.util.List;


/**
 * Title:        Static String Table Record
 * <P>
 * Description:  This holds all the strings for LabelSSTRecords.
 * <P>
 * REFERENCE:    PG 389 Microsoft Excel 97 Developer's Kit (ISBN:
 *               1-57231-498-2)
 * <P>
 * @author Andrew C. Oliver (acoliver at apache dot org)
 * @author Marc Johnson (mjohnson at apache dot org)
 * @author Glen Stampoultzis (glens at apache.org)
 *
 * @see org.apache.poi.hssf.record.LabelSSTRecord
 * @see org.apache.poi.hssf.record.ContinueRecord
 */


public class SSTRecord
        extends Record
{


    /** how big can an SST record be? As big as any record can be: 8228 bytes */
    static final int MAX_RECORD_SIZE = 8228;


    /** standard record overhead: two shorts (record id plus data space size)*/
    static final int STD_RECORD_OVERHEAD =
            2 * LittleEndianConsts.SHORT_SIZE;


    /** SST overhead: the standard record overhead, plus the number of strings and the number of unique strings -- two ints */
    static final int SST_RECORD_OVERHEAD =
            ( STD_RECORD_OVERHEAD + ( 2 * LittleEndianConsts.INT_SIZE ) );


    /** how much data can we stuff into an SST record? That would be _max minus the standard SST record overhead */
    static final int MAX_DATA_SPACE = MAX_RECORD_SIZE - SST_RECORD_OVERHEAD;


    /** overhead for each string includes the string's character count (a short) and the flag describing its characteristics (a byte) */
    static final int STRING_MINIMAL_OVERHEAD = LittleEndianConsts.SHORT_SIZE + LittleEndianConsts.BYTE_SIZE;


    public static final short sid = 0xfc;


    /** union of strings in the SST and EXTSST */
    private int field_1_num_strings;


    /** according to docs ONLY SST */
    private int field_2_num_unique_strings;
    private BinaryTree field_3_strings;


    /** Record lengths for initial SST record and all continue records */
    private List _record_lengths = null;
    private SSTDeserializer deserializer;


    /** Offsets from the beginning of the SST record (even across continuations) */
    int[] bucketAbsoluteOffsets;
    /** Offsets relative the start of the current SST or continue record */
    int[] bucketRelativeOffsets;


    /**
     * default constructor
     */
    public SSTRecord()
    {
        field_1_num_strings = 0;
        field_2_num_unique_strings = 0;
        field_3_strings = new BinaryTree();
        deserializer = new SSTDeserializer(field_3_strings);
    }


    /**
     * Constructs an SST record and sets its fields appropriately.
     *
     * @param id must be 0xfc or an exception will be throw upon
     *           validation
     * @param size the size of the data area of the record
     * @param data of the record (should not contain sid/len)
     */


    public SSTRecord( final short id, final short size, final byte[] data )
    {
        super( id, size, data );
    }


    /**
     * Constructs an SST record and sets its fields appropriately.
     *
     * @param id must be 0xfc or an exception will be throw upon
     *           validation
     * @param size the size of the data area of the record
     * @param data of the record (should not contain sid/len)
     * @param offset of the record
     */


    public SSTRecord( final short id, final short size, final byte[] data,
                      int offset )
    {
        super( id, size, data, offset );
    }


    /**
     * Add a string. Determines whether 8-bit encoding can be used, or
     * whether 16-bit encoding must be used.
     * <p>
     * THIS IS THE PREFERRED METHOD OF ADDING A STRING. IF YOU USE THE
     * OTHER ,code>addString</code> METHOD AND FORCE 8-BIT ENCODING ON
     * A STRING THAT SHOULD USE 16-BIT ENCODING, YOU WILL CORRUPT THE
     * STRING; IF YOU USE THAT METHOD AND FORCE 16-BIT ENCODING, YOU
     * ARE WASTING SPACE WHEN THE WORKBOOK IS WRITTEN OUT.
     *
     * @param string string to be added
     *
     * @return the index of that string in the table
     */


    public int addString( final String string )
    {
        int rval;


        if ( string == null )
        {
            rval = addString( "", false );
        }
        else
        {


            // scan for characters greater than 255 ... if any are
            // present, we have to use 16-bit encoding. Otherwise, we
            // can use 8-bit encoding
            boolean useUTF16 = false;
            int strlen = string.length();


            for ( int j = 0; j < strlen; j++ )
            {
                if ( string.charAt( j ) > 255 )
                {
                    useUTF16 = true;
                    break;
                }
            }
            rval = addString( string, useUTF16 );
        }
        return rval;
    }


    /**
     * Add a string and assert the encoding (8-bit or 16-bit) to be
     * used.
     * <P>
     * USE THIS METHOD AT YOUR OWN RISK. IF YOU FORCE 8-BIT ENCODING,
     * YOU MAY CORRUPT YOUR STRING. IF YOU FORCE 16-BIT ENCODING AND
     * IT ISN'T NECESSARY, YOU WILL WASTE SPACE WHEN THIS RECORD IS
     * WRITTEN OUT.
     *
     * @param string string to be added
     * @param useUTF16 if true, forces 16-bit encoding. If false,
     *                 forces 8-bit encoding
     *
     * @return the index of that string in the table
     */


    public int addString( final String string, final boolean useUTF16 )
    {
        field_1_num_strings++;
        String str = ( string == null ) ? ""
                : string;
        int rval;
        UnicodeString ucs = new UnicodeString();


        ucs.setString( str );
        ucs.setCharCount( (short) str.length() );
        ucs.setOptionFlags( (byte) ( useUTF16 ? 1
                : 0 ) );
        Integer integer = (Integer) field_3_strings.getKeyForValue( ucs );


        if ( integer != null )
        {
            rval = integer.intValue();
        }
        else
        {


            // This is a new string -- we didn't see it among the
            // strings we've already collected
            rval = field_3_strings.size();
            field_2_num_unique_strings++;
            integer = new Integer( rval );
            SSTDeserializer.addToStringTable( field_3_strings, integer, ucs );
//            field_3_strings.put( integer, ucs );
        }
        return rval;
    }


    /**
     * @return number of strings
     */


    public int getNumStrings()
    {
        return field_1_num_strings;
    }


    /**
     * @return number of unique strings
     */


    public int getNumUniqueStrings()
    {
        return field_2_num_unique_strings;
    }


    /**
     * USE THIS METHOD AT YOUR OWN PERIL: THE <code>addString</code>
     * METHODS MANIPULATE THE NUMBER OF STRINGS AS A SIDE EFFECT; YOUR
     * ATTEMPTS AT MANIPULATING THE STRING COUNT IS LIKELY TO BE VERY
     * WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN THIS RECORD IS
     * WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ THE RECORD
     *
     * @param count  number of strings
     *
     */


    public void setNumStrings( final int count )
    {
        field_1_num_strings = count;
    }


    /**
     * USE THIS METHOD AT YOUR OWN PERIL: THE <code>addString</code>
     * METHODS MANIPULATE THE NUMBER OF UNIQUE STRINGS AS A SIDE
     * EFFECT; YOUR ATTEMPTS AT MANIPULATING THE UNIQUE STRING COUNT
     * IS LIKELY TO BE VERY WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN
     * THIS RECORD IS WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ
     * THE RECORD
     *
     * @param count  number of strings
     */


    public void setNumUniqueStrings( final int count )
    {
        field_2_num_unique_strings = count;
    }


    /**
     * Get a particular string by its index
     *
     * @param id index into the array of strings
     *
     * @return the desired string
     */


    public String getString( final int id )
    {
        return ( (UnicodeString) field_3_strings.get( new Integer( id ) ) ).getString();
    }


    public boolean isString16bit( final int id )
    {
        UnicodeString unicodeString = ( (UnicodeString) field_3_strings.get( new Integer( id ) ) );
        return ( ( unicodeString.getOptionFlags() & 0x01 ) == 1 );
    }


    /**
     * Return a debugging string representation
     *
     * @return string representation
     */


    public String toString()
    {
        StringBuffer buffer = new StringBuffer();


        buffer.append( "[SST]\n" );
        buffer.append( "    .numstrings     = " )
                .append( Integer.toHexString( getNumStrings() ) ).append( "\n" );
        buffer.append( "    .uniquestrings  = " )
                .append( Integer.toHexString( getNumUniqueStrings() ) ).append( "\n" );
        for ( int k = 0; k < field_3_strings.size(); k++ )
        {
            buffer.append( "    .string_" + k + "      = " )
                    .append( ( field_3_strings
                    .get( new Integer( k ) ) ).toString() ).append( "\n" );
        }
        buffer.append( "[/SST]\n" );
        return buffer.toString();
    }


    /**
     * @return sid
     */
    public short getSid()
    {
        return sid;
    }


    /**
     * @return hashcode
     */
    public int hashCode()
    {
        return field_2_num_unique_strings;
    }


    public boolean equals( Object o )
    {
        if ( ( o == null ) || ( o.getClass() != this.getClass() ) )
        {
            return false;
        }
        SSTRecord other = (SSTRecord) o;


        return ( ( field_1_num_strings == other
                .field_1_num_strings ) && ( field_2_num_unique_strings == other
                .field_2_num_unique_strings ) && field_3_strings
                .equals( other.field_3_strings ) );
    }


    /**
     * validate SID
     *
     * @param id the alleged SID
     *
     * @exception RecordFormatException if validation fails
     */


    protected void validateSid( final short id )
            throws RecordFormatException
    {
        if ( id != sid )
        {
            throw new RecordFormatException( "NOT An SST RECORD" );
        }
    }


    /**
     * Fill the fields from the data
     * <P>
     * The data consists of sets of string data. This string data is
     * arranged as follows:
     * <P>
     * <CODE><pre>
     * short  string_length;   // length of string data
     * byte   string_flag;     // flag specifying special string
     *                         // handling
     * short  run_count;       // optional count of formatting runs
     * int    extend_length;   // optional extension length
     * char[] string_data;     // string data, can be byte[] or
     *                         // short[] (length of array is
     *                         // string_length)
     * int[]  formatting_runs; // optional formatting runs (length of
     *                         // array is run_count)
     * byte[] extension;       // optional extension (length of array
     *                         // is extend_length)
     * </pre></CODE>
     * <P>
     * The string_flag is bit mapped as follows:
     * <P>
     * <TABLE>
     *   <TR>
     *      <TH>Bit number</TH>
     *      <TH>Meaning if 0</TH>
     *      <TH>Meaning if 1</TH>
     *   <TR>
     *   <TR>
     *      <TD>0</TD>
     *      <TD>string_data is byte[]</TD>
     *      <TD>string_data is short[]</TH>
     *   <TR>
     *   <TR>
     *      <TD>1</TD>
     *      <TD>Should always be 0</TD>
     *      <TD>string_flag is defective</TH>
     *   <TR>
     *   <TR>
     *      <TD>2</TD>
     *      <TD>extension is not included</TD>
     *      <TD>extension is included</TH>
     *   <TR>
     *   <TR>
     *      <TD>3</TD>
     *      <TD>formatting run data is not included</TD>
     *      <TD>formatting run data is included</TH>
     *   <TR>
     *   <TR>
     *      <TD>4</TD>
     *      <TD>Should always be 0</TD>
     *      <TD>string_flag is defective</TH>
     *   <TR>
     *   <TR>
     *      <TD>5</TD>
     *      <TD>Should always be 0</TD>
     *      <TD>string_flag is defective</TH>
     *   <TR>
     *   <TR>
     *      <TD>6</TD>
     *      <TD>Should always be 0</TD>
     *      <TD>string_flag is defective</TH>
     *   <TR>
     *   <TR>
     *      <TD>7</TD>
     *      <TD>Should always be 0</TD>
     *      <TD>string_flag is defective</TH>
     *   <TR>
     * </TABLE>
     * <P>
     * We can handle eating the overhead associated with bits 2 or 3
     * (or both) being set, but we have no idea what to do with the
     * associated data. The UnicodeString class can handle the byte[]
     * vs short[] nature of the actual string data
     *
     * @param data raw data
     * @param size size of the raw data
     */


    protected void fillFields( final byte[] data, final short size,
                               int offset )
    {


        // this method is ALWAYS called after construction -- using
        // the nontrivial constructor, of course -- so this is where
        // we initialize our fields
        field_1_num_strings = LittleEndian.getInt( data, 0 + offset );
        field_2_num_unique_strings = LittleEndian.getInt( data, 4 + offset );
        field_3_strings = new BinaryTree();
        deserializer = new SSTDeserializer(field_3_strings);
        deserializer.manufactureStrings( data, 8 + offset, (short)(size - 8) );
    }




    /**
     * @return an iterator of the strings we hold. All instances are
     *         UnicodeStrings
     */


    Iterator getStrings()
    {
        return field_3_strings.values().iterator();
    }


    /**
     * @return count of the strings we hold.
     */


    int countStrings()
    {
        return field_3_strings.size();
    }


    /**
     * called by the class that is responsible for writing this sucker.
     * Subclasses should implement this so that their data is passed back in a
     * byte array.
     *
     * @return size
     */


    public int serialize( int offset, byte[] data )
    {
        SSTSerializer serializer = new SSTSerializer(
                _record_lengths, field_3_strings, getNumStrings(), getNumUniqueStrings() );
        int bytes = serializer.serialize( getRecordSize(), offset, data );
        bucketAbsoluteOffsets = serializer.getBucketAbsoluteOffsets();
        bucketRelativeOffsets = serializer.getBucketRelativeOffsets();
//        for ( int i = 0; i < bucketAbsoluteOffsets.length; i++ )
//        {
//            System.out.println( "bucketAbsoluteOffset = " + bucketAbsoluteOffsets[i] );
//            System.out.println( "bucketRelativeOffset = " + bucketRelativeOffsets[i] );
//        }
        return bytes;
    }




    public int getRecordSize()
    {
        SSTRecordSizeCalculator calculator = new SSTRecordSizeCalculator(field_3_strings);
        int recordSize = calculator.getRecordSize();
        _record_lengths = calculator.getRecordLengths();
        return recordSize;
    }


    SSTDeserializer getDeserializer()
    {
        return deserializer;
    }


    /**
     * Strange to handle continue records this way.  Is it a smell?
     */
    public void processContinueRecord( byte[] record )
    {
        deserializer.processContinueRecord( record );
    }


    /**
     * Creates an extended string record based on the current contents of
     * the current SST record.  The offset within the stream to the SST record
     * is required because the extended string record points directly to the
     * strings in the SST record.
     * <p>
     * NOTE: THIS FUNCTION MUST ONLY BE CALLED AFTER THE SST RECORD HAS BEEN
     *       SERIALIZED.
     *
     * @param sstOffset     The offset in the stream to the start of the
     *                      SST record.
     * @return  The new SST record.
     */
    public ExtSSTRecord createExtSSTRecord(int sstOffset)
    {
        if (bucketAbsoluteOffsets == null || bucketAbsoluteOffsets == null)
            throw new IllegalStateException("SST record has not yet been serialized.");


        ExtSSTRecord extSST = new ExtSSTRecord();
        extSST.setNumStringsPerBucket((short)8);
        int[] absoluteOffsets = (int[]) bucketAbsoluteOffsets.clone();
        int[] relativeOffsets = (int[]) bucketRelativeOffsets.clone();
        for ( int i = 0; i < absoluteOffsets.length; i++ )
            absoluteOffsets[i] += sstOffset;
        extSST.setBucketOffsets(absoluteOffsets, relativeOffsets);
        return extSST;
    }


    /**
     * Calculates the size in bytes of the EXTSST record as it would be if the
     * record was serialized.
     *
     * @return  The size of the ExtSST record in bytes.
     */
    public int calcExtSSTRecordSize()
    {
        return 4 + 2 + ((field_3_strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE) + 1) * 8;
    }
}
Source Code of org.apache.poi.hssf.record.SSTRecord

Related Classes of org.apache.poi.hssf.record.SSTRecord