Package prefuse.data.io

Source Code of prefuse.data.io.AbstractTextTableReader

package prefuse.data.io;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;

import prefuse.data.Table;
import prefuse.data.parser.DataParseException;
import prefuse.data.parser.DataParser;
import prefuse.data.parser.ParserFactory;
import prefuse.data.parser.TypeInferencer;
import prefuse.util.collections.ByteArrayList;
import prefuse.util.io.IOLib;

/**
* Abstract base class for TableReader instances that read in a table
* from a textual data file.
*
* @author <a href="http://jheer.org">jeffrey heer</a>
*/
public abstract class AbstractTextTableReader extends AbstractTableReader {

    private ParserFactory m_pfactory;
    private boolean m_hasHeader;
   
    /**
     * Create a new AbstractTextTableReader using a default ParserFactory.
     */
    public AbstractTextTableReader() {
        this(ParserFactory.getDefaultFactory());
    }

    /**
     * Create a new AbstractTextTableReader.
     * @param parserFactory the ParserFactory to use for parsing text strings
     * into table values.
     */
    public AbstractTextTableReader(ParserFactory parserFactory) {
        m_pfactory = parserFactory;
        m_hasHeader = true;
    }
   
    /**
     * Set whether or not the table data file includes a header row.
     * @param hasHeaderRow true if the the data file includes a header row,
     * false otherwise.
     */
    public void setHasHeader(boolean hasHeaderRow) {
        m_hasHeader = hasHeaderRow;
    }
   
    /**
     * @see prefuse.data.io.AbstractTableReader#readTable(java.io.InputStream)
     */
    public Table readTable(InputStream is) throws DataIOException {
       
        // determine input stream capabilities
        // if we can't reset the stream, we read in all the bytes
        // and make our own local stream
        ByteArrayList buf = null;
        if ( is.markSupported() ) {
            // mark the stream to our reset point
            is.mark(Integer.MAX_VALUE);
        } else {
            // load in the entirety of the input stream
            try {
                buf = IOLib.readAsBytes(is);
            } catch ( IOException ioe ) {
                throw new DataIOException(ioe);
            }
            // create our own input stream
            is = buf.getAsInputStream();
        }
       
        final TypeInferencer di = new TypeInferencer(m_pfactory);
        final ArrayList headers = getColumnNames();
        final int[] dim = new int[] { 0, 0 };
       
        TableReadListener scanner = new TableReadListener() {
            int prevLine = -1;
            public void readValue(int line, int col, String value)
                throws DataParseException
            {
                // sample value to determine data type
                if ( line > 1 || !m_hasHeader ) {
                    di.sample(col-1, value);
                   
                    // update num rows
                    if ( line != prevLine ) {
                        prevLine = line;
                        dim[0]++;
                    }
                } else if ( line == 1 && m_hasHeader ) {
                    headers.add(value);
                }
               
                // update num cols
                if ( col > dim[1] )
                    dim[1] = col;
            }
        };
       
        // do a scan of the stream, collecting length and type data
        try {
            read(is, scanner);
        } catch ( IOException ioe ) {
            throw new DataIOException(ioe);
        } catch ( DataParseException de ) {
            // can't happen
        }
       
        // create the table
        int nrows = dim[0];
        int ncols = dim[1];
        final Table table = new Table(nrows, ncols);
       
        // create the table columns
        for ( int i=0; i < ncols; ++i ) {
            String header;
            if ( m_hasHeader || i < headers.size() ) {
                header = (String)headers.get(i);
            } else {
                header = getDefaultHeader(i);
            }
            table.addColumn(header, di.getType(i));
            table.getColumn(i).setParser(di.getParser(i));
        }
       
        // reset dim array, will hold row/col indices
        dim[0] = dim[1] = -1;
       
        TableReadListener parser = new TableReadListener() {
            int prevLine = -1;
            public void readValue(int line, int col, String value)
                throws DataParseException
            {
                // early exit on header value
                if ( line == 1 && m_hasHeader )
                    return;
                if ( line != prevLine ) {
                    prevLine = line;
                    ++dim[0];
                }
                dim[1] = col-1;
               
                // XXX NOTE-2005.08.29-jheer
                // For now we use generic routines for filling column values.
                // This results in the autoboxing of primitive types, slowing
                // performance a bit and possibly triggering avoidable garbage
                // collections. If this proves to be a problem down the road,
                // we can add more nuance later.
                DataParser dp = di.getParser(dim[1]);
                table.set(dim[0], dim[1], dp.parse(value));
            }
        };
       
        // read the data into the table
        try {
            // prepare the input stream
            if ( is.markSupported() ) {
                is.reset();
            } else {
                is = buf.getAsInputStream();
            }
            // read the data
            read(is, parser);
        } catch ( IOException ioe ) {
            throw new DataIOException(ioe);
        } catch ( DataParseException de ) {
            throw new DataIOException("Parse exception for column "
                    + '\"' + dim[1] + '\"' + " at row: " + dim[0], de);
        }
       
        return table;
    }
   
    /**
     * Subclasses can override this to provide column names through
     * a custom mechanism.
     * @return an ArrayList of String instances indicating the column names
     */
    protected ArrayList getColumnNames() {
      return new ArrayList();
    }
   
    /**
     * Returns default column header names of the type "A", "B", ...,
     * "Z", "AA", "AB", etc.
     * @param idx the index of the column header
     * @return a default column header name for the given index.
     */
    public static String getDefaultHeader(int idx) {
        if ( idx == 0 ) return "A";
        int len = ((int)(Math.log(idx) / Math.log(26))) + 1;
        char[] h = new char[len];
        int p = len;
       
        h[--p] = (char)('A'+(idx%26));
        idx = idx / 26;
       
        while ( idx > 26 ) {
            h[--p] = (char)('A'+(idx%26));
            idx = idx/26;
        }
        if ( idx > 0 ) {
            h[--p] = (char)('A'+((idx-1)%26));
        }

        return new String(h, p, len);
    }
   
    /**
     * Scans the input stream, making call backs for each encountered entry
     * on the provided TextReadListener.
     * @param is the InputStream to read
     * @param trl the TextReadListener that will receive callbacks
     * @throws IOException
     * @throws DataParseException
     */
    protected abstract void read(InputStream is, TableReadListener trl)
        throws IOException, DataParseException;

} // end of abstract class AbstractTextTableReader
TOP

Related Classes of prefuse.data.io.AbstractTextTableReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.