Package org.molgenis.io.csv

Source Code of org.molgenis.io.csv.CsvReader

package org.molgenis.io.csv;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.molgenis.io.TupleReader;
import org.molgenis.io.processor.AbstractCellProcessor;
import org.molgenis.io.processor.CellProcessor;
import org.molgenis.util.tuple.Tuple;
import org.molgenis.util.tuple.ValueIndexTuple;
import org.molgenis.util.tuple.ValueTuple;

/**
* Comma-Separated Values reader
*
* @see <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>
*/
public class CsvReader implements TupleReader
{
  private static final Charset CHARSET_UTF8 = Charset.forName("UTF-8");

  public static final char DEFAULT_SEPARATOR = ',';

  private final au.com.bytecode.opencsv.CSVReader csvReader;
  private final boolean hasHeader;

  /** process cells after reading */
  private List<CellProcessor> cellProcessors;
  /** column names index */
  private Map<String, Integer> colNamesMap;

  public CsvReader(Reader reader)
  {
    this(reader, DEFAULT_SEPARATOR);
  }

  public CsvReader(Reader reader, char separator)
  {
    this(reader, separator, true);
  }

  public CsvReader(Reader reader, char separator, boolean hasHeader)
  {
    if (reader == null) throw new IllegalArgumentException("reader is null");
    this.csvReader = new au.com.bytecode.opencsv.CSVReader(reader, separator);
    this.hasHeader = hasHeader;
  }

  public CsvReader(File file) throws FileNotFoundException
  {
    this(new InputStreamReader(new FileInputStream(file), CHARSET_UTF8));
  }

  public CsvReader(File file, char separator) throws FileNotFoundException
  {
    this(new InputStreamReader(new FileInputStream(file), CHARSET_UTF8), separator);
  }

  public CsvReader(File file, char separator, boolean hasHeader) throws FileNotFoundException
  {
    this(new InputStreamReader(new FileInputStream(file), CHARSET_UTF8), separator, hasHeader);
  }

  @Override
  public boolean hasColNames()
  {
    return hasHeader;
  }

  @Override
  public Iterator<String> colNamesIterator() throws IOException
  {
    if (!hasHeader) return null;

    if (colNamesMap == null) colNamesMap = toColNamesMap(csvReader.readNext());
    return colNamesMap != null ? colNamesMap.keySet().iterator() : null;
  }

  @Override
  public Iterator<Tuple> iterator()
  {
    try
    {
      // create column header index once and reuse
      final Map<String, Integer> colNamesMap = hasHeader ? (this.colNamesMap == null ? toColNamesMap(csvReader
          .readNext()) : this.colNamesMap) : null;

      return new Iterator<Tuple>()
      {
        private Tuple next;
        private boolean getNext = true;

        @Override
        public boolean hasNext()
        {
          return get() != null;
        }

        @Override
        public Tuple next()
        {
          Tuple tuple = get();
          getNext = true;
          return tuple;
        }

        private Tuple get()
        {
          if (getNext)
          {
            try
            {

              String[] values = csvReader.readNext();
              if (values != null)
              {
                for (int i = 0; i < values.length; ++i)
                {
                  // subsequent separators indicate null
                  // values instead of empty strings
                  String value = values[i].isEmpty() ? null : values[i];
                  values[i] = processCell(value, false);
                }
                if (colNamesMap != null) next = new ValueIndexTuple(colNamesMap, Arrays.asList(values));
                else
                  next = new ValueTuple(Arrays.asList(values));
              }
              else
              {
                next = null;
              }
              getNext = false;

            }
            catch (IOException e)
            {
              throw new RuntimeException(e);
            }
          }
          return next;
        }

        @Override
        public void remove()
        {
          throw new UnsupportedOperationException();
        }
      };
    }
    catch (IOException e)
    {
      throw new RuntimeException(e);
    }
  }

  private Map<String, Integer> toColNamesMap(String[] headers)
  {
    if (headers == null) return null;
    if (headers.length == 0) return Collections.emptyMap();

    int capacity = (int) (headers.length / 0.75) + 1;
    Map<String, Integer> columnIdx = new LinkedHashMap<String, Integer>(capacity);
    for (int i = 0; i < headers.length; ++i)
    {
      String header = processCell(headers[i], true);
      columnIdx.put(header, i);
    }
    return columnIdx;
  }

  private String processCell(String value, boolean isHeader)
  {
    return AbstractCellProcessor.processCell(value, isHeader, this.cellProcessors);
  }

  @Override
  public void addCellProcessor(CellProcessor cellProcessor)
  {
    if (cellProcessors == null) cellProcessors = new ArrayList<CellProcessor>();
    cellProcessors.add(cellProcessor);
  }

  @Override
  public void close() throws IOException
  {
    csvReader.close();
  }
}
TOP

Related Classes of org.molgenis.io.csv.CsvReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.