Package edu.isi.karma.imp.csv

Source Code of edu.isi.karma.imp.csv.CSVImport

package edu.isi.karma.imp.csv;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

import org.json.JSONArray;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import au.com.bytecode.opencsv.CSVReader;
import edu.isi.karma.imp.Import;
import edu.isi.karma.rep.HNode;
import edu.isi.karma.rep.HNode.HNodeType;
import edu.isi.karma.rep.HTable;
import edu.isi.karma.rep.RepFactory;
import edu.isi.karma.rep.Row;
import edu.isi.karma.rep.Table;
import edu.isi.karma.rep.Worksheet;
import edu.isi.karma.rep.Workspace;
import edu.isi.karma.rep.metadata.WorksheetProperties.Property;
import edu.isi.karma.rep.metadata.WorksheetProperties.SourceTypes;
import edu.isi.karma.util.EncodingDetector;
import edu.isi.karma.webserver.KarmaException;

public class CSVImport extends Import {
    private static Logger logger = LoggerFactory.getLogger(CSVImport.class);
  protected final int headerRowIndex;
    protected final int dataStartRowIndex;
    protected final char delimiter;
    protected final char quoteCharacter;
    protected final char escapeCharacter = '\\';
    protected final InputStream is;
    protected final String encoding;
    protected final int maxNumLines;
    protected final JSONArray columnsJson;
    public CSVImport(int headerRowIndex, int dataStartRowIndex,
            char delimiter, char quoteCharacter, String encoding,
            int maxNumLines,
            String sourceName,
            InputStream is,
            Workspace workspace,
            JSONArray columnsJson) {

        super(sourceName, workspace, encoding);
        this.headerRowIndex = headerRowIndex;
        this.dataStartRowIndex = dataStartRowIndex;
        this.delimiter = delimiter;
        this.quoteCharacter = quoteCharacter;
        this.encoding = encoding;
        this.maxNumLines = maxNumLines;
        this.is = is;
        this.columnsJson = columnsJson;
       
    }
   
    @Override

    public Worksheet generateWorksheet() throws IOException, KarmaException {
        Table dataTable = getWorksheet().getDataTable();

        BufferedReader br = getLineReader();


        // Index for row currently being read
        int rowCount = 0;
        Map<Integer, String> hNodeIdList = new HashMap<Integer, String>();

        // If no row is present for the column headers
        if (headerRowIndex == 0) {
            hNodeIdList = addEmptyHeaders(getWorksheet(), getFactory(), br);
            if (hNodeIdList == null || hNodeIdList.size() == 0) {
                br.close();
                throw new KarmaException("Error occured while counting header "
                        + "nodes for the worksheet!");
            }
        }

        // Populate the worksheet model
        String line = null;
        while ((line = br.readLine()) != null) {
          logger.debug("Read line: '" + line + "'");
            // Check for the header row
            if (rowCount + 1 == headerRowIndex) {
                hNodeIdList = addHeaders(getWorksheet(), getFactory(), line, br);
                rowCount++;
                continue;
            }

            // Populate the model with data rows
            if (rowCount + 1 >= dataStartRowIndex) {
                boolean added = addRow(getWorksheet(), getFactory(), line, hNodeIdList, dataTable);
                if(added) {
                  rowCount++;
                
                  if(maxNumLines > 0 && (rowCount - dataStartRowIndex) >= maxNumLines-1) {
                    break;
                  }
                }
                continue;
            }

            rowCount++;
        }
        br.close();
        getWorksheet().getMetadataContainer().getWorksheetProperties().setPropertyValue(Property.sourceType, SourceTypes.CSV.toString());
        return getWorksheet();
    }

  protected BufferedReader getLineReader() throws IOException {
    // Prepare the reader for reading file line by line
       
        InputStreamReader isr = EncodingDetector.getInputStreamReader(is, encoding);
       
        return new BufferedReader(isr);
  }

    private Map<Integer, String> addHeaders(Worksheet worksheet, RepFactory fac,
            String line, BufferedReader br) throws IOException {
        HTable headers = worksheet.getHeaders();
        Map<Integer, String> headersMap = new HashMap<Integer, String>();
        CSVReader reader = new CSVReader(new StringReader(line), delimiter,
                quoteCharacter, escapeCharacter);
        String[] rowValues = null;
        rowValues = reader.readNext();

        if (rowValues == null || rowValues.length == 0) {
            reader.close();
            return addEmptyHeaders(worksheet, fac, br);
        }

        for (int i = 0; i < rowValues.length; i++) {
            HNode hNode = null;
            if (headerRowIndex == 0) {
              if (isVisible("Column_" + (i + 1)))
                hNode = headers.addHNode("Column_" + (i + 1), HNodeType.Regular, worksheet, fac);
            } else {
              if (isVisible(rowValues[i]))
                hNode = headers.addHNode(rowValues[i], HNodeType.Regular, worksheet, fac);
            }
            if (hNode != null)
              headersMap.put(i, hNode.getId());
        }
        reader.close();
        return headersMap;
    }

    private boolean addRow(Worksheet worksheet, RepFactory fac, String line,
        Map<Integer, String> hNodeIdMap, Table dataTable) throws IOException {
        CSVReader reader = new CSVReader(new StringReader(line), delimiter,
                quoteCharacter, escapeCharacter);
        String[] rowValues = null;
        rowValues = reader.readNext();
        if (rowValues == null || rowValues.length == 0) {
            reader.close();
            return false;
        }

        Row row = dataTable.addRow(fac);
        int size = hNodeIdMap.size();
        if (columnsJson != null)
          size = columnsJson.length();
        for (int i = 0; i < rowValues.length; i++) {
            if (i < size) {
              String hNodeId = hNodeIdMap.get(i);
              if (hNodeId != null)
                row.setValue(hNodeId, rowValues[i], fac);
            } else {
                // TODO Our model does not allow a value to be added to a row
                // without its associated HNode. In CSVs, there could be case
                // where values in rows are greater than number of column names.
                logger.error("More data elements detected in the row than number of headers!");
            }
        }
        reader.close();
        return true;
    }
    private Map<Integer, String> addEmptyHeaders(Worksheet worksheet,
            RepFactory fac, BufferedReader br) throws IOException {
        HTable headers = worksheet.getHeaders();
        Map<Integer, String> headersMap = new HashMap<Integer, String>();

       
        br.mark(1000000);
        br.readLine();
       
        // Use the first data row to count the number of columns we need to add
        int rowCount = 0;
        String line = null;
        while (null != (line = br.readLine())) {
            if (rowCount + 1 == dataStartRowIndex) {
                line = br.readLine();
                CSVReader reader = new CSVReader(new StringReader(line),
                        delimiter, quoteCharacter, escapeCharacter);
                String[] rowValues = null;
                try {
                    rowValues = reader.readNext();
                } catch (IOException e) {
                    logger.error("Error reading Line:" + line, e);
                }
                for (int i = 0; i < rowValues.length; i++) {
                 
                    HNode hNode = null;
                    if (isVisible("Column_" + (i + 1)))
                      hNode = headers.addHNode("Column_" + (i + 1), HNodeType.Regular,
                            worksheet, fac);
                    if (hNode != null)
                      headersMap.put(i, hNode.getId());
                }
                reader.close();
                break;
            }
            rowCount++;
        }
        br.reset();
        return headersMap;
    }
   
    private boolean isVisible(String key) {
    if (columnsJson == null)
      return true;
    for (int i = 0; i < columnsJson.length(); i++) {
      JSONObject obj = columnsJson.getJSONObject(i);
      if (obj.has(key))
        return obj.getBoolean(key);
    }
    return false;
  }
}
TOP

Related Classes of edu.isi.karma.imp.csv.CSVImport

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.