Source Code of org.apache.phoenix.util.CSVLoader

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.phoenix.util;

import au.com.bytecode.opencsv.CSVReader;
import com.google.common.collect.Maps;
import org.apache.phoenix.exception.SQLExceptionCode;
import org.apache.phoenix.exception.SQLExceptionInfo;
import org.apache.phoenix.jdbc.PhoenixConnection;
import org.apache.phoenix.schema.PDataType;

import java.io.FileReader;
import java.sql.DatabaseMetaData;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Upserts CSV data into a table through a Phoenix JDBC connection.
 */
public class CSVLoader {

    private final PhoenixConnection conn;
    private final String tableName;
    private final List<String> columns;
    private final boolean isStrict;
    private final List<String> delimiter;
    // Maps the strings "1" through "9" to the control characters Ctrl+A
    // through Ctrl+I, so non-printable delimiters can be passed in as
    // ordinary command-line arguments.
    private final Map<String, Character> ctrlTable = new HashMap<String, Character>() {
        {
            put("1", '\u0001');
            put("2", '\u0002');
            put("3", '\u0003');
            put("4", '\u0004');
            put("5", '\u0005');
            put("6", '\u0006');
            put("7", '\u0007');
            put("8", '\u0008');
            put("9", '\u0009');
        }
    };
   
    // Number of user-specified columns that were not found in the table.
    private int unfoundColumnCount;

    public CSVLoader(PhoenixConnection conn, String tableName, List<String> columns, boolean isStrict, List<String> delimiter) {
        this.conn = conn;
        this.tableName = tableName;
        this.columns = columns;
        this.isStrict = isStrict;
        this.delimiter = delimiter;
    }

    public CSVLoader(PhoenixConnection conn, String tableName, List<String> columns, boolean isStrict) {
        this(conn, tableName, columns, isStrict, null);
    }


    /**
     * Upserts data from a CSV file. Rows are batched and committed based on
     * the connection's mutate batch size. Each column's PDataType is read
     * from the table metadata and used to convert the CSV value to the
     * correct type before the upsert. Note: when the loader was constructed
     * with an empty column list, the column names are expected as the first
     * line of the CSV file.
     *
     * @param fileName path of the CSV file to load
     * @throws Exception
     */
    public void upsert(String fileName) throws Exception {
        List<String> delimiter = this.delimiter;
        CSVReader reader;
        if (delimiter != null && delimiter.size() == 3) {
            // Custom separator, quote, and escape characters were supplied.
            reader = new CSVReader(new FileReader(fileName),
                    getCSVCustomField(delimiter.get(0)),
                    getCSVCustomField(delimiter.get(1)),
                    getCSVCustomField(delimiter.get(2)));
        } else {
            reader = new CSVReader(new FileReader(fileName));
        }
        try {
            upsert(reader);
        } finally {
            reader.close();
        }
    }


    /**
     * Translates a delimiter argument into a single character: the strings
     * "1" through "9" map to the corresponding control characters; any other
     * string is taken literally as its first character.
     */
    public char getCSVCustomField(String field) {
        if (this.ctrlTable.containsKey(field)) {
            return this.ctrlTable.get(field);
        } else {
            return field.charAt(0);
        }
    }
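    // For example:
    //   getCSVCustomField("1") returns '\u0001' (Ctrl+A)
    //   getCSVCustomField("9") returns '\u0009' (the tab character)
    //   getCSVCustomField("|") returns '|'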

    /**
     * Upserts data from a CSV source. Rows are batched and committed based
     * on the connection's mutate batch size. Each column's PDataType is read
     * from the table metadata and used to convert the CSV value to the
     * correct type before the upsert. Note: when the loader was constructed
     * with an empty column list, the column names are expected as the first
     * line of the CSV input.
     *
     * @param reader CSVReader instance positioned at the start of the data
     * @throws Exception
     */
    public void upsert(CSVReader reader) throws Exception {
        List<String> columns = this.columns;
        if (columns != null && columns.isEmpty()) {
            // An empty column list means the column names are given in the
            // first line of the CSV input.
            columns = Arrays.asList(reader.readNext());
        }
        ColumnInfo[] columnInfo = generateColumnInfo(columns);
        PreparedStatement stmt = null;
        PreparedStatement[] stmtCache = null;
        if (columns == null) {
            // With no explicit column list, rows may have fewer fields than
            // the table has columns, so cache one statement per field count.
            stmtCache = new PreparedStatement[columnInfo.length];
        } else {
            String upsertStatement = QueryUtil.constructUpsertStatement(columnInfo, tableName, columnInfo.length - unfoundColumnCount);
            stmt = conn.prepareStatement(upsertStatement);
        }
        String[] nextLine;
        int rowCount = 0;
        int upsertBatchSize = conn.getMutateBatchSize();
        boolean wasAutoCommit = conn.getAutoCommit();
        try {
            conn.setAutoCommit(false);
            Object upsertValue = null;
            long start = System.currentTimeMillis();

            // Upsert data based on the SQL type of each column
            while ((nextLine = reader.readNext()) != null) {
                if (columns == null) {
                    // Reuse (or lazily create) the prepared statement that
                    // matches the number of fields in this row.
                    stmt = stmtCache[nextLine.length - 1];
                    if (stmt == null) {
                        String upsertStatement = QueryUtil.constructUpsertStatement(columnInfo, tableName, nextLine.length);
                        stmt = conn.prepareStatement(upsertStatement);
                        stmtCache[nextLine.length - 1] = stmt;
                    }
                }
                for (int index = 0; index < columnInfo.length; index++) {
                    if (columnInfo[index] == null) {
                        // Column not found in the table; skip this field.
                        continue;
                    }
                    String value = nextLine[index];
                    Integer sqlType = columnInfo[index].getSqlType();
                    upsertValue = convertTypeSpecificValue(value, sqlType);
                    if (upsertValue != null) {
                        stmt.setObject(index + 1, upsertValue, sqlType);
                    } else {
                        stmt.setNull(index + 1, sqlType);
                    }
                }
                stmt.execute();

                // Commit when the batch size is reached
                if (++rowCount % upsertBatchSize == 0) {
                    conn.commit();
                    System.out.println("Rows upserted: " + rowCount);
                }
            }
            conn.commit();
            double elapsedDuration = (System.currentTimeMillis() - start) / 1000.0;
            System.out.println("CSV Upsert complete. " + rowCount + " rows upserted");
            System.out.println("Time: " + elapsedDuration + " sec(s)\n");
        } finally {
            // Close the single prepared statement, or every statement in the
            // cache when the dynamic-columns path was used.
            if (stmtCache != null) {
                for (PreparedStatement cachedStmt : stmtCache) {
                    if (cachedStmt != null) {
                        cachedStmt.close();
                    }
                }
            } else if (stmt != null) {
                stmt.close();
            }
            // Restore the connection's original auto-commit setting.
            if (wasAutoCommit) conn.setAutoCommit(true);
        }
    }
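    // For example, for a table MY_TABLE loaded with columns ID and NAME
    // (hypothetical names), the statement built by
    // QueryUtil.constructUpsertStatement(...) takes a form along the lines of
    //     UPSERT INTO MY_TABLE (ID, NAME) VALUES (?, ?)
    // and each CSV row binds its two converted values to those parameters.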
 
    /**
     * Converts the CSV string input to an object of the correct type for the
     * given SQL type.
     */
    private Object convertTypeSpecificValue(String s, Integer sqlType) throws Exception {
        return PDataType.fromSqlType(sqlType).toObject(s);
    }
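    // Illustrative example: for sqlType == java.sql.Types.INTEGER,
    // PDataType.fromSqlType(sqlType).toObject("42") yields the Integer 42.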

    /**
     * Gets an array of ColumnInfo objects pairing each column name with its
     * PDataType.
     *
     * @param columns column names to load, or null to use all of the table's
     *                columns in metadata order
     * @return array of ColumnInfo, with null entries for requested columns
     *         that were not found (when isStrict is false)
     * @throws SQLException
     */
    private ColumnInfo[] generateColumnInfo(List<String> columns)
            throws SQLException {
        Map<String, Integer> columnNameToTypeMap = Maps.newLinkedHashMap();
        DatabaseMetaData dbmd = conn.getMetaData();
        // TODO: escape wildcard characters in the table name, since
        // LIKE-style pattern matching is not wanted here.
        String escapedTableName = StringUtil.escapeLike(tableName);
        String[] schemaAndTable = escapedTableName.split("\\.");
        ResultSet rs = null;
        try {
            rs = dbmd.getColumns(null, (schemaAndTable.length == 1 ? "" : schemaAndTable[0]),
                    (schemaAndTable.length == 1 ? escapedTableName : schemaAndTable[1]),
                    null);
            while (rs.next()) {
                columnNameToTypeMap.put(rs.getString(QueryUtil.COLUMN_NAME_POSITION), rs.getInt(QueryUtil.DATA_TYPE_POSITION));
            }
        } finally {
            if (rs != null) {
                rs.close();
            }
        }
        ColumnInfo[] columnType;
        if (columns == null) {
            // No explicit column list: use every column of the table, in
            // metadata order.
            int i = 0;
            columnType = new ColumnInfo[columnNameToTypeMap.size()];
            for (Map.Entry<String, Integer> entry : columnNameToTypeMap.entrySet()) {
                columnType[i++] = new ColumnInfo(entry.getKey(), entry.getValue());
            }
        } else {
            // Leave null entries as the indication to skip columns that
            // don't exist in the table (allowed only when isStrict is false).
            columnType = new ColumnInfo[columns.size()];
            for (int i = 0; i < columns.size(); i++) {
                String columnName = SchemaUtil.normalizeIdentifier(columns.get(i).trim());
                Integer sqlType = columnNameToTypeMap.get(columnName);
                if (sqlType == null) {
                    if (isStrict) {
                        throw new SQLExceptionInfo.Builder(SQLExceptionCode.COLUMN_NOT_FOUND)
                            .setColumnName(columnName).setTableName(tableName).build().buildException();
                    }
                    unfoundColumnCount++;
                } else {
                    columnType[i] = new ColumnInfo(columnName, sqlType);
                }
            }
            if (unfoundColumnCount == columns.size()) {
                // None of the requested columns exist in the table.
                throw new SQLExceptionInfo.Builder(SQLExceptionCode.COLUMN_NOT_FOUND)
                    .setColumnName(Arrays.toString(columns.toArray(new String[0]))).setTableName(tableName).build().buildException();
            }
        }
        return columnType;
    }
}
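
A minimal usage sketch (the JDBC URL, table name, and file paths below are
hypothetical placeholders, and the example assumes the Phoenix driver is on
the classpath):

import java.sql.DriverManager;
import java.util.Arrays;
import java.util.Collections;

import org.apache.phoenix.jdbc.PhoenixConnection;
import org.apache.phoenix.util.CSVLoader;

public class CSVLoaderExample {
    public static void main(String[] args) throws Exception {
        try (PhoenixConnection conn = DriverManager
                .getConnection("jdbc:phoenix:localhost")
                .unwrap(PhoenixConnection.class)) {
            // Null column list: upsert into all of the table's columns, in
            // metadata order; the CSV file must not contain a header line.
            new CSVLoader(conn, "MY_TABLE", null, true).upsert("/tmp/data.csv");

            // Empty column list: column names are read from the first line
            // of the CSV file.
            new CSVLoader(conn, "MY_TABLE", Collections.<String>emptyList(), true)
                    .upsert("/tmp/data_with_header.csv");

            // Custom delimiters: separator, quote char, escape char. The
            // string "1" is translated to Ctrl+A ('\u0001') by the loader.
            new CSVLoader(conn, "MY_TABLE", null, true,
                    Arrays.asList("1", "\"", "\\")).upsert("/tmp/data.ctrl_a");
        }
    }
}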