/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.ytex.kernel;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.InvalidPropertiesFormatException;
import java.util.Map;
import java.util.Properties;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import javax.sql.DataSource;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.RowCallbackHandler;
import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
import org.springframework.jdbc.core.simple.SimpleJdbcTemplate;
import org.springframework.transaction.TransactionStatus;
import org.springframework.transaction.support.TransactionCallback;
import org.springframework.transaction.support.TransactionTemplate;
public class SparseDataExporterImpl implements SparseDataExporter {
// Shared commons-logging logger for this class; used for the per-fold
// progress message emitted while exporting.
private static final Log log = LogFactory
.getLog(SparseDataExporterImpl.class);
/**
 * Command-line entry point. Expects two required options:
 * <ul>
 * <li><code>-prop</code>: property file with queries and other parameters</li>
 * <li><code>-type</code>: export format name</li>
 * </ul>
 * With no arguments, or when the arguments cannot be parsed, the usage text
 * is printed. On a parse failure the parser's message is written to stderr
 * first so the user can see which option was wrong or missing.
 *
 * @param args
 *            command line arguments.
 * @throws IOException
 *             propagated from the export.
 */
@SuppressWarnings("static-access")
public static void main(String[] args) throws IOException {
    Options options = new Options();
    options.addOption(OptionBuilder
            .withArgName("prop")
            .hasArg()
            .isRequired()
            .withDescription(
                    "property file with queries and other parameters.")
            .create("prop"));
    options.addOption(OptionBuilder.withArgName("type").hasArg()
            .isRequired()
            .withDescription("export format; valid values: weka, libsvm")
            .create("type"));
    if (args.length == 0) {
        printHelp(options);
        return;
    }
    try {
        CommandLineParser parser = new GnuParser();
        CommandLine line = parser.parse(options, args);
        String propFile = line.getOptionValue("prop");
        String format = line.getOptionValue("type");
        // the exporter is obtained from the application context rather than
        // instantiated directly
        SparseDataExporter exporter = KernelContextHolder
                .getApplicationContext().getBean(SparseDataExporter.class);
        exporter.exportData(propFile, format);
    } catch (ParseException pe) {
        // tell the user what was wrong before showing the usage text
        System.err.println(pe.getMessage());
        printHelp(options);
    }
}
/**
 * Prints the command-line usage text for this class.
 *
 * @param options
 *            the option definitions to describe.
 */
private static void printHelp(Options options) {
    final String usage = "java " + SparseDataExporterImpl.class.getName()
            + " export sparse data";
    new HelpFormatter().printHelp(usage, options);
}
// Plain JDBC template; created in setDataSource and used by getDataSource
// to hand back the underlying data source.
protected JdbcTemplate jdbcTemplate;
// Collaborator used to load properties and instances and to generate folds.
protected KernelUtil kernelUtil;
// Named-parameter template; runs the prepare-script statements and the
// numeric/nominal word queries.
protected NamedParameterJdbcTemplate namedJdbcTemplate;
// Formatter factories keyed by format name; looked up with the lower-cased
// format passed to exportData(String, String).
protected Map<String, SparseDataFormatterFactory> nameToFormatterMap = new HashMap<String, SparseDataFormatterFactory>();
// Created in setDataSource; not otherwise referenced in this class.
protected SimpleJdbcTemplate simpleJdbcTemplate;
// Transaction template wrapping prepare script + word query execution.
// NOTE(review): the name suggests "requires new" propagation - confirm in
// the bean configuration.
protected TransactionTemplate txTemplateNew;
public SparseDataExporterImpl() {
super();
}
/**
 * Records a nominal (categorical) attribute value for an instance.
 * <p>
 * Registers the instance id if it is not yet known, stores the word/value
 * pair in the instance's nominal word map (created on demand) and adds the
 * value to the set of values seen for that word (also created on demand).
 *
 * @param sparseData
 *            container being populated.
 * @param instanceId
 *            id of the instance the attribute belongs to.
 * @param word
 *            attribute name.
 * @param wordValue
 *            attribute value.
 */
protected void addNominalWordToInstance(SparseData sparseData,
        long instanceId, String word, String wordValue) {
    // make sure the instance is registered
    if (!sparseData.getInstanceIds().contains(instanceId)) {
        sparseData.getInstanceIds().add(instanceId);
    }
    // look up both target collections before creating anything
    SortedMap<String, String> wordsOfInstance = sparseData
            .getInstanceNominalWords().get(instanceId);
    SortedSet<String> knownValues = sparseData.getNominalWordValueMap()
            .get(word);
    if (wordsOfInstance == null) {
        wordsOfInstance = new TreeMap<String, String>();
        sparseData.getInstanceNominalWords().put(instanceId,
                wordsOfInstance);
    }
    if (knownValues == null) {
        knownValues = new TreeSet<String>();
        sparseData.getNominalWordValueMap().put(word, knownValues);
    }
    // the instance's value for this word
    wordsOfInstance.put(word, wordValue);
    // the value is now one of the valid values of the word
    knownValues.add(wordValue);
}
/**
 * Records a numeric attribute value for an instance.
 * <p>
 * Registers the instance id if it is not yet known, stores the word/value
 * pair in the instance's numeric word map (created on demand) and adds the
 * word to the collection of numeric words.
 *
 * @param sparseData
 *            container being populated.
 * @param instanceId
 *            id of the instance the attribute belongs to.
 * @param word
 *            attribute name.
 * @param wordValue
 *            numeric attribute value.
 */
protected void addNumericWordToInstance(SparseData sparseData,
        long instanceId, String word, double wordValue) {
    // make sure the instance is registered
    if (!sparseData.getInstanceIds().contains(instanceId)) {
        sparseData.getInstanceIds().add(instanceId);
    }
    // fetch, or lazily create, the word -> value map of this instance
    SortedMap<String, Double> wordsOfInstance = sparseData
            .getInstanceNumericWords().get(instanceId);
    if (wordsOfInstance == null) {
        wordsOfInstance = new TreeMap<String, Double>();
        sparseData.getInstanceNumericWords().put(instanceId,
                wordsOfInstance);
    }
    wordsOfInstance.put(word, wordValue);
    sparseData.getNumericWords().add(word);
}
/**
 * Exports the given instances through the formatter, walking every
 * label / run / fold / train-test combination of the instance map.
 * <p>
 * The <code>scope</code> property controls when the sparse data is loaded:
 * <ul>
 * <li>not set: loaded once, before <code>initializeExport</code></li>
 * <li><code>label</code>: reloaded for every label</li>
 * <li><code>fold</code>: reloaded for every label/run/fold</li>
 * </ul>
 * For the <code>label</code> and <code>fold</code> scopes the formatter's
 * <code>initializeExport</code> receives <code>null</code> sparse data; for
 * any other non-null scope value no data is loaded at all.
 *
 * @param instanceLabel
 *            instances to export.
 * @param formatter
 *            receives the initialize/export/clear callbacks.
 * @param properties
 *            supplies scope, queries and the prepare script.
 * @param bDecorator
 *            optional decorator handed on to loadData.
 * @throws IOException
 *             propagated from the formatter.
 */
public void exportData(InstanceData instanceLabel,
        SparseDataFormatter formatter, Properties properties,
        BagOfWordsDecorator bDecorator) throws IOException {
    final String scope = properties.getProperty("scope", null);
    final boolean reloadPerLabel = "label".equals(scope);
    final boolean reloadPerFold = "fold".equals(scope);
    SparseData sparseData = null;
    if (scope == null) {
        sparseData = loadForScope(instanceLabel, properties, bDecorator,
                null, null, null);
    }
    formatter.initializeExport(instanceLabel, properties, sparseData);
    for (String label : instanceLabel.getLabelToInstanceMap().keySet()) {
        if (reloadPerLabel) {
            sparseData = loadForScope(instanceLabel, properties,
                    bDecorator, label, null, null);
        }
        formatter.initializeLabel(label, instanceLabel
                .getLabelToInstanceMap().get(label), properties,
                sparseData);
        for (int run : instanceLabel.getLabelToInstanceMap().get(label)
                .keySet()) {
            for (int fold : instanceLabel.getLabelToInstanceMap()
                    .get(label).get(run).keySet()) {
                // only log when at least one of label/run/fold is set
                if (log.isInfoEnabled()) {
                    if (label.length() > 0 || run > 0 || fold > 0) {
                        log.info("exporting, label " + label + " run "
                                + run + " fold " + fold);
                    }
                }
                if (reloadPerFold) {
                    sparseData = loadForScope(instanceLabel, properties,
                            bDecorator, label, fold, run);
                }
                formatter.initializeFold(sparseData, label, run, fold,
                        instanceLabel.getLabelToInstanceMap().get(label)
                                .get(run).get(fold));
                // a run/fold of 0 is handed to the formatter as null
                final Integer runOrNull = 0 == run ? null : run;
                final Integer foldOrNull = 0 == fold ? null : fold;
                for (boolean train : instanceLabel.getLabelToInstanceMap()
                        .get(label).get(run).get(fold).keySet()) {
                    formatter.exportFold(sparseData, instanceLabel
                            .getLabelToInstanceMap().get(label).get(run)
                            .get(fold).get(train), train, label,
                            runOrNull, foldOrNull);
                }
                formatter.clearFold();
            }
        }
        formatter.clearLabel();
    }
}

/**
 * Calls loadData with the queries and prepare script currently configured
 * in the properties, restricted to the given label / fold / run.
 */
private SparseData loadForScope(InstanceData instanceLabel,
        Properties properties, BagOfWordsDecorator bDecorator,
        String label, Integer fold, Integer run) {
    return this.loadData(instanceLabel,
            properties.getProperty("numericWordQuery"),
            properties.getProperty("nominalWordQuery"),
            properties.getProperty("prepareScript"),
            properties.getProperty("prepareScriptDelimiter", ";"),
            bDecorator, label, fold, run);
}
/**
 * Loads the instances with the <code>instanceClassQuery</code> property,
 * generates folds when the <code>folds</code> property is present, and
 * exports the result through the given formatter.
 *
 * @param props
 *            export configuration.
 * @param formatter
 *            formatter that writes the data.
 * @param bDecorator
 *            optional decorator handed on to the export.
 * @throws IOException
 *             propagated from the export.
 * @see org.apache.ctakes.ytex.kernel.SparseDataExporter#exportData(java.util.Properties,
 *      org.apache.ctakes.ytex.kernel.SparseDataFormatter,
 *      org.apache.ctakes.ytex.kernel.BagOfWordsDecorator)
 */
@Override
public void exportData(Properties props, SparseDataFormatter formatter,
        BagOfWordsDecorator bDecorator) throws IOException {
    final InstanceData instances = this.getKernelUtil().loadInstances(
            props.getProperty("instanceClassQuery"));
    final boolean foldsRequested = props.containsKey("folds");
    if (foldsRequested) {
        this.getKernelUtil().generateFolds(instances, props);
    }
    this.exportData(instances, formatter, props, bDecorator);
}
/**
 * Exports data as configured in a properties file, using the formatter
 * registered under the given format name.
 * <p>
 * The format name is matched case-insensitively; it is lower-cased with
 * {@link java.util.Locale#ROOT} so the lookup does not depend on the JVM's
 * default locale. The format is validated before the properties file is
 * read, so a bad format fails fast with a descriptive message rather than a
 * bare NullPointerException.
 *
 * @param propertiesFile
 *            properties file to load through the kernel util.
 * @param format
 *            name of the export format; must be a key of the formatter map.
 * @throws IllegalArgumentException
 *             if format is null or no formatter is registered for it.
 * @throws IOException
 *             propagated from loading the properties or from the export.
 * @see org.apache.ctakes.ytex.kernel.SparseDataExporter#exportData(java.lang.String,
 *      java.lang.String)
 */
@Override
public void exportData(String propertiesFile, String format)
        throws IOException, InvalidPropertiesFormatException {
    if (format == null) {
        throw new IllegalArgumentException(
                "export format must be specified; valid values: "
                        + nameToFormatterMap.keySet());
    }
    SparseDataFormatterFactory formatterFactory = nameToFormatterMap
            .get(format.toLowerCase(java.util.Locale.ROOT));
    if (formatterFactory == null) {
        throw new IllegalArgumentException("unknown export format '"
                + format + "'; valid values: "
                + nameToFormatterMap.keySet());
    }
    Properties props = new Properties();
    this.getKernelUtil().loadProperties(propertiesFile, props);
    this.exportData(props, formatterFactory.getFormatter(), null);
}
/**
 * Returns the data source behind the JDBC template.
 *
 * @param ds
 *            ignored; kept only so the existing signature stays unchanged.
 * @return the data source passed to setDataSource, or <code>null</code> if
 *         no data source has been set yet.
 */
public DataSource getDataSource(DataSource ds) {
    // jdbcTemplate only exists once setDataSource has been called
    return this.jdbcTemplate == null ? null : this.jdbcTemplate
            .getDataSource();
}
/**
 * @return the kernel util used to load properties and instances and to
 *         generate folds.
 */
public KernelUtil getKernelUtil() {
    return this.kernelUtil;
}
/**
 * @return the map of format name to formatter factory; the internal map
 *         itself is returned, not a copy.
 */
public Map<String, SparseDataFormatterFactory> getNameToFormatterMap() {
    return this.nameToFormatterMap;
}
/**
 * Runs the prepare script, if one is defined.
 * <p>
 * The script is split on the delimiter and every non-blank piece is
 * executed as an update with the given named parameters. The delimiter is
 * treated as literal text, not as a regular expression, so delimiters such
 * as <code>|</code>, <code>.</code> or <code>$$</code> work as expected. A
 * null or empty delimiter means the whole script is executed as a single
 * statement.
 *
 * @param prepareScript
 *            sequence of sql statements to be executed with named params;
 *            null or empty means there is nothing to do.
 * @param prepareScriptDelimiter
 *            literal delimiter separating the sql statements.
 * @param params
 *            for named parameters in sql statements.
 */
protected void prepare(final String prepareScript,
        final String prepareScriptDelimiter,
        final Map<String, Object> params) {
    if (prepareScript == null || prepareScript.length() == 0) {
        return;
    }
    final String[] statements;
    if (prepareScriptDelimiter == null
            || prepareScriptDelimiter.length() == 0) {
        // no delimiter: the script is one statement
        statements = new String[] { prepareScript };
    } else {
        // String.split takes a regex; quote the delimiter so it is matched
        // literally
        statements = prepareScript.split(java.util.regex.Pattern
                .quote(prepareScriptDelimiter));
    }
    for (String sql : statements) {
        // throw out empty pieces (e.g. blank lines between delimiters)
        if (sql.trim().length() > 0) {
            this.namedJdbcTemplate.update(sql, params);
        }
    }
}
/**
 * Runs the prepare script and then the nominal word query inside one
 * transaction-template execution, adding every result row to the sparse
 * data as a nominal attribute.
 *
 * @param sql
 *            query whose result set has 3 columns: 1st column - instance id
 *            (read as long), 2nd column - word, 3rd column - word value
 *            (read as string).
 * @param prepareScript
 *            statements to run before the query; may be null or empty.
 * @param prepareScriptDelimiter
 *            delimiter separating the statements of the prepare script.
 * @param sparseData
 *            container populated with the query results.
 * @param params
 *            named parameters for the prepare script and the query.
 */
protected void getNominalInstanceWords(final String sql,
        final String prepareScript, final String prepareScriptDelimiter,
        final SparseData sparseData, final Map<String, Object> params) {
    // turns one result row into one nominal attribute of an instance
    final RowCallbackHandler rowHandler = new RowCallbackHandler() {
        @Override
        public void processRow(ResultSet rs) throws SQLException {
            addNominalWordToInstance(sparseData, rs.getLong(1),
                    rs.getString(2), rs.getString(3));
        }
    };
    txTemplateNew.execute(new TransactionCallback<Object>() {
        @Override
        public Object doInTransaction(TransactionStatus txStatus) {
            // the prepare script runs inside the same callback as the query
            prepare(prepareScript, prepareScriptDelimiter, params);
            namedJdbcTemplate.query(sql, params, rowHandler);
            return null;
        }
    });
}
/**
 * Runs the prepare script and then the numeric word query inside one
 * transaction-template execution, adding every result row to the sparse
 * data as a numeric attribute.
 *
 * @param sql
 *            query whose result has 3 columns: 1st column: instance id
 *            (read as long), 2nd column: word, 3rd column: numeric word
 *            value (read as double).
 * @param prepareScript
 *            statements to run before the query; may be null or empty.
 * @param prepareScriptDelimiter
 *            delimiter separating the statements of the prepare script.
 * @param sparseData
 *            container populated with the query results.
 * @param params
 *            named parameters for the prepare script and the query.
 */
protected void getNumericInstanceWords(final String sql,
        final String prepareScript, final String prepareScriptDelimiter,
        final SparseData sparseData, final Map<String, Object> params) {
    // turns one result row into one numeric attribute of an instance
    final RowCallbackHandler rowHandler = new RowCallbackHandler() {
        @Override
        public void processRow(ResultSet rs) throws SQLException {
            addNumericWordToInstance(sparseData, rs.getLong(1),
                    rs.getString(2), rs.getDouble(3));
        }
    };
    txTemplateNew.execute(new TransactionCallback<Object>() {
        @Override
        public Object doInTransaction(TransactionStatus txStatus) {
            // the prepare script runs inside the same callback as the query
            prepare(prepareScript, prepareScriptDelimiter, params);
            namedJdbcTemplate.query(sql, params, rowHandler);
            return null;
        }
    });
}
/**
 * @return the transaction template used to wrap the prepare script and the
 *         word queries.
 */
public TransactionTemplate getTxTemplateNew() {
    return this.txTemplateNew;
}
/**
 * Builds a new SparseData from the numeric and nominal word queries.
 * <p>
 * Steps, in order: run the numeric query (if non-blank), let the decorator
 * add numeric attributes, run the nominal query (if non-blank), let the
 * decorator add nominal attributes. The query parameters <code>label</code>,
 * <code>fold</code> and <code>run</code> are only set when the label is
 * non-empty and the fold/run are non-null and non-zero.
 *
 * @param instanceLabel
 *            instance data; not read by this method.
 * @param instanceNumericWordQuery
 *            query to get numeric attributes; skipped when null or blank.
 * @param instanceNominalWordQuery
 *            query to get nominal attributes; skipped when null or blank.
 * @param prepareScript
 *            prepare script handed to each attribute query, which runs it
 *            before the query.
 * @param prepareScriptDelimiter
 *            delimiter for statements in prepare script.
 * @param bDecorator
 *            decorator to add attributes; may be null.
 * @param label
 *            value for the <code>label</code> query parameter.
 * @param fold
 *            value for the <code>fold</code> query parameter.
 * @param run
 *            value for the <code>run</code> query parameter.
 * @return the newly created and populated sparse data.
 */
protected SparseData loadData(InstanceData instanceLabel,
        String instanceNumericWordQuery, String instanceNominalWordQuery,
        String prepareScript, String prepareScriptDelimiter,
        BagOfWordsDecorator bDecorator, String label, Integer fold,
        Integer run) {
    final SparseData loaded = new SparseData();
    final Map<String, Object> queryParams = new HashMap<String, Object>();
    if (label != null && !label.isEmpty()) {
        queryParams.put("label", label);
    }
    if (fold != null && fold.intValue() != 0) {
        queryParams.put("fold", fold);
    }
    if (run != null && run.intValue() != 0) {
        queryParams.put("run", run);
    }
    final boolean hasNumericQuery = instanceNumericWordQuery != null
            && instanceNumericWordQuery.trim().length() > 0;
    final boolean hasNominalQuery = instanceNominalWordQuery != null
            && instanceNominalWordQuery.trim().length() > 0;
    // numeric attributes
    if (hasNumericQuery) {
        this.getNumericInstanceWords(instanceNumericWordQuery,
                prepareScript, prepareScriptDelimiter, loaded,
                queryParams);
    }
    // added to support adding gram matrix index in GramMatrixExporter
    if (bDecorator != null) {
        bDecorator.decorateNumericInstanceWords(
                loaded.getInstanceNumericWords(),
                loaded.getNumericWords());
    }
    // nominal attributes
    if (hasNominalQuery) {
        this.getNominalInstanceWords(instanceNominalWordQuery,
                prepareScript, prepareScriptDelimiter, loaded,
                queryParams);
    }
    if (bDecorator != null) {
        bDecorator.decorateNominalInstanceWords(
                loaded.getInstanceNominalWords(),
                loaded.getNominalWordValueMap());
    }
    return loaded;
}
/**
 * Sets the data source by creating the three JDBC templates (plain, simple
 * and named-parameter) on top of it.
 *
 * @param ds
 *            data source all templates of this exporter operate on.
 */
public void setDataSource(DataSource ds) {
    this.jdbcTemplate = new JdbcTemplate(ds);
    this.simpleJdbcTemplate = new SimpleJdbcTemplate(ds);
    this.namedJdbcTemplate = new NamedParameterJdbcTemplate(ds);
}
/**
 * @param kernelUtil
 *            kernel util used to load properties and instances and to
 *            generate folds.
 */
public void setKernelUtil(KernelUtil kernelUtil) {
    this.kernelUtil = kernelUtil;
}
/**
 * Replaces the map of format name to formatter factory. The map is stored
 * as given, not copied.
 *
 * @param nameToFormatterMap
 *            formatter factories keyed by format name.
 */
public void setNameToFormatterMap(Map<String, SparseDataFormatterFactory> nameToFormatterMap) {
    this.nameToFormatterMap = nameToFormatterMap;
}
/**
 * @param txTemplateNew
 *            transaction template used to wrap the prepare script and the
 *            word queries.
 */
public void setTxTemplateNew(TransactionTemplate txTemplateNew) {
    this.txTemplateNew = txTemplateNew;
}
}