Package org.apache.ctakes.ytex.uima

Source Code of org.apache.ctakes.ytex.uima.DBCollectionReader

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.ytex.uima;

import java.io.IOException;
import java.sql.Driver;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import javax.sql.DataSource;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.ctakes.ytex.uima.types.DocKey;
import org.apache.ctakes.ytex.uima.types.KeyValuePair;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.metadata.ConfigurationParameterSettings;
import org.apache.uima.resource.metadata.ProcessingResourceMetaData;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.springframework.jdbc.core.RowCallbackHandler;
import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
import org.springframework.jdbc.core.simple.SimpleJdbcTemplate;
import org.springframework.jdbc.datasource.DataSourceTransactionManager;
import org.springframework.jdbc.datasource.SimpleDriverDataSource;
import org.springframework.jdbc.support.lob.DefaultLobHandler;
import org.springframework.jdbc.support.lob.LobHandler;
import org.springframework.transaction.TransactionStatus;
import org.springframework.transaction.support.TransactionCallback;
import org.springframework.transaction.support.TransactionTemplate;

/**
*
* Read documents from db. Config parameters:
* <ul>
* <li>queryGetDocumentKeys the query to get the document keys</li>
* <li>queryGetDocument the query to get a document given a key. should have
* named parameters that match the columns of the result set returned by
* queryGetDocumentKeys</li>
* <li>keyTypeName the uima type of the document key to be added to the cas.
* defaults to org.apache.ctakes.ytex.uima.types.DocKey.</li>
* <li>keyNameToLowerCase convert the column names returned by
* queryGetDocumentKeys to lower case, default true</li>
* </ul>
*
* @TODO more doc
* @author vijay
*
*/
public class DBCollectionReader extends CollectionReader_ImplBase {
  private static final Log log = LogFactory.getLog(DBCollectionReader.class);

  /**
   * the query to get the document keys set in config file
   */
  protected String queryGetDocumentKeys;
  /**
   * the queyr to get a document given a key. set in config file
   */
  protected String queryGetDocument;
  /**
   * the key type. if not set, will default to
   * org.apache.ctakes.ytex.uima.types.DocKey
   */
  protected String keyTypeName = "org.apache.ctakes.ytex.uima.types.DocKey";

  protected DataSource dataSource;
  protected SimpleJdbcTemplate simpleJdbcTemplate;
  protected NamedParameterJdbcTemplate namedJdbcTemplate;
  protected TransactionTemplate txTemplate;
  protected boolean keyNameToLowerCase = true;

  public boolean isKeyNameToLowerCase() {
    return keyNameToLowerCase;
  }

  public void setKeyNameToLowerCase(boolean keyNameToLowerCase) {
    this.keyNameToLowerCase = keyNameToLowerCase;
  }

  List<Map<String, Object>> listDocumentIds;
  int i = 0;

  @Override
  public void initialize() throws ResourceInitializationException {
    initializePreLoad();
    loadDocumentIds();
  }

  protected void initializePreLoad() throws ResourceInitializationException {
    super.initialize();
    ProcessingResourceMetaData metaData = getProcessingResourceMetaData();
    ConfigurationParameterSettings paramSettings = metaData
        .getConfigurationParameterSettings();
    this.queryGetDocumentKeys = (String) paramSettings
        .getParameterValue("queryGetDocumentKeys");
    this.queryGetDocument = (String) paramSettings
        .getParameterValue("queryGetDocument");
    this.keyTypeName = (String) paramSettings
        .getParameterValue("keyTypeName");
    Boolean keyNameToLowerCase = (Boolean) paramSettings
        .getParameterValue("keyNameToLowerCase");
    if (keyNameToLowerCase != null)
      this.keyNameToLowerCase = keyNameToLowerCase.booleanValue();
    String dbURL = (String) paramSettings.getParameterValue("dbURL");
    String dbDriver = (String) paramSettings.getParameterValue("dbDriver");
    initDB(dbDriver, dbURL);
  }

  protected void initDB(String dbDriver, String dbURL)
      throws ResourceInitializationException {
    if (dbURL != null && dbURL.length() > 0) {
      try {

        if (dbDriver == null || dbDriver.length() == 0) {
          dbDriver = ApplicationContextHolder.getYtexProperties()
              .getProperty("db.driver");
        }
        dataSource = new SimpleDriverDataSource((Driver) Class.forName(
            dbDriver).newInstance(), dbURL);
        txTemplate = new TransactionTemplate(
            new DataSourceTransactionManager(dataSource));
      } catch (InstantiationException e) {
        throw new ResourceInitializationException(e);
      } catch (IllegalAccessException e) {
        throw new ResourceInitializationException(e);
      } catch (ClassNotFoundException e) {
        throw new ResourceInitializationException(e);
      }
    } else {
      txTemplate = (TransactionTemplate) ApplicationContextHolder
          .getApplicationContext().getBean("txTemplate");
      dataSource = (DataSource) ApplicationContextHolder
          .getApplicationContext().getBean(
              "collectionReaderDataSource");
    }
    simpleJdbcTemplate = new SimpleJdbcTemplate(dataSource);
    namedJdbcTemplate = new NamedParameterJdbcTemplate(dataSource);
  }

  protected void loadDocumentIds() {
    if (listDocumentIds == null) {
      listDocumentIds = txTemplate
          .execute(new TransactionCallback<List<Map<String, Object>>>() {

            @Override
            public List<Map<String, Object>> doInTransaction(
                TransactionStatus arg0) {
              return simpleJdbcTemplate
                  .queryForList(queryGetDocumentKeys);
            }
          });
      i = 0;
    }
  }

  @Override
  public void getNext(final CAS aCAS) throws IOException, CollectionException {
    try {
      getNext(aCAS.getJCas());
    } catch (CASException e) {
      throw new CollectionException(e);
    }
  }

  public void getNext(final JCas aCAS) throws IOException,
      CollectionException {
    if (i < listDocumentIds.size()) {
      final Map<String, Object> id = listDocumentIds.get(i++);
      if (log.isInfoEnabled()) {
        log.info("loading document with id = " + id);
      }
      getDocumentById(aCAS, id);
      addDocKey(aCAS, id);
    } else {
      // shouldn't get here?
      throw new CollectionException("no documents to process",
          new Object[] {});
    }
  }

  private void addDocKey(JCas aCAS, Map<String, Object> id)
      throws CollectionException {
    DocKey docKey = new DocKey(aCAS);
    FSArray keyValuePairs = new FSArray(aCAS, id.size());
    int i = 0;
    for (Map.Entry<String, Object> idVal : id.entrySet()) {
      String key = idVal.getKey();
      Object val = idVal.getValue();
      KeyValuePair p = new KeyValuePair(aCAS);
      p.setKey(key);
      if (val instanceof Number) {
        p.setValueLong(((Number) val).longValue());
      } else if (val instanceof String) {
        p.setValueString((String) val);
      } else {
        log.warn("Don't know how to handle key attribute, converting to string, key="
            + key + ", value=" + val);
        p.setValueString(val.toString());
      }
      keyValuePairs.set(i, p);
      i++;
    }
    docKey.setKeyValuePairs(keyValuePairs);
    docKey.addToIndexes();

  }

  protected void getDocumentById(final JCas aCAS, final Map<String, Object> id) {
    Map<String, Object> idMapTmp = id;
    if (this.isKeyNameToLowerCase()) {
      idMapTmp = new HashMap<String, Object>();
      for (Map.Entry<String, Object> e : id.entrySet()) {
        idMapTmp.put(e.getKey().toLowerCase(), e.getValue());
      }
    }
    final Map<String, Object> idQuery = idMapTmp;
    this.txTemplate.execute(new TransactionCallback<Object>() {

      @Override
      public Object doInTransaction(TransactionStatus arg0) {
        namedJdbcTemplate.query(queryGetDocument, idQuery,
            new RowCallbackHandler() {
              boolean bFirstRowRead = false;

              @Override
              public void processRow(ResultSet rs)
                  throws SQLException {
                if (!bFirstRowRead) {
                  LobHandler lobHandler = new DefaultLobHandler();
                  String clobText = lobHandler
                      .getClobAsString(rs, 1);
                  aCAS.setDocumentText(clobText);
                  bFirstRowRead = true;
                } else {
                  log.error("Multiple documents for document key: "
                      + idQuery);
                }
              }
            });
        return null;
      }
    });
  }

  @Override
  public Progress[] getProgress() {
    return new Progress[] { new ProgressImpl(i, listDocumentIds.size(),
        Progress.ENTITIES) };
  }

  @Override
  public boolean hasNext() throws IOException, CollectionException {
    return i < listDocumentIds.size();
  }

  @Override
  public void close() throws IOException {
    this.listDocumentIds = null;
    this.i = 0;
  }

}
TOP

Related Classes of org.apache.ctakes.ytex.uima.DBCollectionReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.