Package edu.isi.karma.controller.command.transformation

Source Code of edu.isi.karma.controller.command.transformation.PythonTransformationCommand

/*******************************************************************************
* Copyright 2012 University of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This code was developed by the Information Integration Group as part
* of the Karma project at the Information Sciences Institute of the
* University of Southern California.  For more information, publications,
* and related projects, please see: http://www.isi.edu/integration
******************************************************************************/

package edu.isi.karma.controller.command.transformation;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.python.core.PyCode;
import org.python.core.PyException;
import org.python.core.PyObject;
import org.python.util.PythonInterpreter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.isi.karma.controller.command.CommandType;
import edu.isi.karma.controller.command.WorksheetSelectionCommand;
import edu.isi.karma.controller.command.selection.SuperSelection;
import edu.isi.karma.controller.command.worksheet.MultipleValueEditColumnCommandFactory;
import edu.isi.karma.controller.update.UpdateContainer;
import edu.isi.karma.controller.update.WorksheetUpdateFactory;
import edu.isi.karma.er.helper.PythonRepository;
import edu.isi.karma.er.helper.PythonTransformationHelper;
import edu.isi.karma.rep.HNode;
import edu.isi.karma.rep.Node;
import edu.isi.karma.rep.RepFactory;
import edu.isi.karma.rep.Row;
import edu.isi.karma.rep.Worksheet;
import edu.isi.karma.rep.Workspace;


public abstract class PythonTransformationCommand extends WorksheetSelectionCommand {

  protected String transformationCode;
  final protected String hNodeId;
  final protected String errorDefaultValue;
 

  private static Logger logger = LoggerFactory
      .getLogger(PythonTransformationCommand.class);

  private enum JsonKeys {
    row, error
  }

  public PythonTransformationCommand(String id, String transformationCode,
      String worksheetId, String hNodeId, String errorDefaultValue, String selectionId) {
    super(id, worksheetId, selectionId);
    this.transformationCode = transformationCode;
    this.hNodeId = hNodeId;
    this.errorDefaultValue = errorDefaultValue;
    addTag(CommandTag.Transformation);
  }

  @Override
  public String getCommandName() {
    return this.getClass().getSimpleName();
  }

  @Override
  public String getTitle() {
    return "Python Transformation";
  }

  @Override
  public String getDescription() {
    return "";
  }

  @Override
  public CommandType getCommandType() {
    return CommandType.undoable;
  }

  protected void generateTransformedValues(Workspace workspace,
      Worksheet worksheet, RepFactory f, HNode hNode,
      JSONArray transformedRows, JSONArray errorValues, Integer limit)
          throws JSONException, IOException {

    SuperSelection selection = getSuperSelection(worksheet);
    String trimmedTransformationCode = transformationCode.trim();
    // Pedro: somehow we are getting empty statements, and these are causing
    // exceptions.
    if (trimmedTransformationCode.isEmpty()) {
      trimmedTransformationCode = "return \"\"";
      logger.info("Empty PyTransform statement in "
          + hNode.getColumnName());
    }
    String transformMethodStmt = PythonTransformationHelper
        .getPythonTransformMethodDefinitionState(worksheet,
            trimmedTransformationCode);


    logger.debug("Executing PyTransform\n" + transformMethodStmt);

    // Prepare the Python interpreter
    PythonInterpreter interpreter = new PythonInterpreter();

    PythonRepository repo = PythonRepository.getInstance();
    repo.initializeInterperter(interpreter);
    repo.importUserScripts(interpreter);
   
    repo.compileAndAddToRepositoryAndExec(interpreter, transformMethodStmt);

    Collection<Node> nodes = new ArrayList<Node>(Math.max(1000, worksheet
        .getDataTable().getNumRows()));
    worksheet.getDataTable().collectNodes(hNode.getHNodePath(f), nodes, selection);

    Map<String, String> rowToValueMap = new HashMap<String, String>();

    int counter = 0;
    long starttime = System.currentTimeMillis();
    // Go through all nodes collected for the column with given hNodeId

    interpreter.set("workspaceid", workspace.getId());
    interpreter.set("command", this);
    interpreter.set("selectionName", selection.getName());
    PyCode py = repo.getTransformCode();

    int numRowsWithErrors = 0;

    for (Node node : nodes) {
      Row row = node.getBelongsToRow();

      interpreter.set("nodeid", node.getId());

      try {
        PyObject output = interpreter.eval(py);
        String transformedValue = PythonTransformationHelper
            .getPyObjectValueAsString(output);
        addTransformedValue(transformedRows, row, transformedValue);
      } catch (PyException p) {
        logger.info("error in evaluation python, skipping one row");
        numRowsWithErrors++;
        // Error occured in the Python method execution
        addTransformedValue(transformedRows, row, errorDefaultValue);
        addError(errorValues, row, counter, p.value);
      } catch (Exception t) {
        // Error occured in the Python method execution
        logger.debug(
            "Error occured while transforming, using default value.",
            t);
        numRowsWithErrors++;
        rowToValueMap.put(row.getId(), errorDefaultValue);
      }
      if (limit != null && ++counter >= limit) {
        break;
      }
    }
    if (numRowsWithErrors > 0) {
      logger.debug("PyTransform errors in "
          + numRowsWithErrors
          + " rows. This could be normal when rows have unexpected values.");
    }
    logger.debug("transform time "
        + (System.currentTimeMillis() - starttime));
  }

  private void addError(JSONArray errorValues, Row row, int counter,
      PyObject value) throws JSONException {
    errorValues.put(new JSONObject().put(JsonKeys.row.name(), counter).put(
        JsonKeys.error.name(), value));

  }

  private void addTransformedValue(JSONArray transformedRows, Row row,
      String transformedValue) throws JSONException {
    JSONObject transformedRow = new JSONObject();
    transformedRow.put(
        MultipleValueEditColumnCommandFactory.Arguments.rowID.name(),
        row.getId());
    transformedRow.put(
        MultipleValueEditColumnCommandFactory.Arguments.value.name(),
        transformedValue);
    transformedRows.put(transformedRow);
  }

  @Override
  public UpdateContainer undoIt(Workspace workspace) {
    UpdateContainer c = (WorksheetUpdateFactory
        .createRegenerateWorksheetUpdates(worksheetId, getSuperSelection(workspace)));
    // TODO is it necessary to compute alignment and semantic types for
    // everything?
    c.append(computeAlignmentAndSemanticTypesAndCreateUpdates(workspace));
    return c;
  }

  public String getTransformationCode() {
    return transformationCode;
  }

  public void setTransformationCode(String transformationCode) {
    this.transformationCode = transformationCode;
  }
 
  public void addInputColumns(String hNodeId) {
    inputColumns.add(hNodeId);
  }
 
  public void addSelectedRowsColumns(String hNodeId) {
  }
 
  public void setSelectedRowsMethod(boolean t) {
  }
}
TOP

Related Classes of edu.isi.karma.controller.command.transformation.PythonTransformationCommand

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.