Package edu.isi.karma.controller.update

Source Code of edu.isi.karma.controller.update.WorksheetCleaningUpdate

/*******************************************************************************
* Copyright 2012 University of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This code was developed by the Information Integration Group as part
* of the Karma project at the Information Sciences Institute of the
* University of Southern California.  For more information, publications,
* and related projects, please see: http://www.isi.edu/integration
******************************************************************************/

package edu.isi.karma.controller.update;

import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;

import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.isi.karma.controller.command.selection.SuperSelection;
import edu.isi.karma.rep.ColumnMetadata;
import edu.isi.karma.rep.HNode;
import edu.isi.karma.rep.HNodePath;
import edu.isi.karma.rep.Node;
import edu.isi.karma.rep.Worksheet;
import edu.isi.karma.util.HTTPUtil;
import edu.isi.karma.view.VWorksheet;
import edu.isi.karma.view.VWorkspace;
import edu.isi.karma.view.ViewPreferences.ViewPreference;
import edu.isi.karma.webserver.ServletContextParameterMap;
import edu.isi.karma.webserver.ServletContextParameterMap.ContextParameter;

public class WorksheetCleaningUpdate extends
AbstractUpdate {

  private String  worksheetId;
  private boolean  forceUpdates;
  private SuperSelection selection;
  private static Logger logger = LoggerFactory.getLogger(
      WorksheetCleaningUpdate.class);

  public static int DEFAULT_COLUMN_LENGTH = 10;
  public static int MIN_COLUMN_LENGTH = 10;

  private enum JsonKeys {
    worksheetId, hNodeId, worksheetChartData,
    chartData, id, value, json,
    Preferred_Length, sampleSize, sampleRate
  }

  public WorksheetCleaningUpdate(String worksheetId, boolean forceUpdates, SuperSelection selection) {
    this.worksheetId = worksheetId;
    this.forceUpdates = forceUpdates;
    this.selection = selection;
  }

  @Override
  public void generateJson(String prefix, PrintWriter pw,
      VWorkspace vWorkspace) {
    VWorksheet vWorksheet = vWorkspace.getViewFactory().getVWorksheetByWorksheetId(worksheetId);
    Worksheet worksheet = vWorksheet.getWorksheet();
    List<HNodePath> columnPaths = worksheet.getHeaders().getAllPaths();
    ColumnMetadata colMetadata = worksheet.getMetadataContainer().getColumnMetadata();

    List<String> columnsInvoked = new ArrayList<String>();

    for (HNodePath path:columnPaths) {
      String leafHNodeId = path.getLeaf().getId();
      List<Node> nodes = new ArrayList<Node>(Math.max(1000, worksheet.getDataTable().getNumRows()));
      worksheet.getDataTable().collectNodes(path, nodes, selection);
      final int sampleSize = (nodes.size() > 1000) ? 1000 : nodes.size();
      columnsInvoked.add(leafHNodeId);
      try {
        // Check if the column metadata doesn't contains the cleaning information
        if (colMetadata.getColumnHistogramData(leafHNodeId) == null
            || forceUpdates) {
          // Prepare the input data for the cleaning service
          JSONArray requestJsonArray = new JSONArray()
          if (sampleSize == nodes.size()) {
            for (Node node : nodes) {
              JSONObject jsonRecord = new JSONObject();
              jsonRecord.put(JsonKeys.id.name(), node.getId());
              String originalVal = node.getValue().asString();
              originalVal = originalVal == null ? "" : originalVal;
              jsonRecord.put(JsonKeys.value.name(), originalVal);
              requestJsonArray.put(jsonRecord);
            }
          }
          else {
            Set<Integer> randomNums = new HashSet<Integer>();
            Random gen = new Random();
            for (int i = 0; i < sampleSize; i++) {
              int r = gen.nextInt(nodes.size());
              while (randomNums.contains(r))
                r = gen.nextInt(nodes.size());
              randomNums.add(r);
              Node node = nodes.get(r);
              JSONObject jsonRecord = new JSONObject();
              jsonRecord.put(JsonKeys.id.name(), node.getId());
              String originalVal = node.getValue().asString();
              originalVal = originalVal == null ? "" : originalVal;
              jsonRecord.put(JsonKeys.value.name(), originalVal);
              requestJsonArray.put(jsonRecord);
            }
          }
         
          //TODO put the estimate back
         
          if (requestJsonArray.length() == 0) {
            logger.error("Empty values input for path" + path.toColumnNamePath());
            continue;
          }
          String cleaningServiceURL = ServletContextParameterMap.getParameterValue(
              ContextParameter.CLEANING_SERVICE_URL);

          Map<String, String> formParams = new HashMap<String, String>();
          formParams.put(JsonKeys.json.name(), requestJsonArray.toString());
          String reqResponse = HTTPUtil.executeHTTPPostRequest(cleaningServiceURL, null,
              null, formParams);
          //         
          //          logger.debug("***");
          //          logger.debug(path.getLeaf().getColumnName());
          //          logger.debug(reqResponse);
          try {
            // Test if the output is valid JSON object. Throws exception if not.
            JSONObject output = new JSONObject(reqResponse);
            long sampleRate = Math.round(nodes.size() * 1.0 / sampleSize);
            JSONArray array = new JSONArray(output.getString("histogram"));
            for (int i = 0; i < array.length(); i++) {
              JSONObject obj = array.getJSONObject(i);
              long value = Integer.parseInt(obj.getString("Frequency")) * sampleRate;
              obj.put("Frequency", value);
            }
            output.put("histogram", array.toString());
            // Add to the metadata if valid
            colMetadata.addColumnHistogramData(leafHNodeId, output);

            // Parse the request response to populate the column metadata for the worksheet
            int colLength = getColumnLength(path.getLeaf(), output,
                vWorkspace.getPreferences().getIntViewPreferenceValue(
                    ViewPreference.maxCharactersInCell));
            colMetadata.addColumnPreferredLength(leafHNodeId, colLength);

            // Add the hNodeId to the list for which we invoked successfully
           
          } catch (JSONException e) {
            logger.error("Error occured with cleaning service for HNode: "
                + path.toColumnNamePath(), e);

            // Set to a default column word length
            colMetadata.addColumnPreferredLength(leafHNodeId, DEFAULT_COLUMN_LENGTH);
            continue;
         
        }
      } catch (Exception e) {
        logger.error("Error while invoking cleaning service", e);
      }
     
    }

    // Prepare the Update that is going to be sent to the browser
    JSONObject response = new JSONObject();
    try {
      response.put(GenericJsonKeys.updateType.name(), this.getClass().getSimpleName());
      response.put(JsonKeys.worksheetId.name(), worksheetId);
      JSONArray chartData = new JSONArray();

      for (String hNodeId:columnsInvoked) {
        JSONObject columnChartData = new JSONObject();
        columnChartData.put(JsonKeys.hNodeId.name(), hNodeId);
        try {
          columnChartData.put(JsonKeys.chartData.name(),
              colMetadata.getColumnHistogramData(hNodeId));
        } catch (JSONException e) {
          logger.error("Error occured with cleaning service for HNode: " + hNodeId, e);
          continue;
        }

        chartData.put(columnChartData);
      }
      response.put(JsonKeys.worksheetChartData.name(), chartData);
      pw.print(response.toString());
    } catch (Exception e) {
      logger.error("Error occured while writing to JSON!", e);
      return;
    }
  }

  private int getColumnLength(HNode hNode, JSONObject serviceResults, int maxColumnWidth)
      throws JSONException {
    int colLength = serviceResults.getInt(JsonKeys.Preferred_Length.name());
    colLength = (colLength == -1 || colLength == 0) ? DEFAULT_COLUMN_LENGTH : colLength;

    // Check if it is greater that max column data length
    colLength = (colLength > maxColumnWidth) ? maxColumnWidth : colLength;

    // Check if it is lesser than minimum required for the cleaning chart
    colLength = (colLength < MIN_COLUMN_LENGTH) ? MIN_COLUMN_LENGTH : colLength;

    // Check if column name requires more characters
    String colName = hNode.getColumnName();
    colLength = (colLength < colName.length()) ? colName.length() : colLength;
    return colLength;
  }
 
  public boolean equals(Object o) {
    if (o instanceof WorksheetCleaningUpdate) {
      WorksheetCleaningUpdate t = (WorksheetCleaningUpdate)o;
      return t.worksheetId.equals(worksheetId) && t.selection.equals(selection);
    }
    return false;
  }
}
TOP

Related Classes of edu.isi.karma.controller.update.WorksheetCleaningUpdate

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.