Package org.apache.mahout.h2obindings.ops

Source Code of org.apache.mahout.h2obindings.ops.Cbind

/*
*  Licensed to the Apache Software Foundation (ASF) under one or more
*  contributor license agreements.  See the NOTICE file distributed with
*  this work for additional information regarding copyright ownership.
*  The ASF licenses this file to You under the Apache License, Version 2.0
*  (the "License"); you may not use this file except in compliance with
*  the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*/

package org.apache.mahout.h2obindings.ops;

import water.MRTask;
import water.fvec.Frame;
import water.fvec.Vec;
import water.fvec.Chunk;

import org.apache.mahout.h2obindings.drm.H2ODrm;

/**
* R-like cbind like operator, on two DRMs
*/
public class Cbind {
  /**
   * Combine the columns of two DRMs A and B to create a new DRM.
   *
   * @param drmA DRM representing matrix A.
   * @param drmB DRM representing matrix B.
   * @return new DRM containing columns of A and B adjacent.
   */
  public static H2ODrm exec(H2ODrm drmA, H2ODrm drmB) {
    Frame fra = drmA.frame;
    Vec keysa = drmA.keys;
    Frame frb = drmB.frame;
    Vec keysb = drmB.keys;

    // If A and B are similarly partitioned, ..
    if (fra.anyVec().group() == frb.anyVec().group()) {
      // .. then, do a light weight zip()
      return zip(fra, keysa, frb, keysb);
    } else {
      // .. else, do a heavy weight join() which involves moving data over the wire
      return join(fra, keysa, frb, keysb);
    }
  }

  /** Light weight zip(), no data movement */
  private static H2ODrm zip(final Frame fra, final Vec keysa, final Frame frb, final Vec keysb) {
    // Create a new Vec[] to hold the concatenated list of A and B's column vectors
    Vec vecs[] = new Vec[fra.vecs().length + frb.vecs().length];
    int d = 0;
    // fill A's column vectors
    for (Vec vfra : fra.vecs()) {
      vecs[d++] = vfra;
    }
    // and B's
    for (Vec vfrb : frb.vecs()) {
      vecs[d++] = vfrb;
    }
    // and create a new Frame with the combined list of column Vecs
    Frame fr = new Frame(vecs);
    /* Finally, inherit A's string labels into the result */
    return new H2ODrm(fr, keysa);
  }

  /** Heavy weight join(), involves moving data */
  private static H2ODrm join(final Frame fra, final Vec keysa, final Frame frb, final Vec keysb) {
    // The plan is to re-organize B to be "similarly partitioned as A", and then zip()
    Vec bvecs[] = new Vec[frb.vecs().length];

    for (int i = 0; i < bvecs.length; i++) {
      // First create column Vecs which are similarly partitioned as A
      bvecs[i] = fra.anyVec().makeZero();
    }

    // Next run an MRTask on the new vectors, and fill each cell (initially 0)
    // by pulling in appropriate values from B (frb)
    new MRTask() {
      public void map(Chunk chks[]) {
        int chunkSize = chks[0].len();
        long start = chks[0].start();
        Vec vecs[] = frb.vecs();

        for (int r = 0; r < chunkSize; r++) {
          for (int c = 0; c < chks.length; c++) {
            // assert va.atStr(start+r) == vb.atStr(start+r)
            chks[c].set0(r, vecs[c].at(start + r));
          }
        }
      }
    }.doAll(bvecs);

    // now that bvecs[] is compatible, just zip'em'up
    return zip(fra, keysa, new Frame(bvecs), null);
  }
}
TOP

Related Classes of org.apache.mahout.h2obindings.ops.Cbind

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.