Package org.apache.mahout.h2obindings.ops

Source Code of org.apache.mahout.h2obindings.ops.RowRange

/*
*  Licensed to the Apache Software Foundation (ASF) under one or more
*  contributor license agreements.  See the NOTICE file distributed with
*  this work for additional information regarding copyright ownership.
*  The ASF licenses this file to You under the Apache License, Version 2.0
*  (the "License"); you may not use this file except in compliance with
*  the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*/

package org.apache.mahout.h2obindings.ops;

import scala.collection.immutable.Range;

import water.MRTask;
import water.fvec.Frame;
import water.fvec.Vec;
import water.fvec.Chunk;
import water.fvec.NewChunk;
import water.parser.ValueString;

import org.apache.mahout.h2obindings.drm.H2ODrm;

/**
* Filter operation
*/
public class RowRange {
  /**
   * Filter rows from intput DRM, to include only row indiced included in R.
   *
   * @param drmA Input DRM.
   * @param R Range object specifying the start and end row numbers to filter.
   * @return new DRM with just the filtered rows.
   */
  public static H2ODrm exec(H2ODrm drmA, final Range R) {
    Frame A = drmA.frame;
    Vec keys = drmA.keys;

    // Run a filtering MRTask on A. If row number falls within R.start() and
    // R.end(), then the row makes it into the output
    Frame Arr = new MRTask() {
        public void map(Chunk chks[], NewChunk ncs[]) {
          int chunkSize = chks[0].len();
          long chunkStart = chks[0].start();

          // First check if the entire chunk even overlaps with R
          if (chunkStart > R.end() || (chunkStart + chunkSize) < R.start()) {
            return;
          }

          // This chunk overlaps, filter out just the overlapping rows
          for (int r = 0; r < chunkSize; r++) {
            if (!R.contains(chunkStart + r)) {
              continue;
            }

            for (int c = 0; c < chks.length; c++) {
              ncs[c].addNum(chks[c].at0(r));
            }
          }
        }
      }.doAll(A.numCols(), A).outputFrame(null, null);

    Vec Vrr = (keys == null) ? null : new MRTask() {
        // This is a String keyed DRM. Do the same thing as above,
        // but this time just one column of Strings.
        public void map(Chunk chk, NewChunk nc) {
          int chunkSize = chk.len();
          long chunkStart = chk.start();
          ValueString vstr = new ValueString();

          if (chunkStart > R.end() || (chunkStart + chunkSize) < R.start()) {
            return;
          }

          for (int r = 0; r < chunkSize; r++) {
            if (!R.contains(chunkStart + r)) {
              continue;
            }

            nc.addStr(chk.atStr0(vstr, r));
          }
        }
      }.doAll(1, keys).outputFrame(null, null).anyVec();

    return new H2ODrm(Arr, Vrr);
  }
}
TOP

Related Classes of org.apache.mahout.h2obindings.ops.RowRange

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.