Package org.apache.mahout.math

Source Code of org.apache.mahout.math.FileBasedMatrix

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.mahout.math;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.DoubleBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.List;

/**
* Provides a way to get data from a file and treat it as if it were a matrix, but avoids putting all that
* data onto the Java heap.  Instead, the file is mapped into non-heap memory as a DoubleBuffer and we access
* that instead.
*/
public final class FileBasedMatrix extends AbstractMatrix {
  private final int rowsPerBlock;
  private final List<DoubleBuffer> content = Lists.newArrayList();

  /**
   * Constructs an empty matrix of the given size.
   *
   * @param rows    The number of rows in the result.
   * @param columns The number of columns in the result.
   */
  public FileBasedMatrix(int rows, int columns) {
    super(rows, columns);
    long maxRows = ((1L << 31) - 1) / (columns * 8);
    if (rows > maxRows) {
      rowsPerBlock = (int) maxRows;
    } else {
      rowsPerBlock = rows;
    }
  }

  private void addData(DoubleBuffer content) {
    this.content.add(content);
  }

  public void setData(File f, boolean loadNow) throws IOException {
    Preconditions.checkArgument(f.length() == rows * columns * 8L, "File " + f + " is wrong length");

    for (int i = 0; i < (rows + rowsPerBlock - 1) / rowsPerBlock; i++) {
      long start = i * rowsPerBlock * columns * 8L;
      long size = rowsPerBlock * columns * 8L;
      MappedByteBuffer buf = new FileInputStream(f).getChannel().map(FileChannel.MapMode.READ_ONLY, start,
                                                                     Math.min(f.length() - start, size));
      if (loadNow) {
        buf.load();
      }
      addData(buf.asDoubleBuffer());
    }
  }

  public static void writeMatrix(File f, Matrix m) throws IOException {
    Preconditions.checkArgument(f.canWrite(), "Can't write to output file");
    FileOutputStream fos = new FileOutputStream(f);
    try {
      ByteBuffer buf = ByteBuffer.allocate(m.columnSize() * 8);
      for (MatrixSlice row : m) {
        buf.clear();
        for (Vector.Element element : row.vector().all()) {
          buf.putDouble(element.get());
        }
        buf.flip();
        fos.write(buf.array());
      }
    } finally {
      fos.close();
    }
  }

  /**
   * Assign the other vector values to the column of the receiver
   *
   * @param column the int row to assign
   * @param other  a Vector
   * @return the modified receiver
   * @throws org.apache.mahout.math.CardinalityException
   *          if the cardinalities differ
   */
  @Override
  public Matrix assignColumn(int column, Vector other) {
    throw new UnsupportedOperationException("Default operation");
  }

  /**
   * Assign the other vector values to the row of the receiver
   *
   * @param row   the int row to assign
   * @param other a Vector
   * @return the modified receiver
   * @throws org.apache.mahout.math.CardinalityException
   *          if the cardinalities differ
   */
  @Override
  public Matrix assignRow(int row, Vector other) {
    throw new UnsupportedOperationException("Default operation");
  }

  /**
   * Return the value at the given indexes, without checking bounds
   *
   * @param row    an int row index
   * @param column an int column index
   * @return the double at the index
   */
  @Override
  public double getQuick(int row, int column) {
    int block = row / rowsPerBlock;
    return content.get(block).get((row % rowsPerBlock) * columns + column);
  }

  /**
   * Return an empty matrix of the same underlying class as the receiver
   *
   * @return a Matrix
   */
  @Override
  public Matrix like() {
    throw new UnsupportedOperationException("Default operation");
  }

  /**
   * Returns an empty matrix of the same underlying class as the receiver and of the specified size.
   *
   * @param rows    the int number of rows
   * @param columns the int number of columns
   */
  @Override
  public Matrix like(int rows, int columns) {
    return new DenseMatrix(rows, columns);
  }

  /**
   * Set the value at the given index, without checking bounds
   *
   * @param row    an int row index into the receiver
   * @param column an int column index into the receiver
   * @param value  a double value to set
   */
  @Override
  public void setQuick(int row, int column, double value) {
    throw new UnsupportedOperationException("Default operation");
  }

  /**
   * Return a view into part of a matrix.  Changes to the view will change the
   * original matrix.
   *
   * @param offset an int[2] offset into the receiver
   * @param size   the int[2] size of the desired result
   * @return a matrix that shares storage with part of the original matrix.
   * @throws org.apache.mahout.math.CardinalityException
   *          if the length is greater than the cardinality of the receiver
   * @throws org.apache.mahout.math.IndexException
   *          if the offset is negative or the offset+length is outside of the receiver
   */
  @Override
  public Matrix viewPart(int[] offset, int[] size) {
    throw new UnsupportedOperationException("Default operation");
  }
}
TOP

Related Classes of org.apache.mahout.math.FileBasedMatrix

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.