Package org.apache.mahout.math

Examples of org.apache.mahout.math.Matrix


   *         singular values) have been found.
   */
  public TrainingState solve(Matrix corpus,
                             int desiredRank) {
    int cols = corpus.numCols();
    Matrix eigens = new DenseMatrix(desiredRank, cols);
    List<Double> eigenValues = new ArrayList<Double>();
    log.info("Finding " + desiredRank + " singular vectors of matrix with " + corpus.numRows() + " rows, via Hebbian");
    /**
     * The corpusProjections matrix is a running cache of the residual projection of each corpus vector against all
     * of the previously found singular vectors.  Without this, if multiple passes over the data are made (per
     * singular vector), recalculating these projections eventually dominates the computational complexity of the
     * solver.
     */
    Matrix corpusProjections = new DenseMatrix(corpus.numRows(), desiredRank);
    TrainingState state = new TrainingState(eigens, corpusProjections);
    for (int i = 0; i < desiredRank; i++) {
      Vector currentEigen = new DenseVector(cols);
      Vector previousEigen = null;
      while (hasNotConverged(currentEigen, corpus, state)) {
View Full Code Here


    numPasses++;
    if (state.isFirstPass()) {
      log.info("First pass through the corpus, no need to check convergence...");
      return true;
    }
    Matrix previousEigens = state.getCurrentEigens();
    log.info("Have made {} passes through the corpus, checking convergence...", numPasses);
    /*
     * Step 1: orthogonalize currentPseudoEigen by subtracting off eigen(i) * helper.get(i)
     * Step 2: zero-out the helper vector because it has already helped.
     */
    for (int i = 0; i < state.getNumEigensProcessed(); i++) {
      Vector previousEigen = previousEigens.getRow(i);
      currentPseudoEigen.assign(previousEigen, new PlusMult(-state.getHelperVector().get(i)));
      state.getHelperVector().set(i, 0);
    }
    if (debug && currentPseudoEigen.norm(2) > 0) {
      for (int i = 0; i < state.getNumEigensProcessed(); i++) {
        Vector previousEigen = previousEigens.getRow(i);
        log.info("dot with previous: {}", (previousEigen.dot(currentPseudoEigen)) / currentPseudoEigen.norm(2));
      }
    }
    /*
     * Step 3: verify how eigen-like the prospective eigen is.  This is potentially asynchronous.
View Full Code Here

    SingularVectorVerifier verifier = new AsyncEigenVerifier();
    HebbianSolver solver = new HebbianSolver(updater,
        verifier,
        convergence,
        maxPasses);
    Matrix corpus = null;
    if (numThreads <= 1) {
      //  corpus = new DiskBufferedDoubleMatrix(new File(corpusDir), inBufferSize);
    } else {
      //  corpus = new ParallelMultiplyingDiskBufferedDoubleMatrix(new File(corpusDir), inBufferSize, numThreads);
    }
View Full Code Here

    } else {
      rand = RandomUtils.getRandom(seed);
    }
    // fetch the cumulative distributions
    Vector cip = HmmUtils.getCumulativeInitialProbabilities(model);
    Matrix ctm = HmmUtils.getCumulativeTransitionMatrix(model);
    Matrix com = HmmUtils.getCumulativeOutputMatrix(model);
    // allocate the result array
    int[] result = new int[steps];
    // choose the initial state
    int hiddenState = 0;

    double randnr = rand.nextDouble();
    while (cip.get(hiddenState) < randnr) {
      hiddenState++;
    }

    // now draw steps output states according to the cumulative
    // distributions
    for (int step = 0; step < steps; ++step) {
      // choose the output state for the given hidden state
      randnr = rand.nextDouble();
      int outputState = 0;
      while (com.get(hiddenState, outputState) < randnr) {
        outputState++;
      }
      result[step] = outputState;
      // choose the next hidden state
      randnr = rand.nextDouble();
View Full Code Here

   */
  public static double modelLikelihood(HmmModel model, int[] outputSequence,
                                       Matrix beta, boolean scaled) {
    double likelihood = 0;
    // fetch the emission probabilities
    Matrix e = model.getEmissionMatrix();
    Vector pi = model.getInitialProbabilities();
    int firstOutput = outputSequence[0];
    if (scaled) {
      for (int i = 0; i < model.getNrOfHiddenStates(); ++i) {
        likelihood += pi.getQuick(i) * Math.exp(beta.getQuick(0, i)) * e.getQuick(i, firstOutput);
      }
    } else {
      for (int i = 0; i < model.getNrOfHiddenStates(); ++i) {
        likelihood += pi.getQuick(i) * beta.getQuick(0, i) * e.getQuick(i, firstOutput);
      }
    }
    return likelihood;
  }
View Full Code Here

    HmmModel sparseModel = HmmUtils.truncateModel(model, 0.01);
    // first make sure this is a valid model
    HmmUtils.validate(sparseModel);
    // now check whether the values are as expected
    Vector sparse_ip = sparseModel.getInitialProbabilities();
    Matrix sparse_tr = sparseModel.getTransitionMatrix();
    Matrix sparse_em = sparseModel.getEmissionMatrix();
    for (int i = 0; i < sparseModel.getNrOfHiddenStates(); ++i) {
      if (i == 2) {
        assertEquals(1.0, sparse_ip.getQuick(i), EPSILON);
      } else {
        assertEquals(0.0, sparse_ip.getQuick(i), EPSILON);
      }
      for (int j = 0; j < sparseModel.getNrOfHiddenStates(); ++j) {
        if (i == j) {
          assertEquals(1.0, sparse_tr.getQuick(i, j), EPSILON);
          assertEquals(1.0, sparse_em.getQuick(i, j), EPSILON);
        } else {
          assertEquals(0.0, sparse_tr.getQuick(i, j), EPSILON);
          assertEquals(0.0, sparse_em.getQuick(i, j), EPSILON);
        }
      }
    }
  }
View Full Code Here

    taggingModel = HmmTrainer.trainSupervisedSequence(nextTagId, nextWordId,
        hiddenSequences, observedSequences, 0.05);
    // we have to adjust the model a bit,
    // since we assume a higher probability that a given unknown word is NNP
    // than anything else
    Matrix emissions = taggingModel.getEmissionMatrix();
    for (int i = 0; i < taggingModel.getNrOfHiddenStates(); ++i) {
      emissions.setQuick(i, 0, 0.1 / (double) taggingModel.getNrOfHiddenStates());
    }
    int nnptag = tagIDs.get("NNP");
    emissions.setQuick(nnptag, 0, 1 / (double) taggingModel.getNrOfHiddenStates());
    // re-normalize the emission probabilities
    HmmUtils.normalizeModel(taggingModel);
    // now register the names
    taggingModel.registerHiddenStateNames(tagIDs);
    taggingModel.registerOutputStateNames(wordIDs);
View Full Code Here

    }

    @Override
    public Matrix deserialize(JsonElement x, Type type, JsonDeserializationContext jsonDeserializationContext) {
      JsonObject data = x.getAsJsonObject();
      Matrix r = new DenseMatrix(data.get("rows").getAsInt(), data.get("cols").getAsInt());
      int i = 0;
      for (JsonElement row : data.get("data").getAsJsonArray()) {
        int j = 0;
        for (JsonElement element : row.getAsJsonArray()) {
          r.set(i, j, element.getAsDouble());
          j++;
        }
        i++;
      }
      return r;
View Full Code Here

                 Path outputTmpPath,
                 int numRows,
                 int numCols,
                 boolean isSymmetric,
                 int desiredRank) throws Exception {
    Matrix eigenVectors = new DenseMatrix(desiredRank, numCols);
    List<Double> eigenValues = new ArrayList<Double>();

    DistributedRowMatrix matrix = new DistributedRowMatrix(inputPath, outputTmpPath, numRows, numCols);
    matrix.configure(new JobConf(getConf() != null ? getConf() : new Configuration()));
    solve(matrix, desiredRank, eigenVectors, eigenValues, isSymmetric);
View Full Code Here

    }

    @Override
    public Matrix deserialize(JsonElement x, Type type, JsonDeserializationContext jsonDeserializationContext) {
      JsonObject data = x.getAsJsonObject();
      Matrix r = new DenseMatrix(data.get("rows").getAsInt(), data.get("cols").getAsInt());
      int i = 0;
      for (JsonElement row : data.get("data").getAsJsonArray()) {
        int j = 0;
        for (JsonElement element : row.getAsJsonArray()) {
          r.set(i, j, element.getAsDouble());
          j++;
        }
        i++;
      }
      return r;
View Full Code Here

TOP

Related Classes of org.apache.mahout.math.Matrix

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.