Package org.renjin.stats.internals.models

Source Code of org.renjin.stats.internals.models.Formula

package org.renjin.stats.internals.models;

import com.google.common.base.Functions;
import com.google.common.collect.Collections2;
import com.google.common.collect.Lists;
import org.renjin.primitives.matrix.IntMatrixBuilder;
import org.renjin.sexp.*;

import java.util.List;

/**
* Encapsulates the properties of R model formulae, which consist of three elements:
*
* <ul>
* <li>A response (or dependent) <i>variable</i></li>
* <li>An intercept flag (0=do not fit intercept, 1=fit intercept)</li>
* <li>A list of independent variable <i>terms</i>
* </ul>
*
* <p>Within the context of R model formulas, a <i>variable</i> is any valid R
* expression that can be evaluated within the formula's {@link Environment} (usually a
* data.frame object). It is often a symbol, like simply {@code x} or {@code y}, but a
* <i>variable</i> in this context a <i>variable</i> can also be a function of a Symbol,
* such as {@code log(x)} or any valid R expression, such {@code sqrt(x/y)}.
*
* <p>A <i>term</i> is a combination of one or more <i>variables</i>. This class stores terms in
* their expanded form: the formula described by {@code y ~ a * b} actually has three terms:
* {@code a}, {@code b}, and {@code a:b}, where {@code a:b} is an interaction term. See
* {@link FormulaInterpreter} for more information on the DSL.
*
* <p>This internal class is used by the primitives that manipulate model formulas.
*/
public class Formula {
  private SEXP response;
  private List<Term> terms = Lists.newArrayList();
  private int intercept = 1;
 
  public Formula(SEXP response, int intercept, Iterable<Term> terms) {
    super();
    this.response = response;
    this.terms = Lists.newArrayList(terms);
    this.intercept = intercept;
  }
 
  public Formula(List<Term> terms) {
    this.terms = terms;
  }
 
  public SEXP getResponse() {
    return response;
  }
  public List<Term> getTerms() {
    return terms;
  }
 
  /**
   *
   * @return
   */
  public int getIntercept() {
    return intercept;
  }
 
  /**
   *
   * @return a list of all the unique <i>variables</i> that are referenced in this
   * model formula.
   */
  public List<SEXP> uniqueVariables() {
    List<SEXP> variables = Lists.newArrayList();
   
    if(response != null) {
      variables.add(response);
    }
   
    for(Term term : terms) {
      for(SEXP expr : term) {
        if(!variables.contains(expr)) {
          variables.add(expr);
        }
      }
    }
    return variables;
  }
 
  /**
   *
   * @return An unevaluated {@link FunctionCall} to ‘list’ of the variables in the model.
   * This is provided in unevaluated form so it can later be conveniently evaluated within
   * the formula's environment.
   */
  public FunctionCall buildVariablesAttribute() {
    PairList.Builder args = new PairList.Builder();
    for(SEXP variable : uniqueVariables()) {
      args.add(variable);
    }
    return new FunctionCall(Symbol.get("list"), args.build());
  }


  /**
   * @return A matrix of variables by terms showing which variables appear
   * in which terms.  The entries are 0 if the variable does not
   * occur in the term, 1 if it does occur and should be coded by
   * contrasts, and 2 if it occurs and should be coded via dummy
   * variables for all levels (as when an intercept or lower-order
   * term is missing).  If there are no terms other than an
   * intercept and offsets, this is ‘numeric(0)’.
   */
  public SEXP buildFactorsMatrix() {
    if(terms.size() == 0) {
      return new IntArrayVector();
    } else {
      List<SEXP> variables = uniqueVariables();
      IntMatrixBuilder matrix = new IntMatrixBuilder(variables.size(), terms.size());
      matrix.setRowNames(Collections2.transform(variables, Functions.toStringFunction()));
      matrix.setColNames(buildTermLabels());
      for(int row = 0; row != variables.size(); ++row) {
        for(int col = 0; col != terms.size(); ++col) {
          if(terms.get(col).getExpressions().contains(
              variables.get(row))) {
            // TODO(alex): I don't know when this should be 1 or 2??
            matrix.set(row, col, 1);
          } else {
            matrix.set(row, col, 0);
          }
        }
      }
      return matrix.build();
    }
  }
 
  /**
   * @return A character vector containing the labels for each of the
   * terms in the model, except for offsets.  Non-syntactic names
   * will be quoted by backticks.  Note that these are after
   * possible re-ordering (unless argument ‘keep.order’ was
   * false).
   */
  public StringVector buildTermLabels() {
    StringVector.Builder labels = new StringVector.Builder();
    for(Term term : terms) {
      labels.add(term.getLabel());
    }
    return labels.build();
  }
 
  /**
   * @return Either 0, indicating no intercept is to be fit, or 1
   *                indicating that an intercept is to be fit.
   */
  public IntVector buildInterceptAttribute() {
    return new IntArrayVector(intercept);
  }
 
  /**
   * @return  The index of the variable (in variables) of the response (the
   * left hand side of the formula). Zero, if there is no
   * response.
   */
  public IntVector buildResponseAttribute() {
    return new IntArrayVector(response == null ? 0 : 1 );
  }
 
  @Override
  public int hashCode() {
    final int prime = 31;
    int result = 1;
    result = prime * result + intercept;
    result = prime * result + ((response == null) ? 0 : response.hashCode());
    result = prime * result + ((terms == null) ? 0 : terms.hashCode());
    return result;
  }

  @Override
  public boolean equals(Object obj) {
    if (this == obj)
      return true;
    if (obj == null)
      return false;
    if (getClass() != obj.getClass())
      return false;
    Formula other = (Formula) obj;
    if (intercept != other.intercept) {
      return false;
    }
    if (response != other.response) {
        return false;
    }
    return terms.equals(other.terms);
  }

  @Override
  public String toString() {
    return response + " ~ " + intercept + " + " + terms.toString();
  }
}
TOP

Related Classes of org.renjin.stats.internals.models.Formula

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.