Package org.renjin.primitives.match

Source Code of org.renjin.primitives.match.Match$FactorString

/*
* R : A Computer Language for Statistical Data Analysis
* Copyright (C) 1995, 1996  Robert Gentleman and Ross Ihaka
* Copyright (C) 1997--2008  The R Development Core Team
* Copyright (C) 2003, 2004  The R Foundation
* Copyright (C) 2010 bedatadriven
*

import com.google.common.collect.UnmodifiableIterator;
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

package org.renjin.primitives.match;

import org.renjin.eval.Calls;
import org.renjin.eval.Context;
import org.renjin.eval.EvalException;
import org.renjin.invoke.annotations.Current;
import org.renjin.invoke.annotations.Internal;
import org.renjin.sexp.*;
import org.renjin.util.NamesBuilder;


/**
* Default implementations of match() related functions.
*/
public class Match {

  private static final int UNMATCHED = -1;
  private static final int MULTIPLE_MATCH = -2;

  private Match() { }

  /**
   * match returns a vector of the positions of (first) matches of its first argument in its second.
   * @param search vector or NULL: the values to be matched.
   * @param table vector or NULL: the values to be matched against.
   * @param noMatch the value to be returned in the case when no match is found. Note that it is coerced to integer.
   * @param incomparables a vector of values that cannot be matched. Any value in x matching a value in this vector is assigned the nomatch value.
   *        For historical reasons, FALSE is equivalent to NULL.
   * @return
   */
  @Internal
  public static int[] match(Vector search, Vector table, int noMatch, AtomicVector incomparables) {
    //For historical reasons, FALSE is equivalent to NULL.
    if(incomparables.equals( LogicalVector.FALSE ) ) {
      incomparables = Null.INSTANCE;
    }

    // We need to handle factors specially here -
    // treat them as strings if one of the other arguments
    // is a string
    if(search instanceof StringVector || table instanceof StringVector) {
      if(search.inherits("factor")) {
        search = new FactorString(search);
      }
      if(table.inherits("factor")) {
        table = new FactorString(table);
      }
    }

    int[] matches = new int[search.length()];
    for(int i=0;i!=search.length();++i) {
      if( incomparables.contains(search, i)) {
        matches[i] = noMatch;
      } else {
        int pos;
        if(search.isElementNA(i)) {
          pos = indexOfNA(table);
        } else {
          pos = table.indexOf(search, i, 0);
        }
        matches[i] = pos >= 0 ? pos+1 : noMatch;
      }
    }
    return matches;
  }
 

  private static int indexOfNA(Vector table) {
    for(int i=0;i!=table.length();++i) {
      if(table.isElementNA(i)) {
        return i;
      }
    }
    return -1;
  }
 

  /**
   * pmatch seeks matches for the elements of its first argument among those of its second.
   *
   * The behaviour differs by the value of duplicates.ok. Consider first the case
   * if this is true. First exact matches are considered, and the positions of the
   * first exact matches are recorded. Then unique partial matches are considered,
   * and if found recorded. (A partial match occurs if the whole of the element of x
   * matches the beginning of the element of table.) Finally, all remaining elements of
   * x are regarded as unmatched. In addition, an empty string can match nothing, not even an
   * exact match to an empty string. This is the appropriate behaviour for partial matching
   * of character indices, for example.
   *
   * <p>If duplicates.ok is FALSE, values of table once matched are excluded from the
   * search for subsequent matches. This behaviour is equivalent to the R algorithm
   * for argument matching, except for the consideration of empty strings (which in
   * argument matching are matched after exact and partial matching to any remaining arguments).
   *
   * @param x the values to be matched
   * @param table the values to be matched against: converted to a character vector.
   * @param noMatch the value to be returned at non-matching or multiply partially matching positions.
   * @param duplicatesOk can elements be in table be matched to multiple elements in x?
   * @return An integer vector (possibly including NA if nomatch = NA) of the same length as x,
   * giving the indices of the elements in table which matched, or {@code nomatch}.
   */
  @Internal
  public static IntVector pmatch(StringVector x, StringVector table, int noMatch, boolean duplicatesOk) {
    return commonStringMatch(x, table, noMatch, noMatch, duplicatesOk);
  }
 
  @Internal
  public static IntVector charmatch(StringVector x, StringVector table, int noMatch) {
    // I don't really understand the difference between charmatch and pmatch:
    // it seems that for pmatch, if the search string partially matches more than one
    // element in table, then it's returns the no match value (default = NA)
    //
    // charmatch() on the ohter hand seems to return 0 for the special case.
    return commonStringMatch(x, table, noMatch, 0, true );
  }
 

  /**
   * Common implementation for pmatch and charmatch
   * @param x
   * @param table
   * @param unmatchedCode the value to return when there is no match
   * @param duplicatePartialsCode the value to return when a value in x partially matches multiple values in table
   * @return
   */
  private static IntVector commonStringMatch(StringVector x, StringVector table,
      int unmatchedCode, int duplicatePartialsCode, boolean duplicatesOk) {
    IntArrayVector.Builder result = new IntArrayVector.Builder(x.length());
    boolean matchedTable[] = new boolean[table.length()];
    boolean matchedSearch[] = new boolean[x.length()];

    // first pass : exact matches
    for(int i=0;i!= x.length();++i) {
      String toMatch = pmatchElementAt(x, i);
      int match = exactMatch(toMatch, table);
      if(match != UNMATCHED && (duplicatesOk || !matchedTable[match])) {
        result.set(i, match+1);
        matchedTable[match] = true;
        matchedSearch[i] = true;
      }
    }

    // second pass : partial matches
    for(int i=0;i!= x.length();++i) {
      if(!matchedSearch[i]) {
        String toMatch = pmatchElementAt(x, i);
        int match = uniquePartialMatch(toMatch, table);
        if(match == UNMATCHED) {
          result.set(i, unmatchedCode);
        } else if(match == MULTIPLE_MATCH) {
          result.set(i, duplicatePartialsCode);
        } else if(duplicatesOk || !matchedTable[match]) {
          result.set(i, match+1);
          matchedTable[match] = true;
        }
      }
    }
    return result.build();
  }

  private static int exactMatch(String toMatch, StringVector table) {
    for(int i=0;i!=table.length();++i) {
         String t = pmatchElementAt(table, i);
         if(toMatch.equals(t)) {
           return i;
         }
    }
    return -1;
  }

  /**
   * Attempts to match a string value within in a table of values
   * @param toMatch
   * @param table
   * @return the index of the unique martial match, UNMATCHED if there were no partial matches,
   * and MULTIPLE_MATCH if there were multiple partial matches
   */
  private static int uniquePartialMatch(String toMatch, StringVector table) {
    int partialMatch = UNMATCHED;
    for(int i=0;i!=table.length();++i) {
      String t = pmatchElementAt(table, i);
      if(t.startsWith(toMatch)) {
        // if we've previously found a partial match, abort
        if(partialMatch != UNMATCHED) {
          return MULTIPLE_MATCH;
        }
        partialMatch = i;
      }
    }
    return partialMatch;
  }

  // NA values are treated as if they were the string constant "NA".
  private static String pmatchElementAt(StringVector vector, int i) {
    return vector.isElementNA(i) ? "NA" : vector.getElementAsString(i);
 
 
  @Internal("match.call")
  public static SEXP matchCall (@Current Context context, @Current Environment rho, SEXP definition, FunctionCall call, boolean expandDots){
   
    Closure closure;
    if(definition instanceof Closure) {
      closure = (Closure)definition;
    } else if(definition == Null.INSTANCE) {
      if(context.getParent().getType() != Context.Type.FUNCTION) {
        throw new EvalException("match.call() was called from outside a function");
      }
      closure = context.getParent().getClosure();
    } else {
      throw new EvalException("match.call cannot use definition of type '%s'", definition.getTypeName());
    }
   
    PairList matched = Calls.matchArguments(closure.getFormals(), call.getArguments(), true);
   
    PairList.Builder expandedArgs = new PairList.Builder();
    for(PairList.Node node : matched.nodes()) {
      if(node.getValue() != Symbol.MISSING_ARG) {
        if(expandDots && node.getTag() == Symbols.ELLIPSES) {
          for(PairList.Node elipseNode : ((PairList)node.getValue()).nodes()) {
            expandedArgs.add(elipseNode.getRawTag(), elipseNode.getValue());
          }
        } else {
          expandedArgs.add(node.getTag(), node.getValue());
        }
      }
    }
   
    return new FunctionCall(call.getFunction(), expandedArgs.build());
  }
 
  /**
   * Returns an IntVector indices of elements that are {@code TRUE}.
   *
   * <p>Note that the which() function in the base package handles
   * array indices and names, this internal function simply returns
   * the indices
   */
  @Internal
  public static IntVector which(Vector x) {
    IntArrayVector.Builder indices = new IntArrayVector.Builder();
    Vector xn = x.getNames();

    NamesBuilder names = NamesBuilder.withInitialLength(0);
    for(int i=0;i!=x.length();++i) {
      if(x.isElementTrue(i)) {
        indices.add(i+1);
        if(xn != Null.INSTANCE) {
          names.add(xn.getElementAsString(i));
        }
      }
    }
    return indices
      .setAttribute(Symbols.NAMES, names.build())
      .build();
  }

  private static class FactorString extends StringVector {

    private final Vector factor;
    private final Vector levels;

    private FactorString(Vector factor) {
      super(AttributeMap.EMPTY);
      this.factor = factor;
      this.levels = (Vector) factor.getAttribute(Symbols.LEVELS);
    }

    @Override
    public int length() {
      return factor.length();
    }

    @Override
    protected StringVector cloneWithNewAttributes(AttributeMap attributes) {
      throw new UnsupportedOperationException();
    }

    @Override
    public String getElementAsString(int index) {
      if(factor.isElementNA(index)) {
        return StringVector.NA;
      } else {
        int level = factor.getElementAsInt(index);
        return levels.getElementAsString(level-1);
      }
    }

    @Override
    public boolean isConstantAccessTime() {
      return true;
    }
  }
}

TOP

Related Classes of org.renjin.primitives.match.Match$FactorString

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.