Package org.apache.mahout.vectorizer.encoders

Source Code of org.apache.mahout.vectorizer.encoders.WordLikeValueEncoderTest

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.mahout.vectorizer.encoders;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
import org.junit.Test;

import java.util.Iterator;
import java.util.Locale;

public final class WordLikeValueEncoderTest extends MahoutTestCase {

  @Test
  public void testAddToVector() {
    FeatureVectorEncoder enc = new StaticWordValueEncoder("word");
    Vector v = new DenseVector(200);
    enc.addToVector("word1", v);
    enc.addToVector("word2", v);
    Iterator<Vector.Element> i = v.nonZeroes().iterator();
    Iterator<Integer> j = ImmutableList.of(7, 118, 119, 199).iterator();
    while (i.hasNext()) {
      Vector.Element element = i.next();
      assertEquals(j.next().intValue(), element.index());
      assertEquals(1, element.get(), 0);
    }
    assertFalse(j.hasNext());
  }

  @Test
  public void testAsString() {
    Locale.setDefault(Locale.ENGLISH);
    FeatureVectorEncoder enc = new StaticWordValueEncoder("word");
    assertEquals("word:w1:1.0000", enc.asString("w1"));
  }

  @Test
  public void testStaticWeights() {
    StaticWordValueEncoder enc = new StaticWordValueEncoder("word");
    enc.setDictionary(ImmutableMap.<String, Double>of("word1", 3.0, "word2", 1.5));
    Vector v = new DenseVector(200);
    enc.addToVector("word1", v);
    enc.addToVector("word2", v);
    enc.addToVector("word3", v);
    Iterator<Vector.Element> i = v.nonZeroes().iterator();
    Iterator<Integer> j = ImmutableList.of(7, 101, 118, 119, 152, 199).iterator();
    Iterator<Double> k = ImmutableList.of(3.0, 0.75, 1.5, 1.5, 0.75, 3.0).iterator();
    while (i.hasNext()) {
      Vector.Element element = i.next();
      assertEquals(j.next().intValue(), element.index());
    }
    i = v.nonZeroes().iterator();
    while (i.hasNext()) {
      Vector.Element element = i.next();
      assertEquals(String.format("checking v[%d]", element.index()), k.next(), element.get(), 0);
    }
    assertFalse(j.hasNext());
  }

  @Test
  public void testDynamicWeights() {
    FeatureVectorEncoder enc = new AdaptiveWordValueEncoder("word");
    Vector v = new DenseVector(200);
    enc.addToVector("word1", v)// weight is log(2/1.5)
    enc.addToVector("word2", v)// weight is log(3.5 / 1.5)
    enc.addToVector("word1", v)// weight is log(4.5 / 2.5) (but overlays on first value)
    enc.addToVector("word3", v)// weight is log(6 / 1.5)
    Iterator<Vector.Element> i = v.nonZeroes().iterator();
    Iterator<Integer> j = ImmutableList.of(7, 101, 118, 119, 152, 199).iterator();
    Iterator<Double> k = ImmutableList.of(Math.log(2 / 1.5) + Math.log(4.5 / 2.5),
                                          Math.log(6 / 1.5), Math.log(3.5 / 1.5),
                                          Math.log(3.5 / 1.5), Math.log(6 / 1.5),
                                          Math.log(2 / 1.5) + Math.log(4.5 / 2.5)).iterator();
    while (i.hasNext()) {
      Vector.Element element = i.next();
      assertEquals(j.next().intValue(), element.index());
      assertEquals(k.next(), element.get(), 1.0e-6);
    }
    assertFalse(j.hasNext());
  }
}
TOP

Related Classes of org.apache.mahout.vectorizer.encoders.WordLikeValueEncoderTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.