Package com.tdunning.plume

Source Code of com.tdunning.plume.WordCountTest

package com.tdunning.plume;

import static com.tdunning.plume.Plume.collectionOf;
import static com.tdunning.plume.Plume.integers;
import static com.tdunning.plume.Plume.strings;
import static com.tdunning.plume.Plume.tableOf;
import static org.junit.Assert.assertEquals;

import java.io.IOException;
import java.util.Map;

import org.junit.Ignore;
import org.junit.Test;

import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import com.google.common.collect.Maps;
import com.google.common.io.Resources;
import com.tdunning.plume.local.eager.LocalPlume;
import com.tdunning.plume.local.lazy.LazyPlume;

public class WordCountTest {
  @Test
  public void wordCount() throws IOException {
    Plume p = new LocalPlume();
    countWords(p.readResourceFile("simple-text.txt"));
  }

  @Test
  public void lazyWordCount() throws IOException {
/*
* I comment out this test temporarily because to lazily execute a wordcount there is a bit more to do -
* (an Executor has to be called before reading the result)
*/
//    Plume p = new LazyPlume();
//    countWords(p.readResourceFile("simple-text.txt"));
  }

  @Test
  public void lazyAvroWordCount() throws IOException {
    Plume p = new LazyPlume();
    String file = Resources.getResource("simple-text.avro").getPath();
    countWords(p.readAvroFile(file, strings()));
  }

  @Test
  public void wordCountAvro() throws IOException {
    Plume p = new LocalPlume();
    String file = Resources.getResource("simple-text.avro").getPath();
    countWords(p.readAvroFile(file, strings()));
  }

  private <T extends CharSequence> void countWords(PCollection<T> lines) {
    final Splitter onNonWordChar = Splitter.on(CharMatcher.BREAKING_WHITESPACE);

    PCollection<String> words = lines.map(new DoFn<T, String>() {
      @Override
      public void process(T x, EmitFn<String> emitter) {
        for (String word : onNonWordChar.split(x.toString())) {
          emitter.emit(word);
        }
      }
    }, collectionOf(strings()));

    PTable<String, Integer> wc = words
            .map(new DoFn<String, Pair<String, Integer>>() {
              @Override
              public void process(String x, EmitFn<Pair<String, Integer>> emitter) {
                emitter.emit(Pair.create(x, 1));
              }
            }, tableOf(strings(), integers()))
            .groupByKey()
            .combine(new CombinerFn<Integer>() {
              @Override
              public Integer combine(Iterable<Integer> counts) {
                int sum = 0;
                for (Integer k : counts) {
                  sum += k;
                }
                return sum;
              }
            });

    Map<String, Integer> m = Maps.newHashMap();
    for (Pair<String, Integer> pair : wc) {
      m.put(pair.getKey(), pair.getValue());
    }
    assertEquals(3, m.get("is").intValue());
    assertEquals(3, m.get("some").intValue());
    assertEquals(3, m.get("simple").intValue());
    assertEquals(1, m.get("examples").intValue());
    assertEquals(2, m.get("text").intValue());
  }
}
TOP

Related Classes of com.tdunning.plume.WordCountTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.