Examples of PTypeFamily


Examples of org.apache.crunch.types.PTypeFamily

        }));
    return new FirstElementPObject<S>(minCollect);
  }

  public static <K, V> PTable<K, Collection<V>> collectValues(PTable<K, V> collect) {
    PTypeFamily tf = collect.getTypeFamily();
    final PType<V> valueType = collect.getValueType();
    return collect.groupByKey().mapValues("collect",
        new MapFn<Iterable<V>, Collection<V>>() {
          @Override
          public void initialize() {
            valueType.initialize(getConfiguration());
          }

          public Collection<V> map(Iterable<V> values) {
            List<V> collected = Lists.newArrayList();
            for (V value : values) {
              collected.add(valueType.getDetachedValue(value));
            }
            return collected;
          }
        }, tf.collections(collect.getValueType()));
  }
View Full Code Here

Examples of org.apache.crunch.types.PTypeFamily

          }
        }, tf.collections(collect.getValueType()));
  }
 
  public static <S> PCollection<S> aggregate(PCollection<S> collect, Aggregator<S> aggregator) {
    PTypeFamily tf = collect.getTypeFamily();
    return collect.parallelDo("Aggregate.aggregator", new MapFn<S, Pair<Boolean, S>>() {
      public Pair<Boolean, S> map(S input) {
        return Pair.of(false, input);
      }
    }, tf.tableOf(tf.booleans(), collect.getPType()))
    .groupByKey(1)
    .combineValues(aggregator)
    .values();
  }
View Full Code Here

Examples of org.apache.crunch.types.PTypeFamily

   * @param keyType The {@code PType} for the key of the SequenceFile entry
   * @param valueType The {@code PType} for the value of the SequenceFile entry
   * @return A new {@code TableSource<K, V>} instance
   */
  public static <K, V> TableSource<K, V> sequenceFile(Path path, PType<K> keyType, PType<V> valueType) {
    PTypeFamily ptf = keyType.getFamily();
    return new SeqFileTableSource<K, V>(path, ptf.tableOf(keyType, valueType));
  }
View Full Code Here

Examples of org.apache.crunch.types.PTypeFamily

   * @param keyType The {@code PType} for the key of the SequenceFile entry
   * @param valueType The {@code PType} for the value of the SequenceFile entry
   * @return A new {@code TableSource<K, V>} instance
   */
  public static <K, V> TableSource<K, V> sequenceFile(List<Path> paths, PType<K> keyType, PType<V> valueType) {
    PTypeFamily ptf = keyType.getFamily();
    return new SeqFileTableSource<K, V>(paths, ptf.tableOf(keyType, valueType));
  }
View Full Code Here

Examples of org.apache.crunch.types.PTypeFamily

   * @param keyType The {@code PType} for the key of the SequenceFile entry
   * @param valueType The {@code PType} for the value of the SequenceFile entry
   * @return A new {@code TableSourceTarget<K, V>} instance
   */
  public static <K, V> TableSourceTarget<K, V> sequenceFile(Path path, PType<K> keyType, PType<V> valueType) {
    PTypeFamily ptf = keyType.getFamily();
    return new SeqFileTableSourceTarget<K, V>(path, ptf.tableOf(keyType, valueType));
  }
View Full Code Here

Examples of org.apache.crunch.types.PTypeFamily

  /**
   * Returns a {@code PTable} that contains the unique elements of this collection mapped to a count
   * of their occurrences.
   */
  public static <S> PTable<S, Long> count(PCollection<S> collect, int numPartitions) {
    PTypeFamily tf = collect.getTypeFamily();
    return collect.parallelDo("Aggregate.count", new MapFn<S, Pair<S, Long>>() {
      public Pair<S, Long> map(S input) {
        return Pair.of(input, 1L);
      }
    }, tf.tableOf(collect.getPType(), tf.longs()))
        .groupByKey(numPartitions)
        .combineValues(Aggregators.SUM_LONGS());
  }
View Full Code Here

Examples of org.apache.crunch.types.PTypeFamily

   * @param collect The PCollection whose elements should be counted.
   * @param <S> The type of the PCollection.
   * @return A {@code PObject} containing the number of elements in the {@code PCollection}.
   */
  public static <S> PObject<Long> length(PCollection<S> collect) {
    PTypeFamily tf = collect.getTypeFamily();
    PTable<Integer, Long> countTable = collect
        .parallelDo("Aggregate.count", new MapFn<S, Pair<Integer, Long>>() {
          public Pair<Integer, Long> map(S input) {
            return Pair.of(1, 1L);
          }
        }, tf.tableOf(tf.ints(), tf.longs()))
        .groupByKey(GroupingOptions.builder().numReducers(1).build())
        .combineValues(Aggregators.SUM_LONGS());
    PCollection<Long> count = countTable.values();
    return new FirstElementPObject<Long>(count);
  }
View Full Code Here

Examples of org.apache.crunch.types.PTypeFamily

   * @param maximize if true, the maximum N values from the table will be selected, otherwise the minimal
   *                 N values will be selected
   * @return table containing the top N values from the incoming table
   */
  public static <K, V> PTable<K, V> top(PTable<K, V> ptable, int limit, boolean maximize) {
    PTypeFamily ptf = ptable.getTypeFamily();
    PTableType<K, V> base = ptable.getPTableType();
    PType<Pair<K, V>> pairType = ptf.pairs(base.getKeyType(), base.getValueType());
    PTableType<Integer, Pair<K, V>> inter = ptf.tableOf(ptf.ints(), pairType);
    return ptable.parallelDo("top" + limit + "map", new TopKFn<K, V>(limit, maximize, pairType), inter)
        .groupByKey(1).combineValues(new TopKCombineFn<K, V>(limit, maximize, pairType))
        .parallelDo("top" + limit + "reduce", new DoFn<Pair<Integer, Pair<K, V>>, Pair<K, V>>() {
          public void process(Pair<Integer, Pair<K, V>> input, Emitter<Pair<K, V>> emitter) {
            emitter.emit(input.second());
View Full Code Here

Examples of org.apache.crunch.types.PTypeFamily

   * the order specified using the given number of reducers.
   *
   * @return a {@code PCollection} representing the sorted collection.
   */
  public static <T> PCollection<T> sort(PCollection<T> collection, int numReducers, Order order) {
    PTypeFamily tf = collection.getTypeFamily();
    PTableType<T, Void> type = tf.tableOf(collection.getPType(), tf.nulls());
    Configuration conf = collection.getPipeline().getConfiguration();
    PTable<T, Void> pt = collection.parallelDo("sort-pre", new DoFn<T, Pair<T, Void>>() {
      @Override
      public void process(T input, Emitter<Pair<T, Void>> emitter) {
        emitter.emit(Pair.of(input, (Void) null));
View Full Code Here

Examples of org.apache.crunch.types.PTypeFamily

  // TODO: move to type family?
  private static <K, V> GroupingOptions buildGroupingOptions(PTable<K, V> ptable, Configuration conf,
      int numReducers, Order order) {
    PType<K> ptype = ptable.getKeyType();
    PTypeFamily tf = ptable.getTypeFamily();
    Builder builder = GroupingOptions.builder();
    if (order == Order.DESCENDING) {
      if (tf == WritableTypeFamily.getInstance()) {
        builder.sortComparatorClass(ReverseWritableComparator.class);
      } else if (tf == AvroTypeFamily.getInstance()) {
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.