Examples of PTypeFamily


Examples of org.apache.crunch.types.PTypeFamily

    pipeline.done();
  }

  @Test
  public void testWritablesJSON() throws Exception {
    PTypeFamily tf = WritableTypeFamily.getInstance();
    PType<PageRankData> prType = PTypes.jsonString(PageRankData.class, tf);
    String urlInput = tmpDir.copyResourceFileName("urls.txt");
    run(pipeline, urlInput, prType, tf);
    pipeline.done();
  }
View Full Code Here

Examples of org.apache.crunch.types.PTypeFamily

    run(pipeline, urlInput, prType, tf);
    pipeline.done();
  }

  public static PTable<String, PageRankData> pageRank(PTable<String, PageRankData> input, final float d) {
    PTypeFamily ptf = input.getTypeFamily();
    PTable<String, Float> outbound = input.parallelDo(new DoFn<Pair<String, PageRankData>, Pair<String, Float>>() {
      @Override
      public void process(Pair<String, PageRankData> input, Emitter<Pair<String, Float>> emitter) {
        PageRankData prd = input.second();
        for (String link : prd.urls) {
          emitter.emit(Pair.of(link, prd.propagatedScore()));
        }
      }
    }, ptf.tableOf(ptf.strings(), ptf.floats()));

    return input.cogroup(outbound).mapValues(
        new MapFn<Pair<Collection<PageRankData>, Collection<Float>>, PageRankData>() {
          @Override
          public PageRankData map(Pair<Collection<PageRankData>, Collection<Float>> input) {
View Full Code Here

Examples of org.apache.crunch.types.PTypeFamily

   */
  public static <K, U, V> PTable<K, Pair<Collection<U>, Collection<V>>> cogroup(
      int numReducers,
      PTable<K, U> left,
      PTable<K, V> right) {
    PTypeFamily tf = left.getTypeFamily();
    return cogroup(
        tf.pairs(tf.collections(left.getValueType()),
                 tf.collections(right.getValueType())),
        TupleFactory.PAIR,
        numReducers,
        left, right);
  }
View Full Code Here

Examples of org.apache.crunch.types.PTypeFamily

   */
  public static <K, U, V> PTable<K, TupleN> cogroup(
      int numReducers,
      PTable<K, ?> first,
      PTable<K, ?>... rest) {
    PTypeFamily tf = first.getTypeFamily();
    PType[] components = new PType[1 + rest.length];
    components[0] = tf.collections(first.getValueType());
    for (int i = 0; i < rest.length; i++) {
      components[i + 1] = rest[i].getValueType();
    }
    return cogroup(
        tf.tuples(components),
        TupleFactory.TUPLEN,
        numReducers,
        first, rest);
  }
View Full Code Here

Examples of org.apache.crunch.types.PTypeFamily

  private static <K, T extends Tuple> PTable<K, T> cogroup(
      PType<T> outputType,
      TupleFactory tupleFactory,
      int numReducers,
      PTable<K, ?> first, PTable<K, ?>... rest) {
    PTypeFamily ptf = first.getTypeFamily();
    PType[] ptypes = new PType[1 + rest.length];
    ptypes[0] = first.getValueType();
    for (int i = 0; i < rest.length; i++) {
      ptypes[i + 1] = rest[i].getValueType();
    }
    PType<TupleN> itype = ptf.tuples(ptypes);
   
    PTable<K, TupleN> firstInter = first.mapValues("coGroupTag1",
        new CogroupFn(0, 1 + rest.length),
        itype);
    PTable<K, TupleN>[] inter = new PTable[rest.length];
View Full Code Here

Examples of org.apache.crunch.types.PTypeFamily

   * @param right right table to be joined
   * @param joinFn The user-specified implementation of the {@code JoinFn} class
   * @return joined tables
   */
  public PTable<K, Pair<U, V>> join(PTable<K, U> left, PTable<K, V> right, JoinFn<K, U, V> joinFn) {
    PTypeFamily ptf = left.getTypeFamily();
    PGroupedTable<Pair<K, Integer>, Pair<U, V>> grouped = preJoin(left, right);
    PTableType<K, Pair<U, V>> ret = ptf
        .tableOf(left.getKeyType(), ptf.pairs(left.getValueType(), right.getValueType()));

    return grouped.parallelDo(joinFn.getJoinType() + grouped.getName(), joinFn, ret);
  }
View Full Code Here

Examples of org.apache.crunch.types.PTypeFamily

    return grouped.parallelDo(joinFn.getJoinType() + grouped.getName(), joinFn, ret);
  }

  static <K, U, V> PGroupedTable<Pair<K, Integer>, Pair<U, V>> preJoin(PTable<K, U> left, PTable<K, V> right) {
    PTypeFamily ptf = left.getTypeFamily();
    PTableType<Pair<K, Integer>, Pair<U, V>> ptt = ptf.tableOf(ptf.pairs(left.getKeyType(), ptf.ints()),
        ptf.pairs(left.getValueType(), right.getValueType()));

    PTable<Pair<K, Integer>, Pair<U, V>> tag1 = left.parallelDo("joinTagLeft",
        new MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
          @Override
          public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, U> input) {
View Full Code Here

Examples of org.apache.crunch.types.PTypeFamily

    }
  }
 

  private PTable<K, Pair<U,V>> joinInternal(PTable<K, U> left, PTable<K, V> right, boolean includeUnmatchedLeftValues) {
    PTypeFamily tf = left.getTypeFamily();
    Iterable<Pair<K, V>> iterable = right.materialize();

    if (iterable instanceof MaterializableIterable) {
      MaterializableIterable<Pair<K, V>> mi = (MaterializableIterable<Pair<K, V>>) iterable;
      MapsideJoinDoFn<K, U, V> mapJoinDoFn = new MapsideJoinDoFn<K, U, V>(mi.getPath().toString(),
          includeUnmatchedLeftValues, right.getPType());
      ParallelDoOptions.Builder optionsBuilder = ParallelDoOptions.builder();
      if (mi.isSourceTarget()) {
        optionsBuilder.sourceTargets((SourceTarget) mi.getSource());
      }
      return left.parallelDo("mapjoin", mapJoinDoFn,
          tf.tableOf(left.getKeyType(), tf.pairs(left.getValueType(), right.getValueType())),
          optionsBuilder.build());
    } else { // in-memory pipeline
      return left.parallelDo(new InMemoryJoinFn<K, U, V>(iterable, includeUnmatchedLeftValues),
          tf.tableOf(left.getKeyType(), tf.pairs(left.getValueType(), right.getValueType())));
    }
  }
View Full Code Here

Examples of org.apache.crunch.types.PTypeFamily

    Class<S> clazz = collect.getPType().getTypeClass();
    if (!clazz.isPrimitive() && !Comparable.class.isAssignableFrom(clazz)) {
      throw new IllegalArgumentException("Can only get max for Comparable elements, not for: "
          + collect.getPType().getTypeClass());
    }
    PTypeFamily tf = collect.getTypeFamily();
    PCollection<S> maxCollect = PTables.values(collect
        .parallelDo("max", new DoFn<S, Pair<Boolean, S>>() {
          private transient S max = null;

          public void process(S input, Emitter<Pair<Boolean, S>> emitter) {
            if (max == null || ((Comparable<S>) max).compareTo(input) < 0) {
              max = input;
            }
          }

          public void cleanup(Emitter<Pair<Boolean, S>> emitter) {
            if (max != null) {
              emitter.emit(Pair.of(true, max));
            }
          }
        }, tf.tableOf(tf.booleans(), collect.getPType())).groupByKey(1)
        .combineValues(new CombineFn<Boolean, S>() {
          public void process(Pair<Boolean, Iterable<S>> input, Emitter<Pair<Boolean, S>> emitter) {
            S max = null;
            for (S v : input.second()) {
              if (max == null || ((Comparable<S>) max).compareTo(v) < 0) {
View Full Code Here

Examples of org.apache.crunch.types.PTypeFamily

    Class<S> clazz = collect.getPType().getTypeClass();
    if (!clazz.isPrimitive() && !Comparable.class.isAssignableFrom(clazz)) {
      throw new IllegalArgumentException("Can only get min for Comparable elements, not for: "
          + collect.getPType().getTypeClass());
    }
    PTypeFamily tf = collect.getTypeFamily();
    PCollection<S> minCollect = PTables.values(collect
        .parallelDo("min", new DoFn<S, Pair<Boolean, S>>() {
          private transient S min = null;

          public void process(S input, Emitter<Pair<Boolean, S>> emitter) {
            if (min == null || ((Comparable<S>) min).compareTo(input) > 0) {
              min = input;
            }
          }

          public void cleanup(Emitter<Pair<Boolean, S>> emitter) {
            if (min != null) {
              emitter.emit(Pair.of(false, min));
            }
          }
        }, tf.tableOf(tf.booleans(), collect.getPType())).groupByKey(1)
        .combineValues(new CombineFn<Boolean, S>() {
          public void process(Pair<Boolean, Iterable<S>> input, Emitter<Pair<Boolean, S>> emitter) {
            S min = null;
            for (S v : input.second()) {
              if (min == null || ((Comparable<S>) min).compareTo(v) > 0) {
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.