Examples of UnorderedPartitionedKVEdgeConfig


Examples of org.apache.tez.runtime.library.conf.UnorderedPartitionedKVEdgeConfig

     * the same fragments using hash partitioning. The data to be joined is the key itself
     * and so the value is null. The number of fragments is initially inferred from the
     * number of tasks running in the join vertex because each task will be handling one
     * fragment.
     */
    UnorderedPartitionedKVEdgeConfig streamConf =
        UnorderedPartitionedKVEdgeConfig
            .newBuilder(Text.class.getName(), NullWritable.class.getName(),
                HashPartitioner.class.getName()).build();

    /**
     * Connect the join vertex with the stream side
     */
    Edge e1 = Edge.create(streamFileVertex, joinVertex, streamConf.createDefaultEdgeProperty());
   
    EdgeProperty hashSideEdgeProperty = null;
    if (doBroadcast) {
      /**
       * This option can be used when the hash side is small. We can broadcast the entire data to
       * all fragments of the stream side. This avoids re-partitioning the fragments of the stream
       * side to match the partitioning scheme of the hash side and avoids costly network data
       * transfer. However, in this example the stream side is being partitioned in both cases for
       * brevity of code. The join task can perform the join of its fragment of keys with all the
       * keys of the hash side.
       * Using an unpartitioned edge to transfer the complete output of the hash side to be
       * broadcasted to all fragments of the streamed side. Again, since the data is the key, the
       * value is null.
       */
      UnorderedKVEdgeConfig broadcastConf = UnorderedKVEdgeConfig.newBuilder(Text.class.getName(),
          NullWritable.class.getName()).build();
      hashSideEdgeProperty = broadcastConf.createDefaultBroadcastEdgeProperty();
    } else {
      /**
       * The hash side is also being partitioned into fragments with the same key going to the same
       * fragment using hash partitioning. This way all keys with the same hash value will go to the
       * same fragment from both sides. Thus the join task handling that fragment can join both data
       * set fragments.
       */
      hashSideEdgeProperty = streamConf.createDefaultEdgeProperty();
    }

    /**
     * Connect the join vertex to the hash side.
     * The join vertex is connected with 2 upstream vertices that provide it with inputs
View Full Code Here

Examples of org.apache.tez.runtime.library.conf.UnorderedPartitionedKVEdgeConfig

          .build();
      return et1Conf.createDefaultBroadcastEdgeProperty();
    case CUSTOM_EDGE:
      assert partitionerClassName != null;
      partitionerConf = createPartitionerConf(partitionerClassName, conf);
      UnorderedPartitionedKVEdgeConfig et2Conf = UnorderedPartitionedKVEdgeConfig
          .newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf)
          .setFromConfiguration(conf)
          .setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null)
          .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null)
          .build();
      EdgeManagerPluginDescriptor edgeDesc =
          EdgeManagerPluginDescriptor.create(CustomPartitionEdge.class.getName());
      CustomEdgeConfiguration edgeConf =
          new CustomEdgeConfiguration(edgeProp.getNumBuckets(), null);
      DataOutputBuffer dob = new DataOutputBuffer();
      edgeConf.write(dob);
      byte[] userPayload = dob.getData();
      edgeDesc.setUserPayload(UserPayload.create(ByteBuffer.wrap(userPayload)));
      return et2Conf.createDefaultCustomEdgeProperty(edgeDesc);
    case CUSTOM_SIMPLE_EDGE:
      assert partitionerClassName != null;
      partitionerConf = createPartitionerConf(partitionerClassName, conf);
      UnorderedPartitionedKVEdgeConfig et3Conf = UnorderedPartitionedKVEdgeConfig
          .newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf)
          .setFromConfiguration(conf)
          .setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null)
          .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null)
          .build();
      return et3Conf.createDefaultEdgeProperty();
    case SIMPLE_EDGE:
    default:
      assert partitionerClassName != null;
      partitionerConf = createPartitionerConf(partitionerClassName, conf);
      OrderedPartitionedKVEdgeConfig et4Conf = OrderedPartitionedKVEdgeConfig
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.