Examples of DAG

hidden.org.codehaus.plexus.util.dag.DAG
org.apache.tez.dag.api.DAG
Top level entity that defines the DAG (Directed Acyclic Graph) representing the data flow graph. Consists of a set of Vertices and Edges connecting the vertices. Vertices represent transformations of data and edges represent movement of data between vertices.
org.apache.tez.dag.app.dag.DAG
Main interface to interact with the job.
org.codehaus.plexus.util.dag.DAG
DAG = Directed Acyclic Graph @author Michal Maczka @version $Id$ @todo this class should be renamed from DAG to Dag
org.eclipse.ui.internal.texteditor.rulers.DAG
See https://en.wikipedia.org/wiki/Directed_acyclic_graph @since 3.3

Examples of org.apache.tez.dag.api.DAG

    int taskCount = TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS_DEFAULT;
    if (conf != null) {
      taskCount = conf.getInt(TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS, TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS_DEFAULT);
      payload = TezUtils.createUserPayloadFromConf(conf);
    }
    DAG dag = DAG.create(name);
    Vertex v1 = Vertex.create("v1", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
    Vertex v2 = Vertex.create("v2", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
    Vertex v3 = Vertex.create("v3", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
    dag.addVertex(v1).addVertex(v2).addVertex(v3);
    dag.addEdge(Edge.create(v1, v2,
        EdgeProperty.create(DataMovementType.SCATTER_GATHER,
            DataSourceType.PERSISTED,
            SchedulingType.SEQUENTIAL,
            TestOutput.getOutputDesc(payload),
            TestInput.getInputDesc(payload))));
    dag.addEdge(Edge.create(v1, v3,
        EdgeProperty.create(DataMovementType.SCATTER_GATHER,
            DataSourceType.PERSISTED,
            SchedulingType.SEQUENTIAL,
            TestOutput.getOutputDesc(payload),
            TestInput.getInputDesc(payload))));

View Full Code Here

Examples of org.apache.tez.dag.api.DAG

    }


    Path lhsPath = new Path(lhsDir);
    Path rhsPath = new Path(rhsDir);


    DAG dag = createDag(tezConf, lhsPath, rhsPath, numPartitions);


    tezClient.waitTillReady();
    DAGClient dagClient = tezClient.submitDAG(dag);
    DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(null);
    if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {

View Full Code Here

Examples of org.apache.tez.dag.api.DAG

    }
  }


  private DAG createDag(TezConfiguration tezConf, Path lhs, Path rhs, int numPartitions)
      throws IOException {
    DAG dag = DAG.create("JoinValidate");


    // Configuration for intermediate output - shared by Vertex1 and Vertex2
    // This should only be setting selective keys from the underlying conf. Fix after there's a
    // better mechanism to configure the IOs.
    OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
        .newBuilder(Text.class.getName(), NullWritable.class.getName(),
            HashPartitioner.class.getName()).build();


    Vertex lhsVertex = Vertex.create(LHS_INPUT_NAME, ProcessorDescriptor.create(
        ForwardingProcessor.class.getName())).addDataSource("lhs",
        MRInput
            .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class,
                lhs.toUri().toString()).groupSplits(false).build());


    Vertex rhsVertex = Vertex.create(RHS_INPUT_NAME, ProcessorDescriptor.create(
        ForwardingProcessor.class.getName())).addDataSource("rhs",
        MRInput
            .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class,
                rhs.toUri().toString()).groupSplits(false).build());


    Vertex joinValidateVertex = Vertex.create("joinvalidate", ProcessorDescriptor.create(
        JoinValidateProcessor.class.getName()), numPartitions);


    Edge e1 = Edge.create(lhsVertex, joinValidateVertex, edgeConf.createDefaultEdgeProperty());
    Edge e2 = Edge.create(rhsVertex, joinValidateVertex, edgeConf.createDefaultEdgeProperty());


    dag.addVertex(lhsVertex).addVertex(rhsVertex).addVertex(joinValidateVertex).addEdge(e1)
        .addEdge(e2);
    return dag;
  }

View Full Code Here

Examples of org.apache.tez.dag.api.DAG

    }


    // the remaining code is the same as submitting any DAG.
    try {
      for (int i=0; i<inputPaths.length; ++i) {
        DAG dag = OrderedWordCount.createDAG(tezConf, inputPaths[i], outputPaths[i], numPartitions,
            ("DAG-Iteration-" + i)); // the names of the DAGs must be unique in a session


        tezClient.waitTillReady();
        System.out.println("Running dag number " + i);
        DAGClient dagClient = tezClient.submitDAG(dag);

View Full Code Here

Examples of org.apache.tez.dag.api.DAG


    // No need to add jar containing this class as assumed to be part of the Tez jars. Otherwise 
    // we would have to add the jars for this code as local files to the vertices.
    
    // Create DAG and add the vertices. Connect the producer and consumer vertices via the edge
    DAG dag = DAG.create("WordCount");
    dag.addVertex(tokenizerVertex)
        .addVertex(summationVertex)
        .addEdge(
            Edge.create(tokenizerVertex, summationVertex, edgeConf.createDefaultEdgeProperty()));
    return dag;  
  }

View Full Code Here

Examples of org.apache.tez.dag.api.DAG

    TezClient tezClient = TezClient.create("WordCount", tezConf);
    // TezClient must be started before it can be used
    tezClient.start();


    try {
        DAG dag = createDAG(tezConf, inputPath, outputPath, numPartitions);


        // check that the execution environment is ready
        tezClient.waitTillReady();
        // submit the dag and receive a dag client to monitor the progress
        DAGClient dagClient = tezClient.submitDAG(dag);

View Full Code Here

Examples of org.apache.tez.dag.api.DAG

    TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
    TezClient tezClient = TezClient.create("TestVertexOrder", tezConf);
    tezClient.start();


    try {
    DAG dag = SimpleTestDAG.createDAGForVertexOrder("dag1", conf);
    DAGClient dagClient = tezClient.submitDAG(dag);
    DAGStatus dagStatus = dagClient.getDAGStatus(null);
    while (!dagStatus.isCompleted()) {
      LOG.info("Waiting for dag to complete. Sleeping for 500ms."
          + " DAG name: " + dag.getName()
          + " DAG context: " + dagClient.getExecutionContext()
          + " Current state: " + dagStatus.getState());
      Thread.sleep(100);
      dagStatus = dagClient.getDAGStatus(null);
    }

View Full Code Here

Examples of org.apache.tez.dag.api.DAG

    if (numPartitions <= 0) {
      System.err.println("NumPartitions must be > 0");
      return 4;
    }


    DAG dag = createDag(tezConf, streamInputPath, hashInputPath, outputPath, numPartitions, doBroadcast);


    tezClient.waitTillReady();
    DAGClient dagClient = tezClient.submitDAG(dag);
    DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(null);
    if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {

View Full Code Here

Examples of org.apache.tez.dag.api.DAG


  }


  private DAG createDag(TezConfiguration tezConf, Path streamPath, Path hashPath, Path outPath,
      int numPartitions, boolean doBroadcast) throws IOException {
    DAG dag = DAG.create("JoinExample" + (doBroadcast ? "-WithBroadcast" : ""));


    /**
     * This vertex represents the side of the join that will be accumulated in a hash 
     * table in order to join it against the other side. It reads text data using the
     * TextInputFormat. ForwardingProcessor simply forwards the data downstream as is.
     */
    Vertex hashFileVertex = Vertex.create(hashSide, ProcessorDescriptor.create(
        ForwardingProcessor.class.getName())).addDataSource(
        inputFile,
        MRInput
            .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class,
                hashPath.toUri().toString()).groupSplits(false).build());


    /**
     * This vertex represents that side of the data that will be streamed and joined 
     * against the other side that has been accumulated into a hash table. It reads 
     * text data using the TextInputFormat. ForwardingProcessor simply forwards the data 
     * downstream as is.
     */
    Vertex streamFileVertex = Vertex.create(streamingSide, ProcessorDescriptor.create(
        ForwardingProcessor.class.getName())).addDataSource(
        inputFile,
        MRInput
            .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class,
                streamPath.toUri().toString()).groupSplits(false).build());


    /**
     * This vertex represents the join operation. It writes the join output as text using
     * the TextOutputFormat. The JoinProcessor is going to perform the join of the 
     * streaming side and the hash side. It is load balanced across numPartitions 
     */
    Vertex joinVertex = Vertex.create(joiner, ProcessorDescriptor.create(
        JoinProcessor.class.getName()), numPartitions).addDataSink(joinOutput,
        MROutput.createConfigBuilder(new Configuration(tezConf),
            TextOutputFormat.class, outPath.toUri().toString()).build());


    /**
     * The streamed side will be partitioned into fragments with the same keys going to 
     * the same fragments using hash partitioning. The data to be joined is the key itself
     * and so the value is null. The number of fragments is initially inferred from the 
     * number of tasks running in the join vertex because each task will be handling one
     * fragment.
     */
    UnorderedPartitionedKVEdgeConfig streamConf =
        UnorderedPartitionedKVEdgeConfig
            .newBuilder(Text.class.getName(), NullWritable.class.getName(),
                HashPartitioner.class.getName()).build();


    /**
     * Connect the join vertex with the stream side
     */
    Edge e1 = Edge.create(streamFileVertex, joinVertex, streamConf.createDefaultEdgeProperty());
    
    EdgeProperty hashSideEdgeProperty = null;
    if (doBroadcast) {
      /**
       * This option can be used when the hash side is small. We can broadcast the entire data to 
       * all fragments of the stream side. This avoids re-partitioning the fragments of the stream 
       * side to match the partitioning scheme of the hash side and avoids costly network data 
       * transfer. However, in this example the stream side is being partitioned in both cases for 
       * brevity of code. The join task can perform the join of its fragment of keys with all the 
       * keys of the hash side.
       * Using an unpartitioned edge to transfer the complete output of the hash side to be 
       * broadcasted to all fragments of the streamed side. Again, since the data is the key, the 
       * value is null.
       */
      UnorderedKVEdgeConfig broadcastConf = UnorderedKVEdgeConfig.newBuilder(Text.class.getName(),
          NullWritable.class.getName()).build();
      hashSideEdgeProperty = broadcastConf.createDefaultBroadcastEdgeProperty();
    } else {
      /**
       * The hash side is also being partitioned into fragments with the same key going to the same
       * fragment using hash partitioning. This way all keys with the same hash value will go to the
       * same fragment from both sides. Thus the join task handling that fragment can join both data
       * set fragments. 
       */
      hashSideEdgeProperty = streamConf.createDefaultEdgeProperty();
    }


    /**
     * Connect the join vertex to the hash side.
     * The join vertex is connected with 2 upstream vertices that provide it with inputs
     */
    Edge e2 = Edge.create(hashFileVertex, joinVertex, hashSideEdgeProperty);


    /**
     * Connect everything up by adding them to the DAG
     */
    dag.addVertex(streamFileVertex).addVertex(hashFileVertex).addVertex(joinVertex)
        .addEdge(e1).addEdge(e2);
    return dag;
  }

View Full Code Here

Examples of org.apache.tez.dag.api.DAG

        NoOpSorter.class.getName()), 1);
    sorterVertex.addDataSink(OUTPUT, dataSink);


    // No need to add jar containing this class as assumed to be part of the tez jars.
    
    DAG dag = DAG.create(dagName);
    dag.addVertex(tokenizerVertex)
        .addVertex(summationVertex)
        .addVertex(sorterVertex)
        .addEdge(
            Edge.create(tokenizerVertex, summationVertex,
                summationEdgeConf.createDefaultEdgeProperty()))

View Full Code Here

0 1 2 3 4 5

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.