Package org.apache.tez.dag.api

Examples of org.apache.tez.dag.api.DataSinkDescriptor


                VertexGroup vertexGroup = dag.createVertexGroup(groupName, groupMembers);
                tezOp.getVertexGroupInfo().setVertexGroup(vertexGroup);
                POStore store = tezOp.getVertexGroupInfo().getStore();
                if (store != null) {
                    vertexGroup.addDataSink(store.getOperatorKey().toString(),
                            new DataSinkDescriptor(tezOp.getVertexGroupInfo().getStoreOutputDescriptor(),
                            OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), dag.getCredentials()));
                }
            }
        }
    }
View Full Code Here


                            .setStoreOutputDescriptor(storeOutDescriptor);
                    continue;
                }
            }
            vertex.addDataSink(store.getOperatorKey().toString(),
                    new DataSinkDescriptor(storeOutDescriptor,
                    OutputCommitterDescriptor.create(MROutputCommitter.class.getName()),
                    dag.getCredentials()));
        }

        // LoadFunc and StoreFunc add delegation tokens to Job Credentials in
View Full Code Here

    }
    // Map only jobs.
    if (stageNum == totalStages -1) {
      OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName())
          .setUserPayload(vertexUserPayload);
      vertex.addDataSink("MROutput", new DataSinkDescriptor(od,
          OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null));
    }

    Map<String, String> taskEnv = new HashMap<String, String>();
    setupMapReduceEnv(stageConf, taskEnv, isMap);
View Full Code Here

        } catch (IOException e) {
          throw new TezUncheckedException(e);
        }
      }

      return new DataSinkDescriptor(
          OutputDescriptor.create(outputClassName).setUserPayload(createUserPayload()),
          (doCommit ? OutputCommitterDescriptor.create(
              MROutputCommitter.class.getName()) : null), credentials);
    }
View Full Code Here

    od.setUserPayload(UserPayload.create(ByteBuffer.wrap(
        new MultiAttemptDAG.FailingOutputCommitter.FailingOutputCommitterConfig(true)
            .toUserPayload())));
    OutputCommitterDescriptor ocd = OutputCommitterDescriptor.create(
        MultiAttemptDAG.FailingOutputCommitter.class.getName());
    dag.getVertex("v3").addDataSink("FailingOutput", new DataSinkDescriptor(od, ocd, null));
    runDAGAndVerify(dag, State.FAILED);
  }
View Full Code Here

    DataSourceDescriptor dataSource = MRInput.createConfigBuilder(new Configuration(tezConf),
        TextInputFormat.class, inputPath).build();

    // Create a descriptor that describes the output data to Tez. Using MROutput to write text
    // data to the given output path. The TextOutputFormat is used to write the text data.
    DataSinkDescriptor dataSink = MROutput.createConfigBuilder(new Configuration(tezConf),
        TextOutputFormat.class, outputPath).build();

    // Create a vertex that reads the data from the data source and tokenizes it using the
    // TokenProcessor. The number of tasks that will do the work for this vertex will be decided
    // using the information provided by the data source descriptor.
View Full Code Here

      int numPartitions, String dagName) throws IOException {

    DataSourceDescriptor dataSource = MRInput.createConfigBuilder(new Configuration(tezConf),
        TextInputFormat.class, inputPath).build();

    DataSinkDescriptor dataSink = MROutput.createConfigBuilder(new Configuration(tezConf),
        TextOutputFormat.class, outputPath).build();

    Vertex tokenizerVertex = Vertex.create(TOKENIZER, ProcessorDescriptor.create(
        TokenProcessor.class.getName()));
    tokenizerVertex.addDataSource(INPUT, dataSource);
View Full Code Here

    stage2Vertex.addTaskLocalFiles(commonLocalResources);

    // Configure the Output for stage2
    stage2Vertex.addDataSink(
        "MROutput",
        new DataSinkDescriptor(OutputDescriptor.create(MROutput.class.getName())
            .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)),
            OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null));

    UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
        .newBuilder(Text.class.getName(), TextLongPair.class.getName()).build();
View Full Code Here

    Configuration outputConf = new Configuration(tezConf);
    outputConf.setBoolean("mapred.reducer.new-api", false);
    outputConf.set("mapred.output.format.class", TextOutputFormat.class.getName());
    outputConf.set(FileOutputFormat.OUTDIR, outputPath);
    DataSinkDescriptor od = MROutput.createConfigBuilder(outputConf, null).build();
    checkerVertex.addDataSink("union", od);
   

    Configuration allPartsConf = new Configuration(tezConf);
    DataSinkDescriptor od2 = MROutput.createConfigBuilder(allPartsConf,
        TextOutputFormat.class, outputPath + "-all-parts").build();
    checkerVertex.addDataSink("all-parts", od2);

    Configuration partsConf = new Configuration(tezConf);   
    DataSinkDescriptor od1 = MROutput.createConfigBuilder(partsConf,
        TextOutputFormat.class, outputPath + "-parts").build();
    VertexGroup unionVertex = dag.createVertexGroup("union", mapVertex1, mapVertex2);
    unionVertex.addDataSink("parts", od1);

    OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
View Full Code Here

    // Configure the Output for stage2
    OutputDescriptor od = OutputDescriptor.create(MROutput.class.getName())
        .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf));
    OutputCommitterDescriptor ocd =
        OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
    stage2Vertex.addDataSink("MROutput", new DataSinkDescriptor(od, ocd, null));

    UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
        .newBuilder(Text.class.getName(), TextLongPair.class.getName()).build();

    DAG dag = DAG.create("FilterLinesByWord");
View Full Code Here

TOP

Related Classes of org.apache.tez.dag.api.DataSinkDescriptor

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.