Package org.apache.crunch.types

Examples of org.apache.crunch.types.Converter
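In Crunch, a Converter sits between a PType and the raw key/value pairs that
Hadoop input and output formats (and, in crunch-spark, JavaPairRDDs) carry: it
converts incoming pairs into values, turns values back into outgoing pairs, and
reports the key and value classes used to configure jobs. As a rough sketch of
the interface, with method names taken from the calls made in the snippets
below (treat the exact generics and signatures as an approximation, not the
authoritative API):

  // Sketch only: simplified from the usage visible in the snippets below.
  public interface Converter<K, V, S, T> extends Serializable {
    S convertInput(K key, V value);   // input side: Hadoop pair -> value
    K outputKey(S value);             // output side: value -> Hadoop key
    V outputValue(S value);           // output side: value -> Hadoop value
    Class<K> getKeyClass();           // key class for job configuration
    Class<V> getValueClass();         // value class for job configuration
    boolean applyPTypeTransforms();   // should the PType's map fns be applied?
  }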


          // From crunch-spark's output-writing path: cache the RDD, then
          // write it once per configured Target.
          rdd.rdd().cache();
        }
        for (Target t : targets) {
          Configuration conf = new Configuration(getConfiguration());
          if (t instanceof MapReduceTarget) { //TODO: check this earlier
            Converter c = t.getConverter(ptype);
            JavaPairRDD<?, ?> outRDD;
            if (rdd instanceof JavaRDD) {
              outRDD = ((JavaRDD) rdd)
                  .map(new MapFunction(ptype.getOutputMapFn(), ctxt))
                  .mapToPair(new OutputConverterFunction(c)); // mapToPair: outRDD is a JavaPairRDD
            } else {
              outRDD = ((JavaPairRDD) rdd)
                  .map(new PairMapFunction(ptype.getOutputMapFn(), ctxt))
                  .mapToPair(new OutputConverterFunction(c));
            }

            try {
              Job job = Job.getInstance(conf);
              if (t instanceof PathTarget) {
                PathTarget pt = (PathTarget) t;
                pt.configureForMapReduce(job, ptype, pt.getPath(), null);
                Path tmpPath = pipeline.createTempPath();
                outRDD.saveAsNewAPIHadoopFile(
                    tmpPath.toString(),
                    c.getKeyClass(),
                    c.getValueClass(),
                    job.getOutputFormatClass(),
                    job.getConfiguration());
                pt.handleOutputs(job.getConfiguration(), tmpPath, -1);
              } else if (t instanceof MapReduceTarget) {
                MapReduceTarget mrt = (MapReduceTarget) t;
                // ... remainder of the original method elided
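The OutputConverterFunction used above is the glue between the mapped values
and saveAsNewAPIHadoopFile: it turns each value back into a key/value pair for
the Hadoop output format. A minimal sketch of such a wrapper, assuming Spark's
PairFunction API and the outputKey/outputValue shape sketched at the top of the
page (the *Sketch suffix marks the class as illustrative, not the real one):

  import org.apache.crunch.types.Converter;
  import org.apache.spark.api.java.function.PairFunction;
  import scala.Tuple2;

  // Adapt a Converter's output side to a Spark PairFunction.
  public class OutputConverterFunctionSketch<K, V, S> implements PairFunction<S, K, V> {
    private final Converter<K, V, S, ?> converter;

    public OutputConverterFunctionSketch(Converter<K, V, S, ?> converter) {
      this.converter = converter;
    }

    @Override
    public Tuple2<K, V> call(S s) {
      return new Tuple2<K, V>(converter.outputKey(s), converter.outputValue(s));
    }
  }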



  private void writeSequenceFileFromPCollection(final FileSystem fs, final Path path,
      final PCollection collection) throws IOException {
    final PType pType = collection.getPType();
    final Converter converter = pType.getConverter();
    final Class valueClass = converter.getValueClass();

    final SequenceFile.Writer writer = new SequenceFile.Writer(fs, fs.getConf(), path,
        NullWritable.class, valueClass);

    // Minimal completion of the truncated original: write each materialized
    // value under a NullWritable key, then close the writer.
    final MapFn mapFn = pType.getOutputMapFn();
    mapFn.initialize();
    for (final Object o : collection.materialize()) {
      writer.append(NullWritable.get(), mapFn.map(o));
    }
    writer.close();
  }
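A hypothetical call site for the helper above (the configuration, path, and
collection variables are illustrative, not from the original source):

  // Illustrative usage only; the output path is hypothetical.
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  writeSequenceFileFromPCollection(fs, new Path("/tmp/converter-example.seq"), collection);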

  // Input side: materialize a Crunch Source as a Spark RDD, using the source's
  // Converter for the Hadoop key/value classes and the input conversion.
  public JavaRDDLike<?, ?> getJavaRDDLike(SparkRuntime runtime) {
    try {
      Job job = Job.getInstance(runtime.getConfiguration());
      FileInputFormat.addInputPaths(job, "/tmp"); //placeholder
      source.configureSource(job, -1);
      Converter converter = source.getConverter();
      JavaPairRDD<?, ?> input = runtime.getSparkContext().newAPIHadoopRDD(
          job.getConfiguration(),
          CrunchInputFormat.class,
          converter.getKeyClass(),
          converter.getValueClass());
      input.rdd().setName(source.toString());
      MapFn mapFn = converter.applyPTypeTransforms() ? source.getType().getInputMapFn() : IdentityFn.getInstance();
      return input
          .map(new InputConverterFunction(converter))
          .map(new MapFunction(mapFn, runtime.getRuntimeContext()));
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
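The InputConverterFunction used above goes the other way: it unpacks each
Hadoop key/value pair into a Crunch value. A minimal sketch, assuming Spark's
Function API and the convertInput shape sketched at the top of the page (again,
the *Sketch name marks it as illustrative):

  import org.apache.crunch.types.Converter;
  import org.apache.spark.api.java.function.Function;
  import scala.Tuple2;

  // Adapt a Converter's input side to a Spark Function.
  public class InputConverterFunctionSketch<K, V, S> implements Function<Tuple2<K, V>, S> {
    private final Converter<K, V, S, ?> converter;

    public InputConverterFunctionSketch(Converter<K, V, S, ?> converter) {
      this.converter = converter;
    }

    @Override
    public S call(Tuple2<K, V> kv) {
      return converter.convertInput(kv._1(), kv._2());
    }
  }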

  private static List<DoNode> allowsChildren() {
    return Lists.newArrayList();
  }

  public static <K, V> DoNode createGroupingNode(String name, PGroupedTableType<K, V> ptype) {
    Converter groupingConverter = ptype.getGroupingConverter();
    DoFn<?, ?> fn = groupingConverter.applyPTypeTransforms() ? ptype.getOutputMapFn() : IdentityFn.getInstance();
    return new DoNode(fn, name, ptype, NO_CHILDREN, groupingConverter, null, null);
  }

  public static DoNode createFnNode(String name, DoFn<?, ?> function, PType<?> ptype, ParallelDoOptions options) {
    return new DoNode(function, name, ptype, allowsChildren(), null, null, options);
  }

  public static <S> DoNode createInputNode(Source<S> source) {
    Converter srcConverter = source.getConverter();
    PType<?> ptype = source.getType();
    DoFn<?, ?> fn = srcConverter.applyPTypeTransforms() ? ptype.getInputMapFn() : IdentityFn.getInstance();
    return new DoNode(fn, source.toString(), ptype, allowsChildren(), null, source, null);
  }

    // From DoNode.toRTNode(...): configure the fn, convert the children, and
    // pick the input-side Converter for this node.
    fn.configure(conf);
    for (DoNode child : children) {
      childRTNodes.add(child.toRTNode(false, conf, nodeContext));
    }

    Converter inputConverter = null;
    if (inputNode) {
      if (nodeContext == NodeContext.MAP) {
        inputConverter = source.getConverter();
      } else {
        inputConverter = ((PGroupedTableType<?, ?>) ptype).getGroupingConverter();
      }
    }
    // ... remainder of the original method elided

        }
        // A second version of the write path shown earlier: this one
        // broadcasts the Configuration to the workers and honors
        // Converter.applyPTypeTransforms() when choosing the output map fn.
        for (Target t : targets) {
          Configuration conf = new Configuration(getConfiguration());
          getRuntimeContext().setConf(sparkContext.broadcast(WritableUtils.toByteArray(conf)));
          if (t instanceof MapReduceTarget) { //TODO: check this earlier
            Converter c = t.getConverter(ptype);
            IdentityFn ident = IdentityFn.getInstance();
            JavaPairRDD<?, ?> outRDD;
            if (rdd instanceof JavaRDD) {
              outRDD = ((JavaRDD) rdd)
                  .map(new MapFunction(c.applyPTypeTransforms() ? ptype.getOutputMapFn() : ident, ctxt))
                  .mapToPair(new OutputConverterFunction(c));
            } else {
              outRDD = ((JavaPairRDD) rdd)
                  .map(new PairMapFunction(c.applyPTypeTransforms() ? ptype.getOutputMapFn() : ident, ctxt))
                  .mapToPair(new OutputConverterFunction(c));
            }
            try {
              Job job = Job.getInstance(conf);
              if (t instanceof PathTarget) {
                PathTarget pt = (PathTarget) t;
                pt.configureForMapReduce(job, ptype, pt.getPath(), null);
                Path tmpPath = pipeline.createTempPath();
                outRDD.saveAsNewAPIHadoopFile(
                    tmpPath.toString(),
                    c.getKeyClass(),
                    c.getValueClass(),
                    job.getOutputFormatClass(),
                    job.getConfiguration());
                pt.handleOutputs(job.getConfiguration(), tmpPath, -1);
              } else if (t instanceof MapReduceTarget) {
                MapReduceTarget mrt = (MapReduceTarget) t;
                // ... remainder of the original method elided

  // Another version of getJavaRDDLike; here the converted values are pairs,
  // hence the PairMapFunction.
  @Override
  public JavaRDDLike<?, ?> getJavaRDDLike(SparkRuntime runtime) {
    try {
      Job job = Job.getInstance(runtime.getConfiguration());
      source.configureSource(job, -1); // TODO: a custom input format for crunch-spark
      Converter converter = source.getConverter();
      JavaPairRDD<?, ?> input = runtime.getSparkContext().newAPIHadoopRDD(
          job.getConfiguration(),
          CrunchInputFormat.class,
          converter.getKeyClass(),
          converter.getValueClass());
      input.rdd().setName(source.toString());
      MapFn mapFn = converter.applyPTypeTransforms() ? source.getType().getInputMapFn() : IdentityFn.getInstance();
      return input
          .map(new InputConverterFunction(converter))
          .map(new PairMapFunction(mapFn, runtime.getRuntimeContext()));
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }


