Package com.datasalt.pangool.io

Examples of com.datasalt.pangool.io.Tuple


   * in the Tuple for deserializing custom Objects it is convenient to have such a Tuple event if no-one uses it
   * afterwards.
   */
  private ITuple cachedReadTuple() {
    if(cachedReadTuple == null) {
      cachedReadTuple = new Tuple(readSchema);
    }
    return cachedReadTuple;
  }
View Full Code Here


   * Moves data between a Record and a Tuple
   */
  public ITuple toTuple(Record record, ITuple reuse) throws IOException {
    ITuple tuple = reuse;
    if(tuple == null) {
      tuple = new Tuple(pangoolSchema);
    }

    Schema pangoolSchema = tuple.getSchema();
    for(org.apache.avro.Schema.Field avroField : avroSchema.getFields()) {
      int pos = avroField.pos();
View Full Code Here

  }

  @Override
  public ITuple deserialize(ITuple tuple) throws IOException {
    if(tuple == null) {
      tuple = new Tuple(schemaToDeserialize);
    }
    readFields(tuple, deserializers);
    return tuple;
  }
View Full Code Here

    }

    this.start = in.getPosition();
    more = start < end;

    tuple = new Tuple(in.getSchema());
  }
View Full Code Here

  /*
   * Read the Tuples from a TupleOutput using TupleInputReader.
   */
  public static void readTuples(Path file, Configuration conf, TupleVisitor iterator) throws IOException, InterruptedException {
    TupleFile.Reader reader = new TupleFile.Reader(FileSystem.get(file.toUri(), conf), conf, file);
    Tuple tuple = new Tuple(reader.getSchema());
    while(reader.next(tuple)) {
      iterator.onTuple(tuple);
    }
    reader.close();
  }
View Full Code Here

  private static CachedTuples createCachedTuples(TupleMRConfig config) {
    SerializationInfo serInfo = config.getSerializationInfo();
    boolean multipleSources = config.getNumIntermediateSchemas() >= 2;
    CachedTuples r = new CachedTuples();
    r.commonTuple = new Tuple(serInfo.getCommonSchema());
    for(Schema sourceSchema : config.getIntermediateSchemas()) {
      r.resultTuples.add(new Tuple(sourceSchema));
    }

    if(multipleSources) {
      for(Schema specificSchema : serInfo.getSpecificSchemas()) {
        r.specificTuples.add(new Tuple(specificSchema));
      }
    }
    return r;
  }
View Full Code Here

      int newSize = 0;
      if(line == null) {
        this.line = new Text();
      }
      if(tuple == null) {
        this.tuple = new Tuple(schema);
      }
      while(position < end) {
        newSize = in.readLine(line, maxLineLength,
            Math.max((int) Math.min(Integer.MAX_VALUE, end - position), maxLineLength));
View Full Code Here

    }
    // Discard the default job output; real records go to named outputs below.
    job.setOutput(new Path(output), new HadoopOutputFormat(NullOutputFormat.class), ITuple.class, NullWritable.class);
    // The reducer will just emit the tuple to the corresponding Category output
    job.setTupleReducer(new TupleReducer<ITuple, NullWritable>() {

      // Reused for every emitted record to avoid per-row allocation.
      ITuple outTuple = new Tuple(OUT_SCHEMA);
     
      // NOTE(review): presumably overrides TupleReducer.reduce — an @Override
      // annotation would let the compiler verify the signature; confirm.
      public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
          throws IOException, InterruptedException, TupleMRException {

        for(ITuple tuple: tuples) {
          // Route each input tuple to the named output matching its category.
          Category category = (Category) tuple.get("category");
          outTuple.set("line",  tuple.get("line"));
          outTuple.set("text",  tuple.get("text"));
          outTuple.set("title", tuple.get("title"));
          // NOTE(review): toLowerCase() uses the default locale — if category
          // names can contain non-ASCII letters, Locale.ROOT would be safer.
          collector.getNamedOutput(category.toString().toLowerCase()).write(outTuple, NullWritable.get());
        }
      }
    });
View Full Code Here

    @Override
    public void map(LongWritable key, Text value, TupleMRContext context, Collector collector) throws IOException,
        InterruptedException {
      if(tuple == null) {
        tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema(0));
      }

      String[] fields = value.toString().trim().split("\t");
      tuple.set("intField", Integer.parseInt(fields[0]));
      tuple.set("strField", fields[1]);
View Full Code Here

    // Reusable tuple filled once per input record; allocated in setup().
    private Tuple tuple;

    /**
     * Initializes the JSON mapper (lenient about unknown properties) and
     * allocates the reusable tuple for the "my_schema" intermediate schema.
     */
    public void setup(TupleMRContext context, Collector collector) throws IOException, InterruptedException {
      jsonMapper = new ObjectMapper();
      // Input JSON may carry extra fields we do not map — ignore them instead of failing.
      jsonMapper.configure(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES, false);
      tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema("my_schema"));
    }
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.io.Tuple

Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.