Package com.datasalt.pangool.io

Examples of com.datasalt.pangool.io.Tuple


    // Total number of serialization round-trips; each of the two phases below runs half of them.
    int NUM_ITERATIONS = 10000;
    // A single wrapper instance is shared by both phases; only the datum it holds changes.
    DatumWrapper<ITuple> wrapper = new DatumWrapper<ITuple>();
    // Phase 1 - different schemas: a fresh Tuple is created on every iteration, with its
    // schema picked round-robin (index modulo list size) from intermediateSchemas.
    for (int i = 0; i < NUM_ITERATIONS/2; i++) {
      Tuple tuple = new Tuple(intermediateSchemas.get(i%intermediateSchemas.size()));
      wrapper.datum(tuple);
      // NOTE(review): the meaning of the boolean flag passed to fillTuple is not visible here.
      fillTuple(true, wrapper.datum());
      assertSerializable(serializer, deser, wrapper, false);
    }
    // Phase 2 - same schema: one Tuple built from the intermediate schema named "schema"
    // is installed in the wrapper once and then refilled in place on every iteration.
    Tuple tuple = new Tuple(pangoolConf.getIntermediateSchema("schema"));
    wrapper.datum(tuple);
    for (int i = 0; i < NUM_ITERATIONS/2; i++) {
      fillTuple(true, wrapper.datum());
      assertSerializable(serializer, deser, wrapper, false);
    }
View Full Code Here


  }

  @Test
  public void testSimpleSerializationTestWithNulls() throws IOException {
    Schema schema = new Schema("schema", Fields.parse("first:string,a:int?,b:string?"));
    Tuple t = new Tuple(schema);
    t.set(0, "first");
    t.set(1, 22);
    t.set(2, "hola");
    assertSerializable(t, false);
    t.set(1, null);
    assertSerializable(t, false);
    t.set(2, null);
    assertSerializable(t, false);
    t.set(1, 22);
    t.set(2, "hola");
    assertSerializable(t, false);

    // Now lets reuse
    Tuple re = new Tuple(schema);
    re.set(1, -1);
    re.set(2, "mal");
    t.set(1, 22);
    t.set(2, "hola");
    assertSerializable(t, re, false);
    re.set(1, -1);
    re.set(2, "mal");
    t.set(1, null);
    assertSerializable(t, re, false);
    re.set(1, -1);
    re.set(2, "mal");
    t.set(2, null);
    assertSerializable(t, re, false);
    re.set(1, -1);
    re.set(2, "mal");
    t.set(1, 22);
    t.set(2, "hola");
    assertSerializable(t, re, false);
  }
View Full Code Here

      }
    }
  }

  private static Tuple createTuple(String text, Schema schema) {
    Tuple tuple = new Tuple(schema);
    String[] tokens = text.split("\\s+");
    String country = tokens[0];
    Integer age = Integer.parseInt(tokens[1]);
    String name = tokens[2];
    Integer height = Integer.parseInt(tokens[3]);

    tuple.set(0, country);
    tuple.set(1, age);
    tuple.set(2, name);
    tuple.set(3, height);
    return tuple;
  }
View Full Code Here

    // Run the TopicFingerprint job over INPUT, writing to OUTPUT. The third argument is the
    // string "2" (presumably the number of top words kept per topic - confirm in TopicFingerprint).
    ToolRunner.run(getConf(), new TopicFingerprint(), new String[] {  INPUT, OUTPUT, 2 + "" } );

    // Open the main output file and allocate one tuple, reused for every record read.
    Path outPath = new Path(OUTPUT + "/part-r-00000");
    FileSystem fs = FileSystem.get(outPath.toUri(), conf);
    TupleFile.Reader reader = new TupleFile.Reader(fs, conf, outPath);
    Tuple tuple = new Tuple(reader.getSchema());

    // The order in the output file is deterministic (we have sorted by topic, count)
    // Expected records, in order: (1, a), (1, c), (2, a), (2, b).
    // NOTE(review): the result of reader.next(tuple) is ignored throughout; if it signals
    // end-of-file, a short file would surface as a confusing assertion failure - confirm.
    reader.next(tuple);
    assertEquals(1, tuple.get("topic"));
    assertEquals("a", tuple.get("word").toString());
   
    reader.next(tuple);
    assertEquals(1, tuple.get("topic"));
    assertEquals("c", tuple.get("word").toString());

    reader.next(tuple);
    assertEquals(2, tuple.get("topic"));
    assertEquals("a", tuple.get("word").toString());

    reader.next(tuple);
    assertEquals(2, tuple.get("topic"));
    assertEquals("b", tuple.get("word").toString());
   
    // Check the named output
    // It is read from the sub-directory named by TopicFingerprint.OUTPUT_TOTALCOUNT, with a
    // new reader and a new tuple built from that file's own schema.
 
    reader.close();
    outPath = new Path(OUTPUT + "/" + TopicFingerprint.OUTPUT_TOTALCOUNT + "/" + "part-r-00000");
    reader = new TupleFile.Reader(fs, conf, outPath);
    tuple = new Tuple(reader.getSchema());

    // Expected totals per topic: topic 1 -> 15, topic 2 -> 19.
    reader.next(tuple);
    assertEquals(1, tuple.get("topic"));
    assertEquals(15, tuple.get("totalcount"));

    reader.next(tuple);
    assertEquals(2, tuple.get("topic"));
    assertEquals(19, tuple.get("totalcount"));

    reader.close();
   
    // Presumably removes the test's input and output paths - confirm against trash().
    trash(INPUT, OUTPUT);
  }
View Full Code Here

    // Create a tuple file at inPath using the TopicalWordCount schema and fill it with six
    // (word, topic, count) records: three words for topic 1 and three for topic 2.
    FileSystem fs = FileSystem.get(inPath.toUri(), conf);
    TupleFile.Writer writer = new TupleFile.Writer(fs, conf, inPath, TopicalWordCount.getSchema());

    // Topic 1, words: { a, 10 } { b, 1 } , { c, 5 }
    // Top 2 words = a(10), c(5)
    // A single tuple instance is overwritten and appended for every record.
    // NOTE(review): this relies on append() not retaining a reference to the tuple - confirm.
    ITuple tuple = new Tuple(TopicalWordCount.getSchema());
    tuple.set("word", "a");
    tuple.set("topic", 1);
    tuple.set("count", 10);
    writer.append(tuple);
   
    tuple.set("word", "b");
    tuple.set("topic", 1);
    tuple.set("count", 1);
    writer.append(tuple);

    tuple.set("word", "c");
    tuple.set("topic", 1);
    tuple.set("count", 5);
    writer.append(tuple);
   
    // Topic 2, words: { a, 10 } { b, 9 } , { c, 5 }
    // Top 2 words = a(10), b(9)
    tuple.set("word", "a");
    tuple.set("topic", 2);
    tuple.set("count", 10);
    writer.append(tuple);
   
    tuple.set("word", "b");
    tuple.set("topic", 2);
    tuple.set("count", 9);
    writer.append(tuple);

    tuple.set("word", "c");
    tuple.set("topic", 2);
    tuple.set("count", 5);
    writer.append(tuple);

    writer.close();
  }
View Full Code Here

    private static final long serialVersionUID = 1L;

    @Override
    public void map(Text key, NullWritable value, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException {
      Tuple outputKey = createTuple(key.toString(), schema);
      collector.write(outputKey);
    }
View Full Code Here

    builder.addIntermediateSchema(schema2);

    builder.addInput(new Path(INPUT1), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          ITuple tupleInTuple1 = new Tuple(schema1);
          ITuple tuple1 = new Tuple(tupleSchema1);

          @Override
          public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {
            String[] split = value.toString().split("\t");
            tuple1.set("a", split[0]);
            tuple1.set("b", Integer.parseInt(split[1]));

            tupleInTuple1.set("partitionId", 0);
            tupleInTuple1.set("tuple1", tuple1);
            collector.write(tupleInTuple1);
          }
        });

    builder.addInput(new Path(INPUT2), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          ITuple tupleInTuple2 = new Tuple(schema2);
          ITuple tuple2 = new Tuple(tupleSchema2);

          @Override
          public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {
            String[] split = value.toString().split("\t");
            tuple2.set("c", Double.parseDouble(split[0]));
            tuple2.set("d", Boolean.parseBoolean(split[1]));

            tupleInTuple2.set("partitionId", 0);
            tupleInTuple2.set("tuple2", tuple2);
            collector.write(tupleInTuple2);
          }
View Full Code Here

      // Build a permuted variant of SCHEMA and a random sort criteria over it.
      Schema schema = permuteSchema(SCHEMA);
      OrderBy sortCriteria = createRandomSortCriteria(schema, maxIndex + 1);
      // TODO could we get empty group fields ??
      // The number of group fields requested is 1 + random.nextInt(elements - 1).
      // NOTE(review): if random is a java.util.Random, nextInt(bound) requires bound > 0, so a
      // sort criteria with a single element would throw IllegalArgumentException here -
      // confirm that createRandomSortCriteria always yields at least two elements.
      String[] groupFields = getFirstFields(sortCriteria,
          1 + random.nextInt(sortCriteria.getElements().size() - 1));
      // Two tuples sharing the same schema, both filled over field indexes 0..maxIndex.
      ITuple[] tuples = new ITuple[]{new Tuple(schema), new Tuple(schema)};
      for (ITuple tuple : tuples) {
        fillTuple(false, tuple, 0, maxIndex);
      }

      for (int minIndex = maxIndex; minIndex >= 0; minIndex--) {
View Full Code Here

    // Start from the shared SCHEMA constant.
    Schema schema = SCHEMA;
    if(withNulls) {
      // NOTE(review): the return value of decorateWithNullables is discarded. If it returns a
      // new Schema instead of mutating its argument, the nullable variant is never used, and
      // if it does mutate, it mutates the shared SCHEMA constant - confirm which is intended.
      decorateWithNullables(schema);
    }

    // Generate NUM_ROWS_TO_GENERATE input rows, refilling one tuple instance for each row.
    ITuple tuple = new Tuple(schema);
    for(int i = 0; i < NUM_ROWS_TO_GENERATE; i++) {
      withTupleInput(input, fillTuple(true, tuple));
    }

    TupleMRBuilder builder = new TupleMRBuilder(getConf(), "test");
View Full Code Here

    TupleMRConfig tupleMRConf = builder.buildConf();
    TupleMRConfig.set(tupleMRConf, conf);
   
    partitioner.setConf(conf);
   
    ITuple tuple = new Tuple(schema1);
    tuple.set("number1", 35);
    tuple.set("string1", "foo");
   
    // Check that for the same prefix (number1, string1) we obtain the same partition
   
    int partitionId = -N_PARTITIONS;
    for(int i = 0; i < MAX_ITERATIONS_OVER_ONE_SCHEMA; i++) {
      tuple.set("string2", TestUtils.randomString(10));
      int thisPartitionId = partitioner.getPartition(new DatumWrapper(tuple), NullWritable.get(), N_PARTITIONS);
      Assert.assertTrue(thisPartitionId >= 0);
      Assert.assertTrue(thisPartitionId < N_PARTITIONS);
      if(partitionId == -N_PARTITIONS) {
        partitionId = thisPartitionId;
      } else {
        // Check that the returned partition is always the same even if "string2" field changes its value
        Assert.assertEquals(thisPartitionId, partitionId);
      }
    }
   
    // On the other hand, check that when we vary one of the group by fields, partition varies
   
    int partitionMatches[] = new int[N_PARTITIONS];
    for(int i = 0; i < MAX_ITERATIONS_OVER_ONE_SCHEMA; i++) {
      tuple.set("string1", TestUtils.randomString(10));
      int thisPartitionId = partitioner.getPartition(new DatumWrapper(tuple), NullWritable.get(), N_PARTITIONS);
      Assert.assertTrue(thisPartitionId >= 0);
      Assert.assertTrue(thisPartitionId < N_PARTITIONS);
      partitionMatches[thisPartitionId]++;;
    }
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.io.Tuple

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.