Package com.datasalt.pangool.io

Examples of com.datasalt.pangool.io.ITuple


    Schema schema = SCHEMA;
    if(withNulls) {
      decorateWithNullables(schema);
    }

    ITuple tuple = new Tuple(schema);
    for(int i = 0; i < NUM_ROWS_TO_GENERATE; i++) {
      withTupleInput(input, fillTuple(true, tuple));
    }

    TupleMRBuilder builder = new TupleMRBuilder(getConf(), "test");
View Full Code Here


    TupleMRConfig tupleMRConf = builder.buildConf();
    TupleMRConfig.set(tupleMRConf, conf);
   
    partitioner.setConf(conf);
   
    ITuple tuple = new Tuple(schema1);
    tuple.set("number1", 35);
    tuple.set("string1", "foo");
   
    // Check that for the same prefix (number1, string1) we obtain the same partition
   
    int partitionId = -N_PARTITIONS;
    for(int i = 0; i < MAX_ITERATIONS_OVER_ONE_SCHEMA; i++) {
      tuple.set("string2", TestUtils.randomString(10));
      int thisPartitionId = partitioner.getPartition(new DatumWrapper(tuple), NullWritable.get(), N_PARTITIONS);
      Assert.assertTrue(thisPartitionId >= 0);
      Assert.assertTrue(thisPartitionId < N_PARTITIONS);
      if(partitionId == -N_PARTITIONS) {
        partitionId = thisPartitionId;
      } else {
        // Check that the returned partition is always the same even if "string2" field changes its value
        Assert.assertEquals(thisPartitionId, partitionId);
      }
    }
   
    // On the other hand, check that when we vary one of the group by fields, partition varies
   
    int partitionMatches[] = new int[N_PARTITIONS];
    for(int i = 0; i < MAX_ITERATIONS_OVER_ONE_SCHEMA; i++) {
      tuple.set("string1", TestUtils.randomString(10));
      int thisPartitionId = partitioner.getPartition(new DatumWrapper(tuple), NullWritable.get(), N_PARTITIONS);
      Assert.assertTrue(thisPartitionId >= 0);
      Assert.assertTrue(thisPartitionId < N_PARTITIONS);
      partitionMatches[thisPartitionId]++;;
    }
View Full Code Here

    TupleMRConfig tupleMRConf = builder.buildConf();
    TupleMRConfig.set(tupleMRConf, conf);
   
    partitioner.setConf(conf);
   
    ITuple tuple = new Tuple(schema);
   
    int partitionMatches[] = new int[N_PARTITIONS];
   
    for(int i = 0; i < MAX_ITERATIONS_OVER_ONE_SCHEMA; i++) {
      tuple.set("foo", (int)(Math.random() * Integer.MAX_VALUE));
      int thisPartitionId = partitioner.getPartition(new DatumWrapper(tuple), NullWritable.get(), N_PARTITIONS);
      Assert.assertTrue(thisPartitionId >= 0);
      Assert.assertTrue(thisPartitionId < N_PARTITIONS);
      partitionMatches[thisPartitionId]++;;
    }
View Full Code Here

  }

  public void withTupleOutput(String output, ITuple expectedTuple) throws IOException {
    List<Pair<Object, Object>> outs = ensureTupleOutput(output);
    for(Pair<Object, Object> out : outs) {
      ITuple currentTuple = (ITuple) out.getFirst();
      if(currentTuple.equals(expectedTuple)) {
        return; //found
      }
    }
    /*
     * Not found. Let's create some meaningful message
View Full Code Here

      if(outs == null) {
        outs = new ArrayList<Pair<Object, Object>>();
        TupleInputReader reader = new TupleInputReader(getConf());
        reader.initialize(new Path(output), getConf());
        while(reader.nextKeyValueNoSync()) {
          ITuple tuple = reader.getCurrentKey();
          outs.add(new Pair<Object, Object>(tuple, NullWritable.get()));
        }
        reader.close();
      }
    } catch(InterruptedException e) {
View Full Code Here

      this.out = new DataOutputStream(out);
    }
  }

  public void serialize(DatumWrapper<ITuple> wrapper) throws IOException {
    ITuple tuple = wrapper.datum();
    if (isMultipleSources) {
      multipleSourcesSerialization(tuple);
    } else {
      oneSourceSerialization(tuple);
    }
View Full Code Here

    if(isRollup) {
      t.swapInstances();
      this.cachedTuples.swapInstances();
    }

    ITuple tuple = (multipleSources) ?
        deserializeMultipleSources() : deserializeOneSource(t.datum());
    t.datum(tuple);
   
    return t;
  }
View Full Code Here

    return t;
  }
 
  private ITuple deserializeMultipleSources() throws IOException {
    CachedTuples tuples = cachedTuples.datum();
    ITuple commonTuple =tuples.commonTuple;
    readFields(commonTuple,in);
    int schemaId = WritableUtils.readVInt(in);
    ITuple specificTuple = tuples.specificTuples.get(schemaId);
    readFields(specificTuple,in);
    ITuple result = tuples.resultTuples.get(schemaId);
    mixIntermediateIntoResult(commonTuple,specificTuple,result,schemaId);
    return result;
  }
View Full Code Here

    }
  }
 
  private ITuple deserializeOneSource(ITuple reuse) throws IOException {
    CachedTuples tuples = cachedTuples.datum();
    ITuple commonTuple = tuples.commonTuple;
    readFields(commonTuple,in);
    if (reuse == null){
      reuse = tuples.resultTuples.get(0);
    }
    int[] commonTranslation = serInfo.getCommonSchemaIndexTranslation(0); //just one common schema
    for (int i =0 ; i < commonTranslation.length ; i++){
      int destPos = commonTranslation[i];
      reuse.set(destPos,commonTuple.get(i));
    }
    return reuse;
  }
View Full Code Here

    for(FileStatus fileStatus : fileSystem.globStatus(generatedModel)) {
      TupleInputReader reader = new TupleInputReader(conf);
      reader.initialize(fileStatus.getPath(), conf);
      while(reader.nextKeyValueNoSync()) {
        // Read Tuple
        ITuple tuple = reader.getCurrentKey();
        Integer count = (Integer) tuple.get("count");
        Category category = (Category) tuple.get("category");
        String word = tuple.get("word").toString();
        vocabulary.add(word);
        tokensPerCategory.put(category, MapUtils.getInteger(tokensPerCategory, category, 0) + count);
        wordCountPerCategory.get(category).put(word, count);
      }
    }
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.io.ITuple

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.