Package com.datasalt.pangool.tuplemr.mapred.lib.input

Examples of com.datasalt.pangool.tuplemr.mapred.lib.input.TupleInputFormat


    writer.close();

    TablespaceBuilder builder = new TablespaceBuilder();
    builder.setNPartitions(3);
    TableBuilder tableBuilder = new TableBuilder(theSchema1);
    tableBuilder.addFile(new TableInput(new TupleInputFormat(), new HashMap<String, String>(),
        theSchema1, new IdentityRecordProcessor(), new Path(INPUT)));
    // Partition by a javascript that returns the first two characters
    tableBuilder
        .partitionByJavaScript("function partition(record) { var str = record.get('id').toString(); return str.substring(0, 2); }");
    builder.add(tableBuilder.build());
View Full Code Here


      tuple.set("s", r.nextLong() + "");
      writer.append(tuple);
    }
    writer.close();

    TupleInputFormat format = ReflectionUtils.newInstance(TupleInputFormat.class, getConf());
    Job job = new Job(getConf());
    FileInputFormat.setInputPaths(job, outPath);
    logger.info("Using max input split size: " + maxSplitSize);
    FileInputFormat.setMaxInputSplitSize(job, maxSplitSize);
    job.setInputFormatClass(FileInputFormat.class);

    // Read all the splits and count the rows. The number of rows read must
    // be the same as the number of rows written.
    int count = 0;
    for(InputSplit split : format.getSplits(job)) {
      TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1);
      TaskAttemptContext attemptContext = TaskAttemptContextFactory.get(getConf(), attemptId);
      logger.info("Sampling split: " + split);
      RecordReader<ITuple, NullWritable> reader = format.createRecordReader(split, attemptContext);
      reader.initialize(split, attemptContext);
      while(reader.nextKeyValue()) {
        tuple = reader.getCurrentKey();
        count++;
      }
View Full Code Here

   * Defines an input as in {@link PangoolMultipleInputs} using {@link TupleInputFormat}
   *
   * @see PangoolMultipleInputs
   */
  public void addTupleInput(Path path, TupleMapper<ITuple, NullWritable> tupleMapper) {
    multipleInputs.getMultiInputs().add(new Input(path, new TupleInputFormat(), tupleMapper));
  }
View Full Code Here

    writer.append(getTuple("id8", "value61"));
    writer.append(getTuple("id8", "value62"));
   
    writer.close();
   
    TablespaceSpec tablespace = TablespaceSpec.of(theSchema1, "id", new Path(INPUT), new TupleInputFormat()4);
    TablespaceGenerator viewGenerator = new TablespaceGenerator(tablespace, new Path(OUTPUT), this.getClass());
    viewGenerator.generateView(conf, SamplingType.FULL_SCAN, new TupleSampler.RandomSamplingOptions());
   
    List<PartitionEntry> partitionMap = viewGenerator.getPartitionMap().getPartitionEntries();
    assertEquals(4, partitionMap.size());
View Full Code Here

    writer.append(getTupleWithNulls("id4", "value14", 100, 2.0, ""));
    writer.append(getTupleWithNulls("id5", "value15", 100, 2.0, null));
   
    writer.close();
   
    TablespaceSpec tablespace = TablespaceSpec.of(theSchema2, "id", new Path(INPUT), new TupleInputFormat(), 1);
    TablespaceGenerator viewGenerator = new TablespaceGenerator(tablespace, new Path(OUTPUT), this.getClass());
    viewGenerator.generateView(conf, SamplingType.FULL_SCAN, new TupleSampler.RandomSamplingOptions());
   
    SQLite4JavaClient manager = new SQLite4JavaClient(OUTPUT + "/store/0.db", null);
    String results = manager.query("SELECT * FROM schema2;", 100);
View Full Code Here

        Arrays.asList(
            new Table(
                Arrays.asList(
                    new TableInput[] {
                        new TableInput(
                            new TupleInputFormat(),
                            new HashMap<String, String>(),
                            schema,
                            new IdentityRecordProcessor(),
                            new Path(INPUT + "_" + inputPostfix)) }),
                new TableSpec(
View Full Code Here

   * {@link TupleInputFormat}
   *
   * @see PangoolMultipleInputs
   */
  public void addTupleInput(Path path, TupleMapper<ITuple, NullWritable> tupleMapper) {
    this.multiInputs.add(new Input(path, new TupleInputFormat(), tupleMapper));
    AvroUtils.addAvroSerialization(conf);

  }
View Full Code Here

   * {@link TupleInputFormat}
   *
   * @see PangoolMultipleInputs
   */
  public void addTupleInput(Path path, TupleMapper<ITuple, NullWritable> tupleMapper) {
    this.multiInputs.add(new Input(path, new TupleInputFormat(), tupleMapper));
    AvroUtils.addAvroSerialization(conf);

  }
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.tuplemr.mapred.lib.input.TupleInputFormat

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.