Package com.splout.db.hadoop

Examples of com.splout.db.hadoop.TablespaceBuilder


  @Test
  public void testCorrectTablespace() throws TableBuilderException, TablespaceBuilderException {
    Table table1 = new TableBuilder(SCHEMA_1).addCSVTextFile("foo1.txt").partitionBy("id1").build();
    Table table2 = new TableBuilder(SCHEMA_2).addCSVTextFile("foo2.txt").partitionBy("id2").build();

    TablespaceBuilder builder = new TablespaceBuilder();
    builder.add(table1);
    builder.add(table2);

    builder.setNPartitions(2);

    builder.build();
  }
View Full Code Here


  @Test
  public void testCorrectTablespaceMultiplePartitionBy() throws TableBuilderException, TablespaceBuilderException {
    Table table1 = new TableBuilder(SCHEMA_1).addCSVTextFile("foo1.txt").partitionBy("id1", "value1").build();
    Table table2 = new TableBuilder(SCHEMA_2).addCSVTextFile("foo2.txt").partitionBy("id2", "value2").build();

    TablespaceBuilder builder = new TablespaceBuilder();
    builder.add(table1);
    builder.add(table2);

    builder.setNPartitions(2);

    builder.build();
  }
View Full Code Here

  public void testCorrectTablespaceWithReplicated() throws TableBuilderException, TablespaceBuilderException {
    Table table1 = new TableBuilder(SCHEMA_1).addCSVTextFile("foo1.txt").partitionBy("id1").build();
    Table table2 = new TableBuilder(SCHEMA_2).addCSVTextFile("foo2.txt").partitionBy("id2").build();
    Table table3 = new TableBuilder(SCHEMA_3).addCSVTextFile("foo3.txt").replicateToAll().build();

    TablespaceBuilder builder = new TablespaceBuilder();
    builder.add(table1);
    builder.add(table2);
    builder.add(table3);

    builder.setNPartitions(2);

    builder.build();
  }
View Full Code Here

  @Test(expected=TablespaceBuilderException.class)
  public void testMissingNPartitions() throws TableBuilderException, TablespaceBuilderException {
    Table table1 = new TableBuilder(SCHEMA_1).addCSVTextFile("foo1.txt").partitionBy("id1").build();
    Table table2 = new TableBuilder(SCHEMA_2).addCSVTextFile("foo2.txt").partitionBy("id2").build();

    TablespaceBuilder builder = new TablespaceBuilder();
    builder.add(table1);
    builder.add(table2);

    builder.build();
  }
View Full Code Here

  @Test(expected=TablespaceBuilderException.class)
  public void testNoPartitionTable() throws TableBuilderException, TablespaceBuilderException {
    Table table1 = new TableBuilder(SCHEMA_1).addCSVTextFile("foo1.txt").replicateToAll().build();
    Table table2 = new TableBuilder(SCHEMA_2).addCSVTextFile("foo2.txt").replicateToAll().build();

    TablespaceBuilder builder = new TablespaceBuilder();
    builder.add(table1);
    builder.add(table2);
   
    builder.setNPartitions(2);

    builder.build();
  }
View Full Code Here

  @Test(expected=TablespaceBuilderException.class)
  public void testTableNameCollision() throws TableBuilderException, TablespaceBuilderException {
    Table table1 = new TableBuilder(SCHEMA_1).addCSVTextFile("foo1.txt").partitionBy("id1").build();
    Table table2 = new TableBuilder(SCHEMA_1).addCSVTextFile("foo2.txt").partitionBy("id1").build();
   
    TablespaceBuilder builder = new TablespaceBuilder();
    builder.add(table1);
    builder.add(table2);
   
    builder.setNPartitions(2);
   
    builder.build();
  }
View Full Code Here

  @Test(expected=TablespaceBuilderException.class)
  public void testInvalidCoPartition() throws TableBuilderException, TablespaceBuilderException {
    Table table1 = new TableBuilder(SCHEMA_1).addCSVTextFile("foo1.txt").partitionBy("id1").build();
    Table table2 = new TableBuilder(SCHEMA_3).addCSVTextFile("foo2.txt").partitionBy("id3").build();

    TablespaceBuilder builder = new TablespaceBuilder();
    builder.add(table1);
    builder.add(table2);

    builder.setNPartitions(2);

    builder.build();
  }
View Full Code Here

  @Test(expected=TablespaceBuilderException.class)
  public void testInvalidCoPartitionMultipleFields() throws TableBuilderException, TablespaceBuilderException {
    Table table1 = new TableBuilder(SCHEMA_1).addCSVTextFile("foo1.txt").partitionBy("value1", "id1").build();
    Table table2 = new TableBuilder(SCHEMA_3).addCSVTextFile("foo2.txt").partitionBy("value3", "id3").build();

    TablespaceBuilder builder = new TablespaceBuilder();
    builder.add(table1);
    builder.add(table2);

    builder.setNPartitions(2);

    builder.build();
  }
View Full Code Here

      tableBuilder.initialSQL("pragma page_size=" + pageSize);
      // insertion order is very important for optimizing query speed because it co-locates related data on disk
      tableBuilder.insertionSortOrder(OrderBy.parse("pagename:asc, date:asc"));

      // instantiate a TablespaceBuilder
      TablespaceBuilder tablespaceBuilder = new TablespaceBuilder();

      // we will partition this dataset in as many partitions as:
      tablespaceBuilder.setNPartitions(nPartitions);
      tablespaceBuilder.add(tableBuilder.build());
      // we turn a specific SQLite pragma on for making autocomplete queries fast
      tablespaceBuilder.initStatements("pragma case_sensitive_like=true;");

      HadoopUtils.deleteIfExists(outFs, outPath);

      // finally, instantiate a TablespaceGenerator and execute it
      TablespaceGenerator tablespaceViewBuilder;

      if(generateTupleFiles) {
        // we subclass TablespaceGenerator to be able to run the generation without outputting the SQLite stores, for
        // benchmark comparisons.
        // In the future this feature may be useful in general for debugging store creation.
        tablespaceViewBuilder = new TablespaceGenerator(tablespaceBuilder.build(), outPath, this.getClass()) {

          @Override
          public void generateView(Configuration conf, SamplingType samplingType,
              SamplingOptions samplingOptions) throws Exception {

            prepareOutput(conf);
            final int nPartitions = tablespace.getnPartitions();
            if(nPartitions > 1) {
              partitionMap = sample(nPartitions, conf, samplingType, samplingOptions);
            } else {
              partitionMap = PartitionMap.oneShardOpenedMap();
            }
            writeOutputMetadata(conf);

            TupleMRBuilder builder = createMRBuilder(nPartitions, conf);
            // Set a TupleOutput here instead of SQLiteOutput
            builder.setOutput(new Path(outputPath, OUT_STORE), new TupleOutputFormat(tableSchema),
                ITuple.class, NullWritable.class);
            executeViewGeneration(builder);
          }
        };
      } else {
        // ... otherwise a standard TablespaceGenerator is used.
        tablespaceViewBuilder = new TablespaceGenerator(tablespaceBuilder.build(), outPath, this.getClass());
      }

      tablespaceViewBuilder.generateView(getConf(), SamplingType.FULL_SCAN,
          new TupleSampler.FullScanSamplingOptions());
    }
View Full Code Here

TOP

Related Classes of com.splout.db.hadoop.TablespaceBuilder

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.