Examples of com.splout.db.hadoop.TablespaceBuilder

com.splout.db.hadoop.TablespaceBuilder
This class is the main entry point for generating Splout views. It is a builder that maps a set of files in a FileSystem to the tables that should be exposed in a Splout Tablespace.
We need to use {@link TableBuilder} for obtaining {@link Table} beans. Then we can add Tables to the Tablespace.
We will obtain a {@link TablespaceSpec} bean after building it.

  @Test
  public void testCorrectTablespace() throws TableBuilderException, TablespaceBuilderException {
    Table table1 = new TableBuilder(SCHEMA_1).addCSVTextFile("foo1.txt").partitionBy("id1").build();
    Table table2 = new TableBuilder(SCHEMA_2).addCSVTextFile("foo2.txt").partitionBy("id2").build();


    TablespaceBuilder builder = new TablespaceBuilder();
    builder.add(table1);
    builder.add(table2);


    builder.setNPartitions(2);


    builder.build();
  }

View Full Code Here

  @Test
  public void testCorrectTablespaceMultiplePartitionBy() throws TableBuilderException, TablespaceBuilderException {
    Table table1 = new TableBuilder(SCHEMA_1).addCSVTextFile("foo1.txt").partitionBy("id1", "value1").build();
    Table table2 = new TableBuilder(SCHEMA_2).addCSVTextFile("foo2.txt").partitionBy("id2", "value2").build();


    TablespaceBuilder builder = new TablespaceBuilder();
    builder.add(table1);
    builder.add(table2);


    builder.setNPartitions(2);


    builder.build();
  }

View Full Code Here

  public void testCorrectTablespaceWithReplicated() throws TableBuilderException, TablespaceBuilderException {
    Table table1 = new TableBuilder(SCHEMA_1).addCSVTextFile("foo1.txt").partitionBy("id1").build();
    Table table2 = new TableBuilder(SCHEMA_2).addCSVTextFile("foo2.txt").partitionBy("id2").build();
    Table table3 = new TableBuilder(SCHEMA_3).addCSVTextFile("foo3.txt").replicateToAll().build();


    TablespaceBuilder builder = new TablespaceBuilder();
    builder.add(table1);
    builder.add(table2);
    builder.add(table3);


    builder.setNPartitions(2);


    builder.build();
  }

View Full Code Here

  @Test(expected=TablespaceBuilderException.class)
  public void testMissingNPartitions() throws TableBuilderException, TablespaceBuilderException {
    Table table1 = new TableBuilder(SCHEMA_1).addCSVTextFile("foo1.txt").partitionBy("id1").build();
    Table table2 = new TableBuilder(SCHEMA_2).addCSVTextFile("foo2.txt").partitionBy("id2").build();


    TablespaceBuilder builder = new TablespaceBuilder();
    builder.add(table1);
    builder.add(table2);


    builder.build();
  }

View Full Code Here

  @Test(expected=TablespaceBuilderException.class)
  public void testNoPartitionTable() throws TableBuilderException, TablespaceBuilderException {
    Table table1 = new TableBuilder(SCHEMA_1).addCSVTextFile("foo1.txt").replicateToAll().build();
    Table table2 = new TableBuilder(SCHEMA_2).addCSVTextFile("foo2.txt").replicateToAll().build();


    TablespaceBuilder builder = new TablespaceBuilder();
    builder.add(table1);
    builder.add(table2);
    
    builder.setNPartitions(2);


    builder.build();
  }

View Full Code Here

  @Test(expected=TablespaceBuilderException.class)
  public void testTableNameCollision() throws TableBuilderException, TablespaceBuilderException {
    Table table1 = new TableBuilder(SCHEMA_1).addCSVTextFile("foo1.txt").partitionBy("id1").build();
    Table table2 = new TableBuilder(SCHEMA_1).addCSVTextFile("foo2.txt").partitionBy("id1").build();
    
    TablespaceBuilder builder = new TablespaceBuilder();
    builder.add(table1);
    builder.add(table2);
    
    builder.setNPartitions(2);
    
    builder.build();
  }

View Full Code Here

  @Test(expected=TablespaceBuilderException.class)
  public void testInvalidCoPartition() throws TableBuilderException, TablespaceBuilderException {
    Table table1 = new TableBuilder(SCHEMA_1).addCSVTextFile("foo1.txt").partitionBy("id1").build();
    Table table2 = new TableBuilder(SCHEMA_3).addCSVTextFile("foo2.txt").partitionBy("id3").build();


    TablespaceBuilder builder = new TablespaceBuilder();
    builder.add(table1);
    builder.add(table2);


    builder.setNPartitions(2);


    builder.build();
  }

View Full Code Here

  @Test(expected=TablespaceBuilderException.class)
  public void testInvalidCoPartitionMultipleFields() throws TableBuilderException, TablespaceBuilderException {
    Table table1 = new TableBuilder(SCHEMA_1).addCSVTextFile("foo1.txt").partitionBy("value1", "id1").build();
    Table table2 = new TableBuilder(SCHEMA_3).addCSVTextFile("foo2.txt").partitionBy("value3", "id3").build();


    TablespaceBuilder builder = new TablespaceBuilder();
    builder.add(table1);
    builder.add(table2);


    builder.setNPartitions(2);


    builder.build();
  }

View Full Code Here

      tableBuilder.initialSQL("pragma page_size=" + pageSize);
      // insertion order is very important for optimizing query speed because it makes data be co-located in disk
      tableBuilder.insertionSortOrder(OrderBy.parse("pagename:asc, date:asc"));


      // instantiate a TablespaceBuilder
      TablespaceBuilder tablespaceBuilder = new TablespaceBuilder();


      // we will partition this dataset in as many partitions as:
      tablespaceBuilder.setNPartitions(nPartitions);
      tablespaceBuilder.add(tableBuilder.build());
      // we turn a specific SQLite pragma on for making autocomplete queries fast
      tablespaceBuilder.initStatements("pragma case_sensitive_like=true;");


      HadoopUtils.deleteIfExists(outFs, outPath);


      // finally, instantiate a TablespaceGenerator and execute it
      TablespaceGenerator tablespaceViewBuilder;


      if(generateTupleFiles) {
        // we subclass TablespaceGenerator to be able to run the generation without outputting the SQLite stores, for
        // benchmark comparisons.
        // In the future this feature may be useful in general for debugging store creation.
        tablespaceViewBuilder = new TablespaceGenerator(tablespaceBuilder.build(), outPath, this.getClass()) {


          /**
           * Generates the view, but configures the job with a {@link TupleOutputFormat}
           * instead of the SQLite output.
           *
           * @param conf            configuration passed to the output preparation, sampling,
           *                        metadata writing and job-builder creation steps
           * @param samplingType    sampling type, only used when there is more than one partition
           * @param samplingOptions sampling options, only used when there is more than one partition
           * @throws Exception declared by the signature; NOTE(review): which of the inherited
           *                   helpers actually throw is not visible from here
           */
          @Override
          public void generateView(Configuration conf, SamplingType samplingType,
              SamplingOptions samplingOptions) throws Exception {


            prepareOutput(conf);
            final int nPartitions = tablespace.getnPartitions();
            // Sampling is only performed when the tablespace has more than one partition;
            // otherwise the single-shard map is used directly.
            if(nPartitions > 1) {
              partitionMap = sample(nPartitions, conf, samplingType, samplingOptions);
            } else {
              partitionMap = PartitionMap.oneShardOpenedMap();
            }
            // NOTE(review): runs after partitionMap has been assigned — presumably the
            // metadata includes the partition map; confirm in TablespaceGenerator.
            writeOutputMetadata(conf);


            TupleMRBuilder builder = createMRBuilder(nPartitions, conf);
            // Set a TupleOutput here instead of SQLiteOutput
            builder.setOutput(new Path(outputPath, OUT_STORE), new TupleOutputFormat(tableSchema),
                ITuple.class, NullWritable.class);
            executeViewGeneration(builder);
          }
        };
      } else {
        // ... otherwise a standard TablespaceGenerator is used.
        tablespaceViewBuilder = new TablespaceGenerator(tablespaceBuilder.build(), outPath, this.getClass());
      }


      tablespaceViewBuilder.generateView(getConf(), SamplingType.FULL_SCAN,
          new TupleSampler.FullScanSamplingOptions());
    }

View Full Code Here

TOP

Related Classes of com.splout.db.hadoop.TablespaceBuilder

com.splout.db.examples.PageCountsExample

com.splout.db.hadoop.TestTablespaceBuilder

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.