Package org.kitesdk.data.spi.filesystem

Source Code of org.kitesdk.data.spi.filesystem.TestPartitionedDatasetWriter

/**
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.data.spi.filesystem;

import com.google.common.collect.Sets;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericRecord;
import org.junit.Assert;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.DatasetWriter;
import org.kitesdk.data.Datasets;
import org.kitesdk.data.PartitionStrategy;
import com.google.common.io.Closeables;
import com.google.common.io.Files;
import java.io.IOException;
import org.apache.avro.generic.GenericData.Record;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.kitesdk.data.TestHelpers;
import org.kitesdk.data.View;

import static org.kitesdk.data.spi.filesystem.DatasetTestUtilities.USER_SCHEMA;

public class TestPartitionedDatasetWriter {

  private Configuration conf;
  private FileSystem fileSystem;
  private Path testDirectory;
  private FileSystemDatasetRepository repo;
  private PartitionedDatasetWriter<Object> writer;

  @Before
  public void setUp() throws IOException {
    this.conf = new Configuration();
    this.fileSystem = FileSystem.get(conf);
    this.testDirectory = new Path(Files.createTempDir().getAbsolutePath());
    this.repo = new FileSystemDatasetRepository(conf, testDirectory);

    PartitionStrategy partitionStrategy = new PartitionStrategy.Builder()
        .hash("username", 2).build();
    FileSystemDataset<Object> users = (FileSystemDataset<Object>) repo.create(
        "ns", "users",
        new DatasetDescriptor.Builder()
            .schema(USER_SCHEMA)
            .partitionStrategy(partitionStrategy)
            .build());
    writer = new PartitionedDatasetWriter<Object>(
        new FileSystemView<Object>(users, Object.class));
  }

  @After
  public void tearDown() throws IOException {
    fileSystem.delete(testDirectory, true);
  }

  @Test
  public void testBasicInitClose() throws IOException {
    writer.initialize();
    writer.close();
  }

  @Test
  public void testWriter() throws IOException {
    Record record = new GenericRecordBuilder(USER_SCHEMA)
        .set("username", "test1").set("email", "a@example.com").build();
    try {
      writer.initialize();
      writer.write(record);
      writer.flush();
      writer.close();
    } finally {
      Closeables.close(writer, true);
    }
  }

  @Test(expected = IllegalStateException.class)
  public void testWriteToClosedWriterFails() throws IOException {
    Record record = new GenericRecordBuilder(USER_SCHEMA)
        .set("username", "test1").set("email", "a@example.com").build();
    writer.initialize();
    writer.close();
    writer.write(record);
  }

  @Test
  public void testProvidedPartitioner() throws IOException {
    Schema user = SchemaBuilder.record("User").fields()
        .requiredString("username")
        .requiredString("email")
        .endRecord();
    PartitionStrategy strategy = new PartitionStrategy.Builder()
        .provided("version", "int")
        .build();
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(user)
        .partitionStrategy(strategy)
        .build();

    Path datasetPath = new Path("file:" + testDirectory + "/provided/users");

    final Dataset<GenericRecord> users = Datasets.create(
        "dataset:" + datasetPath, descriptor);

    final GenericRecord u1 = new GenericRecordBuilder(user)
        .set("username", "test1")
        .set("email", "a@example.com")
        .build();
    GenericRecord u2 = new GenericRecordBuilder(user)
        .set("username", "test2")
        .set("email", "b@example.com")
        .build();

    TestHelpers.assertThrows("Should reject write with unknown version",
        IllegalArgumentException.class, new Runnable() {
          @Override
          public void run() {
            writeToView(users, u1);
          }
        });

    Assert.assertFalse(fileSystem.exists(new Path(datasetPath, "version=6")));
    writeToView(users.with("version", 6), u1);
    Assert.assertTrue(fileSystem.exists(new Path(datasetPath, "version=6")));

    Assert.assertFalse(fileSystem.exists(new Path(datasetPath, "version=7")));
    writeToView(Datasets.load("view:" + datasetPath + "?version=7"), u2);
    Assert.assertTrue(fileSystem.exists(new Path(datasetPath, "version=7")));

    Assert.assertEquals("Should read from provided partitions without view",
        Sets.newHashSet(u1, u2), DatasetTestUtilities.materialize(users));

    Assert.assertEquals("Should read from provided partition",
        Sets.newHashSet(u1),
        DatasetTestUtilities.materialize(users.with("version", 6)));

    Assert.assertEquals("Should read from provided partition",
        Sets.newHashSet(u2),
        DatasetTestUtilities.materialize(users.with("version", 7)));
  }

  private static <E> void writeToView(View<E> view, E... entities) {
    DatasetWriter<E> writer = null;
    try {
      writer = view.newWriter();
      for (E entity : entities) {
        writer.write(entity);
      }
      writer.close();
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  }

}
TOP

Related Classes of org.kitesdk.data.spi.filesystem.TestPartitionedDatasetWriter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.