package com.youtube.vitess.vtgate.integration.hadoop;

import com.google.common.collect.Lists;
import com.google.common.primitives.UnsignedLong;
import com.google.gson.Gson;

import com.youtube.vitess.vtgate.Exceptions.ConnectionException;
import com.youtube.vitess.vtgate.KeyspaceId;
import com.youtube.vitess.vtgate.Query;
import com.youtube.vitess.vtgate.VtGate;
import com.youtube.vitess.vtgate.hadoop.VitessInputFormat;
import com.youtube.vitess.vtgate.hadoop.writables.KeyspaceIdWritable;
import com.youtube.vitess.vtgate.hadoop.writables.RowWritable;
import com.youtube.vitess.vtgate.integration.util.TestEnv;
import com.youtube.vitess.vtgate.integration.util.Util;

import junit.extensions.TestSetup;
import junit.framework.TestSuite;

import org.apache.commons.codec.binary.Hex;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.HadoopTestCase;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MapReduceTestUtil;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.junit.Test;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Integration tests for MapReduce jobs reading from Vitess. These tests use an in-process Hadoop
 * cluster via {@link HadoopTestCase}, which forces a JUnit3-style structure. The Vitess setup for
 * these tests requires at least one rdonly instance per shard.
 */
public class MapReduceIT extends HadoopTestCase {

  public static TestEnv testEnv = getTestEnv();
  private static Path outputPath = new Path("/tmp");

  public MapReduceIT() throws IOException {
    super(HadoopTestCase.LOCAL_MR, HadoopTestCase.LOCAL_FS, 1, 1);
  }

  @Override
  public void setUp() throws Exception {
    super.setUp();
    Util.createTable(testEnv);
  }

  @Test
  public void testSplitQuery() throws Exception {
    // Insert 20 rows per shard
    for (String shardName : testEnv.shardKidMap.keySet()) {
      Util.insertRowsInShard(testEnv, shardName, 20);
    }
    Util.waitForTablet("rdonly", 40, 3, testEnv);
    VtGate vtgate = VtGate.connect("localhost:" + testEnv.port, 0);
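    // splitQuery asks vtgate to break the table scan into non-overlapping key range
    // queries; the Long values are vtgate's estimated result sizes for each split.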
    Map<Query, Long> queries =
        vtgate.splitQuery("test_keyspace", "select id,keyspace_id from vtgate_test", 1);
    vtgate.close();

    // Verify 2 splits, one per shard
    assertEquals(2, queries.size());
    Set<String> shardsInSplits = new HashSet<>();
    for (Query q : queries.keySet()) {
      assertEquals("select id,keyspace_id from vtgate_test", q.getSql());
      assertEquals("test_keyspace", q.getKeyspace());
      assertEquals("rdonly", q.getTabletType());
      assertEquals(0, q.getBindVars().size());
      assertNull(q.getKeyspaceIds());
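      // The hex-encoded range bounds concatenate to the shard name,
      // e.g. "" + "-" + "80" yields "-80" for the lower shard.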
      String start = Hex.encodeHexString(q.getKeyRanges().get(0).get("Start"));
      String end = Hex.encodeHexString(q.getKeyRanges().get(0).get("End"));
      shardsInSplits.add(start + "-" + end);
    }

    // Verify the keyrange queries in splits cover the entire keyspace
    assertTrue(shardsInSplits.containsAll(testEnv.shardKidMap.keySet()));
  }

  @Test
  public void testSplitQueryMultipleSplitsPerShard() throws Exception {
    int rowCount = 30;
    Util.insertRows(testEnv, 1, rowCount);
    List<String> expectedSqls =
        Lists.newArrayList("select id, keyspace_id from vtgate_test where id < 10",
            "select id, keyspace_id from vtgate_test where id < 11",
            "select id, keyspace_id from vtgate_test where id >= 10 and id < 19",
            "select id, keyspace_id from vtgate_test where id >= 11 and id < 19",
            "select id, keyspace_id from vtgate_test where id >= 19",
            "select id, keyspace_id from vtgate_test where id >= 19");
    Util.waitForTablet("rdonly", rowCount, 3, testEnv);
    VtGate vtgate = VtGate.connect("localhost:" + testEnv.port, 0);
    int splitsPerShard = 3;
    Map<Query, Long> queries = vtgate.splitQuery("test_keyspace",
        "select id,keyspace_id from vtgate_test", splitsPerShard);
    vtgate.close();

    // Verify 6 splits, 3 per shard
    assertEquals(2 * splitsPerShard, queries.size());
    Set<String> shardsInSplits = new HashSet<>();
    for (Query q : queries.keySet()) {
      String sql = q.getSql();
      assertTrue(expectedSqls.contains(sql));
      expectedSqls.remove(sql);
      assertEquals("test_keyspace", q.getKeyspace());
      assertEquals("rdonly", q.getTabletType());
      assertEquals(0, q.getBindVars().size());
      assertNull(q.getKeyspaceIds());
      String start = Hex.encodeHexString(q.getKeyRanges().get(0).get("Start"));
      String end = Hex.encodeHexString(q.getKeyRanges().get(0).get("End"));
      shardsInSplits.add(start + "-" + end);
    }

    // Verify the keyrange queries in splits cover the entire keyspace
    assertTrue(shardsInSplits.containsAll(testEnv.shardKidMap.keySet()));
    assertTrue(expectedSqls.isEmpty());
  }

  @Test
  public void testSplitQueryInvalidTable() throws Exception {
    VtGate vtgate = VtGate.connect("localhost:" + testEnv.port, 0);
    try {
      vtgate.splitQuery("test_keyspace", "select id from invalid_table", 1);
      fail("failed to raise connection exception");
    } catch (ConnectionException e) {
      assertTrue(e.getMessage().contains("query validation error: can't find table in schema"));
    } finally {
      vtgate.close();
    }
  }

  /**
   * Run a mapper-only MR job and verify that every row in the source table was written to HDFS.
   */
  @Test
  public void testDumpTableToHDFS() throws Exception {
    // Insert 20 rows per shard
    int rowsPerShard = 20;
    for (String shardName : testEnv.shardKidMap.keySet()) {
      Util.insertRowsInShard(testEnv, shardName, rowsPerShard);
    }
    Util.waitForTablet("rdonly", 40, 3, testEnv);
    // Configurations for the job, output from mapper as Text
    Configuration conf = createJobConf();
    Job job = new Job(conf);
    job.setJobName("table");
    job.setJarByClass(VitessInputFormat.class);
    job.setMapperClass(TableMapper.class);
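    // Point the input format at vtgate: server address, keyspace, table,
    // columns to fetch, and the number of splits per shard.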
    VitessInputFormat.setInput(job,
        "localhost:" + testEnv.port,
        testEnv.keyspace,
        "vtgate_test",
        Lists.newArrayList("keyspace_id", "name"),
        4);
    job.setOutputKeyClass(KeyspaceIdWritable.class);
    job.setOutputValueClass(RowWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(0);

    Path outDir = new Path(outputPath, "mrvitess/output");
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
      fs.delete(outDir, true);
    }
    FileOutputFormat.setOutputPath(job, outDir);

    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());

    String[] outputLines = MapReduceTestUtil.readOutput(outDir, conf).split("\n");
    // there should be one line per row in the source table
    assertEquals(rowsPerShard * 2, outputLines.length);
    Set<String> actualKids = new HashSet<>();
    Set<String> actualNames = new HashSet<>();

    // Rows and keyspace ids are written as JSON since this is
    // TextOutputFormat. Parse and verify we've received all the keyspace
    // ids and rows.
    Gson gson = new Gson();
    for (String line : outputLines) {
      System.out.println(line);
      String kidJson = line.split("\t")[0];
      Map<String, String> m = new HashMap<>();
      m = gson.fromJson(kidJson, m.getClass());
      actualKids.add(m.get("id"));

      String rowJson = line.split("\t")[1];
      Map<String, Map<String, Map<String, byte[]>>> map = new HashMap<>();
      map = gson.fromJson(rowJson, map.getClass());
      actualNames.add(new String(map.get("contents").get("name").get("value")));
    }

    Set<String> expectedKids = new HashSet<>();
    for (KeyspaceId kid : testEnv.getAllKeyspaceIds()) {
      expectedKids.add(((UnsignedLong) kid.getId()).toString());
    }
    assertEquals(expectedKids.size(), actualKids.size());
    assertTrue(actualKids.containsAll(expectedKids));

    Set<String> expectedNames = new HashSet<>();
    for (int i = 0; i < rowsPerShard; i++) {
      expectedNames.add("name_" + i);
    }

    assertEquals(rowsPerShard, actualNames.size());
    assertTrue(actualNames.containsAll(expectedNames));
  }

  /**
   * Map all rows and aggregate by keyspace id at the reducer.
   */
  @Test
  public void testReducerAggregateRows() throws Exception {
    int rowsPerShard = 20;
    for (String shardName : testEnv.shardKidMap.keySet()) {
      Util.insertRowsInShard(testEnv, shardName, rowsPerShard);
    }
    Util.waitForTablet("rdonly", 40, 3, testEnv);
    Configuration conf = createJobConf();

    Job job = new Job(conf);
    job.setJobName("table");
    job.setJarByClass(VitessInputFormat.class);
    job.setMapperClass(TableMapper.class);
    VitessInputFormat.setInput(job,
        "localhost:" + testEnv.port,
        testEnv.keyspace,
        "vtgate_test",
        Lists.newArrayList("keyspace_id", "name"),
        1);

    job.setMapOutputKeyClass(KeyspaceIdWritable.class);
    job.setMapOutputValueClass(RowWritable.class);

    job.setReducerClass(CountReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path outDir = new Path(outputPath, "mrvitess/output");
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
      fs.delete(outDir, true);
    }
    FileOutputFormat.setOutputPath(job, outDir);

    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());

    String[] outputLines = MapReduceTestUtil.readOutput(outDir, conf).split("\n");
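    // One reducer output line per keyspace id key: 3 keyspace ids per shard across 2 shards.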
    assertEquals(6, outputLines.length);
    int totalRowsReduced = 0;
    for (String line : outputLines) {
      totalRowsReduced += Integer.parseInt(line);
    }
    assertEquals(rowsPerShard * 2, totalRowsReduced);
  }

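  /**
   * Identity mapper: emits each (keyspace id, row) pair read from Vitess unchanged.
   */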
  public static class TableMapper extends
      Mapper<KeyspaceIdWritable, RowWritable, KeyspaceIdWritable, RowWritable> {
    @Override
    public void map(KeyspaceIdWritable key, RowWritable value, Context context) throws IOException,
        InterruptedException {
      context.write(key, value);
    }
  }

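  /**
   * Counts the rows grouped under each keyspace id and emits one count per key.
   */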
  public static class CountReducer extends
      Reducer<KeyspaceIdWritable, RowWritable, NullWritable, LongWritable> {
    @Override
    public void reduce(KeyspaceIdWritable key, Iterable<RowWritable> values, Context context)
        throws IOException, InterruptedException {
      long count = 0;
      Iterator<RowWritable> iter = values.iterator();
      // next() throws NoSuchElementException when the iterator is exhausted,
      // so advance with hasNext() rather than testing next() against null.
      while (iter.hasNext()) {
        iter.next();
        count++;
      }
      context.write(NullWritable.get(), new LongWritable(count));
    }
  }

  /**
   * Create a test env with two shards, each having a master and an rdonly tablet.
   */
  static TestEnv getTestEnv() {
    Map<String, List<String>> shardKidMap = new HashMap<>();
    shardKidMap.put("-80",
        Lists.newArrayList("527875958493693904", "626750931627689502", "345387386794260318"));
    shardKidMap.put("80-",
        Lists.newArrayList("9767889778372766922", "9742070682920810358", "10296850775085416642"));
    TestEnv env = new TestEnv(shardKidMap, "test_keyspace");
    env.addTablet("rdonly", 1);
    return env;
  }

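  /**
   * JUnit3-style suite wrapper: brings up the Vitess test environment once before any test
   * runs and tears it down after all tests complete.
   */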
  public static TestSetup suite() {
    return new TestSetup(new TestSuite(MapReduceIT.class)) {

      @Override
      protected void setUp() throws Exception {
        Util.setupTestEnv(testEnv, true);
      }

      @Override
      protected void tearDown() throws Exception {
        Util.setupTestEnv(testEnv, false);
      }
    };

  }
}