Package org.apache.hadoop.zebra.pig

Source Code of org.apache.hadoop.zebra.pig.TestBasicUnion

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.zebra.pig;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;

import junit.framework.Assert;
import junit.framework.TestCase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.zebra.io.BasicTable;
import org.apache.hadoop.zebra.io.TableInserter;
import org.apache.hadoop.zebra.io.TableScanner;
import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit;
import org.apache.hadoop.zebra.types.ParseException;
import org.apache.hadoop.zebra.types.Schema;
import org.apache.hadoop.zebra.types.TypesUtils;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.Tuple;
import org.apache.pig.test.MiniCluster;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

/**
* Note:
*
* Make sure you add build/pig-0.1.0-dev-core.jar to the classpath of the
* app/debug configuration when running this test from inside Eclipse.
*
*/
public class TestBasicUnion {
  // Execution mode that setUpOnce uses to decide how the PigServer is created.
  protected static ExecType execType = ExecType.MAPREDUCE;
  // Mini cluster built by setUpOnce when execType is MAPREDUCE.
  private static MiniCluster cluster;
  // Pig server shared by every test in this class.
  protected static PigServer pigServer;
  // Working directory of the cluster file system, and the paths of the five
  // basic tables ("1" .. "5") written beneath it.
  private static Path pathWorking, pathTable1, pathTable2, pathTable3,
      pathTable4, pathTable5;
  // Configuration handed to the BasicTable writers and readers.
  private static Configuration conf;
  // Schema and storage strings passed to BasicTable.Writer for tables 1-5.
  final static String STR_SCHEMA1 = "a:string,b,c:string,e,f";
  final static String STR_STORAGE1 = "[a];[c]";
  final static String STR_SCHEMA2 = "a:string,b,d:string,f,e";
  final static String STR_STORAGE2 = "[a,b];[d]";
  final static String STR_SCHEMA3 = "b,a";
  final static String STR_STORAGE3 = "[a];[b]";
  final static String STR_SCHEMA4 = "b:string,a,c:string";
  final static String STR_STORAGE4 = "[a,b];[c]";
  final static String STR_SCHEMA5 = "b,a:string";
  final static String STR_STORAGE5 = "[a,b]";

  @BeforeClass
  public static void setUpOnce() throws Exception {
    if (System.getProperty("hadoop.log.dir") == null) {
      String base = new File(".").getPath(); // getAbsolutePath();
      System
          .setProperty("hadoop.log.dir", new Path(base).toString() + "./logs");
    }

    if (execType == ExecType.MAPREDUCE) {
      cluster = MiniCluster.buildCluster();
      pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    } else {
      pigServer = new PigServer(ExecType.LOCAL);
    }

    conf = new Configuration();
    FileSystem fs = cluster.getFileSystem();
    pathWorking = fs.getWorkingDirectory();

    /*
     * create 1st basic table;
     */
    pathTable1 = new Path(pathWorking, "1");
    System.out.println("pathTable1 =" + pathTable1);

    BasicTable.Writer writer = new BasicTable.Writer(pathTable1, STR_SCHEMA1,
        STR_STORAGE1, false, conf);
    Schema schema = writer.getSchema();
    Tuple tuple = TypesUtils.createTuple(schema);

    final int numsBatch = 10;
    final int numsInserters = 2;
    TableInserter[] inserters = new TableInserter[numsInserters];
    for (int i = 0; i < numsInserters; i++) {
      inserters[i] = writer.getInserter("ins" + i, false);
    }

    for (int b = 0; b < numsBatch; b++) {
      for (int i = 0; i < numsInserters; i++) {
        TypesUtils.resetTuple(tuple);
        for (int k = 0; k < tuple.size(); ++k) {
          try {
            tuple.set(k, b + "_" + i + "" + k);
          } catch (ExecException e) {
            e.printStackTrace();
          }
        }// k
        inserters[i].insert(new BytesWritable(("key1" + i).getBytes()), tuple);
      }// i
    }// b
    for (int i = 0; i < numsInserters; i++) {
      inserters[i].close();
    }

    /*
     * create 2nd basic table;
     */
    pathTable2 = new Path(pathWorking, "2");
    System.out.println("pathTable2 =" + pathTable2);

    writer = new BasicTable.Writer(pathTable2, STR_SCHEMA2, STR_STORAGE2,
        false, conf);
    schema = writer.getSchema();
    tuple = TypesUtils.createTuple(schema);

    inserters = new TableInserter[numsInserters];
    for (int i = 0; i < numsInserters; i++) {
      inserters[i] = writer.getInserter("ins" + i, false);
    }

    for (int b = 0; b < numsBatch; b++) {
      for (int i = 0; i < numsInserters; i++) {
        TypesUtils.resetTuple(tuple);
        for (int k = 0; k < tuple.size(); ++k) {
          try {
            tuple.set(k, b + "_" + i + "" + k);
          } catch (ExecException e) {
            e.printStackTrace();
          }
        }
        inserters[i].insert(new BytesWritable(("key2" + i).getBytes()), tuple);
      }
    }
    for (int i = 0; i < numsInserters; i++) {
      inserters[i].close();
    }

    /*
     * create 3rd basic table;
     */
    pathTable3 = new Path(pathWorking, "3");
    System.out.println("pathTable3 =" + pathTable3);

    writer = new BasicTable.Writer(pathTable3, STR_SCHEMA3, STR_STORAGE3,
        false, conf);
    schema = writer.getSchema();
    tuple = TypesUtils.createTuple(schema);

    inserters = new TableInserter[numsInserters];
    for (int i = 0; i < numsInserters; i++) {
      inserters[i] = writer.getInserter("ins" + i, false);
    }

    for (int b = 0; b < numsBatch; b++) {
      for (int i = 0; i < numsInserters; i++) {
        TypesUtils.resetTuple(tuple);
        for (int k = 0; k < tuple.size(); ++k) {
          try {
            tuple.set(k, b + "_" + i + "" + k);
          } catch (ExecException e) {
            e.printStackTrace();
          }
        }
        inserters[i].insert(new BytesWritable(("key3" + i).getBytes()), tuple);
      }
    }
    for (int i = 0; i < numsInserters; i++) {
      inserters[i].close();
    }
    /*
     * create 4th basic table;
     */
    pathTable4 = new Path(pathWorking, "4");
    System.out.println("pathTable4 =" + pathTable4);

    writer = new BasicTable.Writer(pathTable4, STR_SCHEMA4, STR_STORAGE4,
        false, conf);
    schema = writer.getSchema();
    tuple = TypesUtils.createTuple(schema);

    inserters = new TableInserter[numsInserters];
    for (int i = 0; i < numsInserters; i++) {
      inserters[i] = writer.getInserter("ins" + i, false);
    }

    for (int b = 0; b < numsBatch; b++) {
      for (int i = 0; i < numsInserters; i++) {
        TypesUtils.resetTuple(tuple);
        for (int k = 0; k < tuple.size(); ++k) {
          try {
            tuple.set(k, b + "_" + i + "" + k);
          } catch (ExecException e) {
            e.printStackTrace();
          }
        }
        inserters[i].insert(new BytesWritable(("key4" + i).getBytes()), tuple);
      }
    }
    for (int i = 0; i < numsInserters; i++) {
      inserters[i].close();
    }
    /*
     * create 5th basic table;
     */
    pathTable5 = new Path(pathWorking, "5");
    System.out.println("pathTable5 =" + pathTable5);

    writer = new BasicTable.Writer(pathTable5, STR_SCHEMA5, STR_STORAGE5,
        false, conf);
    schema = writer.getSchema();
    tuple = TypesUtils.createTuple(schema);

    inserters = new TableInserter[numsInserters];
    for (int i = 0; i < numsInserters; i++) {
      inserters[i] = writer.getInserter("ins" + i, false);
    }

    for (int b = 0; b < numsBatch; b++) {
      for (int i = 0; i < numsInserters; i++) {
        TypesUtils.resetTuple(tuple);
        for (int k = 0; k < tuple.size(); ++k) {
          try {
            tuple.set(k, b + "_" + i + "" + k);
          } catch (ExecException e) {
            e.printStackTrace();
          }
        }
        inserters[i].insert(new BytesWritable(("key5" + i).getBytes()), tuple);
      }
    }
    for (int i = 0; i < numsInserters; i++) {
      inserters[i].close();
    }

  }

  @AfterClass
  public static void tearDownOnce() throws Exception {
    pigServer.shutdown();
  }

  // all fields
  public void testReader1() throws ExecException, IOException {
    /*
     * remove hdfs prefix part like "hdfs://localhost.localdomain:42540" pig
     * will fill that in.
     */
    String str1 = pathTable1.toString().substring(
        pathTable1.toString().indexOf("/", 7), pathTable1.toString().length());
    String str2 = pathTable2.toString().substring(
        pathTable2.toString().indexOf("/", 7), pathTable2.toString().length());
    String query = "records = LOAD '" + str1 + "," + str2
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d');";
    System.out.println(query);
    // records = LOAD '/user/jing1234/1,/user/jing1234/2' USING
    // org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d');

    pigServer.registerQuery(query);
    Iterator<Tuple> it = pigServer.openIterator("records");

    int i = 0;
    int k = -1;
    Tuple cur = null;
    int t = -1;
    int j = -1;

    while (it.hasNext()) {
      cur = it.next();

      System.out.println("cur: " + cur);
      // first table
      if (i <= 9) {
        System.out.println("first table first part: " + cur.toString());
        Assert.assertEquals(i + "_00", cur.get(0));
        Assert.assertEquals(i + "_01", cur.get(1));
        Assert.assertEquals(i + "_02", cur.get(2));
        Assert.assertEquals(null, cur.get(3));
      }
      if (i >= 10) {
        k++;
      }
      if (k <= 9 && k >= 0) {
        System.out.println("first table second part:  : " + cur.toString());
        Assert.assertEquals(k + "_10", cur.get(0));
        Assert.assertEquals(k + "_11", cur.get(1));
        Assert.assertEquals(k + "_12", cur.get(2));
        Assert.assertEquals(null, cur.get(3));
      }

      // second table
      if (k >= 10) {
        t++;
      }
      if (t <= 9 && t >= 0) {
        System.out.println("second table first part: " + cur.toString());
        Assert.assertEquals(t + "_00", cur.get(0));
        Assert.assertEquals(t + "_01", cur.get(1));
        Assert.assertEquals(null, cur.get(2));
        Assert.assertEquals(t + "_02", cur.get(3));
      }
      if (t >= 10) {
        j++;
      }
      if (j <= 9 && j >= 0) {
        System.out.println("second table first part: " + cur.toString());
        Assert.assertEquals(j + "_10", cur.get(0));
        Assert.assertEquals(j + "_11", cur.get(1));
        Assert.assertEquals(null, cur.get(2));
        Assert.assertEquals(j + "_12", cur.get(3));
      }
      i++;
    }// while
    Assert.assertEquals(40, i);
  }

  public void testReaderThroughIO() throws ExecException, IOException,
      ParseException {

    String projection1 = new String("a,b,c");
    BasicTable.Reader reader = new BasicTable.Reader(pathTable1, conf);
    reader.setProjection(projection1);
    List<RangeSplit> splits = reader.rangeSplit(1);
    TableScanner scanner = reader.getScanner(splits.get(0), true);
    scanner = reader.getScanner(splits.get(0), true);
    BytesWritable key = new BytesWritable();
    Tuple RowValue = TypesUtils.createTuple(scanner.getSchema());

    scanner.getKey(key);
    // Assert.assertEquals(key, new BytesWritable("k11".getBytes()));

    System.out.println("read record or record:" + RowValue.toString());

    for (int i = 0; i <= 9; i++) {
      scanner.getValue(RowValue);
      System.out.println("read record or record:" + RowValue.toString());
      Assert.assertEquals(i + "_00", RowValue.get(0));
      Assert.assertEquals(i + "_01", RowValue.get(1));
      Assert.assertEquals(i + "_02", RowValue.get(2));
      scanner.advance();
    }
    for (int i = 0; i <= 9; i++) {
      scanner.getValue(RowValue);
      System.out.println("read record or record:" + RowValue.toString());
      Assert.assertEquals(i + "_10", RowValue.get(0));
      Assert.assertEquals(i + "_11", RowValue.get(1));
      Assert.assertEquals(i + "_12", RowValue.get(2));
      scanner.advance();
    }

    reader.close();
  }

  // all fields
  public void testReader2() throws ExecException, IOException {

    String str1 = pathTable1.toString().substring(
        pathTable1.toString().indexOf("/", 7), pathTable1.toString().length());
    String str2 = pathTable2.toString().substring(
        pathTable2.toString().indexOf("/", 7), pathTable2.toString().length());
    String query = "records = LOAD '" + str1 + "," + str2
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('c');";
    System.out.println(query);

    pigServer.registerQuery(query);
    Iterator<Tuple> it = pigServer.openIterator("records");

    int i = 0;
    int k = -1;
    Tuple cur = null;
    int t = -1;
    int j = -1;

    while (it.hasNext()) {
      cur = it.next();

      System.out.println("cur: " + cur);
      if (i <= 9) {
        System.out.println("first table first part: " + cur.toString());

        Assert.assertEquals(i + "_02", cur.get(0));
        try {
          cur.get(1);
          Assert.fail("should throw index out of bound exception ");
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
      if (i >= 10) {
        k++;
      }
      if (k <= 9 && k >= 0) {
        System.out.println("first table second part:  : " + cur.toString());
        Assert.assertEquals(k + "_12", cur.get(0));
        try {
          cur.get(1);
          Assert.fail("should throw index out of bound exception ");
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
      if (k >= 10) {
        t++;
      }
      if (t <= 9 && t >= 0) {
        System.out.println("second table first part: " + cur.toString());
        Assert.assertEquals(null, cur.get(0));
        try {
          cur.get(1);
          Assert.fail("should throw index out of bound exception ");
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
      if (t >= 10) {
        j++;
      }
      if (j <= 9 && j >= 0) {
        System.out.println("second table first part: " + cur.toString());

        Assert.assertEquals(null, cur.get(0));
        try {
          cur.get(1);
          Assert.fail("should throw index out of bound exception ");
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
      i++;
    }// while
    Assert.assertEquals(40, i);
  }

  // projection for common exist colum a
  public void testReader3() throws ExecException, IOException {
    String str1 = pathTable1.toString().substring(
        pathTable1.toString().indexOf("/", 7), pathTable1.toString().length());
    String str2 = pathTable2.toString().substring(
        pathTable2.toString().indexOf("/", 7), pathTable2.toString().length());
    String query = "records = LOAD '" + str1 + "," + str2
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a');";
    System.out.println(query);

    pigServer.registerQuery(query);
    Iterator<Tuple> it = pigServer.openIterator("records");

    int i = 0;
    int k = -1;
    Tuple cur = null;
    int t = -1;
    int j = -1;

    while (it.hasNext()) {
      cur = it.next();

      System.out.println("cur: " + cur);
      // first table
      if (i <= 9) {
        System.out.println("first table first part: " + cur.toString());
        Assert.assertEquals(i + "_00", cur.get(0));
        try {
          cur.get(1);
          Assert.fail("should throw index out of bound exception ");
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
      if (i >= 10) {
        k++;
      }
      if (k <= 9 && k >= 0) {
        System.out.println("first table second part:  : " + cur.toString());
        Assert.assertEquals(k + "_10", cur.get(0));
        try {
          cur.get(1);
          Assert.fail("should throw index out of bound exception ");
        } catch (Exception e) {
          e.printStackTrace();
        }
      }

      // second table
      if (k >= 10) {
        t++;
      }
      if (t <= 9 && t >= 0) {
        System.out.println("second table first part: " + cur.toString());
        Assert.assertEquals(t + "_00", cur.get(0));
        try {
          cur.get(1);
          Assert.fail("should throw index out of bound exception ");
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
      if (t >= 10) {
        j++;
      }
      if (j <= 9 && j >= 0) {
        System.out.println("second table first part: " + cur.toString());
        Assert.assertEquals(j + "_10", cur.get(0));
        try {
          cur.get(1);
          Assert.fail("should throw index out of bound exception ");
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
      i++;
    }// while
    Assert.assertEquals(40, i);
  }

  // some common fields
  public void testReader4() throws ExecException, IOException {

    String str1 = pathTable1.toString().substring(
        pathTable1.toString().indexOf("/", 7), pathTable1.toString().length());
    String str2 = pathTable2.toString().substring(
        pathTable2.toString().indexOf("/", 7), pathTable2.toString().length());
    String query = "records = LOAD '" + str1 + "," + str2
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b');";
    System.out.println(query);
    // records = LOAD '/user/jing1234/1,/user/jing1234/2' USING
    // org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d');

    pigServer.registerQuery(query);
    Iterator<Tuple> it = pigServer.openIterator("records");

    int i = 0;
    int k = -1;
    Tuple cur = null;
    int t = -1;
    int j = -1;

    while (it.hasNext()) {
      cur = it.next();

      System.out.println("cur: " + cur);
      // first table
      if (i <= 9) {
        System.out.println("first table first part: " + cur.toString());
        Assert.assertEquals(i + "_00", cur.get(0));
        Assert.assertEquals(i + "_01", cur.get(1));
        try {
          cur.get(2);
          Assert.fail("should throw index out of bound exception");
        } catch (Exception e) {
          e.printStackTrace();
        }

      }
      if (i >= 10) {
        k++;
      }
      if (k <= 9 && k >= 0) {
        System.out.println("first table second part:  : " + cur.toString());
        Assert.assertEquals(k + "_10", cur.get(0));
        Assert.assertEquals(k + "_11", cur.get(1));
        try {
          cur.get(2);
          Assert.fail("should throw index out of bound exception");
        } catch (Exception e) {
          e.printStackTrace();
        }
      }

      // second table
      if (k >= 10) {
        t++;
      }
      if (t <= 9 && t >= 0) {
        System.out.println("second table first part: " + cur.toString());
        Assert.assertEquals(t + "_00", cur.get(0));
        Assert.assertEquals(t + "_01", cur.get(1));
        try {
          cur.get(2);
          Assert.fail("should throw index out of bound exception");
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
      if (t >= 10) {
        j++;
      }
      if (j <= 9 && j >= 0) {
        System.out.println("second table first part: " + cur.toString());
        Assert.assertEquals(j + "_10", cur.get(0));
        Assert.assertEquals(j + "_11", cur.get(1));
        try {
          cur.get(2);
          Assert.fail("should throw index out of bound exception");
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
      i++;
    }// while
    Assert.assertEquals(40, i);
  }

  // common column, but different posion
  public void testReader5() throws ExecException, IOException {
    String str1 = pathTable1.toString().substring(
        pathTable1.toString().indexOf("/", 7), pathTable1.toString().length());
    String str2 = pathTable2.toString().substring(
        pathTable2.toString().indexOf("/", 7), pathTable2.toString().length());
    String query = "records = LOAD '" + str1 + "," + str2
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('e, f');";
    System.out.println(query);
    // records = LOAD '/user/jing1234/1,/user/jing1234/2' USING
    // org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d');

    pigServer.registerQuery(query);
    Iterator<Tuple> it = pigServer.openIterator("records");

    int i = 0;
    int k = -1;
    Tuple cur = null;
    int t = -1;
    int j = -1;

    while (it.hasNext()) {
      cur = it.next();

      System.out.println("cur: " + cur);
      // first table
      if (i <= 9) {
        System.out.println("first table first part: " + cur.toString());
        Assert.assertEquals(i + "_03", cur.get(0));
        Assert.assertEquals(i + "_04", cur.get(1));
        try {
          cur.get(2);
          Assert.fail("should throw out of index bound exception");
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
      if (i >= 10) {
        k++;
      }
      if (k <= 9 && k >= 0) {
        System.out.println("first table second part:  : " + cur.toString());
        Assert.assertEquals(k + "_13", cur.get(0));
        Assert.assertEquals(k + "_14", cur.get(1));

      }

      // second table
      if (k >= 10) {
        t++;
      }
      if (t <= 9 && t >= 0) {
        System.out.println("second table first part: " + cur.toString());
        Assert.assertEquals(t + "_04", cur.get(0));
        Assert.assertEquals(t + "_03", cur.get(1));
      }
      if (t >= 10) {
        j++;
      }
      if (j <= 9 && j >= 0) {
        System.out.println("second table first part: " + cur.toString());
        Assert.assertEquals(j + "_14", cur.get(0));
        Assert.assertEquals(j + "_13", cur.get(1));
      }
      i++;
    }// while
    Assert.assertEquals(40, i);
  }

  @Test
  // union two tables with different column numbers and column positions
  public void testReader6() throws ExecException, IOException {
    String str1 = pathTable1.toString().substring(
        pathTable1.toString().indexOf("/", 7), pathTable1.toString().length());
    String str5 = pathTable2.toString().substring(
        pathTable5.toString().indexOf("/", 7), pathTable5.toString().length());
    String query = "records = LOAD '" + str1 + "," + str5
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('b,a');";
    System.out.println(query);

    pigServer.registerQuery(query);
    Iterator<Tuple> it = pigServer.openIterator("records");

    int i = 0;
    int k = -1;
    Tuple cur = null;
    int t = -1;
    int j = -1;

    while (it.hasNext()) {
      cur = it.next();

      System.out.println("cur: " + cur);
      // first table
      if (i <= 9) {
        System.out.println("first table first part: " + cur.toString());
        Assert.assertEquals(i + "_01", cur.get(0));
        Assert.assertEquals(i + "_00", cur.get(1));
        try {
          cur.get(2);
          Assert.fail("should throw index out of bound exception");
        } catch (Exception e) {
          e.printStackTrace();
        }

      }
      if (i >= 10) {
        k++;
      }
      if (k <= 9 && k >= 0) {
        System.out.println("first table second part:  : " + cur.toString());
        Assert.assertEquals(k + "_11", cur.get(0));
        Assert.assertEquals(k + "_10", cur.get(1));
        try {
          cur.get(2);
          Assert.fail("should throw index out of bound exception");
        } catch (Exception e) {
          e.printStackTrace();
        }

      }

      // second table
      if (k >= 10) {
        t++;
      }
      if (t <= 9 && t >= 0) {
        System.out.println("second table first part: " + cur.toString());
        Assert.assertEquals(t + "_01", cur.get(0));
        Assert.assertEquals(t + "_00", cur.get(1));
        try {
          cur.get(2);
          Assert.fail("should throw index out of bound exception");
        } catch (Exception e) {
          e.printStackTrace();
        }

      }
      if (t >= 10) {
        j++;
      }
      if (j <= 9 && j >= 0) {
        System.out.println("second table first part: " + cur.toString());
        Assert.assertEquals(j + "_11", cur.get(0));
        Assert.assertEquals(j + "_10", cur.get(1));
        try {
          cur.get(2);
          Assert.fail("should throw index out of bound exception");
        } catch (Exception e) {
          e.printStackTrace();
        }

      }
      i++;
    }// while
    Assert.assertEquals(40, i);
  }

  // both paths is hdfs://...
  public void testNeg1() throws ExecException, IOException {
    String str1 = pathTable1.toString();
    String str2 = pathTable2.toString();
    String query = "records = LOAD '" + str1 + "," + str2
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a,b,c,d');";
    System.out.println(query);
    // records = LOAD
    // 'hdfs://localhost.localdomain:39125/user/jing1234/1,hdfs://localhost.localdomain:39125/user/jing1234/2'
    // USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d');
    pigServer.registerQuery(query);

    Iterator<Tuple> it = pigServer.openIterator("records");

    int cnt = 0;
    Tuple cur = it.next();
    cnt++;
    while (it.hasNext()) {
      cur = it.next();
      System.out.println(cur);
      cnt++;
      if (cnt == 1) {
        Assert.assertEquals("0_00", cur.get(0));
        Assert.assertEquals("0_01", cur.get(1));
        Assert.assertEquals("0_02", cur.get(2));
        Assert.assertEquals(null, cur.get(3));
      }
      if (cnt == 21) {
        Assert.assertEquals("0_00", cur.get(0));
        Assert.assertEquals("0_01", cur.get(1));
        Assert.assertEquals(null, cur.get(2));
        Assert.assertEquals("0_02", cur.get(3));
      }
    }
    Assert.assertEquals(cnt, 40);
  }

  // non-existing column
  public void testNeg2() throws ExecException, IOException {
    String str1 = pathTable1.toString().substring(
        pathTable1.toString().indexOf("/", 7), pathTable1.toString().length());
    String str2 = pathTable2.toString().substring(
        pathTable2.toString().indexOf("/", 7), pathTable2.toString().length());
    String query = "records = LOAD '" + str1 + "," + str2
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a,f');";

    System.out.println(query);
    // records = LOAD
    // 'hdfs://localhost.localdomain:39125/user/jing1234/1,hdfs://localhost.localdomain:39125/user/jing1234/2'
    // USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d');
    pigServer.registerQuery(query);

    Iterator<Tuple> it = pigServer.openIterator("records");

    int cnt = 0;
    Tuple cur = it.next();
    cnt++;
    while (it.hasNext()) {
      cur = it.next();
      System.out.println(cur);
      cnt++;
      if (cnt == 1) {
        Assert.assertEquals("0_00", cur.get(0));
        Assert.assertEquals("0_01", cur.get(1));
        Assert.assertEquals("0_02", cur.get(2));
        Assert.assertEquals(null, cur.get(3));
      }
      if (cnt == 21) {
        Assert.assertEquals("0_00", cur.get(0));
        Assert.assertEquals("0_01", cur.get(1));
        Assert.assertEquals(null, cur.get(2));
        Assert.assertEquals("0_02", cur.get(3));
      }
    }
    Assert.assertEquals(cnt, 40);
  }

  @Test
  // 2 table with same column name but different type and different position.
  // should throw exception
  public void testNeg3() throws ExecException, IOException {
    String str1 = pathTable1.toString().substring(
        pathTable1.toString().indexOf("/", 7), pathTable1.toString().length());
    String str3 = pathTable3.toString().substring(
        pathTable3.toString().indexOf("/", 7), pathTable3.toString().length());
    String query = "records = LOAD '" + str1 + "," + str3
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b');";
    System.out.println(query);
    // records = LOAD '/user/jing1234/1,/user/jing1234/3' USING
    // org.apache.hadoop.zebra.pig.TableLoader('a,b');
    try {
      pigServer.registerQuery(query);
      Assert.fail("should throw exception");
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

  @Test
  // union table1 and table4. they have same culumn name with differnt types ,
  // should throw excepiton in union
  public void testNeg4() throws ExecException, IOException {

    String str1 = pathTable1.toString().substring(
        pathTable1.toString().indexOf("/", 7), pathTable1.toString().length());
    String str4 = pathTable4.toString().substring(
        pathTable4.toString().indexOf("/", 7), pathTable4.toString().length());
    String query = "records = LOAD '" + str1 + "," + str4
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c');";
    System.out.println(query);
    try {
      pigServer.registerQuery(query);
      Assert.fail("should throw exception");
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
}
TOP

Related Classes of org.apache.hadoop.zebra.pig.TestBasicUnion

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.