Examples of DataBag


Examples of org.apache.pig.data.DataBag

    map.put("a", "x");
    map.put("b", "y");
    map.put("c", "z");
    tuple.set(2, map);

    DataBag bagColl = TypesUtils.createBag();
    Schema schColl = schema.getColumn(3).getSchema();
    Tuple tupColl1 = TypesUtils.createTuple(schColl);
    Tuple tupColl2 = TypesUtils.createTuple(schColl);
    byte[] abs1 = new byte[3];
    byte[] abs2 = new byte[4];
    tupColl1.set(0, 3.1415926);
    tupColl1.set(1, 1.6);
    abs1[0] = 11;
    abs1[1] = 12;
    abs1[2] = 13;
    tupColl1.set(2, new DataByteArray(abs1));
    bagColl.add(tupColl1);
    tupColl2.set(0, 123.456789);
    tupColl2.set(1, 100);
    abs2[0] = 21;
    abs2[1] = 22;
    abs2[2] = 23;
    abs2[3] = 24;
    tupColl2.set(2, new DataByteArray(abs2));
    bagColl.add(tupColl2);
    tuple.set(3, bagColl);

    int row = 0;
    inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
        .getBytes()), tuple);

    // row 2
    row++;
    TypesUtils.resetTuple(tuple);
    TypesUtils.resetTuple(tupRecord);
    map.clear();
    tuple.set(0, false);
    tupRecord.set(0, 2);
    tupRecord.set(1, 1002L);
    tuple.set(1, tupRecord);
    map.put("boy", "girl");
    map.put("adam", "amy");
    map.put("bob", "becky");
    map.put("carl", "cathy");
    tuple.set(2, map);
    bagColl.clear();
    TypesUtils.resetTuple(tupColl1);
    TypesUtils.resetTuple(tupColl2);
    tupColl1.set(0, 7654.321);
    tupColl1.set(1, 0.0001);
    abs1[0] = 31;
    abs1[1] = 32;
    abs1[2] = 33;
    tupColl1.set(2, new DataByteArray(abs1));
    bagColl.add(tupColl1);
    tupColl2.set(0, 0.123456789);
    tupColl2.set(1, 0.3333);
    abs2[0] = 41;
    abs2[1] = 42;
    abs2[2] = 43;
    abs2[3] = 44;
    tupColl2.set(2, new DataByteArray(abs2));
    bagColl.add(tupColl2);
    tuple.set(3, bagColl);
    inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
        .getBytes()), tuple);

    inserter.close();
View Full Code Here

Examples of org.apache.pig.data.DataBag

    map.put("a", "x");
    map.put("b", "y");
    map.put("c", "z");
    tuple.set(2, map);

    DataBag bagColl = TypesUtils.createBag();
    Schema schColl = schema.getColumn(3).getSchema();
    Tuple tupColl1 = TypesUtils.createTuple(schColl);
    Tuple tupColl2 = TypesUtils.createTuple(schColl);
    byte[] abs1 = new byte[3];
    byte[] abs2 = new byte[4];
    tupColl1.set(0, 3.1415926);
    tupColl1.set(1, 1.6);
    abs1[0] = 11;
    abs1[1] = 12;
    abs1[2] = 13;
    tupColl1.set(2, new DataByteArray(abs1));
    bagColl.add(tupColl1);
    tupColl2.set(0, 123.456789);
    tupColl2.set(1, 100);
    abs2[0] = 21;
    abs2[1] = 22;
    abs2[2] = 23;
    abs2[3] = 24;
    tupColl2.set(2, new DataByteArray(abs2));
    bagColl.add(tupColl2);
    tuple.set(3, bagColl);

    int row = 0;
    inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
        .getBytes()), tuple);
View Full Code Here

Examples of org.apache.pig.data.DataBag

    m2.put("x", m3);
    m2.put("y", m4);
    tuple.set(9, m2);

    // c:collection(f13:double, f14:float, f15:bytes)
    DataBag bagColl = TypesUtils.createBag();
    Schema schColl = schema.getColumn(10).getSchema();
    Tuple tupColl1 = TypesUtils.createTuple(schColl);
    Tuple tupColl2 = TypesUtils.createTuple(schColl);
    byte[] abs1 = new byte[3];
    byte[] abs2 = new byte[4];
    tupColl1.set(0, 3.1415926);
    tupColl1.set(1, 1.6);
    abs1[0] = 11;
    abs1[1] = 12;
    abs1[2] = 13;
    tupColl1.set(2, new DataByteArray(abs1));
    bagColl.add(tupColl1);
    tupColl2.set(0, 123.456789);
    tupColl2.set(1, 100);
    abs2[0] = 21;
    abs2[1] = 22;
    abs2[2] = 23;
    abs2[3] = 24;
    tupColl2.set(2, new DataByteArray(abs2));
    bagColl.add(tupColl2);
    tuple.set(10, bagColl);

    // set s7 to s23: loop index i maps to tuple position i + 4 (positions 11..27).
    // NOTE(review): "i" below is a string literal, not the loop variable, so every
    // one of these positions receives the identical value "si, line1" — confirm
    // whether per-column values ("s7, line1", "s8, line1", ...) were intended.
    for (int i = 7; i <= 23; i++) {
      tuple.set(i + 4, "s" + "i" + ", line1");
    }

    int row = 0;
    inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
        .getBytes()), tuple);

    // row 2
    row++;
    TypesUtils.resetTuple(tuple);
    TypesUtils.resetTuple(tupRecord1);
    TypesUtils.resetTuple(tupRecord2);
    TypesUtils.resetTuple(tupRecord3);
    m1.clear();
    m2.clear();
    m3.clear();
    m4.clear();
    tuple.set(0, false);
    tuple.set(1, 2); // int
    tuple.set(2, 1002L); // long
    tuple.set(3, 3.1); // float
    tuple.set(4, "hello world 2"); // string
    tuple.set(5, new DataByteArray("hello byte 2")); // byte

    // r1:record(f1:int, f2:long
    tupRecord1.set(0, 2);
    tupRecord1.set(1, 1002L);
    tuple.set(6, tupRecord1);

    // r2:record(r3:record(f3:float, f4))
    tupRecord2.set(0, tupRecord3);
    tupRecord3.set(0, 2.3);
    tupRecord3.set(1, new DataByteArray("r3 row2  byte array"));
    tuple.set(7, tupRecord2);

    // m1:map(string)
    m1.put("a2", "A2");
    m1.put("b2", "B2");
    m1.put("c2", "C2");
    tuple.set(8, m1);

    // m2:map(map(int))
    m3.put("m321", 321);
    m3.put("m322", 322);
    m3.put("m323", 323);
    m2.put("z", m3);
    tuple.set(9, m2);

    // c:collection(f13:double, f14:float, f15:bytes)
    bagColl.clear();
    TypesUtils.resetTuple(tupColl1);
    TypesUtils.resetTuple(tupColl2);
    tupColl1.set(0, 7654.321);
    tupColl1.set(1, 0.0001);
    abs1[0] = 31;
    abs1[1] = 32;
    abs1[2] = 33;
    tupColl1.set(2, new DataByteArray(abs1));
    bagColl.add(tupColl1);
    tupColl2.set(0, 0.123456789);
    tupColl2.set(1, 0.3333);
    abs2[0] = 41;
    abs2[1] = 42;
    abs2[2] = 43;
    abs2[3] = 44;
    tupColl2.set(2, new DataByteArray(abs2));
    bagColl.add(tupColl2);
    tuple.set(10, bagColl);
    // set s7 to s23: loop index i maps to tuple position i + 4 (positions 11..27).
    // NOTE(review): "i" below is a string literal, not the loop variable, so every
    // one of these positions receives the identical value "si, line2" — confirm
    // whether per-column values ("s7, line2", "s8, line2", ...) were intended.
    for (int i = 7; i <= 23; i++) {
      tuple.set(i + 4, "s" + "i" + ", line2");
    }
View Full Code Here

Examples of org.apache.pig.data.DataBag

    BasicTable.Writer writer1 = new BasicTable.Writer(pathTable1, conf);
    int part = 0;
    TableInserter inserter = writer1.getInserter("part" + part, true);

    TypesUtils.resetTuple(tuple);
    DataBag bag1 = TypesUtils.createBag();
    Schema schColl = schema.getColumn(0).getSchema();
    Tuple tupColl1 = TypesUtils.createTuple(schColl);
    Tuple tupColl2 = TypesUtils.createTuple(schColl);

    int row = 0;
    tupColl1.set(0, "1.1");
    tupColl1.set(1, "1.11");
    bag1.add(tupColl1);
    tupColl2.set(0, "1.111");
    tupColl2.set(1, "1.1111");
    bag1.add(tupColl2);
    tuple.set(0, bag1);

    Map<String, String> m1 = new HashMap<String, String>();
    m1.put("k1", "k11");
    m1.put("b", "b1");
    m1.put("c", "c1");
    tuple.set(1, m1);

    Tuple tupRecord1;
    try {
      tupRecord1 = TypesUtils.createTuple(schema.getColumnSchema("c")
          .getSchema());
    } catch (ParseException e) {
      e.printStackTrace();
      throw new IOException(e);
    }

    tupRecord1.set(0, "1");
    tupRecord1.set(1, "hello1");
    tuple.set(2, tupRecord1);
    tuple.set(3, "world1");

    inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
        .getBytes()), tuple);

    // second row: reset the shared tuple, record, collection tuples, map and bag
    // so they can be refilled for the next insert.
    row++;
    TypesUtils.resetTuple(tuple);
    TypesUtils.resetTuple(tupRecord1);
    TypesUtils.resetTuple(tupColl1);
    TypesUtils.resetTuple(tupColl2);
    m1.clear();
    bag1.clear();

    // NOTE(review): row is incremented a second time here, so it is 2 (not 1) when
    // the key for this row is formatted with row + 1 — confirm this is intended.
    row++;
    tupColl1.set(0, "2.2");
    tupColl1.set(1, "2.22");
    bag1.add(tupColl1);
    tupColl2.set(0, "2.222");
    tupColl2.set(1, "2.2222");
    bag1.add(tupColl2);
    tuple.set(0, bag1);

    m1.put("k2", "k22");
    m1.put("k3", "k32");
    m1.put("k1", "k12");
    m1.put("k4", "k42");
    tuple.set(1, m1);

    tupRecord1.set(0, "2");
    tupRecord1.set(1, "hello2");
    tuple.set(2, tupRecord1);
    tuple.set(3, "world2");

    inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
        .getBytes()), tuple);
    inserter.close();
    writer1.finish();
    writer.close();

    /*
     * create 2nd basic table;
     */
    pathTable2 = new Path(pathWorking, "2");
    System.out.println("pathTable2 =" + pathTable2);

    BasicTable.Writer writer2 = new BasicTable.Writer(pathTable2, STR_SCHEMA2,
        STR_STORAGE2, conf);
    // NOTE(review): the schema is read from `writer`, not from the `writer2` created
    // just above with STR_SCHEMA2 — confirm which writer's schema tuple2 should use.
    Schema schema2 = writer.getSchema();

    Tuple tuple2 = TypesUtils.createTuple(schema2);

    BasicTable.Writer writer22 = new BasicTable.Writer(pathTable2, conf);
    part = 0;
    TableInserter inserter2 = writer22.getInserter("part" + part, true);

    TypesUtils.resetTuple(tuple2);
    TypesUtils.resetTuple(tuple);
    TypesUtils.resetTuple(tupRecord1);
    TypesUtils.resetTuple(tupColl1);
    TypesUtils.resetTuple(tupColl2);
    m1.clear();
    bag1.clear();

    row = 0;
    tupColl1.set(0, "3.3");
    tupColl1.set(1, "3.33");
    bag1.add(tupColl1);
    tupColl2.set(0, "3.333");
    tupColl2.set(1, "3.3333");
    bag1.add(tupColl2);
    tuple2.set(0, bag1);

    m1.put("k1", "k13");
    m1.put("b", "b3");
    m1.put("c", "c3");
    tuple2.set(1, m1);

    tupRecord1.set(0, "3");
    tupRecord1.set(1, "hello3");
    tuple2.set(2, tupRecord1);
    tuple2.set(3, "world13");

    inserter2.insert(new BytesWritable(String
        .format("k%d%d", part + 1, row + 1).getBytes()), tuple2);

    // second row: reset the shared tuple, record, collection tuples, bag and map
    // so they can be refilled for the next insert.
    row++;
    TypesUtils.resetTuple(tuple2);
    TypesUtils.resetTuple(tupRecord1);
    TypesUtils.resetTuple(tupColl1);
    TypesUtils.resetTuple(tupColl2);
    bag1.clear();
    m1.clear();

    // NOTE(review): row is incremented a second time here (two increments for a
    // single row) — confirm this is intended.
    row++;
    tupColl1.set(0, "4.4");
    tupColl1.set(1, "4.44");
    bag1.add(tupColl1);
    tupColl2.set(0, "4.444");
    tupColl2.set(1, "4.4444");
    bag1.add(tupColl2);
    tuple2.set(0, bag1);

    m1.put("k2", "k24");
    m1.put("k3", "k34");
    m1.put("k1", "k14");
View Full Code Here

Examples of org.apache.pig.data.DataBag

      "c1:collection(a:double, b:float, c:bytes),c2:collection(r1:record(f1:int, f2:string), d:string),c3:collection(c3_1:collection(e:int,f:bool))";
    Schema schema = new Schema(STR_SCHEMA);
    Tuple tuple = TypesUtils.createTuple(schema);
    TypesUtils.resetTuple(tuple);
   
    DataBag bag1 = TypesUtils.createBag();
    Schema schColl = new Schema("a:double, b:float, c:bytes");
    Tuple tupColl1 = TypesUtils.createTuple(schColl);
    Tuple tupColl2 = TypesUtils.createTuple(schColl);

    DataBag bag2 = TypesUtils.createBag();
    Schema schColl2 = new Schema("r1:record(f1:int, f2:string), d:string");
    Tuple tupColl2_1 = TypesUtils.createTuple(schColl2);
    Tuple tupColl2_2 = TypesUtils.createTuple(schColl2);
   
    Tuple collRecord1;
    try {
      collRecord1 = TypesUtils.createTuple(new Schema("f1:int, f2:string"));
    } catch (ParseException e) {
      e.printStackTrace();
      throw new IOException(e);
    }
    Tuple collRecord2;
    try {
      collRecord2 = TypesUtils.createTuple(new Schema("f1:int, f2:string"));
    } catch (ParseException e) {
      e.printStackTrace();
      throw new IOException(e);
    }

    // c3:collection(c3_1:collection(e:int,f:bool))
    DataBag bag3 = TypesUtils.createBag();
    DataBag bag3_1 = TypesUtils.createBag();
    DataBag bag3_2 = TypesUtils.createBag();

    Tuple tupColl3_1 = null;
    try {
      tupColl3_1 = TypesUtils.createTuple(new Schema("e:int,f:bool"));
    } catch (ParseException e) {
      e.printStackTrace();
      throw new IOException(e);
    }
    Tuple tupColl3_2;
    try {
      tupColl3_2 = TypesUtils.createTuple(new Schema("e:int,f:bool"));
    } catch (ParseException e) {
      e.printStackTrace();
      throw new IOException(e);
    }

    Tuple tupColl3_3 = null;
    try {
      tupColl3_3 = TypesUtils.createTuple(new Schema("e:int,f:bool"));
    } catch (ParseException e) {
      e.printStackTrace();
      throw new IOException(e);
    }
    Tuple tupColl3_4;
    try {
      tupColl3_4 = TypesUtils.createTuple(new Schema("e:int,f:bool"));
    } catch (ParseException e) {
      e.printStackTrace();
      throw new IOException(e);
    }

    byte[] abs1 = new byte[3];
    byte[] abs2 = new byte[4];
    tupColl1.set(0, 3.1415926);
    tupColl1.set(1, 1.6);
    abs1[0] = 'a';
    abs1[1] = 'a';
    abs1[2] = 'a';
    tupColl1.set(2, new DataByteArray(abs1));
    bag1.add(tupColl1);
    tupColl2.set(0, 123.456789);
    tupColl2.set(1, 100);
    abs2[0] = 'b';
    abs2[1] = 'c';
    abs2[2] = 'd';
    abs2[3] = 'e';
    tupColl2.set(2, new DataByteArray(abs2));
    bag1.add(tupColl2);
    tuple.set(0, bag1);

    collRecord1.set(0, 1);
    collRecord1.set(1, "record1_string1");
    tupColl2_1.set(0, collRecord1);
    tupColl2_1.set(1, "hello1");
    bag2.add(tupColl2_1);

    collRecord2.set(0, 2);
    collRecord2.set(1, "record2_string1");
    tupColl2_2.set(0, collRecord2);
    tupColl2_2.set(1, "hello2");
    bag2.add(tupColl2_2);
    tuple.set(1, bag2);

    TypesUtils.resetTuple(tupColl3_1);
    TypesUtils.resetTuple(tupColl3_2);
    tupColl3_1.set(0, 1);
    tupColl3_1.set(1, true);
    tupColl3_2.set(0, 2);
    tupColl3_2.set(1, false);
    bag3_1.add(tupColl3_1);
    bag3_1.add(tupColl3_2);
    bag3.addAll(bag3_1);

    tupColl3_3.set(0, 3);
    tupColl3_3.set(1, true);
    tupColl3_4.set(0, 4);
    tupColl3_4.set(1, false);
    bag3_2.add(tupColl3_3);
    bag3_2.add(tupColl3_4);
    bag3.addAll(bag3_2);
    tuple.set(2, bag3);

    Assert.assertTrue(tuple.toString().equals("3.1415926,1.6,#aaa\n" +
        "123.456789,100,#bcde\n" +
View Full Code Here

Examples of org.apache.pig.data.DataBag

    for (int b = 0; b < numsBatch; b++) {
      for (int i = 0; i < numsInserters; i++) {
        TypesUtils.resetTuple(tuple);

        DataBag bagColl = TypesUtils.createBag();
        Schema schColl = schema.getColumn(0).getSchema();
        Tuple tupColl1 = TypesUtils.createTuple(schColl);
        Tuple tupColl2 = TypesUtils.createTuple(schColl);
        byte[] abs1 = new byte[3];
        byte[] abs2 = new byte[4];
        tupColl1.set(0, 3.1415926);
        tupColl1.set(1, 1.6);
        abs1[0] = 11;
        abs1[1] = 12;
        abs1[2] = 13;
        tupColl1.set(2, new DataByteArray(abs1));
        bagColl.add(tupColl1);
        tupColl2.set(0, 123.456789);
        tupColl2.set(1, 100);
        abs2[0] = 21;
        abs2[1] = 22;
        abs2[2] = 23;
        abs2[3] = 24;
        tupColl2.set(2, new DataByteArray(abs2));
        bagColl.add(tupColl2);
        tuple.set(0, bagColl);

        inserters[i].insert(new BytesWritable(("key" + i).getBytes()), tuple);
      }
    }
View Full Code Here

Examples of org.apache.pig.data.DataBag

    for (int b = 0; b < numsBatch; b++) {
      for (int i = 0; i < numsInserters; i++) {
        TypesUtils.resetTuple(tuple);

        DataBag bagColl = TypesUtils.createBag();
        Schema schColl = schema.getColumn(0).getSchema();
        Tuple tupColl1 = TypesUtils.createTuple(schColl);
        Tuple tupColl2 = TypesUtils.createTuple(schColl);
        byte[] abs1 = new byte[3];
        byte[] abs2 = new byte[4];
        tupColl1.set(0, 3.1415926);
        tupColl1.set(1, 1.6);
        abs1[0] = 11;
        abs1[1] = 12;
        abs1[2] = 13;
        tupColl1.set(2, new DataByteArray(abs1));
        bagColl.add(tupColl1);
        tupColl2.set(0, 123.456789);
        tupColl2.set(1, 100);
        abs2[0] = 21;
        abs2[1] = 22;
        abs2[2] = 23;
        abs2[3] = 24;
        tupColl2.set(2, new DataByteArray(abs2));
        bagColl.add(tupColl2);
        tuple.set(0, bagColl);

        inserters[i].insert(new BytesWritable(("key" + i).getBytes()), tuple);
      }
    }
View Full Code Here

Examples of org.apache.pig.data.DataBag

        physOp.setLineageTracer(lineage);

        // replace the original inputs by POReads
        for (int i = 0; i < inputs.size(); i++) {
            DataBag bag = derivedData.get(cg.getInputs().get(i));
            PORead por = new PORead(new OperatorKey("", r.nextLong()), bag);
            phy.add(por);
            try {
                phy.connect(por, physOp.getInputs().get(i));
            } catch (PlanException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
                log.error("Error connecting " + por.name() + " to "
                        + physOp.name());
            }
        }

        DataBag output = BagFactory.getInstance().newDefaultBag();
        Tuple t = null;
        try {
            for (Result res = physOp.getNext(t); res.returnStatus != POStatus.STATUS_EOP; res = physOp
                    .getNext(t)) {
                output.add((Tuple) res.result);
            }
        } catch (ExecException e) {
            log.error("Error evaluating operator : " + physOp.name());
        }
        derivedData.put(cg, output);
View Full Code Here

Examples of org.apache.pig.data.DataBag

        PhysicalPlan phy = new PhysicalPlan();
        phy.add(physOp);

        // replace the original inputs by POReads
        for (LogicalOperator l : op.getPlan().getPredecessors(op)) {
            DataBag bag = derivedData.get(l);
            PORead por = new PORead(new OperatorKey("", r.nextLong()), bag);
            phy.add(por);
            try {
                phy.connect(por, physOp);
            } catch (PlanException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
                log.error("Error connecting " + por.name() + " to "
                        + physOp.name());
            }
        }

        DataBag output = BagFactory.getInstance().newDefaultBag();
        Tuple t = null;
        try {
            for (Result res = physOp.getNext(t); res.returnStatus != POStatus.STATUS_EOP; res = physOp
                    .getNext(t)) {
                output.add((Tuple) res.result);
            }
        } catch (ExecException e) {
            log.error("Error evaluating operator : " + physOp.name());
        }
        derivedData.put(op, output);
View Full Code Here

Examples of org.apache.pig.data.DataBag

        physOp.setLineageTracer(null);

        // replace the original inputs by POReads
        for (int i = 0; i < inputs.size(); i++) {
            DataBag bag = derivedData.get(op.getInputs().get(i));
            PORead por = new PORead(new OperatorKey("", r.nextLong()), bag);
            phy.add(por);
            try {
                phy.connect(por, physOp.getInputs().get(i));
            } catch (PlanException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
                log.error("Error connecting " + por.name() + " to "
                        + physOp.name());
            }
        }

        // replace the original inputs by POReads
        // for(LogicalOperator l : op.getPlan().getPredecessors(op)) {
        // DataBag bag = derivedData.get(l);
        // PORead por = new PORead(new OperatorKey("", r.nextLong()), bag);
        // phy.add(por);
        // try {
        // phy.connect(por, physOp);
        // } catch (PlanException e) {
        // // TODO Auto-generated catch block
        // e.printStackTrace();
        // log.error("Error connecting " + por.name() + " to " + physOp.name());
        // }
        // }

        DataBag output = BagFactory.getInstance().newDefaultBag();
        Tuple t = null;
        try {
            for (Result res = physOp.getNext(t); res.returnStatus != POStatus.STATUS_EOP; res = physOp
                    .getNext(t)) {
                output.add((Tuple) res.result);
            }
        } catch (ExecException e) {
            log.error("Error evaluating operator : " + physOp.name());
        }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.