Package org.apache.pig.data

Examples of org.apache.pig.data.DefaultDataBag$DefaultDataBagIterator


    @Test
    public void testSkewedJoinUDF() throws IOException {
        PartitionSkewedKeys udf = new PartitionSkewedKeys(new String[]{"0.1", "2", "1.txt"});
        Tuple t = TupleFactory.getInstance().newTuple();
        t.append(3);    // use 3 reducers
        DataBag db = new DefaultDataBag();
        Tuple sample;
        for (int i=0;i<=3;i++) {
            sample = TupleFactory.getInstance().newTuple();
            if (i!=3)
                sample.append("1");
            else
                sample.append("2");
            sample.append((long)200);
            if (i!=3)
                sample.append((long)0);
            else
                sample.append((long)30);
            db.add(sample);
        }
        t.append(db);
        Map<String, Object> output = udf.exec(t);
        DataBag parList = (DataBag)output.get(PartitionSkewedKeys.PARTITION_LIST);
        for (Tuple par : parList) {
View Full Code Here


    if (list == null){
      return null;
    }

    HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0);
    DataBag db = new DefaultDataBag();
    for (Object o : list){
      Tuple tuple;
      if (elementSubFieldSchema.getType() == Type.STRUCT){
        tuple = transformToTuple((List<Object>)o, elementSubFieldSchema);
      } else {
        // bags always contain tuples
        tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema));
      }
      db.add(tuple);
    }
    return db;
  }
View Full Code Here

    ((Tuple) groovyObject).set(0, "jambon");
    ((Tuple) groovyObject).set(1, "blanc");
    pigObject = GroovyUtils.groovyToPig(groovyObject);
    Assert.assertSame(groovyObject, pigObject);

    groovyObject = new DefaultDataBag();
    pigObject = GroovyUtils.groovyToPig(groovyObject);
    Assert.assertSame(groovyObject, pigObject);

    groovyObject = null;
    pigObject = GroovyUtils.groovyToPig(groovyObject);
View Full Code Here

    }

    public static class JiraPig1030 extends EvalFunc<DataBag> {
       
        public DataBag exec(Tuple input) throws IOException {
            return new DefaultDataBag();
        }
View Full Code Here

    if (list == null) {
      return null;
    }

    HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0);
    DataBag db = new DefaultDataBag();
    for (Object o : list) {
      Tuple tuple;
      if (elementSubFieldSchema.getType() == Type.STRUCT) {
        tuple = transformToTuple((List<?>) o, elementSubFieldSchema);
      } else {
        // bags always contain tuples
        tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema));
      }
      db.add(tuple);
    }
    return db;
  }
View Full Code Here

    if (list == null) {
      return null;
    }

    HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0);
    DataBag db = new DefaultDataBag();
    for (Object o : list) {
      Tuple tuple;
      if (elementSubFieldSchema.getType() == Type.STRUCT) {
        tuple = transformToTuple((List<Object>) o, elementSubFieldSchema);
      } else {
        // bags always contain tuples
        tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema));
      }
      db.add(tuple);
    }
    return db;
  }
View Full Code Here

        Tuple tuple = TupleFactory.getInstance().newTuple(2);
            ArrayList<Tuple> columns = new ArrayList<Tuple>();
            tuple.set(0, new DataByteArray(key));
            for (Map.Entry<byte[], IColumn> entry : cf.entrySet())
                columns.add(columnToTuple(entry.getKey(), entry.getValue()));
            tuple.set(1, new DefaultDataBag(columns));
            return tuple;
        }
        catch (InterruptedException e)
        {
            throw new IOException(e.getMessage());
View Full Code Here

        // super
        ArrayList<Tuple> subcols = new ArrayList<Tuple>();
        for (IColumn subcol : ((SuperColumn)col).getSubColumns())
            subcols.add(columnToTuple(subcol.name(), subcol));
        pair.set(1, new DefaultDataBag(subcols));
        return pair;
    }
View Full Code Here

import java.io.IOException;

public class Explode extends EvalFunc<DataBag> {
    @Override
    public DataBag exec(Tuple input) throws IOException {
        DataBag data = new DefaultDataBag();
        for (Object t_obj : input.getAll()) {
            Tuple tuple = (Tuple)t_obj;

            for (Object obj : tuple.getAll()) {
                Tuple t = TupleFactory.getInstance().newTuple();
                t.append(obj);
                data.add(t);
            }
        }

        return data;
    }
View Full Code Here

    @Test
    public void testSkewedJoinUDF() throws IOException {
        PartitionSkewedKeys udf = new PartitionSkewedKeys(new String[]{"0.1", "2", "1.txt"});
        Tuple t = TupleFactory.getInstance().newTuple();
        t.append(3);    // use 3 reducers
        DataBag db = new DefaultDataBag();
        Tuple sample;
        for (int i=0;i<=3;i++) {
            sample = TupleFactory.getInstance().newTuple();
            if (i!=3)
                sample.append("1");
            else
                sample.append("2");
            sample.append((long)200);
            if (i!=3)
                sample.append((long)0);
            else
                sample.append((long)30);
            db.add(sample);
        }
        t.append(db);
        Map<String, Object> output = udf.exec(t);
        DataBag parList = (DataBag)output.get(PartitionSkewedKeys.PARTITION_LIST);
        for (Tuple par : parList) {
View Full Code Here

TOP

Related Classes of org.apache.pig.data.DefaultDataBag$DefaultDataBagIterator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.