Examples of DataBag


Examples of org.apache.pig.data.DataBag

    }
   
    @Test
    public void testStatsFunc() throws Exception {
        COV cov = new COV("a","b");
        DataBag dBag = DefaultBagFactory.getInstance().newDefaultBag();
        Tuple tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 1.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 4.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 8.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 4.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 7.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 8.0);
        dBag.add(tup1);
        DataBag dBag1 = DefaultBagFactory.getInstance().newDefaultBag();
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 2.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 2.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 3.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 3.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 2.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 4.0);
        dBag1.add(tup1);
        Tuple input = TupleFactory.getInstance().newTuple(2);
        input.set(0, dBag);
        input.set(1, dBag1);
        DataBag output = cov.exec(input);
        Iterator<Tuple> it = output.iterator();
        Tuple ans = (Tuple)it.next();
        assertEquals((String)ans.get(0),"a");
        assertEquals((String)ans.get(1),"b");
        assertEquals(1.11111, (Double)ans.get(2),0.0005);
       
        COR cor = new COR("a","b");
        dBag = DefaultBagFactory.getInstance().newDefaultBag();
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 1.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 4.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 8.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 4.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 7.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 8.0);
        dBag.add(tup1);
        dBag1 = DefaultBagFactory.getInstance().newDefaultBag();
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 2.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 2.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 3.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 3.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 2.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 4.0);
        dBag1.add(tup1);
        input = TupleFactory.getInstance().newTuple(2);
        input.set(0, dBag);
        input.set(1, dBag1);
        output = cor.exec(input);
        it = output.iterator();
        ans = (Tuple) it.next();
        assertEquals((String)ans.get(0),"a");
        assertEquals((String)ans.get(1),"b");
        assertEquals(0.582222509739582, (Double)ans.get(2) ,0.0005);
    }
View Full Code Here

Examples of org.apache.pig.data.DataBag

        }     
        //test null value in input
        input.append(null);
       
        Set<Integer> s = new HashSet<Integer>();
        DataBag db = tb.exec(input);
        for (Tuple t : db) {
            s.add((Integer) t.get(0));
        }

        // finally check the bag had everything we put in the tuple.
        assertEquals(101, s.size());
        for (int i = 0; i < 100; ++i) {
            assertTrue(s.contains(i));
        }
        assertTrue("null in tobag result", s.contains(null));
       
        TOTUPLE tt = new TOTUPLE();

        input = TupleFactory.getInstance().newTuple();
        for (int i = 0; i < 100; ++i) {
            input.append(i);
        }

        Tuple output = tt.exec(input);
        assertTrue(!(input == output));
        assertEquals(input, output);
       
        TOP top = new TOP();
        TupleFactory tupleFactory = TupleFactory.getInstance();
        BagFactory bagFactory = DefaultBagFactory.getInstance();
        Tuple inputTuple = tupleFactory.newTuple(3);
        DataBag dBag = bagFactory.newDefaultBag();
       
        // set N = 10 i.e retain top 10 tuples
        inputTuple.set(0, 10);
        // compare tuples by field number 1
        inputTuple.set(1, 1);
        // set the data bag containing the tuples
        inputTuple.set(2, dBag);

        // generate tuples of the form (group-1, 1), (group-2, 2) ...
        for (long i = 0; i < 100; i++) {
            Tuple nestedTuple = tupleFactory.newTuple(2);
            nestedTuple.set(0, "group-" + i);
            nestedTuple.set(1, i);
            dBag.add(nestedTuple);
        }
       
        DataBag outBag = top.exec(inputTuple);
        assertEquals(outBag.size(), 10L);
        checkItemsGT(outBag, 1, 89);
       
        // two initial results
        Tuple init1 = (new TOP.Initial()).exec(inputTuple);
        Tuple init2 = (new TOP.Initial()).exec(inputTuple);
        // two intermediate results

        DataBag intermedBag = bagFactory.newDefaultBag();
        intermedBag.add(init1);
        intermedBag.add(init2);
        Tuple intermedInput = tupleFactory.newTuple(intermedBag);
        Tuple intermedOutput1 = (new TOP.Intermed()).exec(intermedInput);
        Tuple intermedOutput2 = (new TOP.Intermed()).exec(intermedInput);
        checkItemsGT((DataBag)intermedOutput1.get(2), 1, 94);

        // final result
        DataBag finalInputBag = bagFactory.newDefaultBag();
        finalInputBag.add(intermedOutput1);
        finalInputBag.add(intermedOutput2);
        Tuple finalInput = tupleFactory.newTuple(finalInputBag);
        outBag = (new TOP.Final()).exec(finalInput);
        assertEquals(outBag.size(), 10L);
        checkItemsGT(outBag, 1, 96);
    }
View Full Code Here

Examples of org.apache.pig.data.DataBag

   
    @Test
    public void testDistinct() throws Exception {
   
        Integer[] inp = new Integer[] { 1, 2 , 3, 1 ,4, 5, 3};
        DataBag inputBag = Util.createBagOfOneColumn(inp);
        EvalFunc<Tuple> initial = new Distinct.Initial();
        DataBag intermedInputBg1 = bagFactory.newDefaultBag();
        DataBag intermedInputBg2 = bagFactory.newDefaultBag();
        int i = 0;
        for (Tuple t : inputBag) {
            Tuple initialOutput = initial.exec(tupleFactory.newTuple(t));
            if(i < inp.length/2 ) {
                intermedInputBg1.add(initialOutput);
            } else {
                intermedInputBg2.add(initialOutput);
            }
            i++;
        }
       
        EvalFunc<Tuple> intermed = new Distinct.Intermediate();
       
        DataBag finalInputBg = bagFactory.newDefaultBag();
        finalInputBg.add(intermed.exec(tupleFactory.newTuple(intermedInputBg1)));
        finalInputBg.add(intermed.exec(tupleFactory.newTuple(intermedInputBg2)));
        EvalFunc<DataBag> fin = new Distinct.Final();
        DataBag result = fin.exec(tupleFactory.newTuple(finalInputBg));
       
        Integer[] exp = new Integer[] { 1, 2, 3, 4, 5};
        DataBag expectedBag = Util.createBagOfOneColumn(exp);
        assertEquals(expectedBag, result);
       
    }   
View Full Code Here

Examples of org.apache.pig.data.DataBag

        for(int i = 0; i < inputSize; i+=2) {
            inp[i] = i/2;
            inp[i+1] = i/2;
        }

        DataBag inputBag = Util.createBagOfOneColumn(inp);
        EvalFunc<DataBag> distinct = new Distinct();
        DataBag result = distinct.exec(tupleFactory.newTuple(inputBag));
       
        Integer[] exp = new Integer[inputSize/2];
        for(int j = 0; j < inputSize/2; ++j) {
            exp[j] = j;
        }

        DataBag expectedBag = Util.createBagOfOneColumn(exp);
        assertEquals(expectedBag, result);
       
    }
View Full Code Here

Examples of org.apache.pig.data.DataBag

        // Bag size
        Tuple t1 = Util.createTuple(new String[]{"a", "b", "c"});
        Tuple t2 = Util.createTuple(new String[]{"d", "e", "f"});
        Tuple t3 = Util.createTuple(new String[]{"g", "h", "i"});
        Tuple t4 = Util.createTuple(new String[]{"j", "k", "l"});
        DataBag b = Util.createBag(new Tuple[]{t1, t2, t3, t4});
        expected = new Long(4);
        t.set(0, b);
        size = new BagSize();
        msg = "[Testing BagSize on input type: Bag]";
        assertTrue(msg, expected.equals(size.exec(t)));
View Full Code Here

Examples of org.apache.pig.data.DataBag

        Tuple t2 = tf.newTuple(1);
        t2.set(0, null);
        Tuple t3 = tf.newTuple(0);
       
        TOKENIZE f = new TOKENIZE();
        DataBag b = f.exec(t1);
        assertTrue(b.size()==3);
        Iterator<Tuple> i = b.iterator();
        Tuple rt = i.next();
        assertTrue(rt.get(0).equals("123"));
        rt = i.next();
        assertTrue(rt.get(0).equals("456"));
        rt = i.next();
View Full Code Here

Examples of org.apache.pig.data.DataBag

    public void testDIFF() throws Exception {
        // Test it in the case with two bags.
        BagFactory bf = BagFactory.getInstance();
        TupleFactory tf = TupleFactory.getInstance();

        DataBag b1 = bf.newDefaultBag();
        DataBag b2 = bf.newDefaultBag();
        for (int i = 0; i < 10; i++) b1.add(tf.newTuple(new Integer(i)));
        for (int i = 0; i < 10; i += 2) b2.add(tf.newTuple(new Integer(i)));
        Tuple t = tf.newTuple(2);
        t.set(0, b1);
        t.set(1, b2);
        DIFF d = new DIFF();
        DataBag result = d.exec(t);

        assertEquals(5, result.size());
        Iterator<Tuple> i = result.iterator();
        int[] values = new int[5];
        for (int j = 0; j < 5; j++) values[j] = (Integer)i.next().get(0);
        Arrays.sort(values);
        for (int j = 1; j < 10; j += 2) assertEquals(j, values[j/2]);

        // Test it in the case of two objects that are equals
        t = tf.newTuple(2);
        t.set(0, new Integer(1));
        t.set(1, new Integer(1));
        result = d.exec(t);
        assertEquals(0, result.size());

        // Test it in the case of two objects that are not equal
        t = tf.newTuple(2);
        t.set(0, new Integer(1));
        t.set(1, new Integer(2));
        result = d.exec(t);
        assertEquals(2, result.size());
    }
View Full Code Here

Examples of org.apache.pig.data.DataBag

  TupleFactory mTupleFactory = TupleFactory.getInstance();
    BagFactory mBagFactory = BagFactory.getInstance();
   
    public DataBag exec(Tuple input) throws IOException{
      try {
          DataBag output = mBagFactory.newDefaultBag();
          Object o = input.get(0);
          if (!(o instanceof String)) {
              throw new IOException("Expected input to be chararray, but  got " + o.getClass().getName());
          }
          Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_43, new StringReader((String)o));
          TokenStream tokenstream = new LowerCaseEntityPreservingFilter(source);
          tokenstream.reset();
          while (tokenstream.incrementToken()){
            String token = tokenstream.getAttribute(CharTermAttribute.class).toString();
            output.add(mTupleFactory.newTuple(token));
          }
          return output;
      } catch (Exception e) {
          // error handling goes here
        throw new IOException("caught exception",e);
View Full Code Here

Examples of org.apache.pig.data.DataBag

import org.apache.pig.data.Tuple;

// Sample usage: cc.twittertools.piggybank.GetLatitude($0#'geo'#'coordinates')
public class GetLatitude extends EvalFunc<String> {
  public String exec(Tuple input) throws IOException {
    DataBag bag = (DataBag) input.get(0);
    Iterator<Tuple> it = bag.iterator();
    if (!it.hasNext()) {
      return null;
    }
    Tuple tup = it.next();
View Full Code Here

Examples of org.apache.pig.data.DataBag

import org.apache.pig.data.Tuple;

// Sample usage: cc.twittertools.piggybank.GetLongitude($0#'geo'#'coordinates');
public class GetLongitude extends EvalFunc<String> {
  public String exec(Tuple input) throws IOException {
    DataBag bag = (DataBag) input.get(0);
    Iterator<Tuple> it = bag.iterator();
    if (!it.hasNext()) {
      return null;
    }
    it.next();
    if (!it.hasNext()) {
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.