Package org.apache.pig.data

Examples of org.apache.pig.data.ExampleTuple


            // select first tuple from input data
            exampleTuple = inputData.iterator().next();
        } else {
            // input data is empty, so make up a tuple
            Tuple exampleT = new Tuple(schema.numFields());
            exampleTuple = new ExampleTuple();
            exampleTuple.copyFrom(exampleT);
            for (int i = 0; i < exampleTuple.arity(); i++) exampleTuple.setField(i, "0");
        }
       
        // run through output constraints; for each one synthesize a tuple and add it to the base data
        // (while synthesizing individual fields, try to match fields that exist in the real data)
        for (Iterator<Tuple> it = outputConstraints.iterator(); it.hasNext(); ) {
            Tuple outputConstraint = it.next();
           
            // sanity check:
            if (outputConstraint.arity() != schema.numFields()) throw new RuntimeException("Internal error: incorrect number of fields in constraint tuple.");
           
            Tuple inputT = new Tuple(outputConstraint.arity());
            ExampleTuple inputTuple = new ExampleTuple();
            inputTuple.copyFrom(inputT);
            for (int i = 0; i < inputTuple.arity(); i++) {
                Datum d = outputConstraint.getField(i);
                if (d == null) d = exampleTuple.getField(i);
                inputTuple.setField(i, d);
            }
            if(inputTuple.equals(exampleTuple)) {
              //System.out.println("Real tuple being copied!!");
             
            } else {
              inputTuple.makeSynthetic();
            }
            newInputData.add(inputTuple);
        }
    }
View Full Code Here


            if (outputConstraints.cardinality() > 0) {   // there's one or more output constraints; propagate them backwards through the projection
                for (Iterator<Tuple> it = outputConstraints.iterator(); it.hasNext(); ) {
                    Tuple outputConstraint = it.next();
                    Tuple inputConst = BackPropConstraint(outputConstraint, cols, inputSchema);
                    ExampleTuple inputConstraint = new ExampleTuple();
                    inputConstraint.copyFrom(inputConst);
                    if (inputConstraint != null) inputConstraints.add(inputConstraint);
                }
            }
           
            // note: if there are no output constraints, we don't have to do anything because the input operator
            // will ensure that we get at least one input tuple, which in turns ensures that we output at least one tuple
           
        } else if (spec instanceof FilterSpec) {
            FilterSpec fSpec = (FilterSpec) spec;
           
            Cond filterCond = fSpec.cond;
           
            // if necessary, insert one or more positive examples (i.e. tuples that pass the filter)
            if (outputConstraints.cardinality() > 0) {     // there's one or more output constraints; generate corresponding input constraints
                for (Iterator<Tuple> it = outputConstraints.iterator(); it.hasNext(); ) {
                    Tuple outputConstraint = it.next();
                    Tuple inputConst = GenerateMatchingTuple(outputConstraint, filterCond);
                    ExampleTuple inputConstraint = new ExampleTuple();
                    inputConstraint.copyFrom(inputConst);
                    if (inputConstraint != null) inputConstraints.add(inputConstraint);
                }
            } else if (outputData.cardinality() == 0) {    // no output constraints, but output is empty; generate one input that will pass the filter
                Tuple inputConst = GenerateMatchingTuple(inputSchema, filterCond);
                ExampleTuple inputConstraint = new ExampleTuple();
                inputConstraint.copyFrom(inputConst);
                if (inputConstraint != null) inputConstraints.add(inputConstraint);
            }
           
            // if necessary, insert a negative example (i.e. a tuple that does not pass the filter)
            if (outputData.cardinality() == inputData.cardinality()) {     // all tuples pass the filter; generate one input that will not pass the filter
                ExampleTuple inputConstraint = new ExampleTuple();
                Tuple inputConst = GenerateMatchingTuple(inputSchema, new NotCond(filterCond));
                //inputConstraint.copyFrom(inputConst);
                if (inputConst != null) {
                  inputConstraint.copyFrom(inputConst);
                  inputConstraints.add(inputConstraint);
                }
            }

        } else {
View Full Code Here

        for (int i = 0; i < numInputs; i++) inputConstraints.add(BagFactory.getInstance().newDefaultBag());
       
        int currentInput = 0;
        for (Iterator<Tuple> it = outputConstraints.iterator(); it.hasNext(); ) {
            Tuple outputConst = it.next();
            ExampleTuple outputConstraint = new ExampleTuple();
            outputConstraint.copyFrom(outputConst);
            inputConstraints.get(currentInput).add(outputConstraint);
            currentInput = (currentInput + 1) % numInputs;
        }
       
        // note: if there are no output constraints, don't have to do anything because inputs will ensure that
View Full Code Here

    ////////////////////////////
    // HELPER METHODS:
   
    static Tuple GenerateGroupByInput(Datum groupLabel, List<Integer> groupCols, int numInputFields) throws IOException {
        Tuple inputConst = new Tuple(numInputFields);
      Tuple inputConstraint = new ExampleTuple();
      inputConstraint.copyFrom(inputConst);
        if (groupLabel != null) {
            if (groupCols.size() == 1) {   // group by one column, so group label is a data atom
                inputConstraint.setField(groupCols.get(0), groupLabel);
            } else {                       // group by multiple columns, so group label is a tuple
                if (!(groupLabel instanceof Tuple)) throw new RuntimeException("Unexpected group label type.");
                Tuple groupLabelTuple = (Tuple) groupLabel;
                       
                for (int outCol = 0; outCol < groupCols.size(); outCol++) {
                    int inCol = groupCols.get(outCol);
                    Datum outVal = groupLabelTuple.getField(outCol);
                    inputConstraint.setField(inCol, outVal);
                }
            }
        }
        return inputConstraint;
    }
View Full Code Here

        return inputConstraint;
    }

    static Tuple BackPropConstraint(Tuple outputConstraint, List<Integer> cols, TupleSchema inputSchema) throws IOException {
        Tuple inputConst = new Tuple(inputSchema.numFields());
        Tuple inputConstraint = new ExampleTuple();
        inputConstraint.copyFrom(inputConst);

        for (int outCol = 0; outCol < outputConstraint.arity(); outCol++) {
            int inCol = cols.get(outCol);
            Datum outVal = outputConstraint.getField(outCol);
            Datum inVal = inputConstraint.getField(inCol);
           
            if (inVal == null) {
                inputConstraint.setField(inCol, outVal);
            } else {
                if (outVal != null) {
                    // unable to back-propagate, due to conflicting column constraints, so give up
                    return null;
                }
View Full Code Here

                    evalPipeline.add(nextTuple);
                    lastAdded = nextTuple;   // for lineage bookkeeping
                }
            }else{
                Tuple output = (Tuple)buf.removeFirst();
                ExampleTuple tOut = new ExampleTuple();
                tOut.copyFrom(output);
                if (lineageTracer != null) {
              List<Tuple> children = lineageTracer.getFlattenChildren(output);
              if(children != null) {
                //the output tuple we get is not a example tuple. so we take it out and put in the converted exampletuple
                lineageTracer.removeFlattenMap(output);
                lineageTracer.addFlattenMap(tOut, children);
              }
                lineageTracer.insert(tOut);
                if (lastAdded != null) {
                  if(((ExampleTuple)lastAdded).isSynthetic()) tOut.makeSynthetic();
                  lineageTracer.union(lastAdded, tOut);   // update lineage (assumes one-to-many relationship between tuples added to pipeline and output!!)
                  //lineageTracer.union(tOut, lastAdded);
                }
            }
                /*if (lineageTracer != null) {
View Full Code Here

            // find all tuples in each input pertaining to the group of interest, and combine the
            // data into a single tuple
           
            Tuple output;
            ExampleTuple tOut = new ExampleTuple();
            if (outputType == LogicalOperator.AMENDABLE) output = new AmendableTuple(1 + inputs.length, smallestGroup);
            else output = new Tuple(1 + inputs.length);

            // set first field to the group tuple
            output.setField(0, smallestGroup);
            tOut.copyFrom(output);
            if (lineageTracer != null) lineageTracer.insert(tOut);

            boolean done = true;
            for (int i = 0; i < inputs.length; i++) {
                DataBag b = BagFactory.getInstance().newDefaultBag();

                while (sortedInputs[i].size() > 0) {
                    Datum g = sortedInputs[i].get(0)[0];

                    Tuple t = (Tuple) sortedInputs[i].get(0)[1];

                    if (g.compareTo(smallestGroup) < 0) {
                        sortedInputs[i].remove(0); // discard this tuple
                    } else if (g.equals(smallestGroup)) {
                        b.add(t);
                        //if (lineageTracer != null) lineageTracer.union(t, output);   // update lineage
                        if (lineageTracer != null) {
                          if(((ExampleTuple)t).isSynthetic()) tOut.makeSynthetic();
                          lineageTracer.union(t, tOut);   // update lineage
                        }
                        sortedInputs[i].remove(0);
                    } else {
                        break;
View Full Code Here

   
    @Override
    public Tuple getNext() throws IOException {
      Tuple t = lf.getNext();
      if(lineageTracer != null) {
        ExampleTuple tOut = new ExampleTuple();
        if(t != null) {
          tOut.copyFrom(t);
          return tOut;
        }
        return null;
      }
      return t;
View Full Code Here

TOP

Related Classes of org.apache.pig.data.ExampleTuple

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.