Package com.ebay.erl.mobius.core.model

Examples of com.ebay.erl.mobius.core.model.Tuple


  }
 
 
  private DataJoinKey getKey(Tuple tuple, Sorter[] sorter, Byte datasetID, AbstractMobiusMapper mapper, Configuration conf)
  {
    Tuple columnsUsedToSort = new Tuple();
    for(Sorter aSorter:sorter )
    {
      String name    = aSorter.getColumn();
      Object value  = tuple.get(name);
      columnsUsedToSort.insert(name, value);
    }
   
    DataJoinKey nkey = new DataJoinKey(datasetID, columnsUsedToSort, mapper.extractSortValueKeyword(tuple), mapper.getSortValueComparator());
    nkey.setConf(conf);
    return nkey;
View Full Code Here


   */
  @Override
  public Tuple parse(NullWritable inkey, Tuple invalue)
      throws IllegalArgumentException, IOException
  {
    Tuple result = (Tuple)invalue;
    result.setSchema(this.getSchema());
    return result;
  }
View Full Code Here

        }
      }
      reporterSet = true;
    }
   
    Tuple record = null;
    try
    {
      record = this.parse(key, value);
    }
    catch(IllegalFormatException e)
    {
      this._COUNTER_INVALIDATE_FORMAT_RECORD++;
      this.updateCounter(this.dataset_display_id, "INVALIDATE_RECORDS", this._COUNTER_INVALIDATE_FORMAT_RECORD);
      return;
    }
   
    this._COUNTER_INPUT_RECORD++;
    this.updateCounter(this.dataset_display_id, "INPUT_RECORDS", this._COUNTER_INPUT_RECORD);
   
   
    Iterable<Tuple> rows_to_be_output = new ArrayList<Tuple>();
    ((List<Tuple>)rows_to_be_output).add(record);
   
    // apply computed column if any
    if( this.computedColumns!=null )
    {
      for( ComputedColumns aComputedColumn:this.computedColumns )
      {
        aComputedColumn.reset();
        aComputedColumn.consume(Tuple.immutable(record));
       
        if ( aComputedColumn.getResult()!=null && aComputedColumn.getResult().size()>0 )
        {
          BigTupleList computedResult = aComputedColumn.getResult();
          if( computedResult.size()<5000 )
          {
            // use in memory cross product
            Iterable<Tuple>[] allValues = new Iterable[2];
            allValues[0] = rows_to_be_output;
            allValues[1] = aComputedColumn.getResult();
           
            rows_to_be_output = Util.inMemoryCrossProduct(allValues);
          }
          else
          {
            // computed result is too big, don't use in memory cross
            // product.
            Iterable<Tuple>[] allValues = new Iterable[2];
            allValues[0] = rows_to_be_output;
            allValues[1] = aComputedColumn.getResult();
           
            rows_to_be_output = Util.crossProduct(this.conf, reporter,  allValues);
          }
        }
      }     
    }
   
   
    // apply the criteria if any and prepare output
    for( Tuple aRow:rows_to_be_output)
    {
      Tuple out_key  = this.getKeyTuple(this.key_columns, aRow, null);
      Tuple out_value = null;
      if( !this._IS_MAP_ONLY_JOB )
      {
        // tuple will go to reducer phase, we use the sorted column
        // so the reducer can set the schema back correctly.
        out_value = this.getTuple(this.value_columns, aRow, Tuple.NULL);
View Full Code Here

  {
    if( columns==null || columns.length==0 )
      return defaultValue;
    else
    {
      Tuple t = new KeyTuple();
      for( String aColumn:columns )
      {
        t.insert(aColumn, record.get(aColumn));
      }
      return t;
    }
  }
View Full Code Here

  {
    if( columns==null || columns.length==0 )
      return defaultValue;
    else
    {
      Tuple t = new Tuple();
      for( String aColumn:columns )
      {
        t.insert(aColumn, record.get(aColumn));
      }
      return t;
    }
  }
View Full Code Here

  {
    if( type==Tuple.NULL_WRITABLE_TYPE )
      return NullWritable.get();
    else if (type==Tuple.TUPLE_TYPE)
    {
      Tuple newTuple = new Tuple();
      newTuple.readFields(input);
      return newTuple;
    }
    else
    {
      WritableComparable w = (WritableComparable)ReflectionUtils.newInstance( Util.getClass(input.readUTF()), conf);
View Full Code Here

        _compare_result = 0;
      }
      else if ( type1==Tuple.TUPLE_TYPE && type2==Tuple.TUPLE_TYPE )
      {
        // both are Tuple
        Tuple k1 = (Tuple)getKey(type1, d1);
        Tuple k2 = (Tuple)getKey(type2, d2);
        _compare_result = _COLUMN_COMPARATOR.compareKey(k1, k2, this.getSorter(), conf);
      }
      else
      {
        // DataJoinKey only supports NullWritable and Tuple for the DataJoinKey#KEY_FIELDNAME
View Full Code Here

      // times.
      if( valuesFromLastDataset!=null )
      {
        while( valuesFromLastDataset.hasNext() )
        {
          Tuple aRow = valuesFromLastDataset.next();
          this.rememberTuple(_lastDatasetID, aRow, reporter);
        }
       
        Iterable<Tuple> preCrossProduct = Util.crossProduct(conf, reporter, this.valuesForAllDatasets.values().toArray(new BigTupleList[0]));
        BigTupleList btl = new BigTupleList(reporter);
        for( Tuple aRow:preCrossProduct )
        {
          this.computeExtendFunctions(aRow, btl, this.multiDatasetExtendFunction);
          this.computeGroupFunctions(aRow, this.multiDatasetGroupFunction);
        }
       
        if( btl.size()>0 )
          toBeCrossProduct.add(btl);
        for(GroupFunction fun:this.multiDatasetGroupFunction )
          toBeCrossProduct.add(fun.getResult());
       
        valuesFromLastDataset = this.valuesForAllDatasets.get(_lastDatasetID).iterator();
      }
      else
      {
        if( this.multiDatasetExtendFunction.size()>0 )
        {
          BigTupleList btl = new BigTupleList(reporter);
          this.computeExtendFunctions(null, btl, this.multiDatasetExtendFunction);
          toBeCrossProduct.add(btl);
        }
        for(GroupFunction fun:this.multiDatasetGroupFunction )
          toBeCrossProduct.add(fun.getNoMatchResult(nullReplacement));
      }
    }
    // finished the computation of multi-dataset functions, start
    // to compute the projectable functions results for last
    // dataset
    //
    // first compute the cross product of all other functions
    Iterable<Tuple> others = null;
    if( toBeCrossProduct.size()>0 )
    {
      Iterable<Tuple>[] array = new Iterable[toBeCrossProduct.size()];
      for( int i=0;i<toBeCrossProduct.size();i++ )
      {
        array[i] = toBeCrossProduct.get(i);
      }
     
      others = Util.crossProduct(conf, reporter, array);
    }
   
    if( valuesFromLastDataset==null )
    {// outer-join, so <code>others</code> is always not null.
      List<BigTupleList> nullResult = new ArrayList<BigTupleList>();
     
      if( this.singleDatasetExtendFunction.get(_lastDatasetID)!=null )
      {
        BigTupleList btl = new BigTupleList(reporter);
        this.computeExtendFunctions(null, btl, this.singleDatasetExtendFunction.get(_lastDatasetID));
        nullResult.add(btl);
      }
      if( this.singleDatasetGroupFunction.get(_lastDatasetID)!=null )
      {
        for(GroupFunction fun:this.singleDatasetGroupFunction.get(_lastDatasetID) )
          nullResult.add(fun.getNoMatchResult(nullReplacement));
      }
     
      for( Tuple t1:Util.crossProduct(conf, reporter, nullResult) )
      {
        for( Tuple t2:others )
        {
          this.output(Tuple.merge(t1, t2), output, reporter);
        }
      }
    }
    else
    {
      boolean hasNoGroupFunctionForLastDS = this.singleDatasetGroupFunction.get(this._lastDatasetID)==null;
      while( valuesFromLastDataset.hasNext() )
      {
        Tuple aRow = valuesFromLastDataset.next();
        aRow.setSchema(this.getSchemaByDatasetID(_lastDatasetID));       
        if(hasNoGroupFunctionForLastDS)
        {
          // there is no group function from the last DS, we can
          // do some optimization here: as we are streaming over the
          // values of last dataset, we also emit outputs.
       
         
          Tuple merged = new Tuple();
          for( ExtendFunction func:this.singleDatasetExtendFunction.get(_lastDatasetID) )
          {
            merged = Tuple.merge(merged, func.getResult(aRow));
          }
         
View Full Code Here

   */
  private void computeSingleDSFunctionsResults(Iterator<Tuple> tuples, Byte datasetID, Reporter reporter)
  {
    while( tuples.hasNext() )
    {
      Tuple aTuple = tuples.next();
      aTuple.setSchema(this.getSchemaByDatasetID(datasetID));
     
      if( this.requirePreCrossProduct )
      {
        // some functions need columns from multiple
        // dataset, so remember this value for later
View Full Code Here

   */
  private void computeExtendFunctions(Tuple aRow, BigTupleList result, List<ExtendFunction> functions)
  {
    if( functions!=null && !functions.isEmpty() )
    {
      Tuple mergedResult = new Tuple();
      for( ExtendFunction aFunction:functions )
      {
        if( aRow!=null )
          mergedResult = Tuple.merge(mergedResult, aFunction.getResult(aRow));
        else
View Full Code Here

TOP

Related Classes of com.ebay.erl.mobius.core.model.Tuple

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.