Package org.apache.hadoop.mapred.lib

Source Code of org.apache.hadoop.mapred.lib.MobiusDelegatingInputFormat

package org.apache.hadoop.mapred.lib;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

import com.ebay.erl.mobius.core.ConfigureConstants;
import com.ebay.erl.mobius.core.mapred.AbstractMobiusMapper;
import com.ebay.erl.mobius.core.mapred.MultiInputsHelpersRepository;

/**
* <p>
* This product is licensed under the Apache License,  Version 2.0,
* available at http://www.apache.org/licenses/LICENSE-2.0.
*
* This product contains portions derived from Apache hadoop which is
* licensed under the Apache License, Version 2.0, available at
* http://hadoop.apache.org.
*
* © 2007 – 2012 eBay Inc., Evan Chiu, Woody Zhou, Neel Sundaresan
*
* @param <K>
* @param <V>
*/
@SuppressWarnings({ "deprecation" })
public class MobiusDelegatingInputFormat <K, V> extends DelegatingInputFormat<K, V>
  private Map<URI, String> _URI_TO_DATASETID_MAPPING;
 
  private List<URI> _INPUT_URIS;
 
  private final String _INIT_KEY = "";
 
  private static final Log LOGGER = LogFactory.getLog(MobiusDelegatingInputFormat.class);
 
  // getting the mapper which can process the input split
  public Class<AbstractMobiusMapper> getMapper(InputSplit split, JobConf conf)
    throws IOException
  {
    TaggedInputSplit taggedSplit  = (TaggedInputSplit)split;
    InputSplit inputSplit      = taggedSplit.getInputSplit();
    URI currentFileURI  = MultiInputsHelpersRepository.getInstance(conf).getURIBySplit(inputSplit, conf);
   
    try
    {
      String[] pathToMapperMappings = conf.get("mapred.input.dir.mappers").split(",");
      for( String aPathToMapper:pathToMapperMappings)
      {
        //System.out.println("aPathToMapper:"+aPathToMapper);
        //System.out.println("currentFileURI:"+currentFileURI.toString());
       
        String[] data = aPathToMapper.split(";");
        URI path = new URI(data[0]);
        URI relative = path.relativize(currentFileURI);
       
        //System.out.println("relative:"+relative);
       
       
        String mapperClassName = data[1];
        if( currentFileURI.equals(path) || !relative.equals(currentFileURI) )
        {
          return (Class<AbstractMobiusMapper>)Class.forName(mapperClassName);
        }
      }
    }catch(Exception e)
    {
      throw new RuntimeException(e);
    }
    return null;
  }
 

  @Override
  public RecordReader<K, V> getRecordReader(InputSplit split, JobConf conf, Reporter reporter) throws IOException
 
    this.setupLookupTables(conf);
   
    String datasetID = getDatasetIDBySplit(split, conf);
   
    conf.set (ConfigureConstants.CURRENT_DATASET_ID, datasetID);
    RecordReader<K, V> reader = super.getRecordReader (split, conf, reporter);
    return reader;   
  }
 
 
  private String getDatasetIDBySplit(InputSplit split, JobConf conf)
    throws IOException
  {
    // The <code>split</code> is an instance of {@link TaggedInputSplit}
    // but the TaggedInputSplit is not a public class, so we need to place
    // this class under the package of org.apache.hadoop.mapred.lib.
   
    TaggedInputSplit taggedSplit  = (TaggedInputSplit)split;
    InputSplit inputSplit      = taggedSplit.getInputSplit();   
    URI currentFileURI        = MultiInputsHelpersRepository.getInstance(conf).getURIBySplit(inputSplit, conf);
    String currentFile        = currentFileURI.toString();
   
   
    LOGGER.debug("Using ["+currentFile+"] to locate current Dataset");
   
    String datasetID = null;
    for( URI anInput:_INPUT_URIS )
    {
      if( anInput.equals(currentFileURI) )
      {
        datasetID = _URI_TO_DATASETID_MAPPING.get (anInput);
        if ( datasetID == null || datasetID.trim ().length () == 0 )
          throw new IllegalArgumentException ("Dataet ID for the input path:[" + anInput+ "] did not set.");
      }
      else
      {
        // not equal, compute the relative URI
        URI relative = anInput.relativize(currentFileURI);
        if( !relative.equals(currentFileURI) )
        {
          // found the key
          datasetID = _URI_TO_DATASETID_MAPPING.get (anInput);
          if ( datasetID == null || datasetID.trim ().length () == 0 )
            throw new IllegalArgumentException ("Dataet ID for the input path:[" + anInput+ "] did not set.");
        }
      }
    }
   
    if( datasetID==null )
    {
      throw new IllegalArgumentException ("Cannot find dataset id using the given uri:[" + currentFile + "], " +
            ConfigureConstants.INPUT_TO_DATASET_MAPPING+":" + conf.get (ConfigureConstants.INPUT_TO_DATASET_MAPPING));
    }
   
    return datasetID;
  }
 
  private void setupLookupTables(JobConf conf)
  {
    // due to this bug: https://issues.apache.org/jira/browse/MAPREDUCE-1743
    // map.input.file is not set when using MultipleInputs, which is used in
    // {@link MobiusMultiInputs}, we need to set it.
    synchronized(_INIT_KEY)
    { 
      _URI_TO_DATASETID_MAPPING  = new TreeMap<URI, String>();
      _INPUT_URIS          = new ArrayList<URI>();
       
      // in the format of datasetID;input_uri(,datasetID;input_uri)*
      String[] mappings = conf.getStrings (ConfigureConstants.INPUT_TO_DATASET_MAPPING);
      for(String aMapping:mappings )
      {
        String[] data     = aMapping.split(";");
        String datasetID   = data[0];
        String input_uri   = data[1];
         
        try
        {
          URI anInput   = new URI(input_uri);
          _INPUT_URIS.add(anInput);
          _URI_TO_DATASETID_MAPPING.put(anInput, datasetID);
        }
        catch (URISyntaxException e)
        {
          throw new RuntimeException(e);
        }
      }
    }
     
    Collections.sort(_INPUT_URIS);
    // reverse the order so the system can check the URI from most specific to
    // less specific
    Collections.reverse(_INPUT_URIS);
  }
}
TOP

Related Classes of org.apache.hadoop.mapred.lib.MobiusDelegatingInputFormat

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.