Package cascading.flow.tez.util

Source Code of cascading.flow.tez.util.TezUtil

/*
* Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package cascading.flow.tez.util;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import cascading.CascadingException;
import cascading.flow.FlowException;
import cascading.flow.hadoop.util.HadoopUtil;
import cascading.tap.hadoop.io.MultiInputSplit;
import cascading.util.Util;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.URL;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.tez.common.TezUtils;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.mapreduce.input.MRInput;
import org.apache.tez.mapreduce.lib.MRReader;
import org.apache.tez.mapreduce.output.MROutput;
import org.apache.tez.runtime.api.AbstractLogicalInput;
import org.apache.tez.runtime.api.AbstractLogicalOutput;
import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.api.LogicalOutput;
import org.apache.tez.runtime.api.MergedLogicalInput;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.hadoop.yarn.api.ApplicationConstants.CLASS_PATH_SEPARATOR;
import static org.apache.hadoop.yarn.api.ApplicationConstants.Environment.CLASSPATH;
import static org.apache.hadoop.yarn.api.ApplicationConstants.Environment.PWD;
import static org.apache.tez.common.TezUtils.createConfFromByteString;
import static org.apache.tez.common.TezUtils.createConfFromUserPayload;
import static org.apache.tez.mapreduce.hadoop.MRInputHelpers.parseMRInputPayload;

/**
*
*/
public class TezUtil
  {
  private static final Logger LOG = LoggerFactory.getLogger( TezUtil.class );

  /**
   * Attempting to localize all new JobConf calls
   *
   * @param configuration
   * @return
   */
  public static JobConf asJobConf( Configuration configuration )
    {
    return new JobConf( configuration );
    }

  public static TezConfiguration createTezConf( Map<Object, Object> properties, TezConfiguration defaultJobconf )
    {
    TezConfiguration jobConf = defaultJobconf == null ? new TezConfiguration() : new TezConfiguration( defaultJobconf );

    if( properties == null )
      return jobConf;

    Set<Object> keys = new HashSet<Object>( properties.keySet() );

    // keys will only be grabbed if both key/value are String, so keep orig keys
    if( properties instanceof Properties )
      keys.addAll( ( (Properties) properties ).stringPropertyNames() );

    for( Object key : keys )
      {
      Object value = properties.get( key );

      if( value == null && properties instanceof Properties && key instanceof String )
        value = ( (Properties) properties ).getProperty( (String) key );

      if( value == null ) // don't stuff null values
        continue;

      // don't let these objects pass, even though toString is called below.
      if( value instanceof Class || value instanceof TezConfiguration )
        continue;

      jobConf.set( key.toString(), value.toString() );
      }

    return jobConf;
    }

  public static UserGroupInformation getCurrentUser()
    {
    try
      {
      return UserGroupInformation.getCurrentUser();
      }
    catch( IOException exception )
      {
      throw new CascadingException( "unable to get current user", exception );
      }
    }

  public static String getEdgeSourceID( LogicalInput input, Configuration configuration )
    {
    String id = configuration.get( "cascading.node.source" );

    if( id == null )
      throw new IllegalStateException( "no source id found: " + input.getClass().getName() );

    return id;
    }

  public static String getEdgeSinkID( LogicalOutput output, Configuration configuration )
    {
    String id = configuration.get( "cascading.node.sink" );

    if( id == null )
      throw new IllegalStateException( "no sink id found: " + output.getClass().getName() );

    return id;
    }

  public static Configuration getInputConfiguration( LogicalInput input )
    {
    try
      {
      if( input instanceof MergedLogicalInput )
        input = (LogicalInput) Util.getFirst( ( (MergedLogicalInput) input ).getInputs() );

      if( input instanceof MRInput )
        return createConfFromByteString( parseMRInputPayload( ( (MRInput) input ).getContext().getUserPayload() ).getConfigurationBytes() );

      if( input instanceof AbstractLogicalInput )
        return createConfFromUserPayload( ( (AbstractLogicalInput) input ).getContext().getUserPayload() );
      }
    catch( IOException exception )
      {
      throw new FlowException( "unable to unpack payload", exception );
      }

    throw new IllegalStateException( "unknown input type: " + input.getClass().getName() );
    }

  public static Configuration getOutputConfiguration( LogicalOutput output )
    {
    try
      {
      if( output instanceof MROutput )
        return TezUtils.createConfFromUserPayload( ( (MROutput) output ).getContext().getUserPayload() );

      if( output instanceof AbstractLogicalOutput )
        return createConfFromUserPayload( ( (AbstractLogicalOutput) output ).getContext().getUserPayload() );
      }
    catch( IOException exception )
      {
      throw new FlowException( "unable to unpack payload", exception );
      }

    throw new IllegalStateException( "unknown input type: " + output.getClass().getName() );
    }

  public static void setSourcePathForSplit( MRInput input, MRReader reader, Configuration configuration )
    {
    Path path = null;

    if( Util.returnInstanceFieldIfExistsSafe( input, "useNewApi" ) )
      {
      org.apache.hadoop.mapreduce.InputSplit newInputSplit = (org.apache.hadoop.mapreduce.InputSplit) reader.getSplit();

      if( newInputSplit instanceof org.apache.hadoop.mapreduce.lib.input.FileSplit )
        path = ( (org.apache.hadoop.mapreduce.lib.input.FileSplit) newInputSplit ).getPath();
      }
    else
      {
      org.apache.hadoop.mapred.InputSplit oldInputSplit = (org.apache.hadoop.mapred.InputSplit) reader.getSplit();

      if( oldInputSplit instanceof org.apache.hadoop.mapred.FileSplit )
        path = ( (org.apache.hadoop.mapred.FileSplit) oldInputSplit ).getPath();
      }

    if( path != null )
      configuration.set( MultiInputSplit.CASCADING_SOURCE_PATH, path.toString() );
    }

  public static Map<Path, Path> addToClassPath( Configuration config, String stagingRoot, List<String> classpath, LocalResourceType resourceType, Map<String, LocalResource> localResources, Map<String, String> environment )
    {
    if( classpath == null )
      return null;

    // given to fully qualified
    Map<String, Path> localPaths = new HashMap<>();
    Map<String, Path> remotePaths = new HashMap<>();

    HadoopUtil.resolvePaths( config, classpath, stagingRoot, localPaths, remotePaths );

    try
      {
      LocalFileSystem localFS = HadoopUtil.getLocalFS( config );

      for( String fileName : localPaths.keySet() )
        {
        Path artifact = localPaths.get( fileName );
        Path remotePath = remotePaths.get( fileName );

        if( remotePath == null )
          remotePath = artifact;

        addResource( localResources, environment, fileName, localFS.getFileStatus( artifact ), remotePath, resourceType );
        }

      FileSystem defaultFS = HadoopUtil.getDefaultFS( config );

      for( String fileName : remotePaths.keySet() )
        {
        Path artifact = remotePaths.get( fileName );
        Path localPath = localPaths.get( fileName );

        if( localPath != null )
          continue;

        addResource( localResources, environment, fileName, defaultFS.getFileStatus( artifact ), artifact, resourceType );
        }
      }
    catch( IOException exception )
      {
      throw new FlowException( "unable to set remote resource paths", exception );
      }

    return HadoopUtil.getCommonPaths( localPaths, remotePaths );
    }

  protected static void addResource( Map<String, LocalResource> localResources, Map<String, String> environment, String fileName, FileStatus stats, Path fullPath, LocalResourceType type ) throws IOException
    {
    if( localResources.containsKey( fileName ) )
      throw new FlowException( "duplicate filename added to classpath resources: " + fileName );

    URL yarnUrlFromPath = ConverterUtils.getYarnUrlFromPath( fullPath );
    long len = stats.getLen();
    long modificationTime = stats.getModificationTime();

    LocalResource resource = LocalResource.newInstance(
      yarnUrlFromPath,
      type,
      LocalResourceVisibility.APPLICATION,
      len,
      modificationTime );

    if( type == LocalResourceType.PATTERN )
      {
      // todo: parametrize this for dynamic inclusion below
      String pattern = "(?:classes/|lib/).*";

      resource.setPattern( pattern );

      if( environment != null )
        {
        String current = "";

        current += PWD.$$() + File.separator + fileName + File.separator + "*" + CLASS_PATH_SEPARATOR;
        current += PWD.$$() + File.separator + fileName + File.separator + "lib" + File.separator + "*" + CLASS_PATH_SEPARATOR;
        current += PWD.$$() + File.separator + fileName + File.separator + "classes" + File.separator + "*" + CLASS_PATH_SEPARATOR;

        String classPath = environment.get( CLASSPATH.name() );

        if( classPath == null )
          classPath = "";
        else if( !classPath.startsWith( CLASS_PATH_SEPARATOR ) )
          classPath += CLASS_PATH_SEPARATOR;

        classPath += current;

        LOG.info( "adding to cluster side classpath: {} ", classPath );

        environment.put( CLASSPATH.name(), classPath );
        }
      }

    localResources.put( fileName, resource );
    }
  }
TOP

Related Classes of cascading.flow.tez.util.TezUtil

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.