/*
* Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cascading.flow.tez.util;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import cascading.CascadingException;
import cascading.flow.FlowException;
import cascading.flow.hadoop.util.HadoopUtil;
import cascading.tap.hadoop.io.MultiInputSplit;
import cascading.util.Util;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.URL;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.tez.common.TezUtils;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.mapreduce.input.MRInput;
import org.apache.tez.mapreduce.lib.MRReader;
import org.apache.tez.mapreduce.output.MROutput;
import org.apache.tez.runtime.api.AbstractLogicalInput;
import org.apache.tez.runtime.api.AbstractLogicalOutput;
import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.api.LogicalOutput;
import org.apache.tez.runtime.api.MergedLogicalInput;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.hadoop.yarn.api.ApplicationConstants.CLASS_PATH_SEPARATOR;
import static org.apache.hadoop.yarn.api.ApplicationConstants.Environment.CLASSPATH;
import static org.apache.hadoop.yarn.api.ApplicationConstants.Environment.PWD;
import static org.apache.tez.common.TezUtils.createConfFromByteString;
import static org.apache.tez.common.TezUtils.createConfFromUserPayload;
import static org.apache.tez.mapreduce.hadoop.MRInputHelpers.parseMRInputPayload;
/**
 * Static helpers used by the Apache Tez platform support: building Tez/Hadoop configuration objects,
 * unpacking the configuration carried in Tez input/output payloads, and registering classpath
 * artifacts as YARN local resources.
 */
public class TezUtil
{
  private static final Logger LOG = LoggerFactory.getLogger( TezUtil.class );

  /**
   * Wraps the given configuration in a new {@link JobConf}.
   * <p>
   * Exists so that all {@code new JobConf(...)} calls are localized in one place.
   *
   * @param configuration the configuration the new JobConf is initialized from
   * @return a new JobConf instance
   */
  public static JobConf asJobConf( Configuration configuration )
  {
    return new JobConf( configuration );
  }

  /**
   * Creates a new {@link TezConfiguration}, optionally seeded from the given defaults, and copies the
   * given properties into it using the {@code toString()} form of each key and value.
   * <p>
   * Entries whose value is null, a {@link Class}, or a {@link TezConfiguration} are skipped.
   *
   * @param properties     the properties to copy, may be null
   * @param defaultJobconf the configuration to copy defaults from, may be null
   * @return a new TezConfiguration, never null
   */
  public static TezConfiguration createTezConf( Map<Object, Object> properties, TezConfiguration defaultJobconf )
  {
    TezConfiguration jobConf = defaultJobconf == null ? new TezConfiguration() : new TezConfiguration( defaultJobconf );

    if( properties == null )
      return jobConf;

    Properties asProperties = properties instanceof Properties ? (Properties) properties : null;
    Set<Object> keys = new HashSet<Object>( properties.keySet() );

    // stringPropertyNames() only reports entries where both key and value are Strings,
    // so the original key set is kept and the String names are merged into it
    if( asProperties != null )
      keys.addAll( asProperties.stringPropertyNames() );

    for( Object key : keys )
    {
      Object value = properties.get( key );

      // a key reported by stringPropertyNames() may not be resolvable through Map#get()
      if( value == null && asProperties != null && key instanceof String )
        value = asProperties.getProperty( (String) key );

      if( value == null ) // don't stuff null values
        continue;

      // don't let these objects pass, even though toString is called below.
      if( value instanceof Class || value instanceof TezConfiguration )
        continue;

      jobConf.set( key.toString(), value.toString() );
    }

    return jobConf;
  }

  /**
   * Returns the current Hadoop user.
   *
   * @return the result of {@link UserGroupInformation#getCurrentUser()}
   * @throws CascadingException if the user lookup fails with an IOException
   */
  public static UserGroupInformation getCurrentUser()
  {
    try
    {
      return UserGroupInformation.getCurrentUser();
    }
    catch( IOException exception )
    {
      throw new CascadingException( "unable to get current user", exception );
    }
  }

  /**
   * Reads the {@code cascading.node.source} property from the given configuration.
   *
   * @param input         the input the id is looked up for, only used in the error message
   * @param configuration the configuration to read the id from
   * @return the source id, never null
   * @throws IllegalStateException if the property is not set
   */
  public static String getEdgeSourceID( LogicalInput input, Configuration configuration )
  {
    String id = configuration.get( "cascading.node.source" );

    if( id == null )
      throw new IllegalStateException( "no source id found: " + input.getClass().getName() );

    return id;
  }

  /**
   * Reads the {@code cascading.node.sink} property from the given configuration.
   *
   * @param output        the output the id is looked up for, only used in the error message
   * @param configuration the configuration to read the id from
   * @return the sink id, never null
   * @throws IllegalStateException if the property is not set
   */
  public static String getEdgeSinkID( LogicalOutput output, Configuration configuration )
  {
    String id = configuration.get( "cascading.node.sink" );

    if( id == null )
      throw new IllegalStateException( "no sink id found: " + output.getClass().getName() );

    return id;
  }

  /**
   * Rebuilds the {@link Configuration} stored in the user payload of the given input.
   * <p>
   * For a {@link MergedLogicalInput} the first of its inputs is inspected instead.
   *
   * @param input the logical input to unpack the configuration from
   * @return the configuration decoded from the payload
   * @throws FlowException         if the payload cannot be decoded
   * @throws IllegalStateException if the input is of an unsupported type
   */
  public static Configuration getInputConfiguration( LogicalInput input )
  {
    try
    {
      if( input instanceof MergedLogicalInput )
        input = (LogicalInput) Util.getFirst( ( (MergedLogicalInput) input ).getInputs() );

      // the MRInput payload wraps the configuration bytes and must be parsed first
      if( input instanceof MRInput )
        return createConfFromByteString( parseMRInputPayload( ( (MRInput) input ).getContext().getUserPayload() ).getConfigurationBytes() );

      if( input instanceof AbstractLogicalInput )
        return createConfFromUserPayload( ( (AbstractLogicalInput) input ).getContext().getUserPayload() );
    }
    catch( IOException exception )
    {
      throw new FlowException( "unable to unpack payload", exception );
    }

    throw new IllegalStateException( "unknown input type: " + input.getClass().getName() );
  }

  /**
   * Rebuilds the {@link Configuration} stored in the user payload of the given output.
   *
   * @param output the logical output to unpack the configuration from
   * @return the configuration decoded from the payload
   * @throws FlowException         if the payload cannot be decoded
   * @throws IllegalStateException if the output is of an unsupported type
   */
  public static Configuration getOutputConfiguration( LogicalOutput output )
  {
    try
    {
      if( output instanceof MROutput )
        return TezUtils.createConfFromUserPayload( ( (MROutput) output ).getContext().getUserPayload() );

      if( output instanceof AbstractLogicalOutput )
        return createConfFromUserPayload( ( (AbstractLogicalOutput) output ).getContext().getUserPayload() );
    }
    catch( IOException exception )
    {
      throw new FlowException( "unable to unpack payload", exception );
    }

    throw new IllegalStateException( "unknown output type: " + output.getClass().getName() );
  }

  /**
   * Stores the path of the file split currently held by the reader under
   * {@link MultiInputSplit#CASCADING_SOURCE_PATH} in the given configuration.
   * <p>
   * The configuration is left untouched when the split is not a file split of either the
   * {@code mapred} or the {@code mapreduce} API.
   *
   * @param input         the input the reader belongs to; no longer inspected, retained for signature compatibility
   * @param reader        the reader providing the current split
   * @param configuration the configuration to update
   */
  public static void setSourcePathForSplit( MRInput input, MRReader reader, Configuration configuration )
  {
    // decide by the runtime type of the split rather than by reflecting on the private
    // "useNewApi" field of MRInput: a missing field or a split of the other API then
    // simply results in no path being set instead of a runtime exception
    Object split = reader.getSplit();
    Path path = null;

    if( split instanceof org.apache.hadoop.mapreduce.lib.input.FileSplit )
      path = ( (org.apache.hadoop.mapreduce.lib.input.FileSplit) split ).getPath();
    else if( split instanceof org.apache.hadoop.mapred.FileSplit )
      path = ( (org.apache.hadoop.mapred.FileSplit) split ).getPath();

    if( path != null )
      configuration.set( MultiInputSplit.CASCADING_SOURCE_PATH, path.toString() );
  }

  /**
   * Resolves the given classpath elements and registers each of them as a YARN local resource.
   * <p>
   * Elements resolved to a local path are registered using their local file status and their remote
   * path, falling back to the local path when no remote path was resolved. Elements that were only
   * resolved to a remote path are registered using the file status reported by the default file system.
   *
   * @param config         the configuration used to resolve paths and file systems
   * @param stagingRoot    the staging root handed to {@code HadoopUtil.resolvePaths}
   * @param classpath      the classpath elements to add, may be null
   * @param resourceType   the resource type every element is registered as
   * @param localResources the map receiving the created resources, keyed by file name
   * @param environment    the environment whose CLASSPATH is extended for PATTERN resources, may be null
   * @return the result of {@code HadoopUtil.getCommonPaths} for the resolved local and remote paths
   * (presumably the local to remote pairs that still need to be copied, confirm against HadoopUtil),
   * or null if the given classpath is null
   * @throws FlowException if a file status cannot be read, or a file name is registered twice
   */
  public static Map<Path, Path> addToClassPath( Configuration config, String stagingRoot, List<String> classpath, LocalResourceType resourceType, Map<String, LocalResource> localResources, Map<String, String> environment )
  {
    if( classpath == null )
      return null;

    // given to fully qualified
    Map<String, Path> localPaths = new HashMap<>();
    Map<String, Path> remotePaths = new HashMap<>();

    HadoopUtil.resolvePaths( config, classpath, stagingRoot, localPaths, remotePaths );

    try
    {
      LocalFileSystem localFS = HadoopUtil.getLocalFS( config );

      for( Map.Entry<String, Path> local : localPaths.entrySet() )
      {
        String fileName = local.getKey();
        Path artifact = local.getValue();
        Path remotePath = remotePaths.get( fileName );

        if( remotePath == null )
          remotePath = artifact;

        addResource( localResources, environment, fileName, localFS.getFileStatus( artifact ), remotePath, resourceType );
      }

      FileSystem defaultFS = HadoopUtil.getDefaultFS( config );

      for( Map.Entry<String, Path> remote : remotePaths.entrySet() )
      {
        String fileName = remote.getKey();

        // already registered by the loop over the local paths above
        if( localPaths.get( fileName ) != null )
          continue;

        Path artifact = remote.getValue();

        addResource( localResources, environment, fileName, defaultFS.getFileStatus( artifact ), artifact, resourceType );
      }
    }
    catch( IOException exception )
    {
      throw new FlowException( "unable to set remote resource paths", exception );
    }

    return HadoopUtil.getCommonPaths( localPaths, remotePaths );
  }

  /**
   * Creates an application visible {@link LocalResource} for the given path and registers it under
   * the given file name.
   * <p>
   * For {@link LocalResourceType#PATTERN} resources the {@code classes/} and {@code lib/} sub-trees are
   * selected and, when an environment is given, the matching working directory entries are appended
   * to its CLASSPATH value.
   *
   * @param localResources the map receiving the new resource
   * @param environment    the environment to extend the CLASSPATH of, may be null
   * @param fileName       the name the resource is registered under
   * @param stats          the file status supplying the length and modification time
   * @param fullPath       the path the resource URL is created from
   * @param type           the type of the resource
   * @throws FlowException if a resource is already registered under the given file name
   * @throws IOException   declared for callers, not thrown by the visible code itself
   */
  protected static void addResource( Map<String, LocalResource> localResources, Map<String, String> environment, String fileName, FileStatus stats, Path fullPath, LocalResourceType type ) throws IOException
  {
    if( localResources.containsKey( fileName ) )
      throw new FlowException( "duplicate filename added to classpath resources: " + fileName );

    URL yarnUrlFromPath = ConverterUtils.getYarnUrlFromPath( fullPath );
    long len = stats.getLen();
    long modificationTime = stats.getModificationTime();

    LocalResource resource = LocalResource.newInstance(
      yarnUrlFromPath,
      type,
      LocalResourceVisibility.APPLICATION,
      len,
      modificationTime );

    if( type == LocalResourceType.PATTERN )
    {
      // todo: parametrize this for dynamic inclusion below
      String pattern = "(?:classes/|lib/).*";

      resource.setPattern( pattern );

      if( environment != null )
      {
        String resourceRoot = PWD.$$() + File.separator + fileName + File.separator;

        // every element, including the last, is terminated by a separator
        String current = resourceRoot + "*" + CLASS_PATH_SEPARATOR
          + resourceRoot + "lib" + File.separator + "*" + CLASS_PATH_SEPARATOR
          + resourceRoot + "classes" + File.separator + "*" + CLASS_PATH_SEPARATOR;

        String classPath = environment.get( CLASSPATH.name() );

        // only insert a separator when the existing value is non-empty and does not already
        // end with one, otherwise empty or fused classpath elements are created
        if( classPath == null || classPath.isEmpty() )
          classPath = "";
        else if( !classPath.endsWith( CLASS_PATH_SEPARATOR ) )
          classPath += CLASS_PATH_SEPARATOR;

        classPath += current;

        LOG.info( "adding to cluster side classpath: {} ", classPath );

        environment.put( CLASSPATH.name(), classPath );
      }
    }

    localResources.put( fileName, resource );
  }
}