Package cascading.platform.hadoop

Source Code of cascading.platform.hadoop.BaseHadoopPlatform

/*
* Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package cascading.platform.hadoop;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;

import cascading.platform.TestPlatform;
import cascading.scheme.Scheme;
import cascading.scheme.hadoop.TextDelimited;
import cascading.scheme.hadoop.TextLine;
import cascading.scheme.util.DelimitedParser;
import cascading.scheme.util.FieldTypeResolver;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tap.hadoop.Hfs;
import cascading.tap.hadoop.PartitionTap;
import cascading.tap.hadoop.TemplateTap;
import cascading.tap.hadoop.util.Hadoop18TapUtil;
import cascading.tap.partition.Partition;
import cascading.tuple.Fields;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
*
*/
public abstract class BaseHadoopPlatform<Config extends Configuration> extends TestPlatform
  {
  private static final Logger LOG = LoggerFactory.getLogger( BaseHadoopPlatform.class );

  public transient static FileSystem fileSys;
  public transient static Configuration configuration;
  public transient static Map<Object, Object> properties = new HashMap<Object, Object>();

  protected String logger;

  public BaseHadoopPlatform()
    {
    this.logger = System.getProperty( "log4j.logger" );
    this.numMappers = 4;
    this.numReducers = 1;
    }

  @Override
  public boolean isMapReduce()
    {
    return true;
    }

  @Override
  public void setNumMappers( int numMapTasks )
    {
    if( numMapTasks > 0 )
      this.numMappers = numMapTasks;
    }

  @Override
  public void setNumReducers( int numReduceTasks )
    {
    if( numReduceTasks > 0 )
      this.numReducers = numReduceTasks;
    }

  @Override
  public void setNumGatherPartitions( int numGatherPartitions )
    {
    if( numGatherPartitions > 0 )
      this.numGatherPartitions = numGatherPartitions;
    }

  @Override
  public Map<Object, Object> getProperties()
    {
    return new HashMap<Object, Object>( properties );
    }

  @Override
  public void tearDown()
    {
    }

  public abstract Config getConfiguration();

  public boolean isHDFSAvailable()
    {
    try
      {
      FileSystem fileSystem = FileSystem.get( new URI( "hdfs:", null, null ), configuration );

      return fileSystem != null;
      }
    catch( IOException exception ) // if no hdfs, a no filesystem for scheme io exception will be caught
      {
      LOG.warn( "unable to get hdfs filesystem", exception );
      }
    catch( URISyntaxException exception )
      {
      throw new RuntimeException( "internal failure", exception );
      }

    return false;
    }

  @Override
  public void copyFromLocal( String inputFile ) throws IOException
    {
    if( !new File( inputFile ).exists() )
      throw new FileNotFoundException( "data file not found: " + inputFile );

    if( !isUseCluster() )
      return;

    Path path = new Path( safeFileName( inputFile ) );

    if( !fileSys.exists( path ) )
      FileUtil.copy( new File( inputFile ), fileSys, path, false, configuration );
    }

  @Override
  public void copyToLocal( String outputFile ) throws IOException
    {
    if( !isUseCluster() )
      return;

    Path path = new Path( safeFileName( outputFile ) );

    if( !fileSys.exists( path ) )
      throw new FileNotFoundException( "data file not found: " + outputFile );

    File file = new File( outputFile );

    if( file.exists() )
      file.delete();

    if( fileSys.isFile( path ) )
      {
      // its a file, so just copy it over
      FileUtil.copy( fileSys, path, file, false, configuration );
      return;
      }

    // it's a directory
    file.mkdirs();

    FileStatus contents[] = fileSys.listStatus( path );

    for( FileStatus fileStatus : contents )
      {
      Path currentPath = fileStatus.getPath();

      if( currentPath.getName().startsWith( "_" ) ) // filter out temp and log dirs
        continue;

      FileUtil.copy( fileSys, currentPath, new File( file, currentPath.getName() ), false, configuration );
      }
    }

  @Override
  public boolean remoteExists( String outputFile ) throws IOException
    {
    return fileSys.exists( new Path( safeFileName( outputFile ) ) );
    }

  @Override
  public boolean remoteRemove( String outputFile, boolean recursive ) throws IOException
    {
    return fileSys.delete( new Path( safeFileName( outputFile ) ), recursive );
    }

  @Override
  public Tap getTap( Scheme scheme, String filename, SinkMode mode )
    {
    return new Hfs( scheme, safeFileName( filename ), mode );
    }

  @Override
  public Tap getTextFile( Fields sourceFields, Fields sinkFields, String filename, SinkMode mode )
    {
    if( sourceFields == null )
      return new Hfs( new TextLine(), safeFileName( filename ), mode );

    return new Hfs( new TextLine( sourceFields, sinkFields ), safeFileName( filename ), mode );
    }

  @Override
  public Tap getDelimitedFile( Fields fields, boolean hasHeader, String delimiter, String quote, Class[] types, String filename, SinkMode mode )
    {
    return new Hfs( new TextDelimited( fields, hasHeader, delimiter, quote, types ), safeFileName( filename ), mode );
    }

  @Override
  public Tap getDelimitedFile( Fields fields, boolean skipHeader, boolean writeHeader, String delimiter, String quote, Class[] types, String filename, SinkMode mode )
    {
    return new Hfs( new TextDelimited( fields, skipHeader, writeHeader, delimiter, quote, types ), safeFileName( filename ), mode );
    }

  @Override
  public Tap getDelimitedFile( String delimiter, String quote, FieldTypeResolver fieldTypeResolver, String filename, SinkMode mode )
    {
    return new Hfs( new TextDelimited( true, new DelimitedParser( delimiter, quote, fieldTypeResolver ) ), safeFileName( filename ), mode );
    }

  @Override
  public Tap getTemplateTap( Tap sink, String pathTemplate, int openThreshold )
    {
    return new TemplateTap( (Hfs) sink, pathTemplate, openThreshold );
    }

  @Override
  public Tap getTemplateTap( Tap sink, String pathTemplate, Fields fields, int openThreshold )
    {
    return new TemplateTap( (Hfs) sink, pathTemplate, fields, openThreshold );
    }

  @Override
  public Tap getPartitionTap( Tap sink, Partition partition, int openThreshold )
    {
    return new PartitionTap( (Hfs) sink, partition, openThreshold );
    }

  @Override
  public Scheme getTestConfigDefScheme()
    {
    return new HadoopConfigDefScheme( new Fields( "line" ) );
    }

  @Override
  public Scheme getTestFailScheme()
    {
    return new HadoopFailScheme( new Fields( "line" ) );
    }

  @Override
  public Comparator getLongComparator( boolean reverseSort )
    {
    return new TestLongComparator( reverseSort );
    }

  @Override
  public Comparator getStringComparator( boolean reverseSort )
    {
    return new TestStringComparator( reverseSort );
    }

  @Override
  public String getHiddenTemporaryPath()
    {
    return Hadoop18TapUtil.TEMPORARY_PATH;
    }

  /**
   * Replaces characters, that are not allowed by HDFS with an "_".
   *
   * @param filename The filename to make safe
   * @return The filename with all non-supported characters removed.
   */
  protected String safeFileName( String filename )
    {
    return filename.replace( ":", "_" ); // not using Util.cleansePathName as it removes /
    }
  }
TOP

Related Classes of cascading.platform.hadoop.BaseHadoopPlatform

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.