Package cascading

Source Code of cascading.BasicPipesPlatformTest

/*
* Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package cascading;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import cascading.flow.Flow;
import cascading.operation.Aggregator;
import cascading.operation.Filter;
import cascading.operation.Function;
import cascading.operation.Identity;
import cascading.operation.aggregator.Count;
import cascading.operation.expression.ExpressionFilter;
import cascading.operation.function.UnGroup;
import cascading.operation.regex.RegexFilter;
import cascading.operation.regex.RegexParser;
import cascading.operation.regex.RegexSplitter;
import cascading.pipe.CoGroup;
import cascading.pipe.Each;
import cascading.pipe.Every;
import cascading.pipe.GroupBy;
import cascading.pipe.Pipe;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import org.junit.Test;

import static data.InputData.*;

/**
* These tests execute basic function using field positions, not names. so there will be duplicates with
* FieldedPipesPlatformTest
*/
public class BasicPipesPlatformTest extends PlatformTestCase
  {
  public BasicPipesPlatformTest()
    {
    }

  /**
   * Test the count aggregator function
   *
   * @throws IOException
   */
  @Test
  public void testCount() throws Exception
    {
    runTestCount( "count", new Fields( 1 ), new Fields( 0 ), new Fields( 0, 1 ) );
    }

  @Test
  public void testCount2() throws Exception
    {
    runTestCount( "count2", new Fields( 1 ), new Fields( "count" ), new Fields( 0, "count" ) );
    }

  @Test
  public void testCount3() throws Exception
    {
    runTestCount( "count3", new Fields( 1 ), new Fields( "count" ), Fields.ALL );
    }

  @Test
  public void testCount4() throws Exception
    {
    runTestCount( "count4", Fields.ALL, new Fields( "count" ), Fields.ALL );
    }

  void runTestCount( String name, Fields argumentSelector, Fields fieldDeclaration, Fields outputSelector ) throws Exception
    {
    getPlatform().copyFromLocal( inputFileIps );

    Tap source = getPlatform().getTextFile( Fields.size( 2 ), inputFileIps );
    Tap sink = getPlatform().getTextFile( Fields.size( 1 ), getOutputPath( name ), SinkMode.REPLACE );

    Pipe pipe = new Pipe( "count" );
    pipe = new GroupBy( pipe, new Fields( 1 ) );
    pipe = new Every( pipe, argumentSelector, new Count( fieldDeclaration ), outputSelector );

    Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );

    flow.start(); // simple test for start
    flow.complete();

    validateLength( flow, 17 );
    assertTrue( getSinkAsList( flow ).contains( new Tuple( "63.123.238.8\t2" ) ) );
    }

  /**
   * A slightly more complex pipe
   *
   * @throws IOException
   */
  @Test
  public void testSimple() throws Exception
    {
    copyFromLocal( inputFileApache );

    Tap source = getPlatform().getTextFile( Fields.size( 2 ), inputFileApache );
    Tap sink = getPlatform().getTextFile( Fields.size( 1 ), getOutputPath( "simple" ), SinkMode.REPLACE );

    Pipe pipe = new Pipe( "test" );

    Function parser = new RegexParser( "^[^ ]*" );

    pipe = new Each( pipe, new Fields( 1 ), parser, new Fields( 0, 2 ) );

    // test that selector against incoming creates proper outgoing
    pipe = new Each( pipe, new Fields( 1 ), new Identity() );

    pipe = new GroupBy( pipe, new Fields( 0 ) );

    Aggregator counter = new Count();

    pipe = new Every( pipe, new Fields( 0 ), counter, new Fields( 0, 1 ) );

    Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );

    flow.complete();

    validateLength( flow, 8, 1 );
    }

  /**
   * tests that the Fields.ARGS declarator properly resolves into a declarator
   *
   * @throws Exception
   */
  @Test
  public void testSimpleResult() throws Exception
    {
    copyFromLocal( inputFileLower );

    Tap source = getPlatform().getTextFile( Fields.size( 2 ), inputFileLower );
    Tap sink = getPlatform().getTextFile( Fields.size( 1 ), getOutputPath( "simpleresult" ), SinkMode.REPLACE );

    Pipe pipe = new Pipe( "test" );

    // skip the first line
    pipe = new Each( pipe, new Fields( 0 ), new ExpressionFilter( "$0 == 0", Long.class ) );

    pipe = new Each( pipe, new Fields( 1 ), new Identity() );

    pipe = new Each( pipe, Fields.ALL, new RegexFilter( "a|b|c" ) );

    pipe = new GroupBy( pipe, new Fields( 0 ) );

    Aggregator counter = new Count();

    pipe = new Every( pipe, new Fields( 0 ), counter, new Fields( 0, 1 ) );

    Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );

    flow.complete();

    validateLength( flow, 2, 1 );
    }

  @Test
  public void testSimpleRelative() throws Exception
    {
    copyFromLocal( inputFileApache );

    Tap source = getPlatform().getTextFile( Fields.size( 2 ), inputFileApache );
    Tap sink = getPlatform().getTextFile( getOutputPath( "simplerelative" ), SinkMode.REPLACE );

    Pipe pipe = new Pipe( "test" );

    Function parser = new RegexParser( "^[^ ]*" );

    pipe = new Each( pipe, new Fields( -1 ), parser, new Fields( -1 ) );

    pipe = new GroupBy( pipe, new Fields( 0 ) );

    Aggregator counter = new Count();

    pipe = new Every( pipe, new Fields( 0 ), counter, new Fields( 0, 1 ) );

    Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );

    flow.complete();

    validateLength( flow, 8 );
    }

  @Test
  public void testCoGroup() throws Exception
    {
    copyFromLocal( inputFileLower );

    Tap sourceLower = getPlatform().getTextFile( Fields.size( 2 ), inputFileLower );
    Tap sourceUpper = getPlatform().getTextFile( Fields.size( 2 ), inputFileUpper );

    Map sources = new HashMap();

    sources.put( "lower", sourceLower );
    sources.put( "upper", sourceUpper );

    // using null pos so all fields are written
    Tap sink = getPlatform().getTextFile( Fields.size( 1 ), getOutputPath( "complexcogroup" ), SinkMode.REPLACE );

    Function splitter = new RegexSplitter( Fields.size( 2 ), " " );

    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( 1 ), splitter, Fields.RESULTS );
    Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( 1 ), splitter, Fields.RESULTS );

    Pipe splice = new CoGroup( pipeLower, new Fields( 0 ), pipeUpper, new Fields( 0 ) );

    Flow flow = getPlatform().getFlowConnector().connect( sources, sink, splice );

    flow.complete();

    validateLength( flow, 5 );

    List<Tuple> results = getSinkAsList( flow );

    assertTrue( results.contains( new Tuple( "1\ta\t1\tA" ) ) );
    assertTrue( results.contains( new Tuple( "2\tb\t2\tB" ) ) );
    }

  @Test
  public void testUnGroup() throws Exception
    {
    copyFromLocal( inputFileJoined );

    Tap source = getPlatform().getTextFile( Fields.size( 2 ), inputFileJoined );
    Tap sink = getPlatform().getTextFile( getOutputPath( "ungrouped" ), SinkMode.REPLACE );

    Pipe pipe = new Pipe( "test" );

    pipe = new Each( pipe, new Fields( 1 ), new RegexSplitter( Fields.size( 3 ) ) );

    pipe = new Each( pipe, new UnGroup( Fields.size( 2 ), new Fields( 0 ), Fields.fields( new Fields( 1 ), new Fields( 2 ) ) ) );

    Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );

    flow.complete();

    validateLength( flow, 10 );
    }

  @Test
  public void testFilterAll() throws Exception
    {
    copyFromLocal( inputFileApache );

    Tap source = getPlatform().getTextFile( Fields.size( 2 ), inputFileApache );
    Tap sink = getPlatform().getTextFile( getOutputPath( "filterall" ), SinkMode.REPLACE );

    Pipe pipe = new Pipe( "test" );

    Filter filter = new RegexFilter( ".*", true );

    pipe = new Each( pipe, new Fields( 1 ), filter );

    Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );

    flow.complete();

    validateLength( flow, 0 );
    }

  @Test
  public void testFilter() throws Exception
    {
    copyFromLocal( inputFileApache );

    Tap source = getPlatform().getTextFile( Fields.size( 2 ), inputFileApache );
    Tap sink = getPlatform().getTextFile( getOutputPath( "filter" ), SinkMode.REPLACE );

    Pipe pipe = new Pipe( "test" );

    Filter filter = new RegexFilter( "^68.*" );

    pipe = new Each( pipe, new Fields( 1 ), filter );

    Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );

    flow.complete();

    validateLength( flow, 3 );
    }

  @Test
  public void testSimpleChain() throws Exception
    {
    copyFromLocal( inputFileApache );

    Tap source = getPlatform().getTextFile( Fields.size( 2 ), inputFileApache );
    Tap sink = getPlatform().getTextFile( getOutputPath( "simplechain" ), SinkMode.REPLACE );

    Pipe pipe = new Pipe( "test" );

    Function parser = new RegexParser( "^[^ ]*" );

    pipe = new Each( pipe, new Fields( 1 ), parser, new Fields( 2 ) );

    pipe = new GroupBy( pipe, new Fields( 0 ) );

    pipe = new Every( pipe, new Fields( 0 ), new Count(), new Fields( 0, 1 ) );

    // add a second group to force a new map/red
    pipe = new GroupBy( pipe, new Fields( 0 ) );

    Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );

    flow.complete();

    validateLength( flow, 8 );
    }

  @Test
  public void testReplace() throws Exception
    {
    copyFromLocal( inputFileApache );

    Tap source = getPlatform().getTextFile( Fields.size( 2 ), inputFileApache );
    Tap sink = getPlatform().getTextFile( getOutputPath( "replace" ), SinkMode.REPLACE );

    Pipe pipe = new Pipe( "test" );

    Function parser = new RegexParser( Fields.ARGS, "^[^ ]*" );
    pipe = new Each( pipe, new Fields( 1 ), parser, Fields.REPLACE );

    Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );

    flow.complete();

    validateLength( flow, 10, 2, Pattern.compile( "\\d+\\s\\d+\\s[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}" ) );
    }

  @Test
  public void testSwap() throws Exception
    {
    copyFromLocal( inputFileApache );

    Tap source = getPlatform().getTextFile( Fields.size( 2 ), inputFileApache );
    Tap sink = getPlatform().getTextFile( getOutputPath( "swap" ), SinkMode.REPLACE );

    Pipe pipe = new Pipe( "test" );

    Function parser = new RegexParser( new Fields( 0 ), "^[^ ]*" );
    pipe = new Each( pipe, new Fields( 1 ), parser, Fields.SWAP );

    Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );

    flow.complete();

    validateLength( flow, 10, 2, Pattern.compile( "^\\d+\\s\\d+\\s[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}$" ) );
    }
  }
TOP

Related Classes of cascading.BasicPipesPlatformTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.