/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.test.recordJobs.relational;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.Iterator;
import org.apache.log4j.Logger;
import eu.stratosphere.api.common.Plan;
import eu.stratosphere.api.common.Program;
import eu.stratosphere.api.common.ProgramDescription;
import eu.stratosphere.api.java.record.operators.FileDataSink;
import eu.stratosphere.api.java.record.operators.FileDataSource;
import eu.stratosphere.api.java.record.functions.JoinFunction;
import eu.stratosphere.api.java.record.functions.MapFunction;
import eu.stratosphere.api.java.record.functions.ReduceFunction;
import eu.stratosphere.api.java.record.operators.JoinOperator;
import eu.stratosphere.api.java.record.operators.MapOperator;
import eu.stratosphere.api.java.record.operators.ReduceOperator;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.test.recordJobs.util.IntTupleDataInFormat;
import eu.stratosphere.test.recordJobs.util.StringTupleDataOutFormat;
import eu.stratosphere.test.recordJobs.util.Tuple;
import eu.stratosphere.types.IntValue;
import eu.stratosphere.types.Record;
import eu.stratosphere.types.StringValue;
import eu.stratosphere.util.Collector;
/**
* Implementation of the TPC-H Query 4 as a stratosphere program.
*/
@SuppressWarnings("serial")
public class TPCHQuery4 implements Program, ProgramDescription {
/** Logger used by {@link #getPlan(String...)} to report a wrong number of arguments. */
private static final Logger LOGGER = Logger.getLogger(TPCHQuery4.class);

/** Degree of parallelism set on every source, operator and sink of the plan; 1 unless overridden by the arguments. */
private int degreeOfParallelism = 1;

/** Path of the orders input, taken from the second argument. */
private String ordersInputPath;

/** Path of the line item input, taken from the third argument. */
private String lineItemInputPath;

/** Path the result is written to, taken from the fourth argument. */
private String outputPath;
/**
* Small {@link MapFunction} to filter out the irrelevant orders.
*
*/
//@SameKey
public static class OFilter extends MapFunction {

    /** Start of the order-date window (inclusive), formatted as {@code yyyy-MM-dd}. */
    private final String dateParamString = "1995-01-01";

    /** Parser for the {@code yyyy-MM-dd} date strings; one instance per function object. */
    private final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");

    /** Calendar used to compute the end of the window. */
    private final GregorianCalendar gregCal = new GregorianCalendar();

    /** Parsed start of the window (inclusive); set in {@link #open(Configuration)}. */
    private Date paramDate;

    /** End of the window (exclusive), three months after {@link #paramDate}; set in {@link #open(Configuration)}. */
    private Date plusThreeMonths;

    /**
     * Parses the date parameter and derives the end of the three-month window.
     *
     * @param parameters the function configuration (not read here)
     * @throws RuntimeException if the date parameter cannot be parsed
     */
    @Override
    public void open(Configuration parameters) {
        try {
            this.paramDate = sdf.parse(this.dateParamString);
            this.plusThreeMonths = getPlusThreeMonths(paramDate);
        } catch (ParseException e) {
            throw new RuntimeException("Cannot parse date parameter '" + this.dateParamString + "'", e);
        }
    }

    /**
     * Forwards the record unchanged if its order date lies in the half-open window
     * {@code [paramDate, paramDate + 3 months)}; otherwise the record is dropped.
     *
     * @param record the input record; field 1 is read as a {@link Tuple} whose attribute 4 is the order date
     * @param out the collector receiving the qualifying records
     * @throws RuntimeException if the order date cannot be parsed
     */
    @Override
    public void map(Record record, Collector<Record> out) throws Exception {
        Tuple tuple = record.getField(1, Tuple.class);
        String orderStringDate = tuple.getStringValueAt(4);

        Date orderDate;
        try {
            orderDate = sdf.parse(orderStringDate);
        } catch (ParseException e) {
            throw new RuntimeException("Cannot parse order date '" + orderStringDate + "'", e);
        }

        // TPC-H Q4 selects o_orderdate >= DATE and o_orderdate < DATE + 3 months, so the
        // lower bound is inclusive: an order placed exactly on the parameter date qualifies.
        if (!orderDate.before(paramDate) && orderDate.before(plusThreeMonths)) {
            out.collect(record);
        }
    }

    /**
     * Calculates the {@link Date} which is three months after the given one.
     *
     * @param paramDate the start date
     * @return a {@link Date} three months later
     */
    private Date getPlusThreeMonths(Date paramDate) {
        gregCal.setTime(paramDate);
        gregCal.add(Calendar.MONTH, 3);
        return gregCal.getTime();
    }
}
/**
* Simple filter for the line item selection. It drops all the tuples that do
* not satisfy the "l_commitdate < l_receiptdate" condition.
*
*/
//@SameKey
public static class LiFilter extends MapFunction {

    /** Parser for the {@code yyyy-MM-dd} date strings of a line item. */
    private final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");

    /**
     * Forwards the record unchanged when its commit date is strictly before its receipt date.
     *
     * @param record the input record; field 1 is read as a {@link Tuple}, attributes 11 and 12
     *        are parsed as commit date and receipt date
     * @param out the collector receiving the qualifying records
     * @throws RuntimeException if either date cannot be parsed
     */
    @Override
    public void map(Record record, Collector<Record> out) throws Exception {
        final Tuple lineItem = record.getField(1, Tuple.class);
        final String commitText = lineItem.getStringValueAt(11);
        final String receiptText = lineItem.getStringValueAt(12);

        Date committed;
        Date received;
        try {
            committed = sdf.parse(commitText);
            received = sdf.parse(receiptText);
        } catch (ParseException e) {
            throw new RuntimeException(e);
        }

        // Guard clause: anything not committed strictly before receipt is filtered out.
        if (!committed.before(received)) {
            return;
        }
        out.collect(record);
    }
}
/**
* Implements the equijoin on the orderkey and performs the projection on
* the order priority as well.
*
*/
public static class JoinLiO extends JoinFunction {

    /**
     * Emits the order record with field 0 replaced by a {@link StringValue} taken from the
     * projected order tuple. The line item record is not read.
     *
     * @param order the order side of the join; field 1 is read as a {@link Tuple}
     * @param line the line item side of the join (unused)
     * @param out the collector receiving the re-keyed order record
     */
    @Override
    public void join(Record order, Record line, Collector<Record> out) throws Exception {
        final Tuple projected = order.getField(1, Tuple.class);
        // NOTE(review): 32 is presumably a bit mask selecting attribute 5 (the order priority
        // in the TPC-H orders layout) - confirm against Tuple.project.
        projected.project(32);

        // After the projection the remaining value sits at position 0; it becomes the new key.
        final StringValue rekeyed = new StringValue(projected.getStringValueAt(0));
        order.setField(0, rekeyed);

        // NOTE(review): if the framework calls join once per matching pair, an order with
        // several qualifying line items is emitted several times - confirm this is intended.
        out.collect(order);
    }
}
/**
* Implements the count(*) part.
*
*/
//@SameKey
public static class CountAgg extends ReduceFunction {

    /**
     * Counts the records supplied by the iterator and emits the last one of them with
     * field 1 replaced by a single-attribute {@link Tuple} holding the count as a string.
     * Nothing is emitted when the iterator supplies no record.
     *
     * @param records the records to count
     * @param out the collector receiving the single result record
     */
    @Override
    public void reduce(Iterator<Record> records, Collector<Record> out) throws Exception {
        long count = 0;
        Record last = null;
        while (records.hasNext()) {
            last = records.next();
            count++;
        }

        // An empty iterator leaves no record to carry the result; emitting null would
        // only push the failure into the collector or a downstream operator.
        if (last == null) {
            return;
        }

        Tuple countTuple = new Tuple();
        countTuple.addAttribute(Long.toString(count));
        last.setField(1, countTuple);
        out.collect(last);
    }
}
/**
 * Assembles the plan: both inputs are filtered by a map operator, the filtered orders and
 * line items are joined on field 0 (key type {@link IntValue}), the join result is grouped
 * on field 0 (key type {@link StringValue}) and counted, and the counts are written to the sink.
 *
 * @param args exactly four arguments as listed by {@link #getDescription()}; with any other
 *        number a warning is logged and all three paths are set to the empty string
 * @return the plan named "TPC-H 4"
 */
@Override
public Plan getPlan(String... args) throws IllegalArgumentException {
    final boolean argsUsable = args != null && args.length == 4;
    if (argsUsable) {
        setArgs(args);
    } else {
        LOGGER.warn("number of arguments do not match!");
        this.ordersInputPath = "";
        this.lineItemInputPath = "";
        this.outputPath = "";
    }

    // Every node of the plan runs with the same degree of parallelism.
    final int dop = this.degreeOfParallelism;

    // --- sources and sink ---
    FileDataSource orders = new FileDataSource(new IntTupleDataInFormat(), this.ordersInputPath, "Orders");
    orders.setDegreeOfParallelism(dop);

    FileDataSource lineItems = new FileDataSource(new IntTupleDataInFormat(), this.lineItemInputPath, "LineItems");
    lineItems.setDegreeOfParallelism(dop);

    FileDataSink result = new FileDataSink(new StringTupleDataOutFormat(), this.outputPath, "Output");
    result.setDegreeOfParallelism(dop);

    // --- filters, wired to their source right away ---
    MapOperator lineFilter = MapOperator.builder(LiFilter.class).name("LineItemFilter").build();
    lineFilter.setDegreeOfParallelism(dop);
    lineFilter.setInput(lineItems);

    MapOperator ordersFilter = MapOperator.builder(OFilter.class).name("OrdersFilter").build();
    ordersFilter.setDegreeOfParallelism(dop);
    ordersFilter.setInput(orders);

    // --- join: orders first, line items second ---
    JoinOperator join = JoinOperator.builder(JoinLiO.class, IntValue.class, 0, 0)
        .name("OrdersLineitemsJoin")
        .build();
    join.setDegreeOfParallelism(dop);
    join.setFirstInput(ordersFilter);
    join.setSecondInput(lineFilter);

    // --- aggregation over the join output ---
    ReduceOperator aggregation = ReduceOperator.builder(CountAgg.class, StringValue.class, 0)
        .name("AggregateGroupBy")
        .build();
    aggregation.setDegreeOfParallelism(dop);
    aggregation.setInput(join);

    result.setInput(aggregation);
    return new Plan(result, "TPC-H 4");
}
/**
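* Copies the positional arguments into the corresponding fields.
* @param args the arguments in the order: degree of parallelism, orders input path, line item input path, output path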
*/
private void setArgs(String[] args) {
    // The parallelism is parsed before any field is written, so a malformed number
    // leaves the current settings untouched.
    final int parsedDop = Integer.parseInt(args[0]);
    degreeOfParallelism = parsedDop;

    // The remaining arguments are taken over verbatim.
    ordersInputPath = args[1];
    lineItemInputPath = args[2];
    outputPath = args[3];
}
/**
 * Returns the usage string naming the four positional parameters.
 *
 * @return the parameter description
 */
@Override
public String getDescription() {
    final String usage = "Parameters: [dop] [orders-input] [lineitem-input] [output]";
    return usage;
}
}