/**
* Copyright 2011-2014 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.compiler.flow.mapreduce.parallel;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.io.NullWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.asakusafw.compiler.common.Naming;
import com.asakusafw.compiler.common.Precondition;
import com.asakusafw.compiler.flow.ExternalIoDescriptionProcessor.SourceInfo;
import com.asakusafw.compiler.flow.FlowCompilingEnvironment;
import com.asakusafw.compiler.flow.Location;
import com.asakusafw.compiler.flow.jobflow.CompiledStage;
import com.asakusafw.compiler.flow.stage.CompiledType;
import com.asakusafw.runtime.stage.AbstractStageClient;
import com.asakusafw.runtime.stage.BaseStageClient;
import com.asakusafw.runtime.stage.StageInput;
import com.asakusafw.runtime.stage.StageOutput;
import com.asakusafw.runtime.stage.collector.SortableSlot;
import com.asakusafw.runtime.stage.collector.WritableSlot;
import com.asakusafw.runtime.trace.TraceLocation;
import com.asakusafw.utils.collections.Lists;
import com.asakusafw.utils.java.model.syntax.Comment;
import com.asakusafw.utils.java.model.syntax.CompilationUnit;
import com.asakusafw.utils.java.model.syntax.Expression;
import com.asakusafw.utils.java.model.syntax.FormalParameterDeclaration;
import com.asakusafw.utils.java.model.syntax.Javadoc;
import com.asakusafw.utils.java.model.syntax.MethodDeclaration;
import com.asakusafw.utils.java.model.syntax.ModelFactory;
import com.asakusafw.utils.java.model.syntax.Name;
import com.asakusafw.utils.java.model.syntax.QualifiedName;
import com.asakusafw.utils.java.model.syntax.SimpleName;
import com.asakusafw.utils.java.model.syntax.Statement;
import com.asakusafw.utils.java.model.syntax.Type;
import com.asakusafw.utils.java.model.syntax.TypeBodyDeclaration;
import com.asakusafw.utils.java.model.syntax.TypeDeclaration;
import com.asakusafw.utils.java.model.syntax.TypeParameterDeclaration;
import com.asakusafw.utils.java.model.util.AttributeBuilder;
import com.asakusafw.utils.java.model.util.ExpressionBuilder;
import com.asakusafw.utils.java.model.util.ImportBuilder;
import com.asakusafw.utils.java.model.util.JavadocBuilder;
import com.asakusafw.utils.java.model.util.Models;
import com.asakusafw.utils.java.model.util.TypeBuilder;
/**
* parallel sortを行うステージクライアントクラスを生成する。
* @since 0.1.0
* @version 0.5.1
*/
public class ParallelSortClientEmitter {
/**
* Logger for this class.
*/
static final Logger LOG = LoggerFactory.getLogger(ParallelSortClientEmitter.class);
/**
* Name of the compiler extra attribute that switches this emitter into legacy mode.
* Legacy mode is considered enabled whenever the attribute is present (non-null),
* whatever its value is; in legacy mode the sort phase (partitioner and reducer)
* is always generated, even if no slot declares sort properties.
*/
public static final String ATTRIBUTE_LEGACY = ParallelSortClientEmitter.class.getName() + ".legacy";
/**
* Compiling environment that receives the generated source and supplies the model factory.
*/
private final FlowCompilingEnvironment environment;
/**
* Creates a new instance.
* @param environment the compiling environment
* @throws IllegalArgumentException if {@code environment} is {@code null}
*/
public ParallelSortClientEmitter(FlowCompilingEnvironment environment) {
Precondition.checkMustNotBeNull(environment, "environment"); //$NON-NLS-1$
this.environment = environment;
}
/**
 * Tells whether legacy mode is requested for the given environment.
 * Legacy mode is on when the {@link #ATTRIBUTE_LEGACY} extra attribute is present in the
 * compiler options; the attribute value itself is not inspected.
 * @param environment the current compiling environment
 * @return {@code true} if the attribute is set, otherwise {@code false}
 */
static boolean legacy(FlowCompilingEnvironment environment) {
    Object attribute = environment.getOptions().getExtraAttribute(ATTRIBUTE_LEGACY);
    return attribute != null;
}
/**
 * Generates the stage client class for the epilogue stage of the given module and
 * returns information about the generated stage.
 * @param moduleId the module identifier
 * @param slots the slots to be processed
 * @param outputDirectory the output directory of the stage
 * @return the compiled stage, carrying the qualified name of the generated client class
 * @throws IOException if the class could not be generated
 * @throws IllegalArgumentException if any argument is {@code null}
 */
public CompiledStage emit(
        String moduleId,
        List<ResolvedSlot> slots,
        Location outputDirectory) throws IOException {
    Precondition.checkMustNotBeNull(moduleId, "moduleId"); //$NON-NLS-1$
    Precondition.checkMustNotBeNull(slots, "slots"); //$NON-NLS-1$
    Precondition.checkMustNotBeNull(outputDirectory, "outputDirectory"); //$NON-NLS-1$
    LOG.debug("{}に対するエピローグジョブ実行クライアントを生成します", moduleId);

    // Build the client source and hand it over to the environment.
    CompilationUnit unit = new Engine(environment, moduleId, slots, outputDirectory).generate();
    environment.emit(unit);

    // The client class is the first type declared in the generated unit.
    Name enclosingPackage = unit.getPackageDeclaration().getName();
    SimpleName clientSimpleName = unit.getTypeDeclarations().get(0).getName();
    ModelFactory models = environment.getModelFactory();
    QualifiedName clientName = models.newQualifiedName(enclosingPackage, clientSimpleName);

    LOG.debug("エピローグ\"{}\"のジョブ実行には{}が利用されます", moduleId, clientName);
    return new CompiledStage(clientName, Naming.getEpilogueName(moduleId));
}
private static class Engine {
/** Separator passed to {@code Location.toPath} when rendering locations as path strings. */
private static final char PATH_SEPARATOR = '/';
/** Compiling environment; supplies the model factory, the batch/flow IDs and the epilogue package name. */
private final FlowCompilingEnvironment environment;
/** Identifier of the module whose epilogue stage client is being generated. */
private final String moduleId;
/** Slots to process; each one contributes stage inputs and one stage output. */
private final List<ResolvedSlot> slots;
/** Location whose path is returned by the generated stage-output-path method. */
private final Location outputDirectory;
/** Java DOM factory obtained from the environment. */
private final ModelFactory factory;
/** Import builder bound to the epilogue package; every type reference in the generated source goes through it. */
private final ImportBuilder importer;
/**
 * Creates a new engine which generates the epilogue stage client of a single module.
 * The generated class is placed in the epilogue package that the environment reports
 * for {@code moduleId}, and imports are collected with the top-level strategy.
 * @param environment the compiling environment (must not be {@code null})
 * @param moduleId the module identifier (must not be {@code null})
 * @param slots the slots to process (must not be {@code null})
 * @param outputDirectory the stage output location (must not be {@code null})
 */
Engine(
        FlowCompilingEnvironment environment,
        String moduleId,
        List<ResolvedSlot> slots,
        Location outputDirectory) {
    assert environment != null;
    assert moduleId != null;
    assert slots != null;
    // outputDirectory is dereferenced later when the output path method is built,
    // so it gets the same sanity check as the other arguments.
    assert outputDirectory != null;
    this.environment = environment;
    this.moduleId = moduleId;
    this.slots = slots;
    this.outputDirectory = outputDirectory;
    this.factory = environment.getModelFactory();
    Name packageName = environment.getEpiloguePackageName(moduleId);
    this.importer = new ImportBuilder(
            factory,
            factory.newPackageDeclaration(packageName),
            ImportBuilder.Strategy.TOP_LEVEL);
}
/**
 * Generates the compilation unit that declares the stage client class.
 * @return the generated compilation unit, containing exactly one type and no comments
 * @throws IOException if generating the client (or the classes it refers to) failed
 */
public CompilationUnit generate() throws IOException {
    // The type is created before the import declarations are requested from the importer.
    List<TypeDeclaration> types = Collections.singletonList(createType());
    List<Comment> noComments = Collections.emptyList();
    return factory.newCompilationUnit(
            importer.getPackageDeclaration(),
            importer.toImportDeclarations(),
            types,
            noComments);
}
/**
 * Creates the declaration of the stage client class: a public final class extending
 * {@code AbstractStageClient}, annotated with {@code TraceLocation}, whose members are
 * the ID methods, the output path method, the stage input/output methods and the
 * shuffle methods, in that order.
 * @return the class declaration
 * @throws IOException if generating the members failed
 */
private TypeDeclaration createType() throws IOException {
    SimpleName clientName = factory.newSimpleName(Naming.getClientClass());
    importer.resolvePackageMember(clientName);

    List<TypeBodyDeclaration> body = Lists.create();
    body.addAll(createIdMethods());
    body.add(createStageOutputPath());
    body.add(createStageInputsMethod());
    body.add(createStageOutputsMethod());
    body.addAll(createShuffleMethods());

    Javadoc documentation = createJavadoc();
    List<TypeParameterDeclaration> noTypeParameters = Collections.emptyList();
    List<Type> noInterfaces = Collections.emptyList();
    return factory.newClassDeclaration(
            documentation,
            new AttributeBuilder(factory)
                .annotation(t(TraceLocation.class), createTraceLocationElements())
                .Public()
                .Final()
                .toAttributes(),
            clientName,
            noTypeParameters,
            t(AbstractStageClient.class),
            noInterfaces,
            body);
}
/**
 * Creates the element values of the {@code TraceLocation} annotation.
 * @return the elements {@code batchId}, {@code flowId} and {@code stageId}, in that order
 */
private Map<String, Expression> createTraceLocationElements() {
    Expression batchId = Models.toLiteral(factory, environment.getBatchId());
    Expression flowId = Models.toLiteral(factory, environment.getFlowId());
    Expression stageId = Models.toLiteral(factory, Naming.getEpilogueName(moduleId));

    // LinkedHashMap keeps the elements in insertion order.
    Map<String, Expression> elements = new LinkedHashMap<String, Expression>();
    elements.put("batchId", batchId);
    elements.put("flowId", flowId);
    elements.put("stageId", stageId);
    return elements;
}
/**
 * Creates the methods that return the batch ID, the flow ID and the stage ID
 * as string literals.
 * @return the three method declarations, in that order
 */
private List<MethodDeclaration> createIdMethods() {
    MethodDeclaration batchIdMethod = createValueMethod(
            BaseStageClient.METHOD_BATCH_ID,
            t(String.class),
            Models.toLiteral(factory, environment.getBatchId()));
    MethodDeclaration flowIdMethod = createValueMethod(
            BaseStageClient.METHOD_FLOW_ID,
            t(String.class),
            Models.toLiteral(factory, environment.getFlowId()));
    // The stage ID of this client is the epilogue name of the module.
    MethodDeclaration stageIdMethod = createValueMethod(
            BaseStageClient.METHOD_STAGE_ID,
            t(String.class),
            Models.toLiteral(factory, Naming.getEpilogueName(moduleId)));

    List<MethodDeclaration> methods = Lists.create();
    methods.add(batchIdMethod);
    methods.add(flowIdMethod);
    methods.add(stageIdMethod);
    return methods;
}
/**
 * Creates the method that returns the stage output path as a string literal.
 * The path is the output directory rendered with {@code '/'} as the separator.
 * @return the method declaration
 */
private MethodDeclaration createStageOutputPath() {
    Type returnType = t(String.class);
    Expression path = Models.toLiteral(factory, outputDirectory.toPath(PATH_SEPARATOR));
    return createValueMethod(AbstractStageClient.METHOD_STAGE_OUTPUT_PATH, returnType, path);
}
/**
 * Creates the method that returns the list of stage inputs.
 * <p>
 * The generated method body has the following shape:
 * </p>
 * <pre>
 * List&lt;StageInput&gt; results = new ArrayList&lt;StageInput&gt;();
 * Map&lt;String, String&gt; attributes = null;
 * // for each input of each slot:
 * attributes = new HashMap&lt;String, String&gt;();
 * attributes.put(key, value); // for each attribute of the input
 * results.add(new StageInput(path, Format.class, Mapper.class, attributes)); // for each location
 * return results;
 * </pre>
 * <p>
 * A mapper class is generated once per slot and shared by all inputs of that slot.
 * The attribute map, in contrast, is allocated once per <em>input</em>: every input
 * carries its own attributes, so sharing one map among the inputs of a slot would let
 * the attributes of one input leak into the stage inputs of its siblings.
 * All locations of a single input share that input's map.
 * </p>
 * @return the method declaration
 * @throws IOException if generating a mapper class failed
 */
private MethodDeclaration createStageInputsMethod() throws IOException {
    SimpleName list = factory.newSimpleName("results");
    SimpleName attributes = factory.newSimpleName("attributes");
    List<Statement> statements = Lists.create();
    // List<StageInput> results = new ArrayList<StageInput>();
    statements.add(new TypeBuilder(factory, t(ArrayList.class, t(StageInput.class)))
            .newObject()
            .toLocalVariableDeclaration(t(List.class, t(StageInput.class)), list));
    // Map<String, String> attributes = null;
    statements.add(new ExpressionBuilder(factory, Models.toNullLiteral(factory))
            .toLocalVariableDeclaration(t(Map.class, t(String.class), t(String.class)), attributes));
    for (ResolvedSlot slot : slots) {
        Type mapperType = generateMapper(slot);
        for (SourceInfo input : slot.getSource().getInputs()) {
            // attributes = new HashMap<String, String>();
            // A fresh map per input keeps the attributes of different inputs separate.
            statements.add(new ExpressionBuilder(factory, attributes)
                    .assignFrom(new TypeBuilder(factory, t(HashMap.class, t(String.class), t(String.class)))
                            .newObject()
                            .toExpression())
                    .toStatement());
            // attributes.put(<key>, <value>);
            for (Map.Entry<String, String> entry : input.getAttributes().entrySet()) {
                statements.add(new ExpressionBuilder(factory, attributes)
                        .method("put",
                                Models.toLiteral(factory, entry.getKey()),
                                Models.toLiteral(factory, entry.getValue()))
                        .toStatement());
            }
            // results.add(new StageInput(<path>, <format>.class, <mapper>.class, attributes));
            for (Location location : input.getLocations()) {
                statements.add(new ExpressionBuilder(factory, list)
                        .method("add", new TypeBuilder(factory, t(StageInput.class))
                                .newObject(
                                        Models.toLiteral(factory, location.toPath(PATH_SEPARATOR)),
                                        factory.newClassLiteral(t(input.getFormat())),
                                        factory.newClassLiteral(mapperType),
                                        attributes)
                                .toExpression())
                        .toStatement());
            }
        }
    }
    // return results;
    statements.add(new ExpressionBuilder(factory, list)
            .toReturnStatement());
    return factory.newMethodDeclaration(
            null,
            new AttributeBuilder(factory)
                .annotation(t(Override.class))
                .Protected()
                .toAttributes(),
            t(List.class, t(StageInput.class)),
            factory.newSimpleName(AbstractStageClient.METHOD_STAGE_INPUTS),
            Collections.<FormalParameterDeclaration>emptyList(),
            statements);
}
/**
 * Creates the method that returns the list of stage outputs.
 * The generated body declares a {@code results} list, adds one {@code StageOutput}
 * per slot (output name, {@code NullWritable} as the key class, the slot's value class
 * and the slot's output format class) and returns the list.
 * @return the method declaration
 */
private MethodDeclaration createStageOutputsMethod() {
    SimpleName results = factory.newSimpleName("results");
    List<Statement> body = Lists.create();

    // List<StageOutput> results = new ArrayList<StageOutput>();
    body.add(new TypeBuilder(factory, t(ArrayList.class, t(StageOutput.class)))
            .newObject()
            .toLocalVariableDeclaration(t(List.class, t(StageOutput.class)), results));

    for (ResolvedSlot slot : slots) {
        Expression valueClass = factory.newClassLiteral(t(slot.getValueClass().getType()));
        Class<?> formatClass = slot.getSource().getOutputFormatType();
        // new StageOutput(<name>, NullWritable.class, <value>.class, <format>.class)
        Expression newOutput = new TypeBuilder(factory, t(StageOutput.class))
                .newObject(
                        Models.toLiteral(factory, slot.getSource().getOutputName()),
                        factory.newClassLiteral(t(NullWritable.class)),
                        valueClass,
                        factory.newClassLiteral(t(formatClass)))
                .toExpression();
        // results.add(...);
        body.add(new ExpressionBuilder(factory, results)
                .method("add", newOutput)
                .toStatement());
    }

    // return results;
    body.add(new ExpressionBuilder(factory, results).toReturnStatement());

    List<FormalParameterDeclaration> noParameters = Collections.emptyList();
    return factory.newMethodDeclaration(
            null,
            new AttributeBuilder(factory)
                .annotation(t(Override.class))
                .Protected()
                .toAttributes(),
            t(List.class, t(StageOutput.class)),
            factory.newSimpleName(AbstractStageClient.METHOD_STAGE_OUTPUTS),
            noParameters,
            body);
}
/**
 * Creates the methods that describe the shuffle phase.
 * The shuffle key and value class methods are always created. The partitioner and
 * reducer class methods are created, and the reducer class generated, only when
 * {@link #doSort()} tells that sorting is required.
 * @return the method declarations
 * @throws IOException if generating the reducer class failed
 */
private List<MethodDeclaration> createShuffleMethods() throws IOException {
    List<MethodDeclaration> methods = Lists.create();

    Type keyType = importer.toType(SortableSlot.class);
    methods.add(createClassLiteralMethod(AbstractStageClient.METHOD_SHUFFLE_KEY_CLASS, keyType));

    Type valueType = importer.toType(WritableSlot.class);
    methods.add(createClassLiteralMethod(AbstractStageClient.METHOD_SHUFFLE_VALUE_CLASS, valueType));

    if (!doSort()) {
        // No sort requested: neither partitioner nor reducer is declared.
        return methods;
    }

    Type reducerType = generateReducer();
    Type partitionerType = importer.toType(SortableSlot.Partitioner.class);
    methods.add(createClassLiteralMethod(AbstractStageClient.METHOD_PARTITIONER_CLASS, partitionerType));
    methods.add(createClassLiteralMethod(AbstractStageClient.METHOD_REDUCER_CLASS, reducerType));
    return methods;
}
/**
 * Decides whether the generated stage needs the sort (reduce) phase.
 * @return {@code true} if legacy mode is enabled or at least one slot declares
 *     sort properties, otherwise {@code false}
 */
private boolean doSort() {
    boolean required = ParallelSortClientEmitter.legacy(environment);
    if (!required) {
        for (ResolvedSlot slot : slots) {
            if (!slot.getSortProperties().isEmpty()) {
                required = true;
                break;
            }
        }
    }
    return required;
}
/**
 * Emits the mapper class for the given slot and returns a reference to it.
 * @param slot the target slot
 * @return the type of the emitted mapper, resolved through the importer
 * @throws IOException if emitting the mapper failed
 */
private Type generateMapper(ResolvedSlot slot) throws IOException {
    assert slot != null;
    CompiledType mapper = new ParallelSortMapperEmitter(environment).emit(moduleId, slot);
    return importer.toType(mapper.getQualifiedName());
}
/**
 * Emits the reducer class that handles all slots and returns a reference to it.
 * @return the type of the emitted reducer, resolved through the importer
 * @throws IOException if emitting the reducer failed
 */
private Type generateReducer() throws IOException {
    CompiledType reducer = new ParallelSortReducerEmitter(environment).emit(moduleId, slots);
    return importer.toType(reducer.getQualifiedName());
}
/**
 * Creates the Javadoc attached to the generated client class.
 * The text is a fixed message pattern whose {@code {0}} argument is the module ID.
 * @return the Javadoc
 */
private Javadoc createJavadoc() {
    String pattern = "\"{0}\"のエピローグステージのジョブを実行するクライアント。";
    return new JavadocBuilder(factory).text(pattern, moduleId).toJavadoc();
}
/**
 * Creates a method that returns the class literal of the given type.
 * The declared return type is {@code Class} parameterized with that type.
 * @param methodName the name of the method to declare
 * @param type the type whose class literal is returned
 * @return the method declaration
 */
private MethodDeclaration createClassLiteralMethod(
        String methodName,
        Type type) {
    assert methodName != null;
    assert type != null;
    Type classOfType = t(Class.class, type);
    Expression literal = factory.newClassLiteral(type);
    return createValueMethod(methodName, classOfType, literal);
}
/**
 * Creates a parameterless {@code @Override protected} method whose body is a single
 * {@code return} of the given expression. The method has no Javadoc.
 * @param methodName the name of the method to declare
 * @param returnType the declared return type
 * @param expression the expression to return
 * @return the method declaration
 */
private MethodDeclaration createValueMethod(
        String methodName,
        Type returnType,
        Expression expression) {
    List<FormalParameterDeclaration> noParameters = Collections.emptyList();
    List<Statement> body = Collections.<Statement>singletonList(
            factory.newReturnStatement(expression));
    return factory.newMethodDeclaration(
            null,
            new AttributeBuilder(factory)
                .annotation(t(Override.class))
                .Protected()
                .toAttributes(),
            returnType,
            factory.newSimpleName(methodName),
            noParameters,
            body);
}
/**
 * Converts a reflective type into a Java DOM type, resolving it through the importer.
 * @param type the reflective type to convert
 * @param typeArgs the type arguments; when empty, the converted type is returned as is
 * @return the converted type, parameterized with {@code typeArgs} if any were given
 */
private Type t(java.lang.reflect.Type type, Type...typeArgs) {
    assert type != null;
    assert typeArgs != null;
    Type resolved = importer.toType(type);
    if (typeArgs.length > 0) {
        return factory.newParameterizedType(resolved, Arrays.asList(typeArgs));
    }
    return resolved;
}
}
}