/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.backend.hadoop.executionengine.tez;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.pig.PigConfiguration;
import org.apache.pig.PigWarning;
import org.apache.pig.backend.BackendException;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
import org.apache.pig.backend.hadoop.executionengine.Launcher;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.TezCompiler;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.TezOperPlan;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.TezPOPackageAnnotator;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.TezPlanContainer;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.TezPlanContainerNode;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.TezPlanContainerPrinter;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.NativeTezOper;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.optimizer.AccumulatorOptimizer;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.optimizer.CombinerOptimizer;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.optimizer.LoaderProcessor;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.optimizer.MultiQueryOptimizerTez;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.optimizer.NoopFilterRemover;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.optimizer.ParallelismSetter;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.optimizer.SecondaryKeyOptimizerTez;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.optimizer.UnionOptimizer;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.plan.CompilationMessageCollector;
import org.apache.pig.impl.plan.CompilationMessageCollector.MessageType;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.PlanException;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.util.UDFContext;
import org.apache.pig.tools.pigstats.OutputStats;
import org.apache.pig.tools.pigstats.PigStats;
import org.apache.pig.tools.pigstats.tez.TezPigScriptStats;
import org.apache.pig.tools.pigstats.tez.TezScriptState;
import org.apache.pig.tools.pigstats.tez.TezVertexStats;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.Vertex;
import org.apache.tez.dag.api.client.DAGStatus;
import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
/**
* Main class that launches pig for Tez
*/
public class TezLauncher extends Launcher {
private static final Log log = LogFactory.getLog(TezLauncher.class);
private static ThreadFactory namedThreadFactory;
private ExecutorService executor;
private boolean aggregateWarning = false;
private TezScriptState tezScriptState;
private TezPigScriptStats tezStats;
private TezJob runningJob;
public TezLauncher() {
if (namedThreadFactory == null) {
namedThreadFactory = new ThreadFactoryBuilder().setNameFormat(
"PigTezLauncher-%d").build();
}
executor = Executors.newSingleThreadExecutor(namedThreadFactory);
}
@Override
public PigStats launchPig(PhysicalPlan php, String grpName, PigContext pc) throws Exception {
if (pc.getExecType().isLocal()) {
pc.getProperties().setProperty(TezConfiguration.TEZ_LOCAL_MODE, "true");
pc.getProperties().setProperty(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, "true");
pc.getProperties().setProperty("tez.ignore.lib.uris", "true");
}
Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties(), true);
if (pc.defaultParallel == -1 && !conf.getBoolean(PigConfiguration.TEZ_AUTO_PARALLELISM, true)) {
pc.defaultParallel = 1;
}
aggregateWarning = conf.getBoolean("aggregate.warning", false);
TezResourceManager tezResourceManager = TezResourceManager.getInstance();
tezResourceManager.init(pc, conf);
Path stagingDir = tezResourceManager.getStagingDir();
log.info("Tez staging directory is " + stagingDir.toString());
conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
List<TezOperPlan> processedPlans = new ArrayList<TezOperPlan>();
tezScriptState = TezScriptState.get();
tezStats = new TezPigScriptStats(pc);
PigStats.start(tezStats);
conf.set(TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS, "true");
TezJobCompiler jc = new TezJobCompiler(pc, conf);
TezPlanContainer tezPlanContainer = compile(php, pc);
tezStats.initialize(tezPlanContainer);
tezScriptState.emitInitialPlanNotification(tezPlanContainer);
tezScriptState.emitLaunchStartedNotification(tezPlanContainer.size()); //number of DAGs to Launch
TezPlanContainerNode tezPlanContainerNode;
TezOperPlan tezPlan;
int processedDAGs = 0;
while ((tezPlanContainerNode = tezPlanContainer.getNextPlan(processedPlans)) != null) {
tezPlan = tezPlanContainerNode.getTezOperPlan();
processLoadAndParallelism(tezPlan, pc);
processedPlans.add(tezPlan);
ProgressReporter reporter = new ProgressReporter(tezPlanContainer.size(), processedDAGs);
if (tezPlan.size()==1 && tezPlan.getRoots().get(0) instanceof NativeTezOper) {
// Native Tez Plan
NativeTezOper nativeOper = (NativeTezOper)tezPlan.getRoots().get(0);
tezScriptState.emitJobsSubmittedNotification(1);
nativeOper.runJob(tezPlanContainerNode.getOperatorKey().toString());
} else {
TezPOPackageAnnotator pkgAnnotator = new TezPOPackageAnnotator(tezPlan);
pkgAnnotator.visit();
runningJob = jc.compile(tezPlanContainerNode, tezPlanContainer);
//TODO: Exclude vertex groups from numVerticesToLaunch ??
tezScriptState.dagLaunchNotification(runningJob.getName(), tezPlan, tezPlan.size());
runningJob.setPigStats(tezStats);
// Set the thread UDFContext so registered classes are available.
final UDFContext udfContext = UDFContext.getUDFContext();
Thread task = new Thread(runningJob) {
@Override
public void run() {
UDFContext.setUdfContext(udfContext.clone());
super.run();
}
};
JobControlThreadExceptionHandler jctExceptionHandler = new JobControlThreadExceptionHandler();
task.setUncaughtExceptionHandler(jctExceptionHandler);
task.setContextClassLoader(PigContext.getClassLoader());
// Mark the times that the jobs were submitted so it's reflected in job
// history props. TODO: Fix this. unused now
long scriptSubmittedTimestamp = System.currentTimeMillis();
// Job.getConfiguration returns the shared configuration object
Configuration jobConf = runningJob.getConfiguration();
jobConf.set("pig.script.submitted.timestamp",
Long.toString(scriptSubmittedTimestamp));
jobConf.set("pig.job.submitted.timestamp",
Long.toString(System.currentTimeMillis()));
Future<?> future = executor.submit(task);
tezScriptState.emitJobsSubmittedNotification(1);
boolean jobStarted = false;
while (!future.isDone()) {
if (!jobStarted && runningJob.getApplicationId() != null) {
jobStarted = true;
String appId = runningJob.getApplicationId().toString();
//For Oozie Pig action job id matching compatibility with MR mode
log.info("HadoopJobId: "+ appId.replace("application", "job"));
tezScriptState.emitJobStartedNotification(appId);
tezScriptState.dagStartedNotification(runningJob.getName(), appId);
}
reporter.notifyUpdate();
Thread.sleep(1000);
}
}
processedDAGs++;
if (tezPlanContainer.size() == processedDAGs) {
tezScriptState.emitProgressUpdatedNotification(100);
} else {
tezScriptState.emitProgressUpdatedNotification(
((tezPlanContainer.size() - processedDAGs)/tezPlanContainer.size()) * 100);
}
tezPlanContainer.updatePlan(tezPlan, reporter.notifyFinishedOrFailed());
}
tezStats.finish();
tezScriptState.emitLaunchCompletedNotification(tezStats.getNumberSuccessfulJobs());
for (OutputStats output : tezStats.getOutputStats()) {
POStore store = output.getPOStore();
try {
if (!output.isSuccessful()) {
store.getStoreFunc().cleanupOnFailure(
store.getSFile().getFileName(),
Job.getInstance(output.getConf()));
} else {
store.getStoreFunc().cleanupOnSuccess(
store.getSFile().getFileName(),
Job.getInstance(output.getConf()));
}
} catch (IOException e) {
throw new ExecException(e);
} catch (AbstractMethodError nsme) {
// Just swallow it. This means we're running against an
// older instance of a StoreFunc that doesn't implement
// this method.
}
}
return tezStats;
}
private void computeWarningAggregate(Map<String, Map<String, Long>> counterGroups, Map<Enum, Long> aggMap) {
for (Map<String, Long> counters : counterGroups.values()) {
for (Enum e : PigWarning.values()) {
if (counters.containsKey(e.toString())) {
if (aggMap.containsKey(e.toString())) {
Long currentCount = aggMap.get(e.toString());
currentCount = (currentCount == null ? 0 : currentCount);
if (counters != null) {
currentCount += counters.get(e.toString());
}
aggMap.put(e, currentCount);
} else {
aggMap.put(e, counters.get(e.toString()));
}
}
}
}
}
private class ProgressReporter {
private int totalDAGs;
private int processedDAGS;
private int count = 0;
private int prevProgress = 0;
public ProgressReporter(int totalDAGs, int processedDAGs) {
this.totalDAGs = totalDAGs;
this.processedDAGS = processedDAGs;
}
public void notifyUpdate() {
DAGStatus dagStatus = runningJob.getDAGStatus();
if (dagStatus != null && dagStatus.getState() == DAGStatus.State.RUNNING) {
// Emit notification when the job has progressed more than 1%,
// or every 20 seconds
int currProgress = Math.round(runningJob.getDAGProgress() * 100f);
if (currProgress - prevProgress >= 1 || count % 100 == 0) {
tezScriptState.dagProgressNotification(runningJob.getName(), -1, currProgress);
tezScriptState.emitProgressUpdatedNotification((currProgress + (100 * processedDAGS))/totalDAGs);
prevProgress = currProgress;
}
count++;
}
// TODO: Add new vertex tracking methods to PigTezProgressNotificationListener
// and emit notifications for individual vertex start, progress and completion
}
public boolean notifyFinishedOrFailed() {
DAGStatus dagStatus = runningJob.getDAGStatus();
if (dagStatus == null) {
return false;
}
if (dagStatus.getState() == DAGStatus.State.SUCCEEDED) {
Map<Enum, Long> warningAggMap = new HashMap<Enum, Long>();
DAG dag = runningJob.getDAG();
for (Vertex v : dag.getVertices()) {
TezVertexStats tts = tezStats.getVertexStats(dag.getName(), v.getName());
if (tts == null) {
continue; //vertex groups
}
Map<String, Map<String, Long>> counterGroups = tts.getCounters();
if (counterGroups == null) {
log.warn("Counters are not available for vertex " + v.getName() + ". Not computing warning aggregates.");
} else {
computeWarningAggregate(counterGroups, warningAggMap);
}
}
if (aggregateWarning) {
CompilationMessageCollector.logAggregate(warningAggMap, MessageType.Warning, log);
}
return true;
}
return false;
}
}
@Override
public void explain(PhysicalPlan php, PigContext pc, PrintStream ps,
String format, boolean verbose) throws PlanException,
VisitorException, IOException {
log.debug("Entering TezLauncher.explain");
TezPlanContainer tezPlanContainer = compile(php, pc);
if (format.equals("text")) {
TezPlanContainerPrinter printer = new TezPlanContainerPrinter(ps, tezPlanContainer);
printer.setVerbose(verbose);
printer.visit();
} else {
// TODO: add support for other file format
throw new IOException("Non-text output of explain is not supported.");
}
}
public TezPlanContainer compile(PhysicalPlan php, PigContext pc)
throws PlanException, IOException, VisitorException {
TezCompiler comp = new TezCompiler(php, pc);
comp.compile();
TezPlanContainer planContainer = comp.getPlanContainer();
for (Map.Entry<OperatorKey, TezPlanContainerNode> entry : planContainer
.getKeys().entrySet()) {
TezOperPlan tezPlan = entry.getValue().getTezOperPlan();
optimize(tezPlan, pc);
}
return planContainer;
}
private void optimize(TezOperPlan tezPlan, PigContext pc) throws VisitorException {
Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
boolean aggregateWarning = conf.getBoolean("aggregate.warning", false);
NoopFilterRemover filter = new NoopFilterRemover(tezPlan);
filter.visit();
// Run CombinerOptimizer on Tez plan
boolean nocombiner = conf.getBoolean(PigConfiguration.PROP_NO_COMBINER, false);
if (!pc.inIllustrator && !nocombiner) {
boolean doMapAgg = Boolean.parseBoolean(pc.getProperties().getProperty(
PigConfiguration.PROP_EXEC_MAP_PARTAGG, "false"));
CombinerOptimizer co = new CombinerOptimizer(tezPlan, doMapAgg);
co.visit();
co.getMessageCollector().logMessages(MessageType.Warning, aggregateWarning, log);
}
// Run optimizer to make use of secondary sort key when possible for nested foreach
// order by and distinct. Should be done before AccumulatorOptimizer
boolean noSecKeySort = conf.getBoolean(PigConfiguration.PIG_EXEC_NO_SECONDARY_KEY, false);
if (!pc.inIllustrator && !noSecKeySort) {
SecondaryKeyOptimizerTez skOptimizer = new SecondaryKeyOptimizerTez(tezPlan);
skOptimizer.visit();
}
boolean isMultiQuery = conf.getBoolean(PigConfiguration.OPT_MULTIQUERY, true);
if (isMultiQuery) {
// reduces the number of TezOpers in the Tez plan generated
// by multi-query (multi-store) script.
MultiQueryOptimizerTez mqOptimizer = new MultiQueryOptimizerTez(tezPlan);
mqOptimizer.visit();
}
// Run AccumulatorOptimizer on Tez plan
boolean isAccum = conf.getBoolean(PigConfiguration.OPT_ACCUMULATOR, true);
if (isAccum) {
AccumulatorOptimizer accum = new AccumulatorOptimizer(tezPlan);
accum.visit();
}
// Use VertexGroup in Tez
boolean isUnionOpt = conf.getBoolean(PigConfiguration.TEZ_OPT_UNION, true);
if (isUnionOpt) {
UnionOptimizer uo = new UnionOptimizer(tezPlan);
uo.visit();
}
}
public static void processLoadAndParallelism(TezOperPlan tezPlan, PigContext pc) throws VisitorException {
if (!pc.inExplain && !pc.inDumpSchema) {
LoaderProcessor loaderStorer = new LoaderProcessor(tezPlan, pc);
loaderStorer.visit();
ParallelismSetter parallelismSetter = new ParallelismSetter(tezPlan, pc);
parallelismSetter.visit();
tezPlan.setEstimatedParallelism(parallelismSetter.getEstimatedTotalParallelism());
}
}
@Override
public void kill() throws BackendException {
if (runningJob != null) {
try {
runningJob.killJob();
} catch (Exception e) {
throw new BackendException(e);
}
}
destroy();
}
@Override
public void killJob(String jobID, Configuration conf) throws BackendException {
if (runningJob != null && runningJob.getApplicationId().toString() == jobID) {
try {
runningJob.killJob();
} catch (Exception e) {
throw new BackendException(e);
}
} else {
log.info("Cannot find job: " + jobID);
}
}
@Override
public void destroy() {
try {
if (executor != null && !executor.isShutdown()) {
log.info("Shutting down thread pool");
executor.shutdownNow();
}
} catch (Exception e) {
log.warn("Error shutting down threadpool");
}
}
}