Package org.apache.pig.tools.pigstats.mapreduce

Source Code of org.apache.pig.tools.pigstats.mapreduce.SimplePigStats$JobGraphBuilder

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.tools.pigstats.mapreduce;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.jobcontrol.Job;
import org.apache.pig.ExecType;
import org.apache.pig.PigRunner.ReturnCode;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceOper;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.NativeMapReduceOper;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROpPlanVisitor;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.plan.DependencyOrderWalker;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.tools.pigstats.InputStats;
import org.apache.pig.tools.pigstats.mapreduce.MRJobStats;
import org.apache.pig.tools.pigstats.OutputStats;
import org.apache.pig.tools.pigstats.PigStats;
import org.apache.pig.tools.pigstats.JobStats;

/**
* SimplePigStats encapsulates the statistics collected from a running script.
* It includes status of the execution, the DAG of its MR jobs, as well as
* information about outputs and inputs of the script.
*/
public final class SimplePigStats extends PigStats {
    // Logger shared by all instances of this class.
    private static final Log LOG = LogFactory.getLog(SimplePigStats.class);

    // Hadoop job client handed to initialize() and exposed by getJobClient().
    private JobClient jobClient;
    // Job control compiler handed to initialize() and exposed by
    // getJobControlCompiler().
    private JobControlCompiler jcc;
    // Job -> MR operator; filled by mapMROperToJob(), read by addMRJobStats().
    private Map<Job, MapReduceOper> jobMroMap;
    // MR operator -> stats node; recreated and filled by JobGraphBuilder.
    private Map<MapReduceOper, MRJobStats> mroJobMap;

    // successful jobs so far
    // (isJobSeen() adds every job it is asked about to this set)
    private Set<Job> jobSeen = new HashSet<Job>();

    /**
     * This class builds the job DAG from a MR plan
     */
    private class JobGraphBuilder extends MROpPlanVisitor {

        public JobGraphBuilder(MROperPlan plan) {
            super(plan, new DependencyOrderWalker<MapReduceOper, MROperPlan>(
                    plan));
            jobPlan = new JobGraph();
            mroJobMap = new HashMap<MapReduceOper, MRJobStats>();
        }

        @Override
        public void visitMROp(MapReduceOper mr) throws VisitorException {
            MRJobStats js = new MRJobStats(
                    mr.getOperatorKey().toString(), jobPlan);
            jobPlan.add(js);
            List<MapReduceOper> preds = getPlan().getPredecessors(mr);
            if (preds != null) {
                for (MapReduceOper pred : preds) {
                    MRJobStats jpred = mroJobMap.get(pred);
                    if (!jobPlan.isConnected(jpred, js)) {
                        jobPlan.connect(jpred, js);
                    }
                }
            }
            mroJobMap.put(mr, js);
        }
    }

    @Override
    public List<String> getAllErrorMessages() {
        throw new UnsupportedOperationException();
    }

    @Override
    public Map<String, List<PigStats>> getAllStats() {
        throw new UnsupportedOperationException();
    }

    /**
     * Always returns {@code false} for this implementation.
     * NOTE(review): presumably signals that these stats do not come from an
     * embedded script run -- confirm against the PigStats contract.
     *
     * @return {@code false}
     */
    @Override
    public boolean isEmbedded() {
        return false;
    }

    @Override
    public long getSMMSpillCount() {
        Iterator<JobStats> it = jobPlan.iterator();
        long ret = 0;
        while (it.hasNext()) {
            ret += ((MRJobStats) it.next()).getSMMSpillCount();
        }
        return ret;
    }

    @Override
    public long getProactiveSpillCountObjects() {
        Iterator<JobStats> it = jobPlan.iterator();
        long ret = 0;
        while (it.hasNext()) {
            ret += ((MRJobStats) it.next()).getProactiveSpillCountObjects();
        }
        return ret;
    }

    @Override
    public long getProactiveSpillCountRecords() {
        Iterator<JobStats> it = jobPlan.iterator();
        long ret = 0;
        while (it.hasNext()) {
            ret += ((MRJobStats) it.next()).getProactiveSpillCountRecs();
        }
        return ret;
    }

    public SimplePigStats() {
        jobMroMap = new HashMap<Job, MapReduceOper>();
        jobPlan = new JobGraph();
    }

    void initialize(PigContext pigContext, JobClient jobClient,
            JobControlCompiler jcc, MROperPlan mrPlan) {
        super.start();

        if (pigContext == null || jobClient == null || jcc == null) {
            LOG.warn("invalid params: " + pigContext + jobClient + jcc);
            return;
        }

        this.pigContext = pigContext;
        this.jobClient = jobClient;
        this.jcc = jcc;

        // build job DAG with job ids assigned to null
        try {
            new JobGraphBuilder(mrPlan).visit();
        } catch (VisitorException e) {
            LOG.warn("unable to build job plan", e);
        }
    }

    /**
     * Delegates to the superclass's {@code stop()}; the counterpart of
     * {@code initialize}, which calls the superclass's {@code start()}.
     */
    void finish() {
        super.stop();
    }

    boolean isInitialized() {
        return startTime > 0;
    }

    @Deprecated
    @Override
    public JobClient getJobClient() {
        return jobClient;
    }

    JobControlCompiler getJobControlCompiler() {
        return jcc;
    }

    MRJobStats addMRJobStats(Job job) {
        MapReduceOper mro = jobMroMap.get(job);

        if (mro == null) {
            LOG.warn("unable to get MR oper for job: " + job.toString());
            return null;
        }
        MRJobStats js = mroJobMap.get(mro);

        JobID jobId = job.getAssignedJobID();
        js.setId(jobId);
        js.setAlias(mro);
        js.setConf(job.getJobConf());
        return js;
    }

    MRJobStats addMRJobStatsForNative(NativeMapReduceOper mr) {
        MRJobStats js = mroJobMap.get(mr);
        js.setId(new JobID(mr.getJobId(), NativeMapReduceOper.getJobNumber()));
        js.setAlias(mr);

        return js;
    }

    void display() {
        if (returnCode == ReturnCode.UNKNOWN) {
            LOG.warn("unknown return code, can't display the results");
            return;
        }
        if (pigContext == null) {
            LOG.warn("unknown exec type, don't display the results");
            return;
        }

        SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
        StringBuilder sb = new StringBuilder();
        sb.append("\nHadoopVersion\tPigVersion\tUserId\tStartedAt\tFinishedAt\tFeatures\n");
        sb.append(getHadoopVersion()).append("\t").append(getPigVersion()).append("\t")
        .append(userId).append("\t")
        .append(sdf.format(new Date(startTime))).append("\t")
        .append(sdf.format(new Date(endTime))).append("\t")
        .append(getFeatures()).append("\n");
        sb.append("\n");
        if (returnCode == ReturnCode.SUCCESS) {
            sb.append("Success!\n");
        } else if (returnCode == ReturnCode.PARTIAL_FAILURE) {
            sb.append("Some jobs have failed! Stop running all dependent jobs\n");
        } else {
            sb.append("Failed!\n");
        }
        sb.append("\n");

        if (returnCode == ReturnCode.SUCCESS
                || returnCode == ReturnCode.PARTIAL_FAILURE) {
            sb.append("Job Stats (time in seconds):\n");
            sb.append(MRJobStats.SUCCESS_HEADER).append("\n");
            List<JobStats> arr = jobPlan.getSuccessfulJobs();
            for (JobStats js : arr) {
                sb.append(js.getDisplayString());
            }
            sb.append("\n");
        }
        if (returnCode == ReturnCode.FAILURE
                || returnCode == ReturnCode.PARTIAL_FAILURE) {
            sb.append("Failed Jobs:\n");
            sb.append(MRJobStats.FAILURE_HEADER).append("\n");
            List<JobStats> arr = jobPlan.getFailedJobs();
            for (JobStats js : arr) {
                sb.append(js.getDisplayString());
            }
            sb.append("\n");
        }
        sb.append("Input(s):\n");
        for (InputStats is : getInputStats()) {
            sb.append(is.getDisplayString());
        }
        sb.append("\n");
        sb.append("Output(s):\n");
        for (OutputStats ds : getOutputStats()) {
            sb.append(ds.getDisplayString());
        }

        sb.append("\nCounters:\n");
        sb.append("Total records written : " + getRecordWritten()).append("\n");
        sb.append("Total bytes written : " + getBytesWritten()).append("\n");
        sb.append("Spillable Memory Manager spill count : "
                + getSMMSpillCount()).append("\n");
        sb.append("Total bags proactively spilled: "
                + getProactiveSpillCountObjects()).append("\n");
        sb.append("Total records proactively spilled: "
                + getProactiveSpillCountRecords()).append("\n");

        sb.append("\nJob DAG:\n").append(jobPlan.toString());

        LOG.info("Script Statistics: \n" + sb.toString());
    }

    void mapMROperToJob(MapReduceOper mro, Job job) {
        if (mro == null) {
            LOG.warn("null MR operator");
        } else {
            MRJobStats js = mroJobMap.get(mro);
            if (js == null) {
                LOG.warn("null job stats for mro: " + mro.getOperatorKey());
            } else {
                jobMroMap.put(job, mro);
            }
        }
    }

    boolean isJobSeen(Job job) {
        return !jobSeen.add(job);
    }

}
TOP

Related Classes of org.apache.pig.tools.pigstats.mapreduce.SimplePigStats$JobGraphBuilder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.