// Package org.apache.pig.tools.pigstats
//
// Source code of org.apache.pig.tools.pigstats.PigStats

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.pig.tools.pigstats;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.jobcontrol.Job;
import org.apache.hadoop.mapred.jobcontrol.JobControl;
import org.apache.pig.ExecType;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore;
import org.apache.pig.backend.local.executionengine.physicalLayer.counters.POCounter;
import org.apache.pig.impl.util.ObjectSerializer;

public class PigStats {
    MROperPlan mrp;
    PhysicalPlan php;
    JobControl jc;
    JobClient jobClient;
    Map<String, Map<String, String>> stats = new HashMap<String, Map<String,String>>();
    // String lastJobID;
    ArrayList<String> rootJobIDs = new ArrayList<String>();
    ExecType mode;
   
    public void setMROperatorPlan(MROperPlan mrp) {
        this.mrp = mrp;
    }
   
    public void setJobControl(JobControl jc) {
        this.jc = jc;
    }
   
    public void setJobClient(JobClient jobClient) {
        this.jobClient = jobClient;
    }
   
    public String getMRPlan() {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        mrp.dump(new PrintStream(baos));
        return baos.toString();
    }
   
    public void setExecType(ExecType mode) {
        this.mode = mode;
    }
   
    public void setPhysicalPlan(PhysicalPlan php) {
        this.php = php;
    }
   
    public String getPhysicalPlan() {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        php.explain(baos);
        return baos.toString();
    }
   
    public Map<String, Map<String, String>> accumulateStats() throws ExecException {
        if(mode == ExecType.MAPREDUCE)
            return accumulateMRStats();
        else if(mode == ExecType.LOCAL)
            return accumulateLocalStats();
        else
            throw new RuntimeException("Unrecognized mode. Either MapReduce or Local mode expected.");
    }
   
    private Map<String, Map<String, String>> accumulateLocalStats() {
        //The counter placed before a store in the local plan should be able to get the number of records
        for(PhysicalOperator op : php.getLeaves()) {
            Map<String, String> jobStats = new HashMap<String, String>();
            stats.put(op.toString(), jobStats);
            POCounter counter = (POCounter) php.getPredecessors(op).get(0);
            jobStats.put("PIG_STATS_LOCAL_OUTPUT_RECORDS", (Long.valueOf(counter.getCount())).toString());
            jobStats.put("PIG_STATS_LOCAL_BYTES_WRITTEN", (Long.valueOf((new File(((POStore)op).getSFile().getFileName())).length())).toString());
        }
        return stats;
    }
   
    private Map<String, Map<String, String>> accumulateMRStats() throws ExecException {
       
        for(Job job : jc.getSuccessfulJobs()) {
           
           
            JobConf jobConf = job.getJobConf();
           
           
                RunningJob rj = null;
                try {
                    rj = jobClient.getJob(job.getAssignedJobID());
                } catch (IOException e1) {
                    String error = "Unable to get the job statistics from JobClient.";
                    throw new ExecException(error, e1);
                }
                if(rj == null)
                    continue;
               
                Map<String, String> jobStats = new HashMap<String, String>();
                stats.put(job.getAssignedJobID().toString(), jobStats);
               
                try {
                    PhysicalPlan plan = (PhysicalPlan) ObjectSerializer.deserialize(jobConf.get("pig.mapPlan"));
                    jobStats.put("PIG_STATS_MAP_PLAN", plan.toString());
                    plan = (PhysicalPlan) ObjectSerializer.deserialize(jobConf.get("pig.combinePlan"));
                    if(plan != null) {
                        jobStats.put("PIG_STATS_COMBINE_PLAN", plan.toString());
                    }
                    plan = (PhysicalPlan) ObjectSerializer.deserialize(jobConf.get("pig.reducePlan"));
                    if(plan != null) {
                        jobStats.put("PIG_STATS_REDUCE_PLAN", plan.toString());
                    }
                } catch (IOException e2) {
                    String error = "Error deserializing plans from the JobConf.";
                    throw new RuntimeException(error, e2);
                }
               
                Counters counters = null;
                try {
                    counters = rj.getCounters();
                    // This code checks if the counters is null, if it is, then all the stats are unknown.
                    // We use -1 to indicate unknown counter. In fact, Counters should not be null, it is
                    // a hadoop bug, once this bug is fixed in hadoop, the null handling code should never be hit.
                    // See Pig-943
                    if (counters!=null)
                    {
                        Counters.Group taskgroup = counters.getGroup("org.apache.hadoop.mapred.Task$Counter");
                        Counters.Group hdfsgroup = counters.getGroup("org.apache.hadoop.mapred.Task$FileSystemCounter");
                        jobStats.put("PIG_STATS_MAP_INPUT_RECORDS", (Long.valueOf(taskgroup.getCounterForName("MAP_INPUT_RECORDS").getCounter())).toString());
                        jobStats.put("PIG_STATS_MAP_OUTPUT_RECORDS", (Long.valueOf(taskgroup.getCounterForName("MAP_OUTPUT_RECORDS").getCounter())).toString());
                        jobStats.put("PIG_STATS_REDUCE_INPUT_RECORDS", (Long.valueOf(taskgroup.getCounterForName("REDUCE_INPUT_RECORDS").getCounter())).toString());
                        jobStats.put("PIG_STATS_REDUCE_OUTPUT_RECORDS", (Long.valueOf(taskgroup.getCounterForName("REDUCE_OUTPUT_RECORDS").getCounter())).toString());
                        jobStats.put("PIG_STATS_BYTES_WRITTEN", (Long.valueOf(hdfsgroup.getCounterForName("HDFS_WRITE").getCounter())).toString());
                    }
                    else
                    {
                        jobStats.put("PIG_STATS_MAP_INPUT_RECORDS", "-1");
                        jobStats.put("PIG_STATS_MAP_OUTPUT_RECORDS", "-1");
                        jobStats.put("PIG_STATS_REDUCE_INPUT_RECORDS", "-1");
                        jobStats.put("PIG_STATS_REDUCE_OUTPUT_RECORDS", "-1");
                        jobStats.put("PIG_STATS_BYTES_WRITTEN", "-1");
                    }
                   
                } catch (IOException e) {
                    // TODO Auto-generated catch block
                    String error = "Unable to get the counters.";
                    throw new ExecException(error, e);
                }
        }
       
        getLastJobIDs(jc.getSuccessfulJobs());
       
        return stats;
    }
   

    private void getLastJobIDs(List<Job> jobs) {
        rootJobIDs.clear();
         Set<Job> temp = new HashSet<Job>();
         for(Job job : jobs) {
             if(job.getDependingJobs() != null && job.getDependingJobs().size() > 0)
                 temp.addAll(job.getDependingJobs());
         }
        
         //difference between temp and jobs would be the set of leaves
         //we can safely assume there would be only one leaf
         for(Job job : jobs) {
             if(temp.contains(job)) continue;
             else rootJobIDs.add(job.getAssignedJobID().toString());
         }
    }
   
    public List<String> getRootJobIDs() {
        return rootJobIDs;
    }
   
    public Map<String, Map<String, String>> getPigStats() {
        return stats;
    }
   
    public long getRecordsWritten() {
        if(mode == ExecType.LOCAL)
            return getRecordsCountLocal();
        else if(mode == ExecType.MAPREDUCE)
            return getRecordsCountMR();
        else
            throw new RuntimeException("Unrecognized mode. Either MapReduce or Local mode expected.");
    }
   
    private long getRecordsCountLocal() {
        //System.out.println(getPhysicalPlan());
        //because of the nature of the parser, there will always be only one store

        for(PhysicalOperator op : php.getLeaves()) {
            return Long.parseLong(stats.get(op.toString()).get("PIG_STATS_LOCAL_OUTPUT_RECORDS"));
        }
        return 0;
    }
   
    /**
     * Returns the no. of records written by the pig script in MR mode
     * @return
     */
    private long getRecordsCountMR() {
        long records = 0;
        for (String jid : rootJobIDs) {
            Map<String, String> jobStats = stats.get(jid);
            if (jobStats == null) continue;
            String reducePlan = jobStats.get("PIG_STATS_REDUCE_PLAN");
          if(reducePlan == null) {
              if (Long.parseLong(jobStats.get("PIG_STATS_MAP_OUTPUT_RECORDS"))==-1L)
                {
                  records = -1;
                    break;
                }
              else
                  records += Long.parseLong(jobStats.get("PIG_STATS_MAP_OUTPUT_RECORDS"));
          } else {
              if (Long.parseLong(jobStats.get("PIG_STATS_REDUCE_OUTPUT_RECORDS"))==-1L)
                {
                    records = -1;
                    break;
                }
                else
                    records += Long.parseLong(jobStats.get("PIG_STATS_REDUCE_OUTPUT_RECORDS"));
          }
        }
      return records;
    }
   
    public long getBytesWritten() {
      if(mode == ExecType.LOCAL) {
        return getLocalBytesWritten();
      } else if(mode == ExecType.MAPREDUCE) {
        return getMapReduceBytesWritten();
      } else {
        throw new RuntimeException("Unrecognized mode. Either MapReduce or Local mode expected.");
      }
     
    }
   
    private long getLocalBytesWritten() {
      for(PhysicalOperator op : php.getLeaves())
        return Long.parseLong(stats.get(op.toString()).get("PIG_STATS_LOCAL_BYTES_WRITTEN"));
      return 0;
    }
   
    private long getMapReduceBytesWritten() {
        long bytesWritten = 0;
        for (String jid : rootJobIDs) {
            Map<String, String> jobStats = stats.get(jid);
            if (jobStats == null) continue;
            if (Long.parseLong(jobStats.get("PIG_STATS_BYTES_WRITTEN"))==-1L)
            {
                bytesWritten = -1L;
                break;
            }
            bytesWritten += Long.parseLong(jobStats.get("PIG_STATS_BYTES_WRITTEN"));
        }
        return bytesWritten;
    }
   
}
// ---------------------------------------------------------------------------
// Scraper footer preserved as a comment (not part of the original source):
// Related classes of org.apache.pig.tools.pigstats.PigStats
// Copyright 2018 www.massapi.com. All rights reserved. All source code is the
// property of its respective owners. Java is a trademark of Sun Microsystems,
// Inc., owned by Oracle Inc. Contact: coftware#gmail.com.
// ---------------------------------------------------------------------------