/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql;

import java.io.DataInput;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.Vector;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Schema;
import org.apache.hadoop.hive.ql.exec.ExecDriver;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.history.HiveHistory.Keys;
import org.apache.hadoop.hive.ql.hooks.PreExecute;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
import org.apache.hadoop.hive.ql.parse.ParseException;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.tableDesc;
import org.apache.hadoop.hive.ql.processors.CommandProcessor;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.serde2.ByteStream;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.security.UserGroupInformation;

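/**
* Compiles and runs HiveQL commands: a command string is parsed, semantically
* analyzed into a {@link QueryPlan}, executed as a tree of tasks, and the
* results can then be fetched in batches.
*
* A rough usage sketch, assuming a configured HiveConf and an active
* SessionState (error handling omitted):
*
* <pre>
* Driver driver = new Driver(conf);
* if (driver.run("SELECT * FROM src") == 0) {
*   Vector&lt;String&gt; results = new Vector&lt;String&gt;();
*   while (driver.getResults(results)) {
*     // consume up to getMaxRows() rows per batch
*     results.clear();
*   }
* }
* driver.close();
* </pre>
*/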
public class Driver implements CommandProcessor {
static final private Log LOG = LogFactory.getLog(Driver.class.getName());
static final private LogHelper console = new LogHelper(LOG);
private int maxRows = 100;
ByteStream.Output bos = new ByteStream.Output();
private HiveConf conf;
private DataInput resStream;
private Context ctx;
private QueryPlan plan;
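/**
* Count the number of map-reduce jobs in the given task tree. Each task is
* counted at most once, even if it is reachable from several parent tasks.
*/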
public int countJobs(List<Task<? extends Serializable>> tasks) {
return countJobs(tasks, new ArrayList<Task<? extends Serializable>>());
}
public int countJobs(List<Task<? extends Serializable>> tasks, List<Task<? extends Serializable>> seenTasks) {
if (tasks == null)
return 0;
int jobs = 0;
for (Task<? extends Serializable> task : tasks) {
if (!seenTasks.contains(task)) {
seenTasks.add(task);
if (task.isMapRedTask()) {
jobs++;
}
jobs += countJobs(task.getChildTasks(), seenTasks);
}
}
return jobs;
}
/**
* Return the status information about the Map-Reduce cluster
*/
public ClusterStatus getClusterStatus() throws Exception {
ClusterStatus cs;
try {
JobConf job = new JobConf(conf, ExecDriver.class);
JobClient jc = new JobClient(job);
cs = jc.getClusterStatus();
}
catch (Exception e) {
e.printStackTrace();
throw e;
}
LOG.info("Returning cluster status: " + cs.toString());
return cs;
}
/**
* Get a Schema with fields represented with native Hive types
*/
public Schema getSchema() throws Exception {
Schema schema;
try {
if (plan != null && plan.getPlan().getFetchTask() != null) {
BaseSemanticAnalyzer sem = plan.getPlan();
if (!sem.getFetchTaskInit()) {
sem.setFetchTaskInit(true);
sem.getFetchTask().initialize(conf);
}
FetchTask ft = (FetchTask) sem.getFetchTask();
tableDesc td = ft.getTblDesc();
// Partitioned tables don't have a tableDesc set on the FetchTask. Instead
// they have a list of partition descriptors, each with its own table desc.
// Fall back to the desc of the first partition and use its deserializer.
if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null) {
if (ft.getWork().getPartDesc().size() > 0) {
td = ft.getWork().getPartDesc().get(0).getTableDesc();
}
}
if (td == null) {
throw new Exception("No table description found for fetch task: " + ft);
}
String tableName = "result";
List<FieldSchema> lst = MetaStoreUtils.getFieldsFromDeserializer(
tableName, td.getDeserializer());
schema = new Schema(lst, null);
}
else {
schema = new Schema();
}
}
catch (Exception e) {
e.printStackTrace();
throw e;
}
LOG.info("Returning Hive schema: " + schema);
return schema;
}
/**
* Get a Schema with fields represented with Thrift DDL types
*/
public Schema getThriftSchema() throws Exception {
Schema schema;
try {
schema = this.getSchema();
if (schema != null) {
List<FieldSchema> lst = schema.getFieldSchemas();
// Go over the schema and convert type to thrift type
if (lst != null) {
for (FieldSchema f : lst) {
f.setType(MetaStoreUtils.typeToThriftType(f.getType()));
}
}
}
}
catch (Exception e) {
e.printStackTrace();
throw e;
}
LOG.info("Returning Thrift schema: " + schema);
return schema;
}
/**
* Return the maximum number of rows returned by getResults
*/
public int getMaxRows() {
return maxRows;
}
/**
* Set the maximum number of rows returned by getResults
*/
public void setMaxRows(int maxRows) {
this.maxRows = maxRows;
}
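/**
* Return true if any task in the given task tree, including child tasks,
* has a reduce phase.
*/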
public boolean hasReduceTasks(List<Task<? extends Serializable>> tasks) {
if (tasks == null)
return false;
boolean hasReduce = false;
for (Task<? extends Serializable> task : tasks) {
if (task.hasReduce()) {
return true;
}
hasReduce = (hasReduce || hasReduceTasks(task.getChildTasks()));
}
return hasReduce;
}
/**
* for backwards compatibility with current tests
*/
public Driver(HiveConf conf) {
this.conf = conf;
}
public Driver() {
if (SessionState.get() != null) {
conf = SessionState.get().getConf();
}
}
/**
* Compile a new query. Any currently-planned query associated with this Driver is discarded.
*
* @param command The SQL query to compile.
* @return 0 on success, 10 on a semantic analysis error, 11 on a parse
* error, and 12 on any other error.
*/
public int compile(String command) {
if (plan != null) {
close();
plan = null;
}
TaskFactory.resetId();
try {
ctx = new Context(conf);
ParseDriver pd = new ParseDriver();
ASTNode tree = pd.parse(command);
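// skip over any tokenless wrapper nodes produced by the parser and descend
// to the first node that carries a token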
while ((tree.getToken() == null) && (tree.getChildCount() > 0)) {
tree = (ASTNode) tree.getChild(0);
}
BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree);
// Do semantic analysis and plan generation
sem.analyze(tree, ctx);
LOG.info("Semantic Analysis Completed");
// validate the plan
sem.validate();
plan = new QueryPlan(command, sem);
return (0);
} catch (SemanticException e) {
console.printError("FAILED: Error in semantic analysis: "
+ e.getMessage(), "\n"
+ org.apache.hadoop.util.StringUtils.stringifyException(e));
return (10);
} catch (ParseException e) {
console.printError("FAILED: Parse Error: " + e.getMessage(), "\n"
+ org.apache.hadoop.util.StringUtils.stringifyException(e));
return (11);
} catch (Exception e) {
console.printError("FAILED: Unknown exception : " + e.getMessage(), "\n"
+ org.apache.hadoop.util.StringUtils.stringifyException(e));
return (12);
}
}
/**
* @return The current query plan associated with this Driver, if any.
*/
public QueryPlan getPlan() {
return plan;
}
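/**
* Compile and execute the given command.
*
* @return 0 on success, otherwise the non-zero code returned by compile or
* execute.
*/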
public int run(String command) {
int ret = compile(command);
if (ret != 0)
return (ret);
return execute();
}
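/**
* Instantiate the pre-execution hooks configured through
* HiveConf.ConfVars.PREEXECHOOKS, a comma-separated list of class names
* that implement the PreExecute interface.
*/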
private List<PreExecute> getPreExecHooks() throws Exception {
ArrayList<PreExecute> pehooks = new ArrayList<PreExecute>();
String pestr = conf.getVar(HiveConf.ConfVars.PREEXECHOOKS);
pestr = pestr.trim();
if (pestr.equals(""))
return pehooks;
String[] peClasses = pestr.split(",");
for(String peClass: peClasses) {
try {
pehooks.add((PreExecute)Class.forName(peClass.trim(), true, JavaUtils.getClassLoader()).newInstance());
} catch (ClassNotFoundException e) {
console.printError("Pre Exec Hook Class not found:" + e.getMessage());
throw e;
}
}
return pehooks;
}
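/**
* Execute the currently compiled query plan: run the pre-execution hooks,
* then walk the task tree, executing each task and queueing its runnable
* children as it completes.
*
* @return 0 on success, non-zero on failure.
*/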
public int execute() {
boolean noName = StringUtils.isEmpty(conf
.getVar(HiveConf.ConfVars.HADOOPJOBNAME));
int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
String queryId = plan.getQueryId();
String queryStr = plan.getQueryStr();
conf.setVar(HiveConf.ConfVars.HIVEQUERYID, queryId);
conf.setVar(HiveConf.ConfVars.HIVEQUERYSTRING, queryStr);
try {
LOG.info("Starting command: " + queryStr);
if (SessionState.get() != null)
SessionState.get().getHiveHistory().startQuery(queryStr, conf.getVar(HiveConf.ConfVars.HIVEQUERYID) );
resStream = null;
BaseSemanticAnalyzer sem = plan.getPlan();
// Get all the pre execution hooks and execute them.
for(PreExecute peh: getPreExecHooks()) {
peh.run(SessionState.get(),
sem.getInputs(), sem.getOutputs(),
UserGroupInformation.getCurrentUGI());
}
int jobs = countJobs(sem.getRootTasks());
if (jobs > 0) {
console.printInfo("Total MapReduce jobs = " + jobs);
}
if (SessionState.get() != null){
SessionState.get().getHiveHistory().setQueryProperty(queryId,
Keys.QUERY_NUM_TASKS, String.valueOf(jobs));
SessionState.get().getHiveHistory().setIdToTableMap(sem.getIdToTableNameMap());
}
String jobname = Utilities.abbreviate(queryStr, maxlen - 6);
int curJobNo = 0;
// A very simple runtime: tasks to run are taken from the front of the
// queue, and when a task completes its runnable children are put at the
// back of the queue.
Queue<Task<? extends Serializable>> runnable = new LinkedList<Task<? extends Serializable>>();
for (Task<? extends Serializable> rootTask : sem.getRootTasks()) {
if (!runnable.offer(rootTask)) {
LOG.error("Could not insert the first task into the queue");
return (1);
}
}
while (runnable.peek() != null) {
Task<? extends Serializable> tsk = runnable.remove();
if (SessionState.get() != null)
SessionState.get().getHiveHistory().startTask(queryId, tsk,
tsk.getClass().getName());
if (tsk.isMapRedTask()) {
curJobNo++;
if (noName) {
conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, jobname + "(" + curJobNo
+ "/" + jobs + ")");
}
}
tsk.initialize(conf);
int exitVal = tsk.execute();
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().setTaskProperty(queryId,
tsk.getId(), Keys.TASK_RET_CODE, String.valueOf(exitVal));
SessionState.get().getHiveHistory().endTask(queryId, tsk);
}
if (exitVal != 0) {
console.printError("FAILED: Execution Error, return code " + exitVal
+ " from " + tsk.getClass().getName());
return 9;
}
tsk.setDone();
if (tsk.getChildTasks() == null) {
continue;
}
for (Task<? extends Serializable> child : tsk.getChildTasks()) {
// Check if the child is runnable
if (!child.isRunnable()) {
continue;
}
if (!runnable.offer(child)) {
LOG.error("Could not add child task to queue");
}
}
}
if (SessionState.get() != null){
SessionState.get().getHiveHistory().setQueryProperty(queryId,
Keys.QUERY_RET_CODE, String.valueOf(0));
SessionState.get().getHiveHistory().printRowCount(queryId);
}
} catch (Exception e) {
if (SessionState.get() != null)
SessionState.get().getHiveHistory().setQueryProperty(queryId,
Keys.QUERY_RET_CODE, String.valueOf(12));
console.printError("FAILED: Unknown exception : " + e.getMessage(), "\n"
+ org.apache.hadoop.util.StringUtils.stringifyException(e));
return (12);
} finally {
if (SessionState.get() != null)
SessionState.get().getHiveHistory().endQuery(queryId);
if (noName) {
conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, "");
}
}
console.printInfo("OK");
return (0);
}
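/**
* Fetch a batch of at most getMaxRows() result rows and append them to res.
* Results come from the plan's fetch task when one exists, otherwise from
* the query's result stream.
*/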
public boolean getResults(Vector<String> res) throws IOException {
if (plan != null && plan.getPlan().getFetchTask() != null) {
BaseSemanticAnalyzer sem = plan.getPlan();
if (!sem.getFetchTaskInit()) {
sem.setFetchTaskInit(true);
sem.getFetchTask().initialize(conf);
}
FetchTask ft = (FetchTask) sem.getFetchTask();
ft.setMaxRows(maxRows);
return ft.fetch(res);
}
if (resStream == null)
resStream = ctx.getStream();
if (resStream == null)
return false;
int numRows = 0;
String row = null;
while (numRows < maxRows) {
if (resStream == null) {
if (numRows > 0)
return true;
else
return false;
}
bos.reset();
Utilities.streamStatus ss;
try {
ss = Utilities.readColumn(resStream, bos);
if (bos.getCount() > 0)
row = new String(bos.getData(), 0, bos.getCount(), "UTF-8");
else if (ss == Utilities.streamStatus.TERMINATED)
row = "";
if (row != null) {
numRows++;
res.add(row);
// reset so a stale row is not added again if the next read returns no data
row = null;
}
} catch (IOException e) {
console.printError("FAILED: Unexpected IO exception : "
+ e.getMessage());
res = null;
return false;
}
if (ss == Utilities.streamStatus.EOF)
resStream = ctx.getStream();
}
return true;
}
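/**
* Release the resources held by the current query context.
*
* @return 0 on success, 13 if cleanup fails.
*/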
public int close() {
try {
ctx.clear();
} catch (Exception e) {
console.printError("FAILED: Unknown exception : " + e.getMessage(), "\n"
+ org.apache.hadoop.util.StringUtils.stringifyException(e));
return (13);
}
return (0);
}
}