Package org.apache.tajo.master.rm

Source Code of org.apache.tajo.master.rm.YarnRMContainerAllocator

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.tajo.master.rm;

import com.google.common.collect.Lists;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.YarnException;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.AMResponse;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.client.AMRMClientImpl;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
import org.apache.tajo.ExecutionBlockId;
import org.apache.tajo.TajoProtos;
import org.apache.tajo.master.event.ContainerAllocationEvent;
import org.apache.tajo.master.event.ContainerAllocatorEventType;
import org.apache.tajo.master.event.SubQueryContainerAllocationEvent;
import org.apache.tajo.master.querymaster.Query;
import org.apache.tajo.master.querymaster.QueryMasterTask;
import org.apache.tajo.master.querymaster.SubQuery;
import org.apache.tajo.master.querymaster.SubQueryState;
import org.apache.tajo.util.ApplicationIdUtils;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;

public class YarnRMContainerAllocator extends AMRMClientImpl
    implements EventHandler<ContainerAllocationEvent> {

  /** Class Logger */
  private static final Log LOG = LogFactory.getLog(YarnRMContainerAllocator.
      class.getName());

  private QueryMasterTask.QueryMasterTaskContext context;
  private final EventHandler eventHandler;

  public YarnRMContainerAllocator(QueryMasterTask.QueryMasterTaskContext context) {
    super(ApplicationIdUtils.createApplicationAttemptId(context.getQueryId()));
    this.context = context;
    this.eventHandler = context.getDispatcher().getEventHandler();
  }

  public void init(Configuration conf) {
    super.init(conf);
  }

  private static final int WAIT_INTERVAL_AVAILABLE_NODES = 500; // 0.5 second
  public void start() {
    super.start();

    RegisterApplicationMasterResponse response;
    try {
      response = registerApplicationMaster("localhost", 10080, "http://localhost:1234");

      // If the number of cluster nodes is ZERO, it waits for available nodes.
      AllocateResponse allocateResponse = allocate(0.0f);
      while(allocateResponse.getNumClusterNodes() < 1) {
        try {
          Thread.sleep(WAIT_INTERVAL_AVAILABLE_NODES);
          LOG.info("Waiting for Available Cluster Nodes");
          allocateResponse = allocate(0);
        } catch (InterruptedException e) {
          LOG.error(e);
        }
      }
      context.getQueryMasterContext().getWorkerContext().setNumClusterNodes(allocateResponse.getNumClusterNodes());
    } catch (YarnRemoteException e) {
      LOG.error(e);
    }

    startAllocatorThread();
  }

  protected Thread allocatorThread;
  private final AtomicBoolean stopped = new AtomicBoolean(false);
  private int rmPollInterval = 100;//millis

  protected void startAllocatorThread() {
    allocatorThread = new Thread(new Runnable() {
      @Override
      public void run() {
        while (!stopped.get() && !Thread.currentThread().isInterrupted()) {
          try {
            try {
              heartbeat();
            } catch (YarnException e) {
              LOG.error("Error communicating with RM: " + e.getMessage() , e);
              return;
            } catch (Exception e) {
              LOG.error("ERROR IN CONTACTING RM. ", e);
              // TODO: for other exceptions
              if(stopped.get()) {
                break;
              }
            }
            Thread.sleep(rmPollInterval);
          } catch (InterruptedException e) {
            if (!stopped.get()) {
              LOG.warn("Allocated thread interrupted. Returning.");
            }
            break;
          }
        }
        LOG.info("Allocated thread stopped");
      }
    });
    allocatorThread.setName("YarnRMContainerAllocator");
    allocatorThread.start();
  }

  public void stop() {
    if(stopped.get()) {
      return;
    }
    LOG.info("un-registering ApplicationMaster(QueryMaster):" + appAttemptId);
    stopped.set(true);

    try {
      FinalApplicationStatus status = FinalApplicationStatus.UNDEFINED;
      Query query = context.getQuery();
      if (query != null) {
        TajoProtos.QueryState state = query.getState();
        if (state == TajoProtos.QueryState.QUERY_SUCCEEDED) {
          status = FinalApplicationStatus.SUCCEEDED;
        } else if (state == TajoProtos.QueryState.QUERY_FAILED || state == TajoProtos.QueryState.QUERY_ERROR) {
          status = FinalApplicationStatus.FAILED;
        } else if (state == TajoProtos.QueryState.QUERY_ERROR) {
          status = FinalApplicationStatus.FAILED;
        }
      }
      unregisterApplicationMaster(status, "tajo query finished", null);
    } catch (Exception e) {
      LOG.error(e.getMessage(), e);
    }

    allocatorThread.interrupt();
    LOG.info("un-registered ApplicationMAster(QueryMaster) stopped:" + appAttemptId);

    super.stop();
  }

  private final Map<Priority, ExecutionBlockId> subQueryMap =
      new HashMap<Priority, ExecutionBlockId>();

  private AtomicLong prevReportTime = new AtomicLong(0);
  private int reportInterval = 5 * 1000; // second

  public void heartbeat() throws Exception {
    AllocateResponse allocateResponse = allocate(context.getProgress());
    AMResponse response = allocateResponse.getAMResponse();
    if(response == null) {
      LOG.warn("AM Response is null");
      return;
    }
    List<Container> allocatedContainers = response.getAllocatedContainers();

    long currentTime = System.currentTimeMillis();
    if ((currentTime - prevReportTime.longValue()) >= reportInterval) {
      LOG.debug("Available Cluster Nodes: " + allocateResponse.getNumClusterNodes());
      LOG.debug("Num of Allocated Containers: " + allocatedContainers.size());
      LOG.info("Available Resource: " + response.getAvailableResources());
      prevReportTime.set(currentTime);
    }

    if (allocatedContainers.size() > 0) {
      LOG.info("================================================================");
      for (Container container : response.getAllocatedContainers()) {
        LOG.info("> Container Id: " + container.getId());
        LOG.info("> Node Id: " + container.getNodeId());
        LOG.info("> Resource (Mem): " + container.getResource().getMemory());
        LOG.info("> State : " + container.getState());
        LOG.info("> Priority: " + container.getPriority());
      }
      LOG.info("================================================================");

      Map<ExecutionBlockId, List<Container>> allocated = new HashMap<ExecutionBlockId, List<Container>>();
      for (Container container : allocatedContainers) {
        ExecutionBlockId executionBlockId = subQueryMap.get(container.getPriority());
        SubQueryState state = context.getSubQuery(executionBlockId).getState();
        if (!(SubQuery.isRunningState(state))) {
          releaseAssignedContainer(container.getId());
        } else {
          if (allocated.containsKey(executionBlockId)) {
            allocated.get(executionBlockId).add(container);
          } else {
            allocated.put(executionBlockId, Lists.newArrayList(container));
          }
        }
      }

      for (Entry<ExecutionBlockId, List<Container>> entry : allocated.entrySet()) {
        eventHandler.handle(new SubQueryContainerAllocationEvent(entry.getKey(), entry.getValue()));
      }
    }
  }

  @Override
  public void handle(ContainerAllocationEvent event) {

    if (event.getType() == ContainerAllocatorEventType.CONTAINER_REQ) {
      LOG.info(event);
      subQueryMap.put(event.getPriority(), event.getExecutionBlockId());
      addContainerRequest(new ContainerRequest(event.getCapability(), null, null,
          event.getPriority(), event.getRequiredNum()));

    } else if (event.getType() == ContainerAllocatorEventType.CONTAINER_DEALLOCATE) {
      LOG.info(event);
    } else {
      LOG.info(event);
    }
  }
}
TOP

Related Classes of org.apache.tajo.master.rm.YarnRMContainerAllocator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by Oracle Inc. Contact coftware#gmail.com.