Examples of TezEvent


Examples of org.apache.tez.runtime.api.impl.TezEvent

  }

  private static synchronized boolean heartbeat(
      Collection<TezEvent> outOfBandEvents)
      throws TezException, IOException {
    TezEvent updateEvent = null;
    int eventCounter = 0;
    int eventsRange = 0;
    TezTaskAttemptID taskAttemptID = null;
    List<TezEvent> events = new ArrayList<TezEvent>();
    try {
      taskLock.readLock().lock();
      if (currentTask != null) {
        eventsToSend.drainTo(events);
        taskAttemptID = currentTaskAttemptID;
        eventCounter = currentTask.getEventCounter();
        eventsRange = maxEventsToGet;
        if (!currentTask.isTaskDone() && !currentTask.hadFatalError()) {
          updateEvent = new TezEvent(new TaskStatusUpdateEvent(
              currentTask.getCounters(), currentTask.getProgress()),
                new EventMetaData(EventProducerConsumerType.SYSTEM,
                    currentTask.getVertexName(), "", taskAttemptID));
          events.add(updateEvent);
        } else if (outOfBandEvents == null && events.isEmpty()) {
View Full Code Here

Examples of org.apache.tez.runtime.api.impl.TezEvent

      @Override
      public void signalFatalError(TezTaskAttemptID taskAttemptID,
          String diagnostics,
          EventMetaData sourceInfo) {
        currentTask.setFrameworkCounters();
        TezEvent statusUpdateEvent =
            new TezEvent(new TaskStatusUpdateEvent(
                currentTask.getCounters(), currentTask.getProgress()),
                new EventMetaData(EventProducerConsumerType.SYSTEM,
                    currentTask.getVertexName(), "",
                    currentTask.getTaskAttemptID()));
        TezEvent taskAttemptFailedEvent =
            new TezEvent(new TaskAttemptFailedEvent(diagnostics),
                sourceInfo);
        try {
          // Not setting taskComplete - since the main loop responsible for cleanup doesn't have
          // control yet. Getting control depends on whether the I/P/O returns correctly after
          // reporting an error.
          heartbeat(Lists.newArrayList(statusUpdateEvent, taskAttemptFailedEvent));
        } catch (Throwable t) {
          LOG.fatal("Failed to communicate task attempt failure to AM via"
              + " umbilical", t);
          if (t instanceof Error) {
            LOG.error("Exception of type Error. Exiting now", t);
            ExitUtil.terminate(-1, t);
          }
          // FIXME NEWTEZ maybe send a container failed event to AM?
          // Irrecoverable error unless heartbeat sync can be re-established
          heartbeatErrorException = t;
          heartbeatError.set(true);
          heartbeatThread.interrupt();
        }
      }

      @Override
      public boolean canCommit(TezTaskAttemptID taskAttemptID)
          throws IOException {
        return umbilical.canCommit(taskAttemptID);
      }
    };

    // report non-pid to application master
    String pid = System.getenv().get("JVM_PID");
   
    LOG.info("PID, containerIdentifier: " + pid + ", " + containerIdentifier);
   
    ContainerTask containerTask = null;
    UserGroupInformation childUGI = null;
    ContainerContext containerContext = new ContainerContext(
        containerIdentifier, pid);
    int getTaskMaxSleepTime = defaultConf.getInt(
        TezConfiguration.TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX,
        TezConfiguration.TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX_DEFAULT);
    int taskCount = 0;
    TezVertexID lastVertexId = null;
    EventMetaData currentSourceInfo = null;
    try {
      String loggerAddend = "";
      while (true) {
        // poll for new task
        if (taskCount > 0) {
          TezUtils.updateLoggers(loggerAddend);
        }
        boolean isNewGetTask = true;
        long getTaskPollStartTime = System.currentTimeMillis();
        long nextGetTaskPrintTime = getTaskPollStartTime + 2000l;
        for (int idle = 0; null == containerTask; ++idle) {
          if (!isNewGetTask) { // Don't sleep on the first iteration.
            long sleepTimeMilliSecs = Math.min(idle * 10, getTaskMaxSleepTime);
            if (sleepTimeMilliSecs + System.currentTimeMillis() > nextGetTaskPrintTime) {
              LOG.info("Sleeping for "
                  + sleepTimeMilliSecs
                  + "ms before retrying getTask again. Got null now. "
                  + "Next getTask sleep message after 2s");
              nextGetTaskPrintTime = System.currentTimeMillis() + sleepTimeMilliSecs + 2000l;
            }
            MILLISECONDS.sleep(sleepTimeMilliSecs);
          } else {
            LOG.info("Attempting to fetch new task");
          }
          isNewGetTask = false;
          containerTask = umbilical.getTask(containerContext);
        }
        LOG.info("Got TaskUpdate: "
            + (System.currentTimeMillis() - getTaskPollStartTime)
            + " ms after starting to poll."
            + " TaskInfo: shouldDie: " + containerTask.shouldDie()
            + (containerTask.shouldDie() == true ? "" : ", currentTaskAttemptId: "
                + containerTask.getTaskSpec().getTaskAttemptID()));
        if (containerTask.shouldDie()) {
          return;
        }
        taskCount++;

        // Reset FileSystem statistics
        FileSystem.clearStatistics();

        // Re-use the UGI only if the Credentials have not changed.
        if (containerTask.haveCredentialsChanged()) {
          LOG.info("Refreshing UGI since Credentials have changed");
          Credentials taskCreds = containerTask.getCredentials();
          if (taskCreds != null) {
            LOG.info("Credentials : #Tokens=" + taskCreds.numberOfTokens() + ", #SecretKeys="
                + taskCreds.numberOfSecretKeys());
            childUGI = UserGroupInformation.createRemoteUser(System
                .getenv(ApplicationConstants.Environment.USER.toString()));
            childUGI.addCredentials(containerTask.getCredentials());
          } else {
            LOG.info("Not loading any credentials, since no credentials provided");
          }
        }

        Map<String, TezLocalResource> additionalResources = containerTask.getAdditionalResources();
        if (LOG.isDebugEnabled()) {
          LOG.debug("Additional Resources added to container: " + additionalResources);
        }

        LOG.info("Localizing additional local resources for Task : " + additionalResources);
        List<URL> downloadedUrls = RelocalizationUtils.processAdditionalResources(
            Maps.transformValues(additionalResources, new Function<TezLocalResource, URI>() {
              @Override
              public URI apply(TezLocalResource input) {
                return input.getUri();
              }
            }), defaultConf);
        RelocalizationUtils.addUrlsToClassPath(downloadedUrls);

        LOG.info("Done localizing additional resources");
        final TaskSpec taskSpec = containerTask.getTaskSpec();
        if (LOG.isDebugEnabled()) {
          LOG.debug("New container task context:"
              + taskSpec.toString());
        }

        try {
          taskLock.writeLock().lock();
          currentTaskAttemptID = taskSpec.getTaskAttemptID();
          TezVertexID newVertexId =
              currentTaskAttemptID.getTaskID().getVertexID();
          currentTaskComplete.set(false);

          if (lastVertexId != null) {
            if (!lastVertexId.equals(newVertexId)) {
              objectRegistry.clearCache(ObjectLifeCycle.VERTEX);
            }
            if (!lastVertexId.getDAGId().equals(newVertexId.getDAGId())) {
              objectRegistry.clearCache(ObjectLifeCycle.DAG);
              startedInputsMap = HashMultimap.create();
            }
          }
          lastVertexId = newVertexId;
          TezUtils.updateLoggers(currentTaskAttemptID.toString());
          loggerAddend = currentTaskAttemptID.toString() + "_post";
         
          currentTask = createLogicalTask(attemptNumber, taskSpec,
              defaultConf, tezUmbilical, serviceConsumerMetadata);
         
          taskNonOobHeartbeatCounter = 0;
          nextHeartbeatNumToLog = (Math.max(1,
              (int) (LOG_COUNTER_START_INTERVAL / (amPollInterval == 0 ? 0.000001f
                  : (float) amPollInterval))));
        } finally {
          taskLock.writeLock().unlock();
        }

        final EventMetaData sourceInfo = new EventMetaData(
            EventProducerConsumerType.SYSTEM,
            taskSpec.getVertexName(), "", currentTaskAttemptID);
        currentSourceInfo = sourceInfo;

        // TODO Initiate Java VM metrics
        // JvmMetrics.initSingleton(containerId.toString(), job.getSessionId());

        childUGI.doAs(new PrivilegedExceptionAction<Object>() {
          @Override
          public Object run() throws Exception {
            try {
              setFileSystemWorkingDir(defaultConf);
              LOG.info("Initializing task"
                  + ", taskAttemptId=" + currentTaskAttemptID);
              currentTask.initialize();
              if (!currentTask.hadFatalError()) {
                LOG.info("Running task"
                    + ", taskAttemptId=" + currentTaskAttemptID);
                currentTask.run();
                LOG.info("Closing task"
                    + ", taskAttemptId=" + currentTaskAttemptID);
                currentTask.close();
              }
              LOG.info("Task completed"
                  + ", taskAttemptId=" + currentTaskAttemptID
                  + ", fatalErrorOccurred=" + currentTask.hadFatalError());
              // Mark taskComplete - irrespective of failure, framework has control from this point.
              currentTaskComplete.set(true);
              // TODONEWTEZ Should the container continue to run if the running task reported a fatal error ?
              if (!currentTask.hadFatalError()) {
                // Set counters in case of a successful task.
                currentTask.setFrameworkCounters();
                TezEvent statusUpdateEvent =
                    new TezEvent(new TaskStatusUpdateEvent(
                        currentTask.getCounters(), currentTask.getProgress()),
                        new EventMetaData(EventProducerConsumerType.SYSTEM,
                            currentTask.getVertexName(), "",
                            currentTask.getTaskAttemptID()));
                TezEvent taskCompletedEvent =
                    new TezEvent(new TaskAttemptCompletedEvent(), sourceInfo);
                heartbeat(Arrays.asList(statusUpdateEvent, taskCompletedEvent));
              } // Should the fatalError be reported ?
            } finally {
              currentTask.cleanup();
            }
            try {
              taskLock.writeLock().lock();
              currentTask = null;
              currentTaskAttemptID = null;
            } finally {
              taskLock.writeLock().unlock();
            }
            return null;
          }
        });
        FileSystem.closeAllForUGI(childUGI);
        containerTask = null;
        if (heartbeatError.get()) {
          LOG.fatal("Breaking out of task loop, heartbeat error occurred",
              heartbeatErrorException);
          break;
        }
      }
    } catch (FSError e) {
      // Heartbeats controlled manually after this.
      stopped.set(true);
      heartbeatThread.interrupt();
      LOG.fatal("FSError from child", e);
      // TODO NEWTEZ this should be a container failed event?
      try {
        taskLock.readLock().lock();
        if (currentTask != null && !currentTask.hadFatalError()) {
          // TODO Is this of any use if the heartbeat thread is being interrupted first ?
          // Prevent dup failure events
          currentTask.setFrameworkCounters();
          TezEvent statusUpdateEvent =
              new TezEvent(new TaskStatusUpdateEvent(
                  currentTask.getCounters(), currentTask.getProgress()),
                  new EventMetaData(EventProducerConsumerType.SYSTEM,
                      currentTask.getVertexName(), "",
                      currentTask.getTaskAttemptID()));
          currentTask.setFatalError(e, "FS Error in Child JVM");
          TezEvent taskAttemptFailedEvent =
              new TezEvent(new TaskAttemptFailedEvent(
                  StringUtils.stringifyException(e)),
                  currentSourceInfo);
          currentTaskComplete.set(true);
          heartbeat(Lists.newArrayList(statusUpdateEvent, taskAttemptFailedEvent));
        }
      } finally {
        taskLock.readLock().unlock();
      }
    } catch (Throwable throwable) {
      // Heartbeats controlled manually after this.
      if (throwable instanceof Error) {
        LOG.error("Exception of type Error. Exiting now", throwable);
        ExitUtil.terminate(-1, throwable);
      }
      stopped.set(true);
      heartbeatThread.interrupt();
      String cause = StringUtils.stringifyException(throwable);
      LOG.fatal("Error running child : " + cause);
      taskLock.readLock().lock();
      try {
        if (currentTask != null && !currentTask.hadFatalError()) {
          // TODO Is this of any use if the heartbeat thread is being interrupted first ?
          // Prevent dup failure events
          currentTask.setFatalError(throwable, "Error in Child JVM");
          currentTask.setFrameworkCounters();
          TezEvent statusUpdateEvent =
              new TezEvent(new TaskStatusUpdateEvent(
                  currentTask.getCounters(), currentTask.getProgress()),
                  new EventMetaData(EventProducerConsumerType.SYSTEM,
                      currentTask.getVertexName(), "",
                      currentTask.getTaskAttemptID()));
          TezEvent taskAttemptFailedEvent =
            new TezEvent(new TaskAttemptFailedEvent(cause),
              currentSourceInfo);
          currentTaskComplete.set(true);
          heartbeat(Lists.newArrayList(statusUpdateEvent, taskAttemptFailedEvent));
        }
      } finally {
View Full Code Here

Examples of org.apache.tez.runtime.api.impl.TezEvent

      Assert.fail("Invalid creation should have errored out");
    } catch (RuntimeException e) {
      // Expected
    }
    List<TezEvent> events =
        Arrays.asList(new TezEvent(new DataMovementEvent(1, null), new EventMetaData(
            EventProducerConsumerType.SYSTEM, "foo", "bar", null)));
    event = new VertexDataMovementEventsGeneratedEvent(
            TezVertexID.getInstance(
                TezDAGID.getInstance(ApplicationId.newInstance(0, 1), 1), 1), events);
    VertexDataMovementEventsGeneratedEvent deserializedEvent =
View Full Code Here

Examples of org.apache.tez.runtime.api.impl.TezEvent

    EventMetaData srcMeta = new EventMetaData(EventProducerConsumerType.OUTPUT, "consumerVertex", "producerVertex", srcTAID);
   
    // Verification via a CompositeEvent
    CompositeDataMovementEvent cdmEvent = new CompositeDataMovementEvent(0, destTasks.size(), "bytes".getBytes());
    cdmEvent.setVersion(2); // AttemptNum
    TezEvent tezEvent = new TezEvent(cdmEvent, srcMeta);
    // Event setup to look like it would after the Vertex is done with it.

    edge.sendTezEventToDestinationTasks(tezEvent);
   
    ArgumentCaptor<Event> args = ArgumentCaptor.forClass(Event.class);
    verify(eventHandler, times(destTasks.size())).handle(args.capture());
   
    verifyEvents(args.getAllValues(), srcTAID, destTasks);
   
   
    // Same Verification via regular DataMovementEvents
    reset(eventHandler);
    for (int i = 0 ; i < destTasks.size() ; i++) {
      DataMovementEvent dmEvent = new DataMovementEvent(i, "bytes".getBytes());
      dmEvent.setVersion(2);
      tezEvent = new TezEvent(dmEvent, srcMeta);
      edge.sendTezEventToDestinationTasks(tezEvent);
    }
    args = ArgumentCaptor.forClass(Event.class);
    verify(eventHandler, times(destTasks.size())).handle(args.capture());
   
View Full Code Here

Examples of org.apache.tez.runtime.api.impl.TezEvent

    for (Event event : events) {
      Entry<TezTaskID, Task> expEntry = taskIter.next();
      assertTrue(event instanceof TaskEventAddTezEvent);
      TaskEventAddTezEvent taEvent = (TaskEventAddTezEvent) event;
      assertEquals(expEntry.getKey(), taEvent.getTaskID());
      TezEvent tezEvent = taEvent.getTezEvent();

      DataMovementEvent dmEvent = (DataMovementEvent)tezEvent.getEvent();
      assertEquals(srcTAID.getId(), dmEvent.getVersion());
      assertEquals(count, dmEvent.getSourceIndex());
      assertEquals(srcTAID.getTaskID().getId(), dmEvent.getTargetIndex());
      assertTrue(Arrays.equals("bytes".getBytes(), dmEvent.getUserPayload()));
View Full Code Here

Examples of org.apache.tez.runtime.api.impl.TezEvent

    InputReadErrorEvent mockReEvent = new InputReadErrorEvent("", 0, 1);
    EventMetaData mockMeta = mock(EventMetaData.class);
    TezTaskAttemptID mockDestId1 = mock(TezTaskAttemptID.class);
    when(mockMeta.getTaskAttemptID()).thenReturn(mockDestId1);
    TezEvent tzEvent = new TezEvent(mockReEvent, mockMeta);
    taImpl.handle(new TaskAttemptEventOutputFailed(taskAttemptID, tzEvent, 4));
   
    // failure threshold not met. state is SUCCEEDED
    assertEquals("Task attempt is not in succeeded state", taImpl.getState(),
        TaskAttemptState.SUCCEEDED);
View Full Code Here

Examples of org.apache.tez.runtime.api.impl.TezEvent

  private void sendTezEventsToTask(TezTaskID taskId, int numTezEvents) {
    TaskEventAddTezEvent event = null;
    EventMetaData eventMetaData = new EventMetaData();
    DataMovementEvent dmEvent = new DataMovementEvent(null);
    TezEvent tezEvent = new TezEvent(dmEvent, eventMetaData);
    for (int i = 0; i < numTezEvents; i++) {
      event = new TaskEventAddTezEvent(taskId, tezEvent);
      mockTask.handle(event);
    }
  }
View Full Code Here

Examples of org.apache.tez.runtime.api.impl.TezEvent

   
    TezTaskID t0_v2 = TezTaskID.getInstance(v2.getVertexId(), 0);
    TezTaskAttemptID ta0_t0_v2 = TezTaskAttemptID.getInstance(t0_v2, 0);

    List<TezEvent> taskEvents = Lists.newLinkedList();
    TezEvent tezEvent1 = new TezEvent(
        new CompositeDataMovementEvent(0, 1, new byte[0]),
        new EventMetaData(EventProducerConsumerType.OUTPUT, "vertex2", "vertex3", ta0_t0_v2));
    TezEvent tezEvent2 = new TezEvent(
        new DataMovementEvent(0, new byte[0]),
        new EventMetaData(EventProducerConsumerType.OUTPUT, "vertex2", "vertex3", ta0_t0_v2));
    taskEvents.add(tezEvent1);
    taskEvents.add(tezEvent2);
    // send events and test that they are buffered until some task is scheduled
View Full Code Here

Examples of org.apache.tez.runtime.api.impl.TezEvent

  }

  private static synchronized boolean heartbeat(
      Collection<TezEvent> outOfBandEvents)
      throws TezException, IOException {
    TezEvent updateEvent = null;
    int eventCounter = 0;
    int eventsRange = 0;
    TezTaskAttemptID taskAttemptID = null;
    List<TezEvent> events = new ArrayList<TezEvent>();
    try {
      taskLock.readLock().lock();
      if (currentTask != null) {
        eventsToSend.drainTo(events);
        taskAttemptID = currentTaskAttemptID;
        eventCounter = currentTask.getEventCounter();
        eventsRange = maxEventsToGet;
        if (!currentTask.isTaskDone() && !currentTask.hadFatalError()) {
          updateEvent = new TezEvent(new TaskStatusUpdateEvent(
              currentTask.getCounters(), currentTask.getProgress()),
                new EventMetaData(EventProducerConsumerType.SYSTEM,
                    currentTask.getVertexName(), "", taskAttemptID));
          events.add(updateEvent);
        } else if (outOfBandEvents == null && events.isEmpty()) {
View Full Code Here

Examples of org.apache.tez.runtime.api.impl.TezEvent

      @Override
      public void signalFatalError(TezTaskAttemptID taskAttemptID,
          String diagnostics,
          EventMetaData sourceInfo) {
        currentTask.setFrameworkCounters();
        TezEvent statusUpdateEvent =
            new TezEvent(new TaskStatusUpdateEvent(
                currentTask.getCounters(), currentTask.getProgress()),
                new EventMetaData(EventProducerConsumerType.SYSTEM,
                    currentTask.getVertexName(), "",
                    currentTask.getTaskAttemptID()));
        TezEvent taskAttemptFailedEvent =
            new TezEvent(new TaskAttemptFailedEvent(diagnostics),
                sourceInfo);
        try {
          // Not setting taskComplete - since the main loop responsible for cleanup doesn't have
          // control yet. Getting control depends on whether the I/P/O returns correctly after
          // reporting an error.
          heartbeat(Lists.newArrayList(statusUpdateEvent, taskAttemptFailedEvent));
        } catch (Throwable t) {
          LOG.fatal("Failed to communicate task attempt failure to AM via"
              + " umbilical", t);
          if (t instanceof Error) {
            LOG.error("Exception of type Error. Exiting now", t);
            ExitUtil.terminate(-1, t);
          }
          // FIXME NEWTEZ maybe send a container failed event to AM?
          // Irrecoverable error unless heartbeat sync can be re-established
          heartbeatErrorException = t;
          heartbeatError.set(true);
          heartbeatThread.interrupt();
        }
      }

      @Override
      public boolean canCommit(TezTaskAttemptID taskAttemptID)
          throws IOException {
        return umbilical.canCommit(taskAttemptID);
      }
    };

    // report non-pid to application master
    String pid = System.getenv().get("JVM_PID");
   
    LOG.info("PID, containerIdentifier: " + pid + ", " + containerIdentifier);
   
    ContainerTask containerTask = null;
    UserGroupInformation childUGI = null;
    ContainerContext containerContext = new ContainerContext(
        containerIdentifier, pid);
    int getTaskMaxSleepTime = defaultConf.getInt(
        TezConfiguration.TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX,
        TezConfiguration.TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX_DEFAULT);
    int taskCount = 0;
    TezVertexID lastVertexId = null;
    EventMetaData currentSourceInfo = null;
    try {
      while (true) {
        // poll for new task
        if (taskCount > 0) {
          updateLoggers(null);
        }
        boolean isNewGetTask = true;
        long getTaskPollStartTime = System.currentTimeMillis();
        long nextGetTaskPrintTime = getTaskPollStartTime + 2000l;
        for (int idle = 0; null == containerTask; ++idle) {
          if (!isNewGetTask) { // Don't sleep on the first iteration.
            long sleepTimeMilliSecs = Math.min(idle * 10, getTaskMaxSleepTime);
            if (sleepTimeMilliSecs + System.currentTimeMillis() > nextGetTaskPrintTime) {
              LOG.info("Sleeping for "
                  + sleepTimeMilliSecs
                  + "ms before retrying getTask again. Got null now. "
                  + "Next getTask sleep message after 2s");
              nextGetTaskPrintTime = System.currentTimeMillis() + sleepTimeMilliSecs + 2000l;
            }
            MILLISECONDS.sleep(sleepTimeMilliSecs);
          } else {
            LOG.info("Attempting to fetch new task");
          }
          isNewGetTask = false;
          containerTask = umbilical.getTask(containerContext);
        }
        LOG.info("Got TaskUpdate: "
            + (System.currentTimeMillis() - getTaskPollStartTime)
            + " ms after starting to poll."
            + " TaskInfo: shouldDie: " + containerTask.shouldDie()
            + (containerTask.shouldDie() == true ? "" : ", currentTaskAttemptId: "
                + containerTask.getTaskSpec().getTaskAttemptID()));
        if (containerTask.shouldDie()) {
          return;
        }
        taskCount++;

        // Reset FileSystem statistics
        FileSystem.clearStatistics();

        // Re-use the UGI only if the Credentials have not changed.
        if (containerTask.haveCredentialsChanged()) {
          LOG.info("Refreshing UGI since Credentials have changed");
          Credentials taskCreds = containerTask.getCredentials();
          if (taskCreds != null) {
            LOG.info("Credentials : #Tokens=" + taskCreds.numberOfTokens() + ", #SecretKeys="
                + taskCreds.numberOfSecretKeys());
            childUGI = UserGroupInformation.createRemoteUser(System
                .getenv(ApplicationConstants.Environment.USER.toString()));
            childUGI.addCredentials(containerTask.getCredentials());
          } else {
            LOG.info("Not loading any credentials, since no credentials provided");
          }
        }

        Map<String, TezLocalResource> additionalResources = containerTask.getAdditionalResources();
        if (LOG.isDebugEnabled()) {
          LOG.debug("Additional Resources added to container: " + additionalResources);
        }

        LOG.info("Localizing additional local resources for Task : " + additionalResources);
        List<URL> downloadedUrls = RelocalizationUtils.processAdditionalResources(
            Maps.transformValues(additionalResources, new Function<TezLocalResource, URI>() {
              @Override
              public URI apply(TezLocalResource input) {
                return input.getUri();
              }
            }), defaultConf);
        RelocalizationUtils.addUrlsToClassPath(downloadedUrls);

        LOG.info("Done localizing additional resources");
        final TaskSpec taskSpec = containerTask.getTaskSpec();
        if (LOG.isDebugEnabled()) {
          LOG.debug("New container task context:"
              + taskSpec.toString());
        }

        try {
          taskLock.writeLock().lock();
          currentTaskAttemptID = taskSpec.getTaskAttemptID();
          TezVertexID newVertexId =
              currentTaskAttemptID.getTaskID().getVertexID();
          currentTaskComplete.set(false);

          if (lastVertexId != null) {
            if (!lastVertexId.equals(newVertexId)) {
              objectRegistry.clearCache(ObjectLifeCycle.VERTEX);
            }
            if (!lastVertexId.getDAGId().equals(newVertexId.getDAGId())) {
              objectRegistry.clearCache(ObjectLifeCycle.DAG);
              startedInputsMap = HashMultimap.create();
            }
          }
          lastVertexId = newVertexId;
          updateLoggers(currentTaskAttemptID);

          currentTask = createLogicalTask(attemptNumber, taskSpec,
              defaultConf, tezUmbilical, serviceConsumerMetadata);
         
          taskNonOobHeartbeatCounter = 0;
          nextHeartbeatNumToLog = (Math.max(1,
              (int) (LOG_COUNTER_START_INTERVAL / (amPollInterval == 0 ? 0.000001f
                  : (float) amPollInterval))));
        } finally {
          taskLock.writeLock().unlock();
        }

        final EventMetaData sourceInfo = new EventMetaData(
            EventProducerConsumerType.SYSTEM,
            taskSpec.getVertexName(), "", currentTaskAttemptID);
        currentSourceInfo = sourceInfo;

        // TODO Initiate Java VM metrics
        // JvmMetrics.initSingleton(containerId.toString(), job.getSessionId());

        childUGI.doAs(new PrivilegedExceptionAction<Object>() {
          @Override
          public Object run() throws Exception {
            try {
              setFileSystemWorkingDir(defaultConf);
              LOG.info("Initializing task"
                  + ", taskAttemptId=" + currentTaskAttemptID);
              currentTask.initialize();
              if (!currentTask.hadFatalError()) {
                LOG.info("Running task"
                    + ", taskAttemptId=" + currentTaskAttemptID);
                currentTask.run();
                LOG.info("Closing task"
                    + ", taskAttemptId=" + currentTaskAttemptID);
                currentTask.close();
              }
              LOG.info("Task completed"
                  + ", taskAttemptId=" + currentTaskAttemptID
                  + ", fatalErrorOccurred=" + currentTask.hadFatalError());
              // Mark taskComplete - irrespective of failure, framework has control from this point.
              currentTaskComplete.set(true);
              // TODONEWTEZ Should the container continue to run if the running task reported a fatal error ?
              if (!currentTask.hadFatalError()) {
                // Set counters in case of a successful task.
                currentTask.setFrameworkCounters();
                TezEvent statusUpdateEvent =
                    new TezEvent(new TaskStatusUpdateEvent(
                        currentTask.getCounters(), currentTask.getProgress()),
                        new EventMetaData(EventProducerConsumerType.SYSTEM,
                            currentTask.getVertexName(), "",
                            currentTask.getTaskAttemptID()));
                TezEvent taskCompletedEvent =
                    new TezEvent(new TaskAttemptCompletedEvent(), sourceInfo);
                heartbeat(Arrays.asList(statusUpdateEvent, taskCompletedEvent));
              } // Should the fatalError be reported ?
            } finally {
              currentTask.cleanup();
            }
            try {
              taskLock.writeLock().lock();
              currentTask = null;
              currentTaskAttemptID = null;
            } finally {
              taskLock.writeLock().unlock();
            }
            return null;
          }
        });
        FileSystem.closeAllForUGI(childUGI);
        containerTask = null;
        if (heartbeatError.get()) {
          LOG.fatal("Breaking out of task loop, heartbeat error occurred",
              heartbeatErrorException);
          break;
        }
      }
    } catch (FSError e) {
      // Heartbeats controlled manually after this.
      stopped.set(true);
      heartbeatThread.interrupt();
      LOG.fatal("FSError from child", e);
      // TODO NEWTEZ this should be a container failed event?
      try {
        taskLock.readLock().lock();
        if (currentTask != null && !currentTask.hadFatalError()) {
          // TODO Is this of any use if the heartbeat thread is being interrupted first ?
          // Prevent dup failure events
          currentTask.setFrameworkCounters();
          TezEvent statusUpdateEvent =
              new TezEvent(new TaskStatusUpdateEvent(
                  currentTask.getCounters(), currentTask.getProgress()),
                  new EventMetaData(EventProducerConsumerType.SYSTEM,
                      currentTask.getVertexName(), "",
                      currentTask.getTaskAttemptID()));
          currentTask.setFatalError(e, "FS Error in Child JVM");
          TezEvent taskAttemptFailedEvent =
              new TezEvent(new TaskAttemptFailedEvent(
                  StringUtils.stringifyException(e)),
                  currentSourceInfo);
          currentTaskComplete.set(true);
          heartbeat(Lists.newArrayList(statusUpdateEvent, taskAttemptFailedEvent));
        }
      } finally {
        taskLock.readLock().unlock();
      }
    } catch (Throwable throwable) {
      // Heartbeats controlled manually after this.
      if (throwable instanceof Error) {
        LOG.error("Exception of type Error. Exiting now", throwable);
        ExitUtil.terminate(-1, throwable);
      }
      stopped.set(true);
      heartbeatThread.interrupt();
      String cause = StringUtils.stringifyException(throwable);
      LOG.fatal("Error running child : " + cause);
      taskLock.readLock().lock();
      try {
        if (currentTask != null && !currentTask.hadFatalError()) {
          // TODO Is this of any use if the heartbeat thread is being interrupted first ?
          // Prevent dup failure events
          currentTask.setFatalError(throwable, "Error in Child JVM");
          currentTask.setFrameworkCounters();
          TezEvent statusUpdateEvent =
              new TezEvent(new TaskStatusUpdateEvent(
                  currentTask.getCounters(), currentTask.getProgress()),
                  new EventMetaData(EventProducerConsumerType.SYSTEM,
                      currentTask.getVertexName(), "",
                      currentTask.getTaskAttemptID()));
          TezEvent taskAttemptFailedEvent =
            new TezEvent(new TaskAttemptFailedEvent(cause),
              currentSourceInfo);
          currentTaskComplete.set(true);
          heartbeat(Lists.newArrayList(statusUpdateEvent, taskAttemptFailedEvent));
        }
      } finally {
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.