Package org.apache.tez.runtime.common.objectregistry

Examples of org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl


    // Should this be part of main - Metrics and ObjectRegistry. TezTask setup should be independent
    // of this class. Leaving it here, till there's some entity representing a running JVM.
    DefaultMetricsSystem.initialize("TezTask");

    // singleton of ObjectRegistry for this JVM
    ObjectRegistryImpl objectRegistry = new ObjectRegistryImpl();

    return new TezChild(conf, host, port, containerIdentifier, tokenIdentifier,
        attemptNumber, localDirs, objectRegistry);
  }
View Full Code Here


    }
    // FIXME fix initialize metrics in child runner
    DefaultMetricsSystem.initialize("VertexTask");
    YarnTezDagChild.containerIdStr = containerIdentifier;

    ObjectRegistryImpl objectRegistry = new ObjectRegistryImpl();
    @SuppressWarnings("unused")
    Injector injector = Guice.createInjector(
        new ObjectRegistryModule(objectRegistry));

    // Security framework already loaded the tokens into current ugi
    Credentials credentials =
        UserGroupInformation.getCurrentUser().getCredentials();

    if (LOG.isDebugEnabled()) {
      LOG.debug("Executing with tokens:");
      for (Token<?> token : credentials.getAllTokens()) {
        LOG.debug(token);
      }
    }

    amPollInterval = defaultConf.getLong(
        TezConfiguration.TEZ_TASK_AM_HEARTBEAT_INTERVAL_MS,
        TezConfiguration.TEZ_TASK_AM_HEARTBEAT_INTERVAL_MS_DEFAULT);
    maxEventsToGet = defaultConf.getInt(
        TezConfiguration.TEZ_TASK_MAX_EVENTS_PER_HEARTBEAT,
        TezConfiguration.TEZ_TASK_MAX_EVENTS_PER_HEARTBEAT_DEFAULT);

    // Create TaskUmbilicalProtocol as actual task owner.
    UserGroupInformation taskOwner =
      UserGroupInformation.createRemoteUser(tokenIdentifier);

    Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
    SecurityUtil.setTokenService(jobToken, address);
    taskOwner.addToken(jobToken);
    // Will jobToken change across DAGs ?
    Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<String, ByteBuffer>();
    serviceConsumerMetadata.put(ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID,
        ShuffleUtils.convertJobTokenToBytes(jobToken));

    umbilical =
      taskOwner.doAs(new PrivilegedExceptionAction<TezTaskUmbilicalProtocol>() {
      @Override
      public TezTaskUmbilicalProtocol run() throws Exception {
        return (TezTaskUmbilicalProtocol)RPC.getProxy(TezTaskUmbilicalProtocol.class,
            TezTaskUmbilicalProtocol.versionID, address, defaultConf);
      }
    });

    final Thread heartbeatThread = startHeartbeatThread();

    TezUmbilical tezUmbilical = new TezUmbilical() {
      /**
       * Appends every event in {@code events} to {@code eventsToSend}.
       * Nothing is transmitted from this method itself.
       * NOTE(review): presumably the heartbeat thread drains
       * {@code eventsToSend} - confirm against the heartbeat implementation.
       *
       * @param events events to queue
       */
      @Override
      public void addEvents(Collection<TezEvent> events) {
        eventsToSend.addAll(events);
      }

      @Override
      public void signalFatalError(TezTaskAttemptID taskAttemptID,
          String diagnostics,
          EventMetaData sourceInfo) {
        currentTask.setFrameworkCounters();
        TezEvent statusUpdateEvent =
            new TezEvent(new TaskStatusUpdateEvent(
                currentTask.getCounters(), currentTask.getProgress()),
                new EventMetaData(EventProducerConsumerType.SYSTEM,
                    currentTask.getVertexName(), "",
                    currentTask.getTaskAttemptID()));
        TezEvent taskAttemptFailedEvent =
            new TezEvent(new TaskAttemptFailedEvent(diagnostics),
                sourceInfo);
        try {
          // Not setting taskComplete - since the main loop responsible for cleanup doesn't have
          // control yet. Getting control depends on whether the I/P/O returns correctly after
          // reporting an error.
          heartbeat(Lists.newArrayList(statusUpdateEvent, taskAttemptFailedEvent));
        } catch (Throwable t) {
          LOG.fatal("Failed to communicate task attempt failure to AM via"
              + " umbilical", t);
          if (t instanceof Error) {
            LOG.error("Exception of type Error. Exiting now", t);
            ExitUtil.terminate(-1, t);
          }
          // FIXME NEWTEZ maybe send a container failed event to AM?
          // Irrecoverable error unless heartbeat sync can be re-established
          heartbeatErrorException = t;
          heartbeatError.set(true);
          heartbeatThread.interrupt();
        }
      }

      @Override
      public boolean canCommit(TezTaskAttemptID taskAttemptID)
          throws IOException {
        return umbilical.canCommit(taskAttemptID);
      }
    };

    // report non-pid to application master
    String pid = System.getenv().get("JVM_PID");
   
    LOG.info("PID, containerIdentifier: " + pid + ", " + containerIdentifier);
   
    ContainerTask containerTask = null;
    UserGroupInformation childUGI = null;
    ContainerContext containerContext = new ContainerContext(
        containerIdentifier, pid);
    int getTaskMaxSleepTime = defaultConf.getInt(
        TezConfiguration.TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX,
        TezConfiguration.TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX_DEFAULT);
    int taskCount = 0;
    TezVertexID lastVertexId = null;
    EventMetaData currentSourceInfo = null;
    try {
      while (true) {
        // poll for new task
        if (taskCount > 0) {
          updateLoggers(null);
        }
        boolean isNewGetTask = true;
        long getTaskPollStartTime = System.currentTimeMillis();
        long nextGetTaskPrintTime = getTaskPollStartTime + 2000l;
        for (int idle = 0; null == containerTask; ++idle) {
          if (!isNewGetTask) { // Don't sleep on the first iteration.
            long sleepTimeMilliSecs = Math.min(idle * 10, getTaskMaxSleepTime);
            if (sleepTimeMilliSecs + System.currentTimeMillis() > nextGetTaskPrintTime) {
              LOG.info("Sleeping for "
                  + sleepTimeMilliSecs
                  + "ms before retrying getTask again. Got null now. "
                  + "Next getTask sleep message after 2s");
              nextGetTaskPrintTime = System.currentTimeMillis() + sleepTimeMilliSecs + 2000l;
            }
            MILLISECONDS.sleep(sleepTimeMilliSecs);
          } else {
            LOG.info("Attempting to fetch new task");
          }
          isNewGetTask = false;
          containerTask = umbilical.getTask(containerContext);
        }
        LOG.info("Got TaskUpdate: "
            + (System.currentTimeMillis() - getTaskPollStartTime)
            + " ms after starting to poll."
            + " TaskInfo: shouldDie: " + containerTask.shouldDie()
            + (containerTask.shouldDie() == true ? "" : ", currentTaskAttemptId: "
                + containerTask.getTaskSpec().getTaskAttemptID()));
        if (containerTask.shouldDie()) {
          return;
        }
        taskCount++;

        // Reset file system statistics for the new task.
        FileSystem.clearStatistics();
       
        // Re-use the UGI only if the Credentials have not changed.
        if (containerTask.haveCredentialsChanged()) {
          LOG.info("Refreshing UGI since Credentials have changed");
          Credentials taskCreds = containerTask.getCredentials();
          if (taskCreds != null) {
            LOG.info("Credentials : #Tokens=" + taskCreds.numberOfTokens() + ", #SecretKeys="
                + taskCreds.numberOfSecretKeys());
            childUGI = UserGroupInformation.createRemoteUser(System
                .getenv(ApplicationConstants.Environment.USER.toString()));
            childUGI.addCredentials(containerTask.getCredentials());
          } else {
            LOG.info("Not loading any credentials, since no credentials provided");
          }
        }

        Map<String, TezLocalResource> additionalResources = containerTask.getAdditionalResources();
        if (LOG.isDebugEnabled()) {
          LOG.debug("Additional Resources added to container: " + additionalResources);
        }
        processAdditionalResources(additionalResources, defaultConf);
        final TaskSpec taskSpec = containerTask.getTaskSpec();
        if (LOG.isDebugEnabled()) {
          LOG.debug("New container task context:"
              + taskSpec.toString());
        }

        try {
          taskLock.writeLock().lock();
          currentTaskAttemptID = taskSpec.getTaskAttemptID();
          TezVertexID newVertexId =
              currentTaskAttemptID.getTaskID().getVertexID();
          currentTaskComplete.set(false);

          if (lastVertexId != null) {
            if (!lastVertexId.equals(newVertexId)) {
              objectRegistry.clearCache(ObjectLifeCycle.VERTEX);
            }
            if (!lastVertexId.getDAGId().equals(newVertexId.getDAGId())) {
              objectRegistry.clearCache(ObjectLifeCycle.DAG);
              startedInputsMap = HashMultimap.create();
            }
          }
          lastVertexId = newVertexId;
          updateLoggers(currentTaskAttemptID);
View Full Code Here

    }
    // FIXME fix initialize metrics in child runner
    DefaultMetricsSystem.initialize("VertexTask");
    YarnTezDagChild.containerIdStr = containerIdentifier;

    ObjectRegistryImpl objectRegistry = new ObjectRegistryImpl();
    @SuppressWarnings("unused")
    Injector injector = Guice.createInjector(
        new ObjectRegistryModule(objectRegistry));

    // Security framework already loaded the tokens into current ugi
    Credentials credentials =
        UserGroupInformation.getCurrentUser().getCredentials();

    if (LOG.isDebugEnabled()) {
      LOG.info("Executing with tokens:");
      for (Token<?> token: credentials.getAllTokens()) {
        LOG.info(token);
      }
    }

    amPollInterval = defaultConf.getLong(
        TezConfiguration.TEZ_TASK_AM_HEARTBEAT_INTERVAL_MS,
        TezConfiguration.TEZ_TASK_AM_HEARTBEAT_INTERVAL_MS_DEFAULT);
    maxEventsToGet = defaultConf.getInt(
        TezConfiguration.TEZ_TASK_MAX_EVENTS_PER_HEARTBEAT,
        TezConfiguration.TEZ_TASK_MAX_EVENTS_PER_HEARTBEAT_DEFAULT);

    // Create TaskUmbilicalProtocol as actual task owner.
    UserGroupInformation taskOwner =
      UserGroupInformation.createRemoteUser(tokenIdentifier);

    Token<JobTokenIdentifier> jobToken = TokenCache.getJobToken(credentials);
    SecurityUtil.setTokenService(jobToken, address);
    taskOwner.addToken(jobToken);
    // Will jobToken change across DAGs ?
    Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<String, ByteBuffer>();
    serviceConsumerMetadata.put(ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID,
        ShuffleUtils.convertJobTokenToBytes(jobToken));

    umbilical =
      taskOwner.doAs(new PrivilegedExceptionAction<TezTaskUmbilicalProtocol>() {
      @Override
      public TezTaskUmbilicalProtocol run() throws Exception {
        return (TezTaskUmbilicalProtocol)RPC.getProxy(TezTaskUmbilicalProtocol.class,
            TezTaskUmbilicalProtocol.versionID, address, defaultConf);
      }
    });

    final Thread heartbeatThread = startHeartbeatThread();

    TezUmbilical tezUmbilical = new TezUmbilical() {
      /**
       * Appends every event in {@code events} to {@code eventsToSend}.
       * Nothing is transmitted from this method itself.
       * NOTE(review): presumably the heartbeat thread drains
       * {@code eventsToSend} - confirm against the heartbeat implementation.
       *
       * @param events events to queue
       */
      @Override
      public void addEvents(Collection<TezEvent> events) {
        eventsToSend.addAll(events);
      }

      @Override
      public void signalFatalError(TezTaskAttemptID taskAttemptID,
          String diagnostics,
          EventMetaData sourceInfo) {
        TezEvent taskAttemptFailedEvent =
            new TezEvent(new TaskAttemptFailedEvent(diagnostics),
                sourceInfo);
        try {
          heartbeat(Collections.singletonList(taskAttemptFailedEvent));
        } catch (Throwable t) {
          LOG.fatal("Failed to communicate task attempt failure to AM via"
              + " umbilical", t);
          // FIXME NEWTEZ maybe send a container failed event to AM?
          // Irrecoverable error unless heartbeat sync can be re-established
          heartbeatErrorException = t;
          heartbeatError.set(true);
          heartbeatThread.interrupt();
        }
      }

      @Override
      public boolean canCommit(TezTaskAttemptID taskAttemptID)
          throws IOException {
        return umbilical.canCommit(taskAttemptID);
      }
    };

    // report non-pid to application master
    String pid = System.getenv().get("JVM_PID");
   
    LOG.info("PID, containerIdentifier: " + pid + ", " + containerIdentifier);
   
    ContainerTask containerTask = null;
    UserGroupInformation childUGI = null;
    ContainerContext containerContext = new ContainerContext(
        containerIdentifier, pid);
    int getTaskMaxSleepTime = defaultConf.getInt(
        TezConfiguration.TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX,
        TezConfiguration.TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX_DEFAULT);
    int taskCount = 0;
    TezVertexID lastVertexId = null;
    EventMetaData currentSourceInfo = null;
    try {
      while (true) {
        // poll for new task
        if (taskCount > 0) {
          updateLoggers(null);
        }
        boolean isNewGetTask = true;
        long getTaskPollStartTime = System.currentTimeMillis();
        long nextGetTaskPrintTime = getTaskPollStartTime + 2000l;
        for (int idle = 0; null == containerTask; ++idle) {
          if (!isNewGetTask) { // Don't sleep on the first iteration.
            long sleepTimeMilliSecs = Math.min(idle * 10, getTaskMaxSleepTime);
            if (sleepTimeMilliSecs + System.currentTimeMillis() > nextGetTaskPrintTime) {
              LOG.info("Sleeping for "
                  + sleepTimeMilliSecs
                  + "ms before retrying getTask again. Got null now. "
                  + "Next getTask sleep message after 2s");
              nextGetTaskPrintTime = System.currentTimeMillis() + sleepTimeMilliSecs + 2000l;
            }
            MILLISECONDS.sleep(sleepTimeMilliSecs);
          } else {
            LOG.info("Attempting to fetch new task");
          }
          isNewGetTask = false;
          containerTask = umbilical.getTask(containerContext);
        }
        LOG.info("Got TaskUpdate: "
            + (System.currentTimeMillis() - getTaskPollStartTime)
            + " ms after starting to poll."
            + " TaskInfo: shouldDie: " + containerTask.shouldDie()
            + (containerTask.shouldDie() == true ? "" : ", currentTaskAttemptId: "
                + containerTask.getTaskSpec().getTaskAttemptID()));
        if (containerTask.shouldDie()) {
          return;
        }
        taskCount++;
        final TaskSpec taskSpec = containerTask.getTaskSpec();
        if (LOG.isDebugEnabled()) {
          LOG.debug("New container task context:"
              + taskSpec.toString());
        }

        try {
          taskLock.writeLock().lock();
          currentTaskAttemptID = taskSpec.getTaskAttemptID();
          TezVertexID newVertexId =
              currentTaskAttemptID.getTaskID().getVertexID();

          if (lastVertexId != null) {
            if (!lastVertexId.equals(newVertexId)) {
              objectRegistry.clearCache(ObjectLifeCycle.VERTEX);
            }
            if (!lastVertexId.getDAGId().equals(newVertexId.getDAGId())) {
              objectRegistry.clearCache(ObjectLifeCycle.DAG);
            }
          }
          lastVertexId = newVertexId;
          updateLoggers(currentTaskAttemptID);

View Full Code Here

    }
    // FIXME fix initialize metrics in child runner
    DefaultMetricsSystem.initialize("VertexTask");
    YarnTezDagChild.containerIdStr = containerIdentifier;

    ObjectRegistryImpl objectRegistry = new ObjectRegistryImpl();
    @SuppressWarnings("unused")
    Injector injector = Guice.createInjector(
        new ObjectRegistryModule(objectRegistry));

    // Security framework already loaded the tokens into current ugi
    Credentials credentials =
        UserGroupInformation.getCurrentUser().getCredentials();

    if (LOG.isDebugEnabled()) {
      LOG.debug("Executing with tokens:");
      for (Token<?> token : credentials.getAllTokens()) {
        LOG.debug(token);
      }
    }

    amPollInterval = defaultConf.getLong(
        TezConfiguration.TEZ_TASK_AM_HEARTBEAT_INTERVAL_MS,
        TezConfiguration.TEZ_TASK_AM_HEARTBEAT_INTERVAL_MS_DEFAULT);
    maxEventsToGet = defaultConf.getInt(
        TezConfiguration.TEZ_TASK_MAX_EVENTS_PER_HEARTBEAT,
        TezConfiguration.TEZ_TASK_MAX_EVENTS_PER_HEARTBEAT_DEFAULT);

    // Create TaskUmbilicalProtocol as actual task owner.
    UserGroupInformation taskOwner =
      UserGroupInformation.createRemoteUser(tokenIdentifier);

    Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
    SecurityUtil.setTokenService(jobToken, address);
    taskOwner.addToken(jobToken);
    // Will jobToken change across DAGs ?
    Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<String, ByteBuffer>();
    serviceConsumerMetadata.put(ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID,
        ShuffleUtils.convertJobTokenToBytes(jobToken));

    umbilical =
      taskOwner.doAs(new PrivilegedExceptionAction<TezTaskUmbilicalProtocol>() {
      @Override
      public TezTaskUmbilicalProtocol run() throws Exception {
        return (TezTaskUmbilicalProtocol)RPC.getProxy(TezTaskUmbilicalProtocol.class,
            TezTaskUmbilicalProtocol.versionID, address, defaultConf);
      }
    });

    final Thread heartbeatThread = startHeartbeatThread();

    TezUmbilical tezUmbilical = new TezUmbilical() {
      /**
       * Appends every event in {@code events} to {@code eventsToSend}.
       * Nothing is transmitted from this method itself.
       * NOTE(review): presumably the heartbeat thread drains
       * {@code eventsToSend} - confirm against the heartbeat implementation.
       *
       * @param events events to queue
       */
      @Override
      public void addEvents(Collection<TezEvent> events) {
        eventsToSend.addAll(events);
      }

      @Override
      public void signalFatalError(TezTaskAttemptID taskAttemptID,
          String diagnostics,
          EventMetaData sourceInfo) {
        currentTask.setFrameworkCounters();
        TezEvent statusUpdateEvent =
            new TezEvent(new TaskStatusUpdateEvent(
                currentTask.getCounters(), currentTask.getProgress()),
                new EventMetaData(EventProducerConsumerType.SYSTEM,
                    currentTask.getVertexName(), "",
                    currentTask.getTaskAttemptID()));
        TezEvent taskAttemptFailedEvent =
            new TezEvent(new TaskAttemptFailedEvent(diagnostics),
                sourceInfo);
        try {
          // Not setting taskComplete - since the main loop responsible for cleanup doesn't have
          // control yet. Getting control depends on whether the I/P/O returns correctly after
          // reporting an error.
          heartbeat(Lists.newArrayList(statusUpdateEvent, taskAttemptFailedEvent));
        } catch (Throwable t) {
          LOG.fatal("Failed to communicate task attempt failure to AM via"
              + " umbilical", t);
          if (t instanceof Error) {
            LOG.error("Exception of type Error. Exiting now", t);
            ExitUtil.terminate(-1, t);
          }
          // FIXME NEWTEZ maybe send a container failed event to AM?
          // Irrecoverable error unless heartbeat sync can be re-established
          heartbeatErrorException = t;
          heartbeatError.set(true);
          heartbeatThread.interrupt();
        }
      }

      @Override
      public boolean canCommit(TezTaskAttemptID taskAttemptID)
          throws IOException {
        return umbilical.canCommit(taskAttemptID);
      }
    };

    // report non-pid to application master
    String pid = System.getenv().get("JVM_PID");
   
    LOG.info("PID, containerIdentifier: " + pid + ", " + containerIdentifier);
   
    ContainerTask containerTask = null;
    UserGroupInformation childUGI = null;
    ContainerContext containerContext = new ContainerContext(
        containerIdentifier, pid);
    int getTaskMaxSleepTime = defaultConf.getInt(
        TezConfiguration.TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX,
        TezConfiguration.TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX_DEFAULT);
    int taskCount = 0;
    TezVertexID lastVertexId = null;
    EventMetaData currentSourceInfo = null;
    try {
      String loggerAddend = "";
      while (true) {
        // poll for new task
        if (taskCount > 0) {
          TezUtils.updateLoggers(loggerAddend);
        }
        boolean isNewGetTask = true;
        long getTaskPollStartTime = System.currentTimeMillis();
        long nextGetTaskPrintTime = getTaskPollStartTime + 2000l;
        for (int idle = 0; null == containerTask; ++idle) {
          if (!isNewGetTask) { // Don't sleep on the first iteration.
            long sleepTimeMilliSecs = Math.min(idle * 10, getTaskMaxSleepTime);
            if (sleepTimeMilliSecs + System.currentTimeMillis() > nextGetTaskPrintTime) {
              LOG.info("Sleeping for "
                  + sleepTimeMilliSecs
                  + "ms before retrying getTask again. Got null now. "
                  + "Next getTask sleep message after 2s");
              nextGetTaskPrintTime = System.currentTimeMillis() + sleepTimeMilliSecs + 2000l;
            }
            MILLISECONDS.sleep(sleepTimeMilliSecs);
          } else {
            LOG.info("Attempting to fetch new task");
          }
          isNewGetTask = false;
          containerTask = umbilical.getTask(containerContext);
        }
        LOG.info("Got TaskUpdate: "
            + (System.currentTimeMillis() - getTaskPollStartTime)
            + " ms after starting to poll."
            + " TaskInfo: shouldDie: " + containerTask.shouldDie()
            + (containerTask.shouldDie() == true ? "" : ", currentTaskAttemptId: "
                + containerTask.getTaskSpec().getTaskAttemptID()));
        if (containerTask.shouldDie()) {
          return;
        }
        taskCount++;

        // Reset FileSystem statistics
        FileSystem.clearStatistics();

        // Re-use the UGI only if the Credentials have not changed.
        if (containerTask.haveCredentialsChanged()) {
          LOG.info("Refreshing UGI since Credentials have changed");
          Credentials taskCreds = containerTask.getCredentials();
          if (taskCreds != null) {
            LOG.info("Credentials : #Tokens=" + taskCreds.numberOfTokens() + ", #SecretKeys="
                + taskCreds.numberOfSecretKeys());
            childUGI = UserGroupInformation.createRemoteUser(System
                .getenv(ApplicationConstants.Environment.USER.toString()));
            childUGI.addCredentials(containerTask.getCredentials());
          } else {
            LOG.info("Not loading any credentials, since no credentials provided");
          }
        }

        Map<String, TezLocalResource> additionalResources = containerTask.getAdditionalResources();
        if (LOG.isDebugEnabled()) {
          LOG.debug("Additional Resources added to container: " + additionalResources);
        }

        LOG.info("Localizing additional local resources for Task : " + additionalResources);
        List<URL> downloadedUrls = RelocalizationUtils.processAdditionalResources(
            Maps.transformValues(additionalResources, new Function<TezLocalResource, URI>() {
              @Override
              public URI apply(TezLocalResource input) {
                return input.getUri();
              }
            }), defaultConf);
        RelocalizationUtils.addUrlsToClassPath(downloadedUrls);

        LOG.info("Done localizing additional resources");
        final TaskSpec taskSpec = containerTask.getTaskSpec();
        if (LOG.isDebugEnabled()) {
          LOG.debug("New container task context:"
              + taskSpec.toString());
        }

        try {
          taskLock.writeLock().lock();
          currentTaskAttemptID = taskSpec.getTaskAttemptID();
          TezVertexID newVertexId =
              currentTaskAttemptID.getTaskID().getVertexID();
          currentTaskComplete.set(false);

          if (lastVertexId != null) {
            if (!lastVertexId.equals(newVertexId)) {
              objectRegistry.clearCache(ObjectLifeCycle.VERTEX);
            }
            if (!lastVertexId.getDAGId().equals(newVertexId.getDAGId())) {
              objectRegistry.clearCache(ObjectLifeCycle.DAG);
              startedInputsMap = HashMultimap.create();
            }
          }
          lastVertexId = newVertexId;
          TezUtils.updateLoggers(currentTaskAttemptID.toString());
View Full Code Here

    }
    // FIXME fix initialize metrics in child runner
    DefaultMetricsSystem.initialize("VertexTask");
    YarnTezDagChild.containerIdStr = containerIdentifier;

    ObjectRegistryImpl objectRegistry = new ObjectRegistryImpl();
    @SuppressWarnings("unused")
    Injector injector = Guice.createInjector(
        new ObjectRegistryModule(objectRegistry));

    // Security framework already loaded the tokens into current ugi
    Credentials credentials =
        UserGroupInformation.getCurrentUser().getCredentials();

    if (LOG.isDebugEnabled()) {
      LOG.debug("Executing with tokens:");
      for (Token<?> token : credentials.getAllTokens()) {
        LOG.debug(token);
      }
    }

    amPollInterval = defaultConf.getLong(
        TezConfiguration.TEZ_TASK_AM_HEARTBEAT_INTERVAL_MS,
        TezConfiguration.TEZ_TASK_AM_HEARTBEAT_INTERVAL_MS_DEFAULT);
    maxEventsToGet = defaultConf.getInt(
        TezConfiguration.TEZ_TASK_MAX_EVENTS_PER_HEARTBEAT,
        TezConfiguration.TEZ_TASK_MAX_EVENTS_PER_HEARTBEAT_DEFAULT);

    // Create TaskUmbilicalProtocol as actual task owner.
    UserGroupInformation taskOwner =
      UserGroupInformation.createRemoteUser(tokenIdentifier);

    Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
    SecurityUtil.setTokenService(jobToken, address);
    taskOwner.addToken(jobToken);
    // Will jobToken change across DAGs ?
    Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<String, ByteBuffer>();
    serviceConsumerMetadata.put(ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID,
        ShuffleUtils.convertJobTokenToBytes(jobToken));

    umbilical =
      taskOwner.doAs(new PrivilegedExceptionAction<TezTaskUmbilicalProtocol>() {
      @Override
      public TezTaskUmbilicalProtocol run() throws Exception {
        return (TezTaskUmbilicalProtocol)RPC.getProxy(TezTaskUmbilicalProtocol.class,
            TezTaskUmbilicalProtocol.versionID, address, defaultConf);
      }
    });

    final Thread heartbeatThread = startHeartbeatThread();

    TezUmbilical tezUmbilical = new TezUmbilical() {
      /**
       * Appends every event in {@code events} to {@code eventsToSend}.
       * Nothing is transmitted from this method itself.
       * NOTE(review): presumably the heartbeat thread drains
       * {@code eventsToSend} - confirm against the heartbeat implementation.
       *
       * @param events events to queue
       */
      @Override
      public void addEvents(Collection<TezEvent> events) {
        eventsToSend.addAll(events);
      }

      @Override
      public void signalFatalError(TezTaskAttemptID taskAttemptID,
          String diagnostics,
          EventMetaData sourceInfo) {
        currentTask.setFrameworkCounters();
        TezEvent statusUpdateEvent =
            new TezEvent(new TaskStatusUpdateEvent(
                currentTask.getCounters(), currentTask.getProgress()),
                new EventMetaData(EventProducerConsumerType.SYSTEM,
                    currentTask.getVertexName(), "",
                    currentTask.getTaskAttemptID()));
        TezEvent taskAttemptFailedEvent =
            new TezEvent(new TaskAttemptFailedEvent(diagnostics),
                sourceInfo);
        try {
          // Not setting taskComplete - since the main loop responsible for cleanup doesn't have
          // control yet. Getting control depends on whether the I/P/O returns correctly after
          // reporting an error.
          heartbeat(Lists.newArrayList(statusUpdateEvent, taskAttemptFailedEvent));
        } catch (Throwable t) {
          LOG.fatal("Failed to communicate task attempt failure to AM via"
              + " umbilical", t);
          if (t instanceof Error) {
            LOG.error("Exception of type Error. Exiting now", t);
            ExitUtil.terminate(-1, t);
          }
          // FIXME NEWTEZ maybe send a container failed event to AM?
          // Irrecoverable error unless heartbeat sync can be re-established
          heartbeatErrorException = t;
          heartbeatError.set(true);
          heartbeatThread.interrupt();
        }
      }

      @Override
      public boolean canCommit(TezTaskAttemptID taskAttemptID)
          throws IOException {
        return umbilical.canCommit(taskAttemptID);
      }
    };

    // report non-pid to application master
    String pid = System.getenv().get("JVM_PID");
   
    LOG.info("PID, containerIdentifier: " + pid + ", " + containerIdentifier);
   
    ContainerTask containerTask = null;
    UserGroupInformation childUGI = null;
    ContainerContext containerContext = new ContainerContext(
        containerIdentifier, pid);
    int getTaskMaxSleepTime = defaultConf.getInt(
        TezConfiguration.TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX,
        TezConfiguration.TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX_DEFAULT);
    int taskCount = 0;
    TezVertexID lastVertexId = null;
    EventMetaData currentSourceInfo = null;
    try {
      while (true) {
        // poll for new task
        if (taskCount > 0) {
          updateLoggers(null);
        }
        boolean isNewGetTask = true;
        long getTaskPollStartTime = System.currentTimeMillis();
        long nextGetTaskPrintTime = getTaskPollStartTime + 2000l;
        for (int idle = 0; null == containerTask; ++idle) {
          if (!isNewGetTask) { // Don't sleep on the first iteration.
            long sleepTimeMilliSecs = Math.min(idle * 10, getTaskMaxSleepTime);
            if (sleepTimeMilliSecs + System.currentTimeMillis() > nextGetTaskPrintTime) {
              LOG.info("Sleeping for "
                  + sleepTimeMilliSecs
                  + "ms before retrying getTask again. Got null now. "
                  + "Next getTask sleep message after 2s");
              nextGetTaskPrintTime = System.currentTimeMillis() + sleepTimeMilliSecs + 2000l;
            }
            MILLISECONDS.sleep(sleepTimeMilliSecs);
          } else {
            LOG.info("Attempting to fetch new task");
          }
          isNewGetTask = false;
          containerTask = umbilical.getTask(containerContext);
        }
        LOG.info("Got TaskUpdate: "
            + (System.currentTimeMillis() - getTaskPollStartTime)
            + " ms after starting to poll."
            + " TaskInfo: shouldDie: " + containerTask.shouldDie()
            + (containerTask.shouldDie() == true ? "" : ", currentTaskAttemptId: "
                + containerTask.getTaskSpec().getTaskAttemptID()));
        if (containerTask.shouldDie()) {
          return;
        }
        taskCount++;

        // Reset FileSystem statistics
        FileSystem.clearStatistics();

        // Re-use the UGI only if the Credentials have not changed.
        if (containerTask.haveCredentialsChanged()) {
          LOG.info("Refreshing UGI since Credentials have changed");
          Credentials taskCreds = containerTask.getCredentials();
          if (taskCreds != null) {
            LOG.info("Credentials : #Tokens=" + taskCreds.numberOfTokens() + ", #SecretKeys="
                + taskCreds.numberOfSecretKeys());
            childUGI = UserGroupInformation.createRemoteUser(System
                .getenv(ApplicationConstants.Environment.USER.toString()));
            childUGI.addCredentials(containerTask.getCredentials());
          } else {
            LOG.info("Not loading any credentials, since no credentials provided");
          }
        }

        Map<String, TezLocalResource> additionalResources = containerTask.getAdditionalResources();
        if (LOG.isDebugEnabled()) {
          LOG.debug("Additional Resources added to container: " + additionalResources);
        }

        LOG.info("Localizing additional local resources for Task : " + additionalResources);
        List<URL> downloadedUrls = RelocalizationUtils.processAdditionalResources(
            Maps.transformValues(additionalResources, new Function<TezLocalResource, URI>() {
              @Override
              public URI apply(TezLocalResource input) {
                return input.getUri();
              }
            }), defaultConf);
        RelocalizationUtils.addUrlsToClassPath(downloadedUrls);

        LOG.info("Done localizing additional resources");
        final TaskSpec taskSpec = containerTask.getTaskSpec();
        if (LOG.isDebugEnabled()) {
          LOG.debug("New container task context:"
              + taskSpec.toString());
        }

        try {
          taskLock.writeLock().lock();
          currentTaskAttemptID = taskSpec.getTaskAttemptID();
          TezVertexID newVertexId =
              currentTaskAttemptID.getTaskID().getVertexID();
          currentTaskComplete.set(false);

          if (lastVertexId != null) {
            if (!lastVertexId.equals(newVertexId)) {
              objectRegistry.clearCache(ObjectLifeCycle.VERTEX);
            }
            if (!lastVertexId.getDAGId().equals(newVertexId.getDAGId())) {
              objectRegistry.clearCache(ObjectLifeCycle.DAG);
              startedInputsMap = HashMultimap.create();
            }
          }
          lastVertexId = newVertexId;
          updateLoggers(currentTaskAttemptID);
View Full Code Here

TOP

Related Classes of org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.