Package org.apache.hadoop.util

Examples of org.apache.hadoop.util.Progress


   
    public boolean fetchOutputs() throws IOException {
      int totalFailures = 0;
      int            numInFlight = 0, numCopied = 0;
      DecimalFormat  mbpsFormat = new DecimalFormat("0.00");
      final Progress copyPhase =
        reduceTask.getProgress().phase();
      LocalFSMerger localFSMergerThread = null;
      InMemFSMergeThread inMemFSMergeThread = null;
      GetMapEventsThread getMapEventsThread = null;
     
      for (int i = 0; i < numMaps; i++) {
        copyPhase.addPhase();       // add sub-phase per file
      }
     
      copiers = new ArrayList<MapOutputCopier>(numCopiers);
     
      // start all the copying threads
      for (int i=0; i < numCopiers; i++) {
        MapOutputCopier copier = new MapOutputCopier(conf, reporter,
            reduceTask.getJobTokenSecret());
        copiers.add(copier);
        copier.start();
      }
     
      //start the on-disk-merge thread
      localFSMergerThread = new LocalFSMerger((LocalFileSystem)localFileSys);
      //start the in memory merger thread
      inMemFSMergeThread = new InMemFSMergeThread();
      localFSMergerThread.start();
      inMemFSMergeThread.start();
     
      // start the map events thread
      getMapEventsThread = new GetMapEventsThread();
      getMapEventsThread.start();
     
      // start the clock for bandwidth measurement
      long startTime = System.currentTimeMillis();
      long currentTime = startTime;
      long lastProgressTime = startTime;
      long lastOutputTime = 0;
     
        // loop until we get all required outputs
        while (copiedMapOutputs.size() < numMaps && mergeThrowable == null) {
         
          currentTime = System.currentTimeMillis();
          boolean logNow = false;
          if (currentTime - lastOutputTime > MIN_LOG_TIME) {
            lastOutputTime = currentTime;
            logNow = true;
          }
          if (logNow) {
            LOG.info(reduceTask.getTaskID() + " Need another "
                   + (numMaps - copiedMapOutputs.size()) + " map output(s) "
                   + "where " + numInFlight + " is already in progress");
          }

          // Put the hash entries for the failed fetches.
          Iterator<MapOutputLocation> locItr = retryFetches.iterator();

          while (locItr.hasNext()) {
            MapOutputLocation loc = locItr.next();
            List<MapOutputLocation> locList =
              mapLocations.get(loc.getHost());
           
            // Check if the list exists. Map output location mapping is cleared
            // once the jobtracker restarts and is rebuilt from scratch.
            // Note that map-output-location mapping will be recreated and hence
            // we continue with the hope that we might find some locations
            // from the rebuild map.
            if (locList != null) {
              // Add to the beginning of the list so that this map is
              //tried again before the others and we can hasten the
              //re-execution of this map should there be a problem
              locList.add(0, loc);
            }
          }

          if (retryFetches.size() > 0) {
            LOG.info(reduceTask.getTaskID() + ": " +
                  "Got " + retryFetches.size() +
                  " map-outputs from previous failures");
          }
          // clear the "failed" fetches hashmap
          retryFetches.clear();

          // now walk through the cache and schedule what we can
          int numScheduled = 0;
          int numDups = 0;
         
          synchronized (scheduledCopies) {
 
            // Randomize the map output locations to prevent
            // all reduce-tasks swamping the same tasktracker
            List<String> hostList = new ArrayList<String>();
            hostList.addAll(mapLocations.keySet());
           
            Collections.shuffle(hostList, this.random);
             
            Iterator<String> hostsItr = hostList.iterator();

            while (hostsItr.hasNext()) {
           
              String host = hostsItr.next();

              List<MapOutputLocation> knownOutputsByLoc =
                mapLocations.get(host);

              // Check if the list exists. Map output location mapping is
              // cleared once the jobtracker restarts and is rebuilt from
              // scratch.
              // Note that map-output-location mapping will be recreated and
              // hence we continue with the hope that we might find some
              // locations from the rebuilt map and add them for fetching.
              if (knownOutputsByLoc == null || knownOutputsByLoc.size() == 0) {
                continue;
              }
             
              //Identify duplicate hosts here
              if (uniqueHosts.contains(host)) {
                 numDups += knownOutputsByLoc.size();
                 continue;
              }

              Long penaltyEnd = penaltyBox.get(host);
              boolean penalized = false;
           
              if (penaltyEnd != null) {
                if (currentTime < penaltyEnd.longValue()) {
                  penalized = true;
                } else {
                  penaltyBox.remove(host);
                }
              }
             
              if (penalized)
                continue;

              synchronized (knownOutputsByLoc) {
             
                locItr = knownOutputsByLoc.iterator();
           
                while (locItr.hasNext()) {
             
                  MapOutputLocation loc = locItr.next();
             
                  // Do not schedule fetches from OBSOLETE maps
                  if (obsoleteMapIds.contains(loc.getTaskAttemptId())) {
                    locItr.remove();
                    continue;
                  }

                  uniqueHosts.add(host);
                  scheduledCopies.add(loc);
                  locItr.remove(); // remove from knownOutputs
                  numInFlight++; numScheduled++;

                  break; //we have a map from this host
                }
              }
            }
            scheduledCopies.notifyAll();
          }

          if (numScheduled > 0 || logNow) {
            LOG.info(reduceTask.getTaskID() + " Scheduled " + numScheduled +
                   " outputs (" + penaltyBox.size() +
                   " slow hosts and" + numDups + " dup hosts)");
          }

          if (penaltyBox.size() > 0 && logNow) {
            LOG.info("Penalized(slow) Hosts: ");
            for (String host : penaltyBox.keySet()) {
              LOG.info(host + " Will be considered after: " +
                  ((penaltyBox.get(host) - currentTime)/1000) + " seconds.");
            }
          }

          // if we have no copies in flight and we can't schedule anything
          // new, just wait for a bit
          try {
            if (numInFlight == 0 && numScheduled == 0) {
              // we should indicate progress as we don't want TT to think
              // we're stuck and kill us
              reporter.progress();
              Thread.sleep(5000);
            }
          } catch (InterruptedException e) { } // IGNORE
         
          while (numInFlight > 0 && mergeThrowable == null) {
            LOG.debug(reduceTask.getTaskID() + " numInFlight = " +
                      numInFlight);
            //the call to getCopyResult will either
            //1) return immediately with a null or a valid CopyResult object,
            //                 or
            //2) if the numInFlight is above maxInFlight, return with a
            //   CopyResult object after getting a notification from a
            //   fetcher thread,
            //So, when getCopyResult returns null, we can be sure that
            //we aren't busy enough and we should go and get more mapcompletion
            //events from the tasktracker
            CopyResult cr = getCopyResult(numInFlight);

            if (cr == null) {
              break;
            }
           
            if (cr.getSuccess()) {  // a successful copy
              numCopied++;
              lastProgressTime = System.currentTimeMillis();
              reduceShuffleBytes.increment(cr.getSize());
               
              long secsSinceStart =
                (System.currentTimeMillis()-startTime)/1000+1;
              float mbs = ((float)reduceShuffleBytes.getCounter())/(1024*1024);
              float transferRate = mbs/secsSinceStart;
               
              copyPhase.startNextPhase();
              copyPhase.setStatus("copy (" + numCopied + " of " + numMaps
                                  + " at " +
                                  mbpsFormat.format(transferRate) + " MB/s)");
               
              // Note successful fetch for this mapId to invalidate
              // (possibly) old fetch-failures
View Full Code Here


      int            numInFlight = 0, numCopied = 0;
      int            lowThreshold = numCopiers*2;
      long           bytesTransferred = 0;
      DecimalFormat  mbpsFormat = new DecimalFormat("0.00");
      Random         backoff = new Random();
      final Progress copyPhase =
        reduceTask.getProgress().phase();
     
      //tweak the probe sample size (make it a function of numCopiers)
      probe_sample_size = Math.max(numCopiers*5, 50);
     
      for (int i = 0; i < numOutputs; i++) {
        neededOutputs.add(new Integer(i));
        copyPhase.addPhase();       // add sub-phase per file
      }
     
      copiers = new MapOutputCopier[numCopiers];
     
      // start all the copying threads
      for (int i=0; i < copiers.length; i++) {
        copiers[i] = new MapOutputCopier();
        copiers[i].start();
      }
     
      // start the clock for bandwidth measurement
      long startTime = System.currentTimeMillis();
      long currentTime = startTime;
      IntWritable fromEventId = new IntWritable(0);
     
      Thread copyProgress = createProgressThread(umbilical);
      copyProgress.start();
      try {
        // loop until we get all required outputs
        while (!neededOutputs.isEmpty() && mergeThrowable == null) {
         
          LOG.info(reduceTask.getTaskId() + " Need " + neededOutputs.size() +
          " map output(s)");
         
          try {
            // Put the hash entries for the failed fetches. Entries here
            // might be replaced by (mapId) hashkeys from new successful
            // Map executions, if the fetch failures were due to lost tasks.
            // The replacements, if at all, will happen when we query the
            // tasktracker and put the mapId hashkeys with new
            // MapOutputLocations as values
            knownOutputs.addAll(retryFetches);
            // The call to getMapCompletionEvents will set fromEventId to the
            // value it should have for the next call to getMapCompletionEvents
            List <MapOutputLocation> locs = getMapCompletionEvents(fromEventId);

            // put the discovered locations on the known list
            for (int i=0; i < locs.size(); i++) {
              knownOutputs.add(locs.get(i));
            }
            LOG.info(reduceTask.getTaskId() +
                    " Got " + locs.size() +
                    " new map outputs from tasktracker and " + retryFetches.size()
                    + " map outputs from previous failures");
            // clear the "failed" fetches hashmap
            retryFetches.clear();
          }
          catch (IOException ie) {
            LOG.warn(reduceTask.getTaskId() +
                    " Problem locating map outputs: " +
                    StringUtils.stringifyException(ie));
          }
         
          // now walk through the cache and schedule what we can
          int numKnown = knownOutputs.size(), numScheduled = 0;
          int numSlow = 0, numDups = 0;
         
          LOG.info(reduceTask.getTaskId() + " Got " + numKnown +
                   " known map output location(s); scheduling...");
         
          synchronized (scheduledCopies) {
            // Randomize the map output locations to prevent
            // all reduce-tasks swamping the same tasktracker
            Collections.shuffle(knownOutputs, this.random);
           
            Iterator locIt = knownOutputs.iterator();
           
            currentTime = System.currentTimeMillis();
            while (locIt.hasNext()) {
             
              MapOutputLocation loc = (MapOutputLocation)locIt.next();
              Long penaltyEnd = penaltyBox.get(loc.getHost());
              boolean penalized = false, duplicate = false;
             
              if (penaltyEnd != null && currentTime < penaltyEnd.longValue()) {
                penalized = true; numSlow++;
              }
              if (uniqueHosts.contains(loc.getHost())) {
                duplicate = true; numDups++;
              }
             
              if (!penalized && !duplicate) {
                uniqueHosts.add(loc.getHost());
                scheduledCopies.add(loc);
                locIt.remove(); // remove from knownOutputs
                numInFlight++; numScheduled++;
              }
            }
            scheduledCopies.notifyAll();
          }
          LOG.info(reduceTask.getTaskId() + " Scheduled " + numScheduled +
                   " of " + numKnown + " known outputs (" + numSlow +
                   " slow hosts and " + numDups + " dup hosts)");
         
          // if we have no copies in flight and we can't schedule anything
          // new, just wait for a bit
          try {
            if (numInFlight == 0 && numScheduled == 0) {
              Thread.sleep(5000);
            }
          } catch (InterruptedException e) { } // IGNORE
         
          while (numInFlight > 0 && mergeThrowable == null) {
            LOG.debug(reduceTask.getTaskId() + " numInFlight = " +
                      numInFlight);
            CopyResult cr = getCopyResult();
           
            if (cr != null) {
              if (cr.getSuccess()) {  // a successful copy
                numCopied++;
                bytesTransferred += cr.getSize();
               
                long secsSinceStart =
                  (System.currentTimeMillis()-startTime)/1000+1;
                float mbs = ((float)bytesTransferred)/(1024*1024);
                float transferRate = mbs/secsSinceStart;
               
                copyPhase.startNextPhase();
                copyPhase.setStatus("copy (" + numCopied + " of " + numOutputs
                                    + " at " +
                                    mbpsFormat.format(transferRate) + " MB/s)");
              } else if (cr.isObsolete()) {
                //ignore
                LOG.info(reduceTask.getTaskId() +
View Full Code Here

      int            numInFlight = 0, numCopied = 0;
      int            lowThreshold = numCopiers*2;
      long           bytesTransferred = 0;
      DecimalFormat  mbpsFormat = new DecimalFormat("0.00");
      Random         backoff = new Random();
      final Progress copyPhase =
        reduceTask.getProgress().phase();
     
      //tweak the probe sample size (make it a function of numCopiers)
      probe_sample_size = Math.max(numCopiers*5, 50);
     
      for (int i = 0; i < numOutputs; i++) {
        neededOutputs.add(i);
        copyPhase.addPhase();       // add sub-phase per file
      }
     
      copiers = new MapOutputCopier[numCopiers];
     
      Reporter reporter = getReporter(umbilical);
      // start all the copying threads
      for (int i=0; i < copiers.length; i++) {
        copiers[i] = new MapOutputCopier(reporter);
        copiers[i].start();
      }
     
      // start the clock for bandwidth measurement
      long startTime = System.currentTimeMillis();
      long currentTime = startTime;
      IntWritable fromEventId = new IntWritable(0);
     
      try {
        // loop until we get all required outputs
        while (!neededOutputs.isEmpty() && mergeThrowable == null) {
         
          LOG.info(reduceTask.getTaskId() + " Need " + neededOutputs.size() +
          " map output(s)");
         
          try {
            // Put the hash entries for the failed fetches. Entries here
            // might be replaced by (mapId) hashkeys from new successful
            // Map executions, if the fetch failures were due to lost tasks.
            // The replacements, if at all, will happen when we query the
            // tasktracker and put the mapId hashkeys with new
            // MapOutputLocations as values
            knownOutputs.addAll(retryFetches);
            
            // The call to getMapCompletionEvents will update fromEventId to
            // the value used for the next call to getMapCompletionEvents
            int currentNumKnownMaps = knownOutputs.size();
            int currentNumObsoleteMapIds = obsoleteMapIds.size();
            getMapCompletionEvents(fromEventId, knownOutputs);

           
            LOG.info(reduceTask.getTaskId() + ": " +
                     "Got " + (knownOutputs.size()-currentNumKnownMaps) +
                     " new map-outputs & " +
                     (obsoleteMapIds.size()-currentNumObsoleteMapIds) +
                     " obsolete map-outputs from tasktracker and " +
                     retryFetches.size() + " map-outputs from previous failures"
                     );
           
            // clear the "failed" fetches hashmap
            retryFetches.clear();
          }
          catch (IOException ie) {
            LOG.warn(reduceTask.getTaskId() +
                    " Problem locating map outputs: " +
                    StringUtils.stringifyException(ie));
          }
         
          // now walk through the cache and schedule what we can
          int numKnown = knownOutputs.size(), numScheduled = 0;
          int numSlow = 0, numDups = 0;
         
          LOG.info(reduceTask.getTaskId() + " Got " + numKnown +
                   " known map output location(s); scheduling...");
         
          synchronized (scheduledCopies) {
            // Randomize the map output locations to prevent
            // all reduce-tasks swamping the same tasktracker
            Collections.shuffle(knownOutputs, this.random);
           
            Iterator locIt = knownOutputs.iterator();
           
            currentTime = System.currentTimeMillis();
            while (locIt.hasNext()) {
             
              MapOutputLocation loc = (MapOutputLocation)locIt.next();
             
              // Do not schedule fetches from OBSOLETE maps
              if (obsoleteMapIds.contains(loc.getMapTaskId())) {
                locIt.remove();
                continue;
              }
             
              Long penaltyEnd = penaltyBox.get(loc.getHost());
              boolean penalized = false, duplicate = false;
             
              if (penaltyEnd != null && currentTime < penaltyEnd.longValue()) {
                penalized = true; numSlow++;
              }
              if (uniqueHosts.contains(loc.getHost())) {
                duplicate = true; numDups++;
              }
             
              if (!penalized && !duplicate) {
                uniqueHosts.add(loc.getHost());
                scheduledCopies.add(loc);
                locIt.remove(); // remove from knownOutputs
                numInFlight++; numScheduled++;
              }
            }
            scheduledCopies.notifyAll();
          }
          LOG.info(reduceTask.getTaskId() + " Scheduled " + numScheduled +
                   " of " + numKnown + " known outputs (" + numSlow +
                   " slow hosts and " + numDups + " dup hosts)");
         
          // if we have no copies in flight and we can't schedule anything
          // new, just wait for a bit
          try {
            if (numInFlight == 0 && numScheduled == 0) {
              // we should indicate progress as we don't want TT to think
              // we're stuck and kill us
              reporter.progress();
              Thread.sleep(5000);
            }
          } catch (InterruptedException e) { } // IGNORE
         
          while (numInFlight > 0 && mergeThrowable == null) {
            LOG.debug(reduceTask.getTaskId() + " numInFlight = " +
                      numInFlight);
            CopyResult cr = getCopyResult();
           
            if (cr != null) {
              if (cr.getSuccess()) {  // a successful copy
                numCopied++;
                bytesTransferred += cr.getSize();
               
                long secsSinceStart =
                  (System.currentTimeMillis()-startTime)/1000+1;
                float mbs = ((float)bytesTransferred)/(1024*1024);
                float transferRate = mbs/secsSinceStart;
               
                copyPhase.startNextPhase();
                copyPhase.setStatus("copy (" + numCopied + " of " + numOutputs
                                    + " at " +
                                    mbpsFormat.format(transferRate) + " MB/s)");
               
                // Note successful fetch for this mapId to invalidate
                // (possibly) old fetch-failures
View Full Code Here

      int            numInFlight = 0, numCopied = 0;
      int            lowThreshold = numCopiers*2;
      long           bytesTransferred = 0;
      DecimalFormat  mbpsFormat = new DecimalFormat("0.00");
      Random         backoff = new Random();
      final Progress copyPhase =
        reduceTask.getProgress().phase();
     
      //tweak the probe sample size (make it a function of numCopiers)
      probe_sample_size = Math.max(numCopiers*5, 50);
     
      for (int i = 0; i < numOutputs; i++) {
        neededOutputs.add(i);
        copyPhase.addPhase();       // add sub-phase per file
      }
     
      copiers = new MapOutputCopier[numCopiers];
     
      Reporter reporter = getReporter(umbilical);
      // start all the copying threads
      for (int i=0; i < copiers.length; i++) {
        copiers[i] = new MapOutputCopier(reporter);
        copiers[i].start();
      }
     
      // start the clock for bandwidth measurement
      long startTime = System.currentTimeMillis();
      long currentTime = startTime;
      IntWritable fromEventId = new IntWritable(0);
     
      try {
        // loop until we get all required outputs
        while (!neededOutputs.isEmpty() && mergeThrowable == null) {
         
          LOG.info(reduceTask.getTaskId() + " Need " + neededOutputs.size() +
          " map output(s)");
         
          try {
            // Put the hash entries for the failed fetches. Entries here
            // might be replaced by (mapId) hashkeys from new successful
            // Map executions, if the fetch failures were due to lost tasks.
            // The replacements, if at all, will happen when we query the
            // tasktracker and put the mapId hashkeys with new
            // MapOutputLocations as values
            knownOutputs.addAll(retryFetches);
            
            // The call to getMapCompletionEvents will update fromEventId to
            // the value used for the next call to getMapCompletionEvents
            int currentNumKnownMaps = knownOutputs.size();
            int currentNumObsoleteMapIds = obsoleteMapIds.size();
            getMapCompletionEvents(fromEventId, knownOutputs);

           
            LOG.info(reduceTask.getTaskId() + ": " +
                     "Got " + (knownOutputs.size()-currentNumKnownMaps) +
                     " new map-outputs & " +
                     (obsoleteMapIds.size()-currentNumObsoleteMapIds) +
                     " obsolete map-outputs from tasktracker and " +
                     retryFetches.size() + " map-outputs from previous failures"
                     );
           
            // clear the "failed" fetches hashmap
            retryFetches.clear();
          }
          catch (IOException ie) {
            LOG.warn(reduceTask.getTaskId() +
                    " Problem locating map outputs: " +
                    StringUtils.stringifyException(ie));
          }
         
          // now walk through the cache and schedule what we can
          int numKnown = knownOutputs.size(), numScheduled = 0;
          int numSlow = 0, numDups = 0;
         
          LOG.info(reduceTask.getTaskId() + " Got " + numKnown +
                   " known map output location(s); scheduling...");
         
          synchronized (scheduledCopies) {
            // Randomize the map output locations to prevent
            // all reduce-tasks swamping the same tasktracker
            Collections.shuffle(knownOutputs, this.random);
           
            Iterator locIt = knownOutputs.iterator();
           
            currentTime = System.currentTimeMillis();
            while (locIt.hasNext()) {
             
              MapOutputLocation loc = (MapOutputLocation)locIt.next();
             
              // Do not schedule fetches from OBSOLETE maps
              if (obsoleteMapIds.contains(loc.getMapTaskId())) {
                locIt.remove();
                continue;
              }
             
              Long penaltyEnd = penaltyBox.get(loc.getHost());
              boolean penalized = false, duplicate = false;
             
              if (penaltyEnd != null && currentTime < penaltyEnd.longValue()) {
                penalized = true; numSlow++;
              }
              if (uniqueHosts.contains(loc.getHost())) {
                duplicate = true; numDups++;
              }
             
              if (!penalized && !duplicate) {
                uniqueHosts.add(loc.getHost());
                scheduledCopies.add(loc);
                locIt.remove(); // remove from knownOutputs
                numInFlight++; numScheduled++;
              }
            }
            scheduledCopies.notifyAll();
          }
          LOG.info(reduceTask.getTaskId() + " Scheduled " + numScheduled +
                   " of " + numKnown + " known outputs (" + numSlow +
                   " slow hosts and " + numDups + " dup hosts)");
         
          // if we have no copies in flight and we can't schedule anything
          // new, just wait for a bit
          try {
            if (numInFlight == 0 && numScheduled == 0) {
              // we should indicate progress as we don't want TT to think
              // we're stuck and kill us
              reporter.progress();
              Thread.sleep(5000);
            }
          } catch (InterruptedException e) { } // IGNORE
         
          while (numInFlight > 0 && mergeThrowable == null) {
            LOG.debug(reduceTask.getTaskId() + " numInFlight = " +
                      numInFlight);
            CopyResult cr = getCopyResult();
           
            if (cr != null) {
              if (cr.getSuccess()) {  // a successful copy
                numCopied++;
                bytesTransferred += cr.getSize();
               
                long secsSinceStart =
                  (System.currentTimeMillis()-startTime)/1000+1;
                float mbs = ((float)bytesTransferred)/(1024*1024);
                float transferRate = mbs/secsSinceStart;
               
                copyPhase.startNextPhase();
                copyPhase.setStatus("copy (" + numCopied + " of " + numOutputs
                                    + " at " +
                                    mbpsFormat.format(transferRate) + " MB/s)");
               
                // Note successful fetch for this mapId to invalidate
                // (possibly) old fetch-failures
View Full Code Here

   
    public boolean fetchOutputs() throws IOException {
      int totalFailures = 0;
      int            numInFlight = 0, numCopied = 0;
      DecimalFormat  mbpsFormat = new DecimalFormat("0.00");
      final Progress copyPhase =
        reduceTask.getProgress().phase();
      LocalFSMerger localFSMergerThread = null;
      InMemFSMergeThread inMemFSMergeThread = null;
      GetMapEventsThread getMapEventsThread = null;
     
      for (int i = 0; i < numMaps; i++) {
        copyPhase.addPhase();       // add sub-phase per file
      }
     
      copiers = new ArrayList<MapOutputCopier>(numCopiers);
     
      // start all the copying threads
      for (int i=0; i < numCopiers; i++) {
        MapOutputCopier copier = new MapOutputCopier(conf, reporter);
        copiers.add(copier);
        copier.start();
      }
     
      //start the on-disk-merge thread
      localFSMergerThread = new LocalFSMerger((LocalFileSystem)localFileSys);
      //start the in memory merger thread
      inMemFSMergeThread = new InMemFSMergeThread();
      localFSMergerThread.start();
      inMemFSMergeThread.start();
     
      // start the map events thread
      getMapEventsThread = new GetMapEventsThread();
      getMapEventsThread.start();
     
      // start the clock for bandwidth measurement
      long startTime = System.currentTimeMillis();
      long currentTime = startTime;
      long lastProgressTime = startTime;
      long lastOutputTime = 0;
     
        // loop until we get all required outputs
        while (copiedMapOutputs.size() < numMaps && mergeThrowable == null) {
         
          currentTime = System.currentTimeMillis();
          boolean logNow = false;
          if (currentTime - lastOutputTime > MIN_LOG_TIME) {
            lastOutputTime = currentTime;
            logNow = true;
          }
          if (logNow) {
            LOG.info(reduceTask.getTaskID() + " Need another "
                   + (numMaps - copiedMapOutputs.size()) + " map output(s) "
                   + "where " + numInFlight + " is already in progress");
          }

          // Put the hash entries for the failed fetches.
          Iterator<MapOutputLocation> locItr = retryFetches.iterator();

          while (locItr.hasNext()) {
            MapOutputLocation loc = locItr.next();
            List<MapOutputLocation> locList =
              mapLocations.get(loc.getHost());
           
            // Check if the list exists. Map output location mapping is cleared
            // once the jobtracker restarts and is rebuilt from scratch.
            // Note that map-output-location mapping will be recreated and hence
            // we continue with the hope that we might find some locations
            // from the rebuild map.
            if (locList != null) {
              // Add to the beginning of the list so that this map is
              //tried again before the others and we can hasten the
              //re-execution of this map should there be a problem
              locList.add(0, loc);
            }
          }

          if (retryFetches.size() > 0) {
            LOG.info(reduceTask.getTaskID() + ": " +
                  "Got " + retryFetches.size() +
                  " map-outputs from previous failures");
          }
          // clear the "failed" fetches hashmap
          retryFetches.clear();

          // now walk through the cache and schedule what we can
          int numScheduled = 0;
          int numDups = 0;
         
          synchronized (scheduledCopies) {
 
            // Randomize the map output locations to prevent
            // all reduce-tasks swamping the same tasktracker
            List<String> hostList = new ArrayList<String>();
            hostList.addAll(mapLocations.keySet());
           
            Collections.shuffle(hostList, this.random);
             
            Iterator<String> hostsItr = hostList.iterator();

            while (hostsItr.hasNext()) {
           
              String host = hostsItr.next();

              List<MapOutputLocation> knownOutputsByLoc =
                mapLocations.get(host);

              // Check if the list exists. Map output location mapping is
              // cleared once the jobtracker restarts and is rebuilt from
              // scratch.
              // Note that map-output-location mapping will be recreated and
              // hence we continue with the hope that we might find some
              // locations from the rebuild map and add then for fetching.
              if (knownOutputsByLoc == null || knownOutputsByLoc.size() == 0) {
                continue;
              }
             
              //Identify duplicate hosts here
              if (uniqueHosts.contains(host)) {
                 numDups += knownOutputsByLoc.size();
                 continue;
              }

              Long penaltyEnd = penaltyBox.get(host);
              boolean penalized = false;
           
              if (penaltyEnd != null) {
                if (currentTime < penaltyEnd.longValue()) {
                  penalized = true;
                } else {
                  penaltyBox.remove(host);
                }
              }
             
              if (penalized)
                continue;

              synchronized (knownOutputsByLoc) {
             
                locItr = knownOutputsByLoc.iterator();
           
                while (locItr.hasNext()) {
             
                  MapOutputLocation loc = locItr.next();
             
                  // Do not schedule fetches from OBSOLETE maps
                  if (obsoleteMapIds.contains(loc.getTaskAttemptId())) {
                    locItr.remove();
                    continue;
                  }

                  uniqueHosts.add(host);
                  scheduledCopies.add(loc);
                  locItr.remove(); // remove from knownOutputs
                  numInFlight++; numScheduled++;

                  break; //we have a map from this host
                }
              }
            }
            scheduledCopies.notifyAll();
          }

          if (numScheduled > 0 || logNow) {
            LOG.info(reduceTask.getTaskID() + " Scheduled " + numScheduled +
                   " outputs (" + penaltyBox.size() +
                   " slow hosts and" + numDups + " dup hosts)");
          }

          if (penaltyBox.size() > 0 && logNow) {
            LOG.info("Penalized(slow) Hosts: ");
            for (String host : penaltyBox.keySet()) {
              LOG.info(host + " Will be considered after: " +
                  ((penaltyBox.get(host) - currentTime)/1000) + " seconds.");
            }
          }

          // if we have no copies in flight and we can't schedule anything
          // new, just wait for a bit
          try {
            if (numInFlight == 0 && numScheduled == 0) {
              // we should indicate progress as we don't want TT to think
              // we're stuck and kill us
              reporter.progress();
              Thread.sleep(5000);
            }
          } catch (InterruptedException e) { } // IGNORE
         
          while (numInFlight > 0 && mergeThrowable == null) {
            LOG.debug(reduceTask.getTaskID() + " numInFlight = " +
                      numInFlight);
            //the call to getCopyResult will either
            //1) return immediately with a null or a valid CopyResult object,
            //                 or
            //2) if the numInFlight is above maxInFlight, return with a
            //   CopyResult object after getting a notification from a
            //   fetcher thread,
            //So, when getCopyResult returns null, we can be sure that
            //we aren't busy enough and we should go and get more mapcompletion
            //events from the tasktracker
            CopyResult cr = getCopyResult(numInFlight);

            if (cr == null) {
              break;
            }
           
            if (cr.getSuccess()) {  // a successful copy
              numCopied++;
              lastProgressTime = System.currentTimeMillis();
              reduceShuffleBytes.increment(cr.getSize());
               
              long secsSinceStart =
                (System.currentTimeMillis()-startTime)/1000+1;
              float mbs = ((float)reduceShuffleBytes.getCounter())/(1024*1024);
              float transferRate = mbs/secsSinceStart;
               
              copyPhase.startNextPhase();
              copyPhase.setStatus("copy (" + numCopied + " of " + numMaps
                                  + " at " +
                                  mbpsFormat.format(transferRate) + " MB/s)");
               
              // Note successful fetch for this mapId to invalidate
              // (possibly) old fetch-failures
View Full Code Here

      int totalFailures = 0;
      int            numInFlight = 0, numCopied = 0;
      int            lowThreshold = numCopiers*2;
      long           bytesTransferred = 0;
      DecimalFormat  mbpsFormat = new DecimalFormat("0.00");
      final Progress copyPhase =
        reduceTask.getProgress().phase();
     
      //tweak the probe sample size (make it a function of numCopiers)
      probe_sample_size = Math.max(numCopiers*5, 50);
     
      for (int i = 0; i < numOutputs; i++) {
        neededOutputs.add(i);
        copyPhase.addPhase();       // add sub-phase per file
      }
     
      copiers = new MapOutputCopier[numCopiers];
     
      Reporter reporter = getReporter(umbilical);
      // start all the copying threads
      for (int i=0; i < copiers.length; i++) {
        copiers[i] = new MapOutputCopier(reporter);
        copiers[i].start();
      }
     
      // start the clock for bandwidth measurement
      long startTime = System.currentTimeMillis();
      long currentTime = startTime;
      long lastProgressTime = System.currentTimeMillis();
      IntWritable fromEventId = new IntWritable(0);
     
      try {
        // loop until we get all required outputs
        while (!neededOutputs.isEmpty() && mergeThrowable == null) {
         
          LOG.info(reduceTask.getTaskId() + " Need " + neededOutputs.size() +
          " map output(s)");
         
          try {
            // Put the hash entries for the failed fetches. Entries here
            // might be replaced by (mapId) hashkeys from new successful
            // Map executions, if the fetch failures were due to lost tasks.
            // The replacements, if at all, will happen when we query the
            // tasktracker and put the mapId hashkeys with new
            // MapOutputLocations as values
            knownOutputs.addAll(retryFetches);
            
            // ensure we have enough to keep us busy
            boolean busy = isBusy(numInFlight, numCopiers, lowThreshold,
                                  uniqueHosts.size(), probe_sample_size,
                                  numOutputs - numCopied);
            if (!busy) {
              // The call getMapCompletionEvents will update fromEventId to
              // used for the next call to getMapCompletionEvents
              int currentNumKnownMaps = knownOutputs.size();
              int currentNumObsoleteMapIds = obsoleteMapIds.size();
              getMapCompletionEvents(fromEventId, knownOutputs);

           
              LOG.info(reduceTask.getTaskId() + ": " +
                     "Got " + (knownOutputs.size()-currentNumKnownMaps) +
                     " new map-outputs & " +
                     (obsoleteMapIds.size()-currentNumObsoleteMapIds) +
                     " obsolete map-outputs from tasktracker and " +
                     retryFetches.size() + " map-outputs from previous failures"
                     );
            } else {
              LOG.info(" Busy enough - did not query the tasktracker for "
                       + "new map outputs. Have "+ retryFetches.size()
                       + " map outputs from previous failures");
            }
            // clear the "failed" fetches hashmap
            retryFetches.clear();
          }
          catch (IOException ie) {
            LOG.warn(reduceTask.getTaskId() +
                    " Problem locating map outputs: " +
                    StringUtils.stringifyException(ie));
          }
         
          // now walk through the cache and schedule what we can
          int numKnown = knownOutputs.size(), numScheduled = 0;
          int numSlow = 0, numDups = 0;
         
          LOG.info(reduceTask.getTaskId() + " Got " + numKnown +
                   " known map output location(s); scheduling...");
         
          synchronized (scheduledCopies) {
            // Randomize the map output locations to prevent
            // all reduce-tasks swamping the same tasktracker
            Collections.shuffle(knownOutputs, this.random);
           
            Iterator locIt = knownOutputs.iterator();
           
            currentTime = System.currentTimeMillis();
            while (locIt.hasNext()) {
             
              MapOutputLocation loc = (MapOutputLocation)locIt.next();
             
              // Do not schedule fetches from OBSOLETE maps
              if (obsoleteMapIds.contains(loc.getMapTaskId())) {
                locIt.remove();
                continue;
              }
             
              Long penaltyEnd = penaltyBox.get(loc.getHost());
              boolean penalized = false, duplicate = false;
             
              if (penaltyEnd != null && currentTime < penaltyEnd.longValue()) {
                penalized = true; numSlow++;
              }
              if (uniqueHosts.contains(loc.getHost())) {
                duplicate = true; numDups++;
              }
             
              if (!penalized && !duplicate) {
                uniqueHosts.add(loc.getHost());
                scheduledCopies.add(loc);
                locIt.remove(); // remove from knownOutputs
                numInFlight++; numScheduled++;
              }
            }
            scheduledCopies.notifyAll();
          }
          LOG.info(reduceTask.getTaskId() + " Scheduled " + numScheduled +
                   " of " + numKnown + " known outputs (" + numSlow +
                   " slow hosts and " + numDups + " dup hosts)");
         
          // if we have no copies in flight and we can't schedule anything
          // new, just wait for a bit
          try {
            if (numInFlight == 0 && numScheduled == 0) {
              // we should indicate progress as we don't want TT to think
              // we're stuck and kill us
              reporter.progress();
              Thread.sleep(5000);
            }
          } catch (InterruptedException e) { } // IGNORE
         
          while (numInFlight > 0 && mergeThrowable == null) {
            LOG.debug(reduceTask.getTaskId() + " numInFlight = " +
                      numInFlight);
            CopyResult cr = getCopyResult();
           
            if (cr != null) {
              if (cr.getSuccess()) {  // a successful copy
                numCopied++;
                lastProgressTime = System.currentTimeMillis();
                bytesTransferred += cr.getSize();
               
                long secsSinceStart =
                  (System.currentTimeMillis()-startTime)/1000+1;
                float mbs = ((float)bytesTransferred)/(1024*1024);
                float transferRate = mbs/secsSinceStart;
               
                copyPhase.startNextPhase();
                copyPhase.setStatus("copy (" + numCopied + " of " + numOutputs
                                    + " at " +
                                    mbpsFormat.format(transferRate) + " MB/s)");
               
                // Note successfull fetch for this mapId to invalidate
                // (possibly) old fetch-failures
View Full Code Here

    public boolean fetchOutputs() throws IOException {
      int totalFailures = 0;
      int numInFlight = 0, numCopied = 0;
      DecimalFormat  mbpsFormat = new DecimalFormat("0.00");
      final Progress copyPhase =
        reduceTask.getProgress().phase();
      LocalFSMerger localFSMergerThread = null;
      InMemFSMergeThread inMemFSMergeThread = null;
      GetMapEventsThread getMapEventsThread = null;

      for (int i = 0; i < numMaps; i++) {
        copyPhase.addPhase();       // add sub-phase per file
      }

      copiers = new ArrayList<MapOutputCopier>(numCopiers);

      // start all the copying threads
      for (int i=0; i < numCopiers; i++) {
        MapOutputCopier copier = new MapOutputCopier(conf, reporter);
        copiers.add(copier);
        copier.start();
      }

      //start the on-disk-merge thread
      localFSMergerThread = new LocalFSMerger((LocalFileSystem)localFileSys);
      //start the in memory merger thread
      inMemFSMergeThread = new InMemFSMergeThread();
      localFSMergerThread.start();
      inMemFSMergeThread.start();

      // start the map events thread
      getMapEventsThread = new GetMapEventsThread();
      getMapEventsThread.start();

      // start the clock for bandwidth measurement
      long startTime = System.currentTimeMillis();
      long currentTime = startTime;
      long lastProgressTime = startTime;
      long lastOutputTime = 0;

        // loop until we get all required outputs
        while (getNumMapsCopyCompleted() < numMaps && mergeThrowable == null) {

          currentTime = System.currentTimeMillis();
          boolean logNow = false;
          if (currentTime - lastOutputTime > MIN_LOG_TIME) {
            lastOutputTime = currentTime;
            logNow = true;
          }
          if (logNow) {
            LOG.info(reduceTask.getTaskID() + " Need another "
                   + (numMaps - getNumMapsCopyCompleted()) + " map output(s) "
                   + "where " + numInFlight + " is already in progress");
          }

          // Put the hash entries for the failed fetches.
          Iterator<MapOutputLocation> locItr = retryFetches.iterator();

          while (locItr.hasNext()) {
            MapOutputLocation loc = locItr.next();
            List<MapOutputLocation> locList =
              mapLocations.get(loc.getHost());

            // Check if the list exists. Map output location mapping is cleared
            // once the jobtracker restarts and is rebuilt from scratch.
            // Note that map-output-location mapping will be recreated and hence
            // we continue with the hope that we might find some locations
            // from the rebuild map.
            if (locList != null) {
              // Add to the beginning of the list so that this map is
              //tried again before the others and we can hasten the
              //re-execution of this map should there be a problem
              locList.add(0, loc);
            }
          }

          if (retryFetches.size() > 0) {
            LOG.info(reduceTask.getTaskID() + ": " +
                  "Got " + retryFetches.size() +
                  " map-outputs from previous failures");
          }
          // clear the "failed" fetches hashmap
          retryFetches.clear();

          // now walk through the cache and schedule what we can
          int numScheduled = 0;
          int numDups = 0;

          synchronized (scheduledCopies) {

            // Randomize the map output locations to prevent
            // all reduce-tasks swamping the same tasktracker
            List<String> hostList = new ArrayList<String>();
            hostList.addAll(mapLocations.keySet());

            Collections.shuffle(hostList, this.random);

            Iterator<String> hostsItr = hostList.iterator();

            while (hostsItr.hasNext()) {

              String host = hostsItr.next();

              List<MapOutputLocation> knownOutputsByLoc =
                mapLocations.get(host);

              // Check if the list exists. Map output location mapping is
              // cleared once the jobtracker restarts and is rebuilt from
              // scratch.
              // Note that map-output-location mapping will be recreated and
              // hence we continue with the hope that we might find some
              // locations from the rebuild map and add then for fetching.
              if (knownOutputsByLoc == null || knownOutputsByLoc.size() == 0) {
                continue;
              }

              //Identify duplicate hosts here
              if (uniqueHosts.contains(host)) {
                 numDups += knownOutputsByLoc.size();
                 continue;
              }

              Long penaltyEnd = penaltyBox.get(host);
              boolean penalized = false;

              if (penaltyEnd != null) {
                if (currentTime < penaltyEnd.longValue()) {
                  penalized = true;
                } else {
                  penaltyBox.remove(host);
                }
              }

              if (penalized)
                continue;

              synchronized (knownOutputsByLoc) {

                locItr = knownOutputsByLoc.iterator();

                while (locItr.hasNext()) {

                  MapOutputLocation loc = locItr.next();

                  // Do not schedule fetches from OBSOLETE maps
                  if (obsoleteMapIds.contains(loc.getTaskAttemptId())) {
                    locItr.remove();
                    continue;
                  }

                  uniqueHosts.add(host);
                  scheduledCopies.add(loc);
                  locItr.remove(); // remove from knownOutputs
                  numInFlight++; numScheduled++;

                  //
                  // Comment out this break allows fetching all the shards at
                  // once from a host, instead of fetching one at a time.
                  // See MAPREDUCE-318.
                  //
                  // break; //we have a map from this host
                  //
                }
              }
            }
            scheduledCopies.notifyAll();
          }

          if (numScheduled > 0 || logNow) {
            LOG.info(reduceTask.getTaskID() + " Scheduled " + numScheduled +
                   " outputs (" + penaltyBox.size() +
                   " slow hosts and " + numDups + " dup hosts)");
          }

          if (penaltyBox.size() > 0 && logNow) {
            LOG.info("Penalized(slow) Hosts: ");
            for (String host : penaltyBox.keySet()) {
              LOG.info(host + " Will be considered after: " +
                  ((penaltyBox.get(host) - currentTime)/1000) + " seconds.");
            }
          }

          // if we have no copies in flight and we can't schedule anything
          // new, just wait for a bit
          try {
            if (numInFlight == 0 && numScheduled == 0) {
              // we should indicate progress as we don't want TT to think
              // we're stuck and kill us
              reporter.progress();
              synchronized (mapLocations) {
                mapLocations.wait(5000);
              }
            }
          } catch (InterruptedException e) { } // IGNORE

          while (numInFlight > 0 && mergeThrowable == null) {
            LOG.debug(reduceTask.getTaskID() + " numInFlight = " +
                      numInFlight);
            //the call to getCopyResult will either
            //1) return immediately with a null or a valid CopyResult object,
            //                 or
            //2) if the numInFlight is above maxInFlight, return with a
            //   CopyResult object after getting a notification from a
            //   fetcher thread,
            //So, when getCopyResult returns null, we can be sure that
            //we aren't busy enough and we should go and get more mapcompletion
            //events from the tasktracker
            CopyResult cr = getCopyResult(numInFlight);

            if (cr == null) {
              break;
            }

            if (cr.getSuccess()) {  // a successful copy
              numCopied++;
              lastProgressTime = System.currentTimeMillis();
              reduceShuffleBytes.increment(cr.getSize());

              long secsSinceStart =
                (System.currentTimeMillis()-startTime)/1000+1;
              float mbs = ((float)reduceShuffleBytes.getCounter())/(1024*1024);
              float transferRate = mbs/secsSinceStart;

              copyPhase.startNextPhase();
              copyPhase.setStatus("copy (" + numCopied + " of " + numMaps
                                  + " at " +
                                  mbpsFormat.format(transferRate) + " MB/s)");

              // Note successful fetch for this mapId to invalidate
              // (possibly) old fetch-failures
View Full Code Here

        new HashMap<String, List<MapOutputLocation>>();
      int totalFailures = 0;
      int            numInFlight = 0, numCopied = 0;
      long           bytesTransferred = 0;
      DecimalFormat  mbpsFormat = new DecimalFormat("0.00");
      final Progress copyPhase =
        reduceTask.getProgress().phase();
      LocalFSMerger localFSMergerThread = null;
      InMemFSMergeThread inMemFSMergeThread = null;
     
      for (int i = 0; i < numMaps; i++) {
        copyPhase.addPhase();       // add sub-phase per file
      }
     
      copiers = new ArrayList<MapOutputCopier>(numCopiers);
     
      Reporter reporter = getReporter(umbilical);

      // start all the copying threads
      for (int i=0; i < numCopiers; i++) {
        MapOutputCopier copier = new MapOutputCopier(conf, reporter);
        copiers.add(copier);
        copier.start();
      }
     
      //start the on-disk-merge thread
      localFSMergerThread = new LocalFSMerger((LocalFileSystem)localFileSys);
      //start the in memory merger thread
      inMemFSMergeThread = new InMemFSMergeThread();
      localFSMergerThread.start();
      inMemFSMergeThread.start();
     
      // start the clock for bandwidth measurement
      long startTime = System.currentTimeMillis();
      long currentTime = startTime;
      long lastProgressTime = startTime;
      long lastOutputTime = 0;
      IntWritable fromEventId = new IntWritable(0);

      //List of unique hosts containing map outputs
      List<String> hostList = new ArrayList<String>();
     
        // loop until we get all required outputs
        while (copiedMapOutputs.size() < numMaps && mergeThrowable == null) {
         
          currentTime = System.currentTimeMillis();
          boolean logNow = false;
          if (currentTime - lastOutputTime > MIN_LOG_TIME) {
            lastOutputTime = currentTime;
            logNow = true;
          }
          if (logNow) {
            LOG.info(reduceTask.getTaskID() + " Need another "
                   + (numMaps - copiedMapOutputs.size()) + " map output(s) "
                   + "where " + numInFlight + " is already in progress");
          }
         
          try {
            // Put the hash entries for the failed fetches.
            Iterator<MapOutputLocation> locItr = retryFetches.iterator();
            while (locItr.hasNext()) {
              MapOutputLocation loc = locItr.next();
              List<MapOutputLocation> locList =
                mapLocations.get(loc.getHost());
              if (locList == null) {
                locList = new LinkedList<MapOutputLocation>();
                mapLocations.put(loc.getHost(), locList);
                hostList.add(loc.getHost());
              }
              //Add to the beginning of the list so that this map is
              //tried again before the others and we can hasten the
              //re-execution of this map should there be a problem
              locList.add(0, loc);
            }
            
            // The call getMapCompletionEvents will update fromEventId to
            // used for the next call to getMapCompletionEvents

            int currentNumObsoleteMapIds = obsoleteMapIds.size();

            int numNewOutputs = getMapCompletionEvents(fromEventId,
                                                       mapLocations,
                                                       hostList);

            if (numNewOutputs > 0 || logNow) {
              LOG.info(reduceTask.getTaskID() + ": " +
                  "Got " + numNewOutputs +
                  " new map-outputs");
            }
           
            int numNewObsoleteMaps = obsoleteMapIds.size()-currentNumObsoleteMapIds;

            if (numNewObsoleteMaps > 0) {
              LOG.info(reduceTask.getTaskID() + ": " +
                  "Got " + numNewObsoleteMaps +
                  " obsolete map-outputs from tasktracker ");
            }
           
            if (retryFetches.size() > 0) {
              LOG.info(reduceTask.getTaskID() + ": " +
                  "Got " + retryFetches.size() +
                  " map-outputs from previous failures");
            }
            // clear the "failed" fetches hashmap
            retryFetches.clear();
          }
          catch (IOException ie) {
            LOG.warn(reduceTask.getTaskID() +
                    " Problem locating map outputs: " +
                    StringUtils.stringifyException(ie));
          }
         
          // now walk through the cache and schedule what we can
          int numScheduled = 0;
          int numDups = 0;
         
          synchronized (scheduledCopies) {

            // Randomize the map output locations to prevent
            // all reduce-tasks swamping the same tasktracker
            Collections.shuffle(hostList, this.random);
           
            Iterator<String> hostsItr = hostList.iterator();
            while (hostsItr.hasNext()) {
           
              String host = hostsItr.next();

              List<MapOutputLocation> knownOutputsByLoc =
                mapLocations.get(host);

              //Identify duplicate hosts here
              if (uniqueHosts.contains(host)) {
                 numDups += knownOutputsByLoc.size() -1;
                 continue;
              }

              Long penaltyEnd = penaltyBox.get(host);
              boolean penalized = false;
           
              if (penaltyEnd != null) {
                if (currentTime < penaltyEnd.longValue()) {
                  penalized = true;
                } else {
                  penaltyBox.remove(host);
                }
              }
             
              if (penalized)
                continue;

              Iterator<MapOutputLocation> locItr =
                knownOutputsByLoc.iterator();
           
              while (locItr.hasNext()) {
             
                MapOutputLocation loc = locItr.next();
             
                // Do not schedule fetches from OBSOLETE maps
                if (obsoleteMapIds.contains(loc.getTaskAttemptId())) {
                  locItr.remove();
                  continue;
                }

                uniqueHosts.add(host);
                scheduledCopies.add(loc);
                locItr.remove(); // remove from knownOutputs
                numInFlight++; numScheduled++;

                break; //we have a map from this host
              }
    
              if (knownOutputsByLoc.size() == 0) {
                mapLocations.remove(host);
                hostsItr.remove();
              }
            }
            scheduledCopies.notifyAll();
          }
          if (numScheduled > 0 || logNow) {
            LOG.info(reduceTask.getTaskID() + " Scheduled " + numScheduled +
                   " outputs (" + penaltyBox.size() +
                   " slow hosts and" + numDups + " dup hosts)");
          }

          if (penaltyBox.size() > 0 && logNow) {
            LOG.info("Penalized(slow) Hosts: ");
            for (String host : penaltyBox.keySet()) {
              LOG.info(host + " Will be considered after: " +
                  ((penaltyBox.get(host) - currentTime)/1000) + " seconds.");
            }
          }
         
          // if we have no copies in flight and we can't schedule anything
          // new, just wait for a bit
          try {
            if (numInFlight == 0 && numScheduled == 0) {
              // we should indicate progress as we don't want TT to think
              // we're stuck and kill us
              reporter.progress();
              Thread.sleep(5000);
            }
          } catch (InterruptedException e) { } // IGNORE
         
          while (numInFlight > 0 && mergeThrowable == null) {
            LOG.debug(reduceTask.getTaskID() + " numInFlight = " +
                      numInFlight);
            //the call to getCopyResult will either
            //1) return immediately with a null or a valid CopyResult object,
            //                 or
            //2) if the numInFlight is above maxInFlight, return with a
            //   CopyResult object after getting a notification from a
            //   fetcher thread,
            //So, when getCopyResult returns null, we can be sure that
            //we aren't busy enough and we should go and get more mapcompletion
            //events from the tasktracker
            CopyResult cr = getCopyResult(numInFlight);

            if (cr == null) {
              break;
            }
           
            if (cr.getSuccess()) {  // a successful copy
              numCopied++;
              lastProgressTime = System.currentTimeMillis();
              bytesTransferred += cr.getSize();
               
              long secsSinceStart =
                (System.currentTimeMillis()-startTime)/1000+1;
              float mbs = ((float)bytesTransferred)/(1024*1024);
              float transferRate = mbs/secsSinceStart;
               
              copyPhase.startNextPhase();
              copyPhase.setStatus("copy (" + numCopied + " of " + numMaps
                                  + " at " +
                                  mbpsFormat.format(transferRate) + " MB/s)");
               
              // Note successfull fetch for this mapId to invalidate
              // (possibly) old fetch-failures
View Full Code Here

   
    LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LOCAL_DIR);
   
    MergeManagerImpl<Text, Text> mergeManager = new MergeManagerImpl<Text, Text>(
        reduceId, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null,
        null, null, new Progress(), new MROutputFiles());
   
    // write map outputs
    Map<String, String> map1 = new TreeMap<String, String>();
    map1.put("apple", "disgusting");
    map1.put("carrot", "delicious");
View Full Code Here

    Class<Text> keyClass = (Class<Text>) jobConf.getMapOutputKeyClass();
    Class<Text> valueClass = (Class<Text>) jobConf.getMapOutputValueClass();
    RawComparator<Text> comparator = jobConf.getOutputKeyComparator();
    Counter readsCounter = new Counter();
    Counter writesCounter = new Counter();
    Progress mergePhase = new Progress();
    RawKeyValueIterator mergeQueue = Merger.merge(conf, fs, keyClass,
        valueClass, segments, 2, tmpDir, comparator, getReporter(),
        readsCounter, writesCounter, mergePhase);
    Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), 0.0f);
  }
View Full Code Here

TOP

Related Classes of org.apache.hadoop.util.Progress

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.