Package org.apache.hadoop.util

Examples of org.apache.hadoop.util.Progress
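All of the fragments on this page use the same mechanism: a Progress object models a tree of phases, a task advances through its phases, and parent nodes aggregate the progress of their children. As a primer, here is a minimal, self-contained sketch of that phase tree; it is not taken from any fragment below, and the phase names are purely illustrative.

import org.apache.hadoop.util.Progress;

public class ProgressSketch {
  public static void main(String[] args) {
    Progress root = new Progress();
    Progress copy = root.addPhase("copy");    // each phase gets an equal share
    Progress sort = root.addPhase("sort");    // of its parent's progress

    copy.set(0.5f);                           // copy phase half done
    System.out.println(root.get());           // 0.25: half of copy's 0.5 share

    copy.complete();                          // finish the copy phase...
    root.startNextPhase();                    // ...and move on to sort
    sort.set(0.5f);                           // sort phase half done
    System.out.println(root.get());           // 0.75
  }
}

Note that a sibling phase only contributes to the parent once startNextPhase() has made it the current phase; this is why the shuffle fragments below pair each completed copy with a startNextPhase() call.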


    }
    public DataInputBuffer getKey() throws IOException { return iter.getKey(); }
    public DataInputBuffer getValue() throws IOException { return iter.getValue(); }
    public void close() throws IOException { }
    public Progress getProgress() {
      return new Progress();
    }
View Full Code Here


    public void close() throws IOException {
    }

    public Progress getProgress() {
      // TODO
      return new Progress();
    }
View Full Code Here

      int            numInFlight = 0, numCopied = 0;
      int            lowThreshold = numCopiers*2;
      long           bytesTransferred = 0;
      DecimalFormat  mbpsFormat = new DecimalFormat("0.00");
      Random         backoff = new Random();
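      // phase() returns the current sub-phase (here, the copy phase) of the
      // task's progress tree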
      final Progress copyPhase =
        reduceTask.getProgress().phase();
     
      //tweak the probe sample size (make it a function of numCopiers)
      probe_sample_size = Math.max(numCopiers*5, 50);
     
      for (int i = 0; i < numOutputs; i++) {
        neededOutputs.add(i);
        copyPhase.addPhase();       // add sub-phase per file
      }
     
      copiers = new MapOutputCopier[numCopiers];
     
      Reporter reporter = getReporter(umbilical);
      // start all the copying threads
      for (int i=0; i < copiers.length; i++) {
        copiers[i] = new MapOutputCopier(reporter);
        copiers[i].start();
      }
     
      // start the clock for bandwidth measurement
      long startTime = System.currentTimeMillis();
      long currentTime = startTime;
      IntWritable fromEventId = new IntWritable(0);
     
      try {
        // loop until we get all required outputs
        while (!neededOutputs.isEmpty() && mergeThrowable == null) {
         
          LOG.info(reduceTask.getTaskId() + " Need " + neededOutputs.size() +
          " map output(s)");
         
          try {
            // Put the hash entries for the failed fetches. Entries here
            // might be replaced by (mapId) hashkeys from new successful
            // Map executions, if the fetch failures were due to lost tasks.
            // The replacements, if at all, will happen when we query the
            // tasktracker and put the mapId hashkeys with new
            // MapOutputLocations as values
            knownOutputs.addAll(retryFetches);
           
            // The call to getMapCompletionEvents will update fromEventId to be
            // used for the next call to getMapCompletionEvents
            int currentNumKnownMaps = knownOutputs.size();
            int currentNumObsoleteMapIds = obsoleteMapIds.size();
            getMapCompletionEvents(fromEventId, knownOutputs);

            LOG.info(reduceTask.getTaskId() + ": " +
                     "Got " + (knownOutputs.size()-currentNumKnownMaps) +
                     " new map-outputs & " +
                     (obsoleteMapIds.size()-currentNumObsoleteMapIds) +
                     " obsolete map-outputs from tasktracker and " +
                     retryFetches.size() + " map-outputs from previous failures"
                    );

            // clear the "failed" fetches hashmap
            retryFetches.clear();
          }
          catch (IOException ie) {
            LOG.warn(reduceTask.getTaskId() +
                    " Problem locating map outputs: " +
                    StringUtils.stringifyException(ie));
          }
         
          // now walk through the cache and schedule what we can
          int numKnown = knownOutputs.size(), numScheduled = 0;
          int numSlow = 0, numDups = 0;
         
          LOG.info(reduceTask.getTaskId() + " Got " + numKnown +
                   " known map output location(s); scheduling...");
         
          synchronized (scheduledCopies) {
            // Randomize the map output locations to prevent
            // all reduce-tasks swamping the same tasktracker
            Collections.shuffle(knownOutputs, this.random);
           
            Iterator locIt = knownOutputs.iterator();
           
            currentTime = System.currentTimeMillis();
            while (locIt.hasNext()) {
             
              MapOutputLocation loc = (MapOutputLocation)locIt.next();
             
              // Do not schedule fetches from OBSOLETE maps
              if (obsoleteMapIds.contains(loc.getMapTaskId())) {
                locIt.remove();
                continue;
              }

              Long penaltyEnd = penaltyBox.get(loc.getHost());
              boolean penalized = false, duplicate = false;
             
              if (penaltyEnd != null && currentTime < penaltyEnd.longValue()) {
                penalized = true; numSlow++;
              }
              if (uniqueHosts.contains(loc.getHost())) {
                duplicate = true; numDups++;
              }
             
              if (!penalized && !duplicate) {
                uniqueHosts.add(loc.getHost());
                scheduledCopies.add(loc);
                locIt.remove();     // remove from knownOutputs
                numInFlight++; numScheduled++;
              }
            }
            scheduledCopies.notifyAll();
          }
          LOG.info(reduceTask.getTaskId() + " Scheduled " + numScheduled +
                   " of " + numKnown + " known outputs (" + numSlow +
                   " slow hosts and " + numDups + " dup hosts)");
         
          // if we have no copies in flight and we can't schedule anything
          // new, just wait for a bit
          try {
            if (numInFlight == 0 && numScheduled == 0) {
              // we should indicate progress as we don't want TT to think
              // we're stuck and kill us
              reporter.progress();
              Thread.sleep(5000);
            }
          } catch (InterruptedException e) { } // IGNORE
         
          while (numInFlight > 0 && mergeThrowable == null) {
            LOG.debug(reduceTask.getTaskId() + " numInFlight = " +
                      numInFlight);
            CopyResult cr = getCopyResult();
           
            if (cr != null) {
              if (cr.getSuccess()) {  // a successful copy
                numCopied++;
                bytesTransferred += cr.getSize();
               
                long secsSinceStart =
                  (System.currentTimeMillis()-startTime)/1000+1;
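                // the "+1" above avoids a divide-by-zero during the first second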
                float mbs = ((float)bytesTransferred)/(1024*1024);
                float transferRate = mbs/secsSinceStart;
               
                copyPhase.startNextPhase();
                copyPhase.setStatus("copy (" + numCopied + " of " + numOutputs
                                    + " at " +
                                    mbpsFormat.format(transferRate) + " MB/s)");
              } else if (cr.isObsolete()) {
                //ignore
                LOG.info(reduceTask.getTaskId() +
View Full Code Here
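The shuffle fragment above shows the standard per-file pattern: addPhase() is called once for every expected map output, and each successful copy triggers startNextPhase() plus a setStatus() carrying the running transfer rate. A stripped-down sketch of just that pattern (the numOutputs count is illustrative):

import org.apache.hadoop.util.Progress;

public class CopyPhaseSketch {
  public static void main(String[] args) {
    Progress copyPhase = new Progress();
    int numOutputs = 4;                        // illustrative count
    for (int i = 0; i < numOutputs; i++) {
      copyPhase.addPhase();                    // one sub-phase per expected file
    }
    for (int copied = 1; copied <= numOutputs; copied++) {
      System.out.println(copyPhase.get());     // 0.0, 0.25, 0.5, 0.75
      copyPhase.startNextPhase();              // another copy finished
      copyPhase.setStatus("copy (" + copied + " of " + numOutputs + ")");
    }
    copyPhase.complete();                      // mark the whole phase done
  }
}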

 
  private static class EmptyIterator implements TezRawKeyValueIterator {
    final Progress progress;

    EmptyIterator() {
      progress = new Progress();
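      // nothing to read, so report this iterator as already complete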
      progress.set(1.0f);
    }
View Full Code Here

   
    public boolean fetchOutputs() throws IOException {
      int totalFailures = 0;
      int            numInFlight = 0, numCopied = 0;
      DecimalFormat  mbpsFormat = new DecimalFormat("0.00");
      final Progress copyPhase =
        reduceTask.getProgress().phase();
      LocalFSMerger localFSMergerThread = null;
      InMemFSMergeThread inMemFSMergeThread = null;
      GetMapEventsThread getMapEventsThread = null;
     
      for (int i = 0; i < numMaps; i++) {
        copyPhase.addPhase();       // add sub-phase per file
      }
     
      copiers = new ArrayList<MapOutputCopier>(numCopiers);
     
      // start all the copying threads
      for (int i=0; i < numCopiers; i++) {
        MapOutputCopier copier = new MapOutputCopier(conf, reporter,
            reduceTask.getJobTokenSecret());
        copiers.add(copier);
        copier.start();
      }
     
      //start the on-disk-merge thread
      localFSMergerThread = new LocalFSMerger((LocalFileSystem)localFileSys);
      //start the in memory merger thread
      inMemFSMergeThread = new InMemFSMergeThread();
      localFSMergerThread.start();
      inMemFSMergeThread.start();
     
      // start the map events thread
      getMapEventsThread = new GetMapEventsThread();
      getMapEventsThread.start();
     
      // start the clock for bandwidth measurement
      long startTime = System.currentTimeMillis();
      long currentTime = startTime;
      long lastProgressTime = startTime;
      long lastOutputTime = 0;
     
        // loop until we get all required outputs
        while (copiedMapOutputs.size() < numMaps && mergeThrowable == null) {
         
          currentTime = System.currentTimeMillis();
          boolean logNow = false;
          if (currentTime - lastOutputTime > MIN_LOG_TIME) {
            lastOutputTime = currentTime;
            logNow = true;
          }
          if (logNow) {
            LOG.info(reduceTask.getTaskID() + " Need another "
                   + (numMaps - copiedMapOutputs.size()) + " map output(s) "
                   + "where " + numInFlight + " is already in progress");
          }

          // Put the hash entries for the failed fetches.
          Iterator<MapOutputLocation> locItr = retryFetches.iterator();

          while (locItr.hasNext()) {
            MapOutputLocation loc = locItr.next();
            List<MapOutputLocation> locList =
              mapLocations.get(loc.getHost());
           
            // Check if the list exists. Map output location mapping is cleared
            // once the jobtracker restarts and is rebuilt from scratch.
            // Note that map-output-location mapping will be recreated and hence
            // we continue with the hope that we might find some locations
            // from the rebuilt map.
            if (locList != null) {
              // Add to the beginning of the list so that this map is
              //tried again before the others and we can hasten the
              //re-execution of this map should there be a problem
              locList.add(0, loc);
            }
          }

          if (retryFetches.size() > 0) {
            LOG.info(reduceTask.getTaskID() + ": " +
                  "Got " + retryFetches.size() +
                  " map-outputs from previous failures");
          }
          // clear the "failed" fetches hashmap
          retryFetches.clear();

          // now walk through the cache and schedule what we can
          int numScheduled = 0;
          int numDups = 0;
         
          synchronized (scheduledCopies) {
 
            // Randomize the map output locations to prevent
            // all reduce-tasks swamping the same tasktracker
            List<String> hostList = new ArrayList<String>();
            hostList.addAll(mapLocations.keySet());
           
            Collections.shuffle(hostList, this.random);
             
            Iterator<String> hostsItr = hostList.iterator();

            while (hostsItr.hasNext()) {
           
              String host = hostsItr.next();

              List<MapOutputLocation> knownOutputsByLoc =
                mapLocations.get(host);

              // Check if the list exists. Map output location mapping is
              // cleared once the jobtracker restarts and is rebuilt from
              // scratch.
              // Note that map-output-location mapping will be recreated and
              // hence we continue with the hope that we might find some
              // locations from the rebuilt map and add them for fetching.
              if (knownOutputsByLoc == null || knownOutputsByLoc.size() == 0) {
                continue;
              }
             
              //Identify duplicate hosts here
              if (uniqueHosts.contains(host)) {
                 numDups += knownOutputsByLoc.size();
                 continue;
              }

              Long penaltyEnd = penaltyBox.get(host);
              boolean penalized = false;
           
              if (penaltyEnd != null) {
                if (currentTime < penaltyEnd.longValue()) {
                  penalized = true;
                } else {
                  penaltyBox.remove(host);
                }
              }
             
              if (penalized)
                continue;

              synchronized (knownOutputsByLoc) {
             
                locItr = knownOutputsByLoc.iterator();
           
                while (locItr.hasNext()) {
             
                  MapOutputLocation loc = locItr.next();
             
                  // Do not schedule fetches from OBSOLETE maps
                  if (obsoleteMapIds.contains(loc.getTaskAttemptId())) {
                    locItr.remove();
                    continue;
                  }

                  uniqueHosts.add(host);
                  scheduledCopies.add(loc);
                  locItr.remove();  // remove from knownOutputs
                  numInFlight++; numScheduled++;

                  break; //we have a map from this host
                }
              }
            }
            scheduledCopies.notifyAll();
          }

          if (numScheduled > 0 || logNow) {
            LOG.info(reduceTask.getTaskID() + " Scheduled " + numScheduled +
                   " outputs (" + penaltyBox.size() +
                   " slow hosts and" + numDups + " dup hosts)");
          }

          if (penaltyBox.size() > 0 && logNow) {
            LOG.info("Penalized(slow) Hosts: ");
            for (String host : penaltyBox.keySet()) {
              LOG.info(host + " Will be considered after: " +
                  ((penaltyBox.get(host) - currentTime)/1000) + " seconds.");
            }
          }

          // if we have no copies in flight and we can't schedule anything
          // new, just wait for a bit
          try {
            if (numInFlight == 0 && numScheduled == 0) {
              // we should indicate progress as we don't want TT to think
              // we're stuck and kill us
              reporter.progress();
              Thread.sleep(5000);
            }
          } catch (InterruptedException e) { } // IGNORE
         
          while (numInFlight > 0 && mergeThrowable == null) {
            LOG.debug(reduceTask.getTaskID() + " numInFlight = " +
                      numInFlight);
            //the call to getCopyResult will either
            //1) return immediately with a null or a valid CopyResult object,
            //                 or
            //2) if the numInFlight is above maxInFlight, return with a
            //   CopyResult object after getting a notification from a
            //   fetcher thread,
            //So, when getCopyResult returns null, we can be sure that
            //we aren't busy enough and we should go and get more mapcompletion
            //events from the tasktracker
            CopyResult cr = getCopyResult(numInFlight);

            if (cr == null) {
              break;
            }
           
            if (cr.getSuccess()) {  // a successful copy
              numCopied++;
              lastProgressTime = System.currentTimeMillis();
              reduceShuffleBytes.increment(cr.getSize());
               
              long secsSinceStart =
                (System.currentTimeMillis()-startTime)/1000+1;
              float mbs = ((float)reduceShuffleBytes.getCounter())/(1024*1024);
              float transferRate = mbs/secsSinceStart;
               
              copyPhase.startNextPhase();
              copyPhase.setStatus("copy (" + numCopied + " of " + numMaps
                                  + " at " +
                                  mbpsFormat.format(transferRate) + " MB/s)");
               
              // Note successful fetch for this mapId to invalidate
              // (possibly) old fetch-failures
View Full Code Here
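The comment block inside the loop above spells out getCopyResult's contract: return a queued result (or null) immediately when the task is not busy, but block for a result once numInFlight exceeds a threshold. Below is a hedged sketch of just that contract; CopyResultQueue, the Object result type, and the maxInFlight value are illustrative stand-ins, not the actual Hadoop implementation:

import java.util.ArrayList;
import java.util.List;

class CopyResultQueue {
  private final List<Object> copyResults = new ArrayList<Object>();
  private final int maxInFlight = 10;          // illustrative threshold

  synchronized Object getCopyResult(int numInFlight)
      throws InterruptedException {
    while (copyResults.isEmpty()) {
      if (numInFlight < maxInFlight) {
        return null;   // not busy: caller should fetch more completion events
      }
      wait();          // busy: block until a fetcher thread posts a result
    }
    return copyResults.remove(0);
  }

  synchronized void add(Object result) {       // called by fetcher threads
    copyResults.add(result);
    notifyAll();
  }
}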

      final int numInMemSegments = memDiskSegments.size();
      diskSegments.addAll(0, memDiskSegments);
      memDiskSegments.clear();
      // Pass mergePhase only if there are going to be intermediate
      // merges. See comment where mergePhaseFinished is being set
      Progress thisPhase = (mergePhaseFinished) ? null : mergePhase;
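      // a null here tells the merger not to report into the (already
      // finished) merge phase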
      RawKeyValueIterator diskMerge = Merger.merge(
          job, fs, keyClass, valueClass, codec, diskSegments,
          ioSortFactor, numInMemSegments, tmpDir, comparator,
          reporter, false, spilledRecordsCounter, null, thisPhase);
      diskSegments.clear();
View Full Code Here

      int totalFailures = 0;
      int            numInFlight = 0, numCopied = 0;
      int            lowThreshold = numCopiers*2;
      long           bytesTransferred = 0;
      DecimalFormat  mbpsFormat = new DecimalFormat("0.00");
      final Progress copyPhase =
        reduceTask.getProgress().phase();
     
      //tweak the probe sample size (make it a function of numCopiers)
      probe_sample_size = Math.max(numCopiers*5, 50);
     
      for (int i = 0; i < numOutputs; i++) {
        neededOutputs.add(i);
        copyPhase.addPhase();       // add sub-phase per file
      }
     
      copiers = new MapOutputCopier[numCopiers];
     
      Reporter reporter = getReporter(umbilical);
      // create an instance of the sorter for merging the on-disk files
      SequenceFile.Sorter localFileSystemSorter =
        new SequenceFile.Sorter(localFileSys, conf.getOutputKeyComparator(),
                                conf.getMapOutputKeyClass(),
                                conf.getMapOutputValueClass(), conf);
      localFileSystemSorter.setProgressable(reporter);
     
      // start all the copying threads
      for (int i=0; i < copiers.length; i++) {
        copiers[i] = new MapOutputCopier(reporter);
        copiers[i].start();
      }
     
      // start the clock for bandwidth measurement
      long startTime = System.currentTimeMillis();
      long currentTime = startTime;
      long lastProgressTime = System.currentTimeMillis();
      IntWritable fromEventId = new IntWritable(0);
     
      try {
        // loop until we get all required outputs
        while (!neededOutputs.isEmpty() && mergeThrowable == null) {
         
          LOG.info(reduceTask.getTaskId() + " Need " + neededOutputs.size() +
          " map output(s)");
         
          try {
            // Put the hash entries for the failed fetches. Entries here
            // might be replaced by (mapId) hashkeys from new successful
            // Map executions, if the fetch failures were due to lost tasks.
            // The replacements, if at all, will happen when we query the
            // tasktracker and put the mapId hashkeys with new
            // MapOutputLocations as values
            knownOutputs.addAll(retryFetches);
            
            // ensure we have enough to keep us busy
            boolean busy = isBusy(numInFlight, numCopiers, lowThreshold,
                                  uniqueHosts.size(), probe_sample_size,
                                  numOutputs - numCopied);
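            // when busy enough, skip querying the tasktracker for new events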
            if (!busy) {
              // The call to getMapCompletionEvents will update fromEventId to be
              // used for the next call to getMapCompletionEvents
              int currentNumKnownMaps = knownOutputs.size();
              int currentNumObsoleteMapIds = obsoleteMapIds.size();
              getMapCompletionEvents(fromEventId, knownOutputs);

           
              LOG.info(reduceTask.getTaskId() + ": " +
                     "Got " + (knownOutputs.size()-currentNumKnownMaps) +
                     " new map-outputs & " +
                     (obsoleteMapIds.size()-currentNumObsoleteMapIds) +
                     " obsolete map-outputs from tasktracker and " +
                     retryFetches.size() + " map-outputs from previous failures"
                     );
            } else {
              LOG.info(" Busy enough - did not query the tasktracker for "
                       + "new map outputs. Have "+ retryFetches.size()
                       + " map outputs from previous failures");
            }
            // clear the "failed" fetches hashmap
            retryFetches.clear();
          }
          catch (IOException ie) {
            LOG.warn(reduceTask.getTaskId() +
                    " Problem locating map outputs: " +
                    StringUtils.stringifyException(ie));
          }
         
          // now walk through the cache and schedule what we can
          int numKnown = knownOutputs.size(), numScheduled = 0;
          int numSlow = 0, numDups = 0;
         
          LOG.info(reduceTask.getTaskId() + " Got " + numKnown +
                   " known map output location(s); scheduling...");
         
          synchronized (scheduledCopies) {
            // Randomize the map output locations to prevent
            // all reduce-tasks swamping the same tasktracker
            Collections.shuffle(knownOutputs, this.random);
           
            Iterator<MapOutputLocation> locIt = knownOutputs.iterator();
           
            currentTime = System.currentTimeMillis();
            while (locIt.hasNext()) {
             
              MapOutputLocation loc = locIt.next();
             
              // Do not schedule fetches from OBSOLETE maps
              if (obsoleteMapIds.contains(loc.getMapTaskId())) {
                locIt.remove();
                continue;
              }
             
              Long penaltyEnd = penaltyBox.get(loc.getHost());
              boolean penalized = false, duplicate = false;
             
              if (penaltyEnd != null && currentTime < penaltyEnd.longValue()) {
                penalized = true; numSlow++;
              }
              if (uniqueHosts.contains(loc.getHost())) {
                duplicate = true; numDups++;
              }
             
              if (!penalized && !duplicate) {
                uniqueHosts.add(loc.getHost());
                scheduledCopies.add(loc);
                locIt.remove();     // remove from knownOutputs
                numInFlight++; numScheduled++;
              }
            }
            scheduledCopies.notifyAll();
          }
          LOG.info(reduceTask.getTaskId() + " Scheduled " + numScheduled +
                   " of " + numKnown + " known outputs (" + numSlow +
                   " slow hosts and " + numDups + " dup hosts)");
         
          // Check if an on-disk merge can be done. This will help if there
          // are no copies to be fetched but sufficient copies to be merged.
          synchronized (mapOutputFilesOnDisk) {
            if (!localFSMergeInProgress
                && (mapOutputFilesOnDisk.size() >= (2 * ioSortFactor - 1))) {
              // make sure that only one thread merges the disk files
              localFSMergeInProgress = true;
              // start the on-disk-merge process
              LocalFSMerger lfsm = 
                new LocalFSMerger((LocalFileSystem)localFileSys,
                                  localFileSystemSorter);
              lfsm.setName("Thread for merging on-disk files");
              lfsm.setDaemon(true);
              lfsm.start();
            }
          }
         
          // if we have no copies in flight and we can't schedule anything
          // new, just wait for a bit
          try {
            if (numInFlight == 0 && numScheduled == 0) {
              // we should indicate progress as we don't want TT to think
              // we're stuck and kill us
              reporter.progress();
              Thread.sleep(5000);
            }
          } catch (InterruptedException e) { } // IGNORE
         
          while (numInFlight > 0 && mergeThrowable == null) {
            LOG.debug(reduceTask.getTaskId() + " numInFlight = " +
                      numInFlight);
            CopyResult cr = getCopyResult();
           
            if (cr != null) {
              if (cr.getSuccess()) {  // a successful copy
                numCopied++;
                lastProgressTime = System.currentTimeMillis();
                bytesTransferred += cr.getSize();
               
                long secsSinceStart =
                  (System.currentTimeMillis()-startTime)/1000+1;
                float mbs = ((float)bytesTransferred)/(1024*1024);
                float transferRate = mbs/secsSinceStart;
               
                copyPhase.startNextPhase();
                copyPhase.setStatus("copy (" + numCopied + " of " + numOutputs
                                    + " at " +
                                    mbpsFormat.format(transferRate) + " MB/s)");
               
                // Note successful fetch for this mapId to invalidate
                // (possibly) old fetch-failures
View Full Code Here

      mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
    LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
    CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
    Counter mockCounter = mock(Counter.class);
    TaskStatus mockTaskStatus = mock(TaskStatus.class);
    Progress mockProgress = mock(Progress.class);
    MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
    Task mockTask = mock(Task.class);

    try {
      String [] dirs = jobConf.getLocalDirs();
View Full Code Here
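The test fixture above mocks Progress alongside the task's other collaborators. A minimal sketch of how such a mock might be stubbed and verified with Mockito (the 0.5f value is illustrative):

import static org.mockito.Mockito.*;
import org.apache.hadoop.util.Progress;

public class ProgressMockSketch {
  public static void main(String[] args) {
    Progress mockProgress = mock(Progress.class);
    when(mockProgress.get()).thenReturn(0.5f);  // stub a fixed completion value
    System.out.println(mockProgress.get());     // prints 0.5
    verify(mockProgress).get();                 // get() was invoked once
  }
}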
