Package org.apache.hadoop.util

Examples of org.apache.hadoop.util.Progress


      final int numInMemSegments = memDiskSegments.size();
      diskSegments.addAll(0, memDiskSegments);
      memDiskSegments.clear();
      // Pass mergePhase only if there are going to be intermediate
      // merges. See the comment where mergePhaseFinished is set.
      Progress thisPhase = (mergePhaseFinished) ? null : mergePhase;
      RawKeyValueIterator diskMerge = Merger.merge(
          job, fs, keyClass, valueClass, diskSegments,
          ioSortFactor, numInMemSegments, tmpDir, comparator,
          reporter, false, spilledRecordsCounter, null, thisPhase);
      diskSegments.clear();
View Full Code Here


      final int numInMemSegments = memDiskSegments.size();
      diskSegments.addAll(0, memDiskSegments);
      memDiskSegments.clear();
      // Pass mergePhase only if there are going to be intermediate
      // merges. See the comment where mergePhaseFinished is set.
      Progress thisPhase = (mergePhaseFinished) ? null : mergePhase;
      RawKeyValueIterator diskMerge = Merger.merge(
          job, fs, keyClass, valueClass, diskSegments,
          ioSortFactor, numInMemSegments, tmpDir, comparator,
          reporter, false, spilledRecordsCounter, null, thisPhase);
      diskSegments.clear();
View Full Code Here

   
    LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LOCAL_DIR);
   
    MergeManagerImpl<Text, Text> mergeManager = new MergeManagerImpl<Text, Text>(
        reduceId, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null,
        null, null, new Progress(), new MROutputFiles());
   
    // write map outputs
    Map<String, String> map1 = new TreeMap<String, String>();
    map1.put("apple", "disgusting");
    map1.put("carrot", "delicious");
View Full Code Here

    Class<Text> keyClass = (Class<Text>) jobConf.getMapOutputKeyClass();
    Class<Text> valueClass = (Class<Text>) jobConf.getMapOutputValueClass();
    RawComparator<Text> comparator = jobConf.getOutputKeyComparator();
    Counter readsCounter = new Counter();
    Counter writesCounter = new Counter();
    Progress mergePhase = new Progress();
    RawKeyValueIterator mergeQueue = Merger.merge(conf, fs, keyClass,
        valueClass, segments, 2, tmpDir, comparator, getReporter(),
        readsCounter, writesCounter, mergePhase);
    Assert.assertEquals(1.0f, mergeQueue.getProgress().get());
  }
View Full Code Here

      @Override
      public void addFetchFailedMap(TaskAttemptID mapTaskId) {
      }
    };
    Progress progress = new Progress();

    TaskAttemptID reduceId = new TaskAttemptID("314159", 0, TaskType.REDUCE,
        0, 0);
    ShuffleSchedulerImpl scheduler = new ShuffleSchedulerImpl(job, status,
        reduceId, null, progress, null, null, null);

    JobID jobId = new JobID();
    TaskID taskId1 = new TaskID(jobId, TaskType.REDUCE, 1);
    scheduler.tipFailed(taskId1);

    Assert.assertEquals("Progress should be 0.5", 0.5f, progress.getProgress(),
        0.0f);
    Assert.assertFalse(scheduler.waitUntilDone(1));

    TaskID taskId0 = new TaskID(jobId, TaskType.REDUCE, 0);
    scheduler.tipFailed(taskId0);
    Assert.assertEquals("Progress should be 1.0", 1.0f, progress.getProgress(),
        0.0f);
    Assert.assertTrue(scheduler.waitUntilDone(1));
  }
View Full Code Here

      mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
    LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
    CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
    Counter mockCounter = mock(Counter.class);
    TaskStatus mockTaskStatus = mock(TaskStatus.class);
    Progress mockProgress = mock(Progress.class);
    MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
    Task mockTask = mock(Task.class);

    try {
      String [] dirs = jobConf.getLocalDirs();
View Full Code Here

   
    public boolean fetchOutputs() throws IOException {
      int totalFailures = 0;
      int            numInFlight = 0, numCopied = 0;
      DecimalFormat  mbpsFormat = new DecimalFormat("0.00");
      final Progress copyPhase =
        reduceTask.getProgress().phase();
      LocalFSMerger localFSMergerThread = null;
      InMemFSMergeThread inMemFSMergeThread = null;
      GetMapEventsThread getMapEventsThread = null;
     
      for (int i = 0; i < numMaps; i++) {
        copyPhase.addPhase();       // add sub-phase per file
      }
     
      copiers = new ArrayList<MapOutputCopier>(numCopiers);
     
      // start all the copying threads
      for (int i=0; i < numCopiers; i++) {
        MapOutputCopier copier = new MapOutputCopier(conf, reporter,
            reduceTask.getJobTokenSecret());
        copiers.add(copier);
        copier.start();
      }
     
      //start the on-disk-merge thread
      localFSMergerThread = new LocalFSMerger((LocalFileSystem)localFileSys);
      //start the in memory merger thread
      inMemFSMergeThread = new InMemFSMergeThread();
      localFSMergerThread.start();
      inMemFSMergeThread.start();
     
      // start the map events thread
      getMapEventsThread = new GetMapEventsThread();
      getMapEventsThread.start();
     
      // start the clock for bandwidth measurement
      long startTime = System.currentTimeMillis();
      long currentTime = startTime;
      long lastProgressTime = startTime;
      long lastOutputTime = 0;
     
        // loop until we get all required outputs
        while (copiedMapOutputs.size() < numMaps && mergeThrowable == null) {
          int numEventsAtStartOfScheduling;
          synchronized (copyResultsOrNewEventsLock) {
            numEventsAtStartOfScheduling = numEventsFetched;
          }
         
          currentTime = System.currentTimeMillis();
          boolean logNow = false;
          if (currentTime - lastOutputTime > MIN_LOG_TIME) {
            lastOutputTime = currentTime;
            logNow = true;
          }
          if (logNow) {
            LOG.info(reduceTask.getTaskID() + " Need another "
                   + (numMaps - copiedMapOutputs.size()) + " map output(s) "
                   + "where " + numInFlight + " is already in progress");
          }

          // Put the hash entries for the failed fetches.
          Iterator<MapOutputLocation> locItr = retryFetches.iterator();

          while (locItr.hasNext()) {
            MapOutputLocation loc = locItr.next();
            List<MapOutputLocation> locList =
              mapLocations.get(loc.getHost());
           
            // Check if the list exists. Map output location mapping is cleared
            // once the jobtracker restarts and is rebuilt from scratch.
            // Note that map-output-location mapping will be recreated and hence
            // we continue with the hope that we might find some locations
            // from the rebuild map.
            if (locList != null) {
              // Add to the beginning of the list so that this map is
              //tried again before the others and we can hasten the
              //re-execution of this map should there be a problem
              locList.add(0, loc);
            }
          }

          if (retryFetches.size() > 0) {
            LOG.info(reduceTask.getTaskID() + ": "
                  "Got " + retryFetches.size() +
                  " map-outputs from previous failures");
          }
          // clear the "failed" fetches hashmap
          retryFetches.clear();

          // now walk through the cache and schedule what we can
          int numScheduled = 0;
          int numDups = 0;
         
          synchronized (scheduledCopies) {
 
            // Randomize the map output locations to prevent
            // all reduce-tasks swamping the same tasktracker
            List<String> hostList = new ArrayList<String>();
            hostList.addAll(mapLocations.keySet());
           
            Collections.shuffle(hostList, this.random);
             
            Iterator<String> hostsItr = hostList.iterator();

            while (hostsItr.hasNext()) {
           
              String host = hostsItr.next();

              List<MapOutputLocation> knownOutputsByLoc =
                mapLocations.get(host);

              // Check if the list exists. Map output location mapping is
              // cleared once the jobtracker restarts and is rebuilt from
              // scratch.
              // Note that map-output-location mapping will be recreated and
              // hence we continue with the hope that we might find some
              // locations from the rebuilt map and add them for fetching.
              if (knownOutputsByLoc == null || knownOutputsByLoc.size() == 0) {
                continue;
              }
             
              //Identify duplicate hosts here
              if (uniqueHosts.contains(host)) {
                 numDups += knownOutputsByLoc.size();
                 continue;
              }

              Long penaltyEnd = penaltyBox.get(host);
              boolean penalized = false;
           
              if (penaltyEnd != null) {
                if (currentTime < penaltyEnd.longValue()) {
                  penalized = true;
                } else {
                  penaltyBox.remove(host);
                }
              }
             
              if (penalized)
                continue;

              synchronized (knownOutputsByLoc) {
             
                locItr = knownOutputsByLoc.iterator();
           
                while (locItr.hasNext()) {
             
                  MapOutputLocation loc = locItr.next();
             
                  // Do not schedule fetches from OBSOLETE maps
                  if (obsoleteMapIds.contains(loc.getTaskAttemptId())) {
                    locItr.remove();
                    continue;
                  }

                  uniqueHosts.add(host);
                  scheduledCopies.add(loc);
                  locItr.remove()// remove from knownOutputs
                  numInFlight++; numScheduled++;

                  break; //we have a map from this host
                }
              }
            }
            scheduledCopies.notifyAll();
          }

          if (numScheduled > 0 || logNow) {
            LOG.info(reduceTask.getTaskID() + " Scheduled " + numScheduled +
                   " outputs (" + penaltyBox.size() +
                   " slow hosts and" + numDups + " dup hosts)");
          }

          if (penaltyBox.size() > 0 && logNow) {
            LOG.info("Penalized(slow) Hosts: ");
            for (String host : penaltyBox.keySet()) {
              LOG.info(host + " Will be considered after: " +
                  ((penaltyBox.get(host) - currentTime)/1000) + " seconds.");
            }
          }

          // if we have no copies in flight and we can't schedule anything
          // new, just wait for a bit
          try {
            if (numInFlight == 0 && numScheduled == 0) {
              // we should indicate progress as we don't want TT to think
              // we're stuck and kill us
              reporter.progress();
              Thread.sleep(5000);
            }
          } catch (InterruptedException e) { } // IGNORE
         
          while (numInFlight > 0 && mergeThrowable == null) {
            if (LOG.isDebugEnabled()) {
              LOG.debug(reduceTask.getTaskID() + " numInFlight = " +
                        numInFlight);
            }
            //the call to getCopyResult will either
            //1) return immediately with a null or a valid CopyResult object,
            //                 or
            //2) if the numInFlight is above maxInFlight, return with a
            //   CopyResult object after getting a notification from a
            //   fetcher thread,
            //So, when getCopyResult returns null, we can be sure that
            //we aren't busy enough and we should go and get more mapcompletion
            //events from the tasktracker
            CopyResult cr = getCopyResult(numInFlight, numEventsAtStartOfScheduling);

            if (cr == null) {
              break;
            }
           
            if (cr.getSuccess()) {  // a successful copy
              numCopied++;
              lastProgressTime = System.currentTimeMillis();
              reduceShuffleBytes.increment(cr.getSize());
               
              long secsSinceStart =
                (System.currentTimeMillis()-startTime)/1000+1;
              float mbs = ((float)reduceShuffleBytes.getCounter())/(1024*1024);
              float transferRate = mbs/secsSinceStart;
               
              copyPhase.startNextPhase();
              copyPhase.setStatus("copy (" + numCopied + " of " + numMaps
                                  + " at " +
                                  mbpsFormat.format(transferRate) " MB/s)");
               
              // Note successful fetch for this mapId to invalidate
              // (possibly) old fetch-failures
View Full Code Here

      final int numInMemSegments = memDiskSegments.size();
      diskSegments.addAll(0, memDiskSegments);
      memDiskSegments.clear();
      // Pass mergePhase only if there are going to be intermediate
      // merges. See the comment where mergePhaseFinished is set.
      Progress thisPhase = (mergePhaseFinished) ? null : mergePhase;
      RawKeyValueIterator diskMerge = Merger.merge(
          job, fs, keyClass, valueClass,codec, diskSegments,
          ioSortFactor, numInMemSegments, tmpDir, comparator,
          reporter, false, spilledRecordsCounter, null, thisPhase);
      diskSegments.clear();
View Full Code Here

    public boolean fetchOutputs() throws IOException {
      int totalFailures = 0;
      int totalCopyResultsReceived = 0;
      int numInFlight = 0, numCopied = 0;
      DecimalFormat  mbpsFormat = new DecimalFormat("0.00");
      final Progress copyPhase =
        reduceTask.getProgress().phase();
      LocalFSMerger localFSMergerThread = null;
      InMemFSMergeThread inMemFSMergeThread = null;
      GetMapEventsThread getMapEventsThread = null;

      for (int i = 0; i < numMaps; i++) {
        copyPhase.addPhase();       // add sub-phase per file
      }

      copiers = new ArrayList<MapOutputCopier>(numCopiers);

      // start all the copying threads
      for (int i=0; i < numCopiers; i++) {
        MapOutputCopier copier = new MapOutputCopier(conf, reporter);
        copiers.add(copier);
        copier.start();
        // register the copier to jmxThreadInfoTracker
        jmxThreadInfoTracker.registerThreadToTask(
            "REDUCE_COPY_TASK", copier.getId());
      }

      //start the on-disk-merge thread
      localFSMergerThread = new LocalFSMerger((LocalFileSystem)localFileSys);
      //start the in memory merger thread
      inMemFSMergeThread = new InMemFSMergeThread();
      localFSMergerThread.start();
      inMemFSMergeThread.start();
      jmxThreadInfoTracker.registerThreadToTask(
          "REDUCE_COPY_TASK", localFSMergerThread.getId());
      jmxThreadInfoTracker.registerThreadToTask(
          "REDUCE_COPY_TASK", inMemFSMergeThread.getId());

      // start the map events thread
      getMapEventsThread = new GetMapEventsThread();
      getMapEventsThread.start();
      jmxThreadInfoTracker.registerThreadToTask(
          "REDUCE_COPY_TASK", getMapEventsThread.getId());

      // start the clock for bandwidth measurement
      long startTime = System.currentTimeMillis();
      long currentTime = startTime;
      long lastProgressTime = startTime;
      long lastOutputTime = 0;

        // loop until we get all required outputs
        while (getNumMapsCopyCompleted() < numMaps && mergeThrowable == null) {

          currentTime = System.currentTimeMillis();
          boolean logNow = false;
          if (currentTime - lastOutputTime > MIN_LOG_TIME) {
            lastOutputTime = currentTime;
            logNow = true;
          }
          if (logNow) {
            LOG.info(
                reduceTask.getTaskID() + " Need another " +
                (numMaps - getNumMapsCopyCompleted()) + " map output(s) " +
                ", total " + numMaps + " where " + numInFlight + " is " +
                "already in progress");
          }

          // Put the hash entries for the failed fetches.
          Iterator<MapOutputLocation> locItr = retryFetches.iterator();

          while (locItr.hasNext()) {
            MapOutputLocation loc = locItr.next();
            loc.reset();
            List<MapOutputLocation> locList =
              mapLocations.get(loc.getHost());

            // Check if the list exists. Map output location mapping is cleared
            // once the jobtracker restarts and is rebuilt from scratch.
            // Note that map-output-location mapping will be recreated and hence
            // we continue with the hope that we might find some locations
            // from the rebuild map.
            if (locList != null) {
              // Add to the beginning of the list so that this map is
              //tried again before the others and we can hasten the
              //re-execution of this map should there be a problem
              locList.add(0, loc);
            }
          }

          if (retryFetches.size() > 0) {
            LOG.info(reduceTask.getTaskID() + ": " +
                  "Got " + retryFetches.size() +
                  " map-outputs from previous failures");
          }
          // clear the "failed" fetches hashmap
          retryFetches.clear();

          // now walk through the cache and schedule what we can
          int numScheduled = 0;
          int numHostDups = 0;
          int numDups = 0;

          synchronized (scheduledCopies) {
            // Map of http host to list of output locations (http is unique,
            // even if there are multiple task trackers on the same machine)
            Map<String, List<MapOutputLocation>> chosenLocationMap =
                new HashMap<String, List<MapOutputLocation>>();

            // Randomize the map output locations to prevent
            // all reduce-tasks swamping the same tasktracker
            List<String> hostList = new ArrayList<String>();
            hostList.addAll(mapLocations.keySet());

            Iterator<String> hostsItr = hostList.iterator();

            while (hostsItr.hasNext()) {
              String host = hostsItr.next();
              LOG.debug("fetchOutputs: Looking at host " + host + ", " +
                  "total  " + mapLocations.keySet().size());
              List<MapOutputLocation> knownOutputsByLoc =
                mapLocations.get(host);

              // Check if the list exists. Map output location mapping is
              // cleared once the jobtracker restarts and is rebuilt from
              // scratch.
              // Note that map-output-location mapping will be recreated and
              // hence we continue with the hope that we might find some
              // locations from the rebuilt map and add them for fetching.
              if (knownOutputsByLoc == null || knownOutputsByLoc.size() == 0) {
                continue;
              }

              //Identify duplicate hosts here
              if (uniqueHosts.contains(host)) {
                LOG.debug("fetchOutputs: Duplicate " + host +
                    ", numDups= " + numDups);
                numDups += knownOutputsByLoc.size();
                ++numHostDups;
                continue;
              }

              Long penaltyEnd = penaltyBox.get(host);
              boolean penalized = false;

              if (penaltyEnd != null) {
                if (currentTime < penaltyEnd.longValue()) {
                  penalized = true;
                } else {
                  penaltyBox.remove(host);
                }
              }

              if (penalized)
                continue;

              synchronized (knownOutputsByLoc) {
                locItr = knownOutputsByLoc.iterator();

                while (locItr.hasNext()) {
                  MapOutputLocation loc = locItr.next();

                  // Do not schedule fetches from OBSOLETE maps
                  if (obsoleteMapIds.contains(loc.getTaskAttemptId())) {
                    locItr.remove();
                    continue;
                  }

                  uniqueHosts.add(host);

                  List<MapOutputLocation> locationList =
                      chosenLocationMap.get(loc.getHttpTaskTracker());
                  if (locationList == null) {
                    locationList = new ArrayList<MapOutputLocation>();
                    chosenLocationMap.put(loc.getHttpTaskTracker(),
                                          locationList);
                  }
                  locationList.add(loc);
                  LOG.info("fetchOutputs: Scheduling location " + loc);
                  locItr.remove()// remove from knownOutputs
                  numInFlight++; numScheduled++;

                  //
                  // Commenting out this break allows fetching all the shards
                  // at once from a host, instead of fetching one at a time.
                  // See MAPREDUCE-318.
                  //
                  // break; //we have a map from this host
                  //
                }
              }

              // Add the HostMapOutputLocations to scheduled copies in chunks
              // of maxMapOutputsPerFetch
              List<HostMapOutputLocations> tmpScheduledCopies =
                  new ArrayList<HostMapOutputLocations>();
              for (Map.Entry<String, List<MapOutputLocation>> entry :
                  chosenLocationMap.entrySet()) {
                final List<MapOutputLocation> outputList = entry.getValue();
                int remaining = outputList.size();
                int index = 0;
                while (remaining >= maxMapOutputsPerFetch) {
                  tmpScheduledCopies.add(
                      new HostMapOutputLocations(entry.getKey(),
                          new ArrayList<MapOutputLocation>(
                              outputList.subList(
                                  index, index + maxMapOutputsPerFetch))));
                  index += maxMapOutputsPerFetch;
                  remaining -= maxMapOutputsPerFetch;
                }
                if (remaining > 0) {
                  tmpScheduledCopies.add(
                      new HostMapOutputLocations(entry.getKey(),
                          new ArrayList<MapOutputLocation>(
                              outputList.subList(index, index + remaining))));
                }
              }
              chosenLocationMap.clear();

              Collections.shuffle(tmpScheduledCopies, this.random);
              scheduledCopies.addAll(tmpScheduledCopies);
            }
            scheduledCopies.notifyAll();
          }

          if (numScheduled > 0 || logNow) {
            LOG.info(reduceTask.getTaskID() + " Scheduled " + numScheduled +
                   " outputs (" + penaltyBox.size() +
                   " slow hosts and " + numDups + " dup because hosts " +
                   "dup hosts " + numHostDups + ")");
          }

          if (penaltyBox.size() > 0 && logNow) {
            LOG.info("Penalized(slow) Hosts: ");
            for (String host : penaltyBox.keySet()) {
              LOG.info(host + " Will be considered after: " +
                  ((penaltyBox.get(host) - currentTime)/1000) + " seconds.");
            }
          }

          // if we have no copies in flight and we can't schedule anything
          // new, just wait for a bit
          try {
            if (numInFlight == 0 && numScheduled == 0) {
              // we should indicate progress as we don't want TT to think
              // we're stuck and kill us
              reporter.progress();
              synchronized (mapLocations) {
                mapLocations.wait(5000);
              }
            }
          } catch (InterruptedException e) { } // IGNORE

          while (numInFlight > 0 && mergeThrowable == null) {
            LOG.debug(reduceTask.getTaskID() + " numInFlight = " +
                      numInFlight);
            //the call to getCopyResult will either
            //1) return immediately with a null or a valid CopyResult object,
            //                 or
            //2) if the numInFlight is above maxInFlight, return with a
            //   CopyResult object after getting a notification from a
            //   fetcher thread,
            //So, when getCopyResult returns null, we can be sure that
            //we aren't busy enough and we should go and get more mapcompletion
            //events from the tasktracker
            CopyResult cr = getCopyResult(numInFlight);

            if (cr == null) {
              break;
            }

            LOG.info("Got new copy result - " + (++totalCopyResultsReceived)
                + " " + cr);
            if (cr.getSuccess()) {  // a successful copy
              numCopied++;
              lastProgressTime = System.currentTimeMillis();
              reduceShuffleBytes.increment(cr.getSize());

              long secsSinceStart =
                (System.currentTimeMillis()-startTime)/1000+1;
              float mbs = ((float)reduceShuffleBytes.getCounter())/(1024*1024);
              float transferRate = mbs/secsSinceStart;

              copyPhase.startNextPhase();
              copyPhase.setStatus("copy (" + numCopied + " of " + numMaps
                                  + " at " +
                                  mbpsFormat.format(transferRate) " MB/s)");

              // Note successful fetch for this mapId to invalidate
              // (possibly) old fetch-failures
View Full Code Here

TOP

Related Classes of org.apache.hadoop.util.Progress

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.