Package com.ikanow.infinit.e.data_model.InfiniteEnums

Examples of com.ikanow.infinit.e.data_model.InfiniteEnums.HarvestEnum


          }
          else { // 2] If in the middle of a multiple harvest cycle....
            // Specifically for custom, need to handle m/r changing ... we'll fake the harvest status
            // to force it to check the last doc's modified time vs the current file time...
           
            HarvestEnum saved = source.getHarvestStatus().getHarvest_status();
            source.getHarvestStatus().setHarvest_status(HarvestEnum.success);
            try {
              if (_context.getDuplicateManager().needsUpdated_Url(new Date(file.getDate()), null, source)) {
                _deleteExistingFilesBySourceKey = true;               
              }
View Full Code Here


      //System.out.println(" CANDIDATE=" + candidate.getKey() + " ..." + candidate.getId());     
     
      if ((null != sSourceType) && !candidate.getExtractType().equalsIgnoreCase(sSourceType)) {
        continue;
      }
      HarvestEnum candidateStatus = null;
      if (null != candidate.getHarvestStatus()) {
        candidateStatus = candidate.getHarvestStatus().getHarvest_status();
      }
      if (bSync && (null == candidateStatus)) { // Don't sync unharvested sources, obviously!
        continue;
      }
      //(DISTRIBUTION LOGIC)
     
      // Checking whether to respect the searchCycle_secs for distributed sources is a bit more complex
      boolean isDistributed = (null != candidate.getDistributionFactor());     
      boolean distributedInProcess = isDistributed && 
        candidate.reachedMaxDocs() ||  // (<- only set inside a process)
          ((null != candidate.getHarvestStatus()) && // (robustness)
              (null != candidate.getHarvestStatus().getDistributionTokensFree()) && // (else starting out)
                (candidate.getDistributionFactor() != candidate.getHarvestStatus().getDistributionTokensFree()));
                  // (else this is the start)
      //(TESTED - local and distributed)
      //(END DISTRIBUTION LOGIC)
     
      if (((HarvestEnum.success_iteration != candidateStatus) && !distributedInProcess)
          ||
          ((null != candidate.getSearchCycle_secs()) && (candidate.getSearchCycle_secs() < 0)))
      {
        // (ie EITHER we're not iteration OR we're disabled)
        //(^^^ don't respect iteration status if source manually disabled)
       
        if ((null != candidate.getSearchCycle_secs()) || (null != defaultSearchCycle_ms)) {
          if (null == candidate.getSearchCycle_secs()) {
            candidate.setSearchCycle_secs((int)(defaultSearchCycle_ms/1000));
          }
          if (candidate.getSearchCycle_secs() < 0) {
            continue; // negative search cycle => disabled
          }
          if ((null != candidate.getHarvestStatus()) && (null != candidate.getHarvestStatus().getHarvested())) {
            //(ie the source has been harvested, and there is a non-default search cycle setting)
           
            if ((candidate.getHarvestStatus().getHarvested().getTime() + 1000L*candidate.getSearchCycle_secs())
                > now.getTime())
            {
              if ((HarvestEnum.in_progress != candidateStatus) && (null != candidateStatus) && (null == candidate.getOwnerId()))
              {
                //(^^ last test, if it's in_progress then it died recently (or hasn't started) so go ahead and harvest anyway)
                // (also hacky use of getOwnerId just to see if this is a source override source or not)
                continue; // (too soon since the last harvest...)
              }//TESTED (including hacky use of ownerId)
            }
          }
        }//TESTED
      }
      //TESTED: manually disabled (ignore), not success_iteration (ignore if outside cycle), success_iteration (always process)
     
      query.put(SourcePojo._id_, candidate.getId());
      BasicDBObject modifyClause = new BasicDBObject();
      modifyClause.put(SourceHarvestStatusPojo.sourceQuery_harvest_status_, HarvestEnum.in_progress.toString());
      if (bSync) {
        modifyClause.put(SourceHarvestStatusPojo.sourceQuery_synced_, now);       
      }
      else {
        modifyClause.put(SourceHarvestStatusPojo.sourceQuery_harvested_, now);
      }
      modifyClause.put(SourceHarvestStatusPojo.sourceQuery_lastHarvestedBy_, getHostname());
      BasicDBObject modify = new BasicDBObject(MongoDbManager.set_, modifyClause);
     
      try {
        BasicDBObject fields = new BasicDBObject(SourcePojo.templateProcessingFlow_, 0);
        BasicDBObject dbo = (BasicDBObject) DbManager.getIngest().getSource().findAndModify(query, fields, null, false, modify, false, false);
        if (null != dbo) {
          SourcePojo fullSource = SourcePojo.fromDb(dbo, SourcePojo.class, new SourcePojoSubstitutionDbMap());
          nextSetToProcess.add(fullSource);
          nNumSourcesGot++;
         
          ////////////////////////////////////////////////////////////////////////
          //
          // DISTRIBUTION LOGIC:
          // If distributionFactor set then grab one token and set state back to
          // success_iteration, to allow other threads/processes to grab me
          if ((null != fullSource.getDistributionFactor()) && !bSync)
          {
            // Get the current distribution token
            int distributionToken = 0;           
            boolean bReset = false;
            if ((null == fullSource.getHarvestStatus()) || (null == fullSource.getHarvestStatus().getDistributionTokensFree())) {
              distributionToken = fullSource.getDistributionFactor();
              // (also set up some parameters so don't need to worry about null checks later)
              if (null == fullSource.getHarvestStatus()) {
                fullSource.setHarvestStatus(new SourceHarvestStatusPojo());
              }
              fullSource.getHarvestStatus().setDistributionTokensFree(distributionToken);
              fullSource.getHarvestStatus().setDistributionTokensComplete(0);
            }
            else {
              distributionToken = fullSource.getHarvestStatus().getDistributionTokensFree();
             
              //Check last harvested time to ensure this isn't an old state (reset if so)
              if ((distributionToken != fullSource.getDistributionFactor()) ||
                  (0 != fullSource.getHarvestStatus().getDistributionTokensComplete()))
              {
                if (null != fullSource.getHarvestStatus().getRealHarvested()) { // harvested is useless here because it's already been updated
                  if ((new Date().getTime() - fullSource.getHarvestStatus().getRealHarvested().getTime()) >
                      _ONEDAY) // (ie older than a day)
                  {
                    distributionToken = fullSource.getDistributionFactor(); // ie start again
                  }
                }
              }//TESTED
            }//(end check for any existing state)         

            if (distributionToken == fullSource.getDistributionFactor()) {
              bReset = true; // (first time through, might as well go ahead and reset to ensure all the vars are present)
            }

            // If in error then just want to grab all remaining tokens and reset the status
            if (HarvestEnum.error == fullSource.getHarvestStatus().getHarvest_status()) { // currently an error
              if (distributionToken != fullSource.getDistributionFactor()) { // In the middle, ie just errored
                fullSource.setDistributionTokens(new HashSet<Integer>());
                while (distributionToken > 0) {
                  distributionToken--;
                  fullSource.getDistributionTokens().add(distributionToken);                 
                }
                BasicDBObject dummy = new BasicDBObject();
                bReset = updateHarvestDistributionState_tokenComplete(fullSource, HarvestEnum.error, dummy, dummy);
                  // (then finish off completion down below)               
              }
            }//TESTED (error mode, 2 cases: complete and incomplete)
           
            //DEBUG
            //System.out.println(" DIST_SOURCE=" + fullSource.getKey() + "/" + fullSource.getDistributionFactor() + ": " + distributionToken + ", " + bReset);
           
            //(note we'll see this even if searchCycle is set because the "source" var (which still has the old
            // state) is stuck back at the start of uncheckedList, so each harvester will see the source >1 time)
           
            if (0 != distributionToken) { // (else no available tokens for this cycle)
              distributionToken--;
             
              fullSource.setDistributionTokens(new HashSet<Integer>());
              fullSource.getDistributionTokens().add(distributionToken);
             
              // Remove one of the available tokens (they don't get reset until the source is complete)
              updateHarvestDistributionState_newToken(fullSource.getId(), distributionToken, HarvestEnum.success_iteration, bReset);

              // After this loop is complete, put back at the start of the unchecked list
              // so another thread can pick up more tokens:
              if (null == putMeBackAtTheStart_distributed) {
                putMeBackAtTheStart_distributed = new LinkedList<SourcePojo>();
              }
              putMeBackAtTheStart_distributed.add(candidate);
             
              // Before adding back to list, set a transient field to ensure it bypasses any search cycle checks
              // (for in process logic where we won't see the update status from the DB)
              candidate.setReachedMaxDocs();
             
              // Reset full source's status so we know if we started in success/error/success_iteration
              if (null == candidateStatus) {
                candidateStatus = HarvestEnum.success;
              }
              fullSource.getHarvestStatus().setHarvest_status(candidateStatus);             
             
            } // (end if available tokens)
            else { // (don't process, just set back to original status)
              HarvestEnum harvestStatus = HarvestEnum.success;
              if (null != fullSource.getHarvestStatus()) {
                if (null != fullSource.getHarvestStatus().getHarvest_status()) {
                  harvestStatus = fullSource.getHarvestStatus().getHarvest_status();
                }
              }
View Full Code Here

TOP

Related Classes of com.ikanow.infinit.e.data_model.InfiniteEnums.HarvestEnum

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.