Package com.google.enterprise.connector.sharepoint.state

Examples of com.google.enterprise.connector.sharepoint.state.ListState


          // check is added to reduce the frequency with which
          // getAlerts WS call is made.
          LOGGER.fine("Getting alerts under site [ " + webURL + " ]");
          processAlerts(ws, sharePointClientContext);      
        }       
        ListState listForWeb = ws.lookupList(ws.getPrimaryKey());
        if (listForWeb != null) {
          LOGGER.fine("List State for web [ " + listForWeb.getListURL()
              + " ] is not null. Last Doc from List State is "
              + listForWeb.getLastDocProcessed());
        }
        boolean isFirstBatch = ((listForWeb == null)
            || (listForWeb.getLastDocProcessed() == null));
        // Crawl the site home page and web application policy in the
        // first batch and when a web application policy change is detected.
        if (ws.isWebApplicationPolicyChange()
            || isFirstBatch) {                       
          // Get site data for the web and update webState.       
View Full Code Here


      return;
    }

    final Calendar cLastMod = Calendar.getInstance();
    cLastMod.setTime(new Date());
    ListState currentDummySiteDataList = null;

    try {
      currentDummySiteDataList = new ListState(webState.getPrimaryKey(),
          webState.getTitle(), webState.getPrimaryKey(), cLastMod,
          SPConstants.SITE, webState.getPrimaryKey(), webState);
    } catch (final Exception e) {
      LOGGER.log(Level.WARNING, "Unable to create the dummy list state for site. "
          + webState.getWebUrl(), e);
      return;
    }

    // find the list in the Web state
    ListState dummySiteListState =
        webState.lookupList(currentDummySiteDataList.getPrimaryKey());
    if (dummySiteListState == null) {
      dummySiteListState = currentDummySiteDataList;
    }
    LOGGER.log(Level.INFO, "Getting site data. internalName [ "
        + webState.getWebUrl() + " ] ");
    List<SPDocument> documentList = new ArrayList<SPDocument>();
    SPDocument document = null;

    try {
      // SharePoint Client Context used to create SiteDataWS should point to
      // WebState URL. If not then SharePoint default page will point to
      // incorrect Web ID for Web State.
      SharepointClientContext ctxToPass =
          (SharepointClientContext) tempCtx.clone();
      ctxToPass.setSiteURL(webState.getWebUrl());
      final SiteDataHelper siteData = new SiteDataHelper(ctxToPass);
      // need to check whether the site exist or not and is not null
      if (webState.isExisting() && null != webState) {
        document = siteData.getSiteData(webState);
        document.setParentList(dummySiteListState);
        // Site Home Page document will be added as last doc from
        // dummy list state. This is required for sending delete feed.
      }
    } catch (final Exception e) {
      LOGGER.log(Level.WARNING, "Problem while getting site data. ", e);
   
    // Web Application Policy Document processing.
    // Web Application Policy Document will be associated with each webstate.
    if (sharepointClientContext.isPushAcls()) {
      try {
        AclHelper aclHelper = new AclHelper(sharepointClientContext,
            webState.getWebUrl());
        SPDocument webAppPolicy = aclHelper.getWebApplicationPolicy(webState,
            sharepointClientContext.getFeedType().toString());
        if (webAppPolicy != null) {
          webAppPolicy.setParentList(dummySiteListState);
          documentList.add(webAppPolicy);
        }
      } catch (final Exception e) {
        LOGGER.log(Level.WARNING, "Problem while getting web app policy. ", e);
      }
    }
    if ((dummySiteListState.isExisting() ||
        webState.isWebApplicationPolicyChange())
        && null != document) {
      // Mark dummy list state to true in order to differentiate this list state
      // with
      // other lists in web state.
      //adding list page document.
      documentList.add(document);
      dummySiteListState.setSiteDefaultPage(true);
      webState.AddOrUpdateListStateInWebState(dummySiteListState, currentDummySiteDataList.getLastMod());
      dummySiteListState.setCrawlQueue(documentList);
      // Resetting web application policy change flag. This will ensure
      // same webstate will not be processed again
      // for web application policy change.
      webState.setWebApplicationPolicyChange(false);
    }
View Full Code Here

              }
            }
          }
        }
      } else if (objType == ObjectType.LIST && null != changeObjectHint) {
        ListState listState = webstate.getListStateForGuid(changeObjectHint);
        if (null == listState) {
          LOGGER.log(Level.WARNING, "Changed List ID [ "
              + changeObjectHint
              + " ] is not found in the WebState. Skipping to the next change.");
          continue;
        }

        if (changeType == SPChangeType.AssignmentDelete) {
          // Assuming the worst case scenario of Limited Access
          // deletion
          LOGGER.log(Level.INFO, "Resetting list state URL [ "
              + listState
              + " ] because some role has been deleted and the deleted role"
              + " could be Limited Access.");
          listState.resetState();
        } else {
          if (supportsInheritedAcls) {
            // Revisit List home for ACL changes.
            listState.markListToRevisitListHome(sharepointClientContext.getFeedType());
            if (listState.isApplyReadSecurity()) {
              LOGGER.log(Level.INFO, "Resetting list state URL [ "
                  + listState.getListURL()
                  + " ] because effective permisssions modified"
                  + " for List with Read Security");
              listState.resetState();
            }
          } else {
            if (!processedLists.contains(listState)) {
              LOGGER.log(Level.INFO, "Marking List [ "
                  + listState
                  + " ] as a candidate for ACL based crawl because the effective"
                  + " ACL at this list have been updated. All the items with"
                  + " inheriting permissions wil be crawled from this list.");
              listState.startAclCrawl();
              processedLists.add(listState);
            }
          }
        }
      } else if (objType == ObjectType.USER
View Full Code Here

      return null;
    }
    ImmutableList.Builder<SPDocument> newListBuilder =
        new ImmutableList.Builder<SPDocument>();
    for (SPDocument doc : list.getCrawlQueue()) {
      ListState parentList = doc.getParentList();
      if (parentList == null) {
        LOGGER.log(Level.WARNING, "Document [{0}] is missing parent list. "
            + "Assigning [{1}] as parent list for document.",
            new Object[] {doc.getUrl(), list.getListURL()});
        doc.setParentList(list);
      } else {
        if (!list.getPrimaryKey().equals(parentList.getPrimaryKey())) {
          LOGGER.log(Level.WARNING,
              "Skipping document . Parent List - crawl queue mismatch"
              + " for document [{0}]. Parent List is [{1}]. "
              + "Crawl Queue is associated with list is [{2}].",
              new Object[] {doc, parentList, list});
View Full Code Here

    noOfVisitedListStates = 0;
    SPDocumentList resultSet = null;
    Iterator<ListState> iter = sendPendingDocs ? webState.getIterator()
        : webState.getCurrentListstateIterator();
    while (iter.hasNext()) {
      final ListState list = iter.next();
      if (list.isSiteDefaultPage()) {
        continue;
      }

      // Mark this list as current list so that the next traversal
      // request starts from here and already scanned lists are not
      // unnecessarily re-scanned.
      webState.setCurrentList(list);
      if (list.getCrawlQueue() == null) {
        continue;
      }

      SPDocumentList resultsList = null;

      try {
        LOGGER.log(Level.FINE, "Handling crawl queue for list URL [ "
            + list.getListURL() + " ]. ");
        resultsList = handleCrawlQueueForList(globalState, webState, list);
        noOfVisitedListStates++;
      } catch (final Exception e) {
        LOGGER.log(Level.WARNING, "Problem in handling crawl queue for list URL [ "
            + list.getListURL() + " ]. ", e);
      }

      if ((resultsList != null) && (resultsList.size() > 0)) {
        LOGGER.log(Level.INFO, resultsList.size()
            + " document(s) to be sent from list URL [ " + list.getListURL()
            + " ]. ");
        if (resultSet == null) {
          resultSet = resultsList;
        } else {
          resultSet.addAll(resultsList);
        }
      } else {
        LOGGER.log(Level.FINE, "No documents to be sent from list URL [ "
            + list.getListURL() + " ]. ");
      }
      if (resultsList != null) {
        sizeSoFar += resultsList.size();
      }

      // Check if the docs added so far meet the batchHint
      if (sizeSoFar >= sharepointClientContext.getBatchHint()) {
        LOGGER.info("Stopping traversal because batch hint "
            + sharepointClientContext.getBatchHint()
            + " has been reached. Processed documents: " + sizeSoFar);
        break;
      }
    }

    ListState listForWeb = webState.lookupList(webState.getPrimaryKey());
    if (listForWeb != null) {
      SPDocumentList resultsList =
          handleCrawlQueueForList(globalState, webState, listForWeb);
      if (resultsList != null) {
        if (resultSet != null) {
View Full Code Here

    }

    nDocuments = 0;
    doCrawl = true;

    ListState nextList = globalState.getLastCrawledList();
    WebState nextWeb = globalState.getLastCrawledWeb();

    if (null == nextWeb) {
      nextWeb = globalState.lookupWeb(sharepointClientContext.getSiteURL(),
          sharepointClientContext);
View Full Code Here

    }
    internalName += "_" + SPConstants.ALERTS_TYPE;

    final Calendar cLastMod = Calendar.getInstance();
    cLastMod.setTime(new Date());
    ListState currentDummyAlertList = null;

    try {
      currentDummyAlertList = new ListState(internalName,
          SPConstants.ALERTS_TYPE, SPConstants.ALERTS_TYPE, cLastMod,
          SPConstants.ALERTS_TYPE, internalName, webState);
    } catch (final Exception e) {
      LOGGER.log(Level.WARNING, "Unable to create the dummy list state for alerts. ", e);
      return;
    }
    if (currentDummyAlertList == null) {
      LOGGER.log(Level.WARNING, "Unable to create the dummy list state for alerts.");
      return;
    }

    // find the list in the Web state
    ListState dummyAlertListState = webState.lookupList(currentDummyAlertList.getPrimaryKey());
    if (dummyAlertListState == null) {
      dummyAlertListState = currentDummyAlertList;
    }
    LOGGER.log(Level.INFO, "Getting alerts. internalName [ " + internalName
        + " ] ");
    List<SPDocument> listCollectionAlerts = null;

    try {
      final AlertsHelper alerts = new AlertsHelper(tempCtx);
      listCollectionAlerts = alerts.getAlerts(webState, dummyAlertListState);
    } catch (final Exception e) {
      LOGGER.log(Level.WARNING, "Problem while getting alerts. ", e);
    }
    if (dummyAlertListState.isExisting()) {
      webState.AddOrUpdateListStateInWebState(dummyAlertListState, currentDummyAlertList.getLastMod());
      dummyAlertListState.setCrawlQueue(listCollectionAlerts);
      if (listCollectionAlerts != null) {
        nDocuments += listCollectionAlerts.size();
      }
    }
  }
View Full Code Here

    // That metadata is ListState.InheritiedSecurity flag which is very
    // important while processing ACL related changes.
    // TODO: with some re-structuring of code, we can still avoid this extra
    // iteration.
    for (ListState currentListState : listCollection) {
      ListState listState = webState.lookupList(currentListState.getPrimaryKey());
      if (null != listState) {
        if (!listState.getListURL().equalsIgnoreCase(
            currentListState.getListURL())) {
          tempCtx.logToFile(SPConstants.DEFAULT_VIEW_URL_CHANGE_LOG,
              listState.getListURL());         
        }
        listState.updateList(currentListState);       
      }
    }

    /*
     * If the nextList belongs the current web and is still existing on the
     * SharePoint site, start traversing for this list onwards.
     */
    if (null != nextList && nextList.getParentWebState().equals(webState)
        && listCollection.contains(nextList)) {
      Collections.rotate(listCollection, -(listCollection.indexOf(nextList)));
    }

    AclHelper aclHelper = new AclHelper(tempCtx, webState.getWebUrl());
    try {
      aclHelper.fetchAclChangesSinceTokenAndUpdateState(webState);
    } catch (final Exception e) {
      LOGGER.log(Level.WARNING, "Problem Interacting with Custom ACL WS. web site [ "
          + webState.getWebUrl() + " ]. ", e);
    }

    List<SPDocument> aclChangedItems = null;
    final ListsHelper listsHelper = new ListsHelper(tempCtx);
    for (int i = 0; i < listCollection.size(); i++) {
      final ListState currentList = listCollection.get(i);
      ListState listState = webState.lookupList(currentList.getPrimaryKey());

      if (sharepointClientContext.isUseSPSearchVisibility()) {
        // If this list is marked for No Crawling, do not crawl this
        // list.
        // Please note that, if this list is already known to the
        // connector, it'll keep existing in the connector's state. This
        // implies that if a list is marked as NoCrawl list on
        // SharePoint in between the connector's traversal, crawling of
        // this list will be paused at whatever state it is in. As soon
        // as the NoCrawl flag on SharePoint is reverted, the crawling
        // will be resumed from the saved state.
        if (currentList.isNoCrawl()) {
          LOGGER.log(Level.WARNING, "Skipping List URL [ "
              + currentList.getListURL()
              + " ] while crawling because it has been marked for No Crawling on SharePoint. ");
          if (null == listState) {
            // Make this list known by keeping it in the state. But,
            // do not crawl
            webState.AddOrUpdateListStateInWebState(currentList, currentList.getLastMod());
          }
          continue;
        }
      }

      /*
       * If we already knew about this list, then only fetch docs that have
       * changed since the last doc we processed. If it's a new list (e.g. the
       * first SharePoint traversal), we fetch everything.
       */
      if (listState == null) {
        listState = currentList;
        listState.setNewList(true);
        webState.AddOrUpdateListStateInWebState(listState, listState.getLastMod());
        LOGGER.info("discovered new listState. List URL: "
            + listState.getListURL());
        if (SPType.SP2007 == webState.getSharePointType()) {
          if (FeedType.CONTENT_FEED == sharepointClientContext.getFeedType()) {
            // In case of content feed, we need to keep track of
            // folders and the items under that. This is required
            // for sending delete feeds for the documents when their
            // parent folder is deleted.
            LOGGER.log(Level.CONFIG, "Discovering all folders under current list/library [ "
                + listState.getListURL() + " ] ");
            try {
              listsHelper.getSubFoldersRecursively(listState, null, null);
            } catch (final Exception e) {
              LOGGER.log(Level.WARNING, "Exception occured while getting the folders hierarchy for list [ "
                  + listState.getListURL() + " ]. ", e);
            } catch (final Throwable t) {
              LOGGER.log(Level.WARNING, "Error occured while getting the folders hierarchy for list [ "
                  + listState.getListURL() + " ]. ", t);
            }
          }

          try {
            listItems = listsHelper.getListItemChangesSinceToken(listState, allWebs);
          } catch (final Exception e) {
            LOGGER.log(Level.WARNING, "Exception thrown while getting the documents under list [ "
                + listState.getListURL() + " ].", e);
          } catch (final Throwable t) {
            LOGGER.log(Level.WARNING, "Error thrown while getting the documents under list [ "
                + listState.getListURL() + " ].", t);
          }
        } else {
          try {
            listItems = listsHelper.getListItems(listState, null, null, allWebs);
          } catch (final Exception e) {
            LOGGER.log(Level.WARNING, "Exception thrown while getting the documents under list [ "
                + listState.getListURL() + " ].", e);
          }
        }
      } else {
        LOGGER.info("revisiting listState [ " + listState.getListURL() + " ]. ");
        listState.setExisting(true);
        listState.setNextPage(null);

        String lastDocID = null;

        SPDocument lastDoc = listState.getLastDocForWSRefresh();

        /*
         * We must ensure that the last doc that we are using was actually sent
         * as ADD feed and not as DELETE feed. It might be possible that in one
         * cycle we identify a list as non-existing and hence started sending
         * delete feeds for it. But, in the next cycle that list has been
         * restored, in that case we can not rely on the lastDoc which has been
         * set by a delete feed. We also need to reset the change token in that
         * case to start a full crawl.
         */
        if (lastDoc != null) {
          if (FeedType.CONTENT_FEED == sharepointClientContext.getFeedType()
              && ActionType.DELETE.equals(lastDoc.getAction())) {
            listState.resetState();
            if (FeedType.CONTENT_FEED == sharepointClientContext.getFeedType()) {
              // In case of content feed, we need to keep track of
              // folders and the items under that. This is
              // required for sending delete feeds for the
              // documents when their parent folder is deleted.
              LOGGER.log(Level.CONFIG, "Discovering all folders under current list/library [ "
                  + listState.getListURL() + " ] ");
              try {
                listsHelper.getSubFoldersRecursively(listState, null, null);
              } catch (final Exception e) {
                LOGGER.log(Level.WARNING, "Exception occured while getting the folders hierarchy for list [ "
                    + listState.getListURL() + " ]. ", e);
              } catch (final Throwable t) {
                LOGGER.log(Level.WARNING, "Error occured while getting the folders hierarchy for list [ "
                    + listState.getListURL() + " ]. ", t);
              }
            }
            LOGGER.info("recrawling the items under listState [ "
                + listState.getListURL()
                + " ] because this list has been restored after deletion.");
          } else {
            lastDocID = Util.getOriginalDocId(lastDoc.getDocId(), sharepointClientContext.getFeedType());
          }
        }

        if (SPType.SP2007.equals(webState.getSharePointType())) {
          try {
            webState.AddOrUpdateListStateInWebState(listState, currentList.getLastMod());

            // Any documents to be crawled because of ACL Changes
            aclChangedItems = aclHelper.
                getListItemsForAclChangeAndUpdateState(listState, listsHelper);

            if (null == aclChangedItems
                || aclChangedItems.size() < sharepointClientContext.getBatchHint()) {
              // Do regular incremental crawl
              listItems = listsHelper.getListItemChangesSinceToken(listState, allWebs);
            }
          } catch (final Exception e) {
            LOGGER.log(Level.WARNING, "Exception thrown while getting the documents under list [ "
                + listState.getListURL() + " ].", e);
          } catch (final Throwable t) {
            LOGGER.log(Level.WARNING, "Error thrown while getting the documents under list [ "
                + listState.getListURL() + " ].", t);
          }
        } else {
          try {
            final Calendar dateSince = listState.getDateForWSRefresh();
            webState.AddOrUpdateListStateInWebState(listState, currentList.getLastMod());
            LOGGER.info("fetching changes since " + Util.formatDate(dateSince)
                + " for list [ " + listState.getListURL() + " ]. ");

            // check if date modified for the document library
            final Calendar dateCurrent = listState.getLastModCal();
            if (dateSince.before(dateCurrent)) {
              listState.setNewList(true);
            }

            listItems = listsHelper.getListItems(listState, dateSince, lastDocID, allWebs);
          } catch (final Exception e) {
            LOGGER.log(Level.WARNING, "Exception thrown while getting the documents under list [ "
                + listState.getListURL() + " ].", e);
          } catch (final Throwable t) {
            LOGGER.log(Level.WARNING, "Error thrown while getting the documents under list [ "
                + listState.getListURL() + " ].", t);
          }
        }
      }

      // Get the attachments for each discovered items, if the list allows
      // attachments
      if (listState.canContainAttachments() && (listItems != null)) {
        final List<SPDocument> attachmentItems = new ArrayList<SPDocument>();
        for (int j = 0; j < listItems.size(); j++) {
          final SPDocument doc = listItems.get(j);
          if (ActionType.ADD.equals(doc.getAction())) {
            final List<SPDocument> attachments = listsHelper.getAttachments(listState, doc);
            attachmentItems.addAll(attachments);
          }
        }
        listItems.addAll(attachmentItems);
      }

      if (listState.getNextPage() == null) {
        if (((listItems != null) && (listItems.size() > 0))
            || (listState.isNewList())) {
          SPDocument listDoc = listState.getDocumentInstance(
              sharepointClientContext.getFeedType());
          listItems.add(listDoc);
          listState.setNewList(false);
        }
      } else {
        // Send List home page as part of this batch to complete inheritance
        // chain for discovered child items for partially traversed List.
        if (listState.isNewList() && listItems != null && listItems.size() > 0
            && sharepointClientContext.getTraversalContext()
            .supportsInheritedAcls() && !Strings.isNullOrEmpty(
            listState.getListItemCollectionPositionNext())) {
          SPDocument listDoc = listState.getDocumentInstance(
              sharepointClientContext.getFeedType());
          listItems.add(listDoc);
        }

        // If any of the list has not been traversed completely, doCrawl
        // must not be set true.
        doCrawl = false;
      }

      // Add aclChangedItems to the docs crawled under regular crawling.
      // This is the right place to do this because all the operations
      // pertaining to regular crawling have been made. But, the
      // batch-hint check is yet to be done
      if (null != aclChangedItems) {
        if (null != listItems) {
          listItems.addAll(aclChangedItems);
        } else {
          listItems = aclChangedItems;
        }
      }

      listState.setCrawlQueue(listItems);
      // Set the last crawled date time. This is informative value for the
      // user viewing the state file
      listState.setLastCrawledDateTime(Util.getCurrentTimestampString());

      if (null == listItems || listItems.size() == 0) {
        LOGGER.log(Level.CONFIG, "No items found from list " + listState);
      } else {
        Collections.sort(listItems);
View Full Code Here

    if (null == listCrawlInfo) {
      return;
    }

    for (ListCrawlInfo info : listCrawlInfo) {
      ListState listState = listCrawlInfoMap.get(info.getListGuid());
      if (null == listState) {
        LOGGER.log(Level.SEVERE, "One of the List GUID [ " + info.getListGuid()
            + " ] can not be found in the parentWebState. ");
        continue;
      }
      if (!info.isStatus()) {
        LOGGER.log(Level.WARNING, "GSSiteDiscovery has encountered the "
            + "following error while getting the crawl info for list URL [ "
            + listState.getListURL() + " ], GUID [ "
            + listState.getPrimaryKey() + " ]. "
            + "WS error [ " + info.getError() + " ].");
        continue;
      }
     
      // Set ListState.NoCrawl only if connector is configured
      // to use SharePoint Search Visibility.
      if (sharepointClientContext.isUseSPSearchVisibility()) {
        listState.setNoCrawl(info.isNoCrawl());
      }
      boolean allowAnonymousAccess =
          Boolean.parseBoolean(info.getAnonymousAccess());
      if (allowAnonymousAccess != listState.isAllowAnonymousAccess()) {
        LOGGER.log(Level.INFO, "Anonymous Access settings changed from "
            + listState.isAllowAnonymousAccess() + " to " + allowAnonymousAccess
            + " for List [" + listState.getListURL() + "]");
      }
      listState.setAllowAnonymousAccess(allowAnonymousAccess);
    }
  }
View Full Code Here

   * @return instance of {@link ListState}
   */
  public static ListState getListState(String url, int dayOfMonth, int docId,
      String primaryKey, WebState webState) throws SharepointException {
    DateTime dt = new DateTime(2009, 9, dayOfMonth, 11, 26, 38, 100);
    ListState ls = new ListState(primaryKey, "inTitle",
        SPConstants.GENERIC_LIST, dt.toCalendar(Locale.ENGLISH), "", url,
        webState);

    ls.setPrimaryKey(primaryKey);
    ls.setType(SPConstants.GENERIC_LIST);
    SPDocument doc = new SPDocument(new Integer(docId).toString(), "X",
        Calendar.getInstance(), null);
    ls.setLastDocProcessed(doc);
    ls.setUrl(url);

    ls.setLastMod(dt);

    ls.setCrawlQueue(getDocuments(webState, ls));

    return ls;
  }
View Full Code Here

TOP

Related Classes of com.google.enterprise.connector.sharepoint.state.ListState

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.