Package com.google.enterprise.connector.sharepoint.spiimpl

Examples of com.google.enterprise.connector.sharepoint.spiimpl.SPDocument$SPContent


   * making WS calls.
   *
   * @return {@link SPDocument}
   */
  public SPDocument getLastDocForWSRefresh() {
    SPDocument lastDocFromCrawlQueue = getLastDocInCrawlQueueOfActionTypeADD();
    if (null == lastDocFromCrawlQueue) {
      return lastDocProcessed;
    } else if (lastDocFromCrawlQueue.compareTo(lastDocProcessed) > 0) {
      return lastDocFromCrawlQueue;
    } else {
      return lastDocProcessed;
    }
  }
View Full Code Here


  /**
   * Finds the document closest to the end of the crawl queue whose action
   * is {@code ActionType.ADD}.
   *
   * @return the last ADD document in the crawl queue, or {@code null} when
   *         the queue is null, empty, or holds no ADD document
   */
  private SPDocument getLastDocInCrawlQueueOfActionTypeADD() {
    if (null == crawlQueue || crawlQueue.size() == 0) {
      // No queue, or nothing in it: there is no candidate document.
      return null;
    } else {
      // Scan from the tail towards the head so the first match found is the
      // last ADD document in queue order.
      for (int i = crawlQueue.size() - 1; i >= 0; --i) {
        SPDocument lastDoc = crawlQueue.get(i);
        if (ActionType.ADD.equals(lastDoc.getAction())) {
          return lastDoc;
        }
      }
    }
    // The queue contained documents, but none of them had the ADD action.
    return null;
View Full Code Here

   */
  public void dumpCrawlQueue() {
    if ((crawlQueue != null) && (crawlQueue.size() > 0)) {
      LOGGER.config("Crawl queue for " + getListURL());
      for (int iDoc = 0; iDoc < crawlQueue.size(); ++iDoc) {
        final SPDocument doc = crawlQueue.get(iDoc);
        LOGGER.config(doc.getLastMod().getTime() + ", " + doc.getUrl());
        doc.dumpAllAttrs();
      }
    } else {
      LOGGER.config("Empty crawl queue for " + getListURL());
    }
  }
View Full Code Here

  /**
   * Marks a list to revisit List Home Page
   */
  public void markListToRevisitListHome(FeedType feedType) {
    SPDocument spDocListHome = getDocumentInstance(feedType);
    if (spDocListHome != null) {
      if (crawlQueue == null) {
        // Initialize crawl queue for List if empty.
        crawlQueue = new ArrayList<SPDocument>();
      }
View Full Code Here

  /**
   * Builds an {@link SPDocument} that represents this list itself.
   * <p>
   * NOTE(review): only the first part of this method is visible in this
   * excerpt; the return statement and anything after the exclusion check
   * are not shown, so the remainder should be confirmed against the full
   * source.
   *
   * @param feedType the feed type the document is created for; it decides
   *        the shape of the document ID and is passed on to the new document
   */
  public SPDocument getDocumentInstance(FeedType feedType) {
    // Default document ID is the list's primary key.
    String docId = getPrimaryKey();
    if (FeedType.CONTENT_FEED == feedType) {
      // For content feeds the ID is the list URL, the DOC_TOKEN separator and
      // the primary key joined together.
      docId = getListURL() + SPConstants.DOC_TOKEN + getPrimaryKey();
    }
    final SPDocument listDoc = new SPDocument(docId, getListURL(),
        getLastModCal(), SPConstants.NO_AUTHOR, getBaseTemplate(),
        getParentWebState().getTitle(), feedType,
        getParentWebState().getSharePointType());

    // Carry over this list's attributes and link the document back to it.
    listDoc.setAllAttributes(getAttrs());
    listDoc.setParentList(this);

    // If the parent web is not configured to crawl ASPX pages, mark the
    // document type as ACL.
    if (getParentWebState().isCrawlAspxPages() == false) {
      listDoc.setDocumentType(DocumentType.ACL);
    }

    if (!isSendListAsDocument()) {
      // The list is not to be sent as a document, so flag it as not to be
      // fed. Per the warning below, this is the case where the list's ASPX
      // page is excluded by the URL patterns.
      listDoc.setToBeFed(false);
      LOGGER.log(Level.WARNING, "List Document marked as not to be fed "
          + "because list ASPX page is not supposed to be crawled as per "
          + "exclusion patterns");
      // TODO log it in excludedUrl.log
    }
View Full Code Here

        LOGGER.log(Level.WARNING, "Either entityUrl [ " + entityUrl
            + " ] is unavailable or No ACE found in the ACL. WSLog [ "
            + acl.getLogMessage() + " ] ");
        continue;
      }
      SPDocument document = urlToDocMap.get(entityUrl);
      if (document == null) {
        LOGGER.warning(
            "No document found in urlToDocMap map for the entityUrl [ "
            + entityUrl + " ], WSLog [ " + acl.getLogMessage() + " ] ");
        continue;
      }
      LOGGER.log(Level.CONFIG, "WsLog [ " + acl.getLogMessage() + " ] ");
      boolean allowAnonymousAccess = Boolean.parseBoolean(
          acl.getAnonymousAccess());
      if (allowAnonymousAccess) {
        LOGGER.log(Level.INFO, "Document [ " + document.getUrl()
            + " ] is identified as Public Document");
       document.setPublicDocument(allowAnonymousAccess);
       continue;
      }
      boolean largeAcl = Boolean.parseBoolean(acl.getLargeAcl());
      if (largeAcl) {
        boolean inheritPermissions =
            Boolean.parseBoolean(acl.getInheritPermissions());
        if (inheritPermissions) {
          String parentUrl = acl.getParentUrl();
          // if parentUrl is null or empty then document will be processed
          // as largeAcl document.
          if (!Strings.isNullOrEmpty(parentUrl)) {
            LOGGER.log(Level.INFO, "Document [ " + document.getUrl()
                + " ] is idenified as Large ACL but with inherit permissions, "
                + "with Parent URL as " + parentUrl);
            List<SPDocument> childList =
                reprocessDocs.get(parentUrl);
            if (childList == null) {
              childList = Lists.newArrayList();             
            }
            childList.add(document);
            reprocessDocs.put(parentUrl, childList);
            continue ACL;
          }         
        }
        LOGGER.log(Level.INFO, "Document [ " + document.getUrl()
            + " ] needs to be reprocessed as Large ACL Document");
        largeAclUrlsToReprocess.add(document.getUrl());
        largeACLUrlToDocMap.put(document.getUrl(), document);
        continue ACL;
      }
      Set<Principal> aclUsers = Sets.newHashSet();
      Set<Principal> aclGroups = Sets.newHashSet();
      Set<Principal> aclDenyUsers = Sets.newHashSet();
      Set<Principal> aclDenyGroups = Sets.newHashSet();
      document.setUniquePermissions(
          !Boolean.parseBoolean(acl.getInheritPermissions()));
      if (!Strings.isNullOrEmpty(acl.getParentUrl())) {
        if (sharepointClientContext.isIncludedUrl(acl.getParentUrl())) {
          document.setParentUrl(acl.getParentUrl());
          document.setParentId(acl.getParentId());
        } else {
          if (document.isUniquePermissions()) {
            document.setParentUrl(sharepointClientContext.getSiteURL());
            document.setParentId(acl.getParentId());
          } else {
            LOGGER.log(Level.INFO, "Document [ " + document.getUrl()
                + " ] needs to be reprocessed as Parent Url ["
                + acl.getParentUrl() + "] is not included for Traversal");
            docUrlsToReprocess.add(document.getUrl());
            excludedParentUrlToDocMap.put(document.getUrl(), document);
            continue ACL;
          }
        }
      }
      for (GssAce ace : allAces) {
        // Handle Principal
        GssPrincipal principal = ace.getPrincipal();
        if (null == principal) {
          LOGGER.log(Level.WARNING, "No Principal found in ace.");
          continue;
        }
        if (null == principal.getType() || null == principal.getName()) {
          LOGGER.log(Level.WARNING, "Either Principal Name [ "
              + principal.getName() + " ] or Principal Type [ "
              + principal.getType() + " ]  is unavailable");
          continue;
        }

        // Handle Permissions
        GssSharepointPermission permissions = ace.getPermission();
        if (null == permissions) {
          LOGGER.log(Level.WARNING, "No permissions found for Principal [ "
              + principal.getName() + " ] ");
          continue;
        }
        // Check to determine whether the object-type of the document is list
        // list-item or site.

        ObjectType objectType = ObjectType.ITEM;

        if (document.getObjType().equals(SPConstants.SITE)) {
          objectType = ObjectType.SITE_LANDING_PAGE;
        } else if (null != document.getParentList()) {
          if (document.getParentList().getPrimaryKey().equals(
              Util.getOriginalDocId(document.getDocId(),
                  document.getFeedType()))) {
            objectType = ObjectType.LIST;
          }
        }
        final String principalName = getPrincipalName(principal);
        String siteCollUrl = wsResult.getSiteCollectionUrl();
        String[] deniedPermissions = permissions.getDeniedPermission();
        if (null != deniedPermissions) {
          Set<RoleType> deniedRoleTypes =
              getRoleTypesFor(deniedPermissions, objectType);
          if (deniedRoleTypes.size() > 0) {
            LOGGER.fine("Denied Permission list "
                + Arrays.asList(permissions.getDeniedPermission())
                + " for the User " + principalName);
            LOGGER.fine("Principal [" + principalName
                + "] Denied Role Types [ " + deniedRoleTypes + " ]");
            // Pass denied permissions only if Peeker or Reader role is denied.
            if (deniedRoleTypes.contains(RoleType.PEEKER)
                || deniedRoleTypes.contains(RoleType.READER)) {
              if (supportsDenyAcls) {
                LOGGER.fine("Processing Deny permissions"
                    + " for Principal ["+ principalName + "]");
                processPrincipal(principal, aclDenyUsers, aclDenyGroups,
                    principalName, siteCollUrl, memberships, webState);
              } else {
                // Skipping ACL as denied ACLs are not supported as per
                // Traversal Context.
                LOGGER.warning("Skipping ACL as Deny permissions are detected"
                    + "for Document [" + entityUrl + "] for Principal ["
                    + principalName + " ] when Supports Deny ACL ["
                    + supportsDenyAcls + "].");
                continue ACL;
              }
            }
          }
        }
        LOGGER.fine("Permission list "
            + Arrays.asList(permissions.getAllowedPermissions())
            + " for the User " + principalName);
        Set<RoleType> allowedRoleTypes =
            getRoleTypesFor(permissions.getAllowedPermissions(), objectType);
        if (!allowedRoleTypes.isEmpty()) {
          LOGGER.fine("Principal [ "+ principalName
              + " ] Allowed Role Types [ "+ allowedRoleTypes + " ]");
          // Pass allowed permissions only if role other than Peeker is allowed.
          if (allowedRoleTypes.contains(RoleType.READER)
              || allowedRoleTypes.contains(RoleType.WRITER)
              || allowedRoleTypes.contains(RoleType.OWNER)) {
            processPrincipal(principal, aclUsers, aclGroups,
                principalName, siteCollUrl, memberships, webState);
          }
        }
      }
      document.setAclUsers(aclUsers);
      document.setAclGroups(aclGroups);
      document.setAclDenyUsers(aclDenyUsers);
      document.setAclDenyGroups(aclDenyGroups);
    }

    if (!reprocessDocs.isEmpty()) {
      for (String parentUrl : reprocessDocs.keySet()) {
        LOGGER.fine("Processing Parent URL [ "+ parentUrl + " ] ");
        SPDocument parent = new SPDocument(parentUrl, parentUrl,
            Calendar.getInstance(), ActionType.ADD);
        fetchAclForSPDocument(parentUrl, parent, webState);
        List<SPDocument> childList = reprocessDocs.get(parentUrl);
        if (childList != null) {
          for(SPDocument child : childList) {
            copyAcls(parent, child);
          }
        }
      }
    }

    if (!largeAclUrlsToReprocess.isEmpty()) {
      for (String largeACLUrl : largeAclUrlsToReprocess) {
        SPDocument documentToPass = largeACLUrlToDocMap.get(largeACLUrl);
        if (documentToPass != null) {
          fetchAclForSPDocument(largeACLUrl, documentToPass, webState);
        }
      }
    }
View Full Code Here

        }

        if (wsResult.isMoreDocs()) {
          listState.updateAclCrawlStatus(true, wsResult.getLastIdVisited());
        } else {
          SPDocument listDoc = listState.getDocumentInstance(sharepointClientContext.getFeedType());
          listDoc.setForAclChange(true);
          aclChangedDocs.add(listDoc);
          if (null != aclChangedDocs && aclChangedDocs.size() > 0) {
            // We have crawled the last set of documents and there
            // are
            // no more documents to be crawled. However, we can not
View Full Code Here

            "Call to getAclForWebApplicationPolicy failed.", e);
      }
    });

    FeedType feedType = FeedType.getFeedType(strFeedType);
    SPDocument webAppPolicy = null;
    if (result == null) {
      return webAppPolicy;
    }
    String siteCollectionUrlToUse;
    if (sharepointClientContext.isIncludedUrl(result.getSiteCollectionUrl())) {
      siteCollectionUrlToUse = result.getSiteCollectionUrl();
    } else {
      LOGGER.log(Level.INFO,
          "Changing web app policy URL to connector URL ["
          + sharepointClientContext.getSiteURL() + "] as policy URL [ "
          + result.getSiteCollectionUrl() + " ] is not included.");
      siteCollectionUrlToUse = sharepointClientContext.getSiteURL();
    }
    String docID = siteCollectionUrlToUse;
    if (feedType == FeedType.CONTENT_FEED) {
      docID = docID + "|{" + result.getSiteCollectionGuid().toUpperCase() +"}";
    }
    // TODO Set SPType and Last Modified correctly.
    webAppPolicy = new SPDocument(docID, siteCollectionUrlToUse,
        Calendar.getInstance(), SPConstants.NO_AUTHOR, SPConstants.NO_OBJTYPE,
        siteCollectionUrlToUse, feedType, SPType.SP2007);
    webAppPolicy.setDocumentType(DocumentType.ACL);
    Map<String, SPDocument> urlToDocMap = Maps.newHashMap();
    urlToDocMap.put(result.getSiteCollectionUrl(), webAppPolicy);
    try {
      processWsResponse(result, urlToDocMap, webState);
    } catch (SharepointException ex) {
      LOGGER.log(Level.WARNING,
          "Error processing ACL response for web application policy", ex);
    }
    webAppPolicy.setWebAppPolicyDoc(true);
    return webAppPolicy;
  }
View Full Code Here

      final String docName, final FeedType feedType, final Boolean fixedId) {
    String docUrl = webUrl + "/" + docName;
    String docId = generateId(docUrl, fixedId);

    // TODO: Using hardcoded SPType.SP2007.
    return new SPDocument(docId, docUrl, Calendar.getInstance(),
        SPConstants.NO_AUTHOR, SPConstants.NO_OBJTYPE,
        SPConstants.PARENT_WEB_TITLE, feedType, SPConstants.SPType.SP2007);
  }
View Full Code Here

        listState.setExisting(true);
        listState.setNextPage(null);

        String lastDocID = null;

        SPDocument lastDoc = listState.getLastDocForWSRefresh();

        /*
         * We must ensure that the last doc that we are using was actually sent
         * as ADD feed and not as DELETE feed. It might be possible that in one
         * cycle we identify a list as non-existing and hence started sending
         * delete feeds for it. But, in the next cycle that list has been
         * restored, in that case we can not rely on the lastDoc which has been
         * set by a delete feed. We also need to reset the change token in that
         * case to start a full crawl.
         */
        if (lastDoc != null) {
          if (FeedType.CONTENT_FEED == sharepointClientContext.getFeedType()
              && ActionType.DELETE.equals(lastDoc.getAction())) {
            listState.resetState();
            if (FeedType.CONTENT_FEED == sharepointClientContext.getFeedType()) {
              // In case of content feed, we need to keep track of
              // folders and the items under that. This is
              // required for sending delete feeds for the
              // documents when their parent folder is deleted.
              LOGGER.log(Level.CONFIG, "Discovering all folders under current list/library [ "
                  + listState.getListURL() + " ] ");
              try {
                listsHelper.getSubFoldersRecursively(listState, null, null);
              } catch (final Exception e) {
                LOGGER.log(Level.WARNING, "Exception occured while getting the folders hierarchy for list [ "
                    + listState.getListURL() + " ]. ", e);
              } catch (final Throwable t) {
                LOGGER.log(Level.WARNING, "Error occured while getting the folders hierarchy for list [ "
                    + listState.getListURL() + " ]. ", t);
              }
            }
            LOGGER.info("recrawling the items under listState [ "
                + listState.getListURL()
                + " ] because this list has been restored after deletion.");
          } else {
            lastDocID = Util.getOriginalDocId(lastDoc.getDocId(), sharepointClientContext.getFeedType());
          }
        }

        if (SPType.SP2007.equals(webState.getSharePointType())) {
          try {
            webState.AddOrUpdateListStateInWebState(listState, currentList.getLastMod());

            // Any documents to be crawled because of ACL Changes
            aclChangedItems = aclHelper.
                getListItemsForAclChangeAndUpdateState(listState, listsHelper);

            if (null == aclChangedItems
                || aclChangedItems.size() < sharepointClientContext.getBatchHint()) {
              // Do regular incremental crawl
              listItems = listsHelper.getListItemChangesSinceToken(listState, allWebs);
            }
          } catch (final Exception e) {
            LOGGER.log(Level.WARNING, "Exception thrown while getting the documents under list [ "
                + listState.getListURL() + " ].", e);
          } catch (final Throwable t) {
            LOGGER.log(Level.WARNING, "Error thrown while getting the documents under list [ "
                + listState.getListURL() + " ].", t);
          }
        } else {
          try {
            final Calendar dateSince = listState.getDateForWSRefresh();
            webState.AddOrUpdateListStateInWebState(listState, currentList.getLastMod());
            LOGGER.info("fetching changes since " + Util.formatDate(dateSince)
                + " for list [ " + listState.getListURL() + " ]. ");

            // check if date modified for the document library
            final Calendar dateCurrent = listState.getLastModCal();
            if (dateSince.before(dateCurrent)) {
              listState.setNewList(true);
            }

            listItems = listsHelper.getListItems(listState, dateSince, lastDocID, allWebs);
          } catch (final Exception e) {
            LOGGER.log(Level.WARNING, "Exception thrown while getting the documents under list [ "
                + listState.getListURL() + " ].", e);
          } catch (final Throwable t) {
            LOGGER.log(Level.WARNING, "Error thrown while getting the documents under list [ "
                + listState.getListURL() + " ].", t);
          }
        }
      }

      // Get the attachments for each discovered items, if the list allows
      // attachments
      if (listState.canContainAttachments() && (listItems != null)) {
        final List<SPDocument> attachmentItems = new ArrayList<SPDocument>();
        for (int j = 0; j < listItems.size(); j++) {
          final SPDocument doc = listItems.get(j);
          if (ActionType.ADD.equals(doc.getAction())) {
            final List<SPDocument> attachments = listsHelper.getAttachments(listState, doc);
            attachmentItems.addAll(attachments);
          }
        }
        listItems.addAll(attachmentItems);
      }

      if (listState.getNextPage() == null) {
        if (((listItems != null) && (listItems.size() > 0))
            || (listState.isNewList())) {
          SPDocument listDoc = listState.getDocumentInstance(
              sharepointClientContext.getFeedType());
          listItems.add(listDoc);
          listState.setNewList(false);
        }
      } else {
        // Send List home page as part of this batch to complete inheritance
        // chain for discovered child items for partially traversed List.
        if (listState.isNewList() && listItems != null && listItems.size() > 0
            && sharepointClientContext.getTraversalContext()
            .supportsInheritedAcls() && !Strings.isNullOrEmpty(
            listState.getListItemCollectionPositionNext())) {
          SPDocument listDoc = listState.getDocumentInstance(
              sharepointClientContext.getFeedType());
          listItems.add(listDoc);
        }

        // If any of the list has not been traversed completely, doCrawl
View Full Code Here

TOP

Related Classes of com.google.enterprise.connector.sharepoint.spiimpl.SPDocument$SPContent

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.