Package com.google.enterprise.connector.sharepoint.state

Examples of com.google.enterprise.connector.sharepoint.state.WebState


    sharepointClientContext = TestConfiguration.initContext();

    final GlobalState state = new GlobalState(clientFactory,
        TestConfiguration.googleConnectorWorkDir, FeedType.CONTENT_FEED);
    WebState ws = state.makeWebState(sharepointClientContext, TestConfiguration.Site1_URL);

    final SiteDataHelper siteData = new SiteDataHelper(sharepointClientContext);
    List<ListState> listCollection = siteData.getNamedLists(ws);
    assertNotNull(listCollection);
    for (ListState baseList : listCollection) {
      ListsHelper listHelper = new ListsHelper(this.sharepointClientContext);
      List<SPDocument> listItems = listHelper.getListItems(baseList, null, null, null);
      if (listItems.size() > 0) {
        for (SPDocument spdoc : listItems) {
          spdoc.setParentWeb(ws);
          spdoc.setParentList(baseList);
        }
        System.out.println("Using " + baseList.getListURL()
            + " as test list...");
        this.docs = new SPDocumentList(listItems, state);
        ws.AddOrUpdateListStateInWebState(baseList, baseList.getLastMod());
        break;
      }
    }

    this.docs.setAliasMap(sharepointClientContext.getAliasMap());
View Full Code Here


    Set<WebState> newWebs = new HashSet<WebState>();
    if ((null == allSites) || (allSites.size() == 0)) {
      return newWebs;
    }
    for (String url : allSites) {
      final WebState webState = updateGlobalState(globalState, url);
      if (null != webState) {
        newWebs.add(webState);
      }
    }
    return newWebs;
View Full Code Here

   *         globalstate. Otherwise a valid reference to the newly created
   *         WebState
   */
  private WebState updateGlobalState(final GlobalState globalState,
      final String url) {
    WebState web = null;
    if (null == url) {
      LOGGER.log(Level.WARNING, "url not found!");
      return web;
    }
    String webUrl = url;
    WebState wsGS = globalState.lookupWeb(url, null);

    /*
     * The incoming url might not always be exactly the web URL that is used
     * while creation of web state and is required by Web Services as such.
     * Hence, a second check is required.
     */
    if (null == wsGS) {
      final String webAppURL = Util.getWebApp(url);
      WebsHelper webs = null;
      try {
        sharepointClientContext.setSiteURL(webAppURL);
        webs = new WebsHelper(sharepointClientContext);
      } catch (final Exception e) {
        LOGGER.log(Level.WARNING, "WebsHelper creation failed for URL [ "
            + url + " ]. ", e);
      }
      if (null != webs) {
        webUrl = webs.getWebURLFromPageURL(url);
        if (!url.equals(webUrl)) {
          wsGS = globalState.lookupWeb(webUrl, null);
        }
      }
    }

    if (null == wsGS) {// new web
      LOGGER.config("Making WebState for : " + webUrl);
      try {
        int responseCode = sharepointClientContext.checkConnectivity(
            Util.encodeURL(webUrl) + SPConstants.LISTS_END_POINT, null);
        if (responseCode != 400 && responseCode != 404) {
          web = globalState.makeWebState(sharepointClientContext, webUrl);
        } else {
          LOGGER.warning("Unable to connect to list web service for web. "
              + "Skipping WebState creation for URL [ " + webUrl + " ].");
          sharepointClientContext.logExcludedURL("[ " + webUrl
              + " ] identified as invalid Web Url");
        }
      } catch (final Exception e) {
        LOGGER.log(Level.WARNING, "Problem while creating web state for url [ "
            + webUrl + " ]. ", e);
      }
    } else {
      wsGS.setExisting(true);
    }

    return web;
  }
View Full Code Here

    nDocuments = 0;
    doCrawl = true;

    ListState nextList = globalState.getLastCrawledList();
    WebState nextWeb = globalState.getLastCrawledWeb();

    if (null == nextWeb) {
      nextWeb = globalState.lookupWeb(sharepointClientContext.getSiteURL(),
          sharepointClientContext);
    } else {
      sharepointClientContext.setSiteURL(nextWeb.getWebUrl());
    }

    // start and end recrawl is used for detecting non-existent webs/lists
    globalState.startRecrawl();

    if (null == nextWeb) {
      nextWeb = updateGlobalState(globalState, sharepointClientContext.getSiteURL());
      if (null == nextWeb) {
        throw new SharepointException(
            "Starting WebState for the current traversal can not be determined.");
      }
      if (null != webCrawlInfoFetcher) {
        nextWeb.setWebCrawlInfo(webCrawlInfoFetcher.getCurrentWebCrawlInfo());
      }
    }

    LOGGER.info("Starting traversal from site [ " + nextWeb + " ]. ");

    SPType spType = nextWeb.getSharePointType();

    // To store the intermediate webs discovered during crawl
    Set<String> allSites = new TreeSet<String>();

    ArrayList<String> lstLookupForWebs = new ArrayList<String>();
View Full Code Here

      ListState nextList, ArrayList<String> lstLookupForWebs)
          throws SharepointException {
    globalState.setCurrentWeb(nextWeb);
    final Iterator<WebState> itWebs = globalState.getCircularIterator();
    while (itWebs.hasNext()) {
      WebState ws = itWebs.next(); // Get the first web
      if (ws == null) {
        continue;
      }

      final String webURL = ws.getPrimaryKey();

      // Note: The lookup table keeps track of the links which have
      // been visited so far.
      // This helps to curb the cyclic link problem in which SiteA can
      // have a link to SiteB and SiteB a link back to SiteA.
      if (lstLookupForWebs.contains(webURL)) {
        continue;
      } else {
        lstLookupForWebs.add(webURL);
      }

      try {
        sharePointClientContext.setSiteURL(webURL);
      } catch (Exception e) {
        LOGGER.log(Level.WARNING, "Exception occurred when trying to set the webUrl [ "
            + webURL + " ] context", e);
        continue;
      }

      if (sharepointClientContext.isUseSPSearchVisibility()) {
        // Even if a web is not crawled due to the SP search visibility,
        // its reference is kept in the connector's state. This is to
        // avoid unnecessary discovery (and WebState construction) of
        // these webs again and again.
        if (ws.isNoCrawl()) {
          LOGGER.log(Level.WARNING, "Skipping Web URL [ "
              + webURL
              + " ] while crawling because it has been marked for No Crawling on SharePoint. ");
          continue;
        }
      }

      nextWeb = ws;
      LOGGER.config("Crawling site [ " + webURL + " ] ");
      final int currDocCount = nDocuments;
      try {
        // Process the web site, and add the link site info to allSites.
        updateWebStateFromSite(sharePointClientContext, ws, nextList, allSites);

        if (currDocCount == nDocuments) {
          // get Alerts for the web and update webState. The above
          // check is added to reduce the frequency with which
          // getAlerts WS call is made.
          LOGGER.fine("Getting alerts under site [ " + webURL + " ]");
          processAlerts(ws, sharePointClientContext);      
        }       
        ListState listForWeb = ws.lookupList(ws.getPrimaryKey());
        if (listForWeb != null) {
          LOGGER.fine("List State for web [ " + listForWeb.getListURL()
              + " ] is not null. Last Doc from List State is "
              + listForWeb.getLastDocProcessed());
        }
        boolean isFirstBatch = ((listForWeb == null)
            || (listForWeb.getLastDocProcessed() == null));
        // Crawl the site home page and web application policy in the
        // first batch and when a web application policy change is detected.
        if (ws.isWebApplicationPolicyChange()
            || isFirstBatch) {                       
          // Get site data for the web and update webState.       
          LOGGER.fine("Getting landing page data for the site [ " + webURL
              + " ]");
          processSiteData(ws, sharepointClientContext);
View Full Code Here

      if (null == webCrawlInfos) {
        return;
      }
      for (WebCrawlInfo webCrawlInfo : webCrawlInfos) {
        if (webCrawlInfo.isStatus()) {
          WebState webState = webUrlMap.get(webCrawlInfo.getWebKey());
          webState.setWebCrawlInfo(webCrawlInfo);
        } else {
          LOGGER.log(Level.WARNING, "WS encountered problem while fetching the crawl info of one of the web. WS ERROR -> "
              + webCrawlInfo.getError());
        }
      }
View Full Code Here

   * @return instance of {@link WebState}
   */
  public static WebState createWebState(GlobalState globalState,
      SharepointClientContext spContext, String url, int indexOfLastCrawledList)
      throws SharepointException {
    WebState ws = globalState.makeWebState(spContext, url);
    ws.setPrimaryKey(url);
    DateTime dt = new DateTime();
    ws.setInsertionTime(dt);
    ListState ls = getListState("http://testcase.com:22819/tempSite/Lists/Announcements/AllItems.aspx", 10, 156790, "{872819FC-6FA7-42AF-A71F-DCF7B8CD1E4A}", ws);
    ListState ls2 = getListState("http://testcase.com:22819/tempSite2/Lists/Announcements/AllItems.aspx", 11, 122790, "{872819FC-6FA7-42AF-A71F-DCF7B8CD1G4A}", ws);
    ListState ls3 = getListState("http://testcase.com/tempSite2/Lists/Announcements/AllItems.aspx", 12, 157790, "{872819FC-6FA7-42AF-A71F-DCF7B8CD1T4A}", ws);
    ListState ls4 = getListState("http://testcase.com/tempSite4/Lists/Announcements/AllItems.aspx", 22, 158790, "{872819FC-6FA7-42AF-A71F-DCF7B8RT1T4A}", ws);

    ws.AddOrUpdateListStateInWebState(ls, ls.getLastMod());
    ws.AddOrUpdateListStateInWebState(ls2, ls2.getLastMod());
    ws.AddOrUpdateListStateInWebState(ls3, ls3.getLastMod());
    ws.AddOrUpdateListStateInWebState(ls4, ls4.getLastMod());

    switch (indexOfLastCrawledList) {
    case 1:
      ws.setLastCrawledList(ls);
      ws.setCurrentList(ls);
      break;
    case 2:
      ws.setLastCrawledList(ls2);
      ws.setCurrentList(ls2);
      break;
    case 3:
      ws.setLastCrawledList(ls3);
      ws.setCurrentList(ls3);
      break;
    case 4:
      ws.setLastCrawledList(ls4);
      ws.setCurrentList(ls4);
      break;
    }

    ws.setWebUrl("http://testcase.com:22819/sites/testissue85");

    return ws;
  }
View Full Code Here

   * @since 2.4
   */
  private SPDocumentList traverse(final SharepointClient sharepointClient,
      boolean checkForPendingDocs) {
    if (checkForPendingDocs) {
      WebState ws = globalState.getLastCrawledWeb();
      ListState listState = globalState.getLastCrawledList();
      globalState.setCurrentWeb(ws);
      if (null != ws) {
        ws.setCurrentList(listState);
      }
    }

    // CurrentWeb and CurrentList will define the starting point for
    // the traversal/scan-of-crawl-queues. In case of list, all the
    // lists before CurrentList will not be scanned.
    // TODO: The same is to be done for webs also so that only the relevant
    // WebStates
    // gets scanned. It does not make sense to traverse all the WebStates
    // all the time. Precisely, what we need here is an intelligent
    // liniarIterator instead of a dumb circularIterator.

    SPDocumentList rsAll = null;
    int sizeSoFar = 0;
    LOGGER.log(Level.INFO, "Checking crawl queues of all ListStates/WebStates for pending docs.");
    for (final Iterator<WebState> iter = globalState.getCircularIterator(); iter.hasNext()
        && (sizeSoFar < hint);) {
      final WebState webState = iter.next();
      globalState.setCurrentWeb(webState);
      SPDocumentList rs = null;
      try {
        rs = sharepointClient.traverse(globalState, webState, sizeSoFar, checkForPendingDocs);
      } catch (final Exception e) {
        LOGGER.log(Level.WARNING, "Exception occured while traversing web URL [ "
            + webState.getWebUrl() + " ] ", e);
      } catch (final Throwable t) {
        LOGGER.log(Level.WARNING, "Error occured while traversing web URL [ "
            + webState.getWebUrl() + " ] ", t);
      }
      if ((rs != null) && (rs.size() > 0)) {
        LOGGER.log(Level.INFO, rs.size()
            + " document(s) to be sent from web URL [ " + webState.getWebUrl()
            + " ]. ");
        if (rsAll == null) {
          rsAll = rs;
        } else {
          rsAll.addAll(rs);
        }
        sizeSoFar = rsAll.size();
      } else {
        LOGGER.log(Level.CONFIG, "No documents to be sent from web [ "
            + webState.getWebUrl() + " ] ");
      }
    }
    return rsAll;
  }
View Full Code Here

      throws SharepointException {
    GlobalState globalState = new GlobalState(clientFactory,
        TestConfiguration.googleConnectorWorkDir, TestConfiguration.feedType);

    if (null != Site1_URL && Site1_URL.trim().length() > 0) {
      WebState webstate1 = globalState.makeWebState(sharepointClientContext, TestConfiguration.Site1_URL);
      if (null != Site1_List1_URL && Site1_List1_URL.trim().length() > 0) {
        ListState liststate11 = new ListState(Site1_List1_GUID, "inTitle",
            SPConstants.DOC_LIB, Calendar.getInstance(), "", Site1_List1_URL,
            webstate1);
        List<SPDocument> docs = new ArrayList<SPDocument>();
        if (null != Site1_List1_Item1_URL
            && Site1_List1_Item1_URL.trim().length() > 0) {
          SPDocument doc = new SPDocument("111", Site1_List1_Item1_URL,
              Calendar.getInstance(), ActionType.ADD);
          docs.add(doc);
        }
        if (null != Site1_List1_Item2_URL
            && Site1_List1_Item2_URL.trim().length() > 0) {
          SPDocument doc = new SPDocument("112", Site1_List1_Item2_URL,
              Calendar.getInstance(), ActionType.ADD);
          docs.add(doc);
        }
        liststate11.setCrawlQueue(docs);
        webstate1.AddOrUpdateListStateInWebState(liststate11, new DateTime());
      }
      if (null != Site1_List2_URL && Site1_List2_URL.trim().length() > 0) {
        ListState liststate12 = new ListState(Site1_List2_GUID, "inTitle",
            SPConstants.GENERIC_LIST, Calendar.getInstance(), "",
            Site1_List2_URL, webstate1);
        List<SPDocument> docs = new ArrayList<SPDocument>();
        if (null != Site1_List2_Item1_URL
            && Site1_List2_Item1_URL.trim().length() > 0) {
          SPDocument doc = new SPDocument("121", Site1_List2_Item1_URL,
              Calendar.getInstance(), ActionType.ADD);
          docs.add(doc);
        }
        if (null != Site1_List2_Item2_URL
            && Site1_List2_Item2_URL.trim().length() > 0) {
          SPDocument doc = new SPDocument("122", Site1_List2_Item2_URL,
              Calendar.getInstance(), ActionType.ADD);
          docs.add(doc);
        }
        liststate12.setCrawlQueue(docs);
        webstate1.AddOrUpdateListStateInWebState(liststate12, new DateTime());
      }
    }

    if (null != Site2_URL && Site2_URL.trim().length() > 0) {
      WebState webstate2 = globalState.makeWebState(sharepointClientContext, TestConfiguration.Site2_URL);
      if (null != Site2_List1_URL && Site2_List1_URL.trim().length() > 0) {
        ListState liststate21 = new ListState(Site2_List1_GUID, "inTitle",
            SPConstants.DOC_LIB, Calendar.getInstance(), "", Site2_List1_URL,
            webstate2);
        List<SPDocument> docs = new ArrayList<SPDocument>();
        if (null != Site2_List1_Item1_URL) {
          SPDocument doc = new SPDocument("211", Site2_List1_Item1_URL,
              Calendar.getInstance(), ActionType.ADD);
          docs.add(doc);
        }
        if (null != Site2_List1_Item2_URL) {
          SPDocument doc = new SPDocument("212", Site2_List1_Item2_URL,
              Calendar.getInstance(), ActionType.ADD);
          docs.add(doc);
        }
        liststate21.setCrawlQueue(docs);
        webstate2.AddOrUpdateListStateInWebState(liststate21, new DateTime());
      }
      if (null != Site2_List2_URL && Site2_List2_URL.trim().length() > 0) {
        ListState liststate22 = new ListState(Site1_List2_GUID, "inTitle",
            SPConstants.GENERIC_LIST, Calendar.getInstance(), "",
            Site2_List2_URL, webstate2);
        List<SPDocument> docs = new ArrayList<SPDocument>();
        if (null != Site2_List2_Item1_URL) {
          SPDocument doc = new SPDocument("221", Site2_List2_Item1_URL,
              Calendar.getInstance(), ActionType.ADD);
          docs.add(doc);
        }
        if (null != Site2_List2_Item2_URL) {
          SPDocument doc = new SPDocument("222", Site2_List2_Item2_URL,
              Calendar.getInstance(), ActionType.ADD);
          docs.add(doc);
        }
        liststate22.setCrawlQueue(docs);
        webstate2.AddOrUpdateListStateInWebState(liststate22, new DateTime());
      }
    }

    if (null != Site3_URL && Site3_URL.trim().length() > 0) {
      WebState webstate3 = globalState.makeWebState(sharepointClientContext, TestConfiguration.Site3_URL);
      if (null != Site3_List1_URL && Site3_List1_URL.trim().length() > 0) {
        ListState liststate31 = new ListState(Site3_List1_GUID, "inTitle",
            SPConstants.DOC_LIB, Calendar.getInstance(), "", Site3_List1_URL,
            webstate3);
        List<SPDocument> docs = new ArrayList<SPDocument>();
        if (null != Site3_List1_Item1_URL) {
          SPDocument doc = new SPDocument("311", Site3_List1_Item1_URL,
              Calendar.getInstance(), ActionType.ADD);
          docs.add(doc);
        }
        if (null != Site3_List1_Item2_URL) {
          SPDocument doc = new SPDocument("312", Site3_List1_Item2_URL,
              Calendar.getInstance(), ActionType.ADD);
          docs.add(doc);
        }
        liststate31.setCrawlQueue(docs);
        webstate3.AddOrUpdateListStateInWebState(liststate31, new DateTime());
      }
      if (null != Site3_List2_URL && Site3_List2_URL.trim().length() > 0) {
        ListState liststate32 = new ListState(Site3_List2_GUID, "inTitle",
            SPConstants.GENERIC_LIST, Calendar.getInstance(), "",
            Site3_List2_URL, webstate3);
        List<SPDocument> docs = new ArrayList<SPDocument>();
        if (null != Site3_List2_Item1_URL) {
          SPDocument doc = new SPDocument("321", Site3_List2_Item1_URL,
              Calendar.getInstance(), ActionType.ADD);
          docs.add(doc);
        }
        if (null != Site3_List2_Item2_URL) {
          SPDocument doc = new SPDocument("322", Site2_List2_Item2_URL,
              Calendar.getInstance(), ActionType.ADD);
          docs.add(doc);
        }
        liststate32.setCrawlQueue(docs);
        webstate3.AddOrUpdateListStateInWebState(liststate32, new DateTime());
      }
    }
    if (null != Site4_URL && Site4_URL.trim().length() > 0) {
      WebState webstate4 = globalState.makeWebState(sharepointClientContext, TestConfiguration.Site4_URL);
      if (null != Site4_List1_URL && Site4_List1_URL.trim().length() > 0) {
        ListState liststate41 = new ListState(Site4_List1_GUID, "inTitle",
            SPConstants.DOC_LIB, Calendar.getInstance(), "", Site4_List1_URL,
            webstate4);
        List<SPDocument> docs = new ArrayList<SPDocument>();
        if (null != Site4_List1_Item1_URL) {
          SPDocument doc = new SPDocument("411", Site4_List1_Item1_URL,
              Calendar.getInstance(), ActionType.ADD);
          docs.add(doc);
        }
        if (null != Site4_List1_Item2_URL) {
          SPDocument doc = new SPDocument("412", Site4_List1_Item2_URL,
              Calendar.getInstance(), ActionType.ADD);
          docs.add(doc);
        }
        liststate41.setCrawlQueue(docs);
        webstate4.AddOrUpdateListStateInWebState(liststate41, new DateTime());
      }
      if (null != Site4_List2_URL && Site4_List2_URL.trim().length() > 0) {
        ListState liststate42 = new ListState(Site4_List2_GUID, "inTitle",
            SPConstants.GENERIC_LIST, Calendar.getInstance(), "",
            Site4_List2_URL, webstate4);
        List<SPDocument> docs = new ArrayList<SPDocument>();
        if (null != Site4_List2_Item1_URL) {
          SPDocument doc = new SPDocument("421", Site4_List2_Item1_URL,
              Calendar.getInstance(), ActionType.ADD);
          docs.add(doc);
        }
        if (null != Site4_List2_Item2_URL) {
          SPDocument doc = new SPDocument("422", Site2_List2_Item2_URL,
              Calendar.getInstance(), ActionType.ADD);
          docs.add(doc);
        }
        liststate42.setCrawlQueue(docs);
        webstate4.AddOrUpdateListStateInWebState(liststate42, new DateTime());
      }
    }
    return globalState;
  }
View Full Code Here

    System.out.println("Creating test List ...");
    final SiteDataHelper siteData = new SiteDataHelper(this.sharepointClientContext);

    final GlobalState state = new GlobalState(clientFactory,
        TestConfiguration.googleConnectorWorkDir, FeedType.CONTENT_FEED);
    WebState ws = state.makeWebState(sharepointClientContext, TestConfiguration.sharepointUrl);

    List<ListState> listCollection = siteData.getNamedLists(ws);
    assertNotNull(listCollection);

    for (ListState baseList : listCollection) {
View Full Code Here

TOP

Related Classes of com.google.enterprise.connector.sharepoint.state.WebState

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.