// Package com.google.enterprise.connector.adgroups
//
// Source code of com.google.enterprise.connector.adgroups.AdGroupsTraversalManager

// Copyright 2012 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.enterprise.connector.adgroups;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.enterprise.connector.adgroups.AdConstants.Method;
import com.google.enterprise.connector.adgroups.AdDbUtil.Query;
import com.google.enterprise.connector.spi.DocumentList;
import com.google.enterprise.connector.spi.RepositoryException;
import com.google.enterprise.connector.spi.TraversalManager;
import com.google.enterprise.connector.util.EmptyDocumentList;

import java.sql.SQLException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.ResourceBundle;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.naming.InterruptedNamingException;
import javax.naming.NamingException;

public class AdGroupsTraversalManager implements TraversalManager {
  private static final Logger LOGGER =
      Logger.getLogger(AdGroupsTraversalManager.class.getName());

  private ArrayList<AdServer> servers;
  private Set<AdEntity> wellKnownEntities;
  private AdDbUtil db;
  private long fullRecrawlThresholdInMillis = 24 * 60 * 60 * 1000;
  private final String databaseType;

  public AdGroupsTraversalManager(AdGroupsConnector connector)
      throws RepositoryException {
    servers = new ArrayList<AdServer>();
    wellKnownEntities = new HashSet<AdEntity>();
   
    databaseType = connector.getDatabaseType();
    LOGGER.info("Connector Database Type = " + connector.getDatabaseType());

    db = new AdDbUtil(connector.getDataSource(), databaseType);

    ResourceBundle rb = ResourceBundle.getBundle(
        getClass().getPackage().getName() + ".wellknowidentifiers");
    Enumeration<String> keys = rb.getKeys();
    while (keys.hasMoreElements()) {
      String key = keys.nextElement();
      wellKnownEntities.add(new AdEntity(key, rb.getString(key)));
    }

    LOGGER.info("Adding servers");
    // I will leave this as undocumented hack to get crawling of multiple ADs
    // with one instance of the connector until better solution is created
    // via the configuration
    String[] methods = connector.getMethod().split("\\|");
    String[] hostnames = connector.getHostname().split("\\|");
    String[] ports = connector.getPort().split("\\|");
    String[] principals = connector.getPrincipal().split("\\|");
    String[] passwords = connector.getPassword().split("\\|");

    for (int i = 0; i < hostnames.length; ++i) {
      if (hostnames[i].trim().length() != 0) {
        AdServer server = new AdServer(
            methods[i].equals("SSL") ? Method.SSL : Method.STANDARD,
            hostnames[i],
            Integer.parseInt(ports[i]),
            principals[i],
            passwords[i]);
        try {
          server.initialize();
        } catch (NamingException ex) {
          throw new RepositoryException(ex);
        }
        servers.add(server);
      }
    }
  }

  private long getLastCrawledChange(AdServer server, boolean resetTraversal) {
    List<HashMap<String, Object>> dbServers;
    try {
      dbServers =
          db.select(AdDbUtil.Query.SELECT_SERVER, server.getSqlParams());
    } catch (SQLException e) {
      LOGGER.log(Level.INFO, "Other connector is currently crawling the dn ["
          + server.getDn() + "]", e);
      return -1;
    }

    if (resetTraversal) {
      LOGGER.info(server + "Start traversal requested. Performing full crawl.");
      return 0;
    }

    if (dbServers.size() == 0) {
      LOGGER.fine(server +
          "Connected to Directory Controller [" + server.getDsServiceName()
          + "] for the first time. Performing full crawl.");
      return 0;
    } else if (dbServers.size() == 1) {
      HashMap<String, Object> dbServer = dbServers.get(0);
      //TODO: investigate impact of timezones
      Timestamp lastFullSync =
          (Timestamp) dbServer.get(AdConstants.DB_LASTFULLSYNC);
      server.setLastFullSync(lastFullSync);
      if ((new Date().getTime()) - lastFullSync.getTime()
          > fullRecrawlThresholdInMillis && Calendar.getInstance().get(
              Calendar.DAY_OF_WEEK) == Calendar.SUNDAY) {
        LOGGER.info(
            server + "Full recrawl threshold reached. Performing full recrawl");
        return 0;
      }

      if (dbServer.get(AdConstants.DB_DSSERVICENAME)
          .equals(server.getDsServiceName())) {
        if (dbServer.get(AdConstants.DB_INVOCATIONID)
            .equals(server.getInvocationID())) {
          long last = ((Number) dbServer.get(
              AdConstants.DB_HIGHESTCOMMITTEDUSN)).longValue();
          LOGGER.info(server + "Last crawled change [" + last
              + "]. Last change on the server ["
              + server.getHighestCommittedUSN() + "]");
          return last;
        } else {
          LOGGER.warning("Directory Controller [" + server.getDsServiceName()
              + "] has been restored from backup. Performing full recrawl.");
        }
      } else {
        LOGGER.warning("Directory Controller changed!!! Connected to ["
            + dbServer.get(AdConstants.DB_INVOCATIONID) + "], but expected ["
            + server.getDsServiceName()
            + "]. Not able to perform partial updates - performing full "
            + "recrawl. Consider configuring AD groups connector to connect "
            + "directly to FQDN address of one domain controller for partial "
            + "updates support.");
      }
    } else {
      LOGGER.warning("Multiple servers for dn [" + server.getDn() + "] found");
      // TODO: think what to do in this case - high availability
    }

    return 0;
  }

  private void run(boolean resetTraversal) {
    try {
      db.executeBatch(AdDbUtil.Query.MERGE_ENTITIES, wellKnownEntities);
      Map<String, Object> wellKnownServer = new HashMap<String, Object>();
      wellKnownServer.put(AdConstants.DB_DN, "DC=NT Authority");
      wellKnownServer.put(AdConstants.DB_DSSERVICENAME, "S-1-5");
      wellKnownServer.put(AdConstants.DB_INVOCATIONID, "S-1-5");
      wellKnownServer.put(AdConstants.DB_HIGHESTCOMMITTEDUSN, "0");
      wellKnownServer.put(AdConstants.DB_NETBIOSNAME, "NT AUTHORITY");
      wellKnownServer.put(AdConstants.DB_SID, "S-1-5");
      wellKnownServer.put(AdConstants.DB_DNSROOT, "");
      db.execute(Query.UPDATE_SERVER, wellKnownServer);

      wellKnownServer.put(AdConstants.DB_DN, "DC=BUILTIN");
      wellKnownServer.put(AdConstants.DB_DSSERVICENAME, "S-1-5-32");
      wellKnownServer.put(AdConstants.DB_INVOCATIONID, "S-1-5-32");
      wellKnownServer.put(AdConstants.DB_HIGHESTCOMMITTEDUSN, "0");
      wellKnownServer.put(AdConstants.DB_NETBIOSNAME, "BUILTIN");
      wellKnownServer.put(AdConstants.DB_SID, "S-1-5-32");
      wellKnownServer.put(AdConstants.DB_DNSROOT, "");
      db.execute(Query.UPDATE_SERVER, wellKnownServer);
    } catch (SQLException e) {
      LOGGER.log(Level.WARNING, "Merging of well known identifiers failed", e);
    }

    LOGGER.info("Starting AD crawl");

    for (AdServer server : servers) {
      try {
        server.initialize();

        String ldapQuery;
        String tombstoneQuery = null;
        long last = getLastCrawledChange(server, resetTraversal);
        if (last == -1) {
          LOGGER.info(server + "Skipping crawl");
          continue;
        } else if (last == 0) {
          LOGGER.info(server + "Full recrawl start");
          ldapQuery = AdConstants.LDAP_QUERY;
        } else if (last == server.getHighestCommittedUSN()){
          LOGGER.info(server + "No updates on the server");
          continue;
        } else {
          LOGGER.info(server + "Partial recrawl start");
          ldapQuery = String.format(AdConstants.PARTIAL_LDAP_QUERY, last + 1);
          tombstoneQuery = String.format(
              AdConstants.TOMBSTONE_QUERY, last + 1);
        }

        LOGGER.info(server + "Querying server " + ldapQuery);
        Set<AdEntity> entities = server.search(ldapQuery, false, new String[] {
            AdConstants.ATTR_USNCHANGED,
            AdConstants.ATTR_SAMACCOUNTNAME,
            AdConstants.ATTR_OBJECTSID,
            AdConstants.ATTR_OBJECTGUID,
            AdConstants.ATTR_UPN,
            AdConstants.ATTR_PRIMARYGROUPID,
            AdConstants.ATTR_MEMBER});

        // list of DNs to delete from database during incremental traversal
        Set<AdEntity> tombstones;
        // list of tombstone entities to remove during full traversal
        List<HashMap<String, Object>> tombstonesInDb =
            new ArrayList<HashMap<String, Object>>();
        Set<AdEntity> entitiesToUpdate;
        boolean firstTimeForDomain = false;

        // when performing full recrawl we retrieve all entities from DB
        // and delete everything that was not rediscovered in AD
        int numberOfTombstones;
        if (last == 0) {
          LOGGER.info(server + "Retrieving all existing objects from DB.");
          tombstones = ImmutableSet.of();
          List<HashMap<String, Object>> dbEntities = db.select(
              Query.SELECT_ALL_ENTITIES_BY_SID, server.getSqlParams());
          Map<String, HashMap<String, Object>> dns =
              new HashMap<String, HashMap<String, Object>>();
          for (HashMap<String, Object> dbEntity : dbEntities) {
            dns.put((String) dbEntity.get(AdConstants.DB_DN), dbEntity);
          }
          firstTimeForDomain = dns.isEmpty();
          if (!firstTimeForDomain) {
            for (AdEntity e : entities) {
              dns.remove(e.getDn());
            }
            tombstonesInDb.addAll(dns.values());
          }
          numberOfTombstones = tombstonesInDb.size();
        } else {
          // when performing partial crawl we ask the LDAP to list removed
          // objects - by default works only for members of Domain Admins group
          // http://support.microsoft.com/kb/892806
          tombstones = server.search(tombstoneQuery, true,
              new String[] {AdConstants.ATTR_OBJECTGUID,
                  AdConstants.ATTR_SAMACCOUNTNAME});
          numberOfTombstones = tombstones.size();
          tombstonesInDb = ImmutableList.of();
        }

        LOGGER.info(server + "Found " + entities.size()
            + " entities to update in the database and " + numberOfTombstones
            + " entities to remove.");

        if (entities.size() > 0 || numberOfTombstones > 0) {
          // Remove all tombstones from the database
          if (last == 0) {
            LOGGER.log(Level.INFO,
                "{0} update 1/6 - Removing tombstones from database ({1})",
                new Object[] {server, tombstonesInDb.size()});
            db.executeBatch(
                Query.DELETE_MEMBERSHIPS_BY_ENTITYID, tombstonesInDb);
            db.executeBatch(Query.DELETE_ENTITY_BY_ENTITYID, tombstonesInDb);
          } else {
            LOGGER.log(Level.INFO,
                "{0} update 1/6 - Removing tombstones from database ({1})",
                new Object[] {server, tombstones.size()});
            db.executeBatch(Query.DELETE_MEMBERSHIPS, tombstones);
            db.executeBatch(Query.DELETE_ENTITY, tombstones);
          }

          // Check for each new/updated entity if it wasn't deleted and
          // recreated with the same name.
          LOGGER.info(
              server + "update 2/6 - Checking resurrected entities");
          if (!firstTimeForDomain) {
            entitiesToUpdate = new HashSet<AdEntity>();
            for (AdEntity e : entities) {
              // Check for duplicates with different GUID than e.
              List<HashMap<String, Object>> dbEntities =
                  db.select(Query.SELECT_ENTITY_BY_DN_AND_NOT_GUID,
                      e.getSqlParams());
              if (dbEntities.isEmpty()) {
                // new entity
                entitiesToUpdate.add(e);
              } else if (dbEntities.size() == 1) {
                HashMap<String, Object> dbEntity = dbEntities.get(0);
                if (!isSameEntity(e, dbEntity)) {
                  entitiesToUpdate.add(e);
                  // If entities are not same, check for Object GUID for
                  // resurrected entity.
                  if (!dbEntity.get(AdConstants.DB_OBJECTGUID).equals(
                      e.getSqlParams().get(AdConstants.DB_OBJECTGUID))) {
                    // Resurrected entity
                    LOGGER.info("Resurrected entity [" + e + "] discovered.");
                    db.execute(Query.DELETE_MEMBERSHIPS, e.getSqlParams());                 
                    LOGGER.fine("Deleting old version with objectguid ["
                        + dbEntity.get(AdConstants.DB_OBJECTGUID) + "]");
                    db.execute(
                        Query.DELETE_ENTITY, ImmutableMap.<String, Object>of(
                            AdConstants.DB_OBJECTGUID,
                            dbEntity.get(AdConstants.DB_OBJECTGUID)));                   
                  }
                }
              } else {
                // Multiple DB entities discovered. This is unexpexcted.
                for (HashMap<String, Object> dbEntity : dbEntities) {                 
                  LOGGER.fine("Duplicate entity [" + e + "] discovered.");
                  db.execute(Query.DELETE_MEMBERSHIPS, e.getSqlParams());
                 
                  LOGGER.fine("Deleting old version with objectguid ["
                      + dbEntity.get(AdConstants.DB_OBJECTGUID) + "]");
                  db.execute(
                      Query.DELETE_ENTITY, ImmutableMap.<String, Object>of(
                          AdConstants.DB_OBJECTGUID,
                          dbEntity.get(AdConstants.DB_OBJECTGUID)));
                }
                // Add entity to reprocess.
                entitiesToUpdate.add(e);
              }
            }
          } else {
            entitiesToUpdate = entities;
          }

          // Merge entities discovered on current server into the database
          LOGGER.info(
              server + "update 3/6 - Merging AD Entities into database ("
              + entitiesToUpdate.size() + ")");
          Query entityQuery =
              firstTimeForDomain ? Query.ADD_ENTITIES : Query.MERGE_ENTITIES;
          db.executeBatch(entityQuery, entitiesToUpdate);

          // Perform bulk processing only if its full traversal.
          boolean bulkProcessing =
              databaseType.equalsIgnoreCase("SQLSERVER") && (last == 0);
          if (bulkProcessing) {
            // Merge group memberships into the database
            LOGGER.info(server
                + "update 4A/6 - Inserting relationships into database.");
            // Merge group memberships into the database
            db.mergeMemberships(entities, !bulkProcessing);  
            LOGGER.info(
                server + "update 4B/6 - Match entities.");
            db.execute(Query.MATCH_ENTITIES, null);
            LOGGER.info(server
                + "update 4C/6 - Resolving primary groups for entities.");
            db.execute(Query.RESOLVE_PRIMARY_GROUPS, null);
          } else {
            // Merge group memberships into the database
            LOGGER.info(server
                + "update 4/6 - Inserting relationships into database.");
            // Merge group memberships into the database
            db.mergeMemberships(entities, !bulkProcessing);
            // Since H2 database is single threaded, resolve
            // primary groups one at a time to avoid blocking
            // authentication and group resolution calls during traversal.
            for (AdEntity e : entities) {
              // If we are user merge the primary group
              if (!e.isGroup()) {
                Long groupId = db.getEntityId(
                    Query.FIND_PRIMARY_GROUP, e.getSqlParams());
                Long memberId = db.getEntityId(
                    Query.FIND_ENTITY, e.getSqlParams());

                // due to exception during last traversal primary group might
                // not exist in the DB yet
                if (groupId != null) {
                  Map<String, Object> map = new HashMap<String, Object>(3);
                  map.put(AdConstants.DB_GROUPID, groupId);
                  map.put(AdConstants.DB_MEMBERDN, e.getDn());
                  map.put(AdConstants.DB_MEMBERID, memberId);
                  db.execute(Query.MERGE_MEMBERSHIP, map);
                }
              }
            }
          }

          // Update the server information
          if (last == 0) {
            server.setLastFullSync(new Timestamp(new Date().getTime()));
          }
          LOGGER.info(server + "update 5/6 - Updating Domain controller info.");
          db.execute(Query.UPDATE_SERVER, server.getSqlParams());

          LOGGER.info(server + "update 6/6 - Domain information updated.");
        } else {
          LOGGER.info(server + "No updates found.");
          db.execute(Query.UPDATE_SERVER, server.getSqlParams());
        }
      } catch (SQLException e) {
        LOGGER.log(Level.WARNING, "Merging data into database failed\n:", e);     
      } catch (InterruptedNamingException e) {
        LOGGER.log(Level.INFO, "Thread was interrupted, exiting AD crawl.", e);
        break;
      } catch (NamingException e) {
        LOGGER.log(Level.WARNING, "Connecting to the domain ["
            + server.getnETBIOSName() + "] failed\n:", e);
      }
    }
  }

  private boolean isSameEntity(AdEntity e, HashMap<String, Object> dbEntity) {
    Map<String, Object> adEntity = e.getSqlParams();
    for (String key : dbEntity.keySet()) {
      String dbValue = "" + dbEntity.get(key);
      String adValue = "" + adEntity.get(key);
      if (!dbValue.equals(adValue)) {
        LOGGER.log(Level.FINE,
            "Detected difference on key {0} value from db {1} value from AD {2} for Entity {3}",
            new Object[] {key, dbValue, adValue, e});
        return false;
      }
    }
    return true;
  }

  @Override
  public DocumentList resumeTraversal(String checkpoint)
      throws RepositoryException {
    run(false);
    return null;
  }

  @Override
  public void setBatchHint(int batchHint) throws RepositoryException {
    db.setBatchHint(batchHint / 10);
  }

  @Override
  public DocumentList startTraversal() throws RepositoryException {
    run(true);
    // Force later batches to call resumeTraversal.
    return new EmptyDocumentList(AdConstants.CHECKPOINT_VALUE);
  }
}
// TOP
//
// Related Classes of com.google.enterprise.connector.adgroups.AdGroupsTraversalManager
//
// TOP
// Copyright (c) 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.