Package com.sleepycat.je.rep.impl

Source Code of com.sleepycat.je.rep.impl.RepGroupDB$GroupBinding

/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 2002, 2011 Oracle and/or its affiliates.  All rights reserved.
*
*/

package com.sleepycat.je.rep.impl;

import static com.sleepycat.je.rep.NoConsistencyRequiredPolicy.NO_CONSISTENCY;
import static com.sleepycat.je.rep.impl.RepParams.GROUP_NAME;
import static com.sleepycat.je.rep.impl.RepParams.NODE_NAME;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.logging.Logger;

import com.sleepycat.bind.tuple.StringBinding;
import com.sleepycat.bind.tuple.TupleBinding;
import com.sleepycat.bind.tuple.TupleInput;
import com.sleepycat.bind.tuple.TupleOutput;
import com.sleepycat.je.Cursor;
import com.sleepycat.je.CursorConfig;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.DatabaseNotFoundException;
import com.sleepycat.je.DbInternal;
import com.sleepycat.je.Durability;
import com.sleepycat.je.Durability.ReplicaAckPolicy;
import com.sleepycat.je.Durability.SyncPolicy;
import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.LockConflictException;
import com.sleepycat.je.LockMode;
import com.sleepycat.je.OperationStatus;
import com.sleepycat.je.ReplicaConsistencyPolicy;
import com.sleepycat.je.Transaction;
import com.sleepycat.je.TransactionConfig;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.dbi.DbConfigManager;
import com.sleepycat.je.dbi.DbTree;
import com.sleepycat.je.dbi.DbType;
import com.sleepycat.je.rep.InsufficientAcksException;
import com.sleepycat.je.rep.InsufficientReplicasException;
import com.sleepycat.je.rep.NodeType;
import com.sleepycat.je.rep.impl.RepGroupImpl.BarrierState;
import com.sleepycat.je.rep.impl.node.NameIdPair;
import com.sleepycat.je.rep.impl.node.RepNode;
import com.sleepycat.je.rep.monitor.GroupChangeEvent.GroupChangeType;
import com.sleepycat.je.rep.stream.Protocol;
import com.sleepycat.je.rep.txn.MasterTxn;
import com.sleepycat.je.rep.txn.ReadonlyTxn;
import com.sleepycat.je.rep.util.DbResetRepGroup;
import com.sleepycat.je.rep.utilint.HostPortPair;
import com.sleepycat.je.txn.Txn;
import com.sleepycat.je.utilint.LoggerUtils;
import com.sleepycat.je.utilint.VLSN;

/**
* This class is used to encapsulate all access to the rep group data that is
* present in every replicated JE environment. The rep group data exists
* primarily to support dynamic group membership. Both read and update access
* must be done through the APIs provided by this class.
*
* The database is simply a representation of the RepGroup. Each entry in the
* database represents a node in RepGroup; the key is the String node name, and
* the data is the serialized ReplicationNode.  There is a special entry keyed
* by GROUP_KEY that holds the contents of the RepGroup (excluding the nodes)
* itself.
*
* The database may be modified concurrently by multiple transactions as a
* master processes requests to update it. It may also be accessed by multiple
* overlapping transactions as a Replica replays the rep stream. These updates
* need to be interleaved with operations like getGroup() that create copies of
* the RepGroup instance. To avoid deadlocks, entries in the database are
* accessed in order of ascending key. GROUP_KEY in particular is associated
* with the lowest key value so that it's locked first implicitly as part of
* any iteration and any other modifications to the database must first lock it
* before making changes to the group itself.
*
* An instance of this class is created as part of a replication node and is
* retained for the entire lifetime of that node.
*/
public class RepGroupDB {

    /* The replicated environment whose group database this instance fronts. */
    private final RepImpl repImpl;

    /*
     * A convenient, cached empty group. It is built in the constructor from
     * the configured GROUP_NAME.
     */
    public final RepGroupImpl emptyGroup;

    /* Logger obtained for this class in the constructor. */
    private final Logger logger;

    /*
     * The key used to store group-wide information in the database. It must
     * be the lowest key in the database, so that it's locked first during
     * database iteration.
     */
    public final static String GROUP_KEY = "$$GROUP_KEY$$";
    /* Serialized form of GROUP_KEY; filled in by the static block below. */
    public final static DatabaseEntry groupKeyEntry = new DatabaseEntry();

    /* Initialize the entry. */
    static {
        StringBinding.stringToEntry(GROUP_KEY, groupKeyEntry);
    }

    /* The fixed DB ID associated with the internal rep group database. */
    public final static long DB_ID = DbTree.NEG_DB_ID_START - 1;

    /*
     * Number of times to retry for ACKs on the master before returning to
     * the Replica, which will then again retry on some periodic basis.
     */
    private final static int QUORUM_ACK_RETRIES = 5;

    /*
     * Convenience Durability and Config constants. The TransactionConfig
     * instances are created empty here and receive their durability in the
     * static initializer further down.
     */
    private final static Durability QUORUM_ACK_DURABILITY =
        new Durability(SyncPolicy.SYNC,
                       SyncPolicy.SYNC,
                       ReplicaAckPolicy.SIMPLE_MAJORITY);

    private final static TransactionConfig QUORUM_ACK =
        new TransactionConfig();

    private final static TransactionConfig NO_ACK = new TransactionConfig();

    /*
     * TODO: Change this when we support true read only transactions.
     * Until then READ_ONLY is simply an alias for NO_ACK.
     */
    final static TransactionConfig READ_ONLY = NO_ACK;

    /* Synchronous on both sides, but no replica acknowledgments required. */
    private final static Durability NO_ACK_DURABILITY =
        new Durability(SyncPolicy.SYNC,
                       SyncPolicy.SYNC,
                       ReplicaAckPolicy.NONE);

    /* No sync and no replica acknowledgments. */
    private final static Durability NO_ACK_NO_SYNC_DURABILITY =
        new Durability(SyncPolicy.NO_SYNC,
                       SyncPolicy.NO_SYNC,
                       ReplicaAckPolicy.NONE);

    private final static TransactionConfig NO_ACK_NO_SYNC =
        new TransactionConfig();

    static {
        /* Initialize config constants. */
        QUORUM_ACK.setDurability(QUORUM_ACK_DURABILITY);
        NO_ACK.setDurability(NO_ACK_DURABILITY);
        NO_ACK_NO_SYNC.setDurability(NO_ACK_NO_SYNC_DURABILITY);
    }

    /**
     * Create an instance. Note that the database handle is not initialized at
     * this time, since the state of the node master/replica is not known
     * at the time the replication node (and consequently this instance) is
     * created.
     * @throws IOException
     * @throws DatabaseException
     */
    public RepGroupDB(RepImpl repImpl)
        throws DatabaseException, IOException {

        this.repImpl = repImpl;

        DbConfigManager configManager = repImpl.getConfigManager();
        emptyGroup = new RepGroupImpl(configManager.get(GROUP_NAME));
        logger = LoggerUtils.getLogger(getClass());
    }

    /**
     * Returns all the members that are currently part of the replication
     * group. This method can read the database directly, and can be used when
     * the replicated environment is detached and the RepNode is null. It's for
     * the latter reason that the method reads uncommitted data. In detached
     * mode, there may be transactions on the database that were in progress
     * when the node was last shutdown. These transactions may have locks which
     * will not be released until after the node is re-attached and the
     * replication stream is resumed. Using uncommitted reads avoids use of
     * locks in this circumstance. It's safe to read these records, since the
     * database will eventually be updated with these changes.
     *
     * @param policy determines how current the information must be if it's
     * invoked on a Replica.
     *
     * @return the group object
     * @throws DatabaseException if the object could not be obtained
     */
    public static RepGroupImpl getGroup(RepImpl rImpl,
                                        String groupName,
                                        ReplicaConsistencyPolicy policy)
        throws DatabaseException {

        DatabaseImpl dbImpl = null;
        try {
            dbImpl = rImpl.getGroupDb(policy);
        } catch (DatabaseNotFoundException e) {
            /* Creates a temporary placeholder group for use until the real
             * definition comes over the replication stream as part of the
             * replicated group database.
             */
            return new RepGroupImpl(groupName, true);
        }

        TransactionConfig txnConfig = new TransactionConfig();
        txnConfig.setDurability(READ_ONLY.getDurability());
        txnConfig.setConsistencyPolicy(policy);
        txnConfig.setReadUncommitted(true);

        Txn txn = null;
        try {
            txn = new ReadonlyTxn(rImpl, txnConfig);
            RepGroupImpl group = fetchGroup(groupName, dbImpl, txn);
            /* Correct summary info since we are reading uncommitted data */
            group.makeConsistent();
            txn.commit();
            txn = null;

            return group;
        } finally {
            if (txn != null) {
                txn.abort();
            }
        }
    }

    public RepGroupImpl getGroup(ReplicaConsistencyPolicy policy)
        throws DatabaseException {

        return getGroup(repImpl,
                        repImpl.getConfigManager().get(GROUP_NAME),
                        policy);
    }

    /**
     * All rep group db access uses cursors with eviction disabled.
     */
    static private Cursor makeCursor(DatabaseImpl dbImpl,
                                     Txn txn,
                                     CursorConfig cursorConfig) {
        Cursor cursor = DbInternal.makeCursor(dbImpl,
                                              txn,
                                              cursorConfig);
        DbInternal.getCursorImpl(cursor).setAllowEviction(false);
        return cursor;
    }

    /**
     * Returns a representation of the nodes of the group stored in the
     * database, using the txn and handles that were passed in.
     */
    private static RepGroupImpl fetchGroup(String groupName,
                                           DatabaseImpl dbImpl,
                                           Txn txn)
        throws DatabaseException {

        final DatabaseEntry keyEntry = new DatabaseEntry();
        final DatabaseEntry value = new DatabaseEntry();
        final NodeBinding miBinding = new NodeBinding();
        final GroupBinding groupBinding = new GroupBinding();

        RepGroupImpl group = null;
        Map <Integer, RepNodeImpl> nodes =
            new HashMap<Integer, RepNodeImpl>();
        final CursorConfig cursorConfig = new CursorConfig();
        cursorConfig.setReadCommitted(true);

        Cursor mcursor = null;

        try {
            mcursor = makeCursor(dbImpl, txn, cursorConfig);
            while (mcursor.getNext(keyEntry, value, LockMode.DEFAULT) ==
                   OperationStatus.SUCCESS) {

                final String key = StringBinding.entryToString(keyEntry);

                if (GROUP_KEY.equals(key)) {
                    group = groupBinding.entryToObject(value);
                    if (!group.getName().equals(groupName)) {
                        throw EnvironmentFailureException.unexpectedState
                            ("The argument: " + groupName +
                             " does not match the expected group name: " +
                             group.getName());
                    }
                } else {
                    final RepNodeImpl mi = miBinding.entryToObject(value);
                    nodes.put(mi.getNameIdPair().getId(), mi);
                }
            }
            if (group == null) {
                throw EnvironmentFailureException.unexpectedState
                    ("Group key: " + GROUP_KEY + " is missing");
            }
            group.setNodes(nodes);
            return group;
        } finally {
            if (mcursor != null) {
                mcursor.close();
            }
        }
    }

    /**
     * Ensures that information about this node, the current master is in the
     * member database. If it isn't, enter it into the database. If the
     * database does not exist, create it as well.
     *
     * Note that this overloading is only used by a node that is the master.
     *
     * @throws DatabaseException
     */
    public void addFirstNode()
        throws DatabaseException {

        DbConfigManager configManager = repImpl.getConfigManager();
        String groupName = configManager.get(GROUP_NAME);
        String nodeName = configManager.get(NODE_NAME);

        DatabaseImpl groupDbImpl = repImpl.createGroupDb();

        /* setup the group information as data. */
        GroupBinding groupBinding = new GroupBinding();
        RepGroupImpl repGroup =  new RepGroupImpl(groupName);
        DatabaseEntry groupEntry = new DatabaseEntry();
        groupBinding.objectToEntry(repGroup, groupEntry);

        /* Create the common group entry. */
        TransactionConfig txnConfig = new TransactionConfig();
        txnConfig.setDurability(NO_ACK.getDurability());
        txnConfig.setConsistencyPolicy(NO_CONSISTENCY);
        Txn txn = null;
        Cursor cursor = null;
        try {
            txn = new MasterTxn(repImpl,
                                txnConfig,
                                repImpl.getNameIdPair());

            cursor = makeCursor(groupDbImpl, txn, CursorConfig.DEFAULT);
            OperationStatus status = cursor.put(groupKeyEntry, groupEntry);
            if (status != OperationStatus.SUCCESS) {
                throw EnvironmentFailureException.unexpectedState
                    ("Couldn't write first group entry " + status);
            }
            cursor.close();
            cursor = null;
            txn.commit();
            txn = null;
        } finally {
            if (cursor != null) {
                cursor.close();
            }

            if (txn != null) {
                txn.abort();
            }
        }

        ensureMember(new RepNodeImpl(nodeName,
                                     repImpl.getHostName(),
                                     repImpl.getPort()));
    }

    /**
     * Ensures that the membership info for the replica is in the database. A
     * call to this method is initiated by the master as part of the
     * feeder/replica handshake, where the replica provides membership
     * information as part of the handshake protocol. The membership database
     * must already exist, with the master in it, when this method is invoked.
     *
     * @param membershipInfo provided by the replica
     *
     * @throws InsufficientReplicasException upon failure of 2p member update
     * @throw  InsufficientAcksException upon failure of 2p member update
     * @throws DatabaseException when the membership info could not be entered
     * into the membership database.
     */
    public void ensureMember(Protocol.NodeGroupInfo membershipInfo)
        throws InsufficientReplicasException,
               InsufficientAcksException,
               DatabaseException {

        ensureMember(new RepNodeImpl(membershipInfo));
    }

    void ensureMember(RepNodeImpl ensureNode)
        throws DatabaseException {

        DatabaseImpl groupDbImpl;
        try {
            groupDbImpl = repImpl.getGroupDb();
        } catch (DatabaseNotFoundException e) {
            /* Should never happen. */
            throw EnvironmentFailureException.unexpectedException(e);
        }

        DatabaseEntry nodeNameKey = new DatabaseEntry();
        StringBinding.stringToEntry(ensureNode.getName(), nodeNameKey);

        DatabaseEntry value = new DatabaseEntry();
        final RepGroupDB.NodeBinding mib = new RepGroupDB.NodeBinding();

        Txn txn = null;
        Cursor cursor = null;
        try {
            txn = new ReadonlyTxn(repImpl, NO_ACK);
            CursorConfig config = new CursorConfig();
            config.setReadCommitted(true);
            cursor = makeCursor(groupDbImpl, txn, config);

            OperationStatus status =
                cursor.getSearchKey(nodeNameKey, value, null);
            if (status == OperationStatus.SUCCESS) {
                /* Let's see if the entry needs updating. */
                RepNodeImpl miInDb = mib.entryToObject(value);
                if (miInDb.equivalent(ensureNode)) {
                    if (miInDb.isQuorumAck()) {
                        /* Present, matched and acknowledged. */
                        return;
                    }
                    ensureNode.getNameIdPair().update(miInDb.getNameIdPair());
                    /* Not acknowledged, retry the update. */
                } else {
                    /* Present but not equivalent. */
                    LoggerUtils.warning(logger, repImpl,
                                        "Incompatible node descriptions. " +
                                        "Membership database definition: " +
                                        miInDb.toString() +
                                        " Transient definition: " +
                                        ensureNode.toString());
                    throw EnvironmentFailureException.unexpectedState
                        ("Incompatible node descriptions for node ID: " +
                         ensureNode.getNodeId());
                }
                LoggerUtils.info(logger, repImpl,
                                 "Present but not ack'd node: " +
                                 ensureNode.getNodeId() +
                                 " ack status: " + miInDb.isQuorumAck());
            }
            cursor.close();
            cursor = null;
            txn.commit();
            txn = null;
        } finally {
            if (cursor != null) {
                cursor.close();
            }

            if (txn != null) {
                txn.abort();
            }

        }
        createMember(ensureNode);

        /* Refresh group and Fire an ADD GroupChangeEvent. */
        refreshGroupAndNotifyGroupChange
            (ensureNode.getName(), GroupChangeType.ADD);
    }

    private void refreshGroupAndNotifyGroupChange(String nodeName,
                                                  GroupChangeType opType) {
        repImpl.getRepNode().refreshCachedGroup();
        repImpl.getRepNode().getMonitorEventManager().notifyGroupChange
            (nodeName, opType);
    }

    /**
     * Deletes a node from the replication group by marking it as such in the
     * rep group db.
     */
    public void removeMember(RepNodeImpl removeNode) {
        LoggerUtils.info
            (logger, repImpl, "Deleting node: " + removeNode.getName());

        TwoPhaseUpdate twoPhaseUpdate = new TwoPhaseUpdate(removeNode) {

            @Override
            void phase1Body() {
                RepGroupImpl repGroup = fetchGroupObject(txn, groupDbImpl);
                int changeVersion = repGroup.incrementChangeVersion();
                saveGroupObject(txn, repGroup, groupDbImpl);
                node.setChangeVersion(changeVersion);
                node.setRemoved(true);
                saveNodeObject(txn, node, groupDbImpl);
            }
        };

        twoPhaseUpdate.execute();

        /* Refresh group and fire a REMOVE GroupChangeEvent. */
        refreshGroupAndNotifyGroupChange
            (removeNode.getName(), GroupChangeType.REMOVE);

        LoggerUtils.info(logger, repImpl,
                         "Successfully deleted node: " + removeNode.getName());
    }

    /* Add a new rep node into the RepGroupDB. */
    private void createMember(final RepNodeImpl node)
        throws InsufficientReplicasException,
               InsufficientAcksException,
               DatabaseException {

        LoggerUtils.fine
            (logger, repImpl, "Adding node: " + node.getNameIdPair());

        twoPhaseMemberUpdate(node);

        LoggerUtils.info(logger, repImpl,
                         "Successfully added node:" + node.getNameIdPair() +
                         " HostPort = " + node.getHostName() + ": " +
                         node.getPort() + " [" + node.getType() + "]");
    }

    /* Update a current rep node information in the RepGroupDB. */
    public void updateMember(final RepNodeImpl node)
        throws InsufficientReplicasException,
               InsufficientAcksException,
               DatabaseException {

        LoggerUtils.fine
            (logger, repImpl, "Updating node: " + node.getNameIdPair());

        twoPhaseMemberUpdate(node);

        LoggerUtils.info(logger, repImpl,
                         "Successfully updated node: " + node.getNameIdPair() +
                         " Hostport = " + node.getHostName() + ": " +
                         node.getPort() + " [" + node.getType() + "]");
    }

    /**
     * Implements the two phase update of membership information.
     *
     * In the first phase the master repeatedly tries to commit the "put"
     * operation until it gets a Quorum of acks, ensuring that the operation
     * has been made durable. Nodes that obtain this entry will start using it
     * in elections. However, the node itself will not participate in elections
     * until it has successfully completed phase 2.
     *
     * In the second phase, the entry for the member is updated to note that a
     * quorum of acks was received.
     *
     * Failure leaves the database with the member info absent, or present but
     * without the update to quorumAck that indicates a quorum has
     * acknowledged the change.
     *
     * @param node the member info for the node. If its NameIdPair has a null
     * id, a new id is assigned from the group during phase 1.
     *
     * @throws InsufficientReplicasException declared on the signature
     * @throws InsufficientAcksException declared on the signature
     * @throws DatabaseException upon failure.
     */
    private void twoPhaseMemberUpdate(final RepNodeImpl node)
        throws InsufficientReplicasException,
               InsufficientAcksException,
               DatabaseException {

        TwoPhaseUpdate twoPhaseUpdate = new TwoPhaseUpdate(node) {

            @Override
            void phase1Body() {
                /* Fetch the group entry first; this locks it for update. */
                RepGroupImpl repGroup = fetchGroupObject(txn, groupDbImpl);
                /* Check the node against the full group as currently stored. */
                fetchGroup(repGroup.getName(), groupDbImpl, txn).
                    checkForConflicts(node);
                int changeVersion = repGroup.incrementChangeVersion();
                /* A node without an id gets the next one from the group. */
                if (node.getNameIdPair().hasNullId()) {
                    node.getNameIdPair().setId(repGroup.getNextNodeId());
                }
                /* Persist the group entry, then the node stamped with it. */
                saveGroupObject(txn, repGroup, groupDbImpl);
                node.setChangeVersion(changeVersion);
                saveNodeObject(txn, node, groupDbImpl);
            }

            /* Both handlers discard the node id held by the NameIdPair. */
            @Override
            void deadlockHandler() {
                node.getNameIdPair().revertToNull();
            }

            @Override
            void insufficientReplicasHandler() {
                node.getNameIdPair().revertToNull();
            }
        };

        twoPhaseUpdate.execute();
    }

    /**
     * Updates the database entry associated with the node with the new local
     * CBVLSN, if it can do so without encountering lock contention. If it
     * encounters contention, it returns false, and the caller must retry at
     * some later point in time.
     *
     * Note that changes to the local CBVLSN do not update the group version
     * number since they do not impact group membership.
     *
     * NOTE(review): in the code below, false is returned only when the group
     * database handle cannot be obtained (probeGroupDb() throws or returns
     * null). Failures after that point propagate as exceptions.
     *
     * @param nameIdPair identifies the node being updated
     * @param newCBVLSN the new local CBVLSN to be associated with the node.
     * @return true if the update succeeded, or if the stored CBVLSN already
     * equals newCBVLSN; false if the group database was not available.
     * @throws DatabaseException
     */
    public boolean updateLocalCBVLSN(final NameIdPair nameIdPair,
                                     final VLSN newCBVLSN)
        throws DatabaseException {

        DatabaseImpl groupDbImpl = null;
        try {
            groupDbImpl = repImpl.probeGroupDb();
        } catch (DatabaseException e) {
            /* Contention on the groupDbImpl, try later. */
            return false;
        }

        if (groupDbImpl == null) {
            /* Contention on the groupDbImpl, try later. */
            return false;
        }

        DatabaseEntry nodeNameKey = new DatabaseEntry();
        StringBinding.stringToEntry(nameIdPair.getName(), nodeNameKey);
        DatabaseEntry value = new DatabaseEntry();
        final RepGroupDB.NodeBinding mib = new RepGroupDB.NodeBinding();
        /* The barrier time is the wall clock time of this call. */
        final RepGroupImpl.BarrierState barrierState =
            new RepGroupImpl.BarrierState(newCBVLSN,
                                          System.currentTimeMillis());
        Txn txn = null;
        Cursor cursor = null;
        /* ok selects commit (true) or abort (false) in the finally block. */
        boolean ok = false;
        try {
            txn = new MasterTxn(repImpl,
                                NO_ACK_NO_SYNC,
                                repImpl.getNameIdPair());
            cursor = makeCursor(groupDbImpl, txn, CursorConfig.DEFAULT);

            /* Read the node's entry with a read-modify-write lock. */
            OperationStatus status =
                    cursor.getSearchKey(nodeNameKey, value, LockMode.RMW);
            if (status != OperationStatus.SUCCESS) {
                throw EnvironmentFailureException.unexpectedState
                    ("Node ID: " + nameIdPair + " not present in group db");
            }

            /* Let's see if the entry needs updating. */
            RepNodeImpl node = mib.entryToObject(value);
            final VLSN lastCBVLSN = node.getBarrierState().getLastCBVLSN();
            if (lastCBVLSN.equals(newCBVLSN)) {
                /*
                 * Nothing to write. The finally block still commits the
                 * transaction and refreshes the RepNode's group info.
                 */
                ok = true;
                return true;
            }

            node.setBarrierState(barrierState);
            mib.objectToEntry(node, value);
            status = cursor.putCurrent(value);
            if (status != OperationStatus.SUCCESS) {
                throw EnvironmentFailureException.unexpectedState
                    ("Node ID: " + nameIdPair +
                     " stored localCBVLSN could not be updated. Status: " +
                     status);
            }
            LoggerUtils.fine(logger, repImpl,
                             "Local CBVLSN updated to " + newCBVLSN +
                             " for node " + nameIdPair);
            ok = true;
        } finally {
            if (cursor != null) {
                cursor.close();
            }

            /*
             * NOTE(review): the commit runs inside finally, so an exception
             * from cursor.close() above skips both commit and abort, and an
             * exception from commit() replaces any exception in flight.
             */
            if (txn != null) {
                if (ok) {
                    txn.commit(NO_ACK_NO_SYNC_DURABILITY);
                } else {
                    txn.abort();
                }
                txn = null;
            }
            if (ok) {
                /* RepNode may be null during shutdown. [#17424] */
                RepNode repNode = repImpl.getRepNode();
                if (repNode != null) {
                    repNode.updateGroupInfo(nameIdPair, barrierState);
                }
            }
        }

        return true;
    }

    /*
     * Returns just the de-serialized special rep group object from the
     * database, while ensuring that it's locked for update.
     */
    private RepGroupImpl fetchGroupObject(Txn txn,
                                          DatabaseImpl groupDbImpl)
        throws DatabaseException {

        RepGroupDB.GroupBinding groupBinding = new RepGroupDB.GroupBinding();
        DatabaseEntry groupEntry = new DatabaseEntry();

        Cursor cursor = null;
        try {
            cursor = makeCursor(groupDbImpl, txn, CursorConfig.DEFAULT);

            OperationStatus status = cursor.getSearchKey(groupKeyEntry,
                                                         groupEntry,
                                                         LockMode.RMW);
            if (status != OperationStatus.SUCCESS) {
                throw EnvironmentFailureException.unexpectedState
                    ("Group entry key: " + GROUP_KEY +
                     " missing from group database");
            }
        } finally {
            if (cursor != null) {
                cursor.close();
            }
        }

        return groupBinding.entryToObject(groupEntry);
    }

    /*
     * Saves the rep group in the database.
     */
    private void saveGroupObject(Txn txn,
                                 RepGroupImpl repGroup,
                                 DatabaseImpl groupDbImpl)
        throws DatabaseException {

        RepGroupDB.GroupBinding groupBinding = new RepGroupDB.GroupBinding();
        DatabaseEntry groupEntry = new DatabaseEntry();
        groupBinding.objectToEntry(repGroup, groupEntry);

        Cursor cursor = null;
        try {
            cursor = makeCursor(groupDbImpl, txn, CursorConfig.DEFAULT);

            OperationStatus status =  cursor.put(groupKeyEntry, groupEntry);
            if (status != OperationStatus.SUCCESS) {
                throw EnvironmentFailureException.unexpectedState
                    ("Group entry save failed");
            }
        } finally {
            if (cursor != null) {
                cursor.close();
            }
        }
    }

    /*
     * Save a ReplicationNode in the database.
     */
    private void saveNodeObject(Txn txn,
                                RepNodeImpl node,
                                DatabaseImpl groupDbImpl)
        throws DatabaseException {

        DatabaseEntry nodeNameKey = new DatabaseEntry();
        StringBinding.stringToEntry(node.getName(), nodeNameKey);

        final RepGroupDB.NodeBinding nodeBinding =
            new RepGroupDB.NodeBinding();
        DatabaseEntry memberInfoEntry = new DatabaseEntry();
        nodeBinding.objectToEntry(node, memberInfoEntry);

        Cursor cursor = null;
        try {
            cursor = makeCursor(groupDbImpl, txn, CursorConfig.DEFAULT);

            OperationStatus status =  cursor.put(nodeNameKey, memberInfoEntry);
            if (status != OperationStatus.SUCCESS) {
                throw EnvironmentFailureException.unexpectedState
                    ("Group entry save failed");
            }
        } finally {
            if (cursor != null) {
                cursor.close();
            }
        }
    }

    public static class GroupBinding extends TupleBinding<RepGroupImpl>  {

        @Override
        public RepGroupImpl entryToObject(TupleInput input) {
            return new RepGroupImpl(input.readString(),
                                          new UUID(input.readLong(),
                                                   input.readLong()),
                                          input.readInt(),
                                          input.readInt(),
                                          input.readInt(),
                                          null);
        }

        @Override
        public void objectToEntry(RepGroupImpl group, TupleOutput output) {
          output.writeString(group.getName());
          output.writeLong(group.getUUID().getMostSignificantBits());
          output.writeLong(group.getUUID().getLeastSignificantBits());
          output.writeInt(group.getVersion());
          output.writeInt(group.getChangeVersion());
          output.writeInt(group.getNodeIdSequence());
        }
    }

    /**
     * Supports the serialization/deserialization of node info into and out of
     * the database.
     */
    public static class NodeBinding extends TupleBinding<RepNodeImpl> {

        @Override
        public RepNodeImpl entryToObject(TupleInput input) {
            RepNodeImpl mi =
                new RepNodeImpl(NameIdPair.deserialize(input),
                                  NodeType.values()[input.readByte()],
                                  input.readBoolean(),
                                  input.readBoolean(),
                                  input.readString(),
                                  input.readInt(),
                                  new BarrierState(new VLSN(input.readLong()),
                                                  input.readLong()),
                                                  input.readInt());
            return mi;
        }

        @Override
        public void objectToEntry(RepNodeImpl mi, TupleOutput output) {
            final BarrierState syncState = mi.getBarrierState();
            mi.getNameIdPair().serialize(output);
            output.writeByte(mi.getType().ordinal());
            output.writeBoolean(mi.isQuorumAck());
            output.writeBoolean(mi.isRemoved());
            output.writeString(mi.getHostName());
            output.writeInt(mi.getPort());
            output.writeLong(syncState.getLastCBVLSN().getSequence());
            output.writeLong(syncState.getBarrierTime());
            output.writeInt(mi.getChangeVersion());
        }
    }

    /**
     * Implements two phase updates for membership changes to the group
     * database. It compartmentalizes the retry operations and exception
     * handling so that it's independent of the core logic.
     */
    private abstract class TwoPhaseUpdate {

        final RepNodeImpl node;
        final DatabaseImpl groupDbImpl;

        protected Txn txn;
        private DatabaseException phase1Exception = null;

        private TwoPhaseUpdate(RepNodeImpl node) {
            super();
            this.node = node;
            try {
                groupDbImpl = repImpl.getGroupDb();
            } catch (DatabaseNotFoundException e) {
                /* Should never happen. */
                throw EnvironmentFailureException.unexpectedException(e);
            }
        }

        /* Phase1 exception handlers for phase1Body-specific cleanup */
        void insufficientReplicasHandler() {}

        void deadlockHandler() {}

        /* The changes to be made in phase1 */
        abstract void phase1Body();

        /* The changes to be made in phase2. */
        void phase2Body() {
            node.setQuorumAck(true);
            saveNodeObject(txn, node, groupDbImpl);
        }

        private void phase1()
            throws DatabaseException {

            for (int i=0; i < QUORUM_ACK_RETRIES; i++ ) {
                txn = null;
                try {
                    txn = new MasterTxn(repImpl,
                                        QUORUM_ACK,
                                        repImpl.getNameIdPair());
                    phase1Body();
                    txn.commit(QUORUM_ACK_DURABILITY);
                    txn = null;
                    return;
                } catch (InsufficientReplicasException e) {
                    phase1Exception = e;
                    insufficientReplicasHandler();
                    /* Commit was aborted. */
                    LoggerUtils.warning(logger, repImpl,
                                        "Phase 1 retry; for node: " +
                                        node.getName() +
                                        " insufficient active replicas: " +
                                        e.getMessage());
                    continue;
                } catch (InsufficientAcksException e) {
                    phase1Exception = e;
                    /* Local commit completed but did not get enough acks. */
                    LoggerUtils.warning(logger, repImpl,
                                        "Phase 1 retry; for node: " +
                                        node.getName() +
                                        " insufficient acks: " +
                                        e.getMessage());
                    continue;
                } catch (LockConflictException e) {
                    /* Likely a timeout, can't distinguish between them. */
                    phase1Exception = e;
                    deadlockHandler();
                    LoggerUtils.warning(logger, repImpl,
                                        "Phase 1 retry; for node: " +
                                        node.getName() +
                                        " deadlock exception: " +
                                        e.getMessage());
                    continue;
                } catch (DatabaseException e) {
                    LoggerUtils.severe(logger, repImpl,
                                       "Phase 1 failed unexpectedly: " +
                                       e.getMessage());
                    if (txn != null) {
                        txn.abort();
                    }
                    throw e;
                } finally {
                    if (txn != null) {
                        txn.abort();
                    }
                }
            }
            LoggerUtils.warning(logger,
                                repImpl,
                                "Phase 1 failed: " +
                                phase1Exception.getMessage());
            throw phase1Exception;
        }

        private void phase2() {
            try {
                txn = new MasterTxn(repImpl, NO_ACK, repImpl.getNameIdPair());
                phase2Body();
                txn.commit();
                txn = null;
            } catch (DatabaseException e) {
                LoggerUtils.severe(logger, repImpl,
                                   "Unexpected failure in Phase 2: " +
                                   e.getMessage());
                throw e;
            } finally {
                if (txn != null) {
                    txn.abort();
                }
            }
        }

        void execute() {
            phase1();
            /* Only executed if phase 1 succeeds. */
            phase2();
        }
    }

    /**
     * An internal API used to obtain group information by opening a stand
     * alone environment handle and reading the RepGroupDB. Used for debugging
     * and utilities.
     *
     * @param envDir the directory containing the environment log files
     *
     * @return the group as currently defined by the environment
     */
    public static RepGroupImpl getGroup(final File envDir) {

        EnvironmentConfig envConfig = new EnvironmentConfig();
        envConfig.setReadOnly(true);
        envConfig.setTransactional(true);
        envConfig.setAllowCreate(false);
        Environment env = new Environment(envDir, envConfig);
        DatabaseConfig dbConfig = new DatabaseConfig();
        dbConfig.setReadOnly(true);
        dbConfig.setTransactional(true);
        dbConfig.setAllowCreate(false);
        Transaction txn = env.beginTransaction(null, null);
        Database db = env.openDatabase(txn, DbType.REP_GROUP.getInternalName(),
                                       dbConfig);

        DatabaseEntry groupEntry = new DatabaseEntry();
        OperationStatus status =
            db.get(txn, groupKeyEntry, groupEntry, LockMode.READ_COMMITTED);
        if (status != OperationStatus.SUCCESS) {
            throw new IllegalStateException
                ("Group entry not found " + status);
        }
        GroupBinding groupBinding = new GroupBinding();
        RepGroupImpl group = groupBinding.entryToObject(groupEntry);

        group = fetchGroup(group.getName(),
                           DbInternal.getDatabaseImpl(db),
                           DbInternal.getTxn(txn));
        txn.commit();
        db.close();
        env.close();
        return group;
    }

    /**
     * Deletes all the current members from the rep group database and creates
     * a new group, with just the member supplied via the configuration. This
     * method exists to support the utility {@link DbResetRepGroup}
     * <p>
     * The changes proceed in three steps:
     *
     * 1) Determine the node id sequence number. This is to ensure that rep
     * node ids are not reused. Old rep node ids are present in the logs as
     * commit records.
     *
     * 2) A new group object, with the node id sequence number determined
     * in step 1), is created and all existing nodes are deleted.
     *
     * 3) The first node is added to the rep group.
     *
     * @param lastOldVLSN the VLSN used to associate the new barrier wrt this
     * node.
     */
    public void reinitFirstNode(VLSN lastOldVLSN) {

        DbConfigManager configManager = repImpl.getConfigManager();
        String groupName = configManager.get(GROUP_NAME);
        String nodeName = configManager.get(NODE_NAME);
        String hostPortPair = configManager.get(RepParams.NODE_HOST_PORT);
        String hostname = HostPortPair.getHostname(hostPortPair);
        int port = HostPortPair.getPort(hostPortPair);

        final DatabaseImpl dbImpl = repImpl.getGroupDb();

        /*
         * Retrieve the previous rep group object, so we can use its node
         * sequence id.
         */
        TransactionConfig txnConfig = new TransactionConfig();
        txnConfig.setDurability(NO_ACK.getDurability());
        txnConfig.setConsistencyPolicy(NO_CONSISTENCY);

        NameIdPair nameIdPair = repImpl.getRepNode().getNameIdPair();
        nameIdPair.revertToNull(); /* read transaction, so null id is ok. */

        /* Now delete old nodes and the group, and establish a new group */
        Txn txn = new MasterTxn(repImpl, txnConfig, nameIdPair);
        RepGroupImpl prevRepGroup = fetchGroupObject(txn, dbImpl);
        txn.commit();

        final int nodeIdSequenceStart = prevRepGroup.getNodeIdSequence();

        final DatabaseEntry keyEntry = new DatabaseEntry();
        final DatabaseEntry value = new DatabaseEntry();

        /*
         * We have the "predicted" real node id, so set it and it will be used
         * in the commit lns that will be written in future.
         */
        final int firstNodeId = nodeIdSequenceStart + 1;
        nameIdPair.setId(firstNodeId);

        RepNodeImpl firstNode = new RepNodeImpl(nodeName, hostname, port);
        final BarrierState barrierState = new BarrierState(lastOldVLSN,
                                                   System.currentTimeMillis());
        firstNode.setBarrierState(barrierState);

        txn = new MasterTxn(repImpl, txnConfig, nameIdPair);

        final CursorConfig cursorConfig = new CursorConfig();
        cursorConfig.setReadCommitted(true);
        Cursor mcursor = makeCursor(dbImpl, txn, cursorConfig);

        while (mcursor.getNext(keyEntry, value, LockMode.DEFAULT) ==
               OperationStatus.SUCCESS) {
            final String key = StringBinding.entryToString(keyEntry);

            if (GROUP_KEY.equals(key)) {
                GroupBinding groupBinding = new GroupBinding();
                RepGroupImpl repGroup = new RepGroupImpl(groupName);
                repGroup.setNodeIdSequence(nodeIdSequenceStart);
                DatabaseEntry groupEntry = new DatabaseEntry();
                groupBinding.objectToEntry(repGroup, groupEntry);
                OperationStatus status = mcursor.putCurrent(groupEntry);
                if (!OperationStatus.SUCCESS.equals(status)) {
                    throw new IllegalStateException("Unexpected state:" +
                                                    status);
                }
            } else {
                LoggerUtils.info(logger, repImpl, "Removing node: " + key);
                mcursor.delete();
            }
        }
        mcursor.close();
        txn.commit();

        /* Now add the first node of the new group. */
        ensureMember(firstNode);
        if (firstNodeId != firstNode.getNodeId()) {
            throw new IllegalStateException("Expected nodeid:" + firstNodeId +
                                            " but found:" +
                                            firstNode.getNodeId());
        }
    }
}
TOP

Related Classes of com.sleepycat.je.rep.impl.RepGroupDB$GroupBinding

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.