Package org.apache.hedwig.server.topics

Source Code of org.apache.hedwig.server.topics.ZkTopicManager$ZkGetOwnerOp

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hedwig.server.topics;

import java.net.UnknownHostException;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.SynchronousQueue;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.KeeperException.Code;
import org.apache.zookeeper.ZooDefs.Ids;
import org.apache.zookeeper.data.Stat;

import com.google.protobuf.ByteString;
import org.apache.hedwig.exceptions.PubSubException;
import org.apache.hedwig.server.common.ServerConfiguration;
import org.apache.hedwig.util.Callback;
import org.apache.hedwig.util.ConcurrencyUtils;
import org.apache.hedwig.util.Either;
import org.apache.hedwig.util.HedwigSocketAddress;
import org.apache.hedwig.zookeeper.SafeAsyncZKCallback;
import org.apache.hedwig.zookeeper.ZkUtils;
import org.apache.hedwig.zookeeper.SafeAsyncZKCallback.DataCallback;
import org.apache.hedwig.zookeeper.SafeAsyncZKCallback.StatCallback;

/**
* Topics are operated on in parallel as they are independent.
*
*/
public class ZkTopicManager extends AbstractTopicManager implements TopicManager {

    static Logger logger = LoggerFactory.getLogger(ZkTopicManager.class);
    Random rand = new Random();

    /**
     * Persistent storage for topic metadata.
     */
    private ZooKeeper zk;
    String ephemeralNodePath;

    StatCallback loadReportingStatCallback = new StatCallback() {
        @Override
        public void safeProcessResult(int rc, String path, Object ctx, Stat stat) {
            if (rc != KeeperException.Code.OK.intValue()) {
                logger.warn("Failed to update load information in zk");
            }
        }
    };

    // Boolean flag indicating if we should suspend activity. If this is true,
    // all of the Ops put into the queuer will fail automatically.
    protected volatile boolean isSuspended = false;

    /**
     * Create a new topic manager. Pass in an active ZooKeeper client object.
     *
     * @param zk
     */
    public ZkTopicManager(final ZooKeeper zk, final ServerConfiguration cfg, ScheduledExecutorService scheduler)
            throws UnknownHostException, PubSubException {

        super(cfg, scheduler);
        this.zk = zk;
        this.ephemeralNodePath = cfg.getZkHostsPrefix(new StringBuilder()).append("/").append(addr).toString();

        zk.register(new Watcher() {
            @Override
            public void process(WatchedEvent event) {
                if (event.getType().equals(Watcher.Event.EventType.None)) {
                    if (event.getState().equals(Watcher.Event.KeeperState.Disconnected)) {
                        logger.warn("ZK client has been disconnected to the ZK server!");
                        isSuspended = true;
                    } else if (event.getState().equals(Watcher.Event.KeeperState.SyncConnected)) {
                        if (isSuspended) {
                            logger.info("ZK client has been reconnected to the ZK server!");
                        }
                        isSuspended = false;
                    }
                }
                // Check for expired connection.
                if (event.getState().equals(Watcher.Event.KeeperState.Expired)) {
                    logger.error("ZK client connection to the ZK server has expired!");
                    System.exit(1);
                }
            }
        });
        final SynchronousQueue<Either<Void, PubSubException>> queue = new SynchronousQueue<Either<Void, PubSubException>>();

        registerWithZookeeper(new Callback<Void>() {
            @Override
            public void operationFailed(Object ctx, PubSubException exception) {
                logger.error("Failed to register hub with zookeeper", exception);
                ConcurrencyUtils.put(queue, Either.of((Void) null, exception));
            }

            @Override
            public void operationFinished(Object ctx, Void resultOfOperation) {
                logger.info("Successfully registered hub with zookeeper");
                ConcurrencyUtils.put(queue, Either.of(resultOfOperation, (PubSubException) null));
            }
        }, null);

        PubSubException pse = ConcurrencyUtils.take(queue).right();

        if (pse != null) {
            throw pse;
        }
    }

    void registerWithZookeeper(final Callback<Void> callback, Object ctx) {

        ZkUtils.createFullPathOptimistic(zk, ephemeralNodePath, getCurrentLoadData(), Ids.OPEN_ACL_UNSAFE,
        CreateMode.EPHEMERAL, new SafeAsyncZKCallback.StringCallback() {

            @Override
            public void safeProcessResult(int rc, String path, Object ctx, String name) {
                if (rc == Code.OK.intValue()) {
                    callback.operationFinished(ctx, null);
                    return;
                }
                if (rc != Code.NODEEXISTS.intValue()) {
                    KeeperException ke = ZkUtils.logErrorAndCreateZKException(
                                             "Could not create ephemeral node to register hub", ephemeralNodePath, rc);
                    callback.operationFailed(ctx, new PubSubException.ServiceDownException(ke));
                    return;
                }

                logger.info("Found stale ephemeral node while registering hub with ZK, deleting it");

                // Node exists, lets try to delete it and retry
                zk.delete(ephemeralNodePath, -1, new SafeAsyncZKCallback.VoidCallback() {
                    @Override
                    public void safeProcessResult(int rc, String path, Object ctx) {
                        if (rc == Code.OK.intValue() || rc == Code.NONODE.intValue()) {
                            registerWithZookeeper(callback, ctx);
                            return;
                        }
                        KeeperException ke = ZkUtils.logErrorAndCreateZKException(
                                                 "Could not delete stale ephemeral node to register hub", ephemeralNodePath, rc);
                        callback.operationFailed(ctx, new PubSubException.ServiceDownException(ke));
                        return;

                    }
                }, ctx);

            }
        }, null);
    }

    String hubPath(ByteString topic) {
        return cfg.getZkTopicPath(new StringBuilder(), topic).append("/hub").toString();
    }

    @Override
    protected void realGetOwner(final ByteString topic, final boolean shouldClaim,
                                final Callback<HedwigSocketAddress> cb, final Object ctx) {
        // If operations are suspended due to a ZK client disconnect, just error
        // out this call and return.
        if (isSuspended) {
            cb.operationFailed(ctx, new PubSubException.ServiceDownException(
                                   "ZKTopicManager service is temporarily suspended!"));
            return;
        }

        if (topics.contains(topic)) {
            cb.operationFinished(ctx, addr);
            return;
        }

        new ZkGetOwnerOp(topic, shouldClaim, cb, ctx).read();
    }

    // Recursively call each other.
    class ZkGetOwnerOp {
        ByteString topic;
        boolean shouldClaim;
        Callback<HedwigSocketAddress> cb;
        Object ctx;
        String hubPath;

        public ZkGetOwnerOp(ByteString topic, boolean shouldClaim, Callback<HedwigSocketAddress> cb, Object ctx) {
            this.topic = topic;
            this.shouldClaim = shouldClaim;
            this.cb = cb;
            this.ctx = ctx;
            hubPath = hubPath(topic);

        }

        public void choose() {
            // Get the list of existing hosts
            String registeredHostsPath = cfg.getZkHostsPrefix(new StringBuilder()).toString();
            zk.getChildren(registeredHostsPath, false, new SafeAsyncZKCallback.ChildrenCallback() {
                @Override
                public void safeProcessResult(int rc, String path, Object ctx, List<String> children) {
                    if (rc != Code.OK.intValue()) {
                        KeeperException e = ZkUtils.logErrorAndCreateZKException(
                                                "Could not get list of available hubs", path, rc);
                        cb.operationFailed(ctx, new PubSubException.ServiceDownException(e));
                        return;
                    }
                    chooseLeastLoadedNode(children);
                }
            }, null);
        }

        public void chooseLeastLoadedNode(final List<String> children) {
            DataCallback dataCallback = new DataCallback() {
                int numResponses = 0;
                int minLoad = Integer.MAX_VALUE;
                String leastLoaded = null;

                @Override
                public void safeProcessResult(int rc, String path, Object ctx, byte[] data, Stat stat) {
                    synchronized (this) {
                        if (rc == KeeperException.Code.OK.intValue()) {
                            try {
                                int load = Integer.parseInt(new String(data));
                                if (logger.isDebugEnabled()) {
                                    logger.debug("Found server: " + ctx + " with load: " + load);
                                }
                                if (load < minLoad  || (load == minLoad && rand.nextBoolean())) {
                                    minLoad = load;
                                    leastLoaded = (String) ctx;
                                }
                            } catch (NumberFormatException e) {
                                logger.warn("Corrupted load information from hub:" + ctx);
                                // some corrupted data, we'll just ignore this
                                // hub
                            }
                        }
                        numResponses++;

                        if (numResponses == children.size()) {
                            if (leastLoaded == null) {
                                cb.operationFailed(ZkGetOwnerOp.this.ctx, new PubSubException.ServiceDownException(
                                                       "No hub available"));
                                return;
                            }
                            HedwigSocketAddress owner = new HedwigSocketAddress(leastLoaded);
                            if (owner.equals(addr)) {
                                claim();
                            } else {
                                cb.operationFinished(ZkGetOwnerOp.this.ctx, owner);
                            }
                        }
                    }

                }
            };

            for (String child : children) {
                zk.getData(cfg.getZkHostsPrefix(new StringBuilder()).append("/").append(child).toString(), false,
                           dataCallback, child);
            }
        }

        public void claimOrChoose() {
            if (shouldClaim)
                claim();
            else
                choose();
        }

        public void read() {
            zk.getData(hubPath, false, new SafeAsyncZKCallback.DataCallback() {
                @Override
                public void safeProcessResult(int rc, String path, Object ctx, byte[] data, Stat stat) {

                    if (rc == Code.NONODE.intValue()) {
                        claimOrChoose();
                        return;
                    }

                    if (rc != Code.OK.intValue()) {
                        KeeperException e = ZkUtils.logErrorAndCreateZKException("Could not read ownership for topic: "
                                            + topic.toStringUtf8(), path, rc);
                        cb.operationFailed(ctx, new PubSubException.ServiceDownException(e));
                        return;
                    }

                    // successfully did a read
                    HedwigSocketAddress owner = new HedwigSocketAddress(new String(data));
                    if (!owner.equals(addr)) {
                        if (logger.isDebugEnabled()) {
                            logger.debug("topic: " + topic.toStringUtf8() + " belongs to someone else: " + owner);
                        }
                        cb.operationFinished(ctx, owner);
                        return;
                    }

                    logger.info("Discovered stale self-node for topic: " + topic.toStringUtf8() + ", will delete it");

                    // we must have previously failed and left a
                    // residual ephemeral node here, so we must
                    // delete it (clean it up) and then
                    // re-create/re-acquire the topic.
                    zk.delete(hubPath, stat.getVersion(), new VoidCallback() {
                        @Override
                        public void processResult(int rc, String path, Object ctx) {
                            if (Code.OK.intValue() == rc || Code.NONODE.intValue() == rc) {
                                claimOrChoose();
                            } else {
                                KeeperException e = ZkUtils.logErrorAndCreateZKException(
                                                        "Could not delete self node for topic: " + topic.toStringUtf8(), path, rc);
                                cb.operationFailed(ctx, new PubSubException.ServiceDownException(e));
                            }
                        }
                    }, ctx);
                }
            }, ctx);
        }

        public void claim() {
            if (logger.isDebugEnabled()) {
                logger.debug("claiming topic: " + topic.toStringUtf8());
            }

            ZkUtils.createFullPathOptimistic(zk, hubPath, addr.toString().getBytes(), Ids.OPEN_ACL_UNSAFE,
            CreateMode.EPHEMERAL, new SafeAsyncZKCallback.StringCallback() {

                @Override
                public void safeProcessResult(int rc, String path, Object ctx, String name) {
                    if (rc == Code.OK.intValue()) {
                        if (logger.isDebugEnabled()) {
                            logger.debug("claimed topic: " + topic.toStringUtf8());
                        }
                        notifyListenersAndAddToOwnedTopics(topic, cb, ctx);
                        updateLoadInformation();
                    } else if (rc == Code.NODEEXISTS.intValue()) {
                        read();
                    } else {
                        KeeperException e = ZkUtils.logErrorAndCreateZKException(
                                                "Failed to create ephemeral node to claim ownership of topic: "
                                                + topic.toStringUtf8(), path, rc);
                        cb.operationFailed(ctx, new PubSubException.ServiceDownException(e));
                    }
                }
            }, ctx);
        }

    }

    byte[] getCurrentLoadData() {
        // For now, using the number of topics as an indicator of load
        // information
        return (topics.size() + "").getBytes();
    }

    void updateLoadInformation() {
        byte[] currentLoad = getCurrentLoadData();
        if (logger.isDebugEnabled()) {
            logger.debug("Reporting load of " + new String(currentLoad));
        }
        zk.setData(ephemeralNodePath, currentLoad, -1, loadReportingStatCallback, null);
    }

    @Override
    protected void postReleaseCleanup(final ByteString topic, final Callback<Void> cb, Object ctx) {

        zk.getData(hubPath(topic), false, new SafeAsyncZKCallback.DataCallback() {
            @Override
            public void safeProcessResult(int rc, String path, Object ctx, byte[] data, Stat stat) {
                if (rc == Code.NONODE.intValue()) {
                    // Node has somehow disappeared from under us, live with it
                    // since its a transient node
                    logger.warn("While deleting self-node for topic: " + topic.toStringUtf8() + ", node not found");
                    cb.operationFinished(ctx, null);
                    return;
                }

                if (rc != Code.OK.intValue()) {
                    KeeperException e = ZkUtils.logErrorAndCreateZKException(
                                            "Failed to delete self-ownership node for topic: " + topic.toStringUtf8(), path, rc);
                    cb.operationFailed(ctx, new PubSubException.ServiceDownException(e));
                    return;
                }

                HedwigSocketAddress owner = new HedwigSocketAddress(new String(data));
                if (!owner.equals(addr)) {
                    logger.warn("Wanted to delete self-node for topic: " + topic.toStringUtf8() + " but node for "
                                + owner + " found, leaving untouched");
                    // Not our node, someone else's, leave it alone
                    cb.operationFinished(ctx, null);
                    return;
                }

                zk.delete(path, stat.getVersion(), new SafeAsyncZKCallback.VoidCallback() {
                    @Override
                    public void safeProcessResult(int rc, String path, Object ctx) {
                        if (rc != Code.OK.intValue() && rc != Code.NONODE.intValue()) {
                            KeeperException e = ZkUtils
                                                .logErrorAndCreateZKException("Failed to delete self-ownership node for topic: "
                                                        + topic.toStringUtf8(), path, rc);
                            cb.operationFailed(ctx, new PubSubException.ServiceDownException(e));
                            return;
                        }

                        cb.operationFinished(ctx, null);
                    }
                }, ctx);
            }
        }, ctx);
    }

}
TOP

Related Classes of org.apache.hedwig.server.topics.ZkTopicManager$ZkGetOwnerOp

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.