[sheepdog] [PATCH] zookeeper: hande node joining race
Christoph Hellwig
hch at infradead.org
Mon May 28 17:25:24 CEST 2012
I got a bug report where the nr_sd_nodes == nr_zk_nodes assert in
build_node_list is trigger by a larger number of sheep joining at the same
time.
>From code inspection it seems like this case can be triggered if
node_btree_add is called twice for the same node, and tsearch returns
the existing node, but nr_zk_nodes is still incremented.
It seems like node_btree_add could be called twice if the startup of
the zookeeper driver hits a race between reading out the list of existing
nodes and new events.
The patch below fixes the problem by handling this case in node_btree_add
and asserting it only happens for valid case. I'm not overly happy with
this fix, fixes based on better knowledge of zookeeper would be welcome.
Signed-off-by: Christoph Hellwig <hch at lst.de>
diff --git a/sheep/cluster/zookeeper.c b/sheep/cluster/zookeeper.c
index 859f2b0..a09d55a 100644
--- a/sheep/cluster/zookeeper.c
+++ b/sheep/cluster/zookeeper.c
@@ -330,7 +330,7 @@ static inline int zk_node_cmp(const void *a, const void *b)
return node_cmp(&znode1->node, &znode2->node);
}
-static void node_btree_add(void **btroot, struct zk_node *znode)
+static int node_btree_add(void **btroot, struct zk_node *znode)
{
struct zk_node *n, **p;
@@ -343,11 +343,14 @@ static void node_btree_add(void **btroot, struct zk_node *znode)
p = (struct zk_node **)tsearch((void *)n, btroot, zk_node_cmp);
if (p == NULL)
panic("tsearch, oom\n");
- else if (*p != n) {
+
+ if (*p != n) {
**p = *n;
free(n);
+ return -1;
}
nr_zk_nodes++;
+ return 0;
}
static inline void node_btree_del(void **btroot, struct zk_node *znode)
@@ -466,7 +469,10 @@ static void zk_member_init(zhandle_t *zh)
switch (rc) {
case ZOK:
- node_btree_add(&zk_node_btroot, &znode);
+ rc = node_btree_add(&zk_node_btroot, &znode);
+ if (rc)
+ panic("node %s already exists\n", path);
+ break;
case ZNONODE:
break;
default:
@@ -770,7 +776,13 @@ static void zk_handler(int listen_fd, int events, void *data)
*/
node_btree_clear(&zk_node_btroot);
- node_btree_add(&zk_node_btroot, &ev.sender);
+ rc = node_btree_add(&zk_node_btroot, &ev.sender);
+ if (rc) {
+ if (node_eq(&ev.sender.node, &this_node.node))
+ panic("this_node already in btree\n");
+ dprintf("node %s already existed\n",
+ node_to_str(&ev.sender.node));
+ }
dprintf("one sheep joined[down], nr_nodes:%ld, sender:%s, joined:%d\n",
nr_zk_nodes, node_to_str(&ev.sender.node), ev.sender.joined);
More information about the sheepdog
mailing list