[sheepdog] [PATCH] zookeeper: hande node joining race

Christoph Hellwig hch at infradead.org
Mon May 28 17:25:24 CEST 2012


I got a bug report where the nr_sd_nodes == nr_zk_nodes assert in
build_node_list is trigger by a larger number of sheep joining at the same
time.

>From code inspection it seems like this case can be triggered if
node_btree_add is called twice for the same node, and tsearch returns
the existing node, but nr_zk_nodes is still incremented.

It seems like node_btree_add could be called twice if the startup of
the zookeeper driver hits a race between reading out the list of existing
nodes and new events.

The patch below fixes the problem by handling this case in node_btree_add
and asserting it only happens for valid case.  I'm not overly happy with
this fix, fixes based on better knowledge of zookeeper would be welcome.

Signed-off-by: Christoph Hellwig <hch at lst.de>

diff --git a/sheep/cluster/zookeeper.c b/sheep/cluster/zookeeper.c
index 859f2b0..a09d55a 100644
--- a/sheep/cluster/zookeeper.c
+++ b/sheep/cluster/zookeeper.c
@@ -330,7 +330,7 @@ static inline int zk_node_cmp(const void *a, const void *b)
 	return node_cmp(&znode1->node, &znode2->node);
 }
 
-static void node_btree_add(void **btroot, struct zk_node *znode)
+static int node_btree_add(void **btroot, struct zk_node *znode)
 {
 	struct zk_node *n, **p;
 
@@ -343,11 +343,14 @@ static void node_btree_add(void **btroot, struct zk_node *znode)
 	p = (struct zk_node **)tsearch((void *)n, btroot, zk_node_cmp);
 	if (p == NULL)
 		panic("tsearch, oom\n");
-	else if (*p != n) {
+
+	if (*p != n) {
 		**p = *n;
 		free(n);
+		return -1;
 	}
 	nr_zk_nodes++;
+	return 0;
 }
 
 static inline void node_btree_del(void **btroot, struct zk_node *znode)
@@ -466,7 +469,10 @@ static void zk_member_init(zhandle_t *zh)
 
 			switch (rc) {
 			case ZOK:
-				node_btree_add(&zk_node_btroot, &znode);
+				rc = node_btree_add(&zk_node_btroot, &znode);
+				if (rc)
+					panic("node %s already exists\n", path);
+				break;
 			case ZNONODE:
 				break;
 			default:
@@ -770,7 +776,13 @@ static void zk_handler(int listen_fd, int events, void *data)
 			 */
 			node_btree_clear(&zk_node_btroot);
 
-		node_btree_add(&zk_node_btroot, &ev.sender);
+		rc = node_btree_add(&zk_node_btroot, &ev.sender);
+		if (rc) {
+			if (node_eq(&ev.sender.node, &this_node.node))
+				panic("this_node already in btree\n");
+			dprintf("node %s already existed\n",
+				node_to_str(&ev.sender.node));
+		}
 		dprintf("one sheep joined[down], nr_nodes:%ld, sender:%s, joined:%d\n",
 				nr_zk_nodes, node_to_str(&ev.sender.node), ev.sender.joined);
 



More information about the sheepdog mailing list