[sheepdog] [PATCH 07/13] zookeeper: use rbtree instead of glib's binary tree
Liu Yuan
namei.unix at gmail.com
Tue Dec 18 06:37:56 CET 2012
From: Liu Yuan <tailai.ly at taobao.com>
rbtree works better than balanced binary tree most of the time and it is easy to
maintain because it is used throughout the sheepdog.
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
sheep/cluster/zookeeper.c | 215 ++++++++++++++++++++++-----------------------
1 file changed, 103 insertions(+), 112 deletions(-)
diff --git a/sheep/cluster/zookeeper.c b/sheep/cluster/zookeeper.c
index 8a9083b..3c482a4 100644
--- a/sheep/cluster/zookeeper.c
+++ b/sheep/cluster/zookeeper.c
@@ -21,6 +21,7 @@
#include "event.h"
#include "work.h"
#include "util.h"
+#include "rbtree.h"
#define SESSION_TIMEOUT 30000 /* millisecond */
#define MEMBER_CREATE_TIMEOUT SESSION_TIMEOUT
@@ -49,6 +50,7 @@ enum zk_event_type {
struct zk_node {
bool joined;
+ struct rb_node rb;
clientid_t clientid;
struct sd_node node;
};
@@ -72,17 +74,66 @@ static unsigned zk_levent_head;
static unsigned zk_levent_tail;
static bool called_by_zk_unblock;
-static void *zk_node_btroot;
-static struct zk_node *zk_master;
static struct sd_node sd_nodes[SD_MAX_NODES];
static size_t nr_sd_nodes;
-static size_t nr_zk_nodes;
+
+struct rb_root zk_node_root = RB_ROOT;
static inline bool is_blocking_event(struct zk_event *ev)
{
return ev->type == EVENT_BLOCK || ev->type == EVENT_JOIN_REQUEST;
}
+static struct zk_node *zk_tree_insert(struct zk_node *new)
+{
+ struct rb_node **p = &zk_node_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct zk_node *entry;
+
+ while (*p) {
+ int cmp;
+
+ parent = *p;
+ entry = rb_entry(parent, struct zk_node, rb);
+
+ cmp = node_id_cmp(&new->node.nid, &entry->node.nid);
+ if (cmp < 0)
+ p = &(*p)->rb_left;
+ else if (cmp > 0)
+ p = &(*p)->rb_right;
+ else
+ /* already has this entry */
+ return entry;
+ }
+
+ rb_link_node(&new->rb, parent, p);
+ rb_insert_color(&new->rb, &zk_node_root);
+
+ return NULL; /* insert successfully */
+}
+
+static struct zk_node *zk_tree_search(const struct node_id *nid)
+{
+ struct rb_node *n = zk_node_root.rb_node;
+ struct zk_node *t;
+
+ while (n) {
+ int cmp;
+
+ t = rb_entry(n, struct zk_node, rb);
+ cmp = node_id_cmp(nid, &t->node.nid);
+
+ if (cmp < 0)
+ n = n->rb_left;
+ else if (cmp > 0)
+ n = n->rb_right;
+ else
+ return t; /* found it */
+ }
+
+ return NULL;
+}
+
/* zookeeper API wrapper */
static zhandle_t *zhandle;
static struct zk_node this_node;
@@ -200,8 +251,8 @@ static void zk_queue_push(struct zk_event *ev)
sprintf(path, "%s/", QUEUE_ZNODE);
zk_create_node(path, (char *)ev, len,
&ZOO_OPEN_ACL_UNSAFE, ZOO_SEQUENCE, buf, sizeof(buf));
- dprintf("create path:%s, nr_nodes:%zu, queue_pos:%010"PRId32", len:%d\n"
- , buf,nr_zk_nodes, queue_pos, len);
+ dprintf("create:%s, queue_pos:%010"PRId32", len:%d\n", buf, queue_pos,
+ len);
if (first_push) {
uint32_t seq;
@@ -308,8 +359,8 @@ static int zk_queue_pop(struct zk_event *ev)
rc = zk_get_data(path, 1, (char *)ev, &len, NULL);
if (rc != ZOK)
panic("failed to zk_get_data path:%s, rc:%d\n", path, rc);
- dprintf("read path:%s, nr_nodes:%zu, type:%d, len:%d, rc:%d\n", path,
- nr_zk_nodes, ev->type, len, rc);
+ dprintf("read path:%s, type:%d, len:%d, rc:%d\n", path, ev->type,
+ len, rc);
queue_pos++;
@@ -331,119 +382,63 @@ static int zk_member_empty(void)
struct String_vector strs;
zk_get_children(MEMBER_ZNODE, 1, &strs);
-
return (strs.count == 0);
}
-static inline int zk_node_cmp(const void *a, const void *b)
-{
- const struct zk_node *znode1 = a;
- const struct zk_node *znode2 = b;
- return node_id_cmp(&znode1->node.nid, &znode2->node.nid);
-}
-
-static void node_btree_add(void **btroot, struct zk_node *znode)
-{
- struct zk_node *n, **p;
-
- n = (struct zk_node *)malloc(sizeof(struct zk_node));
- if (n == NULL)
- panic("malloc, oom\n");
-
- *n = *znode;
-
- p = (struct zk_node **)tsearch((void *)n, btroot, zk_node_cmp);
- if (p == NULL)
- panic("tsearch, oom\n");
- else if (*p != n) {
- **p = *n;
- free(n);
- } else
- nr_zk_nodes++;
-}
-
-static inline void node_btree_del(void **btroot, struct zk_node *znode)
+static inline void zk_tree_add(struct zk_node *node)
{
- tdelete((void *)znode, btroot, zk_node_cmp);
- free(znode);
- nr_zk_nodes--;
+ struct zk_node *zk = malloc(sizeof(*zk));
+ *zk = *node;
+ if (zk_tree_insert(zk))
+ free(zk);
}
-static inline void node_btree_clear(void **btroot)
+static inline void zk_tree_del(struct zk_node *node)
{
- tdestroy(*btroot, free);
- *btroot = NULL;
- nr_zk_nodes = 0;
+ rb_erase(&node->rb, &zk_node_root);
+ free(node);
}
-static struct zk_node *node_btree_find(void **btroot, struct zk_node *znode)
+static inline void zk_tree_destroy(void)
{
- struct zk_node **p;
-
- p = (struct zk_node **)tfind((void *)znode, btroot, zk_node_cmp);
- if (p)
- return *p;
+ struct zk_node *zk;
+ int i;
- return NULL;
-}
-
-static void node_btree_build_list_fn(const void *nodep,
- const VISIT which, const int depth)
-{
- struct zk_node *znode;
-
- switch (which) {
- case preorder:
- break;
- case postorder:
- case leaf:
- znode = *(struct zk_node **) nodep;
- sd_nodes[nr_sd_nodes++] = znode->node;
- break;
- case endorder:
- break;
+ for (i = 0; i < nr_sd_nodes; i++) {
+ zk = zk_tree_search(&sd_nodes[i].nid);
+ if (zk)
+ zk_tree_del(zk);
}
}
-static inline void build_node_list(void *btroot)
+static inline void build_node_list(void)
{
- nr_sd_nodes = 0;
- twalk(btroot, node_btree_build_list_fn);
- assert(nr_sd_nodes == nr_zk_nodes);
- dprintf("nr_sd_nodes:%zu\n", nr_sd_nodes);
-}
+ struct rb_node *n;
+ struct zk_node *zk;
-static void node_btree_find_master_fn(const void *nodep,
- const VISIT which, const int depth)
-{
- switch (which) {
- case preorder:
- break;
- case postorder:
- case leaf:
- if (zk_master)
- break;
- zk_master = *(struct zk_node **) nodep;
- dprintf("master:%s\n", node_to_str(&zk_master->node));
- break;
- case endorder:
- break;
+ nr_sd_nodes = 0;
+ for (n = rb_first(&zk_node_root); n; n = rb_next(n)) {
+ zk = rb_entry(n, struct zk_node, rb);
+ sd_nodes[nr_sd_nodes++] = zk->node;
}
+ dprintf("nr_sd_nodes:%zu\n", nr_sd_nodes);
}
-static bool is_master(struct zk_node *znode)
+static bool is_master(void)
{
- zk_master = NULL;
+ struct rb_node *n;
+ struct zk_node *zk;
- if (!zk_node_btroot) {
+ if (!nr_sd_nodes) {
if (zk_member_empty())
return true;
else
return false;
}
- twalk(zk_node_btroot, node_btree_find_master_fn);
- if (node_eq(&zk_master->node, &znode->node))
+ n = rb_first(&zk_node_root);
+ zk = rb_entry(n, struct zk_node, rb);
+ if (node_eq(&zk->node, &this_node.node))
return true;
return false;
@@ -478,7 +473,7 @@ static void zk_member_init(void)
switch (rc) {
case ZOK:
- node_btree_add(&zk_node_btroot, &znode);
+ zk_tree_add(&znode);
case ZNONODE:
break;
default:
@@ -486,7 +481,6 @@ static void zk_member_init(void)
}
}
}
- dprintf("nr_nodes:%zu\n", nr_zk_nodes);
}
static int add_event(enum zk_event_type type, struct zk_node *znode, void *buf,
@@ -645,11 +639,11 @@ static void zk_handle_join_request(struct zk_event *ev)
{
enum cluster_join_result res;
- dprintf("nr_nodes: %zu, sender: %s, joined: %d\n",
- nr_zk_nodes, node_to_str(&ev->sender.node),
+ dprintf("sender: %s, joined: %d\n", node_to_str(&ev->sender.node),
ev->sender.joined);
- if (!is_master(&this_node)) {
+ if (!is_master()) {
+ /* Let's await master acking the join-request */
queue_pos--;
return;
}
@@ -659,7 +653,6 @@ static void zk_handle_join_request(struct zk_event *ev)
ev->type = EVENT_JOIN_RESPONSE;
ev->sender.joined = true;
- dprintf("I'm master, push back join event\n");
zk_queue_push_back(ev);
if (res == CJ_RES_MASTER_TRANSFER) {
@@ -668,6 +661,7 @@ static void zk_handle_join_request(struct zk_event *ev)
zk_leave();
exit(1);
}
+ dprintf("I'm the master now\n");
}
static void zk_handle_join_response(struct zk_event *ev)
@@ -676,7 +670,7 @@ static void zk_handle_join_response(struct zk_event *ev)
int rc;
dprintf("JOIN RESPONSE\n");
- if (is_master(&this_node) &&
+ if (is_master() &&
!node_eq(&ev->sender.node, &this_node.node)) {
/* wait util the member node has been created */
int retry =
@@ -691,7 +685,7 @@ static void zk_handle_join_response(struct zk_event *ev)
retry--;
}
if (retry <= 0) {
- dprintf("Sender:%s failed to create member, ignore it\n",
+ dprintf("%s failed to create member, ignore it\n",
node_to_str(&ev->sender.node));
return;
}
@@ -706,11 +700,11 @@ static void zk_handle_join_response(struct zk_event *ev)
* itself) is alive in MASTER_TRANSFER scenario. So only
* the joining sheep will run into here.
*/
- node_btree_clear(&zk_node_btroot);
+ zk_tree_destroy();
- node_btree_add(&zk_node_btroot, &ev->sender);
- dprintf("nr_nodes:%zu, sender:%s, joined:%d\n", nr_zk_nodes,
- node_to_str(&ev->sender.node), ev->sender.joined);
+ zk_tree_add(&ev->sender);
+ dprintf("sender:%s, joined:%d\n", node_to_str(&ev->sender.node),
+ ev->sender.joined);
switch (ev->join_result) {
case CJ_RES_SUCCESS:
@@ -733,24 +727,21 @@ static void zk_handle_join_response(struct zk_event *ev)
break;
}
- build_node_list(zk_node_btroot);
+ build_node_list();
sd_join_handler(&ev->sender.node, sd_nodes, nr_sd_nodes,
ev->join_result, ev->buf);
}
static void zk_handle_leave(struct zk_event *ev)
{
- struct zk_node *n = node_btree_find(&zk_node_btroot, &ev->sender);
+ struct zk_node *n = zk_tree_search(&ev->sender.node.nid);
if (!n) {
dprintf("can't find this leave node:%s, ignore it.\n",
node_to_str(&ev->sender.node));
return;
}
-
- node_btree_del(&zk_node_btroot, n);
- dprintf("one sheep left, nr_nodes:%zu\n", nr_zk_nodes);
-
- build_node_list(zk_node_btroot);
+ zk_tree_del(n);
+ build_node_list();
sd_leave_handler(&ev->sender.node, sd_nodes, nr_sd_nodes);
}
--
1.7.9.5
More information about the sheepdog
mailing list