[sheepdog] [PATCH 3/3] sheep: start cluster if all the nodes in the previous epoch are gathered
Liu Yuan
namei.unix at gmail.com
Thu Jul 4 08:42:44 CEST 2013
On Wed, Jul 03, 2013 at 03:49:23PM +0900, MORITA Kazutaka wrote:
> From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
>
> The current rules to start sheepdog automatically are too complex:
>
> - One of the nodes in the latest epoch becomes the master. If a
> joining node has a larger epoch, that node becomes the new master
> and the current master must exit and rejoin later.
>
> - If a joining node has a smaller epoch than the existing nodes,
> the node is linked to the delayed node list and rejoins
> automatically after sheepdog starts.
>
> - Sheepdog starts when the current node list becomes the same as
> the one in the previous epoch.
>
> After this patch, we have only one rule:
>
> - Sheepdog starts if all the nodes in the previous epoch are
> gathered.
>
> Nodes with a smaller or larger epoch can also join, and all the
> joined nodes share the same (latest) cluster_info. Since every node
> then has the latest epoch, any node can be the master. Once all the
> nodes in the previous epoch have joined Sheepdog, the cluster starts
> automatically.
>
> This change also brings the following benefit:
>
> - We can remove the failed node list, the delayed node list, and the
> CJ_RES_JOIN_LATER status, since a node with a smaller epoch can now
> join Sheepdog.
>
> - The CJ_RES_MASTER_TRANSFER status can be removed because any node
> can be the master, so there is no need to transfer mastership even
> if a node with a larger epoch joins Sheepdog.
>
> Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
> ---
> include/internal_proto.h | 16 +--
> include/shepherd.h | 5 +-
> sheep/cluster.h | 2 +
> sheep/cluster/corosync.c | 16 +--
> sheep/cluster/local.c | 21 +--
> sheep/cluster/shepherd.c | 32 ++---
> sheep/cluster/zookeeper.c | 20 +--
> sheep/group.c | 313 ++++++++-------------------------------------
> shepherd/shepherd.c | 8 +-
> tests/functional/001 | 13 --
> tests/functional/001.out | 19 +--
> tests/functional/002 | 14 --
> tests/functional/002.out | 18 +--
> tests/functional/003 | 14 --
> tests/functional/003.out | 18 +--
> tests/functional/004 | 16 ---
> tests/functional/004.out | 39 +-----
> tests/functional/005 | 17 ---
> tests/functional/005.out | 45 +------
> tests/functional/060 | 17 +--
> 20 files changed, 105 insertions(+), 558 deletions(-)
>
> diff --git a/include/internal_proto.h b/include/internal_proto.h
> index f0c55c0..c5cd76d 100644
> --- a/include/internal_proto.h
> +++ b/include/internal_proto.h
> @@ -155,9 +155,7 @@ struct epoch_log {
>
> struct join_message {
> uint8_t proto_ver;
> - uint8_t __pad1[3];
> - uint16_t nr_failed_nodes;
> - uint16_t nr_delayed_nodes;
> + uint8_t __pad1[7];
> uint32_t cluster_status;
> uint8_t inc_epoch; /* set non-zero when we increment epoch of all nodes */
> uint8_t __pad2[3];
> @@ -195,18 +193,6 @@ enum cluster_join_result {
>
> /* Fail to join. The joining node has an invalid epoch. */
> CJ_RES_FAIL,
> -
> - /*
> - * Fail to join. The joining node should be added after the cluster
> - * start working.
> - */
> - CJ_RES_JOIN_LATER,
> -
> - /*
> - * Transfer mastership. The joining node has a newer epoch, so this
> - * node will leave the cluster (restart later).
> - */
> - CJ_RES_MASTER_TRANSFER,
> };
>
> static inline __attribute__((used)) void __sd_epoch_format_build_bug_ons(void)
> diff --git a/include/shepherd.h b/include/shepherd.h
> index 5dbc64b..e1fbac1 100644
> --- a/include/shepherd.h
> +++ b/include/shepherd.h
> @@ -39,8 +39,11 @@ struct sph_msg {
>
> struct sph_msg_join {
> uint32_t res; /* original type: enum cluster_join_result */
> - struct sd_node node;
> + struct sd_node new_node;
> uint8_t master_elected;
> +
> + struct sd_node nodes[SD_MAX_NODES];
> + uint32_t nr_nodes;
> uint8_t opaque[0];
> };
>
> diff --git a/sheep/cluster.h b/sheep/cluster.h
> index 80a701b..7665d75 100644
> --- a/sheep/cluster.h
> +++ b/sheep/cluster.h
> @@ -158,6 +158,8 @@ void sd_notify_handler(const struct sd_node *sender, void *msg, size_t msg_len);
> bool sd_block_handler(const struct sd_node *sender);
> int sd_reconnect_handler(void);
> enum cluster_join_result sd_check_join_cb(const struct sd_node *joining,
> + const struct sd_node *members,
> + size_t nr_members,
> void *opaque);
> void recalculate_vnodes(struct sd_node *nodes, int nr_nodes);
>
> diff --git a/sheep/cluster/corosync.c b/sheep/cluster/corosync.c
> index 2be978b..bf90209 100644
> --- a/sheep/cluster/corosync.c
> +++ b/sheep/cluster/corosync.c
> @@ -295,28 +295,18 @@ static bool __corosync_dispatch_one(struct corosync_event *cevent)
> /* check_join() must be called only once */
> return false;
>
> - res = sd_check_join_cb(&cevent->sender.ent,
> - cevent->msg);
> - if (res == CJ_RES_MASTER_TRANSFER)
> - nr_cpg_nodes = 0;
> -
> + build_node_list(cpg_nodes, nr_cpg_nodes, entries);
> + res = sd_check_join_cb(&cevent->sender.ent, entries,
> + nr_cpg_nodes, cevent->msg);
> send_message(COROSYNC_MSG_TYPE_JOIN_RESPONSE, res,
> &cevent->sender, cpg_nodes, nr_cpg_nodes,
> cevent->msg, cevent->msg_len);
>
> - if (res == CJ_RES_MASTER_TRANSFER) {
> - sd_eprintf("failed to join sheepdog cluster:"
> - " please retry when master is up");
> - exit(1);
> - }
> -
> cevent->callbacked = true;
> return false;
> case COROSYNC_EVENT_TYPE_JOIN_RESPONSE:
> switch (cevent->result) {
> case CJ_RES_SUCCESS:
> - case CJ_RES_MASTER_TRANSFER:
> - case CJ_RES_JOIN_LATER:
> add_cpg_node(cpg_nodes, nr_cpg_nodes, &cevent->sender);
> nr_cpg_nodes++;
> /* fall through */
> diff --git a/sheep/cluster/local.c b/sheep/cluster/local.c
> index 491e3ea..572aa19 100644
> --- a/sheep/cluster/local.c
> +++ b/sheep/cluster/local.c
> @@ -441,31 +441,18 @@ static bool local_process_event(void)
>
> switch (ev->type) {
> case EVENT_JOIN_REQUEST:
> - res = sd_check_join_cb(&ev->sender.node, ev->buf);
> + /* nodes[nr_nodes - 1] is a sender, so don't include it */
> + assert(node_eq(&ev->sender.node, &nodes[nr_nodes - 1]));
> + res = sd_check_join_cb(&ev->sender.node, nodes, nr_nodes - 1,
> + ev->buf);
> ev->join_result = res;
> ev->type = EVENT_JOIN_RESPONSE;
> msync(ev, sizeof(*ev), MS_SYNC);
>
> shm_queue_notify();
>
> - if (res == CJ_RES_MASTER_TRANSFER) {
> - sd_eprintf("failed to join sheepdog cluster: "
> - "please retry when master is up");
> - shm_queue_unlock();
> - exit(1);
> - }
> return false;
> case EVENT_JOIN_RESPONSE:
> - if (ev->join_result == CJ_RES_MASTER_TRANSFER) {
> - /* FIXME: This code is tricky, but Sheepdog assumes that */
> - /* nr_nodes = 1 when join_result = MASTER_TRANSFER... */
> - ev->nr_lnodes = 1;
> - ev->lnodes[0] = this_node;
> - nr_nodes = 1;
> - nodes[0] = this_node.node;
> - msync(ev, sizeof(*ev), MS_SYNC);
> - }
> -
> sd_join_handler(&ev->sender.node, nodes, nr_nodes,
> ev->join_result, ev->buf);
> break;
> diff --git a/sheep/cluster/shepherd.c b/sheep/cluster/shepherd.c
> index 26fb005..db8336e 100644
> --- a/sheep/cluster/shepherd.c
> +++ b/sheep/cluster/shepherd.c
> @@ -58,7 +58,7 @@ static int do_shepherd_join(void)
> msg.body_len = msg_join_len;
>
> msg_join = xzalloc(msg_join_len);
> - msg_join->node = this_node;
> + msg_join->new_node = this_node;
> memcpy(msg_join->opaque, kept_opaque, kept_opaque_len);
>
> ret = writev2(sph_comm_fd, &msg, msg_join, msg_join_len);
> @@ -115,7 +115,7 @@ retry:
> * FIXME: member change events must be ordered with nonblocked
> * events
> */
> - res = sd_check_join_cb(&join->node, join->opaque);
> + res = sd_check_join_cb(&join->new_node, NULL, 0, join->opaque);
> if (res == CJ_RES_FAIL) {
> sd_eprintf("sd_check_join_cb() failed");
> exit(1);
> @@ -161,19 +161,9 @@ retry:
>
> sd_iprintf("join reply arrived, nr_nodes: %d", join_reply->nr_nodes);
>
> - if (join_reply->res == CJ_RES_MASTER_TRANSFER) {
> - is_master = true;
> -
> - /* FIXME: This code is tricky, but Sheepdog assumes that */
> - /* nr_nodes = 1 when join_result = MASTER_TRANSFER... */
> - nr_nodes = 1;
> - nodes[0] = this_node;
> - } else {
> - memcpy(nodes, join_reply->nodes,
> - join_reply->nr_nodes * sizeof(struct sd_node));
> -
> - nr_nodes = join_reply->nr_nodes;
> - }
> + memcpy(nodes, join_reply->nodes,
> + join_reply->nr_nodes * sizeof(struct sd_node));
> + nr_nodes = join_reply->nr_nodes;
>
> /* FIXME: member change events must be ordered with nonblocked events */
> sd_join_handler(&this_node, nodes, nr_nodes,
> @@ -343,7 +333,7 @@ static void msg_new_node(struct sph_msg *rcv)
> }
>
> /* FIXME: member change events must be ordered with nonblocked events */
> - res = sd_check_join_cb(&join->node, join->opaque);
> + res = sd_check_join_cb(&join->new_node, nodes, nr_nodes, join->opaque);
>
> join->res = res;
>
> @@ -357,12 +347,6 @@ static void msg_new_node(struct sph_msg *rcv)
> exit(1);
> }
> free(join);
> -
> - if (res == CJ_RES_MASTER_TRANSFER) {
> - sd_eprintf("failed to join sheepdog cluster: "
> - "please retry when master is up");
> - exit(1);
> - }
> }
>
> static void msg_new_node_finish(struct sph_msg *rcv)
> @@ -380,11 +364,11 @@ static void msg_new_node_finish(struct sph_msg *rcv)
>
> jm = (struct join_message *)join_node_finish->opaque;
> memcpy(nodes, join_node_finish->nodes,
> - join_node_finish->nr_nodes * sizeof(struct sd_node));
> + join_node_finish->nr_nodes * sizeof(struct sd_node));
> nr_nodes = join_node_finish->nr_nodes;
>
> sd_iprintf("new node: %s",
> - node_to_str(&join_node_finish->new_node));
> + node_to_str(&join_node_finish->new_node));
>
> /* FIXME: member change events must be ordered with nonblocked events */
> sd_join_handler(&join_node_finish->new_node, nodes, nr_nodes,
> diff --git a/sheep/cluster/zookeeper.c b/sheep/cluster/zookeeper.c
> index 5ed16cf..aafc18d 100644
> --- a/sheep/cluster/zookeeper.c
> +++ b/sheep/cluster/zookeeper.c
> @@ -845,15 +845,11 @@ static void zk_handle_join_request(struct zk_event *ev)
> return;
> }
>
> - res = sd_check_join_cb(&ev->sender.node, ev->buf);
> + res = sd_check_join_cb(&ev->sender.node, sd_nodes, nr_sd_nodes,
> + ev->buf);
> ev->join_result = res;
> push_join_response(ev);
> - if (res == CJ_RES_MASTER_TRANSFER) {
> - sd_eprintf("failed to join sheepdog cluster: "
> - "please retry when master is up");
> - zk_leave();
> - exit(1);
> - }
> +
> sd_dprintf("I'm the master now");
> }
>
> @@ -896,19 +892,9 @@ static void zk_handle_join_response(struct zk_event *ev)
> /* newly joined node */
> init_node_list(ev);
>
> - if (ev->join_result == CJ_RES_MASTER_TRANSFER)
> - /*
> - * Sheepdog assumes that only one sheep is alive in
> - * MASTER_TRANSFER scenario. So only the joining sheep is
> - * supposed to return single node view to sd_join_handler().
> - */
> - zk_tree_destroy();
> -
> sd_dprintf("%s, %d", node_to_str(&ev->sender.node), ev->join_result);
> switch (ev->join_result) {
> case CJ_RES_SUCCESS:
> - case CJ_RES_JOIN_LATER:
> - case CJ_RES_MASTER_TRANSFER:
> snprintf(path, sizeof(path), MEMBER_ZNODE"/%s",
> node_to_str(&ev->sender.node));
> if (node_eq(&ev->sender.node, &this_node.node)) {
> diff --git a/sheep/group.c b/sheep/group.c
> index cb87711..743b4fb 100644
> --- a/sheep/group.c
> +++ b/sheep/group.c
> @@ -50,18 +50,6 @@ static main_thread(struct vnode_info *) current_vnode_info;
> static main_thread(struct list_head *) pending_block_list;
> static main_thread(struct list_head *) pending_notify_list;
>
> -/*
> - * List of nodes that were part of the last epoch before a shutdown,
> - * but failed to join.
> - */
> -static main_thread(struct list_head *) failed_nodes;
> -
> -/*
> - * List of nodes that weren't part of the last epoch, but joined
> - * before restarting the cluster.
> - */
> -static main_thread(struct list_head *) delayed_nodes;
> -
> static int get_zones_nr_from(const struct sd_node *nodes, int nr_nodes)
> {
> int nr_zones = 0, i, j;
> @@ -341,120 +329,6 @@ static inline int get_nodes_nr_from(struct list_head *l)
> return nr;
> }
>
> -static int get_nodes_nr_epoch(uint32_t epoch)
> -{
> - struct sd_node nodes[SD_MAX_NODES];
> -
> - return epoch_log_read(epoch, nodes, sizeof(nodes));
> -}
> -
> -static const struct sd_node *find_entry_list(const struct sd_node *entry,
> - struct list_head *head)
> -{
> - struct node *n;
> - list_for_each_entry(n, head, list)
> - if (node_eq(&n->ent, entry))
> - return entry;
> -
> - return NULL;
> -
> -}
> -
> -static const struct sd_node *find_entry_epoch(const struct sd_node *entry,
> - uint32_t epoch)
> -{
> - struct sd_node nodes[SD_MAX_NODES];
> - int nr;
> -
> - if (!epoch)
> - return NULL;
> -
> - nr = epoch_log_read(epoch, nodes, sizeof(nodes));
> -
> - return xlfind(entry, nodes, nr, node_cmp);
> -}
> -
> -/*
> - * Add a node to the list of nodes that weren't part of the cluster before
> - * it shut down, and thus do not count toward the nodes required to allow
> - * an automated restart. These nodes will become part of the cluster by
> - * the time it does get restarted.
> - */
> -static bool add_delayed_node(uint32_t epoch, const struct sd_node *node)
> -{
> - struct node *n;
> -
> - if (find_entry_list(node, main_thread_get(delayed_nodes)))
> - return false;
> - assert(!find_entry_epoch(node, epoch));
> -
> - n = xmalloc(sizeof(*n));
> - n->ent = *node;
> - list_add_tail(&n->list, main_thread_get(delayed_nodes));
> - return true;
> -}
> -
> -/*
> - * For a node that failed to join check if was part of the original
> - * epoch, and if so add it to the list of node expected to be present
> - * but failing to join.
> - */
> -static bool add_failed_node(uint32_t epoch, const struct sd_node *node)
> -{
> - struct node *n;
> -
> - if (find_entry_list(node, main_thread_get(failed_nodes)))
> - return false;
> - if (!find_entry_epoch(node, epoch))
> - return false;
> -
> - n = xmalloc(sizeof(*n));
> - n->ent = *node;
> - list_add_tail(&n->list, main_thread_get(failed_nodes));
> - return true;
> -}
> -
> -/*
> - * Add the failed and delayed nodes in a join message to the local
> - * lists of such nodes.
> - */
> -static void update_exceptional_node_list(uint32_t epoch,
> - const struct join_message *jm)
> -{
> - int i;
> -
> - for (i = 0; i < jm->nr_failed_nodes; i++)
> - add_failed_node(epoch, &jm->cinfo.nodes[i]);
> - for ( ; i < jm->nr_failed_nodes + jm->nr_delayed_nodes; i++)
> - add_delayed_node(epoch, &jm->cinfo.nodes[i]);
> -}
> -
> -/* Format the lists of failed or delayed nodes into the join message. */
> -static void format_exceptional_node_list(struct join_message *jm)
> -{
> - struct node *n;
> -
> - list_for_each_entry(n, main_thread_get(failed_nodes), list)
> - jm->cinfo.nodes[jm->nr_failed_nodes++] = n->ent;
> - list_for_each_entry(n, main_thread_get(delayed_nodes), list)
> - jm->cinfo.nodes[jm->nr_failed_nodes +
> - jm->nr_delayed_nodes++] = n->ent;
> -}
> -
> -static void clear_exceptional_node_lists(void)
> -{
> - struct node *n, *t;
> -
> - list_for_each_entry_safe(n, t, main_thread_get(failed_nodes), list) {
> - list_del(&n->list);
> - free(n);
> - }
> - list_for_each_entry_safe(n, t, main_thread_get(delayed_nodes), list) {
> - list_del(&n->list);
> - free(n);
> - }
> -}
> -
> int epoch_log_read_remote(uint32_t epoch, struct sd_node *nodes, int len,
> time_t *timestamp, struct vnode_info *vinfo)
> {
> @@ -496,8 +370,6 @@ int epoch_log_read_remote(uint32_t epoch, struct sd_node *nodes, int len,
>
> static int cluster_sanity_check(struct join_message *jm)
> {
> - uint32_t local_epoch = get_latest_epoch();
> -
> if (jm->cinfo.ctime != sys->cinfo.ctime) {
> sd_eprintf("joining node ctime doesn't match: %"
> PRIu64 " vs %" PRIu64, jm->cinfo.ctime,
> @@ -505,12 +377,6 @@ static int cluster_sanity_check(struct join_message *jm)
> return CJ_RES_FAIL;
> }
>
> - if (jm->cinfo.epoch > local_epoch) {
> - sd_eprintf("joining node epoch too large: %"
> - PRIu32 " vs %" PRIu32, jm->cinfo.epoch, local_epoch);
> - return CJ_RES_FAIL;
> - }
> -
> if (jm->cinfo.nr_copies != sys->cinfo.nr_copies) {
> sd_eprintf("joining node nr_copies doesn't match: %u vs %u",
> jm->cinfo.nr_copies, sys->cinfo.nr_copies);
> @@ -526,88 +392,71 @@ static int cluster_sanity_check(struct join_message *jm)
> return CJ_RES_SUCCESS;
> }
>
> -static int cluster_wait_for_join_check(const struct sd_node *joined,
> - struct join_message *jm)
> +/*
> + * Check whether enough node members are gathered.
> + *
> + * Sheepdog can start automatically if and only if all the members in the latest
> + * epoch are gathered.
> + */
> +static bool enough_nodes_gathered(struct join_message *jm,
> + const struct sd_node *joining,
> + const struct sd_node *members,
> + size_t nr_members)
> {
> - struct sd_node local_entries[SD_MAX_NODES];
> - int nr, nr_local_entries, nr_failed_entries, nr_delayed_nodes;
> - uint32_t local_epoch = get_latest_epoch();
> - int ret;
> - struct vnode_info *cur_vinfo;
> + for (int i = 0; i < jm->cinfo.nr_nodes; i++) {
> + const struct sd_node *key = jm->cinfo.nodes + i, *n;
>
> - if (jm->cinfo.nr_nodes == 0)
> - return CJ_RES_JOIN_LATER;
> -
> - ret = cluster_sanity_check(jm);
> - if (ret != CJ_RES_SUCCESS) {
> - if (jm->cinfo.epoch > sys->cinfo.epoch) {
> - sd_eprintf("transfer mastership (%d, %d)", jm->cinfo.epoch,
> - sys->cinfo.epoch);
> - return CJ_RES_MASTER_TRANSFER;
> + n = xlfind(key, members, nr_members, node_cmp);
> + if (n == NULL && !node_eq(key, joining)) {
> + sd_dprintf("%s doesn't join yet", node_to_str(key));
> + return false;
> }
> - return ret;
> }
>
> - nr_local_entries = epoch_log_read(jm->cinfo.epoch, local_entries,
> - sizeof(local_entries));
> - if (nr_local_entries == -1)
> - return CJ_RES_FAIL;
> -
> - if (jm->cinfo.epoch < local_epoch) {
> - sd_eprintf("joining node epoch too small: %"
> - PRIu32 " vs %" PRIu32, jm->cinfo.epoch, local_epoch);
> + sd_dprintf("all the nodes are gathered, %d, %zd", jm->cinfo.nr_nodes,
> + nr_members);
> + return true;
> +}
>
> - if (xbsearch(joined, local_entries, nr_local_entries, node_cmp))
> - return CJ_RES_FAIL;
> - return CJ_RES_JOIN_LATER;
> - }
> +static int cluster_wait_for_join_check(const struct sd_node *joined,
> + const struct sd_node *members,
> + size_t nr_members,
> + struct join_message *jm)
> +{
> + int ret;
>
> - if (jm->cinfo.nr_nodes != nr_local_entries) {
> - sd_eprintf("epoch log entries do not match: %d vs %d",
> - jm->cinfo.nr_nodes, nr_local_entries);
> - return CJ_RES_FAIL;
> + if (jm->cinfo.epoch != 0 && sys->cinfo.epoch != 0) {
> + /* check whether joining node is valid or not */
> + ret = cluster_sanity_check(jm);
> + if (ret != CJ_RES_SUCCESS)
> + return ret;
> }
>
> -
> - if (memcmp(jm->cinfo.nodes, local_entries,
> - sizeof(jm->cinfo.nodes[0]) * jm->cinfo.nr_nodes) != 0) {
> + if (jm->cinfo.epoch > sys->cinfo.epoch)
> + sys->cinfo = jm->cinfo;
> + else if (jm->cinfo.epoch < sys->cinfo.epoch) {
> + sd_dprintf("joining node has a smaller epoch, %" PRIu32 ", %"
> + PRIu32, jm->cinfo.epoch, sys->cinfo.epoch);
> + jm->cinfo = sys->cinfo;
> + } else if (memcmp(jm->cinfo.nodes, sys->cinfo.nodes,
> + sizeof(jm->cinfo.nodes[0]) * jm->cinfo.nr_nodes) != 0) {
> sd_eprintf("epoch log entries does not match");
> return CJ_RES_FAIL;
> }
>
> - cur_vinfo = main_thread_get(current_vnode_info);
> - if (!cur_vinfo)
> - nr = 1;
> - else
> - nr = cur_vinfo->nr_nodes + 1;
> -
> - nr_delayed_nodes = get_nodes_nr_from(main_thread_get(delayed_nodes));
> -
> /*
> * If we have all members from the last epoch log in the in-memory
> - * node list, and no new nodes joining we can set the cluster live
> - * now without incrementing the epoch.
> - */
> - if (nr == nr_local_entries && !nr_delayed_nodes) {
> - jm->cluster_status = SD_STATUS_OK;
> - return CJ_RES_SUCCESS;
> - }
> -
> - /*
> - * If we reach the old node count, but some node failed we have to
> - * update the epoch before setting the cluster live.
> + * node list, we can set the cluster live now.
> */
> - nr_failed_entries = get_nodes_nr_from(main_thread_get(failed_nodes));
> - if (nr_local_entries == nr + nr_failed_entries - nr_delayed_nodes) {
> - jm->inc_epoch = 1;
> + if (sys->cinfo.epoch > 0 &&
> + enough_nodes_gathered(jm, joined, members, nr_members)) {
> + if (jm->cinfo.nr_nodes < nr_members + 1)
I'd suggest adding a comment here explaining why you add 1 to nr_members, or
better, a note that nr_members means the number of nodes *excluding* the
joining (or leaving) node. Probably we should rename nr_members to nr_nodes
too.
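
To make the point concrete, here is a throwaway standalone sketch of how I
read the new check (not sheep code: the struct and helper names are simplified
stand-ins, and the inc_epoch handling is only my guess at the intent of the
truncated hunk above):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* simplified stand-in for struct sd_node, just for this example */
struct node {
        char name[16];
};

static bool node_eq(const struct node *a, const struct node *b)
{
        return strcmp(a->name, b->name) == 0;
}

/*
 * Every node recorded in the previous epoch must be either an existing
 * member or the node that is joining right now.
 */
static bool enough_nodes_gathered(const struct node *prev, int nr_prev,
                                  const struct node *joining,
                                  const struct node *members, int nr_members)
{
        for (int i = 0; i < nr_prev; i++) {
                bool found = node_eq(&prev[i], joining);

                for (int j = 0; j < nr_members && !found; j++)
                        found = node_eq(&prev[i], &members[j]);
                if (!found)
                        return false; /* still waiting for this node */
        }
        return true;
}

int main(void)
{
        struct node prev[] = { {"A"}, {"B"}, {"C"} };  /* previous epoch */
        struct node members[] = { {"A"}, {"B"} };      /* already joined */
        struct node joining = { "C" };                 /* joining now */
        int nr_prev = 3, nr_members = 2;

        if (enough_nodes_gathered(prev, nr_prev, &joining,
                                  members, nr_members)) {
                /*
                 * nr_members excludes the joining node, so the number of
                 * gathered nodes is nr_members + 1.  If the previous epoch
                 * had fewer nodes than that, a new node has joined and the
                 * epoch presumably has to be incremented before the cluster
                 * goes live.
                 */
                bool inc_epoch = nr_prev < nr_members + 1;

                printf("start cluster, inc_epoch = %d\n", inc_epoch);
        } else {
                printf("keep waiting for the previous epoch members\n");
        }
        return 0;
}

If the comment stated the "members excludes the joining node" convention
explicitly, the + 1 would be self-explanatory.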
Thanks
Yuan