[sheepdog] [PATCH V2 02/11] sheep: share joining nodes with newly added sheep
Yunkai Zhang
yunkai.me at gmail.com
Thu Aug 9 10:43:40 CEST 2012
From: Yunkai Zhang <qiushu.zyk at taobao.com>
The sheep master fills in the joining nodes in join_message when a sheep joins.
A newly added sheep gets the joining nodes from join_message and uses them to
initialize and recalculate the all_nodes array, which is then used to generate
current_vnode_info in cluster_enable_recover().
Signed-off-by: Yunkai Zhang <qiushu.zyk at taobao.com>
---
include/internal_proto.h | 10 ++++++----
sheep/group.c | 39 +++++++++++++++++++++++++++++++--------
sheep/ops.c | 1 +
3 files changed, 38 insertions(+), 12 deletions(-)
diff --git a/include/internal_proto.h b/include/internal_proto.h
index 83d98f1..717fb79 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -194,6 +194,7 @@ struct join_message {
uint16_t nr_nodes;
uint16_t nr_failed_nodes;
uint16_t nr_delayed_nodes;
+ uint16_t nr_joining_nodes;
uint16_t cluster_flags;
uint32_t cluster_status;
uint32_t epoch;
@@ -204,10 +205,11 @@ struct join_message {
/*
* A joining sheep puts the local node list here, which is nr_nodes
- * entries long. After the master replies it will contain the list of
- * nodes that attempted to join but failed the join process. The
- * number of entries in that case is nr_failed_nodes, which by
- * defintion must be smaller than nr_nodes.
+ * entries long. After the master replies it will contain the list of
+ * nodes in the following order:
+ * [ failed nodes ]: size = nr_failed_nodes
+ * [ delayed nodes ]: size = nr_delayed_nodes
+ * [ joining nodes ]: size = nr_joining_nodes
*/
struct sd_node nodes[];
};
diff --git a/sheep/group.c b/sheep/group.c
index cb244f7..ad3447d 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -767,16 +767,32 @@ static void get_vdi_bitmap(struct sd_node *nodes, size_t nr_nodes)
queue_work(sys->block_wqueue, &w->work);
}
-static void prepare_recovery(struct sd_node *joined,
- struct sd_node *nodes, size_t nr_nodes)
+static void prepare_recovery(struct join_message *jm,
+ struct sd_node *joined,
+ struct sd_node *nodes, size_t nr_nodes)
{
- int i;
+ int i, j, n, found;
joining_nodes[nr_joining_nodes++] = *joined;
+
if (!nr_all_nodes) {
- /* exclude the newly added one */
- for (i = 0; i < nr_nodes; i++) {
- if (!node_eq(nodes + i, joined))
+ /* initialize joining_nodes */
+ n = jm->nr_failed_nodes + jm->nr_delayed_nodes;
+ memcpy(&joining_nodes[nr_joining_nodes], &jm->nodes[n],
+ jm->nr_joining_nodes * sizeof(*joining_nodes));
+ nr_joining_nodes += jm->nr_joining_nodes;
+
+ /* initialize all_nodes */
+ for (found = 0, i = 0; i < nr_nodes; i++) {
+ /* exclude all joining nodes */
+ for (j = 0; j < nr_joining_nodes; j++) {
+ if (node_eq(nodes + i, joining_nodes + j)) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found)
all_nodes[nr_all_nodes++] = nodes[i];
}
}
@@ -866,7 +882,7 @@ static void update_cluster_info(struct join_message *msg,
start_recovery(current_vnode_info,
old_vnode_info);
} else
- prepare_recovery(joined, nodes, nr_nodes);
+ prepare_recovery(msg, joined, nodes, nr_nodes);
}
if (have_enough_zones())
@@ -1013,7 +1029,14 @@ enum cluster_join_result sd_check_join_cb(struct sd_node *joining,
(ret == CJ_RES_SUCCESS || ret == CJ_RES_JOIN_LATER))
format_exceptional_node_list(jm);
- n = jm->nr_failed_nodes + jm->nr_delayed_nodes;
+ if (sys->disable_recovery) {
+ n = jm->nr_failed_nodes + jm->nr_delayed_nodes;
+ memcpy(&jm->nodes[n], joining_nodes,
+ nr_joining_nodes * sizeof(*joining_nodes));
+ jm->nr_joining_nodes = nr_joining_nodes;
+ }
+
+ n = jm->nr_failed_nodes + jm->nr_delayed_nodes + jm->nr_joining_nodes;
*opaque_len = sizeof(*jm) + n * sizeof(jm->nodes[0]);
return ret;
}
diff --git a/sheep/ops.c b/sheep/ops.c
index 0cddf66..b2e9c69 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -292,6 +292,7 @@ static int cluster_enable_recover(const struct sd_req *req,
put_vnode_info(old_vnode_info);
}
+ nr_all_nodes = 0;
nr_joining_nodes = 0;
sys->disable_recovery = 0;
return SD_RES_SUCCESS;
--
1.7.11.2
More information about the sheepdog
mailing list