[Sheepdog] [PATCH 10/18] collie: verify epoch tree at the master node
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Thu Mar 11 07:48:09 CET 2010
The master node checks the epoch tree to determine whether the nodes can start up.
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
collie/collie.h | 1 +
collie/group.c | 173 +++++++++++++++++++++++++++++++++++++++++++++-
include/sheepdog_proto.h | 13 ++++
3 files changed, 186 insertions(+), 1 deletions(-)
diff --git a/collie/collie.h b/collie/collie.h
index ce8e6e4..f99466b 100644
--- a/collie/collie.h
+++ b/collie/collie.h
@@ -64,6 +64,7 @@ struct cluster_info {
uint32_t epoch;
uint32_t is_object_updated;
+ uint32_t status;
struct list_head cpg_node_list;
struct list_head sd_node_list;
diff --git a/collie/group.c b/collie/group.c
index 02cb2d5..a74ba39 100644
--- a/collie/group.c
+++ b/collie/group.c
@@ -60,7 +60,8 @@ struct join_message {
uint32_t epoch;
uint32_t nr_nodes;
uint32_t nr_sobjs;
- uint32_t pad;
+ uint32_t cluster_status;
+
struct {
uint32_t nodeid;
uint32_t pid;
@@ -295,6 +296,170 @@ static int is_master(void)
return 0;
}
+static int __check_split_brain(struct tree_vertex *vertex, int depth,
+ struct tree_vertex **updated_vertex)
+{
+ struct epoch_tree *tree;
+ tree = container_of(vertex, struct epoch_tree, vertex);
+ if (!tree->updated)
+ return 0;
+
+ if (!*updated_vertex) {
+ *updated_vertex = vertex;
+ return 0;
+ }
+
+ while (vertex) {
+ if (vertex == *updated_vertex)
+ return 0;
+ vertex = tree_parent(vertex);
+ }
+ return 1;
+}
+
+static int check_split_brain(struct tree_vertex *root)
+{
+ struct tree_vertex *vertex = NULL;
+
+ return tree_walk(root, (tree_vertex_func_t)__check_split_brain, &vertex);
+}
+
+static int __find_start_epoch(struct tree_vertex *vertex, int depth,
+ struct epoch_tree **ptree)
+{
+ struct epoch_tree *tree;
+ tree = container_of(vertex, struct epoch_tree, vertex);
+ if (tree->updated) {
+ dprintf("new updated epoch = %ld\n", vertex->id);
+ *ptree = tree;
+ }
+ return 0;
+}
+
+static struct epoch_tree *find_start_epoch(struct tree_vertex *root)
+{
+ struct epoch_tree *tree = NULL;
+
+ tree_walk(root, (tree_vertex_func_t)__find_start_epoch, &tree);
+
+ return tree;
+}
+
+struct start_epoch_info {
+ int nr_logs;
+ struct epoch_log *logs;
+};
+
+static int __check_enough_epochs(struct tree_vertex *vertex, int depth, void *data)
+{
+ struct epoch_tree *tree;
+ tree = container_of(vertex, struct epoch_tree, vertex);
+
+ if (tree->updated)
+ return 1;
+
+ return 0;
+}
+
+static int check_enough_epochs(struct tree_vertex *root)
+{
+ int res;
+
+ if (tree_no_children(root))
+ return SD_STATUS_NO_EPOCH;
+
+ if (tree_first_child(root) != tree_last_child(root))
+ return SD_STATUS_MULTIPLE_EPOCH_TREES;
+
+ if (!tree_walk(root, __check_enough_epochs, &res))
+ return SD_STATUS_NO_UPDATED_EPOCH;
+
+ return SD_STATUS_OK;
+}
+
+struct check_enough_nodes_info {
+ int nr_nodes;
+ struct sheepdog_node_list_entry *nodes;
+};
+
+static int __check_enough_nodes(struct tree_vertex *vertex, int depth, struct check_enough_nodes_info *ceni)
+{
+ int i, j;
+ struct epoch_tree *tree;
+ int nr_nodes = ceni->nr_nodes;
+ struct sheepdog_node_list_entry *nodes = ceni->nodes;
+
+ tree = container_of(vertex, struct epoch_tree, vertex);
+ for (i = 0; i < tree->nr_nodes; i++) {
+ for (j = 0; j < nr_nodes; j++) {
+ if (nodes[j].id == tree->nodes[i].id)
+ goto next;
+ }
+ return 1;
+ next:
+ ;
+ }
+
+ return 0;
+}
+
+static int check_enough_nodes(struct epoch_tree *tree, struct sheepdog_node_list_entry *nodes, int nr_nodes)
+{
+ struct check_enough_nodes_info ceni;
+
+ ceni.nr_nodes = nr_nodes;
+ ceni.nodes = nodes;
+
+ return !tree_walk(&tree->vertex, (tree_vertex_func_t)__check_enough_nodes, &ceni);
+}
+
+static int __print_epoch_tree(struct tree_vertex *vertex, char *buf, int len)
+{
+ struct epoch_tree *tree;
+ tree = container_of(vertex, struct epoch_tree, vertex);
+ snprintf(buf, len, "%016" PRIx64"(%d)", vertex->id, tree->epoch);
+
+ return strlen(buf);
+}
+
+static void print_epoch_tree(struct tree_vertex *root)
+{
+ tree_print(root, "epoch_tree", __print_epoch_tree);
+}
+
+static int get_cluster_status(struct sheepdog_node_list_entry *entries, int nr_entries,
+ struct epoch_tree **start_point)
+{
+ struct epoch_tree *tree;
+ struct tree_vertex *v;
+ uint32_t status = sys->status;
+
+ tree_for_each_child(v, &sys->epoch_tree_root) {
+ print_epoch_tree(v);
+ }
+
+ status = check_enough_epochs(&sys->epoch_tree_root);
+ if (status == SD_STATUS_OK) {
+ if (check_split_brain(&sys->epoch_tree_root)) {
+ eprintf("Object was updated when split brain was occurred\n");
+ status = SD_STATUS_EPOCH_CONFLICT;
+ goto out;
+ }
+
+ tree = find_start_epoch(&sys->epoch_tree_root);
+ if (check_enough_nodes(tree, entries, nr_entries)) {
+ *start_point = tree;
+ status = SD_STATUS_OK;
+ } else
+ status = SD_STATUS_MISSING_NODES;
+ } else if (sys->status == SD_STATUS_OK) {
+ eprintf("a newly added node has incorrect epoch info\n");
+ status = SD_STATUS_UNKNOWN_ERROR;
+ }
+out:
+ return status;
+}
+
static int add_epoch_log(int epoch, uint64_t parent_hval, uint64_t hval,
int nr_nodes, struct sheepdog_node_list_entry *nodes,
int is_updated, uint64_t ctime)
@@ -333,6 +498,7 @@ static int add_epoch_log(int epoch, uint64_t parent_hval, uint64_t hval,
static void join(struct join_message *msg)
{
struct node *node;
+ struct epoch_tree *start_point = NULL;
struct sheepdog_node_list_entry entries[SD_MAX_NODES];
if (!sys->synchronized)
@@ -458,6 +624,10 @@ static void join(struct join_message *msg)
msg->nodes[msg->nr_nodes].ent = node->ent;
msg->nr_nodes++;
}
+
+ int nr_nodes = build_node_list(&sys->cpg_node_list, entries);
+ msg->cluster_status = get_cluster_status(entries, nr_nodes, &start_point);
+ dprintf("nr_nodes %d, status %d\n", nr_nodes, msg->cluster_status);
out:
return;
}
@@ -1009,6 +1179,7 @@ join_retry:
sys->this_node.id = hval;
sys->synchronized = 0;
+ sys->status = SD_STATUS_NO_EPOCH;
INIT_LIST_HEAD(&sys->sd_node_list);
INIT_LIST_HEAD(&sys->cpg_node_list);
INIT_LIST_HEAD(&sys->vm_list);
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index c1197fb..c28dcba 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -65,6 +65,19 @@
#define SD_FLAG_CMD_SNAPSHOT (1U << 8)
+#define SD_STATUS_STARTUP_MASK 0x20
+#define SD_STATUS_SHUTDOWN_MASK 0x40
+#define SD_STATUS_ERROR_MASK 0x80
+
+#define SD_STATUS_OK 0x00
+#define SD_STATUS_NO_EPOCH 0x21 /* There is no epoch */
+#define SD_STATUS_MULTIPLE_EPOCH_TREES 0x22 /* Too many epoch trees */
+#define SD_STATUS_NO_UPDATED_EPOCH 0x23 /* There is no updated epoch */
+#define SD_STATUS_MISSING_NODES 0x24 /* Too few nodes to start sheepdog */
+#define SD_STATUS_SHUTDOWN 0x41
+#define SD_STATUS_EPOCH_CONFLICT 0x85 /* Cannot resolve where to start */
+#define SD_STATUS_UNKNOWN_ERROR 0x86 /* Unknown error has occurred */
+
#define SD_RES_SUCCESS 0x00 /* Success */
#define SD_RES_UNKNOWN 0x01 /* Unknown error */
#define SD_RES_NO_OBJ 0x02 /* No object found */
--
1.5.6.5
More information about the sheepdog
mailing list