[Sheepdog] [PATCH] sheep: handle network partition failure
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Sun Jun 19 19:28:47 CEST 2011
This patch kills minority nodes when a network partition has occurred.
Though this approach kills many nodes, it is the simplest way to
keep strong consistency.
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
sheep/group.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 55 insertions(+), 0 deletions(-)
diff --git a/sheep/group.c b/sheep/group.c
index 77c9b78..9e039e7 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -1028,8 +1028,63 @@ static int is_my_cpg_addr(struct cpg_address *addr)
(sys->this_pid == addr->pid);
}
+/*
+ * Return 1 if a majority of the Sheepdog nodes is reachable (or the check is
+ * skipped: left_list is empty, or fewer than 3 nodes are known), else return 0.
+ */
+static int check_majority(struct cpg_address *left_list,
+			  size_t left_list_entries)
+{
+	int nr_nodes = 0, nr_majority, nr_reachable = 0, fd;
+	size_t i;
+	struct node *node;
+	char name[INET6_ADDRSTRLEN];
+
+	if (left_list_entries == 0)
+		return 1; /* we don't need this check in this case */
+
+	list_for_each_entry(node, &sys->sd_node_list, list)
+		nr_nodes++;
+	nr_majority = nr_nodes / 2 + 1;
+
+	/* we need at least 3 nodes to handle network partition failure */
+	if (nr_nodes < 3)
+		return 1;
+
+	list_for_each_entry(node, &sys->sd_node_list, list) {
+		for (i = 0; i < left_list_entries; i++) {
+			if (left_list[i].nodeid == node->nodeid &&
+			    left_list[i].pid == node->pid)
+				break;
+		}
+		if (i != left_list_entries)
+			continue; /* this node is in left_list */
+
+		addr_to_str(name, sizeof(name), node->ent.addr, 0);
+		fd = connect_to(name, node->ent.port);
+		if (fd < 0)
+			continue; /* connect failed: not reachable */
+
+		close(fd);
+		nr_reachable++;
+		if (nr_reachable >= nr_majority) {
+			dprintf("majority nodes are alive\n");
+			return 1;
+		}
+	}
+	dprintf("%d, %d, %d\n", nr_nodes, nr_majority, nr_reachable);
+	eprintf("majority nodes are not alive\n");
+	return 0;
+}
+
static void __sd_confchg(struct cpg_event *cevent)
{
+ struct work_confchg *w = container_of(cevent, struct work_confchg, cev);
+
+ if (!check_majority(w->left_list, w->left_list_entries)) {
+ eprintf("perhaps network partition failure has occurred\n");
+ abort(); /* kill this minority node to keep strong consistency */
+ }
}
static void send_join_request(struct cpg_address *addr, struct work_confchg *w)
--
1.7.2.5
More information about the sheepdog
mailing list