[sheepdog] [PATCH 1/2] Revert "corosync: handle network partition"

Hitoshi Mitake mitake.hitoshi at lab.ntt.co.jp
Tue Sep 30 10:01:25 CEST 2014


This reverts commit 15df161958a38cf3f7bc83b5bc2c8a1817b3072e.

The commit introduced a better method of handling network
partitions, but that method appears to risk losing events
from corosync. According to an experiment by Tsuji-san, dispatching
of a leave event can be delayed because of the poll(2) in
__corosync_dispatch() with corosync < 2.x.

In the next commit, a simpler method for detecting network partitions,
based on a parameter of the cpg callback function, will be implemented.
The reverted method based on poll(2) depends on the semantics of the fd
obtained by cpg_fd_get(3) of corosync. The poll(2) based method worked
well on corosync 1.x, but it doesn't work well on 2.x. A big internal
change in corosync (maybe the adoption of libqb) seems to have changed
the semantics, so we cannot use poll(2) for detecting queued messages.

With this change, sheepdog can no longer use corosync 1.x as its
cluster manager. But that version has a critical bug in its message
delivery mechanism which can produce a broken epoch log. So it is
reasonable to force users to stop using that version and introduce
this change.

Reported bug: https://bugs.launchpad.net/sheepdog-project/+bug/1368503

Conflicts:
	sheep/cluster/corosync.c
[ Conflicts were resolved by Hitoshi Mitake ]

Reported-by: Masahiro Tsuji <tuji at atworks.co.jp>
Cc: Masahiro Tsuji <tuji at atworks.co.jp>
Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
 sheep/cluster/corosync.c | 45 +++------------------------------------------
 1 file changed, 3 insertions(+), 42 deletions(-)

diff --git a/sheep/cluster/corosync.c b/sheep/cluster/corosync.c
index cd00733..477e442 100644
--- a/sheep/cluster/corosync.c
+++ b/sheep/cluster/corosync.c
@@ -10,7 +10,6 @@
  */
 #include <stdio.h>
 #include <unistd.h>
-#include <poll.h>
 #include <sys/epoll.h>
 #include <corosync/cpg.h>
 #include <corosync/cfg.h>
@@ -41,8 +40,6 @@ static struct cpg_node cpg_nodes[COROSYNC_MAX_NODES];
 static size_t nr_cpg_nodes;
 static bool self_elect;
 static bool join_finished;
-static int cpg_fd;
-static size_t nr_majority; /* used for network partition detection */
 
 /* event types which are dispatched in corosync_dispatch() */
 enum corosync_event_type {
@@ -358,23 +355,6 @@ static bool update_join_status(struct corosync_event *cevent)
 static void __corosync_dispatch(void)
 {
 	struct corosync_event *cevent;
-	struct pollfd pfd = {
-		.fd = cpg_fd,
-		.events = POLLIN,
-	};
-
-	if (poll(&pfd, 1, 0)) {
-		/*
-		 * Corosync dispatches leave events one by one even
-		 * when network partition has occurred.  To count the
-		 * number of alive nodes correctly, we postpone
-		 * processing events if there are incoming ones.
-		 */
-		sd_debug("wait for a next dispatch event");
-		return;
-	}
-
-	nr_majority = 0;
 
 	while (!list_empty(&corosync_block_event_list) ||
 	       !list_empty(&corosync_nonblock_event_list)) {
@@ -554,25 +534,6 @@ static void cdrv_cpg_confchg(cpg_handle_t handle,
 	sd_debug("mem:%zu, joined:%zu, left:%zu", member_list_entries,
 		 joined_list_entries, left_list_entries);
 
-	/* check network partition */
-	if (left_list_entries) {
-		if (nr_majority == 0) {
-			size_t total = member_list_entries + left_list_entries;
-
-			/*
-			 * we need at least 3 nodes to handle network
-			 * partition failure
-			 */
-			if (total > 2)
-				nr_majority = total / 2 + 1;
-		}
-
-		if (member_list_entries == 0)
-			panic("NIC failure?");
-		if (member_list_entries < nr_majority)
-			panic("Network partition is detected");
-	}
-
 	/* convert cpg_address to cpg_node */
 	build_cpg_node_list(member_sheep, member_list, member_list_entries);
 	build_cpg_node_list(left_sheep, left_list, left_list_entries);
@@ -714,7 +675,7 @@ out:
 
 static int corosync_init(const char *option)
 {
-	int ret, retry_cnt = 0;
+	int ret, fd, retry_cnt = 0;
 	uint32_t nodeid;
 	cpg_callbacks_t cb = {
 		.cpg_deliver_fn = cdrv_cpg_deliver,
@@ -759,13 +720,13 @@ again:
 	this_node.nodeid = nodeid;
 	this_node.pid = getpid();
 
-	ret = cpg_fd_get(cpg_handle, &cpg_fd);
+	ret = cpg_fd_get(cpg_handle, &fd);
 	if (ret != CS_OK) {
 		sd_err("failed to get cpg file descriptor (%d)", ret);
 		return -1;
 	}
 
-	ret = register_event(cpg_fd, corosync_handler, NULL);
+	ret = register_event(fd, corosync_handler, NULL);
 	if (ret) {
 		sd_err("failed to register corosync event handler (%d)", ret);
 		return -1;
-- 
1.8.3.2




More information about the sheepdog mailing list