[Sheepdog] [PATCH 2/2] fix cluster event sequences

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Thu Nov 24 19:22:04 CET 2011


Cluster drivers must not call the 'check_join_cb' callback before sheep
finishes handling the previous event.  The simplest way to guarantee
this is to: 1) call coroutine_yield() before exiting the event handler,
and 2) re-enter the coroutine after the event is completely processed.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 sheep/group.c |   43 ++++++++++++++++++++++++++++++-------------
 1 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/sheep/group.c b/sheep/group.c
index 31d1f76..b8a3cfe 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -23,6 +23,9 @@
 #include "logger.h"
 #include "work.h"
 #include "cluster.h"
+#include "coroutine.h"
+
+static struct coroutine *cdrv_co;
 
 struct node {
 	struct sheepdog_node_list_entry ent;
@@ -246,22 +249,27 @@ void do_cluster_request(struct work *work, int idx)
 	free(msg);
 }
 
+static void group_handler(int listen_fd, int events, void *data);
+
+static void cluster_dispatch(void *opaque)
+{
+	int fd = *(int *)opaque;
+
+	unregister_event(fd);
+
+	if (sys->cdrv->dispatch() != 0)
+		panic("oops... an error occurred inside corosync\n");
+
+	register_event(fd, group_handler, NULL);
+}
+
 static void group_handler(int listen_fd, int events, void *data)
 {
-	int ret;
-	if (events & EPOLLHUP) {
-		eprintf("received EPOLLHUP event: has corosync exited?\n");
-		goto out;
-	}
+	if (events & EPOLLHUP)
+		panic("received EPOLLHUP event: has corosync exited?\n");
 
-	ret = sys->cdrv->dispatch();
-	if (ret == 0)
-		return;
-	else
-		eprintf("oops... an error occurred inside corosync\n");
-out:
-	log_close();
-	exit(1);
+	cdrv_co = coroutine_create(cluster_dispatch);
+	coroutine_enter(cdrv_co, &listen_fd);
 }
 
 static inline int get_nodes_nr_from(struct list_head *l)
@@ -644,6 +652,8 @@ static void sd_notify_handler(struct sheepdog_node_list_entry *sender,
 	list_add_tail(&cevent->cpg_event_list, &sys->cpg_event_siblings);
 
 	start_cpg_event_work();
+
+	coroutine_yield();
 }
 
 /*
@@ -941,6 +951,8 @@ static void cpg_event_done(struct work *work, int idx)
 	cpg_event_free(cevent);
 	cpg_event_running = 0;
 
+	coroutine_enter(cdrv_co, NULL);
+
 	if (!list_empty(&sys->cpg_event_siblings))
 		start_cpg_event_work();
 }
@@ -1199,6 +1211,9 @@ static void sd_join_handler(struct sheepdog_node_list_entry *joined,
 
 		list_add_tail(&cevent->cpg_event_list, &sys->cpg_event_siblings);
 		start_cpg_event_work();
+
+		coroutine_yield();
+
 		break;
 	case CJ_RES_FAIL:
 	case CJ_RES_JOIN_LATER:
@@ -1316,6 +1331,8 @@ static void sd_leave_handler(struct sheepdog_node_list_entry *left,
 	list_add_tail(&cevent->cpg_event_list, &sys->cpg_event_siblings);
 	start_cpg_event_work();
 
+	coroutine_yield();
+
 	return;
 oom:
 	if (w) {
-- 
1.7.2.5




More information about the sheepdog mailing list