[Sheepdog] [PATCH 2/2] sheep: fix cluster event sequences
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Fri Nov 25 13:57:11 CET 2011
Cluster drivers must not call the 'check_join_cb' callback before sheep
finishes handling the previous event. The simplest way to ensure this
is to: 1) call coroutine_yield() before exiting the event handler, and
2) re-enter the coroutine after the event is completely processed.
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
sheep/group.c | 52 +++++++++++++++++++++++++++++++++++-----------------
1 files changed, 35 insertions(+), 17 deletions(-)
diff --git a/sheep/group.c b/sheep/group.c
index 8c748ab..6513b14 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -23,6 +23,10 @@
#include "logger.h"
#include "work.h"
#include "cluster.h"
+#include "coroutine.h"
+
+static int cdrv_fd;
+static struct coroutine *cdrv_co;
struct node {
struct sheepdog_node_list_entry ent;
@@ -246,22 +250,21 @@ void do_cluster_request(struct work *work, int idx)
free(msg);
}
+static void group_handler(int listen_fd, int events, void *data);
+
+static void cluster_dispatch(void *opaque)
+{
+ if (sys->cdrv->dispatch() != 0)
+ panic("oops... an error occurred inside corosync\n");
+}
+
static void group_handler(int listen_fd, int events, void *data)
{
- int ret;
- if (events & EPOLLHUP) {
- eprintf("received EPOLLHUP event: has corosync exited?\n");
- goto out;
- }
+ if (events & EPOLLHUP)
+ panic("received EPOLLHUP event: has corosync exited?\n");
- ret = sys->cdrv->dispatch();
- if (ret == 0)
- return;
- else
- eprintf("oops... an error occurred inside corosync\n");
-out:
- log_close();
- exit(1);
+ cdrv_co = coroutine_create(cluster_dispatch);
+ coroutine_enter(cdrv_co, NULL);
}
static inline int get_nodes_nr_from(struct list_head *l)
@@ -644,6 +647,10 @@ static void sd_notify_handler(struct sheepdog_node_list_entry *sender,
list_add_tail(&cevent->cpg_event_list, &sys->cpg_event_siblings);
start_cpg_event_work();
+
+ unregister_event(cdrv_fd);
+ coroutine_yield();
+ register_event(cdrv_fd, group_handler, NULL);
}
/*
@@ -942,6 +949,8 @@ static void cpg_event_done(struct work *work, int idx)
cpg_event_free(cevent);
cpg_event_running = 0;
+ coroutine_enter(cdrv_co, NULL);
+
if (!list_empty(&sys->cpg_event_siblings))
start_cpg_event_work();
}
@@ -1200,6 +1209,11 @@ static void sd_join_handler(struct sheepdog_node_list_entry *joined,
list_add_tail(&cevent->cpg_event_list, &sys->cpg_event_siblings);
start_cpg_event_work();
+
+ unregister_event(cdrv_fd);
+ coroutine_yield();
+ register_event(cdrv_fd, group_handler, NULL);
+
break;
case CJ_RES_FAIL:
case CJ_RES_JOIN_LATER:
@@ -1317,6 +1331,10 @@ static void sd_leave_handler(struct sheepdog_node_list_entry *left,
list_add_tail(&cevent->cpg_event_list, &sys->cpg_event_siblings);
start_cpg_event_work();
+ unregister_event(cdrv_fd);
+ coroutine_yield();
+ register_event(cdrv_fd, group_handler, NULL);
+
return;
oom:
if (w) {
@@ -1329,7 +1347,7 @@ oom:
int create_cluster(int port, int64_t zone)
{
- int fd, ret;
+ int ret;
struct cdrv_handlers handlers = {
.join_handler = sd_join_handler,
.leave_handler = sd_leave_handler,
@@ -1347,8 +1365,8 @@ int create_cluster(int port, int64_t zone)
}
}
- fd = sys->cdrv->init(&handlers, sys->cdrv_option, sys->this_node.addr);
- if (fd < 0)
+ cdrv_fd = sys->cdrv->init(&handlers, sys->cdrv_option, sys->this_node.addr);
+ if (cdrv_fd < 0)
return -1;
sys->this_node.port = port;
@@ -1375,7 +1393,7 @@ int create_cluster(int port, int64_t zone)
INIT_LIST_HEAD(&sys->cpg_event_siblings);
- ret = register_event(fd, group_handler, NULL);
+ ret = register_event(cdrv_fd, group_handler, NULL);
if (ret) {
eprintf("failed to register epoll events (%d)\n", ret);
return 1;
--
1.7.2.5
More information about the sheepdog
mailing list