[Sheepdog] [PATCH] inform new nodes of the running vm state
FUJITA Tomonori
fujita.tomonori at lab.ntt.co.jp
Thu Apr 22 04:02:03 CEST 2010
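When a node joins a Sheepdog cluster, the master node informs it of the
running VM state: the master sends its VM list in DM_CONT messages
before completing the JOIN with the DM_FIN message.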
Signed-off-by: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp>
---
collie/group.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 files changed, 90 insertions(+), 4 deletions(-)
diff --git a/collie/group.c b/collie/group.c
index b6cb633..f80e5a8 100644
--- a/collie/group.c
+++ b/collie/group.c
@@ -771,6 +771,36 @@ out:
req->done(req);
}
+static void update_running_vm_state(struct cpg_event *cevent)
+{
+ struct work_deliver *w = &cevent->d;
+ struct message_header *m = w->msg;
+ struct sheepdog_vm_list_entry *e;
+ int nr, i;
+ struct vm *vm;
+
+ if (sys->join_finished)
+ goto out;
+
+ /* This is my JOIN message. */
+ vprintf(SDOG_DEBUG "we update the vm list\n");
+
+ nr = (m->msg_length - sizeof(*m)) / sizeof(*e);
+ e = (struct sheepdog_vm_list_entry *)(m + 1);
+
+ for (i = 0; i < nr; i++) {
+ vm = zalloc(sizeof(*vm));
+ if (!vm)
+ panic("failed to allocate memory for a vm\n");
+
+ vm->ent = e[i];
+ vprintf(SDOG_DEBUG "%d, got %s\n", i, e[i].name);
+ list_add(&vm->list, &sys->vm_list);
+ }
+out:
+ cevent->skip = 1;
+}
+
static void __sd_deliver(struct cpg_event *cevent)
{
struct work_deliver *w = &cevent->d;
@@ -822,7 +852,9 @@ static void __sd_deliver(struct cpg_event *cevent)
}
}
- if (m->state == DM_FIN) {
+ if (m->state == DM_CONT)
+ update_running_vm_state(cevent);
+ else if (m->state == DM_FIN) {
switch (m->op) {
case SD_MSG_JOIN:
update_cluster_info((struct join_message *)m);
@@ -837,6 +869,55 @@ static void __sd_deliver(struct cpg_event *cevent)
}
}
+static void send_join_response(struct work_deliver *w)
+{
+ struct message_header *m;
+ struct vm *vm;
+ struct sheepdog_vm_list_entry *e;
+ int i, nr = 2000;
+ char *buf;
+
+ /*
+ * FIXME: we need to inform the node of the JOIN failure in
+ * the case of OOM.
+ */
+ buf = malloc(sizeof(*m) + sizeof(*e) * nr);
+ m = (struct message_header *)buf;
+ e = (struct sheepdog_vm_list_entry *)(buf + sizeof(*m));
+
+ i = 0;
+ m->state = DM_CONT;
+ m->pid = w->msg->pid;
+ m->nodeid = w->msg->nodeid;
+
+ vprintf(SDOG_DEBUG "%u %u\n", m->pid, m->nodeid);
+
+ list_for_each_entry(vm, &sys->vm_list, list) {
+ *e = vm->ent;
+ vprintf(SDOG_DEBUG "%d %s\n", i, e->name);
+ e++;
+ i++;
+
+ if (!(i % nr)) {
+ m->msg_length = sizeof(*m) + i * sizeof(*e);
+ send_message(sys->handle, m);
+ e = (struct sheepdog_vm_list_entry *)(buf + sizeof(*m));
+ i = 0;
+ }
+ }
+
+ if (i) {
+ m->msg_length = sizeof(*m) + i * sizeof(*e);
+ vprintf(SDOG_DEBUG "%d %d\n", i, m->msg_length);
+ send_message(sys->handle, m);
+ }
+
+ m = w->msg;
+ join((struct join_message *)m);
+ m->state = DM_FIN;
+ send_message(sys->handle, m);
+}
+
static void __sd_deliver_done(struct cpg_event *cevent)
{
struct work_deliver *w = &cevent->d;
@@ -853,8 +934,7 @@ static void __sd_deliver_done(struct cpg_event *cevent)
if (m->state == DM_INIT && is_master()) {
switch (m->op) {
case SD_MSG_JOIN:
- m->state = DM_FIN;
- send_message(sys->handle, m);
+ send_join_response(w);
break;
case SD_MSG_VDI_OP:
m->state = DM_FIN;
@@ -1167,7 +1247,13 @@ static void cpg_event_done(struct work *w, int idx)
__sd_confchg_done(cevent);
break;
case CPG_EVENT_DELIVER:
- if (cevent->d.msg->state == DM_INIT) {
+ /*
+ * if we are in the process of the JOIN, we will not
+ * be suspended. So sd_deliver() links events to
+ * cpg_event_siblings in order. The events except for
+ * JOIN with DM_CONT and DM_FIN are skipped.
+ */
+ if (sys->join_finished && cevent->d.msg->state == DM_INIT) {
struct cpg_event *f_cevent;
list_for_each_entry(f_cevent, &sys->cpg_event_siblings,
--
1.6.5
More information about the sheepdog
mailing list