[Sheepdog] [PATCH] inform new nodes of the running vm state
FUJITA Tomonori
fujita.tomonori at lab.ntt.co.jp
Mon Apr 12 16:28:55 CEST 2010
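When a new node joins the cluster, the master node needs to inform it of
the state of the currently running VMs. The master now sends its VM list
to the joining node in DM_CONT messages before finishing the join with a
DM_FIN message.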
Signed-off-by: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp>
---
collie/group.c | 134 +++++++++++++++++++++++++++++++++++++--------
include/meta.h | 1 -
include/sheepdog_proto.h | 6 +--
3 files changed, 112 insertions(+), 29 deletions(-)
diff --git a/collie/group.c b/collie/group.c
index ed9ea4a..f3bef66 100644
--- a/collie/group.c
+++ b/collie/group.c
@@ -37,11 +37,19 @@ struct node {
struct list_head list;
};
+enum deliver_msg_state {
+ DM_INIT = 1,
+ DM_CONT,
+ DM_FIN,
+};
+
struct message_header {
uint8_t op;
- uint8_t done;
+ uint8_t state;
uint8_t pad[2];
uint32_t msg_length;
+ uint32_t nodeid;
+ uint32_t pid;
struct sheepdog_node_list_entry from;
};
@@ -252,7 +260,7 @@ forward:
}
msg->header.op = SD_MSG_VDI_OP;
- msg->header.done = 0;
+ msg->header.state = DM_INIT;
msg->header.msg_length = sizeof(*msg) + hdr->data_length;
msg->header.from = sys->this_node;
msg->req = *((struct sd_vdi_req *)&req->rq);
@@ -742,6 +750,51 @@ out:
req->done(req);
}
+static void handle_join(struct work_deliver *w)
+{
+ struct message_header *m;
+ struct vm *vm;
+ struct sheepdog_vm_list_entry *e;
+ int i, nr = 2000;
+ char *buf;
+
+ buf = malloc(sizeof(*m) + sizeof(*e) * nr);
+ m = (struct message_header *)buf;
+ e = (struct sheepdog_vm_list_entry *)(buf + sizeof(*m));
+
+ i = 0;
+ m->state = DM_CONT;
+ m->pid = ((struct join_message *)w->msg)->pid;
+ m->nodeid = ((struct join_message *)w->msg)->nodeid;
+
+ vprintf(SDOG_DEBUG "%u %u\n", m->pid, m->nodeid);
+
+ list_for_each_entry(vm, &sys->vm_list, list) {
+ *e = vm->ent;
+ vprintf(SDOG_DEBUG "%d %s\n", i, e->name);
+ e++;
+ i++;
+
+ if (!(i % nr)) {
+ m->msg_length = sizeof(*m) + i * sizeof(*e);
+ send_message(sys->handle, m);
+ e = (struct sheepdog_vm_list_entry *)(buf + sizeof(*m));
+ i = 0;
+ }
+ }
+
+ if (i) {
+ m->msg_length = sizeof(*m) + i * sizeof(*e);
+ vprintf(SDOG_DEBUG "%d %d\n", i, m->msg_length);
+ send_message(sys->handle, m);
+ }
+
+ m = w->msg;
+ join((struct join_message *)m);
+ m->state = DM_FIN;
+ send_message(sys->handle, m);
+}
+
static void __sd_deliver(struct work *work, int idx)
{
struct work_deliver *w = container_of(work, struct work_deliver, work);
@@ -749,8 +802,8 @@ static void __sd_deliver(struct work *work, int idx)
char name[128];
struct node *node;
- dprintf("op: %d, done: %d, size: %d, from: %s\n",
- m->op, m->done, m->msg_length,
+ dprintf("op: %d, state: %u, size: %d, from: %s\n",
+ m->op, m->state, m->msg_length,
addr_to_str(name, sizeof(name), m->from.addr, m->from.port));
if (m->op == SD_MSG_JOIN) {
@@ -767,27 +820,24 @@ static void __sd_deliver(struct work *work, int idx)
node->ent = m->from;
}
- if (!m->done) {
+ if (m->state == DM_INIT) {
if (!is_master())
return;
switch (m->op) {
case SD_MSG_JOIN:
- join((struct join_message *)m);
+ handle_join(w);
break;
case SD_MSG_VDI_OP:
vdi_op((struct vdi_op_message *)m);
+ m->state = DM_FIN;
+ send_message(sys->handle, m);
break;
default:
eprintf("unknown message %d\n", m->op);
break;
}
-
- vprintf(SDOG_DEBUG "will send\n");
-
- m->done = 1;
- send_message(sys->handle, m);
- } else {
+ } else if (m->state == DM_FIN) {
switch (m->op) {
case SD_MSG_JOIN:
update_cluster_info((struct join_message *)m);
@@ -819,7 +869,7 @@ static void __sd_deliver_done(struct work *work, int idx)
* for the non master nodes, when I get one finished message,
* if I can forget it.
*/
- if (m->done && !list_empty(&sys->work_deliver_siblings)) {
+ if (m->state == DM_FIN && !list_empty(&sys->work_deliver_siblings)) {
n = list_first_entry(&sys->work_deliver_siblings,
struct work_deliver, work_deliver_list);
@@ -830,7 +880,7 @@ static void __sd_deliver_done(struct work *work, int idx)
* synchronized
*/
- if (m->done && m->op == SD_MSG_JOIN && sys->epoch >= 2)
+ if (m->state == DM_FIN && m->op == SD_MSG_JOIN && sys->epoch >= 2)
start_recovery(sys->epoch);
free(w->msg);
@@ -845,8 +895,8 @@ static void __sd_deliver_done(struct work *work, int idx)
char name[128];
m = n->msg;
- dprintf("op: %d, done: %d, size: %d, from: %s\n",
- m->op, m->done, m->msg_length,
+ dprintf("op: %d, state: %u, size: %d, from: %s\n",
+ m->op, m->state, m->msg_length,
addr_to_str(name, sizeof(name), m->from.addr, m->from.port));
list_del(&n->work_deliver_list);
@@ -862,10 +912,47 @@ static void sd_deliver(cpg_handle_t handle, const struct cpg_name *group_name,
struct message_header *m = msg;
char name[128];
- dprintf("op: %d, done: %d, size: %d, from: %s\n",
- m->op, m->done, m->msg_length,
+ dprintf("op: %d, state: %u, size: %d, from: %s\n",
+ m->op, m->state, m->msg_length,
addr_to_str(name, sizeof(name), m->from.addr, m->from.port));
+ if (m->state == DM_CONT) {
+ struct sheepdog_vm_list_entry *e;
+ int nr, i;
+ struct vm *vm;
+
+ dprintf("op: %d, state: %u, size: %d, from: %s\n",
+ m->op, m->state, m->msg_length,
+ addr_to_str(name, sizeof(name), m->from.addr, m->from.port));
+
+ if (is_master())
+ return;
+
+ vprintf(SDOG_DEBUG "%u %u %u %u\n",
+ m->nodeid, m->pid, sys->this_nodeid, sys->this_pid);
+
+ if (sys->this_nodeid != m->nodeid ||
+ sys->this_pid != m->pid)
+ return;
+
+ /* This is my JOIN message. */
+ vprintf(SDOG_DEBUG "we update the vm list\n");
+
+ nr = (m->msg_length - sizeof(*m)) / sizeof(*e);
+ e = (struct sheepdog_vm_list_entry *)((char *)msg + sizeof(*m));
+
+ for (i = 0; i < nr; i++) {
+ vm = zalloc(sizeof(*vm));
+ if (!vm)
+ break;
+
+ vm->ent = e[i];
+ vprintf(SDOG_DEBUG "%d, got %s\n", i, e[i].name);
+ list_add(&vm->list, &sys->vm_list);
+ }
+ return;
+ }
+
w = zalloc(sizeof(*w));
if (!w)
return;
@@ -880,7 +967,7 @@ static void sd_deliver(cpg_handle_t handle, const struct cpg_name *group_name,
w->work.done = __sd_deliver_done;
if (is_master()) {
- if (!m->done) {
+ if (m->state == DM_INIT) {
int run = 0;
/*
@@ -902,12 +989,12 @@ static void sd_deliver(cpg_handle_t handle, const struct cpg_name *group_name,
} else
/*
* must be blocked until the message with
- * m->done == 0 is completely finished
+ * m->state == DM_INIT is completely finished
* (__sd_deliver_done is called)
*/
w->work.attr = WORK_ORDERED;
} else {
- if (!m->done) {
+ if (m->state == DM_INIT) {
list_add_tail(&w->work_deliver_list,
&sys->work_deliver_siblings);
@@ -916,8 +1003,7 @@ static void sd_deliver(cpg_handle_t handle, const struct cpg_name *group_name,
* work_deliver_siblings.
*/
return;
- }
-
+ } else
/*
* __sd_deliver_done() frees requests on
* work_deliver_siblings in order.
@@ -1016,7 +1102,7 @@ static void __sd_confch(struct work *work, int idx)
struct join_message msg;
msg.header.op = SD_MSG_JOIN;
- msg.header.done = 0;
+ msg.header.state = DM_INIT;
msg.header.msg_length = sizeof(msg);
msg.header.from = sys->this_node;
msg.nodeid = sys->this_nodeid;
diff --git a/include/meta.h b/include/meta.h
index 208b2e1..7f88c4d 100644
--- a/include/meta.h
+++ b/include/meta.h
@@ -31,7 +31,6 @@
#define VDI_SPACE_SHIFT 32
#define VDI_BIT (UINT64_C(1) << 63)
#define DEAFAULT_NR_COPIES 1
-#define SD_MAX_VDI_LEN 256
#define MAX_DATA_OBJS (1ULL << 20)
#define MAX_CHILDREN 1024
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index f124286..b2c0fb8 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -19,6 +19,7 @@
#define SD_MAX_NODES 1024
#define SD_MAX_VMS 4096
+#define SD_MAX_VDI_LEN 256
/* -> vmon */
@@ -268,7 +269,7 @@ struct sd_node_rsp {
};
struct sheepdog_vm_list_entry {
- uint8_t name[32];
+ uint8_t name[SD_MAX_VDI_LEN];
uint8_t host_addr[16];
uint16_t host_port;
uint8_t pad[6];
@@ -314,11 +315,8 @@ static inline int hval_to_sheep(struct sheepdog_node_list_entry *entries,
int i;
struct sheepdog_node_list_entry *e = entries, *n;
- printf("%lx\n", id);
-
for (i = 0; i < nr_entries - 1; i++, e++) {
n = e + 1;
- printf("%d, %lx, %lx, %lx\n", i, e->id, n->id, id);
if (id > e->id && id <= n->id)
break;
}
--
1.6.5
More information about the sheepdog
mailing list