[Sheepdog] [PATCH 09/18] collie: build epoch tree at the master node
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Thu Mar 11 07:48:08 CET 2010
A master node gathers epoch information from all the nodes, and create a tree
about these information.
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
collie/Makefile | 3 +-
collie/collie.h | 3 +
collie/group.c | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 154 insertions(+), 1 deletions(-)
diff --git a/collie/Makefile b/collie/Makefile
index 2bfc9c6..9314b27 100644
--- a/collie/Makefile
+++ b/collie/Makefile
@@ -5,7 +5,8 @@ CFLAGS += -D_GNU_SOURCE -DSD_VERSION=\"$(VERSION)\"
LIBS += -lpthread -lcpg
PROGRAMS = collie
-COLLIE_OBJS = collie.o net.o vdi.o group.o store.o work.o ../lib/event.o ../lib/net.o ../lib/logger.o
+COLLIE_OBJS = collie.o net.o vdi.o group.o store.o work.o ../lib/event.o \
+ ../lib/net.o ../lib/logger.o ../lib/tree.o
COLLIE_DEP = $(COLLIE_OBJS:.o=.d)
.PHONY:all
diff --git a/collie/collie.h b/collie/collie.h
index 968925e..ce8e6e4 100644
--- a/collie/collie.h
+++ b/collie/collie.h
@@ -19,6 +19,7 @@
#include "logger.h"
#include "work.h"
#include "net.h"
+#include "tree.h"
#define SD_MSG_JOIN 0x01
#define SD_MSG_VDI_OP 0x02
@@ -70,6 +71,8 @@ struct cluster_info {
struct list_head vm_list;
struct list_head pending_list;
+ struct tree_vertex epoch_tree_root;
+
int nr_sobjs;
};
diff --git a/collie/group.c b/collie/group.c
index 0de23e6..02cb2d5 100644
--- a/collie/group.c
+++ b/collie/group.c
@@ -43,6 +43,15 @@ struct message_header {
struct sheepdog_node_list_entry from;
};
+struct epoch_tree {
+ uint64_t ctime;
+ uint32_t epoch;
+ uint16_t nr_nodes;
+ uint16_t updated;
+ struct tree_vertex vertex;
+ struct sheepdog_node_list_entry nodes[SD_MAX_NODES];
+};
+
struct join_message {
struct message_header header;
uint32_t nodeid;
@@ -286,9 +295,45 @@ static int is_master(void)
return 0;
}
+static int add_epoch_log(int epoch, uint64_t parent_hval, uint64_t hval,
+ int nr_nodes, struct sheepdog_node_list_entry *nodes,
+ int is_updated, uint64_t ctime)
+{
+ struct tree_vertex *v;
+ struct epoch_tree *tree;
+
+ if (parent_hval) {
+ v = tree_find(parent_hval, &sys->epoch_tree_root);
+ tree = container_of(v, struct epoch_tree, vertex);
+ }
+ v = tree_find(hval, &sys->epoch_tree_root);
+
+ if (v) {
+ tree = container_of(v, struct epoch_tree, vertex);
+ if (is_updated)
+ tree->updated = is_updated;
+ return 0;
+ }
+
+ tree = zalloc(sizeof(*tree));
+ if (!tree) {
+ eprintf("out of memory\n");
+ return 1;
+ }
+ tree->epoch = epoch;
+ tree->ctime = ctime;
+ tree->updated = is_updated;
+ tree->nr_nodes = nr_nodes;
+ memcpy(tree->nodes, nodes, sizeof(nodes[0]) * nr_nodes);
+ INIT_TREE_VERTEX(&tree->vertex, hval, parent_hval);
+ tree_add(&tree->vertex, &sys->epoch_tree_root);
+ return 0;
+}
+
static void join(struct join_message *msg)
{
struct node *node;
+ struct sheepdog_node_list_entry entries[SD_MAX_NODES];
if (!sys->synchronized)
return;
@@ -299,8 +344,109 @@ static void join(struct join_message *msg)
if (msg->nr_sobjs)
sys->nr_sobjs = msg->nr_sobjs;
+ {
+ struct sd_epoch_req hdr;
+ struct sd_epoch_rsp *rsp = (struct sd_epoch_rsp *)&hdr;
+ uint32_t epoch_list[SD_MAX_NODES];
+ unsigned int rlen, wlen;
+ int ret, i, n, local;
+ int fd = 0;
+ uint64_t pre_hval;
+ char name[128];
+
+ local = (msg->header.from.id == sys->this_node.id);
+ if (local) {
+ n = ARRAY_SIZE(epoch_list);
+ ret = get_epoch_list(epoch_list, &n);
+ } else {
+ addr_to_str(name, sizeof(name), msg->header.from.addr, 0);
+
+ fd = connect_to(name, msg->header.from.port);
+ if (fd < 0)
+ goto out;
+
+ memset(&hdr, 0, sizeof(hdr));
+
+ hdr.opcode = SD_OP_GET_EPOCH_LIST;
+ hdr.epoch = sys->epoch;
+ hdr.data_length = sizeof(epoch_list);
+
+ rlen = hdr.data_length;
+ wlen = 0;
+ ret = exec_req(fd, (struct sd_req *)&hdr, epoch_list,
+ &wlen, &rlen);
+ n = rlen / sizeof(uint32_t);
+ /* TOOD: error handling */
+ if (ret != 0) {
+ eprintf("failed to send request %s\n", name);
+ goto out;
+ }
+ ret = rsp->result;
+ }
+
+ if (ret != SD_RES_SUCCESS) {
+ eprintf("failed to get epoch list, %x\n", ret);
+ goto out;
+ }
+
+ pre_hval = 0;
+ dprintf("nr_epoch = %d\n", n);
+
+ for (i = 0; i < n; i++) {
+ int size;
+ uint32_t is_updated;
+ uint64_t ctime;
+ uint64_t hval;
+
+ if (local) {
+ size = ARRAY_SIZE(entries);
+ ret = read_epoch(epoch_list[i], &is_updated, &ctime,
+ &hval, entries, &size);
+ } else {
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.opcode = SD_OP_READ_EPOCH;
+ hdr.epoch = sys->epoch;
+ hdr.data_length = sizeof(entries);
+ hdr.request_epoch = epoch_list[i];
+ rlen = hdr.data_length;
+
+ ret = exec_req(fd, (struct sd_req *)&hdr, entries,
+ &wlen, &rlen);
+
+
+ size = rlen / sizeof(*entries);
+ if (ret != 0) {
+ eprintf("failed to send request, %x\n", ret);
+ break;
+ }
+ is_updated = rsp->is_updated;
+ hval = rsp->hval;
+ ctime = rsp->ctime;
+ ret = rsp->result;
+ }
+
+ if (ret != SD_RES_SUCCESS) {
+ eprintf("failed to read epoch, %x\n", ret);
+ break;
+ }
+ if (size <= 0) {
+ pre_hval = 0;
+ continue;
+ }
+
+ dprintf("add_epoch_log, %d, %lu -> %lu, %d\n",
+ epoch_list[i], pre_hval, hval, is_updated);
+ add_epoch_log(epoch_list[i], pre_hval, hval, size,
+ entries, is_updated, ctime);
+
+ pre_hval = hval;
+ }
+ close(fd);
+ }
+
msg->epoch = sys->epoch;
msg->nr_sobjs = sys->nr_sobjs;
+ msg->nr_nodes = 0;
list_for_each_entry(node, &sys->cpg_node_list, list) {
if (node->nodeid == msg->nodeid && node->pid == msg->pid)
continue;
@@ -312,6 +458,8 @@ static void join(struct join_message *msg)
msg->nodes[msg->nr_nodes].ent = node->ent;
msg->nr_nodes++;
}
+out:
+ return;
}
static void update_cluster_info(struct join_message *msg)
@@ -865,6 +1013,7 @@ join_retry:
INIT_LIST_HEAD(&sys->cpg_node_list);
INIT_LIST_HEAD(&sys->vm_list);
INIT_LIST_HEAD(&sys->pending_list);
+ INIT_TREE_VERTEX(&sys->epoch_tree_root, 0, 0);
cpg_context_set(cpg_handle, sys);
cpg_fd_get(cpg_handle, &fd);
--
1.5.6.5
More information about the sheepdog
mailing list