[Sheepdog] [PATCH V3 3/3] sheep: get consistent cluster information on each node

Liu Yuan namei.unix at gmail.com
Wed Sep 21 12:14:01 CEST 2011


From: Liu Yuan <tailai.ly at taobao.com>

Currently, we only try to get cluster information from the local epoch history.
We cannot get the full history when the local epoch log doesn't have the
requested epoch version, which results in an inconsistent epoch history being
displayed.

This patch adds a new function to read the epoch log remotely, and also adds a
private sheepdog operation (SD_OP_GET_EPOCH) to achieve this.

When none of the nodes in the cluster has the requested epoch, we just
display a null string (an empty node list) like the following:

root at taobao:/home/dev/sheepdog# collie/collie cluster info
Cluster status: running

Creation time        Epoch Nodes
2011-09-21 17:58:15      7 [192.168.0.1:7000, 192.168.0.2:7000]
2011-09-21 17:58:15      6 [192.168.0.1:7000, 192.168.0.2:7000, 192.168.0.3:7000]
2011-09-21 17:58:15      5 [192.168.0.1:7000, 192.168.0.2:7000]
2011-09-21 17:58:15      4 [192.168.0.1:7000]
2011-09-21 17:58:15      3 [] <---- null string
2011-09-21 17:58:15      2 [192.168.0.2:7000, 192.168.0.3:7000]
2011-09-21 17:58:15      1 [192.168.0.1:7000, 192.168.0.2:7000, 192.168.0.3:7000]

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 sheep/group.c      |   31 ++++++++++++++++++++++++++-----
 sheep/sdnet.c      |    1 +
 sheep/sheep_priv.h |    2 ++
 sheep/store.c      |   44 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/sheep/group.c b/sheep/group.c
index bc92de6..f54ee02 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -256,6 +256,23 @@ static void get_node_list(struct sd_node_req *req,
 	rsp->master_idx = get_node_idx(&node->ent, data, nr_nodes);
 }
 
+static int get_epoch(struct sd_obj_req *req,
+		      struct sd_obj_rsp *rsp, void *data)
+{
+	int epoch = req->tgt_epoch;
+	int len, ret;
+	dprintf("%d\n", epoch);
+	len = epoch_log_read(epoch, (char *)data, req->data_length);
+	if (len == -1) {
+		ret = SD_RES_NO_TAG;
+		rsp->data_length = 0;
+	} else {
+		ret = SD_RES_SUCCESS;
+		rsp->data_length = len;
+	}
+	return ret;
+}
+
 void cluster_queue_request(struct work *work, int idx)
 {
 	struct request *req = container_of(work, struct request, work);
@@ -268,6 +285,10 @@ void cluster_queue_request(struct work *work, int idx)
 	eprintf("%p %x\n", req, hdr->opcode);
 
 	switch (hdr->opcode) {
+	case SD_OP_GET_EPOCH:
+		ret = get_epoch((struct sd_obj_req *)hdr,
+			  (struct sd_obj_rsp *)rsp, req->data);
+		break;
 	case SD_OP_GET_NODE_LIST:
 		get_node_list((struct sd_node_req *)hdr,
 			      (struct sd_node_rsp *)rsp, req->data);
@@ -286,12 +307,12 @@ void cluster_queue_request(struct work *work, int idx)
 			log->nr_nodes = epoch_log_read(epoch, (char *)log->nodes,
 						       sizeof(log->nodes));
 			if (log->nr_nodes == -1)
-				i--;
-			else{
-				rsp->data_length += sizeof(*log);
-				log->nr_nodes /= sizeof(log->nodes[0]);
-			}
+				log->nr_nodes = epoch_log_read_remote(epoch,
+								      (char *)log->nodes,
+								      sizeof(log->nodes));
 
+			rsp->data_length += sizeof(*log);
+			log->nr_nodes /= sizeof(log->nodes[0]);
 			epoch--;
 		}
 
diff --git a/sheep/sdnet.c b/sheep/sdnet.c
index ae4a5fc..94a8ae3 100644
--- a/sheep/sdnet.c
+++ b/sheep/sdnet.c
@@ -271,6 +271,7 @@ static void queue_request(struct request *req)
 	case SD_OP_SHUTDOWN:
 	case SD_OP_STAT_CLUSTER:
 	case SD_OP_GET_VDI_ATTR:
+	case SD_OP_GET_EPOCH:
 		req->work.fn = cluster_queue_request;
 		break;
 	case SD_OP_READ_VDIS:
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index f1f96e5..1068e03 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -24,6 +24,7 @@
 #define SD_OP_REMOVE_OBJ     0x91
 
 #define SD_OP_GET_OBJ_LIST   0xA1
+#define SD_OP_GET_EPOCH      0XA2
 
 #define SD_MSG_JOIN             0x01
 #define SD_MSG_VDI_OP           0x02
@@ -210,6 +211,7 @@ int get_global_nr_copies(uint32_t *copies);
 
 int epoch_log_write(uint32_t epoch, char *buf, int len);
 int epoch_log_read(uint32_t epoch, char *buf, int len);
+int epoch_log_read_remote(uint32_t epoch, char *buf, int len);
 int get_latest_epoch(void);
 int remove_epoch(int epoch);
 int set_cluster_ctime(uint64_t ctime);
diff --git a/sheep/store.c b/sheep/store.c
index 8583455..91f17c6 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -875,6 +875,50 @@ int epoch_log_write(uint32_t epoch, char *buf, int len)
 	return 0;
 }
 
+int epoch_log_read_remote(uint32_t epoch, char *buf, int len)
+{
+	struct sd_obj_req hdr;
+	struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
+	int fd, i, ret;
+	unsigned int rlen, wlen, nr, le = get_latest_epoch();
+	char host[128];
+	struct sheepdog_node_list_entry nodes[SD_MAX_NODES];
+
+	nr = epoch_log_read(le, (char *)nodes, ARRAY_SIZE(nodes));
+	nr /= sizeof(nodes[0]);
+	for (i = 0; i < nr; i++) {
+		if (is_myself(nodes[i].addr, nodes[i].port))
+			continue;
+
+		addr_to_str(host, sizeof(host), nodes[i].addr, 0);
+		fd = connect_to(host, nodes[i].port);
+		if (fd < 0) {
+			vprintf(SDOG_ERR "can't connect to %s, %m\n", host);
+			continue;
+		}
+
+		memset(&hdr, 0, sizeof(hdr));
+		hdr.opcode = SD_OP_GET_EPOCH;
+		hdr.tgt_epoch = epoch;
+		hdr.data_length = len;
+		rlen = hdr.data_length;
+		wlen = 0;
+
+		ret = exec_req(fd, (struct sd_req *)&hdr, buf, &wlen, &rlen);
+		close(fd);
+
+		if (ret)
+			continue;
+		if (rsp->result == SD_RES_SUCCESS) {
+			ret = rsp->data_length;
+			goto out;
+		}
+	}
+	ret = 0; /* If no one has targeted epoch file, we can safely return 0 */
+out:
+	return ret;
+}
+
 int epoch_log_read(uint32_t epoch, char *buf, int len)
 {
 	int fd;
-- 
1.7.5.1




More information about the sheepdog mailing list