[Sheepdog] sheepdog image created but shepherd does not show it

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Sat Dec 26 22:32:25 CET 2009


Hi,

Thanks for reporting the bug.
Can you try the patch below against the current git head?

> PS. Another question: since I used --copies=2, I expected to find
> each sheepdog VM block replicated to 2 nodes, not 3, but
> under /sheepdog/0/ I see that exactly the same number (with the same names)
> of files were created on all 3 nodes - the only exception
> is that /sheepdog/0/vdi/zopa was created on only 2 nodes.
> Is that expected, and what is the actual meaning of --copies=N?

The meaning of --copies is just what you expected, and I think this patch
will also fix that replication problem.

==
>From 7a45f310bd6b81f0c655217f3f1dfc63fd68c634 Mon Sep 17 00:00:00 2001
From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
Date: Sun, 27 Dec 2009 06:08:09 +0900
Subject: [PATCH] use ANAME_COPIES as a number of replication

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 collie/net.c             |    1 +
 collie/store.c           |   27 ++++++++++++++++++++
 collie/vdi.c             |   60 +++++++++++++++++++++++++++++++++------------
 include/net.h            |    3 +-
 include/sheepdog_proto.h |    5 +++-
 lib/net.c                |   36 +++++++++++++++++----------
 shepherd/shepherd.c      |    3 +-
 7 files changed, 103 insertions(+), 32 deletions(-)

diff --git a/collie/net.c b/collie/net.c
index 5505613..be19bcc 100644
--- a/collie/net.c
+++ b/collie/net.c
@@ -65,6 +65,7 @@ static void queue_request(struct request *req)
 	case SD_OP_SO_NEW_VDI:
 	case SD_OP_SO_LOOKUP_VDI:
 	case SD_OP_SO_READ_VDIS:
+	case SD_OP_SO_STAT:
 		req->work.fn = so_queue_request;
 		break;
 	default:
diff --git a/collie/store.c b/collie/store.c
index 4e95469..fce71ff 100644
--- a/collie/store.c
+++ b/collie/store.c
@@ -606,6 +606,33 @@ void so_queue_request(struct work *work, int idx)
 	case SD_OP_SO_READ_VDIS:
 		ret = so_read_vdis(req);
 		break;
+	case SD_OP_SO_STAT:
+		fd = open(path, O_RDONLY);
+		if (fd < 0) {
+			result = SD_RES_EIO;
+			goto out;
+		}
+
+		rsp->oid = 0;
+		ret = fgetxattr(fd, ANAME_LAST_OID, &rsp->oid,
+				sizeof(rsp->oid));
+		if (ret != sizeof(rsp->oid)) {
+			close(fd);
+			result = SD_RES_SYSTEM_ERROR;
+			goto out;
+		}
+
+		rsp->copies = 0;
+		ret = fgetxattr(fd, ANAME_COPIES, &rsp->copies,
+				sizeof(rsp->copies));
+		if (ret != sizeof(rsp->copies)) {
+			close(fd);
+			result = SD_RES_SYSTEM_ERROR;
+			goto out;
+		}
+
+		result = SD_RES_SUCCESS;
+		break;
 	}
 
 out:
diff --git a/collie/vdi.c b/collie/vdi.c
index 31567d0..290d919 100644
--- a/collie/vdi.c
+++ b/collie/vdi.c
@@ -84,11 +84,12 @@ int add_vdi(struct cluster_info *ci, char *name, int len, uint64_t size,
 	    uint64_t *added_oid, uint64_t base_oid, uint32_t tag)
 {
 	struct sheepdog_node_list_entry entries[SD_MAX_NODES];
-	int nr_nodes;
+	int nr_nodes, nr_reqs;
 	uint64_t oid = 0;
 	int ret;
 	int copies;
 	struct sd_so_req req;
+	struct sd_so_rsp *rsp = (struct sd_so_rsp *)&req;
 
 	memset(&req, 0, sizeof(req));
 
@@ -97,22 +98,31 @@ int add_vdi(struct cluster_info *ci, char *name, int len, uint64_t size,
 	dprintf("%s (%d) %" PRIu64 ", base: %" PRIu64 "\n", name, len, size,
 		base_oid);
 
-	/* todo */
-/* 	copies = sb->default_nr_copies; */
-	copies = 3;
-	if (copies > nr_nodes)
-		copies = nr_nodes;
+	req.opcode = SD_OP_SO_STAT;
+	ret = exec_reqs(entries, nr_nodes, ci->epoch,
+			SD_DIR_OID, (struct sd_req *)&req, NULL, 0, 0,
+			nr_nodes, 1);
+	if (ret < 0)
+		return rsp->result;
+
+	copies = rsp->copies;
+	nr_reqs = copies;
+	if (nr_reqs > nr_nodes)
+		nr_reqs = nr_nodes;
+
+	memset(&req, 0, sizeof(req));
 
 	req.opcode = SD_OP_SO_NEW_VDI;
 	req.copies = copies;
 	req.tag = tag;
 
 	ret = exec_reqs(entries, nr_nodes, ci->epoch,
-			SD_DIR_OID, (struct sd_req *)&req, name, len, 0, copies);
+			SD_DIR_OID, (struct sd_req *)&req, name, len, 0,
+			nr_reqs, nr_reqs);
 
 	/* todo: error handling */
 
-	oid = ((struct sd_so_rsp *)&req)->oid;
+	oid = rsp->oid;
 	*added_oid = oid;
 
 	dprintf("%s (%d) %" PRIu64 ", base: %" PRIu64 "\n", name, len, size,
@@ -134,7 +144,7 @@ int lookup_vdi(struct cluster_info *ci,
 	       int *current)
 {
 	struct sheepdog_node_list_entry entries[SD_MAX_NODES];
-	int nr_nodes;
+	int nr_nodes, nr_reqs;
 	int ret, copies;
 	struct sd_so_req req;
 	struct sd_so_rsp *rsp = (struct sd_so_rsp *)&req;
@@ -147,16 +157,30 @@ int lookup_vdi(struct cluster_info *ci,
 
 	dprintf("looking for %s %zd\n", filename, strlen(filename));
 
-	/* todo */
-	copies = 3;
-	if (copies > nr_nodes)
-		copies = nr_nodes;
+	req.opcode = SD_OP_SO_STAT;
+	ret = exec_reqs(entries, nr_nodes, ci->epoch,
+			SD_DIR_OID, (struct sd_req *)&req, NULL, 0, 0,
+			nr_nodes, 1);
+	if (ret < 0)
+		return rsp->result;
+
+	copies = rsp->copies;
+	nr_reqs = copies;
+	if (nr_reqs > nr_nodes)
+		nr_reqs = nr_nodes;
+
+	memset(&req, 0, sizeof(req));
+	copies = rsp->copies;
+	nr_reqs = copies;
+	if (nr_reqs > nr_nodes)
+		nr_reqs = nr_nodes;
 
 	req.opcode = SD_OP_SO_LOOKUP_VDI;
 	req.tag = tag;
 
 	ret = exec_reqs(entries, nr_nodes, ci->epoch,
-			SD_DIR_OID, (struct sd_req *)&req, filename, strlen(filename), 0, copies);
+			SD_DIR_OID, (struct sd_req *)&req, filename, strlen(filename), 0,
+			nr_reqs, 1);
 
 	*oid = rsp->oid;
 	if (rsp->flags & SD_VDI_RSP_FLAG_CURRENT)
@@ -186,7 +210,11 @@ int make_super_object(struct cluster_info *ci, struct sd_vdi_req *hdr)
 	nr_nodes = build_node_list(&ci->node_list, entries);
 
 	ret = exec_reqs(entries, nr_nodes, ci->epoch,
-			SD_DIR_OID, (struct sd_req *)&req, NULL, 0, 0, req.copies);
+			SD_DIR_OID, (struct sd_req *)&req, NULL, 0, 0, req.copies,
+			req.copies);
 
-	return ret;
+	if (ret < 0)
+		return SD_RES_EIO;
+
+	return SD_RES_SUCCESS;
 }
diff --git a/include/net.h b/include/net.h
index b0e3df0..7bf0dbb 100644
--- a/include/net.h
+++ b/include/net.h
@@ -46,7 +46,8 @@ int read_object(struct sheepdog_node_list_entry *e,
 
 int exec_reqs(struct sheepdog_node_list_entry *e,
 	      int nodes, uint32_t node_version, uint64_t oid, struct sd_req *hdr,
-	      char *data, unsigned int wdatalen, unsigned int rdatalen, int nr);
+	      char *data, unsigned int wdatalen, unsigned int rdatalen, int nr,
+	      int quorum);
 
 int create_listen_ports(int port, int (*callback)(int fd, void *), void *data);
 
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 4bfb4e5..9557cd8 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -48,6 +48,7 @@
 #define SD_OP_SO_DEL_VDI     0x62
 #define SD_OP_SO_LOOKUP_VDI  0x63
 #define SD_OP_SO_READ_VDIS   0x64
+#define SD_OP_SO_STAT        0x65
 
 #define SD_OP_STAT_SHEEP     0xB0
 
@@ -125,8 +126,10 @@ struct sd_so_rsp {
 	uint32_t        id;
 	uint32_t        data_length;
 	uint32_t        result;
+	uint32_t	copies;
+	uint64_t	ctime;
 	uint64_t	oid;
-	uint32_t	opcode_specific[5];
+	uint32_t	opcode_specific[2];
 };
 
 struct sd_obj_req {
diff --git a/lib/net.c b/lib/net.c
index caf592f..5e26f46 100644
--- a/lib/net.c
+++ b/lib/net.c
@@ -433,16 +433,19 @@ int read_object(struct sheepdog_node_list_entry *e,
 /* TODO: clean up with the above functions */
 int exec_reqs(struct sheepdog_node_list_entry *e,
 	      int nodes, uint32_t node_version, uint64_t oid, struct sd_req *hdr,
-	      char *data, unsigned int wdatalen, unsigned int rdatalen, int nr)
+	      char *data, unsigned int wdatalen, unsigned int rdatalen, int nr,
+	      int quorum)
 {
 	char name[128];
 	int i = 0, n, fd, ret;
 	int success = 0;
 	struct sd_req tmp;
 	struct sd_rsp *rsp = (struct sd_rsp *)&tmp;
+	unsigned wlen, rlen;
 
 	for (i = 0; i < nr; i++) {
-		unsigned wlen = wdatalen, rlen = rdatalen;
+		wlen = wdatalen;
+		rlen = rdatalen;
 
 		n = obj_to_sheep(e, nodes, oid, i);
 
@@ -453,8 +456,10 @@ int exec_reqs(struct sheepdog_node_list_entry *e,
 			 e[n].addr[15]);
 
 		fd = connect_to(name, e[n].port);
-		if (fd < 0)
+		if (fd < 0) {
+			((struct sd_rsp *) hdr)->result = SD_RES_EIO;
 			return -1;
+		}
 
 		hdr->epoch = node_version;
 		if (wdatalen) {
@@ -470,18 +475,23 @@ int exec_reqs(struct sheepdog_node_list_entry *e,
 		close(fd);
 
 		rsp = (struct sd_rsp *)&tmp;
-		if (rdatalen) {
-			if (!ret) {
-				if (rsp->result == SD_RES_SUCCESS) {
-					memcpy(hdr, rsp, sizeof(*rsp));
-					return rlen;
-				}
-			}
-		} else
-			if (!ret)
+
+		if (!ret) {
+			if (rsp->result == SD_RES_SUCCESS)
 				success++;
+		}
+
+		if (success >= quorum)
+			break;
 	}
+
 	memcpy(hdr, rsp, sizeof(*rsp));
 
-	return !success;
+	if (success < quorum)
+		return -1;
+
+	if (rdatalen)
+		return rlen;
+	else
+		return wlen;
 }
diff --git a/shepherd/shepherd.c b/shepherd/shepherd.c
index 9654888..5bbf29d 100644
--- a/shepherd/shepherd.c
+++ b/shepherd/shepherd.c
@@ -398,7 +398,8 @@ int parse_vdi(vdi_parser_func_t func, void *data)
 	req.opcode = SD_OP_SO_READ_VDIS;
 
 	ret = exec_reqs(node_list_entries, nr_nodes, node_list_version,
-			SD_DIR_OID, (struct sd_req *)&req, buf, 0, DIR_BUF_LEN,nr_nodes);
+			SD_DIR_OID, (struct sd_req *)&req, buf, 0, DIR_BUF_LEN,
+			nr_nodes, 1);
 
 	if (ret < 0) {
 		ret = 1;
-- 
1.6.5




More information about the sheepdog mailing list