[sheepdog] [PATCH 1/3] sheep: fix a bug where read_object() fails in recovery

levin li levin108 at gmail.com
Sun Jun 24 15:24:54 CEST 2012


From: levin li <xingke.lwp at taobao.com>

read_object() calls forward_read_obj_req() to get the object, but
that call may fail with SD_RES_OLD_NODE_VER, and read_object() does
nothing to handle this error. This patch fixes the problem by
sending a gateway request to the local node, so that the gateway
retries when the error occurs.

Signed-off-by: levin li <xingke.lwp at taobao.com>
---
 sheep/store.c |  152 ++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 86 insertions(+), 66 deletions(-)

diff --git a/sheep/store.c b/sheep/store.c
index 52c4716..cfd980b 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -516,9 +516,11 @@ int write_object(struct vnode_info *vnodes, uint32_t epoch,
 		 uint64_t oid, char *data, unsigned int datalen,
 		 uint64_t offset, uint16_t flags, int nr_copies, int create)
 {
-	struct request write_req;
-	struct sd_req *hdr = &write_req.rq;
-	int ret;
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	char host[128];
+	unsigned int wlen = datalen, rlen = 0;
+	int fd, ret;
 
 	if (sys->enable_write_cache && object_is_cached(oid)) {
 		ret = object_cache_write(oid, data, datalen, offset,
@@ -530,23 +532,35 @@ int write_object(struct vnode_info *vnodes, uint32_t epoch,
 		}
 	}
 
-	memset(&write_req, 0, sizeof(write_req));
-	hdr->opcode = create ? SD_OP_CREATE_AND_WRITE_OBJ : SD_OP_WRITE_OBJ;
-	hdr->flags = SD_FLAG_CMD_WRITE;
-	hdr->data_length = datalen;
-	hdr->epoch = epoch;
+	addr_to_str(host, sizeof(host), sys->this_node.addr, 0);
+	fd = connect_to(host, sys->this_node.port);
+	if (fd < 0) {
+		dprintf("Failed to connect to local node\n");
+		return SD_RES_NETWORK_ERROR;
+	}
 
-	hdr->obj.oid = oid;
-	hdr->obj.offset = offset;
-	hdr->obj.copies = nr_copies;
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.opcode = create ? SD_OP_CREATE_AND_WRITE_OBJ : SD_OP_WRITE_OBJ;
+	hdr.flags = SD_FLAG_CMD_WRITE;
+	hdr.data_length = datalen;
 
-	write_req.data = data;
-	write_req.op = get_sd_op(hdr->opcode);
-	write_req.vnodes = vnodes;
+	hdr.obj.oid = oid;
+	hdr.obj.offset = offset;
+
+	ret = exec_req(fd, &hdr, data, &wlen, &rlen);
+	close(fd);
+
+	if (ret) {
+		dprintf("Failed to write object %" PRIx64 "\n", oid);
+		return SD_RES_NETWORK_ERROR;
+	}
+
+	if (rsp->result != SD_RES_SUCCESS) {
+		dprintf("Failed to write object %" PRIx64 " %s\n", oid,
+			sd_strerror(rsp->result));
+		return rsp->result;
+	}
 
-	ret = forward_write_obj_req(&write_req);
-	if (ret != SD_RES_SUCCESS)
-		eprintf("failed to forward write object %x\n", ret);
 	return ret;
 }
 
@@ -558,9 +572,11 @@ int read_object(struct vnode_info *vnodes, uint32_t epoch,
 		uint64_t oid, char *data, unsigned int datalen,
 		uint64_t offset, int nr_copies)
 {
-	struct request read_req;
-	struct sd_req *hdr = &read_req.rq;
-	int ret;
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	char host[128];
+	unsigned int wlen = 0, rlen = datalen;
+	int fd, ret;
 
 	if (sys->enable_write_cache && object_is_cached(oid)) {
 		ret = object_cache_read(oid, data, datalen, offset,
@@ -572,23 +588,35 @@ int read_object(struct vnode_info *vnodes, uint32_t epoch,
 		}
 		return ret;
 	}
-	memset(&read_req, 0, sizeof(read_req));
+
 forward_read:
-	hdr->opcode = SD_OP_READ_OBJ;
-	hdr->data_length = datalen;
-	hdr->epoch = epoch;
+	addr_to_str(host, sizeof(host), sys->this_node.addr, 0);
+	fd = connect_to(host, sys->this_node.port);
+	if (fd < 0) {
+		dprintf("Failed to connect to local node\n");
+		return SD_RES_NETWORK_ERROR;
+	}
 
-	hdr->obj.oid = oid;
-	hdr->obj.offset = offset;
-	hdr->obj.copies = nr_copies;
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.opcode = SD_OP_READ_OBJ;
+	hdr.data_length = datalen;
 
-	read_req.data = data;
-	read_req.op = get_sd_op(hdr->opcode);
-	read_req.vnodes = vnodes;
+	hdr.obj.oid = oid;
+	hdr.obj.offset = offset;
 
-	ret = forward_read_obj_req(&read_req);
-	if (ret != SD_RES_SUCCESS)
-		eprintf("failed to forward read object %x\n", ret);
+	ret = exec_req(fd, &hdr, data, &wlen, &rlen);
+	close(fd);
+
+	if (ret) {
+		dprintf("Failed to read object %" PRIx64 "\n", oid);
+		return SD_RES_NETWORK_ERROR;
+	}
+
+	if (rsp->result != SD_RES_SUCCESS) {
+		dprintf("Failed to read object %" PRIx64 " %s\n", oid,
+			sd_strerror(rsp->result));
+		return rsp->result;
+	}
 
 	return ret;
 }
@@ -596,47 +624,39 @@ forward_read:
 int remove_object(struct vnode_info *vnodes, uint32_t epoch,
 		  uint64_t oid, int nr)
 {
-	struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
-	int err = 0, i = 0;
-
-	oid_to_vnodes(vnodes, oid, nr, obj_vnodes);
-	for (i = 0; i < nr; i++) {
-		struct sd_req hdr;
-		struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
-		struct sd_vnode *v;
-		unsigned wlen = 0, rlen = 0;
-		char name[128];
-		int fd, ret;
-
-		v = obj_vnodes[i];
-		addr_to_str(name, sizeof(name), v->addr, 0);
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	char host[128];
+	unsigned int wlen = 0, rlen = 0;
+	int fd, ret;
 
-		fd = connect_to(name, v->port);
-		if (fd < 0) {
-			rsp->result = SD_RES_NETWORK_ERROR;
-			return -1;
-		}
+	addr_to_str(host, sizeof(host), sys->this_node.addr, 0);
 
-		memset(&hdr, 0, sizeof(hdr));
-		hdr.epoch = epoch;
-		hdr.opcode = SD_OP_REMOVE_OBJ;
-		hdr.flags = 0;
-		hdr.data_length = rlen;
+	fd = connect_to(host, sys->this_node.port);
+	if (fd < 0) {
+		rsp->result = SD_RES_NETWORK_ERROR;
+		return -1;
+	}
 
-		hdr.obj.oid = oid;
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.opcode = SD_OP_REMOVE_OBJ;
+	hdr.flags = SD_FLAG_CMD_WRITE;
 
-		ret = exec_req(fd, &hdr, NULL, &wlen, &rlen);
-		close(fd);
+	hdr.obj.oid = oid;
 
-		if (ret)
-			return -1;
+	ret = exec_req(fd, &hdr, NULL, &wlen, &rlen);
+	close(fd);
 
-		if (rsp->result != SD_RES_SUCCESS)
-			err = 1;
+	if (ret) {
+		dprintf("Failed to remove object %" PRIx64 "\n", oid);
+		return SD_RES_NETWORK_ERROR;
 	}
 
-	if (err)
-		return -1;
+	if (rsp->result != SD_RES_SUCCESS) {
+		dprintf("Failed to remove object %" PRIx64 " %s\n", oid,
+			sd_strerror(rsp->result));
+		return rsp->result;
+	}
 
 	return 0;
 }
-- 
1.7.10




More information about the sheepdog mailing list