[sheepdog] [PATCH v2] sheep: do COW at local node for just one time

yaohaiting.wujue at gmail.com yaohaiting.wujue at gmail.com
Wed Jun 27 05:05:58 CEST 2012


From: HaiTing Yao <wujue.yht at taobao.com>

Node A receives a COW write request and then forwards the request to
replica nodes B, C, D. Nodes B, C, D each do the COW separately. Under
some conditions, nodes B, C, D may need to read the base data from node
A, while node A is still waiting for the replies from B, C, D. If the
I/O threads are busy, this results in a deadlock. To avoid it, do the
COW only once, on node A, before the request is forwarded.

Signed-off-by: HaiTing Yao <wujue.yht at taobao.com>
---
 sheep/gateway.c    |   43 +++++++++++++++++++++++++++++++++++++++++++
 sheep/ops.c        |    2 +-
 sheep/sheep_priv.h |    3 +++
 3 files changed, 47 insertions(+), 1 deletions(-)

diff --git a/sheep/gateway.c b/sheep/gateway.c
index 42f028a..dfee8f3 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -208,6 +208,42 @@ err:
 	return ret;
 }
 
+/* For a partial COW write, overlay req->data on a copy of the base object
+ * (cow_oid) so the request carries a full object. Returns -1 on failure. */
+static int do_cow_at_gateway(struct request *req)
+{
+	int ret;
+	char *buf;
+
+	dprintf("%" PRIx64 ", %" PRIx64 "\n", req->rq.obj.oid,
+		req->rq.obj.cow_oid);
+
+	/* Full-object write: return before allocating so buf is not leaked */
+	if (req->rq.data_length == SD_DATA_OBJ_SIZE)
+		return 0;
+
+	buf = valloc(SD_DATA_OBJ_SIZE);
+	if (!buf) {
+		eprintf("can not allocate memory\n");
+		return -1;
+	}
+
+	ret = read_copy_from_replica(req->vnodes, req->rq.epoch,
+				     req->rq.obj.cow_oid, buf);
+	if (ret != SD_RES_SUCCESS) {
+		eprintf("failed to read cow object\n");
+		free(buf);
+		return -1;
+	}
+
+	memcpy(buf + req->rq.obj.offset, req->data, req->rq.data_length);
+	free(req->data);
+	req->data = buf;	/* the request now owns the merged buffer */
+	req->rq.data_length = SD_DATA_OBJ_SIZE;
+	req->rq.obj.offset = 0;
+	return ret;
+}
+
 void do_gateway_request(struct work *work)
 {
 	struct request *req = container_of(work, struct request, work);
@@ -216,6 +252,13 @@ void do_gateway_request(struct work *work)
 	dprintf("%x, %" PRIx64" , %u\n",
 		req->rq.opcode, req->rq.obj.oid, req->rq.epoch);
 
+	if ((req->rq.flags & SD_FLAG_CMD_WRITE) &&
+		(req->rq.flags & SD_FLAG_CMD_COW)) {
+		ret = do_cow_at_gateway(req);
+		if (!ret)
+			req->rq.flags &= ~SD_FLAG_CMD_COW;
+	}
+
 	if (!sys->enable_write_cache || bypass_object_cache(req)) {
 		if (req->rq.flags & SD_FLAG_CMD_WRITE)
 			ret = forward_write_obj_req(req);
diff --git a/sheep/ops.c b/sheep/ops.c
index 6bdcc83..af63505 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -553,7 +553,7 @@ static int local_trace_cat_ops(const struct sd_req *req, struct sd_rsp *rsp, voi
 	return SD_RES_SUCCESS;
 }
 
-static int read_copy_from_replica(struct vnode_info *vnodes, uint32_t epoch,
+int read_copy_from_replica(struct vnode_info *vnodes, uint32_t epoch,
 				  uint64_t oid, char *buf)
 {
 	int i, j, nr_copies, ret;
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index afdaad8..282daa1 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -404,4 +404,7 @@ void object_cache_delete(uint32_t vid);
 
 int object_cache_init(const char *p);
 
+int read_copy_from_replica(struct vnode_info *vnodes, uint32_t epoch,
+				  uint64_t oid, char *buf);
+
 #endif
-- 
1.7.1




More information about the sheepdog mailing list