From: HaiTing Yao <wujue.yht at taobao.com> Node A receives a COW write request and then forwards the request to replica nodes B, C, and D. Nodes B, C, and D each do the COW separately. Under some conditions, nodes B, C, and D may need to read the base data from node A. Meanwhile, node A is waiting for the replies from B, C, and D. If the I/O threads are busy, this results in a deadlock. Fix this by doing the COW on the gateway node before forwarding the write, so that the replicas receive a plain full-object write without the COW flag. Signed-off-by: HaiTing Yao <wujue.yht at taobao.com> --- sheep/gateway.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ sheep/ops.c | 2 +- sheep/sheep_priv.h | 3 +++ 3 files changed, 50 insertions(+), 1 deletions(-) diff --git a/sheep/gateway.c b/sheep/gateway.c index e92f3ed..3e1de75 100644 --- a/sheep/gateway.c +++ b/sheep/gateway.c @@ -82,6 +82,44 @@ read_remote: return ret; } +static int do_cow_at_local(struct request *req, struct sd_req *cow_hdr) +{ + int ret = 0; + char *buf = NULL; + + dprintf("%" PRIx64 ", %" PRIx64 "\n", req->rq.obj.oid, + cow_hdr->obj.cow_oid); + + buf = valloc(SD_DATA_OBJ_SIZE); + if (!buf) { + eprintf("can not allocate memory\n"); + return -1; + } + + if (cow_hdr->data_length != SD_DATA_OBJ_SIZE) { + ret = read_copy_from_replica(req->vnodes, cow_hdr->epoch, + cow_hdr->obj.cow_oid, buf); + if (ret != SD_RES_SUCCESS) { + eprintf("failed to read cow object\n"); + free(buf); + return -1; + } + } + + memcpy(buf + cow_hdr->obj.offset, req->data, cow_hdr->data_length); + + free(req->data); + req->data = buf; + + cow_hdr->data_length = SD_DATA_OBJ_SIZE; + cow_hdr->obj.offset = 0; + + req->rq.data_length = SD_DATA_OBJ_SIZE; + req->rq.obj.offset = 0; + + return ret; +} + int forward_write_obj_req(struct request *req) { int i, fd, ret, pollret; @@ -106,6 +144,14 @@ int forward_write_obj_req(struct request *req) memcpy(&fwd_hdr, &req->rq, sizeof(fwd_hdr)); fwd_hdr.flags |= SD_FLAG_CMD_IO_LOCAL; + if (fwd_hdr.flags & SD_FLAG_CMD_COW) { + ret = do_cow_at_local(req, &fwd_hdr); + if (!ret) { + fwd_hdr.flags &= ~SD_FLAG_CMD_COW; + req->rq.flags &= ~SD_FLAG_CMD_COW; + } + } + wlen = fwd_hdr.data_length; nr_copies = 
get_nr_copies(req->vnodes); diff --git a/sheep/ops.c b/sheep/ops.c index e164cbc..68f7605 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -553,7 +553,7 @@ static int local_trace_cat_ops(const struct sd_req *req, struct sd_rsp *rsp, voi return SD_RES_SUCCESS; } -static int read_copy_from_replica(struct vnode_info *vnodes, uint32_t epoch, +int read_copy_from_replica(struct vnode_info *vnodes, uint32_t epoch, uint64_t oid, char *buf) { struct request read_req; diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 7a86533..ef78ace 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -415,4 +415,7 @@ void object_cache_delete(uint32_t vid); int object_cache_init(const char *p); +int read_copy_from_replica(struct vnode_info *vnodes, uint32_t epoch, + uint64_t oid, char *buf); + #endif -- 1.7.1 |