From: levin li <xingke.lwp at taobao.com> When recovery is in the RW_INIT state, an IO request is marked as recovering, which makes the sender busy-retry. Instead of just returning SD_RES_NEW_NODE_VER, we should make the request wait until recovery enters the RW_RUN state, at which point we can determine whether the requested object is in recovery. Signed-off-by: levin li <xingke.lwp at taobao.com> --- include/sheepdog_proto.h | 1 + sheep/recovery.c | 34 +++++++++++++++++++++++++++++----- sheep/sdnet.c | 12 +++++++++--- sheep/sheep_priv.h | 1 + 4 files changed, 40 insertions(+), 8 deletions(-) diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h index 0cd8d9d..e20254e 100644 --- a/include/sheepdog_proto.h +++ b/include/sheepdog_proto.h @@ -65,6 +65,7 @@ #define SD_RES_NO_STORE 0x20 /* No targeted backend store */ #define SD_RES_NO_SUPPORT 0x21 /* Operation is not supported by backend store */ #define SD_RES_CLUSTER_RECOVERING 0x22 /* Cluster is recovering. */ +#define SD_RES_OBJ_RECOVERING 0x23 /* Object is recovering */ /* * Object ID rules diff --git a/sheep/recovery.c b/sheep/recovery.c index de4bc62..72c74c7 100644 --- a/sheep/recovery.c +++ b/sheep/recovery.c @@ -329,6 +329,13 @@ int node_in_recovery(void) return !!recovering_work; } +int is_recovery_init(void) +{ + struct recovery_work *rw = recovering_work; + + return rw->state == RW_INIT; +} + int is_recoverying_oid(uint64_t oid) { uint64_t hval = fnv_64a_buf(&oid, sizeof(uint64_t), FNV1A_64_INIT); @@ -347,14 +354,14 @@ int is_recoverying_oid(uint64_t oid) if (before(rw->epoch, sys->epoch)) return 1; - if (rw->state == RW_INIT) - return 1; - if (sd_store->exist(oid)) { dprintf("the object %" PRIx64 " is already recoverd\n", oid); return 0; } + if (rw->state == RW_INIT) + return 1; + /* the first 'rw->nr_blocking' objects were already scheduled to be done earlier */ for (i = 0; i < rw->nr_blocking; i++) if (rw->oids[rw->done + i] == oid) @@ -384,14 +391,31 @@ int is_recoverying_oid(uint64_t oid) return 0; } +static void 
resume_wait_recovery_requests(void) +{ + struct request *req, *t; + + list_for_each_entry_safe(req, t, &sys->wait_rw_queue, + request_list) { + dprintf("resume wait oid %" PRIx64 "\n", req->local_oid); + if (req->rp.result == SD_RES_OBJ_RECOVERING) { + list_del(&req->request_list); + list_add_tail(&req->request_list, &sys->request_queue); + } + } + + process_request_event_queues(); +} + static void do_recover_main(struct work *work) { struct recovery_work *rw = container_of(work, struct recovery_work, work); uint64_t oid; - if (rw->state == RW_INIT) + if (rw->state == RW_INIT) { rw->state = RW_RUN; - else if (!rw->retry) { + resume_wait_recovery_requests(); + } else if (!rw->retry) { rw->done++; if (rw->nr_blocking > 0) rw->nr_blocking--; diff --git a/sheep/sdnet.c b/sheep/sdnet.c index bcca6be..7f7e761 100644 --- a/sheep/sdnet.c +++ b/sheep/sdnet.c @@ -213,9 +213,15 @@ static int check_request(struct request *req) if (is_recoverying_oid(req->local_oid)) { if (req->rq.flags & SD_FLAG_CMD_IO_LOCAL) { /* Sheep peer request */ - req->rp.result = SD_RES_NEW_NODE_VER; - sys->nr_outstanding_io++; - req->work.done(&req->work); + if (is_recovery_init()) { + req->rp.result = SD_RES_OBJ_RECOVERING; + list_add_tail(&req->request_list, + &sys->wait_rw_queue); + } else { + req->rp.result = SD_RES_NEW_NODE_VER; + sys->nr_outstanding_io++; + req->work.done(&req->work); + } } else { /* Gateway request */ list_add_tail(&req->request_list, &sys->req_wait_for_obj_list); diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index ba29a2a..b9ae438 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -305,6 +305,7 @@ int get_obj_list(const struct sd_list_req *, struct sd_list_rsp *, void *); int start_recovery(uint32_t epoch); void resume_recovery_work(void); int is_recoverying_oid(uint64_t oid); +int is_recovery_init(void); int node_in_recovery(void); int write_object(struct vnode_info *vnodes, uint32_t node_version, -- 1.7.10 |