From: levin li <xingke.lwp at taobao.com> When recovery is in RW_INIT state, the IO request would be marked as recovering, and the sender would keep busy-retrying. We should make the request wait until recovery enters RW_RUN state to determine whether the requested object is in recovery, instead of just returning SD_RES_NEW_NODE_VER. Signed-off-by: levin li <xingke.lwp at taobao.com> --- sheep/group.c | 1 + sheep/recovery.c | 25 +++++++++++++++++++++++-- sheep/sdnet.c | 11 ++++++++--- sheep/sheep_priv.h | 2 ++ 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/sheep/group.c b/sheep/group.c index 3266c38..63742a2 100644 --- a/sheep/group.c +++ b/sheep/group.c @@ -1379,6 +1379,7 @@ int create_cluster(int port, int64_t zone, int nr_vnodes) INIT_LIST_HEAD(&sys->request_queue); INIT_LIST_HEAD(&sys->event_queue); INIT_LIST_HEAD(&sys->wait_epoch_queue); + INIT_LIST_HEAD(&sys->wait_rw_queue); ret = send_join_request(&sys->this_node); if (ret != 0) diff --git a/sheep/recovery.c b/sheep/recovery.c index 3b5caa3..0d05661 100644 --- a/sheep/recovery.c +++ b/sheep/recovery.c @@ -526,6 +526,13 @@ int node_in_recovery(void) return !!recovering_work; } +int is_recovery_init(void) +{ + struct recovery_work *rw = recovering_work; + + return rw->state == RW_INIT; +} + int is_recovering_oid(uint64_t oid) { struct recovery_work *rw = recovering_work; @@ -573,6 +580,19 @@ int is_recovering_oid(uint64_t oid) return 0; } +static void resume_wait_recovery_queue(void) +{ + struct request *req, *t; + + list_for_each_entry_safe(req, t, &sys->wait_rw_queue, + request_list) { + dprintf("resume wait oid %" PRIx64 "\n", req->local_oid); + list_del(&req->request_list); + list_add_tail(&req->request_list, &sys->request_queue); + process_request_event_queues(); + } +} + static void do_recover_main(struct work *work) { struct recovery_work *rw = container_of(work, struct recovery_work, work); @@ -582,9 +602,10 @@ static void do_recover_main(struct work *work) again: if (rw->prior_count == 0) { - 
if (rw->state == RW_INIT) + if (rw->state == RW_INIT) { rw->state = RW_RUN; - else if (!rw->retry) + resume_wait_recovery_queue(); + } else if (!rw->retry) rw->done++; } diff --git a/sheep/sdnet.c b/sheep/sdnet.c index e1334c8..565625e 100644 --- a/sheep/sdnet.c +++ b/sheep/sdnet.c @@ -227,9 +227,14 @@ static int check_request(struct request *req) if (is_recovering_oid(req->local_oid)) { if (req->rq.flags & SD_FLAG_CMD_IO_LOCAL) { /* Sheep peer request */ - req->rp.result = SD_RES_NEW_NODE_VER; - sys->nr_outstanding_io++; - req->work.done(&req->work); + if (is_recovery_init()) + list_add_tail(&req->request_list, + &sys->wait_rw_queue); + else + req->rp.result = SD_RES_NEW_NODE_VER; + sys->nr_outstanding_io++; + req->work.done(&req->work); + } } else { /* Gateway request */ list_add_tail(&req->request_list, &sys->req_wait_for_obj_list); diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 00c299a..5e804c4 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -137,6 +137,7 @@ struct cluster_info { struct list_head request_queue; struct list_head event_queue; struct list_head wait_epoch_queue; + struct list_head wait_rw_queue; struct event_struct *cur_cevent; int nr_outstanding_io; int nr_outstanding_reqs; @@ -300,6 +301,7 @@ int get_obj_list(const struct sd_list_req *, struct sd_list_rsp *, void *); int start_recovery(uint32_t epoch); void resume_recovery_work(void); int is_recovering_oid(uint64_t oid); +int is_recovery_init(void); int node_in_recovery(void); int write_object(struct vnode_info *vnodes, uint32_t node_version, -- 1.7.10 |