From: levin li <xingke.lwp at taobao.com> If requests comes with epoch newer than system epoch, then we shouldn't just make it done with result SD_RES_NEW_NODE_VER, if so, the sender would busy retrying this request, which may casue CPU too busy to process other request. We push the requests with new epoch into a wait_epoch_queue to make it wait for epoch consistency, after epoch changes we wake up these requests in the queue, which avoids busy retrying. Signed-off-by: levin li <xingke.lwp at taobao.com> --- sheep/group.c | 1 + sheep/recovery.c | 2 ++ sheep/sdnet.c | 52 ++++++++++++++++++++++++++++++---------------------- sheep/sheep_priv.h | 2 ++ 4 files changed, 35 insertions(+), 22 deletions(-) diff --git a/sheep/group.c b/sheep/group.c index e2632ac..8673b7a 100644 --- a/sheep/group.c +++ b/sheep/group.c @@ -1381,6 +1381,7 @@ int create_cluster(int port, int64_t zone, int nr_vnodes) INIT_LIST_HEAD(&sys->request_queue); INIT_LIST_HEAD(&sys->event_queue); + INIT_LIST_HEAD(&sys->wait_rw_queue); ret = send_join_request(&sys->this_node); if (ret != 0) diff --git a/sheep/recovery.c b/sheep/recovery.c index f341fc6..de4bc62 100644 --- a/sheep/recovery.c +++ b/sheep/recovery.c @@ -692,5 +692,7 @@ int start_recovery(uint32_t epoch) queue_work(sys->recovery_wqueue, &rw->work); } + resume_wait_epoch_requests(); + return 0; } diff --git a/sheep/sdnet.c b/sheep/sdnet.c index 5e9cb3b..a8483cb 100644 --- a/sheep/sdnet.c +++ b/sheep/sdnet.c @@ -167,24 +167,6 @@ static void do_local_request(struct work *work) req->rp.result = ret; } -static int check_epoch(struct request *req) -{ - uint32_t req_epoch = req->rq.epoch; - uint32_t opcode = req->rq.opcode; - int ret = SD_RES_SUCCESS; - - if (before(req_epoch, sys->epoch)) { - ret = SD_RES_OLD_NODE_VER; - eprintf("old node version %u, %u, %x\n", - sys->epoch, req_epoch, opcode); - } else if (after(req_epoch, sys->epoch)) { - ret = SD_RES_NEW_NODE_VER; - eprintf("new node version %u, %u, %x\n", - sys->epoch, req_epoch, opcode); - } - return ret; -} - static int check_request(struct request *req) { /* @@ -199,11 +181,24 @@ static int check_request(struct request *req) if (!req->local_oid && !req->local_cow_oid) return 0; else { - int ret = check_epoch(req); - if (ret != SD_RES_SUCCESS) { - req->rp.result = ret; - sys->nr_outstanding_io++; + if (before(req->rq.epoch, sys->epoch)) { + eprintf("old node version %u, %u, %x\n", + sys->epoch, req->rq.epoch, req->rq.opcode); + /* ask gateway to retry. */ + req->rp.result = SD_RES_OLD_NODE_VER; + req->rp.epoch = sys->epoch; req->work.done(&req->work); + sys->nr_outstanding_io++; + return -1; + } else if (after(req->rq.epoch, sys->epoch)) { + eprintf("new node version %u, %u, %x\n", + sys->epoch, req->rq.epoch, req->rq.opcode); + + /* put on local wait queue, waiting for local epoch + to be lifted */ + req->rp.result = SD_RES_NEW_NODE_VER; + list_add_tail(&req->request_list, &sys->wait_rw_queue); + sys->nr_outstanding_io++; return -1; } } @@ -251,6 +246,19 @@ void resume_pending_requests(void) process_request_event_queues(); } +void resume_wait_epoch_requests(void) +{ + struct request *req, *t; + + list_for_each_entry_safe(req, t, &sys->wait_rw_queue, + request_list) { + + list_del(&req->request_list); + list_add_tail(&req->request_list, &sys->request_queue); + } + process_request_event_queues(); +} + static void queue_request(struct request *req) { struct sd_req *hdr = &req->rq; diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 3d5a964..ba29a2a 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -137,6 +137,7 @@ struct cluster_info { struct list_head request_queue; struct list_head event_queue; + struct list_head wait_rw_queue; struct event_struct *cur_cevent; int nr_outstanding_io; int nr_outstanding_reqs; @@ -267,6 +268,7 @@ int get_nr_copies(struct vnode_info *vnode_info); int is_access_to_busy_objects(uint64_t oid); void resume_pending_requests(void); +void resume_wait_epoch_requests(void); int create_cluster(int port, int64_t zone, int nr_vnodes); int leave_cluster(void); -- 1.7.10 |