On 05/24/2012 02:44 PM, Liu Yuan wrote: > On 05/24/2012 11:37 AM, levin li wrote: > >> From: levin li <xingke.lwp at taobao.com> >> >> If requests comes with epoch newer than system epoch, then >> we shouldn't just make it done with result SD_RES_NEW_NODE_VER, >> if so, the sender would busy retrying this request, which may >> casue CPU too busy to process other request. >> >> We push the requests with new epoch into a wait_epoch_queue to >> make it wait for epoch consistency, after epoch changes we wake >> up these requests in the queue, which avoids busy retrying. >> >> Signed-off-by: levin li <xingke.lwp at taobao.com> >> --- >> sheep/group.c | 1 + >> sheep/recovery.c | 2 ++ >> sheep/sdnet.c | 28 ++++++++++++++++++++++++---- >> sheep/sheep_priv.h | 2 ++ >> 4 files changed, 29 insertions(+), 4 deletions(-) >> >> diff --git a/sheep/group.c b/sheep/group.c >> index e2632ac..8673b7a 100644 >> --- a/sheep/group.c >> +++ b/sheep/group.c >> @@ -1381,6 +1381,7 @@ int create_cluster(int port, int64_t zone, int nr_vnodes) >> >> INIT_LIST_HEAD(&sys->request_queue); >> INIT_LIST_HEAD(&sys->event_queue); >> + INIT_LIST_HEAD(&sys->wait_rw_queue); >> >> ret = send_join_request(&sys->this_node); >> if (ret != 0) >> diff --git a/sheep/recovery.c b/sheep/recovery.c >> index f341fc6..de4bc62 100644 >> --- a/sheep/recovery.c >> +++ b/sheep/recovery.c >> @@ -692,5 +692,7 @@ int start_recovery(uint32_t epoch) >> queue_work(sys->recovery_wqueue, &rw->work); >> } >> >> + resume_wait_epoch_requests(); >> + >> return 0; >> } >> diff --git a/sheep/sdnet.c b/sheep/sdnet.c >> index 5e9cb3b..cd3311e 100644 >> --- a/sheep/sdnet.c >> +++ b/sheep/sdnet.c >> @@ -174,14 +174,23 @@ static int check_epoch(struct request *req) >> int ret = SD_RES_SUCCESS; >> >> if (before(req_epoch, sys->epoch)) { >> - ret = SD_RES_OLD_NODE_VER; >> eprintf("old node version %u, %u, %x\n", >> sys->epoch, req_epoch, opcode); >> + /* make gateway to retry. */ > > > better rewording as /* ask sender to retry */ > >> + req->rp.result = SD_RES_OLD_NODE_VER; >> + req->rp.epoch = sys->epoch; >> + req->work.done(&req->work); >> + ret = req->rp.result; >> } else if (after(req_epoch, sys->epoch)) { >> - ret = SD_RES_NEW_NODE_VER; >> eprintf("new node version %u, %u, %x\n", >> sys->epoch, req_epoch, opcode); >> + >> + /* wait for epoch consistency. */ > > > better rewording as /* put on local wait queue, waiting for local epoch > to be lifted */ > >> + req->rp.result = SD_RES_NEW_NODE_VER; >> + list_add_tail(&req->request_list, &sys->wait_rw_queue); >> + ret = req->rp.result; >> } >> + >> return ret; >> } >> > > > Seems that unfold check_epoch() into check_request() will clarify the > logic a bit. > > And also, recovery requests will also visit this code, your comments > only consider gateway requests. Well, recovery code has its own retry > mechanism. > Yes, I'll refactor the recovery logic in my later patch to stop recovery from retrying. thanks, levin > >> @@ -201,9 +210,7 @@ static int check_request(struct request *req) >> else { >> int ret = check_epoch(req); >> if (ret != SD_RES_SUCCESS) { >> - req->rp.result = ret; >> sys->nr_outstanding_io++; >> - req->work.done(&req->work); >> return -1; >> } >> } >> @@ -251,6 +258,19 @@ void resume_pending_requests(void) >> process_request_event_queues(); >> } >> >> +void resume_wait_epoch_requests(void) >> +{ >> + struct request *req, *t; >> + >> + list_for_each_entry_safe(req, t, &sys->wait_rw_queue, >> + request_list) { >> + >> + list_del(&req->request_list); >> + list_add_tail(&req->request_list, &sys->request_queue); >> + process_request_event_queues(); >> + } >> +} > > > How about drag process_request_event_queues() out of the loop? > >> + >> static void queue_request(struct request *req) >> { >> struct sd_req *hdr = &req->rq; >> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h >> index 3d5a964..ba29a2a 100644 >> --- a/sheep/sheep_priv.h >> +++ b/sheep/sheep_priv.h >> @@ -137,6 +137,7 @@ struct cluster_info { >> >> struct list_head request_queue; >> struct list_head event_queue; >> + struct list_head wait_rw_queue; >> struct event_struct *cur_cevent; >> int nr_outstanding_io; >> int nr_outstanding_reqs; >> @@ -267,6 +268,7 @@ int get_nr_copies(struct vnode_info *vnode_info); >> int is_access_to_busy_objects(uint64_t oid); >> >> void resume_pending_requests(void); >> +void resume_wait_epoch_requests(void); >> >> int create_cluster(int port, int64_t zone, int nr_vnodes); >> int leave_cluster(void); > > |