On 05/24/2012 11:37 AM, levin li wrote: > From: levin li <xingke.lwp at taobao.com> > > When gateway requests get SD_RES_OLD_NODE_VER in io_op_done(), > it means epoch of gateway is older than peer, and peer has set > response's epoch with its system epoch, then gateway should check > whether its system epoch has reach peer's system epoch which in > req->rp.epoch, and whether the result is SD_RES_OLD_NODE_VER, > if so, the request should be pushed into wait_epoch_queue to > wait until system epoch change, but if system epoch has reached > peer's system epoch, retry this request at once. > > Signed-off-by: levin li <xingke.lwp at taobao.com> > --- > sheep/sdnet.c | 29 +++++++++++++++++++++++------ > 1 file changed, 23 insertions(+), 6 deletions(-) > > diff --git a/sheep/sdnet.c b/sheep/sdnet.c > index cd3311e..efdd1bf 100644 > --- a/sheep/sdnet.c > +++ b/sheep/sdnet.c > @@ -105,8 +105,14 @@ static void io_op_done(struct work *work) > case SD_RES_NETWORK_ERROR: > case SD_RES_WAIT_FOR_JOIN: > case SD_RES_WAIT_FOR_FORMAT: > - if (!(req->rq.flags & SD_FLAG_CMD_IO_LOCAL)) > - goto retry; > + if (!(req->rq.flags & SD_FLAG_CMD_IO_LOCAL)) { > + if (req->rp.epoch > sys->epoch && > + req->rp.result == SD_RES_OLD_NODE_VER) { > + list_add_tail(&req->request_list, > + &sys->wait_rw_queue); > + } else > + goto retry; > + } > break; > case SD_RES_EIO: > if (is_access_local(req, hdr->obj.oid)) { > @@ -264,11 +270,22 @@ void resume_wait_epoch_requests(void) > > list_for_each_entry_safe(req, t, &sys->wait_rw_queue, > request_list) { > - > - list_del(&req->request_list); > - list_add_tail(&req->request_list, &sys->request_queue); > - process_request_event_queues(); > + switch (req->rp.result) { > + case SD_RES_OLD_NODE_VER: > + /* retry as gateway. 
*/ > + req->rq.epoch = sys->epoch; > + put_vnode_info(req->vnodes); > + req->vnodes = get_vnode_info(); > + setup_access_to_local_objects(req); > + case SD_RES_NEW_NODE_VER: > + list_del(&req->request_list); > + list_add_tail(&req->request_list, &sys->request_queue); > + break; > + default: > + break; > + } > } Both the sender and the receiver use the same wait_rw_queue, so we'd better add comments indicating which code is for which one. > + process_request_event_queues(); > } > > static void queue_request(struct request *req) |