[sheepdog] [PATCH v3 2/8] sheep: make requests with new epoch sleep until epoch is updated

Thu May 24 13:24:11 CEST 2012

On 05/24/2012 02:44 PM, Liu Yuan wrote:
> On 05/24/2012 11:37 AM, levin li wrote:
> 
>> From: levin li <xingke.lwp at taobao.com>
>>
>> If requests comes with epoch newer than system epoch, then
>> we shouldn't just make it done with result SD_RES_NEW_NODE_VER,
>> if so, the sender would busy retrying this request, which may
>> casue CPU too busy to process other request.
>>
>> We push the requests with new epoch into a wait_epoch_queue to
>> make it wait for epoch consistency, after epoch changes we wake
>> up these requests in the queue, which avoids busy retrying.
>>
>> Signed-off-by: levin li <xingke.lwp at taobao.com>
>> ---
>>  sheep/group.c      |    1 +
>>  sheep/recovery.c   |    2 ++
>>  sheep/sdnet.c      |   28 ++++++++++++++++++++++++----
>>  sheep/sheep_priv.h |    2 ++
>>  4 files changed, 29 insertions(+), 4 deletions(-)
>>
>> diff --git a/sheep/group.c b/sheep/group.c
>> index e2632ac..8673b7a 100644
>> --- a/sheep/group.c
>> +++ b/sheep/group.c
>> @@ -1381,6 +1381,7 @@ int create_cluster(int port, int64_t zone, int nr_vnodes)
>>  
>>  	INIT_LIST_HEAD(&sys->request_queue);
>>  	INIT_LIST_HEAD(&sys->event_queue);
>> +	INIT_LIST_HEAD(&sys->wait_rw_queue);
>>  
>>  	ret = send_join_request(&sys->this_node);
>>  	if (ret != 0)
>> diff --git a/sheep/recovery.c b/sheep/recovery.c
>> index f341fc6..de4bc62 100644
>> --- a/sheep/recovery.c
>> +++ b/sheep/recovery.c
>> @@ -692,5 +692,7 @@ int start_recovery(uint32_t epoch)
>>  		queue_work(sys->recovery_wqueue, &rw->work);
>>  	}
>>  
>> +	resume_wait_epoch_requests();
>> +
>>  	return 0;
>>  }
>> diff --git a/sheep/sdnet.c b/sheep/sdnet.c
>> index 5e9cb3b..cd3311e 100644
>> --- a/sheep/sdnet.c
>> +++ b/sheep/sdnet.c
>> @@ -174,14 +174,23 @@ static int check_epoch(struct request *req)
>>  	int ret = SD_RES_SUCCESS;
>>  
>>  	if (before(req_epoch, sys->epoch)) {
>> -		ret = SD_RES_OLD_NODE_VER;
>>  		eprintf("old node version %u, %u, %x\n",
>>  				sys->epoch, req_epoch, opcode);
>> +		/* make gateway to retry. */
> 
> 
> better rewording as /* ask sender to retry */
> 
>> +		req->rp.result = SD_RES_OLD_NODE_VER;
>> +		req->rp.epoch = sys->epoch;
>> +		req->work.done(&req->work);
>> +		ret = req->rp.result;
>>  	} else if (after(req_epoch, sys->epoch)) {
>> -		ret = SD_RES_NEW_NODE_VER;
>>  		eprintf("new node version %u, %u, %x\n",
>>  				sys->epoch, req_epoch, opcode);
>> +
>> +		/* wait for epoch consistency. */
> 
> 
> better rewording as /* put on local wait queue, waiting for local epoch
> to be lifted */
> 
>> +		req->rp.result = SD_RES_NEW_NODE_VER;
>> +		list_add_tail(&req->request_list, &sys->wait_rw_queue);
>> +		ret = req->rp.result;
>>  	}
>> +
>>  	return ret;
>>  }
>>
> 
> 
> Seems that unfold check_epoch() into check_request() will clarify the
> logic a bit.
> 
> And also, recovery requests will also visit this code, your comments
> only consider gateway requests. Well, recovery code has its own retry
> mechanism.
> 

Yes, I'll refactor the recovery logic in my later patch to stop recovery
from retrying.

thanks,

levin

>  
>> @@ -201,9 +210,7 @@ static int check_request(struct request *req)
>>  	else {
>>  		int ret = check_epoch(req);
>>  		if (ret != SD_RES_SUCCESS) {
>> -			req->rp.result = ret;
>>  			sys->nr_outstanding_io++;
>> -			req->work.done(&req->work);
>>  			return -1;
>>  		}
>>  	}
>> @@ -251,6 +258,19 @@ void resume_pending_requests(void)
>>  		process_request_event_queues();
>>  }
>>  
>> +void resume_wait_epoch_requests(void)
>> +{
>> +	struct request *req, *t;
>> +
>> +	list_for_each_entry_safe(req, t, &sys->wait_rw_queue,
>> +				 request_list) {
>> +
>> +		list_del(&req->request_list);
>> +		list_add_tail(&req->request_list, &sys->request_queue);
>> +		process_request_event_queues();
>> +	}
>> +}
> 
> 
> How about drag process_request_event_queues() out of the loop?
> 
>> +
>>  static void queue_request(struct request *req)
>>  {
>>  	struct sd_req *hdr = &req->rq;
>> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
>> index 3d5a964..ba29a2a 100644
>> --- a/sheep/sheep_priv.h
>> +++ b/sheep/sheep_priv.h
>> @@ -137,6 +137,7 @@ struct cluster_info {
>>  
>>  	struct list_head request_queue;
>>  	struct list_head event_queue;
>> +	struct list_head wait_rw_queue;
>>  	struct event_struct *cur_cevent;
>>  	int nr_outstanding_io;
>>  	int nr_outstanding_reqs;
>> @@ -267,6 +268,7 @@ int get_nr_copies(struct vnode_info *vnode_info);
>>  int is_access_to_busy_objects(uint64_t oid);
>>  
>>  void resume_pending_requests(void);
>> +void resume_wait_epoch_requests(void);
>>  
>>  int create_cluster(int port, int64_t zone, int nr_vnodes);
>>  int leave_cluster(void);
> 
>