[sheepdog] [PATCH 2/2] sheep: fix dead lock for create_and_write request
Liu Yuan
namei.unix at gmail.com
Mon Aug 27 16:19:08 CEST 2012
From: Liu Yuan <tailai.ly at taobao.com>
This dead lock can be reproduced by 026.
We should always service CREATE_AND_WRITE requests instead of queueing them
on wait queues while in recovery. The recovery can finish without any
objects in the list (rw->count == 0 in some special cases), in which case
no one calls resume_wait_recovery_requests() or any other flusher on rw_list or
obj_list.
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
sheep/request.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/sheep/request.c b/sheep/request.c
index 5981e14..fd210d3 100644
--- a/sheep/request.c
+++ b/sheep/request.c
@@ -141,6 +141,12 @@ static int check_request_epoch(struct request *req)
static bool request_in_recovery(struct request *req)
{
+
+ /* For CREATE request, we simply service it */
+ if (req->rq.opcode == SD_OP_CREATE_AND_WRITE_PEER ||
+ req->rq.opcode == SD_OP_CREATE_AND_WRITE_OBJ)
+ return false;
+
/*
* Request from recovery should go down the Farm even if
* oid_in_recovery() returns true because we should also try snap
@@ -152,10 +158,12 @@ static bool request_in_recovery(struct request *req)
* Put request on wait queues of local node
*/
if (is_recovery_init()) {
+ dprintf("%"PRIx64" on rw_queue\n", req->local_oid);
req->rp.result = SD_RES_OBJ_RECOVERING;
list_add_tail(&req->request_list,
&sys->wait_rw_queue);
} else {
+ dprintf("%"PRIx64" on obj_queue\n", req->local_oid);
list_add_tail(&req->request_list,
&sys->wait_obj_queue);
}
@@ -328,7 +336,7 @@ static void queue_request(struct request *req)
goto done;
}
- dprintf("%s\n", op_name(req->op));
+ dprintf("%s, %d\n", op_name(req->op), sys->status);
switch (sys->status) {
case SD_STATUS_KILLED:
--
1.7.12.84.gefa6462
More information about the sheepdog
mailing list