[Sheepdog] [PATCH 1/2] collie: fix recovery thread race
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Wed Apr 14 07:17:44 CEST 2010
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
collie/store.c | 34 +++++++++++++++++-----------------
1 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/collie/store.c b/collie/store.c
index f1dbdc3..1f17df4 100644
--- a/collie/store.c
+++ b/collie/store.c
@@ -1085,15 +1085,13 @@ static void recover_one_done(struct work *work, int idx)
rw->done++;
- if (rw->done < rw->count && rw->rw_siblings.next == &recovery_work_list) {
+ if (rw->done < rw->count && list_empty(&recovery_work_list)) {
queue_work(dobj_queue, &rw->work);
return;
}
dprintf("recovery done, %d\n", rw->epoch);
- recovering--;
-
- list_del(&rw->rw_siblings);
+ recovering = 0;
free(rw->buf);
free(rw);
@@ -1102,7 +1100,9 @@ static void recover_one_done(struct work *work, int idx)
rw = list_first_entry(&recovery_work_list,
struct recovery_work, rw_siblings);
- recovering++;
+ list_del(&rw->rw_siblings);
+
+ recovering = 1;
queue_work(dobj_queue, &rw->work);
}
}
@@ -1129,7 +1129,7 @@ static int __fill_obj_list(struct recovery_work *rw,
}
wlen = 0;
- rlen = 1 << 20;
+ rlen = (1 << 20) - (rw->count * sizeof(uint64_t));
memset(&hdr, 0, sizeof(hdr));
hdr.opcode = SD_OP_GET_OBJ_LIST;
@@ -1150,10 +1150,10 @@ static int __fill_obj_list(struct recovery_work *rw,
rsp = (struct sd_list_rsp *)&hdr;
- if (rsp->result != SD_RES_SUCCESS) {
+ if (ret || rsp->result != SD_RES_SUCCESS) {
rw->retry = 1;
*done_hash = end_hash;
- eprintf("try again, %d\n", rsp->result);
+ eprintf("try again, %d, %d\n", ret, rsp->result);
return 0;
}
@@ -1281,7 +1281,7 @@ static void __start_recovery_done(struct work *work, int idx)
return;
}
- if (rw->count && rw->rw_siblings.next == &recovery_work_list) {
+ if (rw->count && list_empty(&recovery_work_list)) {
rw->work.fn = recover_one;
rw->work.done = recover_one_done;
@@ -1293,9 +1293,7 @@ static void __start_recovery_done(struct work *work, int idx)
}
dprintf("recovery done, %d\n", rw->epoch);
- recovering--;
-
- list_del(&rw->rw_siblings);
+ recovering = 0;
free(rw->buf);
free(rw);
@@ -1304,7 +1302,9 @@ static void __start_recovery_done(struct work *work, int idx)
rw = list_first_entry(&recovery_work_list,
struct recovery_work, rw_siblings);
- recovering++;
+ list_del(&rw->rw_siblings);
+
+ recovering = 1;
queue_work(dobj_queue, &rw->work);
}
}
@@ -1324,10 +1324,10 @@ int start_recovery(uint32_t epoch)
rw->work.fn = __start_recovery;
rw->work.done = __start_recovery_done;
- list_add_tail(&rw->rw_siblings, &recovery_work_list);
-
- if (!recovering) {
- recovering++;
+ if (recovering)
+ list_add_tail(&rw->rw_siblings, &recovery_work_list);
+ else {
+ recovering = 1;
queue_work(dobj_queue, &rw->work);
}
--
1.5.6.5
More information about the sheepdog mailing list