[Sheepdog] [PATCH 2/2] avoid the race between recovery and IO requests
FUJITA Tomonori
fujita.tomonori at lab.ntt.co.jp
Fri May 7 07:15:50 CEST 2010
- We can't perform IO requests against an object that is currently
being recovered.
- We can't recover an object that still has IO requests outstanding
against it.
Signed-off-by: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp>
---
collie/collie.h | 5 +++++
collie/group.c | 35 +++++++++++++++++++----------------
collie/net.c | 30 ++++++++++++++++++------------
collie/store.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 87 insertions(+), 28 deletions(-)
diff --git a/collie/collie.h b/collie/collie.h
index 17f9dec..b53c824 100644
--- a/collie/collie.h
+++ b/collie/collie.h
@@ -126,6 +126,9 @@ int read_vdis(char *data, int len, unsigned int *rsp_len);
int setup_ordered_sd_node_list(struct request *req);
int get_ordered_sd_node_list(struct sheepdog_node_list_entry *entries);
+int is_access_to_busy_objects(uint64_t oid);
+
+void resume_pending_requests(void);
int create_cluster(int port);
@@ -155,6 +158,8 @@ int set_cluster_ctime(uint64_t ctime);
uint64_t get_cluster_ctime(void);
int start_recovery(uint32_t epoch, uint32_t *failed_vdis, int nr_failed_vdis);
+void resume_recovery_work(void);
+int is_recoverying_oid(uint64_t oid);
static inline int is_myself(struct sheepdog_node_list_entry *e)
{
diff --git a/collie/group.c b/collie/group.c
index 66930c4..105a5f9 100644
--- a/collie/group.c
+++ b/collie/group.c
@@ -1320,30 +1320,33 @@ static int check_epoch(struct request *req)
return ret;
}
-static int is_access_to_busy_objects(struct request *req)
+int is_access_to_busy_objects(uint64_t oid)
{
- struct request *o_req;
+ struct request *req;
- if (!req->local_oid[0] && !req->local_oid[1])
+ if (!oid)
return 0;
- list_for_each_entry(o_req, &sys->outstanding_req_list, r_wlist) {
-
- if (req->local_oid[0]) {
- if (req->local_oid[0] == o_req->local_oid[0] ||
- req->local_oid[0] == o_req->local_oid[1])
+ list_for_each_entry(req, &sys->outstanding_req_list, r_wlist) {
+ if (oid == req->local_oid[0] || oid == req->local_oid[1])
return 1;
- }
-
- if (req->local_oid[1]) {
- if (req->local_oid[1] == o_req->local_oid[0] ||
- req->local_oid[1] == o_req->local_oid[1])
- return 1;
- }
}
return 0;
}
+static int __is_access_to_busy_objects(struct request *req)
+{
+ if (is_access_to_busy_objects(req->local_oid[0]) ||
+ is_access_to_busy_objects(req->local_oid[1]))
+ return 1;
+
+ if (is_recoverying_oid(req->local_oid[0]) ||
+ is_recoverying_oid(req->local_oid[1]))
+ return 1;
+
+ return 0;
+}
+
/* can be called only by the main process */
void start_cpg_event_work(void)
{
@@ -1387,7 +1390,7 @@ void start_cpg_event_work(void)
list_del(&cevent->cpg_event_list);
if (is_io_request(req->rq.opcode)) {
- if (is_access_to_busy_objects(req)) {
+ if (__is_access_to_busy_objects(req)) {
list_add_tail(&req->r_wlist, &sys->req_wait_for_obj_list);
continue;
}
diff --git a/collie/net.c b/collie/net.c
index f641022..d3e6ce5 100644
--- a/collie/net.c
+++ b/collie/net.c
@@ -35,6 +35,22 @@ int is_io_request(unsigned op)
return ret;
}
+void resume_pending_requests(void)
+{
+ struct request *next, *tmp;
+
+ list_for_each_entry_safe(next, tmp, &sys->req_wait_for_obj_list,
+ r_wlist) {
+ struct cpg_event *cevent = &next->cev;
+
+ list_del(&next->r_wlist);
+ list_add_tail(&cevent->cpg_event_list, &sys->cpg_event_siblings);
+ }
+
+ if (list_empty(&sys->cpg_event_siblings))
+ start_cpg_event_work();
+}
+
static void __done(struct work *work, int idx)
{
struct request *req = container_of(work, struct request, work);
@@ -53,7 +69,6 @@ static void __done(struct work *work, int idx)
}
if (is_io_request(hdr->opcode)) {
- struct request *next, *tmp;
list_del(&req->r_wlist);
sys->nr_outstanding_io--;
@@ -63,17 +78,8 @@ static void __done(struct work *work, int idx)
* of sys->cpg_event_siblings.
*/
- list_for_each_entry_safe(next, tmp, &sys->req_wait_for_obj_list,
- r_wlist) {
- struct cpg_event *cevent = &next->cev;
-
- list_del(&next->r_wlist);
- list_add_tail(&cevent->cpg_event_list, &sys->cpg_event_siblings);
- }
-
- if (!sys->nr_outstanding_io &&
- !list_empty(&sys->cpg_event_siblings))
- start_cpg_event_work();
+ resume_pending_requests();
+ resume_recovery_work();
}
req->done(req);
diff --git a/collie/store.c b/collie/store.c
index d21a1c3..5fc8e28 100644
--- a/collie/store.c
+++ b/collie/store.c
@@ -1255,17 +1255,57 @@ out:
rw->done++;
}
+static struct recovery_work *suspended_recovery_work;
+static uint64_t recovering_oid;
+
static void __start_recovery(struct work *work, int idx);
static void recover_timer(void *data)
{
struct recovery_work *rw = (struct recovery_work *)data;
+ uint64_t oid = *(((uint64_t *)rw->buf) + rw->done);
+
+ if (is_access_to_busy_objects(oid)) {
+ suspended_recovery_work = rw;
+ return;
+ }
+
+ recovering_oid = oid;
queue_work(&rw->work);
}
+void resume_recovery_work(void)
+{
+ struct recovery_work *rw;
+ uint64_t oid;
+
+ if (!suspended_recovery_work)
+ return;
+
+ rw = suspended_recovery_work;
+
+ oid = *(((uint64_t *)rw->buf) + rw->done);
+ if (is_access_to_busy_objects(oid))
+ return;
+
+ suspended_recovery_work = NULL;
+ recovering_oid = oid;
+ queue_work(&rw->work);
+}
+
+int is_recoverying_oid(uint64_t oid)
+{
+ return recovering_oid && recovering_oid == oid;
+}
+
static void recover_done(struct work *work, int idx)
{
struct recovery_work *rw = container_of(work, struct recovery_work, work);
+ uint64_t oid = *(((uint64_t *)rw->buf) + rw->done);
+
+ recovering_oid = 0;
+
+ resume_pending_requests();
if (rw->retry) {
rw->retry = 0;
@@ -1279,6 +1319,11 @@ static void recover_done(struct work *work, int idx)
if (rw->done < rw->count && list_empty(&recovery_work_list)) {
rw->work.fn = recover_one;
+ if (is_access_to_busy_objects(oid)) {
+ suspended_recovery_work = rw;
+ return;
+ }
+ recovering_oid = oid;
queue_work(&rw->work);
return;
}
--
1.6.5
More information about the sheepdog
mailing list