[Sheepdog] [PATCH 1/2] avoid the race of object access from multiple IO requests

FUJITA Tomonori fujita.tomonori at lab.ntt.co.jp
Fri May 7 07:15:49 CEST 2010


We need to avoid performing multiple simultaneous IO requests against the
same object.

This patch records, per request, which objects the local node will access
(local_oid[0] for the target object, local_oid[1] for the COW source).
Before an IO request is processed it is checked against the requests
already on sys->outstanding_req_list; if any object overlaps, the request
is parked on sys->req_wait_for_obj_list instead of being started. Whenever
an outstanding IO request completes, the parked requests are moved back to
sys->cpg_event_siblings so they are re-checked and processed.
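
In case the mechanism is easier to see outside the list_head plumbing,
here is a minimal, self-contained sketch of the serialization idea. It
uses a plain array and illustrative names (demo_req, touches_busy_object,
queue_io), not collie's actual types or functions:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    #define MAX_REQS 8

    /* Simplified stand-in for struct request; not the real type. */
    struct demo_req {
            uint64_t local_oid[2]; /* objects touched locally (0 = unused) */
            int outstanding;       /* currently being processed */
            int waiting;           /* parked until its objects are free */
    };

    static struct demo_req reqs[MAX_REQS];

    /* Same idea as is_access_to_busy_objects(): does req touch an object
     * that some outstanding request already touches? */
    static int touches_busy_object(struct demo_req *req)
    {
            int i, j, k;

            if (!req->local_oid[0] && !req->local_oid[1])
                    return 0;

            for (i = 0; i < MAX_REQS; i++) {
                    if (!reqs[i].outstanding || &reqs[i] == req)
                            continue;
                    for (j = 0; j < 2; j++)
                            for (k = 0; k < 2; k++)
                                    if (req->local_oid[j] &&
                                        req->local_oid[j] == reqs[i].local_oid[k])
                                            return 1;
            }
            return 0;
    }

    /* Either start the request or park it, as start_cpg_event_work() does. */
    static void queue_io(struct demo_req *req)
    {
            if (touches_busy_object(req)) {
                    req->waiting = 1;      /* -> req_wait_for_obj_list */
                    printf("request on oid %llx deferred\n",
                           (unsigned long long)req->local_oid[0]);
            } else {
                    req->outstanding = 1;  /* -> outstanding_req_list */
                    printf("request on oid %llx started\n",
                           (unsigned long long)req->local_oid[0]);
            }
    }

    int main(void)
    {
            memset(reqs, 0, sizeof(reqs));
            reqs[0].local_oid[0] = 0x100;
            reqs[1].local_oid[0] = 0x100;   /* same object -> must wait */
            reqs[2].local_oid[0] = 0x200;   /* different object -> can run */

            queue_io(&reqs[0]);
            queue_io(&reqs[1]);
            queue_io(&reqs[2]);

            /* When a request completes, __done() requeues every waiter so
             * it is re-checked against the smaller outstanding set. */
            reqs[0].outstanding = 0;
            if (reqs[1].waiting) {
                    reqs[1].waiting = 0;
                    queue_io(&reqs[1]);
            }
            return 0;
    }

The patch below does the equivalent with r_wlist links on
outstanding_req_list / req_wait_for_obj_list, and __done() moves the whole
wait list back to cpg_event_siblings each time an IO request finishes.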

Signed-off-by: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp>
---
 collie/collie.h |    5 +++++
 collie/group.c  |   34 ++++++++++++++++++++++++++++++++++
 collie/net.c    |   55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 94 insertions(+), 0 deletions(-)

diff --git a/collie/collie.h b/collie/collie.h
index a4505d7..17f9dec 100644
--- a/collie/collie.h
+++ b/collie/collie.h
@@ -64,6 +64,8 @@ struct request {
 	struct list_head r_wlist;
 	struct list_head pending_list;
 
+	uint64_t local_oid[2];
+
 	struct sheepdog_node_list_entry entry[SD_MAX_NODES];
 	int nr_nodes;
 
@@ -94,6 +96,9 @@ struct cluster_info {
 
 	DECLARE_BITMAP(vdi_inuse, SD_NR_VDIS);
 
+	struct list_head outstanding_req_list;
+	struct list_head req_wait_for_obj_list;
+
 	int nr_sobjs;
 
 	struct list_head cpg_event_siblings;
diff --git a/collie/group.c b/collie/group.c
index 5a49db7..66930c4 100644
--- a/collie/group.c
+++ b/collie/group.c
@@ -1320,6 +1320,30 @@ static int check_epoch(struct request *req)
 	return ret;
 }
 
+static int is_access_to_busy_objects(struct request *req)
+{
+	struct request *o_req;
+
+	if (!req->local_oid[0] && !req->local_oid[1])
+		return 0;
+
+	list_for_each_entry(o_req, &sys->outstanding_req_list, r_wlist) {
+
+		if (req->local_oid[0]) {
+			if (req->local_oid[0] == o_req->local_oid[0] ||
+			    req->local_oid[0] == o_req->local_oid[1])
+				return 1;
+		}
+
+		if (req->local_oid[1]) {
+			if (req->local_oid[1] == o_req->local_oid[0] ||
+			    req->local_oid[1] == o_req->local_oid[1])
+				return 1;
+		}
+	}
+	return 0;
+}
+
 /* can be called only by the main process */
 void start_cpg_event_work(void)
 {
@@ -1363,6 +1387,13 @@ void start_cpg_event_work(void)
 		list_del(&cevent->cpg_event_list);
 
 		if (is_io_request(req->rq.opcode)) {
+			if (is_access_to_busy_objects(req)) {
+				list_add_tail(&req->r_wlist, &sys->req_wait_for_obj_list);
+				continue;
+			}
+
+			list_add_tail(&req->r_wlist, &sys->outstanding_req_list);
+
 			sys->nr_outstanding_io++;
 
 			if (req->rq.flags & SD_FLAG_CMD_DIRECT) {
@@ -1578,6 +1609,9 @@ join_retry:
 	INIT_LIST_HEAD(&sys->vm_list);
 	INIT_LIST_HEAD(&sys->pending_list);
 
+	INIT_LIST_HEAD(&sys->outstanding_req_list);
+	INIT_LIST_HEAD(&sys->req_wait_for_obj_list);
+
 	INIT_LIST_HEAD(&sys->cpg_event_siblings);
 	cpg_context_set(cpg_handle, sys);
 
diff --git a/collie/net.c b/collie/net.c
index 7abab60..f641022 100644
--- a/collie/net.c
+++ b/collie/net.c
@@ -53,12 +53,24 @@ static void __done(struct work *work, int idx)
 	}
 
 	if (is_io_request(hdr->opcode)) {
+		struct request *next, *tmp;
+		list_del(&req->r_wlist);
+
 		sys->nr_outstanding_io--;
 		/*
 		 * TODO: if the request failed due to epoch unmatch,
 		 * we should retry here (adds this request to the tail
 		 * of sys->cpg_event_siblings.
 		 */
+
+		list_for_each_entry_safe(next, tmp, &sys->req_wait_for_obj_list,
+					 r_wlist) {
+			struct cpg_event *cevent = &next->cev;
+
+			list_del(&next->r_wlist);
+			list_add_tail(&cevent->cpg_event_list, &sys->cpg_event_siblings);
+		}
+
 		if (!sys->nr_outstanding_io &&
 		    !list_empty(&sys->cpg_event_siblings))
 			start_cpg_event_work();
@@ -67,6 +79,48 @@ static void __done(struct work *work, int idx)
 	req->done(req);
 }
 
+
+static int is_access_local(struct sheepdog_node_list_entry *e, int nr_nodes,
+			   uint64_t oid, int copies)
+{
+	int i, n;
+
+	for (i = 0; i < copies; i++) {
+		n = obj_to_sheep(e, nr_nodes, oid, i);
+
+		if (is_myself(&e[n]))
+			return 1;
+	}
+
+	return 0;
+}
+
+static void setup_access_to_local_objects(struct request *req)
+{
+	struct sd_obj_req *hdr = (struct sd_obj_req *)&req->rq;
+	int copies;
+
+	if (hdr->flags & SD_FLAG_CMD_DIRECT) {
+		req->local_oid[0] = hdr->oid;
+
+		if (hdr->flags & SD_FLAG_CMD_COW)
+			req->local_oid[1] = hdr->cow_oid;
+
+		return;
+	}
+
+	copies = hdr->copies;
+	if (!copies)
+		copies = sys->nr_sobjs;
+
+	if (is_access_local(req->entry, req->nr_nodes, hdr->oid, copies))
+		req->local_oid[0] = hdr->oid;
+
+	if ((hdr->flags & SD_FLAG_CMD_COW) &&
+	    is_access_local(req->entry, req->nr_nodes, hdr->cow_oid, copies))
+		req->local_oid[1] = hdr->cow_oid;
+}
+
 static void queue_request(struct request *req)
 {
 	struct cpg_event *cevent = &req->cev;
@@ -155,6 +209,7 @@ static void queue_request(struct request *req)
 		hdr->epoch = sys->epoch;
 
 	req->nr_nodes = setup_ordered_sd_node_list(req);
+	setup_access_to_local_objects(req);
 
 	cevent->ctype = CPG_EVENT_REQUEST;
 	list_add_tail(&cevent->cpg_event_list, &sys->cpg_event_siblings);
-- 
1.6.5