[Sheepdog] [PATCH] remove access to sys->sd_node_list in thread

FUJITA Tomonori fujita.tomonori at lab.ntt.co.jp
Thu May 6 13:43:12 CEST 2010


We can't access to sys->sd_node_list in thread. Note that add_vdi and
lookup_vdi are safe since these operations are serialized with
cpg_event. So epoch and sd_node_list don't change during these
operations.

TODO: fix del_vdi() that accesses to sys->sd_node_list in thread in
the unsafe way.

Signed-off-by: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp>
---
 collie/collie.h |    4 ++++
 collie/group.c  |    5 +++++
 collie/net.c    |    2 ++
 collie/store.c  |   32 +++++++++++---------------------
 collie/vdi.c    |    5 +++++
 5 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/collie/collie.h b/collie/collie.h
index d792d58..a4505d7 100644
--- a/collie/collie.h
+++ b/collie/collie.h
@@ -64,6 +64,9 @@ struct request {
 	struct list_head r_wlist;
 	struct list_head pending_list;
 
+	struct sheepdog_node_list_entry entry[SD_MAX_NODES];
+	int nr_nodes;
+
 	req_end_t done;
 	struct work work;
 };
@@ -116,6 +119,7 @@ int lookup_vdi(uint32_t epoch, char *data, int data_len, uint32_t *vid, uint32_t
 
 int read_vdis(char *data, int len, unsigned int *rsp_len);
 
+int setup_ordered_sd_node_list(struct request *req);
 int get_ordered_sd_node_list(struct sheepdog_node_list_entry *entries);
 
 int create_cluster(int port);
diff --git a/collie/group.c b/collie/group.c
index 377da49..5a49db7 100644
--- a/collie/group.c
+++ b/collie/group.c
@@ -201,6 +201,11 @@ int get_ordered_sd_node_list(struct sheepdog_node_list_entry *entries)
 	return build_node_list(&sys->sd_node_list, entries);
 }
 
+int setup_ordered_sd_node_list(struct request *req)
+{
+	return get_ordered_sd_node_list(req->entry);
+}
+
 static void get_node_list(struct sd_node_req *req,
 			  struct sd_node_rsp *rsp, void *data)
 {
diff --git a/collie/net.c b/collie/net.c
index 448459c..7abab60 100644
--- a/collie/net.c
+++ b/collie/net.c
@@ -154,6 +154,8 @@ static void queue_request(struct request *req)
 	if (!(hdr->flags & SD_FLAG_CMD_DIRECT))
 		hdr->epoch = sys->epoch;
 
+	req->nr_nodes = setup_ordered_sd_node_list(req);
+
 	cevent->ctype = CPG_EVENT_REQUEST;
 	list_add_tail(&cevent->cpg_event_list, &sys->cpg_event_siblings);
 	start_cpg_event_work();
diff --git a/collie/store.c b/collie/store.c
index 3e42cf9..d21a1c3 100644
--- a/collie/store.c
+++ b/collie/store.c
@@ -281,7 +281,7 @@ out:
 
 static int ob_open(uint32_t epoch, uint64_t oid, int aflags, int *ret);
 
-static int read_from_one(uint32_t epoch, uint64_t oid,
+static int read_from_one(struct request *req, uint32_t epoch, uint64_t oid,
 			 unsigned *ori_rlen, void *buf, uint64_t offset)
 {
 	int i, n, nr, fd, ret;
@@ -291,9 +291,8 @@ static int read_from_one(uint32_t epoch, uint64_t oid,
 	struct sd_obj_req hdr;
 	struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
 
-	e = zalloc(SD_MAX_NODES * sizeof(struct sheepdog_node_list_entry));
-again:
-	nr = get_ordered_sd_node_list(e);
+	e = req->entry;
+	nr = req->nr_nodes;
 
 	for (i = 0; i < nr; i++) {
 		n = obj_to_sheep(e, nr, oid, i);
@@ -343,8 +342,6 @@ again:
 		case SD_RES_OLD_NODE_VER:
 		case SD_RES_NEW_NODE_VER:
 			/* waits for the node list timer */
-			sleep(2);
-			goto again;
 			break;
 		default:
 			;
@@ -358,14 +355,15 @@ out:
 	return ret;
 }
 
-static int read_from_other_sheeps(uint32_t epoch, uint64_t oid, char *buf, int copies)
+static int read_from_other_sheeps(struct request *req, uint32_t epoch,
+				  uint64_t oid, char *buf, int copies)
 {
 	int ret;
 	unsigned int rlen;
 
 	rlen = SD_DATA_OBJ_SIZE;
 
-	ret = read_from_one(epoch, oid, &rlen, buf, 0);
+	ret = read_from_one(req, epoch, oid, &rlen, buf, 0);
 
 	return ret;
 }
@@ -383,11 +381,8 @@ static int forward_read_obj_req(struct request *req, char *buf)
 	uint64_t oid = hdr->oid;
 	int copies;
 
-	e = zalloc(SD_MAX_NODES * sizeof(struct sheepdog_node_list_entry));
-	if (!e)
-		return SD_RES_NO_MEM;
-
-	nr = get_ordered_sd_node_list(e);
+	e = req->entry;
+	nr = req->nr_nodes;
 
 	copies = hdr->copies;
 
@@ -432,7 +427,6 @@ static int forward_read_obj_req(struct request *req, char *buf)
 	}
 
 out:
-	free(e);
 
 	return ret;
 }
@@ -451,11 +445,8 @@ static int forward_write_obj_req(struct request *req, char *buf)
 	int done, nr_fds, local = 0;
 
 	dprintf("%lx\n", oid);
-	e = zalloc(SD_MAX_NODES * sizeof(struct sheepdog_node_list_entry));
-	if (!e)
-		return SD_RES_NO_MEM;
-
-	nr = get_ordered_sd_node_list(e);
+	e = req->entry;
+	nr = req->nr_nodes;
 
 	copies = hdr->copies;
 
@@ -565,7 +556,6 @@ again:
 
 	ret = SD_RES_SUCCESS;
 out:
-	free(e);
 
 	for (i = 0; i < ARRAY_SIZE(pfds); i++){
 		if (pfds[i].fd >= 0)
@@ -655,7 +645,7 @@ static int store_queue_request_local(struct request *req, char *buf, uint32_t ep
 		if (hdr->flags & SD_FLAG_CMD_COW) {
 			dprintf("%" PRIx64 "\n", hdr->cow_oid);
 
-			ret = read_from_other_sheeps(hdr->epoch, hdr->cow_oid, buf,
+			ret = read_from_other_sheeps(req, hdr->epoch, hdr->cow_oid, buf,
 						     hdr->copies);
 			if (ret) {
 				eprintf("failed to read old object\n");
diff --git a/collie/vdi.c b/collie/vdi.c
index a563f42..4798382 100644
--- a/collie/vdi.c
+++ b/collie/vdi.c
@@ -357,6 +357,11 @@ static void delete_one(struct work *work, int idx)
 
 	eprintf("%d %d, %16x\n", dw->done, dw->count, vdi_id);
 
+	/*
+	 * FIXME: can't use get_ordered_sd_node_list() here since this
+	 * is called in threads and not serialized with cpg_event so
+	 * we can't access to epoch and sd_node_list safely.
+	 */
 	nr_nodes = get_ordered_sd_node_list(entries);
 
 	ret = read_object(entries, nr_nodes, dw->epoch,
-- 
1.6.5




More information about the sheepdog mailing list