[sheepdog] [PATCH v2 1/3] dog: parallelize parse_vdi() with work queue

Hitoshi Mitake mitake.hitoshi at lab.ntt.co.jp
Tue Aug 27 14:35:35 CEST 2013


Currently, dog issues the requests that gather VDI information
sequentially (in parse_vdi()). This approach does not scale as the
number of VDIs grows.

This patch parallelizes parse_vdi() with a work queue. The dog
commands that call parse_vdi() benefit from the performance improvement.

Below is a sample run of dog vdi list. The test was done on a
16-node cluster which has 3000 VDIs.

Before:
$ time sh -c "dog/dog vdi list -a 10.68.13.1 > /dev/null"
sh -c "dog/dog vdi list -a 10.68.13.1 > /dev/null"  8.81s user 0.24s system 70% cpu 12.876 total

After:
% time sh -c "dog/dog vdi list -a 10.68.13.1 > /dev/null"
sh -c "dog/dog vdi list -a 10.68.13.1 > /dev/null"  14.35s user 2.02s system 209% cpu 7.816 total

The effect of this optimization would be larger when more nodes are
added to the cluster.

Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
 - rebase on the latest master
 -- collie -> dog
 -- print error messages with sd_err()

 - don't use needless thread in parse_vdi()

 dog/common.c |  129 ++++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 99 insertions(+), 30 deletions(-)

diff --git a/dog/common.c b/dog/common.c
index 99a9431..4a1458e 100644
--- a/dog/common.c
+++ b/dog/common.c
@@ -127,15 +127,100 @@ int sd_write_object(uint64_t oid, uint64_t cow_oid, void *data,
 
 #define FOR_EACH_VDI(nr, vdis) FOR_EACH_BIT(nr, vdis, SD_NR_VDIS)
 
+struct parse_vdi_info {
+	uint64_t oid;		/* object ID of the VDI inode to read */
+	size_t size;		/* caller's size limit for the inode read */
+	void *data;		/* opaque argument passed through to func */
+	vdi_parser_func_t func;	/* per-VDI callback, run by parse_vdi_main() */
+
+	bool succeed;		/* true iff parse_vdi_work() read the inode */
+
+	struct work work;	/* embedded work item queued by parse_vdi() */
+	struct sd_inode inode;	/* inode as read by parse_vdi_work() */
+};
+
+static void parse_vdi_work(struct work *work)
+{
+	int ret;
+	struct parse_vdi_info *info = container_of(work, struct parse_vdi_info,
+						work);
+	struct sd_inode inode;
+
+	info->succeed = false; /* stays false on any early return below */
+
+	memset(&inode, 0, sizeof(inode));
+	ret = sd_read_object(info->oid, &inode, SD_INODE_HEADER_SIZE, 0, true);
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("Failed to read inode header, oid: %"PRIx64"\n",
+		       info->oid);
+		return;
+	}
+
+	memcpy(&info->inode, &inode, sizeof(inode)); /* header + zeroed rest */
+
+	if (SD_INODE_HEADER_SIZE < info->size) { /* caller wants more */
+		unsigned int rlen =
+			DIV_ROUND_UP(inode.vdi_size, SD_DATA_OBJ_SIZE)
+			* sizeof(inode.data_vdi_id[0]);
+		size_t size = info->size;
+
+		if (size - SD_INODE_HEADER_SIZE < rlen)
+			rlen = size - SD_INODE_HEADER_SIZE; /* clamp to size */
+
+		ret = sd_read_object(info->oid,
+				     ((char *)&inode) + SD_INODE_HEADER_SIZE,
+				     rlen, SD_INODE_HEADER_SIZE, true);
+
+		if (ret != SD_RES_SUCCESS) {
+			sd_err("Failed to read inode, oid of the inode is:"
+			       " %"PRIx64"\n", info->oid);
+			return;
+		}
+
+		memcpy(((char *)&info->inode) + SD_INODE_HEADER_SIZE,
+			((char *)&inode) + SD_INODE_HEADER_SIZE, rlen);
+	}
+
+	info->succeed = true; /* checked by parse_vdi_main() */
+}
+
+static void parse_vdi_main(struct work *work) /* .done handler per VDI */
+{
+	struct parse_vdi_info *info = container_of(work, struct parse_vdi_info,
+						work);
+	struct sd_inode *inode;
+	uint32_t snapid;
+
+	if (!info->succeed) /* parse_vdi_work() failed to read the inode */
+		goto out;
+
+	inode = &info->inode;
+	if (inode->name[0] == '\0') /* this VDI has been deleted */
+		goto out; /* skip the callback, but still free info */
+
+	snapid = vdi_is_snapshot(inode) ? inode->snap_id : 0;
+	info->func(inode->vdi_id, inode->name, inode->tag, snapid, 0, inode,
+		   info->data);
+
+out:
+	free(info); /* allocated per VDI by parse_vdi(); every path ends here */
+}
+
+static struct work_queue *parse_vdi_wq;
+
 int parse_vdi(vdi_parser_func_t func, size_t size, void *data)
 {
 	int ret;
 	unsigned long nr;
-	static struct sd_inode i;
 	struct sd_req req;
 	struct sd_rsp *rsp = (struct sd_rsp *)&req;
 	static DECLARE_BITMAP(vdi_inuse, SD_NR_VDIS);
-	unsigned int rlen = sizeof(vdi_inuse);
+
+	parse_vdi_wq = create_work_queue("parse vdi", WQ_DYNAMIC);
+	if (!parse_vdi_wq) {
+		sd_err("creating work queue for parsing VDIs failed: %m\n");
+		return -1;
+	}
 
 	sd_init_req(&req, SD_OP_READ_VDIS);
 	req.data_length = sizeof(vdi_inuse);
@@ -149,40 +234,24 @@ int parse_vdi(vdi_parser_func_t func, size_t size, void *data)
 	}
 
 	FOR_EACH_VDI(nr, vdi_inuse) {
-		uint64_t oid;
-		uint32_t snapid;
-
-		oid = vid_to_vdi_oid(nr);
-
-		memset(&i, 0, sizeof(i));
-		ret = sd_read_object(oid, &i, SD_INODE_HEADER_SIZE, 0, true);
-		if (ret != SD_RES_SUCCESS) {
-			sd_err("Failed to read inode header");
-			continue;
-		}
-
-		if (i.name[0] == '\0') /* this VDI has been deleted */
-			continue;
+		struct parse_vdi_info *info;
+		info = xzalloc(sizeof(*info));
 
-		if (size > SD_INODE_HEADER_SIZE) {
-			rlen = DIV_ROUND_UP(i.vdi_size, SD_DATA_OBJ_SIZE) *
-				sizeof(i.data_vdi_id[0]);
-			if (rlen > size - SD_INODE_HEADER_SIZE)
-				rlen = size - SD_INODE_HEADER_SIZE;
+		info->oid = vid_to_vdi_oid(nr);
+		info->size = size;
+		info->func = func;
+		info->data = data;
 
-			ret = sd_read_object(oid, ((char *)&i) + SD_INODE_HEADER_SIZE,
-					     rlen, SD_INODE_HEADER_SIZE, true);
+		info->work.fn = parse_vdi_work;
+		info->work.done = parse_vdi_main;
 
-			if (ret != SD_RES_SUCCESS) {
-				sd_err("Failed to read inode");
-				continue;
-			}
-		}
+		queue_work(parse_vdi_wq, &info->work);
 
-		snapid = vdi_is_snapshot(&i) ? i.snap_id : 0;
-		func(i.vdi_id, i.name, i.tag, snapid, 0, &i, data);
+		/* reap results if there are ready ones */
+		event_loop(0);
 	}
 
+	work_queue_wait(parse_vdi_wq);
 out:
 	return ret;
 }
-- 
1.7.10.4




More information about the sheepdog mailing list