[sheepdog] [PATCH 1/2] sheep: update ledger objects in an asynchronous manner

Hitoshi Mitake mitake.hitoshi at lab.ntt.co.jp
Sat May 30 16:00:27 CEST 2015


This patch lets the gateway update ledger objects asynchronously.
This change improves performance. In particular, VDI deletion will
be faster.

Of course, this change introduces a chance of metadata inconsistency,
but sheepdog is a distributed file system without metadata
journaling. Therefore, metadata inconsistency introduced by a machine
crash, a network error, etc. must be resolved by fsck-like methods
(dog vdi check, etc.). For this purpose, the next patch enhances
VID GC to collect garbage objects that belong to removed VDIs.

Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
 sheep/gateway.c | 106 ++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 84 insertions(+), 22 deletions(-)

diff --git a/sheep/gateway.c b/sheep/gateway.c
index 3cb21f0..af7fce4 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -584,16 +584,11 @@ static int prepare_obj_refcnt(const struct sd_req *hdr, uint32_t *vids,
  * This function decreases a refcnt of vid_to_data_oid(old_vid, idx) and
  * increases one of vid_to_data_oid(new_vid, idx)
  */
-static int update_obj_refcnt(const struct sd_req *hdr, uint32_t *vids,
-			     uint32_t *new_vids,
+static void update_obj_refcnt(uint64_t offset, int start,
+			     size_t nr_vids, uint32_t *vids, uint32_t *new_vids,
 			     struct generation_reference *refs)
 {
-	int i, start, ret = SD_RES_SUCCESS;
-	size_t nr_vids = hdr->data_length / sizeof(*vids);
-	uint64_t offset;
-
-	offset = hdr->obj.offset - offsetof(struct sd_inode, data_vdi_id);
-	start = offset / sizeof(*vids);
+	int i, ret = SD_RES_SUCCESS;
 
 	for (i = 0; i < nr_vids; i++) {
 		if (vids[i] == 0 || vids[i] == new_vids[i])
@@ -603,16 +598,7 @@ static int update_obj_refcnt(const struct sd_req *hdr, uint32_t *vids,
 					   refs[i].generation, refs[i].count);
 		if (ret != SD_RES_SUCCESS)
 			sd_err("fail, %d", ret);
-
-		refs[i].generation = 0;
-		refs[i].count = 0;
 	}
-
-	return sd_write_object(hdr->obj.oid, (char *)refs,
-			       nr_vids * sizeof(*refs),
-			       offsetof(struct sd_inode, gref)
-			       + start * sizeof(*refs),
-			       false);
 }
 
 static bool is_inode_refresh_req(struct request *req)
@@ -663,13 +649,51 @@ int gateway_read_obj(struct request *req)
 	return ret;
 }
 
+struct update_obj_refcnt_work {
+	struct work work;
+
+	uint64_t offset;
+	int start;
+
+	size_t nr_vids;
+	uint32_t *vids, *new_vids;
+
+	struct generation_reference *refs;
+};
+
+static void async_update_obj_refcnt_work(struct work *work)
+{
+	struct update_obj_refcnt_work *w =
+		container_of(work, struct update_obj_refcnt_work, work);
+
+	sd_debug("async update of object reference count start: %p", w);
+	update_obj_refcnt(w->offset, w->start, w->nr_vids, w->vids,
+			  w->new_vids, w->refs);
+}
+
+static void async_update_obj_refcnt_done(struct work *work)
+{
+	struct update_obj_refcnt_work *w =
+		container_of(work, struct update_obj_refcnt_work, work);
+
+	sd_debug("async update of object reference count done: %p", w);
+
+	free(w->vids);
+	free(w->new_vids);
+	free(w->refs);
+
+	free(w);
+}
+
 int gateway_write_obj(struct request *req)
 {
 	uint64_t oid = req->rq.obj.oid;
 	int ret;
 	struct sd_req *hdr = &req->rq;
 	uint32_t *vids = NULL, *new_vids = req->data;
-	struct generation_reference *refs = NULL;
+	struct generation_reference *refs = NULL, *zeroed_refs = NULL;
+	struct update_obj_refcnt_work *refcnt_work;
+	size_t nr_vids;
 
 	if ((req->rq.flags & SD_FLAG_CMD_TGT) &&
 	    is_refresh_required(oid_to_vid(oid))) {
@@ -685,13 +709,14 @@ int gateway_write_obj(struct request *req)
 
 
 	if (is_data_vid_update(hdr)) {
-		size_t nr_vids = hdr->data_length / sizeof(*vids);
+		nr_vids = hdr->data_length / sizeof(*vids);
 
 		invalidate_other_nodes(oid_to_vid(oid));
 
 		/* read the previous vids to discard their references later */
 		vids = xzalloc(sizeof(*vids) * nr_vids);
 		refs = xzalloc(sizeof(*refs) * nr_vids);
+		zeroed_refs = xcalloc(sizeof(*zeroed_refs), nr_vids);
 		ret = prepare_obj_refcnt(hdr, vids, refs);
 		if (ret != SD_RES_SUCCESS)
 			goto out;
@@ -702,13 +727,50 @@ int gateway_write_obj(struct request *req)
 		goto out;
 
 	if (is_data_vid_update(hdr)) {
+		uint64_t offset;
+		int start;
+
+		offset = hdr->obj.offset
+			- offsetof(struct sd_inode, data_vdi_id);
+		start = offset / sizeof(*vids);
+
 		sd_debug("update reference counts, %" PRIx64, hdr->obj.oid);
-		update_obj_refcnt(hdr, vids, new_vids, refs);
+
+		ret = sd_write_object(hdr->obj.oid, (char *)zeroed_refs,
+				      nr_vids * sizeof(*zeroed_refs),
+				      offsetof(struct sd_inode, gref)
+				      + start * sizeof(*zeroed_refs), false);
+		if (ret != SD_RES_SUCCESS) {
+			sd_err("updating reference count of inode object %"
+			       PRIx64 " failed: %s", hdr->obj.oid,
+			       sd_strerror(ret));
+
+			goto out;
+		}
+
+		sd_debug("update ledger objects of %"PRIx64, hdr->obj.oid);
+		refcnt_work = xzalloc(sizeof(*refcnt_work));
+
+		refcnt_work->vids = vids;
+		refcnt_work->refs = refs;
+		refcnt_work->nr_vids = nr_vids;
+		refcnt_work->new_vids = xcalloc(hdr->data_length,
+						sizeof(uint32_t));
+		memcpy(refcnt_work->new_vids, new_vids, hdr->data_length);
+
+		refcnt_work->offset = offset;
+		refcnt_work->start = start;
+
+
+		refcnt_work->work.fn = async_update_obj_refcnt_work;
+		refcnt_work->work.done = async_update_obj_refcnt_done;
+
+		queue_work(sys->io_wqueue, &refcnt_work->work);
 	}
+
 out:
+	free(zeroed_refs);
 
-	free(vids);
-	free(refs);
 	return ret;
 }
 
-- 
1.9.1



More information about the sheepdog mailing list