[sheepdog] [PATCH v2 05/11] sheep: decrement generational reference count on vdi deletion

MORITA Kazutaka morita.kazutaka at gmail.com
Tue Jun 18 19:14:25 CEST 2013


From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>

This removes old vdi deletion code, which reclaims objects only when
all relevant snapshots are deleted, and uses a generational reference
counting algorithm.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 sheep/vdi.c |  213 +++--------------------------------------------------------
 1 file changed, 9 insertions(+), 204 deletions(-)

diff --git a/sheep/vdi.c b/sheep/vdi.c
index 988145d..93ab536 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -585,47 +585,11 @@ int read_vdis(char *data, int len, unsigned int *rsp_len)
 }
 
 struct deletion_work {
-	uint32_t done;
-
 	struct work work;
-	struct list_head list;
 	struct request *req;
-
 	uint32_t vid;
-
-	int count;
-	uint32_t *buf;
 };
 
-static LIST_HEAD(deletion_work_list);
-
-static int delete_inode(struct deletion_work *dw)
-{
-	struct sd_inode *inode = NULL;
-	int ret = SD_RES_SUCCESS;
-
-	inode = xzalloc(sizeof(*inode));
-	ret = read_object(vid_to_vdi_oid(dw->vid), (char *)inode,
-			  SD_INODE_HEADER_SIZE, 0);
-	if (ret != SD_RES_SUCCESS) {
-		ret = SD_RES_EIO;
-		goto out;
-	}
-
-	memset(inode->name, 0, sizeof(inode->name));
-
-	ret = write_object(vid_to_vdi_oid(dw->vid), (char *)inode,
-			   SD_INODE_HEADER_SIZE, 0, false);
-	if (ret != 0) {
-		ret = SD_RES_EIO;
-		goto out;
-	}
-
-out:
-	free(inode);
-	return ret;
-}
-
 static int notify_vdi_deletion(uint32_t vdi_id)
 {
 	struct sd_req hdr;
@@ -646,11 +610,11 @@ static int notify_vdi_deletion(uint32_t vdi_id)
 static void delete_one(struct work *work)
 {
 	struct deletion_work *dw = container_of(work, struct deletion_work, work);
-	uint32_t vdi_id = *(dw->buf + dw->count - dw->done - 1);
+	uint32_t vdi_id = dw->vid;
 	int ret, i, nr_deleted;
 	struct sd_inode *inode = NULL;
 
-	sd_dprintf("%d %d, %16x", dw->done, dw->count, vdi_id);
+	sd_eprintf("%16" PRIx32, vdi_id);
 
 	inode = malloc(sizeof(*inode));
 	if (!inode) {
@@ -666,9 +630,6 @@ static void delete_one(struct work *work)
 		goto out;
 	}
 
-	if (inode->vdi_size == 0 && vdi_is_deleted(inode))
-		goto out;
-
 	for (nr_deleted = 0, i = 0; i < MAX_DATA_OBJS; i++) {
 		uint64_t oid;
 
@@ -677,15 +638,10 @@ static void delete_one(struct work *work)
 
 		oid = vid_to_data_oid(inode->data_vdi_id[i], i);
 
-		if (inode->data_vdi_id[i] != inode->vdi_id) {
-			sd_dprintf("object %" PRIx64 " is base's data, would"
-				   " not be deleted.", oid);
-			continue;
-		}
-
-		ret = remove_object(oid);
+		ret = dec_object_refcnt(oid, inode->data_ref[i].generation,
+					inode->data_ref[i].count);
 		if (ret != SD_RES_SUCCESS)
-			sd_eprintf("remove object %" PRIx64 " fail, %d", oid,
+			sd_eprintf("discard ref %" PRIx64 " fail, %d", oid,
 				   ret);
 
 		nr_deleted++;
@@ -694,11 +650,10 @@ static void delete_one(struct work *work)
 	if (nr_deleted)
 		notify_vdi_deletion(vdi_id);
 
-	if (vdi_is_deleted(inode))
-		goto out;
-
 	inode->vdi_size = 0;
 	memset(inode->name, 0, sizeof(inode->name));
+	memset((char *)inode + SD_INODE_HEADER_SIZE, 0,
+	       SD_INODE_SIZE - SD_INODE_HEADER_SIZE);
 
 	write_object(vid_to_vdi_oid(vdi_id), (void *)inode,
 		     sizeof(*inode), 0, false);
@@ -711,175 +666,25 @@ static void delete_one_done(struct work *work)
 	struct deletion_work *dw = container_of(work, struct deletion_work, work);
 	struct request *req = dw->req;
 
-	dw->done++;
-	if (dw->done < dw->count) {
-		queue_work(sys->deletion_wqueue, &dw->work);
-		return;
-	}
-
-	list_del(&dw->list);
-
 	put_request(req);
-
-	free(dw->buf);
-	free(dw);
-
-	if (!list_empty(&deletion_work_list)) {
-		dw = list_first_entry(&deletion_work_list,
-				      struct deletion_work, list);
-
-		queue_work(sys->deletion_wqueue, &dw->work);
-	}
-}
-
-static int fill_vdi_list(struct deletion_work *dw, uint32_t root_vid)
-{
-	int ret, i;
-	struct sd_inode *inode = NULL;
-	int done = dw->count;
-	uint32_t vid;
-
-	inode = malloc(SD_INODE_HEADER_SIZE);
-	if (!inode) {
-		sd_eprintf("failed to allocate memory");
-		goto err;
-	}
-
-	dw->buf[dw->count++] = root_vid;
-again:
-	vid = dw->buf[done++];
-	ret = read_backend_object(vid_to_vdi_oid(vid), (char *)inode,
-			  SD_INODE_HEADER_SIZE, 0);
-
-	if (ret != SD_RES_SUCCESS) {
-		sd_eprintf("cannot find VDI object");
-		goto err;
-	}
-
-	if (!vdi_is_deleted(inode) && vid != dw->vid)
-		goto out;
-
-	for (i = 0; i < ARRAY_SIZE(inode->child_vdi_id); i++) {
-		if (!inode->child_vdi_id[i])
-			continue;
-
-		dw->buf[dw->count++] = inode->child_vdi_id[i];
-	}
-
-	if (dw->buf[done])
-		goto again;
-err:
-	free(inode);
-	return 0;
-out:
-	free(inode);
-	return 1;
-}
-
-static uint64_t get_vdi_root(uint32_t vid, bool *cloned)
-{
-	int ret;
-	struct sd_inode *inode = NULL;
-
-	*cloned = false;
-
-	inode = malloc(SD_INODE_HEADER_SIZE);
-	if (!inode) {
-		sd_eprintf("failed to allocate memory");
-		vid = 0;
-		goto out;
-	}
-next:
-	ret = read_backend_object(vid_to_vdi_oid(vid), (char *)inode,
-			  SD_INODE_HEADER_SIZE, 0);
-
-	if (vid == inode->vdi_id && inode->snap_id == 1
-			&& inode->parent_vdi_id != 0
-			&& !inode->snap_ctime) {
-		sd_dprintf("vdi %" PRIx32 " is a cloned vdi.", vid);
-		/* current vdi is a cloned vdi */
-		*cloned = true;
-	}
-
-	if (ret != SD_RES_SUCCESS) {
-		sd_eprintf("cannot find VDI object");
-		vid = 0;
-		goto out;
-	}
-
-	if (!inode->parent_vdi_id)
-		goto out;
-
-	vid = inode->parent_vdi_id;
-
-	goto next;
-out:
-	free(inode);
-
-	return vid;
 }
 
 static int start_deletion(struct request *req, uint32_t vid)
 {
 	struct deletion_work *dw = NULL;
-	int ret = SD_RES_NO_MEM;
-	bool cloned;
-	uint32_t root_vid;
 
 	dw = xzalloc(sizeof(struct deletion_work));
-	/* buf is to store vdi id of every object */
-	dw->buf = xzalloc(SD_INODE_SIZE - SD_INODE_HEADER_SIZE);
-	dw->count = 0;
+
 	dw->vid = vid;
 	dw->req = req;
 
 	dw->work.fn = delete_one;
 	dw->work.done = delete_one_done;
 
-	root_vid = get_vdi_root(dw->vid, &cloned);
-	if (!root_vid) {
-		ret = SD_RES_EIO;
-		goto err;
-	}
-
-	ret = fill_vdi_list(dw, root_vid);
-	if (ret) {
-		/*
-		 * if the VDI is a cloned VDI, delete its objects
-		 * no matter whether the VDI tree is clear.
-		 */
-		if (cloned) {
-			dw->buf[0] = vid;
-			dw->count = 1;
-		} else {
-			sd_dprintf("snapshot chain has valid vdi, "
-				   "just mark vdi %" PRIx32 " as deleted.",
-				   dw->vid);
-			delete_inode(dw);
-			return SD_RES_SUCCESS;
-		}
-	}
-
-	sd_dprintf("%d", dw->count);
-
-	if (dw->count == 0)
-		goto out;
-
 	uatomic_inc(&req->refcnt);
+	queue_work(sys->deletion_wqueue, &dw->work);
 
-	if (list_empty(&deletion_work_list)) {
-		list_add_tail(&dw->list, &deletion_work_list);
-		queue_work(sys->deletion_wqueue, &dw->work);
-	} else
-		list_add_tail(&dw->list, &deletion_work_list);
-out:
 	return SD_RES_SUCCESS;
-err:
-	if (dw)
-		free(dw->buf);
-	free(dw);
-
-	return ret;
 }
 
 int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int data_len,
-- 
1.7.9.5




More information about the sheepdog mailing list