[Sheepdog] [PATCH 1/2] deleting data objects of a vdi before deleting the inode

Li Wenpeng levin108 at gmail.com
Mon Apr 23 08:09:38 CEST 2012


From: levin li <xingke.lwp at taobao.com>

Currently, when deleting a vdi, sheep firstly clears the name
in the vdi inode to mark it as deleted, then try to delete the
data objects, there's a problem, if deleting one or more data
objects fails, we could never delete the object any more, because
the inode has been deleted.

Now I exchange the order of deleting inode and data objects, if
deleting some data object successes, we clear the the correlative
flag in inode->data_vdi_id[], orelse we set the dw->delete_error to
tell that an error occurs in the deletion work.

In delete_one_done(), if dw->delete_error is true, we set inode->vdi_size
to 0 to show this vdi has been deleted, but fail to delete some of
its objects, then we can try to delete the vdi again.

Signed-off-by: levin li <xingke.lwp at taobao.com>
---
 sheep/vdi.c |  114 ++++++++++++++++++++++++++++++++++++----------------------
 1 files changed, 71 insertions(+), 43 deletions(-)

diff --git a/sheep/vdi.c b/sheep/vdi.c
index 71912ba..d2a522d 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -346,7 +346,6 @@ int del_vdi(uint32_t epoch, char *data, int data_len, uint32_t *vid,
 	unsigned long dummy1, dummy2;
 	int ret;
 	struct sd_vnode *entries = NULL;
-	int nr_vnodes, nr_zones, nr_reqs;
 	struct sheepdog_inode *inode = NULL;
 
 	inode = malloc(SD_INODE_HEADER_SIZE);
@@ -370,32 +369,6 @@ int del_vdi(uint32_t epoch, char *data, int data_len, uint32_t *vid,
 	if (ret != SD_RES_SUCCESS)
 		goto out;
 
-	ret = get_ordered_sd_vnode_list(&entries, &nr_vnodes, &nr_zones);
-	if (ret != SD_RES_SUCCESS)
-		goto out;
-
-	nr_reqs = sys->nr_sobjs;
-	if (nr_reqs > nr_zones)
-		nr_reqs = nr_zones;
-
-	ret = read_object(entries, nr_vnodes, nr_zones, epoch,
-			  vid_to_vdi_oid(*vid), (char *)inode,
-			  SD_INODE_HEADER_SIZE, 0, nr_reqs);
-	if (ret != SD_RES_SUCCESS) {
-		ret = SD_RES_EIO;
-		goto out;
-	}
-
-	memset(inode->name, 0, sizeof(inode->name));
-
-	ret = write_object(entries, nr_vnodes, nr_zones, epoch,
-			   vid_to_vdi_oid(*vid), (char *)inode,
-			   SD_INODE_HEADER_SIZE, 0, 0, nr_reqs, 0);
-	if (ret != 0) {
-		ret = SD_RES_EIO;
-		goto out;
-	}
-
 	ret = start_deletion(*vid, epoch);
 out:
 	free(inode);
@@ -426,16 +399,61 @@ struct deletion_work {
 
 	int count;
 	uint32_t *buf;
+
+	struct sd_vnode entries[SD_MAX_VNODES];
+	int nr_vnodes;
+	int nr_zones;
+	int delete_error;
 };
 
 static LIST_HEAD(deletion_work_list);
 
+static int delete_inode(struct deletion_work *dw)
+{
+	int nr_reqs, ret = SD_RES_SUCCESS;
+	struct sheepdog_inode *inode = NULL;
+
+	inode = zalloc(sizeof(*inode));
+	if (!inode) {
+		eprintf("no memory to allocate inode.\n");
+		goto out;
+	}
+
+	nr_reqs = sys->nr_sobjs;
+	if (nr_reqs > dw->nr_zones)
+		nr_reqs = dw->nr_zones;
+
+	ret = read_object(dw->entries, dw->nr_vnodes, dw->nr_zones, dw->epoch,
+			  vid_to_vdi_oid(dw->vid), (char *)inode,
+			  SD_INODE_HEADER_SIZE, 0, nr_reqs);
+	if (ret != SD_RES_SUCCESS) {
+		ret = SD_RES_EIO;
+		goto out;
+	}
+
+	if (dw->delete_error)
+		inode->vdi_size = 0;
+	else
+		memset(inode->name, 0, sizeof(inode->name));
+
+	ret = write_object(dw->entries, dw->nr_vnodes, dw->nr_zones, dw->epoch,
+			   vid_to_vdi_oid(dw->vid), (char *)inode,
+			   SD_INODE_HEADER_SIZE, 0, 0, nr_reqs, 0);
+	if (ret != 0) {
+		ret = SD_RES_EIO;
+		goto out;
+	}
+
+out:
+	free(inode);
+	return ret;
+}
+
+
 static void delete_one(struct work *work)
 {
 	struct deletion_work *dw = container_of(work, struct deletion_work, work);
 	uint32_t vdi_id = *(dw->buf + dw->count - dw->done - 1);
-	struct sd_vnode *entries = NULL;
-	int nr_vnodes, nr_zones;
 	int ret, i;
 	struct sheepdog_inode *inode = NULL;
 
@@ -447,16 +465,7 @@ static void delete_one(struct work *work)
 		goto out;
 	}
 
-	/*
-	 * FIXME: can't use get_ordered_sd_node_list() here since this
-	 * is called in threads and not serialized with cpg_event so
-	 * we can't access to epoch and sd_node_list safely.
-	 */
-	ret = get_ordered_sd_vnode_list(&entries, &nr_vnodes, &nr_zones);
-	if (ret != SD_RES_SUCCESS)
-		goto out;
-
-	ret = read_object(entries, nr_vnodes, nr_zones, dw->epoch,
+	ret = read_object(dw->entries, dw->nr_vnodes, dw->nr_zones, dw->epoch,
 			  vid_to_vdi_oid(vdi_id), (void *)inode, sizeof(*inode),
 			  0, sys->nr_sobjs);
 
@@ -469,13 +478,22 @@ static void delete_one(struct work *work)
 		if (!inode->data_vdi_id[i])
 			continue;
 
-		remove_object(entries, nr_vnodes, nr_zones, dw->epoch,
+		ret = remove_object(dw->entries, dw->nr_vnodes, dw->nr_zones, dw->epoch,
 			      vid_to_data_oid(inode->data_vdi_id[i], i),
 			      inode->nr_copies);
+
+		if (ret != SD_RES_SUCCESS)
+			dw->delete_error = 1;
+		else
+			inode->data_vdi_id[i] = 0;
 	}
 
+	if (dw->delete_error)
+		write_object(dw->entries, dw->nr_vnodes, dw->nr_zones, dw->epoch,
+				vid_to_vdi_oid(vdi_id), (void *)inode, sizeof(*inode),
+				0, 0, inode->nr_copies, 0);
+
 out:
-	free_ordered_sd_vnode_list(entries);
 	free(inode);
 }
 
@@ -489,6 +507,8 @@ static void delete_one_done(struct work *work)
 		return;
 	}
 
+	delete_inode(dw);
+
 	list_del(&dw->dw_siblings);
 
 	free(dw->buf);
@@ -529,7 +549,7 @@ again:
 		goto err;
 	}
 
-	if (inode->name[0] != '\0')
+	if (inode->name[0] != '\0' && vid != dw->vid)
 		goto out;
 
 	for (i = 0; i < ARRAY_SIZE(inode->child_vdi_id); i++) {
@@ -616,6 +636,10 @@ int start_deletion(uint32_t vid, uint32_t epoch)
 	if (ret != SD_RES_SUCCESS)
 		goto err;
 
+	memcpy(dw->entries, entries, nr_vnodes * sizeof(struct sd_vnode));
+	dw->nr_vnodes = nr_vnodes;
+	dw->nr_zones = nr_zones;
+
 	root_vid = get_vdi_root(entries, nr_vnodes, nr_zones, dw->epoch, dw->vid);
 	if (!root_vid) {
 		ret = SD_RES_EIO;
@@ -623,8 +647,12 @@ int start_deletion(uint32_t vid, uint32_t epoch)
 	}
 
 	ret = fill_vdi_list(dw, entries, nr_vnodes, nr_zones, root_vid);
-	if (ret)
+	if (ret) {
+		dprintf("snapshot chain has valid vdi, "
+				"just mark vdi %" PRIx32 " as deleted.\n", dw->vid);
+		delete_inode(dw);
 		return SD_RES_SUCCESS;
+	}
 
 	dprintf("%d\n", dw->count);
 
-- 
1.7.1




More information about the sheepdog mailing list