From: levin li <xingke.lwp at taobao.com> Currently, when deleting a vdi, sheep first clears the name in the vdi inode to mark it as deleted, then tries to delete the data objects. There is a problem: if deleting one or more data objects fails, we can never delete those objects any more, because the inode has been deleted. Now I exchange the order of deleting the inode and the data objects. If deleting a data object succeeds, we clear the corresponding entry in inode->data_vdi_id[]; otherwise we set dw->delete_error to indicate that an error occurred in the deletion work. In delete_one_done(), if dw->delete_error is true, we set inode->vdi_size to 0 to show that this vdi has been deleted but that some of its objects could not be deleted, so that we can try to delete the vdi again. Signed-off-by: levin li <xingke.lwp at taobao.com> --- sheep/vdi.c | 114 ++++++++++++++++++++++++++++++++++++---------------------- 1 files changed, 71 insertions(+), 43 deletions(-) diff --git a/sheep/vdi.c b/sheep/vdi.c index 71912ba..d2a522d 100644 --- a/sheep/vdi.c +++ b/sheep/vdi.c @@ -346,7 +346,6 @@ int del_vdi(uint32_t epoch, char *data, int data_len, uint32_t *vid, unsigned long dummy1, dummy2; int ret; struct sd_vnode *entries = NULL; - int nr_vnodes, nr_zones, nr_reqs; struct sheepdog_inode *inode = NULL; inode = malloc(SD_INODE_HEADER_SIZE); @@ -370,32 +369,6 @@ int del_vdi(uint32_t epoch, char *data, int data_len, uint32_t *vid, if (ret != SD_RES_SUCCESS) goto out; - ret = get_ordered_sd_vnode_list(&entries, &nr_vnodes, &nr_zones); - if (ret != SD_RES_SUCCESS) - goto out; - - nr_reqs = sys->nr_sobjs; - if (nr_reqs > nr_zones) - nr_reqs = nr_zones; - - ret = read_object(entries, nr_vnodes, nr_zones, epoch, - vid_to_vdi_oid(*vid), (char *)inode, - SD_INODE_HEADER_SIZE, 0, nr_reqs); - if (ret != SD_RES_SUCCESS) { - ret = SD_RES_EIO; - goto out; - } - - memset(inode->name, 0, sizeof(inode->name)); - - ret = write_object(entries, nr_vnodes, nr_zones, epoch, - vid_to_vdi_oid(*vid), (char *)inode, - 
SD_INODE_HEADER_SIZE, 0, 0, nr_reqs, 0); - if (ret != 0) { - ret = SD_RES_EIO; - goto out; - } - ret = start_deletion(*vid, epoch); out: free(inode); @@ -426,16 +399,61 @@ struct deletion_work { int count; uint32_t *buf; + + struct sd_vnode entries[SD_MAX_VNODES]; + int nr_vnodes; + int nr_zones; + int delete_error; }; static LIST_HEAD(deletion_work_list); +static int delete_inode(struct deletion_work *dw) +{ + int nr_reqs, ret = SD_RES_SUCCESS; + struct sheepdog_inode *inode = NULL; + + inode = zalloc(sizeof(*inode)); + if (!inode) { + eprintf("no memory to allocate inode.\n"); + goto out; + } + + nr_reqs = sys->nr_sobjs; + if (nr_reqs > dw->nr_zones) + nr_reqs = dw->nr_zones; + + ret = read_object(dw->entries, dw->nr_vnodes, dw->nr_zones, dw->epoch, + vid_to_vdi_oid(dw->vid), (char *)inode, + SD_INODE_HEADER_SIZE, 0, nr_reqs); + if (ret != SD_RES_SUCCESS) { + ret = SD_RES_EIO; + goto out; + } + + if (dw->delete_error) + inode->vdi_size = 0; + else + memset(inode->name, 0, sizeof(inode->name)); + + ret = write_object(dw->entries, dw->nr_vnodes, dw->nr_zones, dw->epoch, + vid_to_vdi_oid(dw->vid), (char *)inode, + SD_INODE_HEADER_SIZE, 0, 0, nr_reqs, 0); + if (ret != 0) { + ret = SD_RES_EIO; + goto out; + } + +out: + free(inode); + return ret; +} + + static void delete_one(struct work *work) { struct deletion_work *dw = container_of(work, struct deletion_work, work); uint32_t vdi_id = *(dw->buf + dw->count - dw->done - 1); - struct sd_vnode *entries = NULL; - int nr_vnodes, nr_zones; int ret, i; struct sheepdog_inode *inode = NULL; @@ -447,16 +465,7 @@ static void delete_one(struct work *work) goto out; } - /* - * FIXME: can't use get_ordered_sd_node_list() here since this - * is called in threads and not serialized with cpg_event so - * we can't access to epoch and sd_node_list safely. 
- */ - ret = get_ordered_sd_vnode_list(&entries, &nr_vnodes, &nr_zones); - if (ret != SD_RES_SUCCESS) - goto out; - - ret = read_object(entries, nr_vnodes, nr_zones, dw->epoch, + ret = read_object(dw->entries, dw->nr_vnodes, dw->nr_zones, dw->epoch, vid_to_vdi_oid(vdi_id), (void *)inode, sizeof(*inode), 0, sys->nr_sobjs); @@ -469,13 +478,22 @@ static void delete_one(struct work *work) if (!inode->data_vdi_id[i]) continue; - remove_object(entries, nr_vnodes, nr_zones, dw->epoch, + ret = remove_object(dw->entries, dw->nr_vnodes, dw->nr_zones, dw->epoch, vid_to_data_oid(inode->data_vdi_id[i], i), inode->nr_copies); + + if (ret != SD_RES_SUCCESS) + dw->delete_error = 1; + else + inode->data_vdi_id[i] = 0; } + if (dw->delete_error) + write_object(dw->entries, dw->nr_vnodes, dw->nr_zones, dw->epoch, + vid_to_vdi_oid(vdi_id), (void *)inode, sizeof(*inode), + 0, 0, inode->nr_copies, 0); + out: - free_ordered_sd_vnode_list(entries); free(inode); } @@ -489,6 +507,8 @@ static void delete_one_done(struct work *work) return; } + delete_inode(dw); + list_del(&dw->dw_siblings); free(dw->buf); @@ -529,7 +549,7 @@ again: goto err; } - if (inode->name[0] != '\0') + if (inode->name[0] != '\0' && vid != dw->vid) goto out; for (i = 0; i < ARRAY_SIZE(inode->child_vdi_id); i++) { @@ -616,6 +636,10 @@ int start_deletion(uint32_t vid, uint32_t epoch) if (ret != SD_RES_SUCCESS) goto err; + memcpy(dw->entries, entries, nr_vnodes * sizeof(struct sd_vnode)); + dw->nr_vnodes = nr_vnodes; + dw->nr_zones = nr_zones; + root_vid = get_vdi_root(entries, nr_vnodes, nr_zones, dw->epoch, dw->vid); if (!root_vid) { ret = SD_RES_EIO; @@ -623,8 +647,12 @@ int start_deletion(uint32_t vid, uint32_t epoch) } ret = fill_vdi_list(dw, entries, nr_vnodes, nr_zones, root_vid); - if (ret) + if (ret) { + dprintf("snapshot chain has valid vdi, " + "just mark vdi %" PRIx32 " as deleted.\n", dw->vid); + delete_inode(dw); return SD_RES_SUCCESS; + } dprintf("%d\n", dw->count); -- 1.7.1 |