[sheepdog] 答复: [PATCH v2] sheep: cleaning vdi deletion process of sheepdaemon
Hitoshi Mitake
mitake.hitoshi at gmail.com
Fri Dec 27 19:54:55 CET 2013
At Fri, 27 Dec 2013 22:28:25 +0800,
redtone wrote:
>
> Thanks
>
> Many vdi disks data is lost, after I issue a command to delete one vdi
> disk's snapshot.
>
> I copy the data in .stale directory to restore the vdi disks
>
> I think this is the solution.
>
> Could you please release the patch to stable-0.7 version?
Yes, I'll backport this patch to stable-0.7.
BTW, could you post a way of reproducing the above problem? It is very
serious. And I didn't know that.
Thanks,
Hitoshi
>
>
> -----邮件原件-----
> 发件人: sheepdog-bounces at lists.wpkg.org [mailto:sheepdog-bounces at lists.wpkg.
> org] 代表 Hitoshi Mitake
> 发送时间: 2013年12月27日 15:54
> 收件人: sheepdog at lists.wpkg.org
> 抄送: Hitoshi Mitake
> 主题: [sheepdog] [PATCH v2] sheep: cleaning vdi deletion process of
> sheepdaemon
>
> Current code of vdi deletion process in sheep deamon is a real
> disaster. Especially, its irregular usage of work queue is terrible
> thing for maintenance. In addition, the vdi deletion process can
> potentially notify a completion of deletion to a dog process before
> actual completion.
>
> This patch cleans the VDI deletion code and removes the above bug.
>
> Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
> ---
>
> v2: change an comparison operator
>
> sheep/vdi.c | 301
> ++++++++++++++++++++++++++++++++++-------------------------
> 1 file changed, 176 insertions(+), 125 deletions(-)
>
> diff --git a/sheep/vdi.c b/sheep/vdi.c
> index f8b6bac..ff35e92 100644
> --- a/sheep/vdi.c
> +++ b/sheep/vdi.c
> @@ -754,22 +754,6 @@ int vdi_snapshot(const struct vdi_iocb *iocb, uint32_t
> *new_vid)
> info.vid);
> }
>
> -static int start_deletion(struct request *req, uint32_t vid);
> -
> -int vdi_delete(const struct vdi_iocb *iocb, struct request *req)
> -{
> - struct vdi_info info;
> - int ret;
> -
> - ret = vdi_lookup(iocb, &info);
> - if (ret != SD_RES_SUCCESS)
> - goto out;
> -
> - ret = start_deletion(req, info.vid);
> -out:
> - return ret;
> -}
> -
> int read_vdis(char *data, int len, unsigned int *rsp_len)
> {
> if (len != sizeof(sys->vdi_inuse))
> @@ -781,28 +765,35 @@ int read_vdis(char *data, int len, unsigned int
> *rsp_len)
> return SD_RES_SUCCESS;
> }
>
> -struct deletion_work {
> - uint32_t done;
> +struct deletion_info {
> + uint32_t target_vid;
> +
> + int delete_vid_count;
> + uint32_t *delete_vid_array;
> +
> + int done_deletion;
>
> + int finish_fd;
> + struct list_node list; /* connected deletion_info_queue */
> +};
> +
> +struct deletion_work {
> struct work work;
> - struct list_node list;
> - struct request *req;
>
> uint32_t vid;
> -
> - int count;
> - uint32_t *buf;
> + struct deletion_info *di;
> };
>
> -static LIST_HEAD(deletion_work_list);
> +static pthread_mutex_t deletion_info_queue_lock =
> PTHREAD_MUTEX_INITIALIZER;
> +static LIST_HEAD(deletion_info_queue);
>
> -static int delete_inode(struct deletion_work *dw)
> +static int delete_inode(uint32_t vid)
> {
> struct sd_inode *inode = NULL;
> int ret = SD_RES_SUCCESS;
>
> inode = xzalloc(sizeof(*inode));
> - ret = sd_read_object(vid_to_vdi_oid(dw->vid), (char *)inode,
> + ret = sd_read_object(vid_to_vdi_oid(vid), (char *)inode,
> SD_INODE_HEADER_SIZE, 0);
> if (ret != SD_RES_SUCCESS) {
> ret = SD_RES_EIO;
> @@ -811,7 +802,7 @@ static int delete_inode(struct deletion_work *dw)
>
> memset(inode->name, 0, sizeof(inode->name));
>
> - ret = sd_write_object(vid_to_vdi_oid(dw->vid), (char *)inode,
> + ret = sd_write_object(vid_to_vdi_oid(vid), (char *)inode,
> SD_INODE_HEADER_SIZE, 0, false);
> if (ret != 0) {
> ret = SD_RES_EIO;
> @@ -871,16 +862,15 @@ static void delete_cb(void *data, enum btree_node_type
> type, void *arg)
> }
> }
>
> -static void delete_one(struct work *work)
> +static void delete_one_vdi(struct work *work)
> {
> - struct deletion_work *dw = container_of(work, struct deletion_work,
> work);
> - uint32_t vdi_id = *(dw->buf + dw->count - dw->done - 1);
> + struct deletion_work *dw =
> + container_of(work, struct deletion_work, work);
> + uint32_t vdi_id = dw->vid;
> int ret;
> uint32_t i, nr_deleted, nr_objs;
> struct sd_inode *inode = NULL;
>
> - sd_debug("%d %d, %16x", dw->done, dw->count, vdi_id);
> -
> inode = malloc(sizeof(*inode));
> if (!inode) {
> sd_err("failed to allocate memory");
> @@ -942,74 +932,111 @@ out:
> free(inode);
> }
>
> -static void delete_one_done(struct work *work)
> +static void delete_one_vdi_done(struct work *work);
> +
> +static struct deletion_work *create_next_deletion_work(struct deletion_info
> *di)
> {
> - struct deletion_work *dw = container_of(work, struct deletion_work,
> work);
> - struct request *req = dw->req;
> + struct deletion_work *dw = NULL;
>
> - dw->done++;
> - if (dw->done < dw->count) {
> - queue_work(sys->deletion_wqueue, &dw->work);
> - return;
> + if (di->done_deletion >= di->delete_vid_count)
> + /* no more deletion work, this deletion info is completed */
> + return NULL;
> +
> + sd_debug("done_deletion: %d, delete_vid_count: %d, next vid:
> %"PRIu32,
> + di->done_deletion, di->delete_vid_count,
> + di->delete_vid_array[di->done_deletion + 1]);
> +
> + dw = xzalloc(sizeof(*dw));
> + dw->work.fn = delete_one_vdi;
> + dw->work.done = delete_one_vdi_done;
> + dw->vid = di->delete_vid_array[di->done_deletion++];
> +
> + dw->di = di;
> +
> + return dw;
> +}
> +
> +static void delete_one_vdi_done(struct work *work)
> +{
> + struct deletion_work *dw =
> + container_of(work, struct deletion_work, work);
> + struct deletion_work *next_dw;
> + struct deletion_info *di = dw->di;
> +
> + next_dw = create_next_deletion_work(di);
> + if (next_dw) {
> + sd_debug("next vid of deletion: %"PRIx32, next_dw->vid);
> + queue_work(sys->deletion_wqueue, &next_dw->work);
> +
> + goto out;
> }
>
> - list_del(&dw->list);
> + eventfd_xwrite(di->finish_fd, 1);
>
> - put_request(req);
> + /* the deletion info is completed */
> + free(di->delete_vid_array);
> + free(di);
>
> - free(dw->buf);
> - free(dw);
> + pthread_mutex_lock(&deletion_info_queue_lock);
>
> - if (!list_empty(&deletion_work_list)) {
> - dw = list_first_entry(&deletion_work_list,
> - struct deletion_work, list);
> + if (!list_empty(&deletion_info_queue)) {
> + di = list_first_entry(&deletion_info_queue,
> + struct deletion_info, list);
> + list_del(&di->list);
>
> - queue_work(sys->deletion_wqueue, &dw->work);
> + next_dw = create_next_deletion_work(di);
> + assert(next_dw);
> + sd_debug("next vid of deletion: %"PRIx32, next_dw->vid);
> + queue_work(sys->deletion_wqueue, &next_dw->work);
> }
> +
> + pthread_mutex_unlock(&deletion_info_queue_lock);
> +
> +out:
> + free(dw);
> }
>
> -static int fill_vdi_list(struct deletion_work *dw, uint32_t root_vid)
> +static int fill_delete_vid_array(struct deletion_info *di, uint32_t
> root_vid)
> {
> - int ret, i;
> + int ret = 0;
> struct sd_inode *inode = NULL;
> - int done = dw->count;
> + int done = di->delete_vid_count;
> uint32_t vid;
>
> inode = malloc(SD_INODE_HEADER_SIZE);
> if (!inode) {
> sd_err("failed to allocate memory");
> - goto err;
> + return -1;
> }
>
> - dw->buf[dw->count++] = root_vid;
> -again:
> - vid = dw->buf[done++];
> - ret = read_backend_object(vid_to_vdi_oid(vid), (char *)inode,
> - SD_INODE_HEADER_SIZE, 0);
> + di->delete_vid_array[di->delete_vid_count++] = root_vid;
>
> - if (ret != SD_RES_SUCCESS) {
> - sd_err("cannot find VDI object");
> - goto err;
> - }
> + do {
> + vid = di->delete_vid_array[done++];
> + ret = read_backend_object(vid_to_vdi_oid(vid), (char
> *)inode,
> + SD_INODE_HEADER_SIZE, 0);
> + if (ret != SD_RES_SUCCESS) {
> + sd_err("cannot find VDI object");
> + ret = -1;
> + break;
> + }
>
> - if (!vdi_is_deleted(inode) && vid != dw->vid)
> - goto out;
> + if (!vdi_is_deleted(inode) && vid != di->target_vid) {
> + ret = 1;
> + break;
> + }
>
> - for (i = 0; i < ARRAY_SIZE(inode->child_vdi_id); i++) {
> - if (!inode->child_vdi_id[i])
> - continue;
> + for (int i = 0; i < ARRAY_SIZE(inode->child_vdi_id); i++) {
> + if (!inode->child_vdi_id[i])
> + continue;
>
> - dw->buf[dw->count++] = inode->child_vdi_id[i];
> - }
> + di->delete_vid_array[di->delete_vid_count++] =
> + inode->child_vdi_id[i];
> + }
> + } while (di->delete_vid_array[done]);
>
> - if (dw->buf[done])
> - goto again;
> -err:
> - free(inode);
> - return 0;
> -out:
> free(inode);
> - return 1;
> + return ret;
> }
>
> static uint64_t get_vdi_root(uint32_t vid, bool *cloned)
> @@ -1022,100 +1049,124 @@ static uint64_t get_vdi_root(uint32_t vid, bool
> *cloned)
> inode = malloc(SD_INODE_HEADER_SIZE);
> if (!inode) {
> sd_err("failed to allocate memory");
> - vid = 0;
> - goto out;
> - }
> -next:
> - ret = read_backend_object(vid_to_vdi_oid(vid), (char *)inode,
> - SD_INODE_HEADER_SIZE, 0);
> -
> - if (vid == inode->vdi_id && inode->snap_id == 1
> - && inode->parent_vdi_id != 0
> - && !inode->snap_ctime) {
> - sd_debug("vdi %" PRIx32 " is a cloned vdi.", vid);
> - /* current vdi is a cloned vdi */
> - *cloned = true;
> + return 0;
> }
>
> - if (ret != SD_RES_SUCCESS) {
> - sd_err("cannot find VDI object");
> - vid = 0;
> - goto out;
> - }
> + do {
> + ret = read_backend_object(vid_to_vdi_oid(vid), (char
> *)inode,
> + SD_INODE_HEADER_SIZE, 0);
> + if (ret != SD_RES_SUCCESS) {
> + sd_err("cannot find VDI object");
> + vid = 0;
> + break;
> + }
>
> - if (!inode->parent_vdi_id)
> - goto out;
> + if (vid == inode->vdi_id && inode->snap_id == 1
> + && inode->parent_vdi_id != 0 && !inode->snap_ctime) {
> + sd_debug("vdi %" PRIx32 " is a cloned vdi.", vid);
> + /* current vdi is a cloned vdi */
> + *cloned = true;
> + }
>
> - vid = inode->parent_vdi_id;
> + if (!inode->parent_vdi_id)
> + break;
>
> - goto next;
> -out:
> - free(inode);
> + vid = inode->parent_vdi_id;
> + } while (true);
>
> + free(inode);
> return vid;
> }
>
> static int start_deletion(struct request *req, uint32_t vid)
> {
> + struct deletion_info *di = NULL;
> struct deletion_work *dw = NULL;
> - int ret = SD_RES_NO_MEM;
> + int ret = SD_RES_SUCCESS;
> bool cloned;
> uint32_t root_vid;
>
> - dw = xzalloc(sizeof(struct deletion_work));
> - /* buf is to store vdi id of every object */
> - dw->buf = xzalloc(SD_INODE_SIZE - SD_INODE_HEADER_SIZE);
> - dw->count = 0;
> - dw->vid = vid;
> - dw->req = req;
> -
> - dw->work.fn = delete_one;
> - dw->work.done = delete_one_done;
> + di = xzalloc(sizeof(*di));
> + di->delete_vid_array = xzalloc(SD_INODE_SIZE -
> SD_INODE_HEADER_SIZE);
> + di->done_deletion = di->delete_vid_count = 0;
> + di->target_vid = vid;
> + di->finish_fd = eventfd(0, 0);
> + if (di->finish_fd < 0) {
> + sd_err("cannot create an eventfd for notifying finish of"
> + " deletion info: %m");
> + goto out;
> + }
> + INIT_LIST_NODE(&di->list);
>
> - root_vid = get_vdi_root(dw->vid, &cloned);
> + root_vid = get_vdi_root(di->target_vid, &cloned);
> if (!root_vid) {
> ret = SD_RES_EIO;
> goto out;
> }
>
> - ret = fill_vdi_list(dw, root_vid);
> - if (ret) {
> + ret = fill_delete_vid_array(di, root_vid);
> + if (ret < 0) {
> + ret = SD_RES_EIO;
> + goto out;
> + } else if (ret == 1) {
> /*
> * if the VDI is a cloned VDI, delete its objects
> * no matter whether the VDI tree is clear.
> */
> if (cloned) {
> - dw->buf[0] = vid;
> - dw->count = 1;
> + di->delete_vid_array[0] = vid;
> + di->delete_vid_count = 1;
> } else {
> sd_debug("snapshot chain has valid vdi, just mark
> vdi %"
> - PRIx32 " as deleted.", dw->vid);
> - delete_inode(dw);
> + PRIx32 " as deleted.", di->target_vid);
> + delete_inode(di->target_vid);
> ret = SD_RES_SUCCESS;
> goto out;
> }
> }
>
> - sd_debug("%d", dw->count);
> + sd_debug("number of VDI deletion: %d", di->delete_vid_count);
>
> - ret = SD_RES_SUCCESS;
> - if (dw->count == 0)
> + if (di->delete_vid_count == 0)
> goto out;
>
> - refcount_inc(&req->refcnt);
> + pthread_mutex_lock(&deletion_info_queue_lock);
>
> - if (list_empty(&deletion_work_list)) {
> - list_add_tail(&dw->list, &deletion_work_list);
> + if (list_empty(&deletion_info_queue)) {
> + dw = create_next_deletion_work(di);
> + assert(dw);
> queue_work(sys->deletion_wqueue, &dw->work);
> } else
> - list_add_tail(&dw->list, &deletion_work_list);
> + list_add_tail(&di->list, &deletion_info_queue);
> +
> + pthread_mutex_unlock(&deletion_info_queue_lock);
> +
> + /*
> + * the event fd is written by delete_one_vdi_done(), when all vdis
> of
> + * deletion_info are deleted
> + */
> + eventfd_xread(di->finish_fd);
>
> return ret;
> out:
> - if (dw)
> - free(dw->buf);
> - free(dw);
> + if (di)
> + free(di->delete_vid_array);
> + free(di);
> +
> + return ret;
> +}
>
> +int vdi_delete(const struct vdi_iocb *iocb, struct request *req)
> +{
> + struct vdi_info info;
> + int ret;
> +
> + ret = vdi_lookup(iocb, &info);
> + if (ret != SD_RES_SUCCESS)
> + goto out;
> +
> + ret = start_deletion(req, info.vid);
> +out:
> return ret;
> }
>
> --
> 1.7.10.4
>
> --
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog
>
> --
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog
More information about the sheepdog
mailing list