[Sheepdog] [PATCH v3] sheep: change snapshot/clone flow
Huxinwei
huxinwei at huawei.com
Fri Apr 20 11:25:52 CEST 2012
What if old QEMU code connects to the latest sheepdog after the basic data structure has changed?
Will it destroy our precious data?
By the way, it seems that we don't have a version defined for client-server communication. Is that right?
> -----Original Message-----
> From: sheepdog-bounces at lists.wpkg.org
> [mailto:sheepdog-bounces at lists.wpkg.org] On Behalf Of 无觉
> Sent: Friday, April 20, 2012 3:08 PM
> To: yaohaiting.wujue at gmail.com; sheepdog at lists.wpkg.org
> Subject: [Sheepdog] 答复: [PATCH v3] sheep: change snapshot/clone flow
>
> If this patch is merged, the sheepdog driver in the QEMU project also needs
> to be changed.
>
> I have prepared the patch below, but have not yet submitted it to the QEMU project.
>
> Perhaps I should submit the patch to QEMU after it has been confirmed here.
>
> ---
> block/sheepdog.c | 37 ++++++++++++++++++++-----------------
> 1 files changed, 20 insertions(+), 17 deletions(-)
>
> diff --git a/block/sheepdog.c b/block/sheepdog.c
> index 3eaf625..cec5294 100644
> --- a/block/sheepdog.c
> +++ b/block/sheepdog.c
> @@ -167,7 +167,6 @@ typedef struct SheepdogInode {
> char name[SD_MAX_VDI_LEN];
> char tag[SD_MAX_VDI_TAG_LEN];
> uint64_t ctime;
> - uint64_t snap_ctime;
> uint64_t vm_clock_nsec;
> uint64_t vdi_size;
> uint64_t vm_state_size;
> @@ -177,6 +176,9 @@ typedef struct SheepdogInode {
> uint32_t snap_id;
> uint32_t vdi_id;
> uint32_t parent_vdi_id;
> + uint32_t snap_vdi_id;
> + /* padding is to memory alignment */
> + uint32_t padding;
> uint32_t child_vdi_id[MAX_CHILDREN];
> uint32_t data_vdi_id[MAX_DATA_OBJS];
> } SheepdogInode;
> @@ -203,7 +205,7 @@ static inline uint64_t fnv_64a_buf(void *buf, size_t len,
> uint64_t hval)
>
> static inline int is_data_obj_writable(SheepdogInode *inode, unsigned int idx)
> {
> - return inode->vdi_id == inode->data_vdi_id[idx];
> + return inode->snap_vdi_id == inode->data_vdi_id[idx];
> }
>
> static inline int is_data_obj(uint64_t oid)
> @@ -233,7 +235,7 @@ static inline uint64_t vid_to_data_oid(uint32_t vid,
> uint32_t idx)
>
> static inline int is_snapshot(struct SheepdogInode *inode)
> {
> - return !!inode->snap_ctime;
> + return inode->snap_id;
> }
>
> #undef dprintf
> @@ -694,14 +696,14 @@ static void coroutine_fn aio_read_response(void
> *opaque)
> }
> idx = data_oid_to_idx(aio_req->oid);
>
> - if (s->inode.data_vdi_id[idx] != s->inode.vdi_id) {
> + if (s->inode.data_vdi_id[idx] != s->inode.snap_vdi_id) {
> /*
> * If the object is newly created one, we need to update
> * the vdi object (metadata object). min_dirty_data_idx
> * and max_dirty_data_idx are changed to include updated
> * index between them.
> */
> - s->inode.data_vdi_id[idx] = s->inode.vdi_id;
> + s->inode.data_vdi_id[idx] = s->inode.snap_vdi_id;
> s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx);
> s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx);
>
> @@ -1552,7 +1554,7 @@ static int coroutine_fn sd_co_rw_vector(void *p)
> dprintf("update ino (%" PRIu32") %" PRIu64 " %" PRIu64
> " %" PRIu64 "\n", inode->vdi_id, oid,
> vid_to_data_oid(inode->data_vdi_id[idx], idx), idx);
> - oid = vid_to_data_oid(inode->vdi_id, idx);
> + oid = vid_to_data_oid(inode->snap_vdi_id, idx);
> dprintf("new oid %lx\n", oid);
> }
>
> @@ -1710,7 +1712,6 @@ static int sd_snapshot_create(BlockDriverState *bs,
> QEMUSnapshotInfo *sn_info)
>
> s->inode.vm_state_size = sn_info->vm_state_size;
> s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
> - strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag));
> /* we don't need to update entire object */
> datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
>
> @@ -1721,14 +1722,6 @@ static int sd_snapshot_create(BlockDriverState
> *bs, QEMUSnapshotInfo *sn_info)
> goto cleanup;
> }
>
> - ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
> - s->inode.nr_copies, datalen, 0, 0,
> s->cache_enabled);
> - if (ret < 0) {
> - error_report("failed to write snapshot's inode.");
> - ret = -EIO;
> - goto cleanup;
> - }
> -
> ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id,
> &new_vid, 1,
> s->addr, s->port);
> if (ret < 0) {
> @@ -1750,6 +1743,16 @@ static int sd_snapshot_create(BlockDriverState
> *bs, QEMUSnapshotInfo *sn_info)
> }
>
> memcpy(&s->inode, inode, datalen);
> + strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag));
> +
> + ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(new_vid),
> + s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
> + if (ret < 0) {
> + error_report("failed to write snapshot's inode.");
> + ret = -EIO;
> + goto cleanup;
> + }
> +
> dprintf("s->inode: name %s snap_id %x oid %x\n",
> s->inode.name, s->inode.snap_id, s->inode.vdi_id);
>
> @@ -1899,8 +1902,8 @@ static int sd_snapshot_list(BlockDriverState *bs,
> QEMUSnapshotInfo **psn_tab)
> }
>
> if (!strcmp(inode.name, s->name) && is_snapshot(&inode)) {
> - sn_tab[found].date_sec = inode.snap_ctime >> 32;
> - sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff;
> + sn_tab[found].date_sec = inode.ctime >> 32;
> + sn_tab[found].date_nsec = inode.ctime & 0xffffffff;
> sn_tab[found].vm_state_size = inode.vm_state_size;
> sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec;
>
> Thanks
> Haiting
>
> -----邮件原件-----
> 发件人: yaohaiting.wujue at gmail.com [mailto:yaohaiting.wujue at gmail.com]
> 发送时间: 2012年4月20日 14:40
> 收件人: sheepdog at lists.wpkg.org
> 抄送: 无觉
> 主题: [PATCH v3] sheep: change snapshot/clone flow
>
> From: HaiTing Yao <wujue.yht at taobao.com>
>
> When a snapshot is created for a source VDI, the newly created VDI is used
> as the source VDI, and the old source VDI is used as the snapshot. This flow
> confuses users about the relationship between a VDI and its snapshots. The
> snapshot metadata may be stored across multiple VDIs, so the inodes of
> multiple VDIs need to be read to get the snapshot list.
>
> When creating a snapshot, we do not need to turn the newly created VDI into
> the source VDI. The source VDI just needs to use the snapshot VDI ID as its
> object data ID.
>
> Show one example.
>
> Before modification:
>
> Name Id Size Used Shared Creation time VDI id
> Tag
> s v1 1 64 MB 20 MB 0.0 MB 2012-03-26 16:55 709128
> s v1 2 64 MB 0.0 MB 20 MB 2012-03-26 16:56 709129
> sn3
> v1 3 64 MB 0.0 MB 20 MB 2012-03-26 16:56 70912a
>
> After modification:
>
> Name Id Size Used Shared Creation time VDI id
> Tag
> v1 0 64 MB 20 MB 0.0 MB 2012-03-27 11:06 709128
> s v1 1 64 MB 0.0 MB 20 MB 2012-03-27 11:06 709129
> s v1 2 64 MB 0.0 MB 20 MB 2012-03-27 11:07 70912a
> sn3
>
> Signed-off-by: HaiTing Yao <wujue.yht at taobao.com>
> ---
> collie/common.c | 2 +-
> collie/vdi.c | 33 ++++++++++++++++++++++-----------
> include/sheepdog_proto.h | 6 ++++--
> sheep/vdi.c | 18 ++++++++++--------
> 4 files changed, 37 insertions(+), 22 deletions(-)
>
> changes from v2:
>
> 1. Tag display has already been merged, so I removed it from my patch.
> 2. Added padding to the inode structure; the packed attribute is no longer used.
>
> diff --git a/collie/common.c b/collie/common.c
> index f4301c4..636b821 100644
> --- a/collie/common.c
> +++ b/collie/common.c
> @@ -13,7 +13,7 @@
>
> int is_current(struct sheepdog_inode *i)
> {
> - return !i->snap_ctime;
> + return !i->snap_id;
> }
>
> char *size_to_str(uint64_t _size, char *str, int str_size)
> diff --git a/collie/vdi.c b/collie/vdi.c
> index 352e10c..0962fc2 100644
> --- a/collie/vdi.c
> +++ b/collie/vdi.c
> @@ -93,7 +93,10 @@ static void print_vdi_list(uint32_t vid, char *name, char
> *tag, uint32_t snapid,
> for (idx = 0; idx < MAX_DATA_OBJS; idx++) {
> if (!i->data_vdi_id[idx])
> continue;
> - if (is_data_obj_writeable(i, idx))
> + if (!i->parent_vdi_id)
> + my_objs++;
> + /* for clone VDI */
> + else if ((!i->snap_id) && is_data_obj_writeable(i, idx))
> my_objs++;
> else
> cow_objs++;
> @@ -522,7 +525,7 @@ out:
> static int vdi_snapshot(int argc, char **argv)
> {
> char *vdiname = argv[optind++];
> - uint32_t vid;
> + uint32_t vid, own_vid;
> int ret;
> char buf[SD_INODE_HEADER_SIZE];
> struct sheepdog_inode *inode = (struct sheepdog_inode *)buf;
> @@ -539,20 +542,26 @@ static int vdi_snapshot(int argc, char **argv)
> return EXIT_FAILURE;
> }
>
> - ret = sd_read_object(vid_to_vdi_oid(vid), inode,
> SD_INODE_HEADER_SIZE, 0);
> - if (ret != SD_RES_SUCCESS) {
> - fprintf(stderr, "Failed to read an inode header\n");
> + ret = do_vdi_create(vdiname, inode->vdi_size, vid, &own_vid, 1);
> +
> + if (ret < 0) {
> + fprintf(stderr, "Failed to write VDI %s\n", vdiname);
> return EXIT_FAILURE;
> }
>
> if (vdi_cmd_data.snapshot_tag[0]) {
> - ret = sd_write_object(vid_to_vdi_oid(vid), 0,
> vdi_cmd_data.snapshot_tag,
> + ret = sd_read_object(vid_to_vdi_oid(own_vid), inode,
> SD_INODE_HEADER_SIZE, 0);
> + if (ret != SD_RES_SUCCESS) {
> + fprintf(stderr, "Failed to read an inode
> header\n");
> + return EXIT_FAILURE;
> + }
> + ret = sd_write_object(vid_to_vdi_oid(own_vid), 0,
> vdi_cmd_data.snapshot_tag,
> SD_MAX_VDI_TAG_LEN,
> offsetof(struct sheepdog_inode,
> tag),
> 0, inode->nr_copies, 0);
> }
>
> - return do_vdi_create(vdiname, inode->vdi_size, vid, NULL, 1);
> + return ret;
> }
>
> static int vdi_clone(int argc, char **argv)
> @@ -1140,6 +1149,7 @@ static int vdi_read(int argc, char **argv)
> goto out;
> }
>
> +
> if (inode->vdi_size < offset) {
> fprintf(stderr, "Read offset is beyond the end of the
> VDI\n");
> ret = EXIT_FAILURE;
> @@ -1284,7 +1294,7 @@ static int vdi_write(int argc, char **argv)
> remain -= ret;
> }
>
> - inode->data_vdi_id[idx] = inode->vdi_id;
> + inode->data_vdi_id[idx] = inode->snap_vdi_id;
> oid = vid_to_data_oid(inode->data_vdi_id[idx], idx);
> ret = sd_write_object(oid, old_oid, buf, len, offset, flags,
> inode->nr_copies, create);
> @@ -1295,9 +1305,10 @@ static int vdi_write(int argc, char **argv)
> }
>
> if (create) {
> - ret = sd_write_object(vid_to_vdi_oid(vid), 0, &vid,
> sizeof(vid),
> -
> SD_INODE_HEADER_SIZE + sizeof(vid) * idx, 0,
> - inode->nr_copies, 0);
> + /* snap_vdi_id is equal to vdi_id when no
> snapshot */
> + ret = sd_write_object(vid_to_vdi_oid(vid), 0,
> &inode->snap_vdi_id,
> + sizeof(inode->snap_vdi_id),
> SD_INODE_HEADER_SIZE + sizeof(vid) * idx,
> + 0, inode->nr_copies, 0);
> if (ret) {
> ret = EXIT_FAILURE;
> goto out;
> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
> index 11c2c7c..8691901 100644
> --- a/include/sheepdog_proto.h
> +++ b/include/sheepdog_proto.h
> @@ -179,7 +179,6 @@ struct sheepdog_inode {
> char name[SD_MAX_VDI_LEN];
> char tag[SD_MAX_VDI_TAG_LEN];
> uint64_t ctime;
> - uint64_t snap_ctime;
> uint64_t vm_clock_nsec;
> uint64_t vdi_size;
> uint64_t vm_state_size;
> @@ -189,6 +188,9 @@ struct sheepdog_inode {
> uint32_t snap_id;
> uint32_t vdi_id;
> uint32_t parent_vdi_id;
> + uint32_t snap_vdi_id;
> + /* padding is to memory alignment */
> + uint32_t padding;
> uint32_t child_vdi_id[MAX_CHILDREN];
> uint32_t data_vdi_id[MAX_DATA_OBJS];
> };
> @@ -240,7 +242,7 @@ static inline uint64_t hash_64(uint64_t val, unsigned
> int bits)
>
> static inline int is_data_obj_writeable(struct sheepdog_inode *inode, int idx)
> {
> - return inode->vdi_id == inode->data_vdi_id[idx];
> + return inode->snap_vdi_id == inode->data_vdi_id[idx];
> }
>
> static inline int is_vdi_obj(uint64_t oid)
> diff --git a/sheep/vdi.c b/sheep/vdi.c
> index 71912ba..81f1a66 100644
> --- a/sheep/vdi.c
> +++ b/sheep/vdi.c
> @@ -82,10 +82,10 @@ static int create_vdi_obj(uint32_t epoch, char *name,
> uint32_t new_vid, uint64_t
> ret = SD_RES_BASE_VDI_READ;
> goto out;
> }
> -
> - cur->snap_ctime = (uint64_t) tv.tv_sec << 32 |
> tv.tv_usec * 1000;
> - } else
> - base->snap_ctime = (uint64_t) tv.tv_sec << 32 |
> tv.tv_usec * 1000;
> + } else {
> + base->snap_vdi_id = new_vid;
> + size = base->vdi_size;
> + }
> }
>
> strncpy(new->name, name, sizeof(new->name));
> @@ -96,6 +96,7 @@ static int create_vdi_obj(uint32_t epoch, char *name,
> uint32_t new_vid, uint64_t
> new->nr_copies = copies;
> new->block_size_shift = find_next_bit(&block_size, BITS_PER_LONG,
> 0);
> new->snap_id = snapid;
> + new->snap_vdi_id = new_vid;
>
> if (base_vid) {
> int i;
> @@ -192,14 +193,15 @@ static int find_first_vdi(uint32_t epoch, unsigned
> long start, unsigned long end
> }
>
> if (!strncmp(inode->name, name, strlen(inode->name))) {
> + if (!*next_snap)
> + *next_snap = inode->snap_id + 1;
> vdi_found = 1;
> if (tag && tag[0] &&
> strncmp(inode->tag, tag,
> sizeof(inode->tag)) != 0)
> continue;
> - if (snapid && snapid != inode->snap_id)
> + if (snapid != inode->snap_id)
> continue;
>
> - *next_snap = inode->snap_id + 1;
> *vid = inode->vdi_id;
> *nr_copies = inode->nr_copies;
> if (ctime)
> @@ -280,7 +282,7 @@ int add_vdi(uint32_t epoch, char *data, int data_len,
> uint64_t size,
> int is_snapshot, unsigned int *nr_copies)
> {
> uint32_t cur_vid = 0;
> - uint32_t next_snapid;
> + uint32_t next_snapid = 0;
> unsigned long nr, deleted_nr = SD_NR_VDIS, right_nr = SD_NR_VDIS;
> int ret;
> char *name;
> @@ -313,7 +315,7 @@ int add_vdi(uint32_t epoch, char *data, int data_len,
> uint64_t size,
> else
> nr = deleted_nr; /* we can recycle a deleted VDI
> */
>
> - next_snapid = 1;
> + next_snapid = 0;
> }
>
> *new_vid = nr;
> --
> 1.7.1
>
>
> ________________________________
>
> This email (including any attachments) is confidential and may be legally
> privileged. If you received this email in error, please delete it immediately and
> do not copy it or use it for any purpose or disclose its contents to any other
> person. Thank you.
>
> 本电邮(包括任何附件)可能含有机密资料并受法律保护。如您不是正确的收
> 件人,请您立即删除本邮件。请不要将本电邮进行复制并用作任何其他用
> 途、或透露本邮件之内容。谢谢。
> --
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog
More information about the sheepdog
mailing list