[Sheepdog] [PATCH v3] sheep: change snapshot/clone flow

Huxinwei huxinwei at huawei.com
Fri Apr 20 11:25:52 CEST 2012


What if old qemu code connects to the latest sheepdog, whose basic data structure has changed?
Will it destroy our precious data?

Btw: It seems that we don't have a version defined for client-server communication. Is that right?

> -----Original Message-----
> From: sheepdog-bounces at lists.wpkg.org
> [mailto:sheepdog-bounces at lists.wpkg.org] On Behalf Of 无觉
> Sent: Friday, April 20, 2012 3:08 PM
> To: yaohaiting.wujue at gmail.com; sheepdog at lists.wpkg.org
> Subject: [Sheepdog] 答复: [PATCH v3] sheep: change snapshot/clone flow
> 
> If this patch is merged, the sheepdog driver in the qemu project also needs
> to be changed.
> 
> I have prepared the patch below, but have not submitted it to the qemu project.
> 
> Perhaps I should submit the patch to qemu after the confirmation here.
> 
> ---
>  block/sheepdog.c |   37 ++++++++++++++++++++-----------------
>  1 files changed, 20 insertions(+), 17 deletions(-)
> 
> diff --git a/block/sheepdog.c b/block/sheepdog.c
> index 3eaf625..cec5294 100644
> --- a/block/sheepdog.c
> +++ b/block/sheepdog.c
> @@ -167,7 +167,6 @@ typedef struct SheepdogInode {
>      char name[SD_MAX_VDI_LEN];
>      char tag[SD_MAX_VDI_TAG_LEN];
>      uint64_t ctime;
> -    uint64_t snap_ctime;
>      uint64_t vm_clock_nsec;
>      uint64_t vdi_size;
>      uint64_t vm_state_size;
> @@ -177,6 +176,9 @@ typedef struct SheepdogInode {
>      uint32_t snap_id;
>      uint32_t vdi_id;
>      uint32_t parent_vdi_id;
> +    uint32_t snap_vdi_id;
> +    /* padding is to memory alignment */
> +    uint32_t padding;
>      uint32_t child_vdi_id[MAX_CHILDREN];
>      uint32_t data_vdi_id[MAX_DATA_OBJS];
>  } SheepdogInode;
> @@ -203,7 +205,7 @@ static inline uint64_t fnv_64a_buf(void *buf, size_t len,
> uint64_t hval)
> 
>  static inline int is_data_obj_writable(SheepdogInode *inode, unsigned int idx)
>  {
> -    return inode->vdi_id == inode->data_vdi_id[idx];
> +    return inode->snap_vdi_id == inode->data_vdi_id[idx];
>  }
> 
>  static inline int is_data_obj(uint64_t oid)
> @@ -233,7 +235,7 @@ static inline uint64_t vid_to_data_oid(uint32_t vid,
> uint32_t idx)
> 
>  static inline int is_snapshot(struct SheepdogInode *inode)
>  {
> -    return !!inode->snap_ctime;
> +    return inode->snap_id;
>  }
> 
>  #undef dprintf
> @@ -694,14 +696,14 @@ static void coroutine_fn aio_read_response(void
> *opaque)
>          }
>          idx = data_oid_to_idx(aio_req->oid);
> 
> -        if (s->inode.data_vdi_id[idx] != s->inode.vdi_id) {
> +        if (s->inode.data_vdi_id[idx] != s->inode.snap_vdi_id) {
>              /*
>               * If the object is newly created one, we need to update
>               * the vdi object (metadata object).  min_dirty_data_idx
>               * and max_dirty_data_idx are changed to include updated
>               * index between them.
>               */
> -            s->inode.data_vdi_id[idx] = s->inode.vdi_id;
> +            s->inode.data_vdi_id[idx] = s->inode.snap_vdi_id;
>              s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx);
>              s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx);
> 
> @@ -1552,7 +1554,7 @@ static int coroutine_fn sd_co_rw_vector(void *p)
>              dprintf("update ino (%" PRIu32") %" PRIu64 " %" PRIu64
>                      " %" PRIu64 "\n", inode->vdi_id, oid,
>                      vid_to_data_oid(inode->data_vdi_id[idx], idx), idx);
> -            oid = vid_to_data_oid(inode->vdi_id, idx);
> +            oid = vid_to_data_oid(inode->snap_vdi_id, idx);
>              dprintf("new oid %lx\n", oid);
>          }
> 
> @@ -1710,7 +1712,6 @@ static int sd_snapshot_create(BlockDriverState *bs,
> QEMUSnapshotInfo *sn_info)
> 
>      s->inode.vm_state_size = sn_info->vm_state_size;
>      s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
> -    strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag));
>      /* we don't need to update entire object */
>      datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
> 
> @@ -1721,14 +1722,6 @@ static int sd_snapshot_create(BlockDriverState
> *bs, QEMUSnapshotInfo *sn_info)
>          goto cleanup;
>      }
> 
> -    ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
> -                       s->inode.nr_copies, datalen, 0, 0,
> s->cache_enabled);
> -    if (ret < 0) {
> -        error_report("failed to write snapshot's inode.");
> -        ret = -EIO;
> -        goto cleanup;
> -    }
> -
>      ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id,
> &new_vid, 1,
>                         s->addr, s->port);
>      if (ret < 0) {
> @@ -1750,6 +1743,16 @@ static int sd_snapshot_create(BlockDriverState
> *bs, QEMUSnapshotInfo *sn_info)
>      }
> 
>      memcpy(&s->inode, inode, datalen);
> +    strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag));
> +
> +    ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(new_vid),
> +        s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
> +    if (ret < 0) {
> +        error_report("failed to write snapshot's inode.");
> +        ret = -EIO;
> +        goto cleanup;
> +    }
> +
>      dprintf("s->inode: name %s snap_id %x oid %x\n",
>              s->inode.name, s->inode.snap_id, s->inode.vdi_id);
> 
> @@ -1899,8 +1902,8 @@ static int sd_snapshot_list(BlockDriverState *bs,
> QEMUSnapshotInfo **psn_tab)
>          }
> 
>          if (!strcmp(inode.name, s->name) && is_snapshot(&inode)) {
> -            sn_tab[found].date_sec = inode.snap_ctime >> 32;
> -            sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff;
> +            sn_tab[found].date_sec = inode.ctime >> 32;
> +            sn_tab[found].date_nsec = inode.ctime & 0xffffffff;
>              sn_tab[found].vm_state_size = inode.vm_state_size;
>              sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec;
> 
> Thanks
> Haiting
> 
> -----邮件原件-----
> 发件人: yaohaiting.wujue at gmail.com [mailto:yaohaiting.wujue at gmail.com]
> 发送时间: 2012年4月20日 14:40
> 收件人: sheepdog at lists.wpkg.org
> 抄送: 无觉
> 主题: [PATCH v3] sheep: change snapshot/clone flow
> 
> From: HaiTing Yao <wujue.yht at taobao.com>
> 
> When creating a snapshot of a source VDI, the newly created VDI is used as
> the source VDI, and the old source VDI is used as the snapshot. This flow
> confuses users about the relation between a VDI and its snapshots. The
> snapshot metadata may be stored across multiple VDIs, so the inodes of
> multiple VDIs need to be read to get the snapshot list.
> 
> When creating a snapshot, we do not need to change the newly created VDI
> into the source VDI. The source VDI just needs to use the snapshot VDI ID
> as its object data ID.
> 
> Show one example.
> 
> Before modification:
> 
>   Name        Id    Size    Used  Shared    Creation time   VDI id
> Tag
> s v1           1   64 MB   20 MB  0.0 MB 2012-03-26 16:55   709128
> s v1           2   64 MB  0.0 MB   20 MB 2012-03-26 16:56   709129
> sn3
>   v1           3   64 MB  0.0 MB   20 MB 2012-03-26 16:56   70912a
> 
> After modification:
> 
>   Name        Id    Size    Used  Shared    Creation time   VDI id
> Tag
>   v1           0   64 MB   20 MB  0.0 MB 2012-03-27 11:06   709128
> s v1           1   64 MB  0.0 MB   20 MB 2012-03-27 11:06   709129
> s v1           2   64 MB  0.0 MB   20 MB 2012-03-27 11:07   70912a
> sn3
> 
> Signed-off-by: HaiTing Yao <wujue.yht at taobao.com>
> ---
>  collie/common.c          |    2 +-
>  collie/vdi.c             |   33 ++++++++++++++++++++++-----------
>  include/sheepdog_proto.h |    6 ++++--
>  sheep/vdi.c              |   18 ++++++++++--------
>  4 files changed, 37 insertions(+), 22 deletions(-)
> 
> changes from v2:
> 
> 1. Tag display has been merged, so I removed the display from my patch
> 2. Added padding to the inode structure; the packed attribute is no longer used
> 
> diff --git a/collie/common.c b/collie/common.c
> index f4301c4..636b821 100644
> --- a/collie/common.c
> +++ b/collie/common.c
> @@ -13,7 +13,7 @@
> 
>  int is_current(struct sheepdog_inode *i)
>  {
> -       return !i->snap_ctime;
> +       return !i->snap_id;
>  }
> 
>  char *size_to_str(uint64_t _size, char *str, int str_size)
> diff --git a/collie/vdi.c b/collie/vdi.c
> index 352e10c..0962fc2 100644
> --- a/collie/vdi.c
> +++ b/collie/vdi.c
> @@ -93,7 +93,10 @@ static void print_vdi_list(uint32_t vid, char *name, char
> *tag, uint32_t snapid,
>         for (idx = 0; idx < MAX_DATA_OBJS; idx++) {
>                 if (!i->data_vdi_id[idx])
>                         continue;
> -               if (is_data_obj_writeable(i, idx))
> +               if (!i->parent_vdi_id)
> +                       my_objs++;
> +               /* for clone VDI */
> +               else if ((!i->snap_id) && is_data_obj_writeable(i, idx))
>                         my_objs++;
>                 else
>                         cow_objs++;
> @@ -522,7 +525,7 @@ out:
>  static int vdi_snapshot(int argc, char **argv)
>  {
>         char *vdiname = argv[optind++];
> -       uint32_t vid;
> +       uint32_t vid, own_vid;
>         int ret;
>         char buf[SD_INODE_HEADER_SIZE];
>         struct sheepdog_inode *inode = (struct sheepdog_inode *)buf;
> @@ -539,20 +542,26 @@ static int vdi_snapshot(int argc, char **argv)
>                 return EXIT_FAILURE;
>         }
> 
> -       ret = sd_read_object(vid_to_vdi_oid(vid), inode,
> SD_INODE_HEADER_SIZE, 0);
> -       if (ret != SD_RES_SUCCESS) {
> -               fprintf(stderr, "Failed to read an inode header\n");
> +       ret = do_vdi_create(vdiname, inode->vdi_size, vid, &own_vid, 1);
> +
> +       if (ret < 0) {
> +               fprintf(stderr, "Failed to write VDI %s\n", vdiname);
>                 return EXIT_FAILURE;
>         }
> 
>         if (vdi_cmd_data.snapshot_tag[0]) {
> -               ret = sd_write_object(vid_to_vdi_oid(vid), 0,
> vdi_cmd_data.snapshot_tag,
> +               ret = sd_read_object(vid_to_vdi_oid(own_vid), inode,
> SD_INODE_HEADER_SIZE, 0);
> +               if (ret != SD_RES_SUCCESS) {
> +                       fprintf(stderr, "Failed to read an inode
> header\n");
> +                       return EXIT_FAILURE;
> +               }
> +               ret = sd_write_object(vid_to_vdi_oid(own_vid), 0,
> vdi_cmd_data.snapshot_tag,
>                                       SD_MAX_VDI_TAG_LEN,
>                                       offsetof(struct sheepdog_inode,
> tag),
>                                       0, inode->nr_copies, 0);
>         }
> 
> -       return do_vdi_create(vdiname, inode->vdi_size, vid, NULL, 1);
> +       return ret;
>  }
> 
>  static int vdi_clone(int argc, char **argv)
> @@ -1140,6 +1149,7 @@ static int vdi_read(int argc, char **argv)
>                 goto out;
>         }
> 
> +
>         if (inode->vdi_size < offset) {
>                 fprintf(stderr, "Read offset is beyond the end of the
> VDI\n");
>                 ret = EXIT_FAILURE;
> @@ -1284,7 +1294,7 @@ static int vdi_write(int argc, char **argv)
>                         remain -= ret;
>                 }
> 
> -               inode->data_vdi_id[idx] = inode->vdi_id;
> +               inode->data_vdi_id[idx] = inode->snap_vdi_id;
>                 oid = vid_to_data_oid(inode->data_vdi_id[idx], idx);
>                 ret = sd_write_object(oid, old_oid, buf, len, offset, flags,
>                                       inode->nr_copies, create);
> @@ -1295,9 +1305,10 @@ static int vdi_write(int argc, char **argv)
>                 }
> 
>                 if (create) {
> -                       ret = sd_write_object(vid_to_vdi_oid(vid), 0, &vid,
> sizeof(vid),
> -
> SD_INODE_HEADER_SIZE + sizeof(vid) * idx, 0,
> -                                             inode->nr_copies, 0);
> +                       /* snap_vdi_id is equal to vdi_id when no
> snapshot */
> +                       ret = sd_write_object(vid_to_vdi_oid(vid), 0,
> &inode->snap_vdi_id,
> +                               sizeof(inode->snap_vdi_id),
> SD_INODE_HEADER_SIZE + sizeof(vid) * idx,
> +                               0, inode->nr_copies, 0);
>                         if (ret) {
>                                 ret = EXIT_FAILURE;
>                                 goto out;
> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
> index 11c2c7c..8691901 100644
> --- a/include/sheepdog_proto.h
> +++ b/include/sheepdog_proto.h
> @@ -179,7 +179,6 @@ struct sheepdog_inode {
>         char name[SD_MAX_VDI_LEN];
>         char tag[SD_MAX_VDI_TAG_LEN];
>         uint64_t ctime;
> -       uint64_t snap_ctime;
>         uint64_t vm_clock_nsec;
>         uint64_t vdi_size;
>         uint64_t vm_state_size;
> @@ -189,6 +188,9 @@ struct sheepdog_inode {
>         uint32_t snap_id;
>         uint32_t vdi_id;
>         uint32_t parent_vdi_id;
> +       uint32_t snap_vdi_id;
> +       /* padding is to memory alignment */
> +       uint32_t padding;
>         uint32_t child_vdi_id[MAX_CHILDREN];
>         uint32_t data_vdi_id[MAX_DATA_OBJS];
>  };
> @@ -240,7 +242,7 @@ static inline uint64_t hash_64(uint64_t val, unsigned
> int bits)
> 
>  static inline int is_data_obj_writeable(struct sheepdog_inode *inode, int idx)
>  {
> -       return inode->vdi_id == inode->data_vdi_id[idx];
> +       return inode->snap_vdi_id == inode->data_vdi_id[idx];
>  }
> 
>  static inline int is_vdi_obj(uint64_t oid)
> diff --git a/sheep/vdi.c b/sheep/vdi.c
> index 71912ba..81f1a66 100644
> --- a/sheep/vdi.c
> +++ b/sheep/vdi.c
> @@ -82,10 +82,10 @@ static int create_vdi_obj(uint32_t epoch, char *name,
> uint32_t new_vid, uint64_t
>                                 ret = SD_RES_BASE_VDI_READ;
>                                 goto out;
>                         }
> -
> -                       cur->snap_ctime = (uint64_t) tv.tv_sec << 32 |
> tv.tv_usec * 1000;
> -               } else
> -                       base->snap_ctime = (uint64_t) tv.tv_sec << 32 |
> tv.tv_usec * 1000;
> +               } else {
> +                       base->snap_vdi_id = new_vid;
> +                       size = base->vdi_size;
> +               }
>         }
> 
>         strncpy(new->name, name, sizeof(new->name));
> @@ -96,6 +96,7 @@ static int create_vdi_obj(uint32_t epoch, char *name,
> uint32_t new_vid, uint64_t
>         new->nr_copies = copies;
>         new->block_size_shift = find_next_bit(&block_size, BITS_PER_LONG,
> 0);
>         new->snap_id = snapid;
> +       new->snap_vdi_id = new_vid;
> 
>         if (base_vid) {
>                 int i;
> @@ -192,14 +193,15 @@ static int find_first_vdi(uint32_t epoch, unsigned
> long start, unsigned long end
>                 }
> 
>                 if (!strncmp(inode->name, name, strlen(inode->name))) {
> +                       if (!*next_snap)
> +                               *next_snap = inode->snap_id + 1;
>                         vdi_found = 1;
>                         if (tag && tag[0] &&
>                             strncmp(inode->tag, tag,
> sizeof(inode->tag)) != 0)
>                                 continue;
> -                       if (snapid && snapid != inode->snap_id)
> +                       if (snapid != inode->snap_id)
>                                 continue;
> 
> -                       *next_snap = inode->snap_id + 1;
>                         *vid = inode->vdi_id;
>                         *nr_copies = inode->nr_copies;
>                         if (ctime)
> @@ -280,7 +282,7 @@ int add_vdi(uint32_t epoch, char *data, int data_len,
> uint64_t size,
>             int is_snapshot, unsigned int *nr_copies)
>  {
>         uint32_t cur_vid = 0;
> -       uint32_t next_snapid;
> +       uint32_t next_snapid = 0;
>         unsigned long nr, deleted_nr = SD_NR_VDIS, right_nr = SD_NR_VDIS;
>         int ret;
>         char *name;
> @@ -313,7 +315,7 @@ int add_vdi(uint32_t epoch, char *data, int data_len,
> uint64_t size,
>                 else
>                         nr = deleted_nr; /* we can recycle a deleted VDI
> */
> 
> -               next_snapid = 1;
> +               next_snapid = 0;
>         }
> 
>         *new_vid = nr;
> --
> 1.7.1
> 
> 
> ________________________________
> 
> This email (including any attachments) is confidential and may be legally
> privileged. If you received this email in error, please delete it immediately and
> do not copy it or use it for any purpose or disclose its contents to any other
> person. Thank you.
> 
> 本电邮(包括任何附件)可能含有机密资料并受法律保护。如您不是正确的收
> 件人,请您立即删除本邮件。请不要将本电邮进行复制并用作任何其他用
> 途、或透露本邮件之内容。谢谢。
> --
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog


More information about the sheepdog mailing list