If this patch is merged, the sheepdog driver in the QEMU project also needs to be changed. I have prepared the patch below, but have not submitted it to the QEMU project yet. Perhaps I should submit it to QEMU after the change is confirmed here. --- block/sheepdog.c | 37 ++++++++++++++++++++----------------- 1 files changed, 20 insertions(+), 17 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index 3eaf625..cec5294 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -167,7 +167,6 @@ typedef struct SheepdogInode { char name[SD_MAX_VDI_LEN]; char tag[SD_MAX_VDI_TAG_LEN]; uint64_t ctime; - uint64_t snap_ctime; uint64_t vm_clock_nsec; uint64_t vdi_size; uint64_t vm_state_size; @@ -177,6 +176,9 @@ typedef struct SheepdogInode { uint32_t snap_id; uint32_t vdi_id; uint32_t parent_vdi_id; + uint32_t snap_vdi_id; + /* padding is to memory alignment */ + uint32_t padding; uint32_t child_vdi_id[MAX_CHILDREN]; uint32_t data_vdi_id[MAX_DATA_OBJS]; } SheepdogInode; @@ -203,7 +205,7 @@ static inline uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval) static inline int is_data_obj_writable(SheepdogInode *inode, unsigned int idx) { - return inode->vdi_id == inode->data_vdi_id[idx]; + return inode->snap_vdi_id == inode->data_vdi_id[idx]; } static inline int is_data_obj(uint64_t oid) @@ -233,7 +235,7 @@ static inline uint64_t vid_to_data_oid(uint32_t vid, uint32_t idx) static inline int is_snapshot(struct SheepdogInode *inode) { - return !!inode->snap_ctime; + return inode->snap_id; } #undef dprintf @@ -694,14 +696,14 @@ static void coroutine_fn aio_read_response(void *opaque) } idx = data_oid_to_idx(aio_req->oid); - if (s->inode.data_vdi_id[idx] != s->inode.vdi_id) { + if (s->inode.data_vdi_id[idx] != s->inode.snap_vdi_id) { /* * If the object is newly created one, we need to update * the vdi object (metadata object). min_dirty_data_idx * and max_dirty_data_idx are changed to include updated * index between them. 
*/ - s->inode.data_vdi_id[idx] = s->inode.vdi_id; + s->inode.data_vdi_id[idx] = s->inode.snap_vdi_id; s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx); s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx); @@ -1552,7 +1554,7 @@ static int coroutine_fn sd_co_rw_vector(void *p) dprintf("update ino (%" PRIu32") %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", inode->vdi_id, oid, vid_to_data_oid(inode->data_vdi_id[idx], idx), idx); - oid = vid_to_data_oid(inode->vdi_id, idx); + oid = vid_to_data_oid(inode->snap_vdi_id, idx); dprintf("new oid %lx\n", oid); } @@ -1710,7 +1712,6 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) s->inode.vm_state_size = sn_info->vm_state_size; s->inode.vm_clock_nsec = sn_info->vm_clock_nsec; - strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag)); /* we don't need to update entire object */ datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); @@ -1721,14 +1722,6 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) goto cleanup; } - ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), - s->inode.nr_copies, datalen, 0, 0, s->cache_enabled); - if (ret < 0) { - error_report("failed to write snapshot's inode."); - ret = -EIO; - goto cleanup; - } - ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid, 1, s->addr, s->port); if (ret < 0) { @@ -1750,6 +1743,16 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) } memcpy(&s->inode, inode, datalen); + strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag)); + + ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(new_vid), + s->inode.nr_copies, datalen, 0, 0, s->cache_enabled); + if (ret < 0) { + error_report("failed to write snapshot's inode."); + ret = -EIO; + goto cleanup; + } + dprintf("s->inode: name %s snap_id %x oid %x\n", s->inode.name, s->inode.snap_id, s->inode.vdi_id); @@ -1899,8 +1902,8 @@ static int sd_snapshot_list(BlockDriverState *bs, 
QEMUSnapshotInfo **psn_tab) } if (!strcmp(inode.name, s->name) && is_snapshot(&inode)) { - sn_tab[found].date_sec = inode.snap_ctime >> 32; - sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff; + sn_tab[found].date_sec = inode.ctime >> 32; + sn_tab[found].date_nsec = inode.ctime & 0xffffffff; sn_tab[found].vm_state_size = inode.vm_state_size; sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec; Thanks Haiting -----邮件原件----- 发件人: yaohaiting.wujue at gmail.com [mailto:yaohaiting.wujue at gmail.com] 发送时间: 2012年4月20日 14:40 收件人: sheepdog at lists.wpkg.org 抄送: 无觉 主题: [PATCH v3] sheep: change snapshot/clone flow From: HaiTing Yao <wujue.yht at taobao.com> When a snapshot is created for a source VDI, the newly created VDI is used as the source VDI, and the old source VDI is used as the snapshot. This flow confuses users about the relation between a VDI and its snapshots. The snapshot metadata may be stored across multiple VDIs, so the inodes of multiple VDIs must be read to get the snapshot list. When creating a snapshot, we do not need to turn the newly created VDI into the source VDI. The source VDI just needs to use the snapshot VDI ID as its data object ID. Here is an example. 
Before modification: Name Id Size Used Shared Creation time VDI id Tag s v1 1 64 MB 20 MB 0.0 MB 2012-03-26 16:55 709128 s v1 2 64 MB 0.0 MB 20 MB 2012-03-26 16:56 709129 sn3 v1 3 64 MB 0.0 MB 20 MB 2012-03-26 16:56 70912a After modification: Name Id Size Used Shared Creation time VDI id Tag v1 0 64 MB 20 MB 0.0 MB 2012-03-27 11:06 709128 s v1 1 64 MB 0.0 MB 20 MB 2012-03-27 11:06 709129 s v1 2 64 MB 0.0 MB 20 MB 2012-03-27 11:07 70912a sn3 Signed-off-by: HaiTing Yao <wujue.yht at taobao.com> --- collie/common.c | 2 +- collie/vdi.c | 33 ++++++++++++++++++++++----------- include/sheepdog_proto.h | 6 ++++-- sheep/vdi.c | 18 ++++++++++-------- 4 files changed, 37 insertions(+), 22 deletions(-) Changes from v2: 1. The tag display has already been merged, so I removed it from my patch. 2. Added padding to the inode structure; the packed attribute is no longer used. diff --git a/collie/common.c b/collie/common.c index f4301c4..636b821 100644 --- a/collie/common.c +++ b/collie/common.c @@ -13,7 +13,7 @@ int is_current(struct sheepdog_inode *i) { - return !i->snap_ctime; + return !i->snap_id; } char *size_to_str(uint64_t _size, char *str, int str_size) diff --git a/collie/vdi.c b/collie/vdi.c index 352e10c..0962fc2 100644 --- a/collie/vdi.c +++ b/collie/vdi.c @@ -93,7 +93,10 @@ static void print_vdi_list(uint32_t vid, char *name, char *tag, uint32_t snapid, for (idx = 0; idx < MAX_DATA_OBJS; idx++) { if (!i->data_vdi_id[idx]) continue; - if (is_data_obj_writeable(i, idx)) + if (!i->parent_vdi_id) + my_objs++; + /* for clone VDI */ + else if ((!i->snap_id) && is_data_obj_writeable(i, idx)) my_objs++; else cow_objs++; @@ -522,7 +525,7 @@ out: static int vdi_snapshot(int argc, char **argv) { char *vdiname = argv[optind++]; - uint32_t vid; + uint32_t vid, own_vid; int ret; char buf[SD_INODE_HEADER_SIZE]; struct sheepdog_inode *inode = (struct sheepdog_inode *)buf; @@ -539,20 +542,26 @@ static int vdi_snapshot(int argc, char **argv) return EXIT_FAILURE; } - ret = 
sd_read_object(vid_to_vdi_oid(vid), inode, SD_INODE_HEADER_SIZE, 0); - if (ret != SD_RES_SUCCESS) { - fprintf(stderr, "Failed to read an inode header\n"); + ret = do_vdi_create(vdiname, inode->vdi_size, vid, &own_vid, 1); + + if (ret < 0) { + fprintf(stderr, "Failed to write VDI %s\n", vdiname); return EXIT_FAILURE; } if (vdi_cmd_data.snapshot_tag[0]) { - ret = sd_write_object(vid_to_vdi_oid(vid), 0, vdi_cmd_data.snapshot_tag, + ret = sd_read_object(vid_to_vdi_oid(own_vid), inode, SD_INODE_HEADER_SIZE, 0); + if (ret != SD_RES_SUCCESS) { + fprintf(stderr, "Failed to read an inode header\n"); + return EXIT_FAILURE; + } + ret = sd_write_object(vid_to_vdi_oid(own_vid), 0, vdi_cmd_data.snapshot_tag, SD_MAX_VDI_TAG_LEN, offsetof(struct sheepdog_inode, tag), 0, inode->nr_copies, 0); } - return do_vdi_create(vdiname, inode->vdi_size, vid, NULL, 1); + return ret; } static int vdi_clone(int argc, char **argv) @@ -1140,6 +1149,7 @@ static int vdi_read(int argc, char **argv) goto out; } + if (inode->vdi_size < offset) { fprintf(stderr, "Read offset is beyond the end of the VDI\n"); ret = EXIT_FAILURE; @@ -1284,7 +1294,7 @@ static int vdi_write(int argc, char **argv) remain -= ret; } - inode->data_vdi_id[idx] = inode->vdi_id; + inode->data_vdi_id[idx] = inode->snap_vdi_id; oid = vid_to_data_oid(inode->data_vdi_id[idx], idx); ret = sd_write_object(oid, old_oid, buf, len, offset, flags, inode->nr_copies, create); @@ -1295,9 +1305,10 @@ static int vdi_write(int argc, char **argv) } if (create) { - ret = sd_write_object(vid_to_vdi_oid(vid), 0, &vid, sizeof(vid), - SD_INODE_HEADER_SIZE + sizeof(vid) * idx, 0, - inode->nr_copies, 0); + /* snap_vdi_id is equal to vdi_id when no snapshot */ + ret = sd_write_object(vid_to_vdi_oid(vid), 0, &inode->snap_vdi_id, + sizeof(inode->snap_vdi_id), SD_INODE_HEADER_SIZE + sizeof(vid) * idx, + 0, inode->nr_copies, 0); if (ret) { ret = EXIT_FAILURE; goto out; diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h index 11c2c7c..8691901 
100644 --- a/include/sheepdog_proto.h +++ b/include/sheepdog_proto.h @@ -179,7 +179,6 @@ struct sheepdog_inode { char name[SD_MAX_VDI_LEN]; char tag[SD_MAX_VDI_TAG_LEN]; uint64_t ctime; - uint64_t snap_ctime; uint64_t vm_clock_nsec; uint64_t vdi_size; uint64_t vm_state_size; @@ -189,6 +188,9 @@ struct sheepdog_inode { uint32_t snap_id; uint32_t vdi_id; uint32_t parent_vdi_id; + uint32_t snap_vdi_id; + /* padding is to memory alignment */ + uint32_t padding; uint32_t child_vdi_id[MAX_CHILDREN]; uint32_t data_vdi_id[MAX_DATA_OBJS]; }; @@ -240,7 +242,7 @@ static inline uint64_t hash_64(uint64_t val, unsigned int bits) static inline int is_data_obj_writeable(struct sheepdog_inode *inode, int idx) { - return inode->vdi_id == inode->data_vdi_id[idx]; + return inode->snap_vdi_id == inode->data_vdi_id[idx]; } static inline int is_vdi_obj(uint64_t oid) diff --git a/sheep/vdi.c b/sheep/vdi.c index 71912ba..81f1a66 100644 --- a/sheep/vdi.c +++ b/sheep/vdi.c @@ -82,10 +82,10 @@ static int create_vdi_obj(uint32_t epoch, char *name, uint32_t new_vid, uint64_t ret = SD_RES_BASE_VDI_READ; goto out; } - - cur->snap_ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000; - } else - base->snap_ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000; + } else { + base->snap_vdi_id = new_vid; + size = base->vdi_size; + } } strncpy(new->name, name, sizeof(new->name)); @@ -96,6 +96,7 @@ static int create_vdi_obj(uint32_t epoch, char *name, uint32_t new_vid, uint64_t new->nr_copies = copies; new->block_size_shift = find_next_bit(&block_size, BITS_PER_LONG, 0); new->snap_id = snapid; + new->snap_vdi_id = new_vid; if (base_vid) { int i; @@ -192,14 +193,15 @@ static int find_first_vdi(uint32_t epoch, unsigned long start, unsigned long end } if (!strncmp(inode->name, name, strlen(inode->name))) { + if (!*next_snap) + *next_snap = inode->snap_id + 1; vdi_found = 1; if (tag && tag[0] && strncmp(inode->tag, tag, sizeof(inode->tag)) != 0) continue; - if (snapid && snapid != inode->snap_id) + if 
(snapid != inode->snap_id) continue; - *next_snap = inode->snap_id + 1; *vid = inode->vdi_id; *nr_copies = inode->nr_copies; if (ctime) @@ -280,7 +282,7 @@ int add_vdi(uint32_t epoch, char *data, int data_len, uint64_t size, int is_snapshot, unsigned int *nr_copies) { uint32_t cur_vid = 0; - uint32_t next_snapid; + uint32_t next_snapid = 0; unsigned long nr, deleted_nr = SD_NR_VDIS, right_nr = SD_NR_VDIS; int ret; char *name; @@ -313,7 +315,7 @@ int add_vdi(uint32_t epoch, char *data, int data_len, uint64_t size, else nr = deleted_nr; /* we can recycle a deleted VDI */ - next_snapid = 1; + next_snapid = 0; } *new_vid = nr; -- 1.7.1 ________________________________ This email (including any attachments) is confidential and may be legally privileged. If you received this email in error, please delete it immediately and do not copy it or use it for any purpose or disclose its contents to any other person. Thank you. 本电邮(包括任何附件)可能含有机密资料并受法律保护。如您不是正确的收件人,请您立即删除本邮件。请不要将本电邮进行复制并用作任何其他用途、或透露本邮件之内容。谢谢。 |