[Sheepdog] [PATCH v3] sheep: change snapshot/clone flow

yaohaiting.wujue at gmail.com yaohaiting.wujue at gmail.com
Fri Apr 20 08:40:26 CEST 2012


From: HaiTing Yao <wujue.yht at taobao.com>

When a snapshot is created for a source VDI, the newly created VDI is used
as the source VDI, and the old source VDI is used as the snapshot. This flow
confuses users about the relation between a VDI and its snapshots. The
snapshot metadata may be stored across multiple VDIs, so the inodes of
multiple VDIs must be read to get the snapshot list.

When creating a snapshot, we do not need to turn the newly created VDI into
the source VDI. The source VDI just needs to use the snapshot VDI ID as its
object data ID.

Here is an example.

Before modification:

  Name        Id    Size    Used  Shared    Creation time   VDI id   Tag
s v1           1   64 MB   20 MB  0.0 MB 2012-03-26 16:55   709128
s v1           2   64 MB  0.0 MB   20 MB 2012-03-26 16:56   709129   sn3
  v1           3   64 MB  0.0 MB   20 MB 2012-03-26 16:56   70912a

After modification:

  Name        Id    Size    Used  Shared    Creation time   VDI id   Tag
  v1           0   64 MB   20 MB  0.0 MB 2012-03-27 11:06   709128
s v1           1   64 MB  0.0 MB   20 MB 2012-03-27 11:06   709129
s v1           2   64 MB  0.0 MB   20 MB 2012-03-27 11:07   70912a   sn3

Signed-off-by: HaiTing Yao <wujue.yht at taobao.com>
---
 collie/common.c          |    2 +-
 collie/vdi.c             |   33 ++++++++++++++++++++++-----------
 include/sheepdog_proto.h |    6 ++++--
 sheep/vdi.c              |   18 ++++++++++--------
 4 files changed, 37 insertions(+), 22 deletions(-)

changes from v2:

1. Tag display has already been merged, so I removed it from my patch
2. Added padding to the inode structure; the packed attribute is no longer used

diff --git a/collie/common.c b/collie/common.c
index f4301c4..636b821 100644
--- a/collie/common.c
+++ b/collie/common.c
@@ -13,7 +13,7 @@
 
 int is_current(struct sheepdog_inode *i)
 {
-	return !i->snap_ctime;
+	return !i->snap_id;
 }
 
 char *size_to_str(uint64_t _size, char *str, int str_size)
diff --git a/collie/vdi.c b/collie/vdi.c
index 352e10c..0962fc2 100644
--- a/collie/vdi.c
+++ b/collie/vdi.c
@@ -93,7 +93,10 @@ static void print_vdi_list(uint32_t vid, char *name, char *tag, uint32_t snapid,
 	for (idx = 0; idx < MAX_DATA_OBJS; idx++) {
 		if (!i->data_vdi_id[idx])
 			continue;
-		if (is_data_obj_writeable(i, idx))
+		if (!i->parent_vdi_id)
+			my_objs++;
+		/* for clone VDI */
+		else if ((!i->snap_id) && is_data_obj_writeable(i, idx))
 			my_objs++;
 		else
 			cow_objs++;
@@ -522,7 +525,7 @@ out:
 static int vdi_snapshot(int argc, char **argv)
 {
 	char *vdiname = argv[optind++];
-	uint32_t vid;
+	uint32_t vid, own_vid;
 	int ret;
 	char buf[SD_INODE_HEADER_SIZE];
 	struct sheepdog_inode *inode = (struct sheepdog_inode *)buf;
@@ -539,20 +542,26 @@ static int vdi_snapshot(int argc, char **argv)
 		return EXIT_FAILURE;
 	}
 
-	ret = sd_read_object(vid_to_vdi_oid(vid), inode, SD_INODE_HEADER_SIZE, 0);
-	if (ret != SD_RES_SUCCESS) {
-		fprintf(stderr, "Failed to read an inode header\n");
+	ret = do_vdi_create(vdiname, inode->vdi_size, vid, &own_vid, 1);
+
+	if (ret < 0) {
+		fprintf(stderr, "Failed to write VDI %s\n", vdiname);
 		return EXIT_FAILURE;
 	}
 
 	if (vdi_cmd_data.snapshot_tag[0]) {
-		ret = sd_write_object(vid_to_vdi_oid(vid), 0, vdi_cmd_data.snapshot_tag,
+		ret = sd_read_object(vid_to_vdi_oid(own_vid), inode, SD_INODE_HEADER_SIZE, 0);
+		if (ret != SD_RES_SUCCESS) {
+			fprintf(stderr, "Failed to read an inode header\n");
+			return EXIT_FAILURE;
+		}
+		ret = sd_write_object(vid_to_vdi_oid(own_vid), 0, vdi_cmd_data.snapshot_tag,
 				      SD_MAX_VDI_TAG_LEN,
 				      offsetof(struct sheepdog_inode, tag),
 				      0, inode->nr_copies, 0);
 	}
 
-	return do_vdi_create(vdiname, inode->vdi_size, vid, NULL, 1);
+	return ret;
 }
 
 static int vdi_clone(int argc, char **argv)
@@ -1140,6 +1149,7 @@ static int vdi_read(int argc, char **argv)
 		goto out;
 	}
 
+
 	if (inode->vdi_size < offset) {
 		fprintf(stderr, "Read offset is beyond the end of the VDI\n");
 		ret = EXIT_FAILURE;
@@ -1284,7 +1294,7 @@ static int vdi_write(int argc, char **argv)
 			remain -= ret;
 		}
 
-		inode->data_vdi_id[idx] = inode->vdi_id;
+		inode->data_vdi_id[idx] = inode->snap_vdi_id;
 		oid = vid_to_data_oid(inode->data_vdi_id[idx], idx);
 		ret = sd_write_object(oid, old_oid, buf, len, offset, flags,
 				      inode->nr_copies, create);
@@ -1295,9 +1305,10 @@ static int vdi_write(int argc, char **argv)
 		}
 
 		if (create) {
-			ret = sd_write_object(vid_to_vdi_oid(vid), 0, &vid, sizeof(vid),
-					      SD_INODE_HEADER_SIZE + sizeof(vid) * idx, 0,
-					      inode->nr_copies, 0);
+			/* snap_vdi_id is equal to vdi_id when no snapshot */
+			ret = sd_write_object(vid_to_vdi_oid(vid), 0, &inode->snap_vdi_id,
+				sizeof(inode->snap_vdi_id), SD_INODE_HEADER_SIZE + sizeof(vid) * idx,
+				0, inode->nr_copies, 0);
 			if (ret) {
 				ret = EXIT_FAILURE;
 				goto out;
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 11c2c7c..8691901 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -179,7 +179,6 @@ struct sheepdog_inode {
 	char name[SD_MAX_VDI_LEN];
 	char tag[SD_MAX_VDI_TAG_LEN];
 	uint64_t ctime;
-	uint64_t snap_ctime;
 	uint64_t vm_clock_nsec;
 	uint64_t vdi_size;
 	uint64_t vm_state_size;
@@ -189,6 +188,9 @@ struct sheepdog_inode {
 	uint32_t snap_id;
 	uint32_t vdi_id;
 	uint32_t parent_vdi_id;
+	uint32_t snap_vdi_id;
+	/* padding is to memory alignment */
+	uint32_t padding;
 	uint32_t child_vdi_id[MAX_CHILDREN];
 	uint32_t data_vdi_id[MAX_DATA_OBJS];
 };
@@ -240,7 +242,7 @@ static inline uint64_t hash_64(uint64_t val, unsigned int bits)
 
 static inline int is_data_obj_writeable(struct sheepdog_inode *inode, int idx)
 {
-	return inode->vdi_id == inode->data_vdi_id[idx];
+	return inode->snap_vdi_id == inode->data_vdi_id[idx];
 }
 
 static inline int is_vdi_obj(uint64_t oid)
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 71912ba..81f1a66 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -82,10 +82,10 @@ static int create_vdi_obj(uint32_t epoch, char *name, uint32_t new_vid, uint64_t
 				ret = SD_RES_BASE_VDI_READ;
 				goto out;
 			}
-
-			cur->snap_ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000;
-		} else
-			base->snap_ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000;
+		} else {
+			base->snap_vdi_id = new_vid;
+			size = base->vdi_size;
+		}
 	}
 
 	strncpy(new->name, name, sizeof(new->name));
@@ -96,6 +96,7 @@ static int create_vdi_obj(uint32_t epoch, char *name, uint32_t new_vid, uint64_t
 	new->nr_copies = copies;
 	new->block_size_shift = find_next_bit(&block_size, BITS_PER_LONG, 0);
 	new->snap_id = snapid;
+	new->snap_vdi_id = new_vid;
 
 	if (base_vid) {
 		int i;
@@ -192,14 +193,15 @@ static int find_first_vdi(uint32_t epoch, unsigned long start, unsigned long end
 		}
 
 		if (!strncmp(inode->name, name, strlen(inode->name))) {
+			if (!*next_snap)
+				*next_snap = inode->snap_id + 1;
 			vdi_found = 1;
 			if (tag && tag[0] &&
 			    strncmp(inode->tag, tag, sizeof(inode->tag)) != 0)
 				continue;
-			if (snapid && snapid != inode->snap_id)
+			if (snapid != inode->snap_id)
 				continue;
 
-			*next_snap = inode->snap_id + 1;
 			*vid = inode->vdi_id;
 			*nr_copies = inode->nr_copies;
 			if (ctime)
@@ -280,7 +282,7 @@ int add_vdi(uint32_t epoch, char *data, int data_len, uint64_t size,
 	    int is_snapshot, unsigned int *nr_copies)
 {
 	uint32_t cur_vid = 0;
-	uint32_t next_snapid;
+	uint32_t next_snapid = 0;
 	unsigned long nr, deleted_nr = SD_NR_VDIS, right_nr = SD_NR_VDIS;
 	int ret;
 	char *name;
@@ -313,7 +315,7 @@ int add_vdi(uint32_t epoch, char *data, int data_len, uint64_t size,
 		else
 			nr = deleted_nr; /* we can recycle a deleted VDI */
 
-		next_snapid = 1;
+		next_snapid = 0;
 	}
 
 	*new_vid = nr;
-- 
1.7.1




More information about the sheepdog mailing list