[Sheepdog] [PATCH] sheepdog: add live snapshot feature

OZAWA Tsuyoshi ozawa.tsuyoshi at lab.ntt.co.jp
Mon Apr 19 10:39:14 CEST 2010


This patch provides sheepdog with live snapshot support.

NOTE: For this patch to work correctly, the patch that adds vm_clock_nsec
and vm_state_size to sd_inode must also be applied to collie.

Signed-off-by: OZAWA Tsuyoshi <ozawa.tsuyoshi at lab.ntt.co.jp>
---
 block/sheepdog.c |  284 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 280 insertions(+), 4 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 18ecd22..ded7c75 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -76,6 +76,9 @@
 #define SD_RES_VDI_NOT_LOCKED   0x17 /* Vdi is not locked */
 #define SD_RES_SHUTDOWN      0x18 /* Sheepdog is shutting down */
 
+#define VDI_SNAPSHOT_MASK	0x7fffffff00000000 /* inode oid bits kept when forming a vmstate object id */
+#define VDI_SNAPSHOT_BIT 	0x4000000000000000 /* OR-ed into every vmstate object id */
+
 /* should be configurable? */
 #define MAX_RETRIES 6
 
@@ -209,7 +212,9 @@ struct sd_inode {
 	uint64_t oid;
 	uint64_t ctime;
 	uint64_t snap_ctime;
+	uint64_t vm_clock_nsec;
 	uint64_t vdi_size;
+	uint64_t vm_state_size;
 	uint16_t copy_policy;
 	uint8_t  nr_copies;
 	uint8_t  block_size_shift;
@@ -1043,6 +1048,23 @@ static int read_vdi_obj(char *buf, uint64_t oid, int *copies)
 	}
 }
 
+/* Map vmstate position pos to an object index and an offset inside it; a
+ * request that would run past its object is moved whole to offset 0 of the
+ * next one.  NOTE(review): a later request may map onto those same bytes. */
+static void set_vdi_index_and_offset(int *vdi_index, int *offset,
+				int64_t pos, int size)
+{
+	int end = (int)(pos % SD_DATA_OBJ_SIZE) + size;
+	int spill = end / SD_DATA_OBJ_SIZE;
+
+	*vdi_index = pos / SD_DATA_OBJ_SIZE;
+	*offset = pos % SD_DATA_OBJ_SIZE;
+	if (!spill || end % SD_DATA_OBJ_SIZE == 0)
+		return;
+	(*vdi_index)++;
+	*offset = 0;
+}
+
 /* TODO: error cleanups */
 static int sd_open(BlockDriverState *bs, const char *filename, int flags)
 {
@@ -1572,9 +1594,15 @@ static BlockDriverAIOCB *sd_aio_readv(BlockDriverState *bs,
 static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 {
 	struct bdrv_sd_state *s = bs->opaque;
-	int ret;
+	int ret, fd;
+	struct sd_obj_req hdr;
+	unsigned int rlen, wlen;
+	uint64_t new_oid;
+	struct sd_inode *inode;
 
-	eprintf("%s %s %s %d %d\n", sn_info->name, sn_info->id_str,
+	sd_release(bs);
+	eprintf("sn_info: name %s id_str %s s: name %s vm_state_size %d"
+		"is_current %d\n", sn_info->name, sn_info->id_str,
 		s->name, sn_info->vm_state_size, s->is_current);
 
 	if (!s->is_current) {
@@ -1587,12 +1615,156 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 
 	dprintf("%s %s\n", sn_info->name, sn_info->id_str);
 
-	ret = do_sd_create(s->name, sn_info->name, s->inode.vdi_size >> 9,
-			   s->inode.oid, NULL, 1);
+	s->inode.vm_state_size = sn_info->vm_state_size;
+	s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
+	/* cleanup frees inode on every path, so start it out as NULL */
+	inode = NULL;
+
+	/* Store the inode, now carrying the vmstate size and clock set above,
+	 * to its own object. */
+	fd = connect_to_vost();
+	if (fd < 0)
+		return -EIO;	/* nothing to close or free yet */
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.opcode = SD_OP_WRITE_OBJ;
+	hdr.oid = s->inode.oid;
+	hdr.copies = s->inode.nr_copies;
+	hdr.flags |= SD_FLAG_CMD_WRITE;
+	hdr.data_length = SD_INODE_SIZE;
+	hdr.offset = 0;
+	wlen = SD_INODE_SIZE;
+	rlen = 0;
+
+	ret = do_req(fd, (struct sd_req *)&hdr, &s->inode, &wlen, &rlen);
+	if (ret < 0) {
+		eprintf("do_req write\n");
+		ret = -EIO;
+		goto cleanup;
+	}
+
+	/* do_sd_create() is handed &new_oid; the read below treats it as the
+	 * object id of the inode to load. */
+	ret = do_sd_create(s->name, NULL, s->inode.vdi_size >> 9,
+			   s->inode.oid, &new_oid, 1);
+	if (ret < 0) {
+		eprintf("do_sd_create %m");
+		ret = -EIO;
+		goto cleanup;
+	}
+
+	inode = malloc(sizeof(*inode));
+	if (!inode) {
+		eprintf("malloc %m");
+		/* report the failure; ret still held do_sd_create()'s result */
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	/* Fetch that inode into the temporary buffer, then adopt it as the
+	 * current inode. */
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.opcode = SD_OP_READ_OBJ;
+	hdr.oid = new_oid;
+	hdr.data_length = SD_INODE_SIZE;
+	hdr.offset = 0;
+	wlen = 0;
+	rlen = SD_INODE_SIZE;
+
+	ret = do_req(fd, (struct sd_req *)&hdr, inode, &wlen, &rlen);
+	if (ret < 0) {
+		eprintf("do_req read\n");
+		ret = -EIO;
+		goto cleanup;
+	}
+
+	memcpy(&s->inode, inode, sizeof(struct sd_inode));
+	eprintf("s->inode: name %s snap_id %x oid %lx\n",
+		s->inode.name, s->inode.snap_id, s->inode.oid);
+
+cleanup:
+	/* inode is NULL unless the allocation above succeeded */
+	free(inode);
+	close(fd);
+	return ret;
+}
+
+/* Switch to the snapshot whose id is the decimal string snapshot_id (the form
+ * sd_snapshot_list() prints).  Returns 0 or a negative errno value; failures
+ * after the state was touched copy the previous bdrv_sd_state back first. */
+static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
+{
+	struct bdrv_sd_state *s = bs->opaque;
+	struct bdrv_sd_state *old_s;
+	char vdi[256];
+	char *buf = NULL;
+	uint64_t oid;
+	uint32_t snapid = 0;
+	int ret = -ENOENT, dummy;
+
+	old_s = malloc(sizeof(*old_s));
+	if (!old_s) {
+		eprintf("malloc");
+		return -ENOMEM;	/* state untouched and no copy to restore from */
+	}
+
+	memcpy(old_s, s, sizeof(*old_s));
+	sd_release(bs);
+
+	/* base 10, matching the "%u" id_str written by sd_snapshot_list() */
+	snapid = strtol(snapshot_id, NULL, 10);
+	if (!snapid) {
+		eprintf("Invalid snapshot_id\n");
+		goto out;
+	}
+
+	buf = malloc(SD_INODE_SIZE);
+	if (!buf) {
+		eprintf("Failed to allocate memory\n");
+		goto out;
+	}
+	snprintf(vdi, sizeof(vdi), "%s", s->name);	/* bounded by vdi itself */
+	ret = find_vdi_name(s, vdi, snapid, &oid);
+	if (ret) {
+		eprintf("Failed to find_vdi_name\n");
+		ret = -ENOENT;
+		goto out;
+	}
+
+	ret = read_vdi_obj(buf, oid, &dummy);
+	if (ret) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	memcpy(&s->inode, buf, sizeof(s->inode));
+	/* an inode that records no vmstate is rejected as a goto target */
+	if (!s->inode.vm_state_size) {
+		eprintf("Invalid snapshot\n");
+		ret = -ENOENT;
+		goto out;
+	}
+
+	s->is_current = 0;
+	free(buf);
+	free(old_s);
+	return 0;
+out:
+	/* recover bdrv_sd_state */
+	memcpy(s, old_s, sizeof(struct bdrv_sd_state));
+	free(buf);
+	free(old_s);
+	eprintf("failed to open. recover old bdrv_sd_state.\n");
 
 	return ret;
 }
 
+static int sd_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
+{
+	/* FIXME: stub -- deletes nothing but still reports success (0). */
+	return 0;
+}
+
 struct sd_so_req {
 	uint8_t		proto_ver;
 	uint8_t		opcode;
@@ -1696,6 +1868,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
 		if (!strcmp(inode.name, s->name) && inode.snap_ctime) {
 			sn_tab[found].date_sec = inode.snap_ctime >> 32;
 			sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff;
+			sn_tab[found].vm_state_size = inode.vm_state_size;
+			sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec;
 
 			snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u",
 				 inode.snap_id);
@@ -1710,6 +1884,100 @@ out:
 	return found;
 }
 
+/*
+ * Write VM state to a vmstate object derived from the current inode's oid.
+ *
+ * data/size: buffer to send and its length in bytes.
+ * pos: byte position in the vmstate stream; it selects the object index
+ *      and the offset inside that object.
+ * Returns do_req()'s non-negative result on success, or -EIO if the
+ * connection or the request fails.
+ */
+static int sd_save_vmstate(BlockDriverState *bs, const uint8_t *data,
+				int64_t pos, int size)
+{
+	struct bdrv_sd_state *s = bs->opaque;
+	struct sd_obj_req hdr;
+	unsigned int rlen = 0, wlen = 0;
+	uint64_t sn_oid;
+	int vdi_index, offset;
+	int fd, ret;
+
+	set_vdi_index_and_offset(&vdi_index, &offset, pos, size);
+
+	/* masked inode oid, plus the snapshot bit, plus the object index */
+	sn_oid = VDI_SNAPSHOT_MASK & s->inode.oid;
+	sn_oid |= VDI_SNAPSHOT_BIT;
+	sn_oid |= vdi_index;
+
+	fd = connect_to_vost();
+	if (fd < 0)
+		return -EIO;	/* no descriptor was opened, so none to close */
+
+	memset(&hdr, 0, sizeof(hdr));
+	/* offset 0 is taken to be the first write to an object, so it uses
+	 * the create-and-write opcode; later offsets use a plain write */
+	hdr.opcode = offset ? SD_OP_WRITE_OBJ : SD_OP_CREATE_AND_WRITE_OBJ;
+	hdr.oid = sn_oid;
+	hdr.cow_oid = 0;
+	hdr.copies = s->inode.nr_copies;
+	hdr.flags |= SD_FLAG_CMD_WRITE;
+	hdr.data_length = size;
+	hdr.offset = offset;
+	wlen = size;
+
+	ret = do_req(fd, (struct sd_req *)&hdr, data, &wlen, &rlen);
+	if (ret < 0) {
+		eprintf("do_req %m");
+		ret = -EIO;
+	}
+	close(fd);
+	return ret;
+}
+
+/*
+ * Read 'size' bytes of VM state at stream position 'pos' into 'data'.
+ * Returns rlen (initialised to size and passed by address to do_req()) on
+ * success, or -EIO if the connection or the request fails.
+ */
+static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
+				int64_t pos, int size)
+{
+	struct bdrv_sd_state *s = bs->opaque;
+	struct sd_obj_req hdr;
+	unsigned int rlen = size, wlen = 0;
+	uint64_t sn_oid;
+	int vdi_index, offset;
+	int fd;
+
+	set_vdi_index_and_offset(&vdi_index, &offset, pos, size);
+
+	/* same object id layout that sd_save_vmstate() writes to */
+	sn_oid = VDI_SNAPSHOT_MASK & s->inode.oid;
+	sn_oid |= VDI_SNAPSHOT_BIT;
+	sn_oid |= vdi_index;
+
+	fd = connect_to_vost();
+	if (fd < 0)
+		return -EIO;
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.opcode = SD_OP_READ_OBJ;
+	hdr.oid = sn_oid;
+	hdr.data_length = size;
+	hdr.offset = offset;
+
+	if (do_req(fd, (struct sd_req *)&hdr, data, &wlen, &rlen) < 0) {
+		eprintf("do_req %m");
+		close(fd);
+		/* report the failure instead of returning rlen */
+		return -EIO;
+	}
+
+	close(fd);
+	return rlen;
+}
+
+
 static QEMUOptionParameter sd_create_options[] = {
 	{
 		.name = BLOCK_OPT_SIZE,
@@ -1738,8 +2006,16 @@ BlockDriver bdrv_sheepdog = {
 	.bdrv_aio_writev = sd_aio_writev,
 
 	.bdrv_snapshot_create = sd_snapshot_create,
+	.bdrv_snapshot_goto = sd_snapshot_goto,
+	.bdrv_snapshot_delete   = sd_snapshot_delete,
 	.bdrv_snapshot_list = sd_snapshot_list,
+
+
+	.bdrv_save_vmstate    = sd_save_vmstate,
+	.bdrv_load_vmstate    = sd_load_vmstate,
+
 	.create_options = sd_create_options,
+
 };
 
 static void bdrv_sheepdog_init(void)
-- 
1.6.5




More information about the sheepdog mailing list