[Sheepdog] [PATCH v2] sheepdog:add live snapshot feature

OZAWA Tsuyoshi ozawa.tsuyoshi at lab.ntt.co.jp
Wed Apr 21 14:41:53 CEST 2010


Support the savevm and loadvm commands of the QEMU monitor for sheepdog.

A special VDI space is needed to save and load the vmstate.
This version uses the unused VDI space marked by

VDI_VMSTATE_BIT         0x4000000000000000

to do this.

Signed-off-by: OZAWA Tsuyoshi <ozawa.tsuyoshi at lab.ntt.co.jp>
---
 block/sheepdog.c |  300 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 296 insertions(+), 4 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index ea81c34..c129d5d 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -76,6 +76,9 @@
 #define SD_RES_VDI_NOT_LOCKED   0x17 /* Vdi is not locked */
 #define SD_RES_SHUTDOWN      0x18 /* Sheepdog is shutting down */
 
+#define VDI_VMSTATE_MASK	0x7fffffff00000000 /* oid bits kept by get_vmstate_oid() */
+#define VDI_VMSTATE_BIT 	0x4000000000000000 /* OR-ed into every vmstate object id */
+
 /* should be configurable? */
 #define MAX_RETRIES 6
 
@@ -209,7 +212,9 @@ struct sd_inode {
 	uint64_t oid;
 	uint64_t ctime;
 	uint64_t snap_ctime;
+	uint64_t vm_clock_nsec;
 	uint64_t vdi_size;
+	uint64_t vm_state_size;
 	uint16_t copy_policy;
 	uint8_t  nr_copies;
 	uint8_t  block_size_shift;
@@ -1045,6 +1050,27 @@ static int read_vdi_obj(char *buf, uint64_t oid, int *copies)
 	}
 }
 
+static uint32_t get_vmstate_index(uint64_t pos)
+{
+	return pos / SD_DATA_OBJ_SIZE;	/* number of whole objects before pos */
+}
+
+static uint64_t get_vmstate_offset(uint64_t pos)
+{
+	return pos % SD_DATA_OBJ_SIZE;	/* byte offset of pos inside its object */
+}
+
+/*
+ * Id of the vdi_index-th vmstate object of oid: the oid bits selected by
+ * VDI_VMSTATE_MASK, plus VDI_VMSTATE_BIT, plus the object index.
+ */
+static uint64_t get_vmstate_oid(uint64_t oid, uint32_t vdi_index)
+{
+	uint64_t vdi_bits = oid & VDI_VMSTATE_MASK;
+
+	return vdi_bits | VDI_VMSTATE_BIT | vdi_index;
+}
+
 /* TODO: error cleanups */
 static int sd_open(BlockDriverState *bs, const char *filename, int flags)
 {
@@ -1574,9 +1600,16 @@ static BlockDriverAIOCB *sd_aio_readv(BlockDriverState *bs,
 static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 {
 	struct bdrv_sd_state *s = bs->opaque;
-	int ret;
+	int ret, fd, dummy;
+	struct sd_obj_req hdr;
+	unsigned int rlen, wlen;
+	uint64_t new_oid;
+	struct sd_inode *inode;
+
+	sd_release(bs);
 
-	eprintf("%s %s %s %d %d\n", sn_info->name, sn_info->id_str,
+	eprintf("sn_info: name %s id_str %s s: name %s vm_state_size %d "
+		"is_current %d\n", sn_info->name, sn_info->id_str,
 		s->name, sn_info->vm_state_size, s->is_current);
 
 	if (!s->is_current) {
@@ -1589,12 +1622,145 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 
 	dprintf("%s %s\n", sn_info->name, sn_info->id_str);
 
-	ret = do_sd_create(s->name, sn_info->name, s->inode.vdi_size >> 9,
-			   s->inode.oid, NULL, 1);
+	s->inode.vm_state_size = sn_info->vm_state_size;
+	s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
+
+	/* refresh inode. */
+	fd = connect_to_vost();
+	if (fd < 0) {
+		ret = -EIO;
+		goto cleanup;
+	}
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.opcode = SD_OP_WRITE_OBJ;
+
+	hdr.oid = s->inode.oid;
+	hdr.copies = s->inode.nr_copies;
+
+	hdr.flags |= SD_FLAG_CMD_WRITE;
+	hdr.data_length = SD_INODE_SIZE;
+	hdr.offset = 0;
+	wlen = SD_INODE_SIZE;
+	rlen = 0;
+
+	ret = do_req(fd, (struct sd_req *)&hdr, &s->inode, &wlen, &rlen);
+	if (ret < 0) {
+		eprintf("failed to write snapshot's inode.\n");
+		ret = -EIO;
+		goto cleanup;
+	}
+
+	if (fd < 0) {
+		ret = -EIO;
+		goto cleanup;
+	}
+
+	ret = do_sd_create(s->name, NULL, s->inode.vdi_size >> 9,
+			   s->inode.oid, &new_oid, 1);
+	if (ret < 0) {
+		eprintf("failed to create inode for snapshot. %m\n");
+		ret = -EIO;
+		goto cleanup;
+	}
+
+	inode = (struct sd_inode *)malloc(sizeof(struct sd_inode));
+	if (!inode) {
+		eprintf("failed to allocate memory for inode. %m\n");
+		goto cleanup;
+	}
+
+	if (read_vdi_obj((char *)inode, new_oid, &dummy) < 0) {
+		eprintf("failed to read new inode info. %m\n");
+		ret = -EIO;
+		goto cleanup;
+	}
+
+	memcpy(&s->inode, inode, sizeof(struct sd_inode));
+	eprintf("s->inode: name %s snap_id %x oid %lx\n",
+		s->inode.name, s->inode.snap_id, s->inode.oid);
 
+cleanup:
+	close(fd);
 	return ret;
 }
 
+/*
+ * Switch the open VDI to the snapshot whose decimal id is snapshot_id.
+ * Only snapshots that carry saved VM state (vm_state_size != 0) are
+ * accepted.  On failure the driver state copied on entry is restored.
+ * Returns 0 on success or a negative errno value.
+ */
+static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
+{
+	struct bdrv_sd_state *s = bs->opaque;
+	struct bdrv_sd_state *old_s;
+	char vdi[256];
+	char *buf = NULL;
+	uint64_t oid;
+	uint32_t snapid;
+	int ret = -ENOENT, dummy;
+
+	sd_release(bs);
+
+	old_s = malloc(sizeof(*old_s));
+	if (!old_s) {
+		/* nothing was modified yet, so there is no state to restore */
+		eprintf("failed to allocate memory for old state. %m\n");
+		return -ENOMEM;
+	}
+	memcpy(old_s, s, sizeof(*old_s));
+
+	snapid = strtol(snapshot_id, NULL, 10);
+	if (!snapid) {
+		eprintf("Invalid snapshot_id\n");
+		goto out;
+	}
+
+	buf = malloc(SD_INODE_SIZE);
+	if (!buf) {
+		eprintf("Failed to allocate memory\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* bounded by the destination, so a long s->name cannot overrun vdi[] */
+	snprintf(vdi, sizeof(vdi), "%s", s->name);
+	if (find_vdi_name(s, vdi, snapid, &oid)) {
+		eprintf("Failed to find_vdi_name\n");
+		goto out;
+	}
+
+	if (read_vdi_obj(buf, oid, &dummy))
+		goto out;
+
+	memcpy(&s->inode, buf, sizeof(s->inode));
+	if (!s->inode.vm_state_size) {
+		eprintf("Invalid snapshot\n");
+		goto out;
+	}
+
+	s->is_current = 0;
+	free(buf);
+	free(old_s);
+
+	return 0;
+
+out:
+	/* recover bdrv_sd_state */
+	memcpy(s, old_s, sizeof(*old_s));
+	free(buf);
+	free(old_s);
+	eprintf("failed to open. recover old bdrv_sd_state.\n");
+	return ret;
+}
+
+static int sd_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
+{
+	/* FIXME: Delete specified snapshot id; for now a no-op reporting success. */
+	return 0;
+}
+
 struct sd_so_req {
 	uint8_t		proto_ver;
 	uint8_t		opcode;
@@ -1698,6 +1864,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
 		if (!strcmp(inode.name, s->name) && inode.snap_ctime) {
 			sn_tab[found].date_sec = inode.snap_ctime >> 32;
 			sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff;
+			sn_tab[found].vm_state_size = inode.vm_state_size;
+			sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec;
 
 			snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u",
 				 inode.snap_id);
@@ -1712,6 +1880,122 @@ out:
 	return found;
 }
 
+/*
+ * Write VM state into the vmstate area of the current VDI.
+ *
+ * The area is addressed as a sequence of SD_DATA_OBJ_SIZE-byte objects whose
+ * ids come from get_vmstate_oid(); pos selects the object and the offset in
+ * it, and each request is clipped so it stays inside that object.
+ *
+ * data: buffer to write, size bytes long
+ * pos:  byte position in the vmstate area to start writing at
+ *
+ * Returns the number of bytes written, or -EIO if the connection or a
+ * request fails.
+ */
+static int sd_save_vmstate(BlockDriverState *bs, const uint8_t *data,
+				int64_t pos, int size)
+{
+	struct bdrv_sd_state *s = bs->opaque;
+	struct sd_obj_req hdr;
+	unsigned int rlen, wlen;
+	uint64_t offset, room;
+	int fd, ret = 0;
+
+	fd = connect_to_vost();
+	if (fd < 0)
+		return -EIO;	/* no socket to close */
+
+	while (size > 0) {
+		offset = get_vmstate_offset(pos);
+		/* never let one request run past the end of its object */
+		room = SD_DATA_OBJ_SIZE - offset;
+		wlen = (uint64_t)size < room ? size : room;
+
+		memset(&hdr, 0, sizeof(hdr));
+		/* an object is created by the write that starts at its offset 0 */
+		hdr.opcode = offset ? SD_OP_WRITE_OBJ :
+				      SD_OP_CREATE_AND_WRITE_OBJ;
+		hdr.oid = get_vmstate_oid(s->inode.oid, get_vmstate_index(pos));
+		hdr.cow_oid = 0;
+		hdr.copies = s->inode.nr_copies;
+		hdr.flags |= SD_FLAG_CMD_WRITE;
+		hdr.data_length = wlen;
+		hdr.offset = offset;
+		rlen = 0;
+
+		if (do_req(fd, (struct sd_req *)&hdr, data, &wlen, &rlen) < 0) {
+			eprintf("failed to save vmstate %m\n");
+			ret = -EIO;
+			break;
+		}
+
+		/* advance the source buffer together with the position */
+		data += wlen;
+		pos += wlen;
+		size -= wlen;
+		ret += wlen;
+	}
+
+	close(fd);
+	return ret;
+}
+
+/*
+ * Read VM state back from the vmstate area of the current VDI.
+ *
+ * pos selects the vmstate object and the offset inside it (see
+ * get_vmstate_index() and get_vmstate_offset()); each request is clipped
+ * so it stays inside one SD_DATA_OBJ_SIZE-byte object.
+ *
+ * data: destination buffer with room for size bytes
+ *
+ * Returns the number of bytes read, or -EIO on failure.
+ */
+static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
+				int64_t pos, int size)
+{
+	struct bdrv_sd_state *s = bs->opaque;
+	struct sd_obj_req hdr;
+	unsigned int rlen, wlen;
+	uint64_t offset, room;
+	int fd, ret = 0;
+
+	fd = connect_to_vost();
+	if (fd < 0)
+		return -EIO;	/* no socket to close */
+
+	while (size > 0) {
+		offset = get_vmstate_offset(pos);
+		/* never let one request run past the end of its object */
+		room = SD_DATA_OBJ_SIZE - offset;
+		rlen = (uint64_t)size < room ? size : room;
+
+		memset(&hdr, 0, sizeof(hdr));
+		hdr.opcode = SD_OP_READ_OBJ;
+		hdr.oid = get_vmstate_oid(s->inode.oid, get_vmstate_index(pos));
+		hdr.data_length = rlen;
+		hdr.offset = offset;
+		wlen = 0;
+
+		if (do_req(fd, (struct sd_req *)&hdr, data, &wlen, &rlen) < 0) {
+			eprintf("illegal vmstate %m\n");
+			ret = -EIO;
+			break;
+		}
+
+		/* advance the destination buffer together with the position */
+		data += rlen;
+		pos += rlen;
+		size -= rlen;
+		ret += rlen;
+	}
+
+	close(fd);
+	return ret;
+}
+
+
 static QEMUOptionParameter sd_create_options[] = {
 	{
 		.name = BLOCK_OPT_SIZE,
@@ -1740,8 +2024,16 @@ BlockDriver bdrv_sheepdog = {
 	.bdrv_aio_writev = sd_aio_writev,
 
 	.bdrv_snapshot_create = sd_snapshot_create,
+	.bdrv_snapshot_goto = sd_snapshot_goto,
+	.bdrv_snapshot_delete   = sd_snapshot_delete,
 	.bdrv_snapshot_list = sd_snapshot_list,
+
+
+	.bdrv_save_vmstate    = sd_save_vmstate,
+	.bdrv_load_vmstate    = sd_load_vmstate,
+
 	.create_options = sd_create_options,
+
 };
 
 static void bdrv_sheepdog_init(void)
-- 
1.7.0




More information about the sheepdog mailing list