[Sheepdog] [PATCH v2] sheepdog:add live snapshot feature
OZAWA Tsuyoshi
ozawa.tsuyoshi at lab.ntt.co.jp
Wed Apr 21 14:41:53 CEST 2010
Support the savevm & loadvm commands in the qemu monitor for sheepdog.
A special VDI space is needed to save & load the vmstate.
This version uses the unused VDI space
VDI_VMSTATE_BIT 0x4000000000000000
to do this.
Signed-off-by: OZAWA Tsuyoshi <ozawa.tsuyoshi at lab.ntt.co.jp>
---
block/sheepdog.c | 300 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 296 insertions(+), 4 deletions(-)
diff --git a/block/sheepdog.c b/block/sheepdog.c
index ea81c34..c129d5d 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -76,6 +76,9 @@
#define SD_RES_VDI_NOT_LOCKED 0x17 /* Vdi is not locked */
#define SD_RES_SHUTDOWN 0x18 /* Sheepdog is shutting down */
+#define VDI_VMSTATE_MASK 0x7fffffff00000000
+#define VDI_VMSTATE_BIT 0x4000000000000000
+
/* should be configurable? */
#define MAX_RETRIES 6
@@ -209,7 +212,9 @@ struct sd_inode {
uint64_t oid;
uint64_t ctime;
uint64_t snap_ctime;
+ uint64_t vm_clock_nsec;
uint64_t vdi_size;
+ uint64_t vm_state_size;
uint16_t copy_policy;
uint8_t nr_copies;
uint8_t block_size_shift;
@@ -1045,6 +1050,27 @@ static int read_vdi_obj(char *buf, uint64_t oid, int *copies)
}
}
+/*
+ * Return the number of the vmstate object that holds byte position pos,
+ * i.e. pos divided by SD_DATA_OBJ_SIZE (converted to the 32-bit return type).
+ */
+static uint32_t get_vmstate_index(uint64_t pos)
+{
+    uint64_t obj_nr = pos / SD_DATA_OBJ_SIZE;
+
+    return obj_nr;
+}
+
+/*
+ * Return the byte offset of position pos inside its vmstate object,
+ * i.e. the remainder of pos divided by SD_DATA_OBJ_SIZE.
+ */
+static uint64_t get_vmstate_offset(uint64_t pos)
+{
+    uint64_t within_obj = pos % SD_DATA_OBJ_SIZE;
+
+    return within_obj;
+}
+
+/*
+ * Build the object id used to store vmstate data for a VDI.
+ *
+ * The bits of oid selected by VDI_VMSTATE_MASK are kept, VDI_VMSTATE_BIT is
+ * forced on, and vdi_index is OR-ed into the result.
+ */
+static uint64_t get_vmstate_oid(uint64_t oid, uint32_t vdi_index)
+{
+    return (VDI_VMSTATE_MASK & oid) | VDI_VMSTATE_BIT | vdi_index;
+}
+
/* TODO: error cleanups */
static int sd_open(BlockDriverState *bs, const char *filename, int flags)
{
@@ -1574,9 +1600,16 @@ static BlockDriverAIOCB *sd_aio_readv(BlockDriverState *bs,
static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
{
struct bdrv_sd_state *s = bs->opaque;
- int ret;
+ int ret, fd, dummy;
+ struct sd_obj_req hdr;
+ unsigned int rlen, wlen;
+ uint64_t new_oid;
+ struct sd_inode *inode;
+
+ sd_release(bs);
- eprintf("%s %s %s %d %d\n", sn_info->name, sn_info->id_str,
+ eprintf("sn_info: name %s id_str %s s: name %s vm_state_size %d "
+ "is_current %d\n", sn_info->name, sn_info->id_str,
s->name, sn_info->vm_state_size, s->is_current);
if (!s->is_current) {
@@ -1589,12 +1622,145 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
dprintf("%s %s\n", sn_info->name, sn_info->id_str);
- ret = do_sd_create(s->name, sn_info->name, s->inode.vdi_size >> 9,
- s->inode.oid, NULL, 1);
+ s->inode.vm_state_size = sn_info->vm_state_size;
+ s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
+
+ /* refresh inode. */
+ fd = connect_to_vost();
+ if (fd < 0) {
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.opcode = SD_OP_WRITE_OBJ;
+
+ hdr.oid = s->inode.oid;
+ hdr.copies = s->inode.nr_copies;
+
+ hdr.flags |= SD_FLAG_CMD_WRITE;
+ hdr.data_length = SD_INODE_SIZE;
+ hdr.offset = 0;
+ wlen = SD_INODE_SIZE;
+ rlen = 0;
+
+ ret = do_req(fd, (struct sd_req *)&hdr, &s->inode, &wlen, &rlen);
+ if (ret < 0) {
+ eprintf("failed to write snapshot's inode.\n");
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ if (fd < 0) {
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ ret = do_sd_create(s->name, NULL, s->inode.vdi_size >> 9,
+ s->inode.oid, &new_oid, 1);
+ if (ret < 0) {
+ eprintf("failed to create inode for snapshot. %m\n");
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ inode = (struct sd_inode *)malloc(sizeof(struct sd_inode));
+ if (!inode) {
+ eprintf("failed to allocate memory for inode. %m\n");
+ goto cleanup;
+ }
+
+ if (read_vdi_obj((char *)inode, new_oid, &dummy) < 0) {
+ eprintf("failed to read new inode info. %m\n");
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ memcpy(&s->inode, inode, sizeof(struct sd_inode));
+ eprintf("s->inode: name %s snap_id %x oid %lx\n",
+ s->inode.name, s->inode.snap_id, s->inode.oid);
+cleanup:
+ close(fd);
return ret;
}
+/*
+ * Switch the block device state to the snapshot named by snapshot_id.
+ *
+ * snapshot_id is parsed as a decimal snapshot number. The matching VDI is
+ * looked up with find_vdi_name(), its inode is read with read_vdi_obj() and
+ * copied into s->inode, and s->is_current is cleared.
+ *
+ * Returns 0 on success and -ENOENT on failure. Once the backup copy of the
+ * driver state has been taken, any failure restores that copy into *s.
+ */
+static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
+{
+    struct bdrv_sd_state *s = bs->opaque;
+    struct bdrv_sd_state *old_s;
+    char vdi[256];
+    char *buf = NULL;
+    uint64_t oid;
+    uint32_t snapid = 0;
+    size_t name_len;
+    int ret = -ENOENT, dummy;
+
+    sd_release(bs);
+
+    old_s = malloc(sizeof(*old_s));
+    if (!old_s) {
+        eprintf("failed to allocate memory for old state. %m\n");
+        /*
+         * There is no backup to restore from, so the common error path
+         * (which copies old_s back into s) must not be taken here.
+         */
+        return ret;
+    }
+
+    memcpy(old_s, s, sizeof(*old_s));
+
+    snapid = strtol(snapshot_id, NULL, 10);
+    if (!snapid) {
+        eprintf("Invalid snapshot_id\n");
+        goto out;
+    }
+
+    buf = malloc(SD_INODE_SIZE);
+    if (!buf) {
+        eprintf("Failed to allocate memory\n");
+        goto out;
+    }
+
+    /* Bound the copy by the destination size, not by the source length. */
+    name_len = strlen(s->name);
+    if (name_len >= sizeof(vdi)) {
+        eprintf("VDI name is too long\n");
+        ret = -ENOENT;
+        goto out;
+    }
+    memcpy(vdi, s->name, name_len + 1);
+
+    ret = find_vdi_name(s, vdi, snapid, &oid);
+    if (ret) {
+        eprintf("Failed to find_vdi_name\n");
+        ret = -ENOENT;
+        goto out;
+    }
+
+    ret = read_vdi_obj(buf, oid, &dummy);
+    if (ret) {
+        ret = -ENOENT;
+        goto out;
+    }
+
+    memcpy(&s->inode, buf, sizeof(s->inode));
+
+    /* A snapshot without saved VM state cannot be loaded. */
+    if (!s->inode.vm_state_size) {
+        eprintf("Invalid snapshot\n");
+        ret = -ENOENT;
+        goto out;
+    }
+
+    s->is_current = 0;
+
+    free(buf);
+    free(old_s);
+
+    return 0;
+out:
+    /* recover bdrv_sd_state */
+    memcpy(s, old_s, sizeof(*old_s));
+    free(buf);
+    free(old_s);
+
+    eprintf("failed to open. recover old bdrv_sd_state.\n");
+
+    return ret;
+}
+
+/*
+ * Snapshot deletion is not implemented yet: neither argument is used and
+ * success (0) is always reported.
+ */
+static int sd_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
+{
+    /* FIXME: Delete specified snapshot id. */
+    int ret = 0;
+
+    return ret;
+}
+
struct sd_so_req {
uint8_t proto_ver;
uint8_t opcode;
@@ -1698,6 +1864,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
if (!strcmp(inode.name, s->name) && inode.snap_ctime) {
sn_tab[found].date_sec = inode.snap_ctime >> 32;
sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff;
+ sn_tab[found].vm_state_size = inode.vm_state_size;
+ sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec;
snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u",
inode.snap_id);
@@ -1712,6 +1880,122 @@ out:
return found;
}
+/*
+ * Write size bytes of VM state from data, starting at logical position pos.
+ *
+ * The state is spread over vmstate objects of SD_DATA_OBJ_SIZE bytes whose
+ * ids come from get_vmstate_oid(). Each request is limited to the space left
+ * in the current object, so no single write crosses an object boundary, and
+ * the source pointer advances with every chunk.
+ *
+ * Returns the number of bytes written, or -EIO if the connection or a
+ * request fails.
+ */
+static int sd_save_vmstate(BlockDriverState *bs, const uint8_t *data,
+                           int64_t pos, int size)
+{
+    struct bdrv_sd_state *s = bs->opaque;
+    struct sd_obj_req hdr;
+    int fd;
+    int ret = 0;
+    unsigned int rlen = 0, wlen = 0;
+    uint64_t vmstate_oid;
+    uint32_t vdi_index;
+    uint64_t offset;
+    uint64_t room;
+
+    fd = connect_to_vost();
+    if (fd < 0) {
+        /* Nothing was opened, so there is nothing to close. */
+        return -EIO;
+    }
+
+    while (size > 0) {
+        vdi_index = get_vmstate_index(pos);
+        offset = get_vmstate_offset(pos);
+
+        /* Never write past the end of the current object. */
+        room = SD_DATA_OBJ_SIZE - offset;
+        if ((uint64_t)size > room) {
+            wlen = room;
+        } else {
+            wlen = size;
+        }
+
+        vmstate_oid = get_vmstate_oid(s->inode.oid, vdi_index);
+
+        memset(&hdr, 0, sizeof(hdr));
+        /* A chunk that starts at offset 0 is the first write to its object. */
+        if (offset) {
+            hdr.opcode = SD_OP_WRITE_OBJ;
+        } else {
+            hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ;
+        }
+
+        hdr.oid = vmstate_oid;
+        hdr.cow_oid = 0;
+        hdr.copies = s->inode.nr_copies;
+
+        hdr.flags |= SD_FLAG_CMD_WRITE;
+        hdr.data_length = wlen;
+        hdr.offset = offset;
+
+        rlen = 0;
+
+        if (do_req(fd, (struct sd_req *)&hdr, data, &wlen, &rlen) < 0) {
+            eprintf("failed to save vmstate %m\n");
+            ret = -EIO;
+            goto cleanup;
+        }
+
+        /*
+         * NOTE(review): wlen is passed by pointer; if do_req() can leave it
+         * at 0 the loop would never advance, so treat that as an error.
+         */
+        if (!wlen) {
+            eprintf("failed to save vmstate %m\n");
+            ret = -EIO;
+            goto cleanup;
+        }
+
+        /* Advance the source buffer together with the logical position. */
+        data += wlen;
+        pos += wlen;
+        size -= wlen;
+        ret += wlen;
+    }
+
+cleanup:
+    close(fd);
+    return ret;
+}
+
+/*
+ * Read size bytes of VM state into data, starting at logical position pos.
+ *
+ * The state is fetched from vmstate objects of SD_DATA_OBJ_SIZE bytes whose
+ * ids come from get_vmstate_oid(). Each request is limited to the bytes left
+ * in the current object, so no single read crosses an object boundary, and
+ * the destination pointer advances with every chunk.
+ *
+ * Returns the number of bytes read, or -EIO if the connection or a request
+ * fails.
+ */
+static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
+                           int64_t pos, int size)
+{
+    struct bdrv_sd_state *s = bs->opaque;
+    int ret = 0;
+    int fd;
+    unsigned int rlen = 0, wlen = 0;
+    struct sd_obj_req hdr;
+    uint64_t vmstate_oid;
+    uint32_t vdi_index;
+    uint64_t offset;
+    uint64_t room;
+
+    fd = connect_to_vost();
+    if (fd < 0) {
+        /* Nothing was opened, so there is nothing to close. */
+        return -EIO;
+    }
+
+    while (size > 0) {
+        vdi_index = get_vmstate_index(pos);
+        offset = get_vmstate_offset(pos);
+
+        /* Never read past the end of the current object. */
+        room = SD_DATA_OBJ_SIZE - offset;
+        if ((uint64_t)size > room) {
+            rlen = room;
+        } else {
+            rlen = size;
+        }
+
+        vmstate_oid = get_vmstate_oid(s->inode.oid, vdi_index);
+
+        memset(&hdr, 0, sizeof(hdr));
+
+        hdr.opcode = SD_OP_READ_OBJ;
+        hdr.oid = vmstate_oid;
+        hdr.data_length = rlen;
+        hdr.offset = offset;
+
+        wlen = 0;
+
+        if (do_req(fd, (struct sd_req *)&hdr, data, &wlen, &rlen) < 0) {
+            eprintf("illegal vmstate %m\n");
+            ret = -EIO;
+            goto cleanup;
+        }
+
+        /*
+         * NOTE(review): rlen is passed by pointer; if do_req() can shrink it
+         * to 0 the loop would never advance, so treat that as an error.
+         */
+        if (!rlen) {
+            eprintf("illegal vmstate %m\n");
+            ret = -EIO;
+            goto cleanup;
+        }
+
+        /* Advance the destination buffer together with the position. */
+        data += rlen;
+        pos += rlen;
+        size -= rlen;
+        ret += rlen;
+    }
+
+cleanup:
+    close(fd);
+    return ret;
+}
+
+
static QEMUOptionParameter sd_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
@@ -1740,8 +2024,16 @@ BlockDriver bdrv_sheepdog = {
.bdrv_aio_writev = sd_aio_writev,
.bdrv_snapshot_create = sd_snapshot_create,
+ .bdrv_snapshot_goto = sd_snapshot_goto,
+ .bdrv_snapshot_delete = sd_snapshot_delete,
.bdrv_snapshot_list = sd_snapshot_list,
+
+
+ .bdrv_save_vmstate = sd_save_vmstate,
+ .bdrv_load_vmstate = sd_load_vmstate,
+
.create_options = sd_create_options,
+
};
static void bdrv_sheepdog_init(void)
--
1.7.0
More information about the sheepdog
mailing list