Support the savevm & loadvm commands in the qemu monitor for sheepdog. A special VDI space is needed to save & load the vmstate. This version uses the unused VDI space identified by VDI_VMSTATE_BIT (0x4000000000000000) to do this. Signed-off-by: OZAWA Tsuyoshi <ozawa.tsuyoshi at lab.ntt.co.jp> --- block/sheepdog.c | 300 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 296 insertions(+), 4 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index ea81c34..c129d5d 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -76,6 +76,9 @@ #define SD_RES_VDI_NOT_LOCKED 0x17 /* Vdi is not locked */ #define SD_RES_SHUTDOWN 0x18 /* Sheepdog is shutting down */ +#define VDI_VMSTATE_MASK 0x7fffffff00000000 +#define VDI_VMSTATE_BIT 0x4000000000000000 + /* should be configurable? */ #define MAX_RETRIES 6 @@ -209,7 +212,9 @@ struct sd_inode { uint64_t oid; uint64_t ctime; uint64_t snap_ctime; + uint64_t vm_clock_nsec; uint64_t vdi_size; + uint64_t vm_state_size; uint16_t copy_policy; uint8_t nr_copies; uint8_t block_size_shift; @@ -1045,6 +1050,27 @@ static int read_vdi_obj(char *buf, uint64_t oid, int *copies) } } +static uint32_t get_vmstate_index(uint64_t pos) +{ + return pos / SD_DATA_OBJ_SIZE; +} + +static uint64_t get_vmstate_offset(uint64_t pos) +{ + return pos % SD_DATA_OBJ_SIZE; +} + +static uint64_t get_vmstate_oid(uint64_t oid, uint32_t vdi_index) +{ + uint64_t vmstate_oid; + + vmstate_oid = VDI_VMSTATE_MASK & oid; + vmstate_oid |= VDI_VMSTATE_BIT; + vmstate_oid |= vdi_index; + + return vmstate_oid; +} + /* TODO: error cleanups */ static int sd_open(BlockDriverState *bs, const char *filename, int flags) { @@ -1574,9 +1600,16 @@ static BlockDriverAIOCB *sd_aio_readv(BlockDriverState *bs, static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) { struct bdrv_sd_state *s = bs->opaque; - int ret; + int ret, fd, dummy; + struct sd_obj_req hdr; + unsigned int rlen, wlen; + uint64_t new_oid; + struct sd_inode *inode; + + sd_release(bs); - eprintf("%s %s %s 
%d %d\n", sn_info->name, sn_info->id_str, + eprintf("sn_info: name %s id_str %s s: name %s vm_state_size %d " + "is_current %d\n", sn_info->name, sn_info->id_str, s->name, sn_info->vm_state_size, s->is_current); if (!s->is_current) { @@ -1589,12 +1622,145 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) dprintf("%s %s\n", sn_info->name, sn_info->id_str); - ret = do_sd_create(s->name, sn_info->name, s->inode.vdi_size >> 9, - s->inode.oid, NULL, 1); + s->inode.vm_state_size = sn_info->vm_state_size; + s->inode.vm_clock_nsec = sn_info->vm_clock_nsec; + + /* refresh inode. */ + fd = connect_to_vost(); + if (fd < 0) { + ret = -EIO; + goto cleanup; + } + + memset(&hdr, 0, sizeof(hdr)); + hdr.opcode = SD_OP_WRITE_OBJ; + + hdr.oid = s->inode.oid; + hdr.copies = s->inode.nr_copies; + + hdr.flags |= SD_FLAG_CMD_WRITE; + hdr.data_length = SD_INODE_SIZE; + hdr.offset = 0; + wlen = SD_INODE_SIZE; + rlen = 0; + + ret = do_req(fd, (struct sd_req *)&hdr, &s->inode, &wlen, &rlen); + if (ret < 0) { + eprintf("failed to write snapshot's inode.\n"); + ret = -EIO; + goto cleanup; + } + + if (fd < 0) { + ret = -EIO; + goto cleanup; + } + + ret = do_sd_create(s->name, NULL, s->inode.vdi_size >> 9, + s->inode.oid, &new_oid, 1); + if (ret < 0) { + eprintf("failed to create inode for snapshot. %m\n"); + ret = -EIO; + goto cleanup; + } + + inode = (struct sd_inode *)malloc(sizeof(struct sd_inode)); + if (!inode) { + eprintf("failed to allocate memory for inode. %m\n"); + goto cleanup; + } + + if (read_vdi_obj((char *)inode, new_oid, &dummy) < 0) { + eprintf("failed to read new inode info. 
%m\n"); + ret = -EIO; + goto cleanup; + } + + memcpy(&s->inode, inode, sizeof(struct sd_inode)); + eprintf("s->inode: name %s snap_id %x oid %lx\n", + s->inode.name, s->inode.snap_id, s->inode.oid); +cleanup: + close(fd); return ret; } +static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) +{ + struct bdrv_sd_state *s = bs->opaque; + struct bdrv_sd_state *old_s; + char vdi[256]; + char *buf = NULL; + uint64_t oid; + uint32_t snapid = 0; + int ret = -ENOENT, dummy; + + sd_release(bs); + + old_s = malloc(sizeof(struct bdrv_sd_state)); + if (!old_s) { + eprintf("failed to allocate memory for old state. %m\n"); + goto out; + } + + memcpy(old_s, s, sizeof(struct bdrv_sd_state)); + + snapid = strtol(snapshot_id, NULL, 10); + if (!snapid) { + eprintf("Invalid snapshot_id\n"); + goto out; + } + + buf = malloc(SD_INODE_SIZE); + if (!buf) { + eprintf("Failed to allocate memory\n"); + goto out; + } + strncpy(vdi, s->name, strlen(s->name)+1); + ret = find_vdi_name(s, vdi, snapid, &oid); + if (ret) { + eprintf("Failed to find_vdi_name\n"); + ret = -ENOENT; + goto out; + } + + ret = read_vdi_obj(buf, oid, &dummy); + if (ret) { + ret = -ENOENT; + goto out; + } + + memcpy(&s->inode, buf, sizeof(s->inode)); + + if (!s->inode.vm_state_size) { + eprintf("Invalid snapshot\n"); + ret = -ENOENT; + goto out; + } + + s->is_current = 0; + + free(buf); + free(old_s); + + return 0; +out: + /* recover bdrv_sd_state */ + memcpy(s, old_s, sizeof(struct bdrv_sd_state)); + free(buf); + free(old_s); + + eprintf("failed to open. recover old bdrv_sd_state.\n"); + + return ret; +} + +static int sd_snapshot_delete(BlockDriverState *bs, const char *snapshot_id) +{ + /* FIXME: Delete specified snapshot id. 
*/ + return 0; +} + struct sd_so_req { uint8_t proto_ver; uint8_t opcode; @@ -1698,6 +1864,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) if (!strcmp(inode.name, s->name) && inode.snap_ctime) { sn_tab[found].date_sec = inode.snap_ctime >> 32; sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff; + sn_tab[found].vm_state_size = inode.vm_state_size; + sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec; snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u", inode.snap_id); @@ -1712,6 +1880,122 @@ out: return found; } +static int sd_save_vmstate(BlockDriverState *bs, const uint8_t *data, + int64_t pos, int size) +{ + struct bdrv_sd_state *s = bs->opaque; + struct sd_obj_req hdr; + int fd; + int ret = 0; + unsigned int rlen = 0, wlen = 0; + uint64_t vmstate_oid; + uint32_t vdi_index; + uint64_t offset; + + fd = connect_to_vost(); + if (fd < 0) { + ret = -EIO; + goto cleanup; + } + + while (size) { + vdi_index = get_vmstate_index(pos); + offset = get_vmstate_offset(pos); + + if (size > SD_DATA_OBJ_SIZE) + wlen = SD_DATA_OBJ_SIZE; + else + wlen = size; + + vmstate_oid = get_vmstate_oid(s->inode.oid, vdi_index); + + memset(&hdr, 0, sizeof(hdr)); + if (offset) + hdr.opcode = SD_OP_WRITE_OBJ; + else + hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ; + + hdr.oid = vmstate_oid; + hdr.cow_oid = 0; + hdr.copies = s->inode.nr_copies; + + hdr.flags |= SD_FLAG_CMD_WRITE; + hdr.data_length = wlen; + hdr.offset = offset; + + rlen = 0; + + if (do_req(fd, (struct sd_req *)&hdr, data, &wlen, &rlen) < 0) { + eprintf("failed to save vmstate %m\n"); + ret = -EIO; + goto cleanup; + } + + pos += wlen; + size -= wlen; + ret += wlen; + } + +cleanup: + close(fd); + return ret; +} + +static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data, + int64_t pos, int size) +{ + struct bdrv_sd_state *s = bs->opaque; + int ret = 0; + int fd; + unsigned int rlen = 0, wlen = 0; + struct sd_obj_req hdr; + uint64_t vmstate_oid; + uint32_t vdi_index; + uint64_t 
offset; + + fd = connect_to_vost(); + if (fd < 0) { + ret = -EIO; + goto cleanup; + } + + while (size) { + vdi_index = get_vmstate_index(pos); + offset = get_vmstate_offset(pos); + + if (size > SD_DATA_OBJ_SIZE) + rlen = SD_DATA_OBJ_SIZE; + else + rlen = size; + + vmstate_oid = get_vmstate_oid(s->inode.oid, vdi_index); + + memset(&hdr, 0, sizeof(hdr)); + + hdr.opcode = SD_OP_READ_OBJ; + hdr.oid = vmstate_oid; + hdr.data_length = rlen; + hdr.offset = offset; + + wlen = 0; + + if (do_req(fd, (struct sd_req *)&hdr, data, &wlen, &rlen) < 0) { + eprintf("illegal vmstate %m\n"); + ret = -EIO; + goto cleanup; + } + + pos += rlen; + size -= rlen; + ret += rlen; + } + +cleanup: + close(fd); + return ret; +} + + static QEMUOptionParameter sd_create_options[] = { { .name = BLOCK_OPT_SIZE, @@ -1740,8 +2024,16 @@ BlockDriver bdrv_sheepdog = { .bdrv_aio_writev = sd_aio_writev, .bdrv_snapshot_create = sd_snapshot_create, + .bdrv_snapshot_goto = sd_snapshot_goto, + .bdrv_snapshot_delete = sd_snapshot_delete, .bdrv_snapshot_list = sd_snapshot_list, + + + .bdrv_save_vmstate = sd_save_vmstate, + .bdrv_load_vmstate = sd_load_vmstate, + .create_options = sd_create_options, + }; static void bdrv_sheepdog_init(void) -- 1.7.0 |