[Sheepdog] [PATCH] sheepdog: add live snapshot feature
OZAWA Tsuyoshi
ozawa.tsuyoshi at lab.ntt.co.jp
Mon Apr 19 10:39:14 CEST 2010
This patch adds live snapshot support to sheepdog.
NOTE: For this patch to work correctly, the collie patch that adds
vm_clock_nsec and vm_state_size to sd_inode must also be applied.
Signed-off-by: OZAWA Tsuyoshi <ozawa.tsuyoshi at lab.ntt.co.jp>
---
block/sheepdog.c | 284 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 280 insertions(+), 4 deletions(-)
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 18ecd22..ded7c75 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -76,6 +76,9 @@
#define SD_RES_VDI_NOT_LOCKED 0x17 /* Vdi is not locked */
#define SD_RES_SHUTDOWN 0x18 /* Sheepdog is shutting down */
+#define VDI_SNAPSHOT_MASK 0x7fffffff00000000 /* bits of the inode oid kept when building a vmstate object id */
+#define VDI_SNAPSHOT_BIT 0x4000000000000000 /* ORed into every vmstate object id */
+
/* should be configurable? */
#define MAX_RETRIES 6
@@ -209,7 +212,9 @@ struct sd_inode {
uint64_t oid;
uint64_t ctime;
uint64_t snap_ctime;
+ uint64_t vm_clock_nsec;
uint64_t vdi_size;
+ uint64_t vm_state_size;
uint16_t copy_policy;
uint8_t nr_copies;
uint8_t block_size_shift;
@@ -1043,6 +1048,23 @@ static int read_vdi_obj(char *buf, uint64_t oid, int *copies)
}
}
+/*
+ * Map a byte position in the vmstate stream to an object index and an
+ * offset inside that object.
+ *
+ * vdi_index: receives pos / SD_DATA_OBJ_SIZE
+ * offset:    receives pos % SD_DATA_OBJ_SIZE
+ * pos:       byte position of the chunk
+ * size:      length of the chunk in bytes
+ *
+ * When the chunk would run past the end of its object (and does not end
+ * exactly on an object boundary), it is redirected to the start of the
+ * following object instead of being split.
+ */
+static void set_vdi_index_and_offset(int *vdi_index, int *offset,
+                                     int64_t pos, int size)
+{
+    int end, spill;
+
+    *vdi_index = pos / SD_DATA_OBJ_SIZE;
+    *offset = pos % SD_DATA_OBJ_SIZE;
+
+    end = *offset + size;
+    spill = end / SD_DATA_OBJ_SIZE;
+    if (!spill || end % SD_DATA_OBJ_SIZE == 0) {
+        /* the chunk fits in the object selected above */
+        return;
+    }
+
+    /* the chunk crosses the object boundary: place it in the next object */
+    (*vdi_index)++;
+    *offset = 0;
+}
+
+
/* TODO: error cleanups */
static int sd_open(BlockDriverState *bs, const char *filename, int flags)
{
@@ -1572,9 +1594,15 @@ static BlockDriverAIOCB *sd_aio_readv(BlockDriverState *bs,
static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
{
struct bdrv_sd_state *s = bs->opaque;
- int ret;
+ int ret, fd;
+ struct sd_obj_req hdr;
+ unsigned int rlen, wlen;
+ uint64_t new_oid;
+ struct sd_inode *inode;
- eprintf("%s %s %s %d %d\n", sn_info->name, sn_info->id_str,
+ sd_release(bs);
+ eprintf("sn_info: name %s id_str %s s: name %s vm_state_size %d"
+ "is_current %d\n", sn_info->name, sn_info->id_str,
s->name, sn_info->vm_state_size, s->is_current);
if (!s->is_current) {
@@ -1587,12 +1615,156 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
dprintf("%s %s\n", sn_info->name, sn_info->id_str);
- ret = do_sd_create(s->name, sn_info->name, s->inode.vdi_size >> 9,
- s->inode.oid, NULL, 1);
+ s->inode.vm_state_size = sn_info->vm_state_size;
+ s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
+
+ /* refresh inode. */
+ fd = connect_to_vost();
+ if (fd < 0) {
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.opcode = SD_OP_WRITE_OBJ;
+
+ hdr.oid = s->inode.oid;
+ hdr.copies = s->inode.nr_copies;
+
+ hdr.flags |= SD_FLAG_CMD_WRITE;
+ hdr.data_length = SD_INODE_SIZE;
+ hdr.offset = 0;
+ wlen = SD_INODE_SIZE;
+ rlen = 0;
+
+ ret = do_req(fd, (struct sd_req *)&hdr, &s->inode, &wlen, &rlen);
+ if (ret < 0) {
+ eprintf("do_req write\n");
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ if (fd < 0) {
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ ret = do_sd_create(s->name, NULL, s->inode.vdi_size >> 9,
+ s->inode.oid, &new_oid, 1);
+ if (ret < 0) {
+ eprintf("do_sd_create %m");
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ inode = (struct sd_inode *)malloc(sizeof(struct sd_inode));
+ if (!inode) {
+ eprintf("malloc %m");
+ goto cleanup;
+ }
+
+ memset(&hdr, 0, sizeof(hdr));
+
+ hdr.opcode = SD_OP_READ_OBJ;
+ hdr.oid = new_oid;
+ hdr.data_length = SD_INODE_SIZE;
+ hdr.offset = 0;
+
+ wlen = 0;
+ rlen = SD_INODE_SIZE;
+
+ ret = do_req(fd, (struct sd_req *)&hdr, inode, &wlen, &rlen);
+ if (ret < 0) {
+ eprintf("do_req read\n");
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ memcpy(&s->inode, inode, sizeof(struct sd_inode));
+ eprintf("s->inode: name %s snap_id %x oid %lxn",
+ s->inode.name, s->inode.snap_id, s->inode.oid);
+
+cleanup:
+ close(fd);
+ return ret;
+}
+
+/*
+ * Switch the open vdi to the snapshot identified by snapshot_id.
+ *
+ * snapshot_id is parsed as a decimal number, matching the "%u" format
+ * sd_snapshot_list() uses when it fills in id_str.  The inode of the
+ * snapshot is looked up with find_vdi_name(), read with read_vdi_obj() and
+ * copied into the driver state; the vdi is then marked as not current.
+ *
+ * Returns 0 on success.  On failure the saved copy of the driver state is
+ * copied back and a negative errno value is returned:
+ *   -ENOMEM  an allocation failed
+ *   -EINVAL  the vdi name does not fit the local name buffer
+ *   -ENOENT  the id is invalid, the snapshot cannot be found or read, or it
+ *            carries no VM state (vm_state_size == 0)
+ */
+static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
+{
+    struct bdrv_sd_state *s = bs->opaque;
+    struct bdrv_sd_state *old_s;
+    char vdi[256];
+    char *buf = NULL;
+    char *end;
+    long long id;
+    uint64_t oid;
+    uint32_t snapid;
+    int ret = -ENOENT, dummy, len;
+
+    old_s = malloc(sizeof(*old_s));
+    if (!old_s) {
+        /*
+         * Nothing has been modified yet, so return directly: the recovery
+         * path below would dereference the NULL old_s.
+         */
+        eprintf("malloc");
+        return -ENOMEM;
+    }
+
+    memcpy(old_s, s, sizeof(*old_s));
+    sd_release(bs);
+
+    /* base 10: sd_snapshot_list() prints snap_id with "%u" */
+    errno = 0;
+    id = strtoll(snapshot_id, &end, 10);
+    if (errno || end == snapshot_id || *end != '\0' ||
+        id <= 0 || id > UINT32_MAX) {
+        eprintf("Invalid snapshot_id\n");
+        goto out;
+    }
+    snapid = id;
+
+    buf = malloc(SD_INODE_SIZE);
+    if (!buf) {
+        eprintf("Failed to allocate memory\n");
+        ret = -ENOMEM;
+        goto out;
+    }
+
+    /* bound the copy by the destination, not by the source length */
+    len = snprintf(vdi, sizeof(vdi), "%s", s->name);
+    if (len < 0 || (size_t)len >= sizeof(vdi)) {
+        eprintf("vdi name too long\n");
+        ret = -EINVAL;
+        goto out;
+    }
+
+    ret = find_vdi_name(s, vdi, snapid, &oid);
+    if (ret) {
+        eprintf("Failed to find_vdi_name\n");
+        ret = -ENOENT;
+        goto out;
+    }
+
+    ret = read_vdi_obj(buf, oid, &dummy);
+    if (ret) {
+        ret = -ENOENT;
+        goto out;
+    }
+
+    memcpy(&s->inode, buf, sizeof(s->inode));
+
+    /* a snapshot without saved VM state cannot be resumed */
+    if (!s->inode.vm_state_size) {
+        eprintf("Invalid snapshot\n");
+        ret = -ENOENT;
+        goto out;
+    }
+
+    s->is_current = 0;
+
+    free(buf);
+    free(old_s);
+
+    return 0;
+out:
+    /* recover bdrv_sd_state */
+    memcpy(s, old_s, sizeof(struct bdrv_sd_state));
+    free(buf);
+    free(old_s);
+
+    eprintf("failed to open. recover old bdrv_sd_state.\n");
     return ret;
 }
+/*
+ * Snapshot deletion hook.  Not implemented yet: nothing is deleted and
+ * success (0) is always reported to the caller.
+ */
+static int sd_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
+{
+    /* FIXME: Delete snapshot specified by snapshot_id */
+    (void)bs;
+    (void)snapshot_id;
+
+    return 0;
+}
+
struct sd_so_req {
uint8_t proto_ver;
uint8_t opcode;
@@ -1696,6 +1868,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
if (!strcmp(inode.name, s->name) && inode.snap_ctime) {
sn_tab[found].date_sec = inode.snap_ctime >> 32;
sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff;
+ sn_tab[found].vm_state_size = inode.vm_state_size;
+ sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec;
snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u",
inode.snap_id);
@@ -1710,6 +1884,100 @@ out:
return found;
}
+/*
+ * Write one chunk of VM state into the vmstate objects of the open vdi.
+ *
+ * bs:   block driver state; bs->opaque holds the sheepdog state
+ * data: chunk to store
+ * pos:  byte position of the chunk in the vmstate stream
+ * size: chunk length in bytes
+ *
+ * The target object id is the inode oid masked with VDI_SNAPSHOT_MASK, with
+ * VDI_SNAPSHOT_BIT and the object index from set_vdi_index_and_offset()
+ * ORed in.
+ *
+ * Returns the non-negative result of do_req() on success, or -EIO when the
+ * connection or the request fails.
+ */
+static int sd_save_vmstate(BlockDriverState *bs, const uint8_t *data,
+                           int64_t pos, int size)
+{
+    struct bdrv_sd_state *s = bs->opaque;
+    struct sd_obj_req hdr;
+    unsigned int rlen = 0, wlen = size;
+    uint64_t sn_oid;
+    int vdi_index, offset;
+    int fd, ret;
+
+    set_vdi_index_and_offset(&vdi_index, &offset, pos, size);
+
+    sn_oid = VDI_SNAPSHOT_MASK & s->inode.oid;
+    sn_oid |= VDI_SNAPSHOT_BIT;
+    sn_oid |= vdi_index;
+
+    fd = connect_to_vost();
+    if (fd < 0) {
+        /* no descriptor was opened, so there is nothing to close */
+        return -EIO;
+    }
+
+    memset(&hdr, 0, sizeof(hdr));
+    /* a write at offset 0 also creates the object */
+    hdr.opcode = offset ? SD_OP_WRITE_OBJ : SD_OP_CREATE_AND_WRITE_OBJ;
+    hdr.oid = sn_oid;
+    hdr.cow_oid = 0;
+    hdr.copies = s->inode.nr_copies;
+    hdr.flags |= SD_FLAG_CMD_WRITE;
+    hdr.data_length = size;
+    hdr.offset = offset;
+
+    /*
+     * sd_load_vmstate() hands do_req() a buffer to be filled, so its data
+     * parameter is presumably not const-qualified; cast explicitly rather
+     * than dropping the qualifier implicitly.
+     */
+    ret = do_req(fd, (struct sd_req *)&hdr, (void *)data, &wlen, &rlen);
+    if (ret < 0) {
+        eprintf("do_req %m");
+        ret = -EIO;
+    }
+
+    close(fd);
+    return ret;
+}
+
+/*
+ * Read one chunk of VM state back from the vmstate objects of the open vdi.
+ *
+ * bs:   block driver state; bs->opaque holds the sheepdog state
+ * data: buffer that receives the chunk
+ * pos:  byte position of the chunk in the vmstate stream
+ * size: number of bytes requested
+ *
+ * The object id and in-object offset are derived exactly as in
+ * sd_save_vmstate().
+ *
+ * Returns the read length left in rlen by do_req() on success (rlen starts
+ * out as size; presumably do_req() updates it to the amount actually
+ * read - confirm against do_req()), or -EIO when the connection or the
+ * request fails.
+ */
+static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
+                           int64_t pos, int size)
+{
+    struct bdrv_sd_state *s = bs->opaque;
+    struct sd_obj_req hdr;
+    unsigned int rlen = size, wlen = 0;
+    uint64_t sn_oid;
+    int vdi_index, offset;
+    int fd, ret;
+
+    set_vdi_index_and_offset(&vdi_index, &offset, pos, size);
+
+    sn_oid = VDI_SNAPSHOT_MASK & s->inode.oid;
+    sn_oid |= VDI_SNAPSHOT_BIT;
+    sn_oid |= vdi_index;
+
+    fd = connect_to_vost();
+    if (fd < 0) {
+        /* report the failure instead of a zero-length read */
+        return -EIO;
+    }
+
+    memset(&hdr, 0, sizeof(hdr));
+    hdr.opcode = SD_OP_READ_OBJ;
+    hdr.oid = sn_oid;
+    hdr.data_length = size;
+    hdr.offset = offset;
+
+    ret = do_req(fd, (struct sd_req *)&hdr, data, &wlen, &rlen);
+    if (ret < 0) {
+        /* log before close() so %m still reflects the request failure */
+        eprintf("do_req %m");
+        ret = -EIO;
+    } else {
+        ret = rlen;
+    }
+
+    close(fd);
+    return ret;
+}
+
+
+
static QEMUOptionParameter sd_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
@@ -1738,8 +2006,16 @@ BlockDriver bdrv_sheepdog = {
.bdrv_aio_writev = sd_aio_writev,
.bdrv_snapshot_create = sd_snapshot_create,
+ .bdrv_snapshot_goto = sd_snapshot_goto,
+ .bdrv_snapshot_delete = sd_snapshot_delete,
.bdrv_snapshot_list = sd_snapshot_list,
+
+
+ .bdrv_save_vmstate = sd_save_vmstate,
+ .bdrv_load_vmstate = sd_load_vmstate,
+
.create_options = sd_create_options,
+
};
static void bdrv_sheepdog_init(void)
--
1.6.5
More information about the sheepdog
mailing list