[Sheepdog] [PATCH] sheepdog: shrink vdi object size

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Fri Apr 30 02:01:49 CEST 2010


The 64-bit oid fields in struct sd_inode are redundant,
because their lower 32 bits can be derived. This patch reduces
the vdi object size and improves metadata operation performance.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 block/sheepdog.c |  126 +++++++++++++++++++++++++----------------------------
 1 files changed, 59 insertions(+), 67 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 780dc5e..f8863f8 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -76,11 +76,10 @@
 #define SD_RES_VDI_NOT_LOCKED   0x17 /* Vdi is not locked */
 #define SD_RES_SHUTDOWN      0x18 /* Sheepdog is shutting down */
 
-#define VDI_VMSTATE_BIT 	0x4000000000000000
-
 #define SD_NR_VDIS   (1U << 24)
 #define VDI_SPACE_SHIFT   32
 #define VDI_BIT (UINT64_C(1) << 63)
+#define VMSTATE_BIT (UINT64_C(1) << 62)
 #define DEAFAULT_NR_COPIES 1
 #define SD_MAX_VDI_LEN 256
 #define MAX_DATA_OBJS (1ULL << 20)
@@ -159,11 +158,11 @@ struct sd_vdi_req {
 	uint32_t	epoch;
 	uint32_t        id;
 	uint32_t        data_length;
-	uint64_t        base_oid;
 	uint64_t	vdi_size;
+	uint32_t        base_vdi_id;
 	uint32_t        copies;
 	uint32_t        snapid;
-	uint32_t        pad[2];
+	uint32_t        pad[3];
 };
 
 struct sd_vdi_rsp {
@@ -175,13 +174,12 @@ struct sd_vdi_rsp {
 	uint32_t        data_length;
 	uint32_t        result;
 	uint32_t        rsvd;
-	uint64_t        oid;
-	uint32_t        pad[4];
+	uint32_t        vdi_id;
+	uint32_t        pad[5];
 };
 
 struct sd_inode {
 	char name[SD_MAX_VDI_LEN];
-	uint64_t oid;
 	uint64_t ctime;
 	uint64_t snap_ctime;
 	uint64_t vm_clock_nsec;
@@ -191,9 +189,10 @@ struct sd_inode {
 	uint8_t  nr_copies;
 	uint8_t  block_size_shift;
 	uint32_t snap_id;
-	uint64_t parent_oid;
-	uint64_t child_oid[MAX_CHILDREN];
-	uint64_t data_oid[MAX_DATA_OBJS];
+	uint32_t vdi_id;
+	uint32_t parent_vdi_id;
+	uint32_t child_vdi_id[MAX_CHILDREN];
+	uint32_t data_vdi_id[MAX_DATA_OBJS];
 };
 
 struct aio_req {
@@ -306,23 +305,22 @@ static inline int after(uint32_t seq1, uint32_t seq2)
 
 static inline int is_data_obj_writeable(struct sd_inode *inode, unsigned int idx)
 {
-	return ((inode->oid & ~VDI_BIT) >> VDI_SPACE_SHIFT) ==
-		(inode->data_oid[idx] >> VDI_SPACE_SHIFT);
+	return inode->vdi_id == inode->data_vdi_id[idx];
 }
 
-static inline int is_data_obj(uint64_t oid)
+static inline uint64_t vid_to_vdi_oid(uint32_t vid)
 {
-	return !(VDI_BIT & oid);
+	return VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT);
 }
 
-static inline uint64_t to_data_oid(uint64_t vdi_oid, unsigned int idx)
+static inline uint64_t vid_to_vmstate_oid(uint32_t vid, uint32_t idx)
 {
-	return (vdi_oid & ~VDI_BIT) | idx;
+	return VMSTATE_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT) | idx;
 }
 
-static inline uint64_t to_vmstate_oid(uint64_t vdi_oid, unsigned int idx)
+static inline uint64_t vid_to_data_oid(uint32_t vid, uint32_t idx)
 {
-	return (vdi_oid & ~VDI_BIT) | VDI_VMSTATE_BIT | idx;
+	return ((uint64_t)vid << VDI_SPACE_SHIFT) | idx;
 }
 
 /*
@@ -694,7 +692,6 @@ static void aio_read_response(void *opaque)
 	struct sd_aiocb *acb;
 	int rest;
 	unsigned long idx;
-	uint64_t oid;
 
 	if (!nr_outstanding_aio_req(s))
 		return;
@@ -711,13 +708,13 @@ static void aio_read_response(void *opaque)
 	switch (acb->aiocb_type) {
 	case AIOCB_WRITE_UDATA:
 		idx = acb->sector_num * 512 / SD_DATA_OBJ_SIZE;
-		oid = to_data_oid(s->inode.oid, idx);
 
-		if (s->inode.data_oid[idx] != oid) {
-			s->inode.data_oid[idx] = oid;
+		if (s->inode.data_vdi_id[idx] != s->inode.vdi_id) {
+			s->inode.data_vdi_id[idx] = s->inode.vdi_id;
 			s->inode_dirty = 1;
 
-			send_pending_req(s, oid, rsp->id);
+			send_pending_req(s, vid_to_data_oid(s->inode.vdi_id, idx),
+					 rsp->id);
 		}
 		break;
 	case AIOCB_READ_UDATA:
@@ -829,7 +826,7 @@ static int parse_vdiname(const char *filename, char *vdi, int vdi_len,
 }
 
 static int find_vdi_name(struct bdrv_sd_state *s, char *filename, uint32_t snapid,
-			 uint64_t *oid)
+			 uint32_t *vid)
 {
 	int ret, fd;
 	struct sd_vdi_req hdr;
@@ -860,7 +857,7 @@ static int find_vdi_name(struct bdrv_sd_state *s, char *filename, uint32_t snapi
 		ret = -1;
 		goto out;
 	}
-	*oid = rsp->oid;
+	*vid = rsp->vdi_id;
 
 	ret = 0;
 out:
@@ -932,7 +929,7 @@ static int add_aio_request(struct bdrv_sd_state *s, struct aio_req *aio_req,
 	return 0;
 }
 
-static int read_vdi_obj(char *buf, uint64_t oid, int *copies)
+static int read_vdi_obj(char *buf, uint32_t vid, int *copies)
 {
 	struct sd_obj_req hdr;
 	struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
@@ -945,7 +942,7 @@ static int read_vdi_obj(char *buf, uint64_t oid, int *copies)
 	memset(&hdr, 0, sizeof(hdr));
 
 	hdr.opcode = SD_OP_READ_OBJ;
-	hdr.oid = oid;
+	hdr.oid = vid_to_vdi_oid(vid);
 	hdr.data_length = rlen;
 
 	fd = connect_to_vost();
@@ -977,7 +974,7 @@ static int read_vdi_obj(char *buf, uint64_t oid, int *copies)
 static int sd_open(BlockDriverState *bs, const char *filename, int flags)
 {
 	int ret, i;
-	uint64_t oid = 0;
+	uint32_t vid = 0;
 	struct bdrv_sd_state *s = bs->opaque;
 	char vdi[256];
 	uint32_t snapid;
@@ -1006,16 +1003,16 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
 	if (parse_vdiname(filename, vdi, sizeof(vdi), &snapid) < 0)
 		goto out;
 
-	ret = find_vdi_name(s, vdi, snapid, &oid);
+	ret = find_vdi_name(s, vdi, snapid, &vid);
 	if (ret)
 		goto out;
 
 	if (snapid)
-		eprintf("%" PRIx64 " non current inode was open.\n", oid);
+		eprintf("%" PRIx32 " non current inode was open.\n", vid);
 	else
 		s->is_current = 1;
 
-	ret = read_vdi_obj(buf, oid, &dummy);
+	ret = read_vdi_obj(buf, vid, &dummy);
 	if (ret)
 		goto out;
 
@@ -1034,7 +1031,7 @@ out:
 }
 
 static int do_sd_create(char *filename, char *tag, int64_t total_sectors,
-			uint64_t base_oid, uint64_t *oid, int snapshot)
+			uint32_t base_vid, uint32_t *vdi_id, int snapshot)
 {
 	struct sd_vdi_req hdr;
 	struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr;
@@ -1050,7 +1047,7 @@ static int do_sd_create(char *filename, char *tag, int64_t total_sectors,
 
 	memset(&hdr, 0, sizeof(hdr));
 	hdr.opcode = SD_OP_NEW_VDI;
-	hdr.base_oid = base_oid;
+	hdr.base_vdi_id = base_vid;
 
 	wlen = SD_MAX_VDI_LEN;
 
@@ -1072,8 +1069,8 @@ static int do_sd_create(char *filename, char *tag, int64_t total_sectors,
 		return -1;
 	}
 
-	if (oid)
-		*oid = rsp->oid;
+	if (vdi_id)
+		*vdi_id = rsp->vdi_id;
 
 	return 0;
 }
@@ -1081,7 +1078,7 @@ static int do_sd_create(char *filename, char *tag, int64_t total_sectors,
 static int sd_create(const char *filename, QEMUOptionParameter *options)
 {
 	int ret;
-	uint64_t oid = 0;
+	uint32_t vid = 0;
 	int64_t total_sectors = 0;
 	char *backing_file = NULL;
 
@@ -1116,12 +1113,12 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
 		if (snapid == CURRENT_VDI_ID)
 			return -1;
 
-		ret = find_vdi_name(bs.opaque, vdi, snapid, &oid);
+		ret = find_vdi_name(bs.opaque, vdi, snapid, &vid);
 		if (ret)
 			return -1;
 	}
 
-	return do_sd_create((char *)filename, NULL, total_sectors, oid, NULL, 0);
+	return do_sd_create((char *)filename, NULL, total_sectors, vid, NULL, 0);
 }
 
 static void sd_close(BlockDriverState *bs)
@@ -1220,8 +1217,8 @@ static void sd_write_done(struct sd_aiocb *acb)
 		s->inode_dirty = 0;
 		iov.iov_base = &s->inode;
 		iov.iov_len = sizeof(s->inode);
-		aio_req = alloc_aio_req(s, acb, s->inode.oid, sizeof(s->inode),
-					0, 0, 0, 0);
+		aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
+					sizeof(s->inode), 0, 0, 0, 0);
 		if (!aio_req) {
 			eprintf("too many requests\n");
 			acb->ret = -EIO;
@@ -1245,25 +1242,25 @@ out:
 static int sd_create_branch(struct bdrv_sd_state *s)
 {
 	int ret, copies;
-	uint64_t oid;
+	uint32_t vid;
 	char *buf;
 
-	eprintf("%" PRIx64 " is not current.\n", s->inode.oid);
+	eprintf("%" PRIx32 " is not current.\n", s->inode.vdi_id);
 
 	buf = malloc(SD_INODE_SIZE);
 	if (!buf)
 		return -1;
 
 	ret = do_sd_create(s->name, NULL, s->inode.vdi_size >> 9,
-			   s->inode.oid, &oid, 1);
+			   s->inode.vdi_id, &vid, 1);
 	if (ret)
 		goto out;
 
-	eprintf("%" PRIx64 " is created.\n", oid);
+	eprintf("%" PRIx32 " is created.\n", vid);
 
 	copies = s->inode.nr_copies;
 
-	ret = read_vdi_obj(buf, oid, &copies);
+	ret = read_vdi_obj(buf, vid, &copies);
 	if (ret < 0)
 		goto out;
 
@@ -1271,7 +1268,7 @@ static int sd_create_branch(struct bdrv_sd_state *s)
 
 	s->is_current = 1;
 	ret = 0;
-	eprintf("%" PRIx64 " was newly created.\n", s->inode.oid);
+	eprintf("%" PRIx32 " was newly created.\n", s->inode.vdi_id);
 
 out:
 	free(buf);
@@ -1307,11 +1304,11 @@ static void sd_readv_writev_bh_cb(void *p)
 		uint64_t old_oid = 0;
 		int create = 0;
 
-		oid = inode->data_oid[idx];
+		oid = vid_to_data_oid(inode->data_vdi_id[idx], idx);
 
 		len = min_t(unsigned long, total - done, SD_DATA_OBJ_SIZE - offset);
 
-		if (!oid) {
+		if (!inode->data_vdi_id[idx]) {
 			if (acb->aiocb_type == AIOCB_READ_UDATA)
 				goto done;
 
@@ -1327,7 +1324,7 @@ static void sd_readv_writev_bh_cb(void *p)
 			dprintf("update ino (%" PRIu64") %"
 				PRIu64 " %" PRIu64 " %" PRIu64 "\n",
 				inode->oid, oid, to_data_oid(inode->oid, idx), idx);
-			oid = to_data_oid(inode->oid, idx);
+			oid = vid_to_data_oid(inode->vdi_id, idx);
 			dprintf("new oid %lx\n", oid);
 		}
 
@@ -1418,7 +1415,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 	int ret, fd, dummy;
 	struct sd_obj_req hdr;
 	unsigned int rlen, wlen;
-	uint64_t new_oid;
+	uint32_t new_vid;
 	struct sd_inode *inode;
 
 	sd_release(bs);
@@ -1429,8 +1426,8 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 
 	if (!s->is_current) {
 		eprintf("You can't create a snapshot of "
-			"a non current VDI, %s (%" PRIu64 ").\n",
-			s->name, s->inode.oid);
+			"a non current VDI, %s (%" PRIu32 ").\n",
+			s->name, s->inode.vdi_id);
 
 		return -1;
 	}
@@ -1450,7 +1447,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 	memset(&hdr, 0, sizeof(hdr));
 	hdr.opcode = SD_OP_WRITE_OBJ;
 
-	hdr.oid = s->inode.oid;
+	hdr.oid = vid_to_vdi_oid(s->inode.vdi_id);
 	hdr.copies = s->inode.nr_copies;
 
 	hdr.flags |= SD_FLAG_CMD_WRITE;
@@ -1472,7 +1469,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 	}
 
 	ret = do_sd_create(s->name, NULL, s->inode.vdi_size >> 9,
-			   s->inode.oid, &new_oid, 1);
+			   s->inode.vdi_id, &new_vid, 1);
 	if (ret < 0) {
 		eprintf("failed to create inode for snapshot. %m\n");
 		ret = -EIO;
@@ -1485,15 +1482,15 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 		goto cleanup;
 	}
 
-	if (read_vdi_obj((char *)inode, new_oid, &dummy) < 0) {
+	if (read_vdi_obj((char *)inode, new_vid, &dummy) < 0) {
 		eprintf("failed to read new inode info. %m\n");
 		ret = -EIO;
 		goto cleanup;
 	}
 
 	memcpy(&s->inode, inode, sizeof(struct sd_inode));
-	eprintf("s->inode: name %s snap_id %x oid %lx\n",
-		s->inode.name, s->inode.snap_id, s->inode.oid);
+	eprintf("s->inode: name %s snap_id %x oid %x\n",
+		s->inode.name, s->inode.snap_id, s->inode.vdi_id);
 
 cleanup:
 	close(fd);
@@ -1506,7 +1503,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
 	struct bdrv_sd_state *old_s;
 	char vdi[256];
 	char *buf = NULL;
-	uint64_t oid;
+	uint32_t vid;
 	uint32_t snapid = 0;
 	int ret = -ENOENT, dummy;
 
@@ -1532,14 +1529,14 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
 		goto out;
 	}
 	strncpy(vdi, s->name, strlen(s->name)+1);
-	ret = find_vdi_name(s, vdi, snapid, &oid);
+	ret = find_vdi_name(s, vdi, snapid, &vid);
 	if (ret) {
 		eprintf("Failed to find_vdi_name\n");
 		ret = -ENOENT;
 		goto out;
 	}
 
-	ret = read_vdi_obj(buf, oid, &dummy);
+	ret = read_vdi_obj(buf, vid, &dummy);
 	if (ret) {
 		ret = -ENOENT;
 		goto out;
@@ -1590,11 +1587,6 @@ static inline int test_bit(unsigned int nr, const unsigned long *addr)
 		(((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
 }
 
-static inline uint64_t bit_to_oid(unsigned long nr)
-{
-	return ((unsigned long long)nr << VDI_SPACE_SHIFT) | VDI_BIT;
-}
-
 static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
 {
 	struct bdrv_sd_state *s = bs->opaque;
@@ -1644,7 +1636,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
 		if (!test_bit(i, vdi_inuse))
 			break;
 
-		ret = read_vdi_obj((char *)&inode, bit_to_oid(i), &copies);
+		ret = read_vdi_obj((char *)&inode, i, &copies);
 		if (ret)
 			continue;
 
@@ -1690,7 +1682,7 @@ static int do_load_save_vmstate(struct bdrv_sd_state *s, uint8_t *data,
 
 		data_len = min_t(unsigned int, size, SD_DATA_OBJ_SIZE);
 
-		vmstate_oid = to_vmstate_oid(s->inode.oid, vdi_index);
+		vmstate_oid = vid_to_vmstate_oid(s->inode.vdi_id, vdi_index);
 
 		memset(&hdr, 0, sizeof(hdr));
 		if (load) {
-- 
1.5.6.5




More information about the sheepdog mailing list