[Sheepdog] [PATCH] sheepdog: shrink vdi object size
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Fri Apr 30 02:01:49 CEST 2010
The 64-bit oid fields in struct sd_inode are redundant, because their
lower 32 bits can be derived. This patch stores only the 32-bit VDI ids,
which reduces the vdi object size and improves the performance of
metadata operations.
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
block/sheepdog.c | 126 +++++++++++++++++++++++++----------------------------
1 files changed, 59 insertions(+), 67 deletions(-)
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 780dc5e..f8863f8 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -76,11 +76,10 @@
#define SD_RES_VDI_NOT_LOCKED 0x17 /* Vdi is not locked */
#define SD_RES_SHUTDOWN 0x18 /* Sheepdog is shutting down */
-#define VDI_VMSTATE_BIT 0x4000000000000000
-
#define SD_NR_VDIS (1U << 24)
#define VDI_SPACE_SHIFT 32
#define VDI_BIT (UINT64_C(1) << 63)
+#define VMSTATE_BIT (UINT64_C(1) << 62)
#define DEAFAULT_NR_COPIES 1
#define SD_MAX_VDI_LEN 256
#define MAX_DATA_OBJS (1ULL << 20)
@@ -159,11 +158,11 @@ struct sd_vdi_req {
uint32_t epoch;
uint32_t id;
uint32_t data_length;
- uint64_t base_oid;
uint64_t vdi_size;
+ uint32_t base_vdi_id;
uint32_t copies;
uint32_t snapid;
- uint32_t pad[2];
+ uint32_t pad[3];
};
struct sd_vdi_rsp {
@@ -175,13 +174,12 @@ struct sd_vdi_rsp {
uint32_t data_length;
uint32_t result;
uint32_t rsvd;
- uint64_t oid;
- uint32_t pad[4];
+ uint32_t vdi_id;
+ uint32_t pad[5];
};
struct sd_inode {
char name[SD_MAX_VDI_LEN];
- uint64_t oid;
uint64_t ctime;
uint64_t snap_ctime;
uint64_t vm_clock_nsec;
@@ -191,9 +189,10 @@ struct sd_inode {
uint8_t nr_copies;
uint8_t block_size_shift;
uint32_t snap_id;
- uint64_t parent_oid;
- uint64_t child_oid[MAX_CHILDREN];
- uint64_t data_oid[MAX_DATA_OBJS];
+ uint32_t vdi_id;
+ uint32_t parent_vdi_id;
+ uint32_t child_vdi_id[MAX_CHILDREN];
+ uint32_t data_vdi_id[MAX_DATA_OBJS];
};
struct aio_req {
@@ -306,23 +305,22 @@ static inline int after(uint32_t seq1, uint32_t seq2)
static inline int is_data_obj_writeable(struct sd_inode *inode, unsigned int idx)
{
- return ((inode->oid & ~VDI_BIT) >> VDI_SPACE_SHIFT) ==
- (inode->data_oid[idx] >> VDI_SPACE_SHIFT);
+ return inode->vdi_id == inode->data_vdi_id[idx];
}
-static inline int is_data_obj(uint64_t oid)
+static inline uint64_t vid_to_vdi_oid(uint32_t vid)
{
- return !(VDI_BIT & oid);
+ return VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT);
}
-static inline uint64_t to_data_oid(uint64_t vdi_oid, unsigned int idx)
+static inline uint64_t vid_to_vmstate_oid(uint32_t vid, uint32_t idx)
{
- return (vdi_oid & ~VDI_BIT) | idx;
+ return VMSTATE_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT) | idx;
}
-static inline uint64_t to_vmstate_oid(uint64_t vdi_oid, unsigned int idx)
+static inline uint64_t vid_to_data_oid(uint32_t vid, uint32_t idx)
{
- return (vdi_oid & ~VDI_BIT) | VDI_VMSTATE_BIT | idx;
+ return ((uint64_t)vid << VDI_SPACE_SHIFT) | idx;
}
/*
@@ -694,7 +692,6 @@ static void aio_read_response(void *opaque)
struct sd_aiocb *acb;
int rest;
unsigned long idx;
- uint64_t oid;
if (!nr_outstanding_aio_req(s))
return;
@@ -711,13 +708,13 @@ static void aio_read_response(void *opaque)
switch (acb->aiocb_type) {
case AIOCB_WRITE_UDATA:
idx = acb->sector_num * 512 / SD_DATA_OBJ_SIZE;
- oid = to_data_oid(s->inode.oid, idx);
- if (s->inode.data_oid[idx] != oid) {
- s->inode.data_oid[idx] = oid;
+ if (s->inode.data_vdi_id[idx] != s->inode.vdi_id) {
+ s->inode.data_vdi_id[idx] = s->inode.vdi_id;
s->inode_dirty = 1;
- send_pending_req(s, oid, rsp->id);
+ send_pending_req(s, vid_to_data_oid(s->inode.vdi_id, idx),
+ rsp->id);
}
break;
case AIOCB_READ_UDATA:
@@ -829,7 +826,7 @@ static int parse_vdiname(const char *filename, char *vdi, int vdi_len,
}
static int find_vdi_name(struct bdrv_sd_state *s, char *filename, uint32_t snapid,
- uint64_t *oid)
+ uint32_t *vid)
{
int ret, fd;
struct sd_vdi_req hdr;
@@ -860,7 +857,7 @@ static int find_vdi_name(struct bdrv_sd_state *s, char *filename, uint32_t snapi
ret = -1;
goto out;
}
- *oid = rsp->oid;
+ *vid = rsp->vdi_id;
ret = 0;
out:
@@ -932,7 +929,7 @@ static int add_aio_request(struct bdrv_sd_state *s, struct aio_req *aio_req,
return 0;
}
-static int read_vdi_obj(char *buf, uint64_t oid, int *copies)
+static int read_vdi_obj(char *buf, uint32_t vid, int *copies)
{
struct sd_obj_req hdr;
struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
@@ -945,7 +942,7 @@ static int read_vdi_obj(char *buf, uint64_t oid, int *copies)
memset(&hdr, 0, sizeof(hdr));
hdr.opcode = SD_OP_READ_OBJ;
- hdr.oid = oid;
+ hdr.oid = vid_to_vdi_oid(vid);
hdr.data_length = rlen;
fd = connect_to_vost();
@@ -977,7 +974,7 @@ static int read_vdi_obj(char *buf, uint64_t oid, int *copies)
static int sd_open(BlockDriverState *bs, const char *filename, int flags)
{
int ret, i;
- uint64_t oid = 0;
+ uint32_t vid = 0;
struct bdrv_sd_state *s = bs->opaque;
char vdi[256];
uint32_t snapid;
@@ -1006,16 +1003,16 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
if (parse_vdiname(filename, vdi, sizeof(vdi), &snapid) < 0)
goto out;
- ret = find_vdi_name(s, vdi, snapid, &oid);
+ ret = find_vdi_name(s, vdi, snapid, &vid);
if (ret)
goto out;
if (snapid)
- eprintf("%" PRIx64 " non current inode was open.\n", oid);
+ eprintf("%" PRIx32 " non current inode was open.\n", vid);
else
s->is_current = 1;
- ret = read_vdi_obj(buf, oid, &dummy);
+ ret = read_vdi_obj(buf, vid, &dummy);
if (ret)
goto out;
@@ -1034,7 +1031,7 @@ out:
}
static int do_sd_create(char *filename, char *tag, int64_t total_sectors,
- uint64_t base_oid, uint64_t *oid, int snapshot)
+ uint32_t base_vid, uint32_t *vdi_id, int snapshot)
{
struct sd_vdi_req hdr;
struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr;
@@ -1050,7 +1047,7 @@ static int do_sd_create(char *filename, char *tag, int64_t total_sectors,
memset(&hdr, 0, sizeof(hdr));
hdr.opcode = SD_OP_NEW_VDI;
- hdr.base_oid = base_oid;
+ hdr.base_vdi_id = base_vid;
wlen = SD_MAX_VDI_LEN;
@@ -1072,8 +1069,8 @@ static int do_sd_create(char *filename, char *tag, int64_t total_sectors,
return -1;
}
- if (oid)
- *oid = rsp->oid;
+ if (vdi_id)
+ *vdi_id = rsp->vdi_id;
return 0;
}
@@ -1081,7 +1078,7 @@ static int do_sd_create(char *filename, char *tag, int64_t total_sectors,
static int sd_create(const char *filename, QEMUOptionParameter *options)
{
int ret;
- uint64_t oid = 0;
+ uint32_t vid = 0;
int64_t total_sectors = 0;
char *backing_file = NULL;
@@ -1116,12 +1113,12 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
if (snapid == CURRENT_VDI_ID)
return -1;
- ret = find_vdi_name(bs.opaque, vdi, snapid, &oid);
+ ret = find_vdi_name(bs.opaque, vdi, snapid, &vid);
if (ret)
return -1;
}
- return do_sd_create((char *)filename, NULL, total_sectors, oid, NULL, 0);
+ return do_sd_create((char *)filename, NULL, total_sectors, vid, NULL, 0);
}
static void sd_close(BlockDriverState *bs)
@@ -1220,8 +1217,8 @@ static void sd_write_done(struct sd_aiocb *acb)
s->inode_dirty = 0;
iov.iov_base = &s->inode;
iov.iov_len = sizeof(s->inode);
- aio_req = alloc_aio_req(s, acb, s->inode.oid, sizeof(s->inode),
- 0, 0, 0, 0);
+ aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
+ sizeof(s->inode), 0, 0, 0, 0);
if (!aio_req) {
eprintf("too many requests\n");
acb->ret = -EIO;
@@ -1245,25 +1242,25 @@ out:
static int sd_create_branch(struct bdrv_sd_state *s)
{
int ret, copies;
- uint64_t oid;
+ uint32_t vid;
char *buf;
- eprintf("%" PRIx64 " is not current.\n", s->inode.oid);
+ eprintf("%" PRIx32 " is not current.\n", s->inode.vdi_id);
buf = malloc(SD_INODE_SIZE);
if (!buf)
return -1;
ret = do_sd_create(s->name, NULL, s->inode.vdi_size >> 9,
- s->inode.oid, &oid, 1);
+ s->inode.vdi_id, &vid, 1);
if (ret)
goto out;
- eprintf("%" PRIx64 " is created.\n", oid);
+ eprintf("%" PRIx32 " is created.\n", vid);
copies = s->inode.nr_copies;
- ret = read_vdi_obj(buf, oid, &copies);
+ ret = read_vdi_obj(buf, vid, &copies);
if (ret < 0)
goto out;
@@ -1271,7 +1268,7 @@ static int sd_create_branch(struct bdrv_sd_state *s)
s->is_current = 1;
ret = 0;
- eprintf("%" PRIx64 " was newly created.\n", s->inode.oid);
+ eprintf("%" PRIx32 " was newly created.\n", s->inode.vdi_id);
out:
free(buf);
@@ -1307,11 +1304,11 @@ static void sd_readv_writev_bh_cb(void *p)
uint64_t old_oid = 0;
int create = 0;
- oid = inode->data_oid[idx];
+ oid = vid_to_data_oid(inode->data_vdi_id[idx], idx);
len = min_t(unsigned long, total - done, SD_DATA_OBJ_SIZE - offset);
- if (!oid) {
+ if (!inode->data_vdi_id[idx]) {
if (acb->aiocb_type == AIOCB_READ_UDATA)
goto done;
@@ -1327,7 +1324,7 @@ static void sd_readv_writev_bh_cb(void *p)
dprintf("update ino (%" PRIu64") %"
PRIu64 " %" PRIu64 " %" PRIu64 "\n",
inode->oid, oid, to_data_oid(inode->oid, idx), idx);
- oid = to_data_oid(inode->oid, idx);
+ oid = vid_to_data_oid(inode->vdi_id, idx);
dprintf("new oid %lx\n", oid);
}
@@ -1418,7 +1415,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
int ret, fd, dummy;
struct sd_obj_req hdr;
unsigned int rlen, wlen;
- uint64_t new_oid;
+ uint32_t new_vid;
struct sd_inode *inode;
sd_release(bs);
@@ -1429,8 +1426,8 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
if (!s->is_current) {
eprintf("You can't create a snapshot of "
- "a non current VDI, %s (%" PRIu64 ").\n",
- s->name, s->inode.oid);
+ "a non current VDI, %s (%" PRIu32 ").\n",
+ s->name, s->inode.vdi_id);
return -1;
}
@@ -1450,7 +1447,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
memset(&hdr, 0, sizeof(hdr));
hdr.opcode = SD_OP_WRITE_OBJ;
- hdr.oid = s->inode.oid;
+ hdr.oid = vid_to_vdi_oid(s->inode.vdi_id);
hdr.copies = s->inode.nr_copies;
hdr.flags |= SD_FLAG_CMD_WRITE;
@@ -1472,7 +1469,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
}
ret = do_sd_create(s->name, NULL, s->inode.vdi_size >> 9,
- s->inode.oid, &new_oid, 1);
+ s->inode.vdi_id, &new_vid, 1);
if (ret < 0) {
eprintf("failed to create inode for snapshot. %m\n");
ret = -EIO;
@@ -1485,15 +1482,15 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
goto cleanup;
}
- if (read_vdi_obj((char *)inode, new_oid, &dummy) < 0) {
+ if (read_vdi_obj((char *)inode, new_vid, &dummy) < 0) {
eprintf("failed to read new inode info. %m\n");
ret = -EIO;
goto cleanup;
}
memcpy(&s->inode, inode, sizeof(struct sd_inode));
- eprintf("s->inode: name %s snap_id %x oid %lx\n",
- s->inode.name, s->inode.snap_id, s->inode.oid);
+ eprintf("s->inode: name %s snap_id %x oid %x\n",
+ s->inode.name, s->inode.snap_id, s->inode.vdi_id);
cleanup:
close(fd);
@@ -1506,7 +1503,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
struct bdrv_sd_state *old_s;
char vdi[256];
char *buf = NULL;
- uint64_t oid;
+ uint32_t vid;
uint32_t snapid = 0;
int ret = -ENOENT, dummy;
@@ -1532,14 +1529,14 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
goto out;
}
strncpy(vdi, s->name, strlen(s->name)+1);
- ret = find_vdi_name(s, vdi, snapid, &oid);
+ ret = find_vdi_name(s, vdi, snapid, &vid);
if (ret) {
eprintf("Failed to find_vdi_name\n");
ret = -ENOENT;
goto out;
}
- ret = read_vdi_obj(buf, oid, &dummy);
+ ret = read_vdi_obj(buf, vid, &dummy);
if (ret) {
ret = -ENOENT;
goto out;
@@ -1590,11 +1587,6 @@ static inline int test_bit(unsigned int nr, const unsigned long *addr)
(((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
}
-static inline uint64_t bit_to_oid(unsigned long nr)
-{
- return ((unsigned long long)nr << VDI_SPACE_SHIFT) | VDI_BIT;
-}
-
static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
{
struct bdrv_sd_state *s = bs->opaque;
@@ -1644,7 +1636,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
if (!test_bit(i, vdi_inuse))
break;
- ret = read_vdi_obj((char *)&inode, bit_to_oid(i), &copies);
+ ret = read_vdi_obj((char *)&inode, i, &copies);
if (ret)
continue;
@@ -1690,7 +1682,7 @@ static int do_load_save_vmstate(struct bdrv_sd_state *s, uint8_t *data,
data_len = min_t(unsigned int, size, SD_DATA_OBJ_SIZE);
- vmstate_oid = to_vmstate_oid(s->inode.oid, vdi_index);
+ vmstate_oid = vid_to_vmstate_oid(s->inode.vdi_id, vdi_index);
memset(&hdr, 0, sizeof(hdr));
if (load) {
--
1.5.6.5
More information about the sheepdog
mailing list