[Sheepdog] [PATCH] shrink vdi object size
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Fri Apr 30 02:03:15 CEST 2010
The 64-bit oid fields in struct sheepdog_inode are redundant,
because the lower 32 bits of each oid can be derived. This patch stores
32-bit vdi ids instead, which reduces the vdi object size and improves
metadata operation performance.
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
collie/collie.h | 8 +-
collie/group.c | 24 ++++----
collie/store.c | 9 +--
collie/vdi.c | 157 ++++++++++++++++++++++++----------------------
include/meta.h | 31 +++++----
include/sheepdog_proto.h | 8 +-
shepherd/shepherd.c | 99 +++++++++++++++--------------
shepherd/treeview.c | 26 ++++----
shepherd/treeview.h | 4 +-
9 files changed, 188 insertions(+), 178 deletions(-)
diff --git a/collie/collie.h b/collie/collie.h
index d827eb4..db4fd5c 100644
--- a/collie/collie.h
+++ b/collie/collie.h
@@ -92,12 +92,12 @@ int create_listen_port(int port, void *data);
int init_store(char *dir);
int add_vdi(char *data, int data_len, uint64_t size,
- uint64_t *new_oid, uint64_t base_oid, uint32_t copies,
+ uint32_t *new_vid, uint32_t base_vid, uint32_t copies,
int is_snapshot);
int del_vdi(char *data, int data_len, uint32_t snapid);
-int lookup_vdi(char *data, int data_len, uint64_t *oid, uint32_t snapid);
+int lookup_vdi(char *data, int data_len, uint32_t *vid, uint32_t snapid);
int read_vdis(char *data, int len, unsigned int *rsp_len);
@@ -129,8 +129,8 @@ int remove_epoch(int epoch);
int set_cluster_ctime(uint64_t ctime);
uint64_t get_cluster_ctime(void);
-int start_recovery(uint32_t epoch, unsigned long *failed_vdis, int nr_failed_vdis);
-int start_deletion(uint64_t oid);
+int start_recovery(uint32_t epoch, uint32_t *failed_vdis, int nr_failed_vdis);
+int start_deletion(uint32_t vid);
static inline int is_myself(struct sheepdog_node_list_entry *e)
{
diff --git a/collie/group.c b/collie/group.c
index 9247438..b9f87b1 100644
--- a/collie/group.c
+++ b/collie/group.c
@@ -85,7 +85,7 @@ struct work_confchg {
struct cpg_address *joined_list;
size_t joined_list_entries;
- unsigned long *failed_vdis;
+ uint32_t *failed_vdis;
int nr_failed_vdis;
int first_cpg_node;
int sd_node_left;
@@ -634,12 +634,12 @@ static void vdi_op(struct vdi_op_message *msg)
struct sd_vdi_rsp *rsp = &msg->rsp;
void *data = msg->data;
int ret = SD_RES_SUCCESS;
- uint64_t oid = 0;
+ uint32_t vid = 0;
switch (hdr->opcode) {
case SD_OP_NEW_VDI:
- ret = add_vdi(data, hdr->data_length, hdr->vdi_size, &oid,
- hdr->base_oid, hdr->copies,
+ ret = add_vdi(data, hdr->data_length, hdr->vdi_size, &vid,
+ hdr->base_vdi_id, hdr->copies,
hdr->snapid);
break;
case SD_OP_DEL_VDI:
@@ -651,7 +651,7 @@ static void vdi_op(struct vdi_op_message *msg)
break;
case SD_OP_LOCK_VDI:
case SD_OP_GET_VDI_INFO:
- ret = lookup_vdi(data, hdr->data_length, &oid, hdr->snapid);
+ ret = lookup_vdi(data, hdr->data_length, &vid, hdr->snapid);
if (ret != SD_RES_SUCCESS)
break;
break;
@@ -668,7 +668,7 @@ static void vdi_op(struct vdi_op_message *msg)
break;
}
- rsp->oid = oid;
+ rsp->vdi_id = vid;
rsp->result = ret;
}
@@ -690,8 +690,8 @@ static void vdi_op_done(struct vdi_op_message *msg)
switch (hdr->opcode) {
case SD_OP_NEW_VDI:
{
- unsigned long nr = (rsp->oid & ~VDI_BIT) >> VDI_SPACE_SHIFT;
- vprintf(SDOG_INFO "done %d %ld %" PRIx64 "\n", ret, nr, rsp->oid);
+ unsigned long nr = rsp->vdi_id;
+ vprintf(SDOG_INFO "done %d %ld\n", ret, nr);
set_bit(nr, sys->vdi_inuse);
break;
}
@@ -728,7 +728,7 @@ static void vdi_op_done(struct vdi_op_message *msg)
case SD_OP_GET_VDI_INFO:
break;
case SD_OP_MAKE_FS:
- sys->nr_sobjs = ((struct sd_vdi_req *)hdr)->copies;
+ sys->nr_sobjs = ((struct sd_so_req *)hdr)->copies;
ctime = ((struct sd_so_req *)hdr)->ctime;
set_cluster_ctime(ctime);
@@ -1028,7 +1028,7 @@ static void del_node(struct cpg_address *addr, struct work_confchg *w)
struct sheepdog_node_list_entry e[SD_MAX_NODES];
struct vm *vm, *n;
int ret, size;
- uint64_t oid;
+ uint32_t vid;
void *buf;
w->sd_node_left++;
@@ -1053,9 +1053,9 @@ static void del_node(struct cpg_address *addr, struct work_confchg *w)
}
ret = lookup_vdi((char *)vm->ent.name,
- sizeof(vm->ent.name), &oid, 0);
+ sizeof(vm->ent.name), &vid, 0);
if (ret == SD_RES_SUCCESS)
- w->failed_vdis[w->nr_failed_vdis++] = oid_to_bit(oid);
+ w->failed_vdis[w->nr_failed_vdis++] = vid;
else
eprintf("cannot find vdi %s\n", vm->ent.name);
diff --git a/collie/store.c b/collie/store.c
index 3495674..bf5824f 100644
--- a/collie/store.c
+++ b/collie/store.c
@@ -1254,7 +1254,7 @@ static void recover_one(struct work *work, int idx)
cur_idx = obj_to_sheep(cur_entry, cur_nr, oid, 0);
for (i = 0; i < rw->nr_failed_vdis; i++) {
- if (rw->failed_vdis[i] == oid_to_bit(oid))
+ if (rw->failed_vdis[i] == oid_to_vid(oid))
is_failed_oid = 1;
}
@@ -1506,7 +1506,7 @@ fail:
return;
}
-int start_recovery(uint32_t epoch, unsigned long *failed_vdis, int nr_failed_vdis)
+int start_recovery(uint32_t epoch, uint32_t *failed_vdis, int nr_failed_vdis)
{
struct recovery_work *rw;
@@ -1677,10 +1677,9 @@ static int init_epoch_path(char *base_path)
if (is_data_obj(oid))
continue;
- vprintf(SDOG_DEBUG "found the vdi obj, %" PRIx64 " %lu\n",
- oid, oid_to_bit(oid));
+ vprintf(SDOG_DEBUG "found the vdi obj, %" PRIx64 "\n", oid);
- set_bit(oid_to_bit(oid), sys->vdi_inuse);
+ set_bit(oid_to_vid(oid), sys->vdi_inuse);
}
closedir(dir);
diff --git a/collie/vdi.c b/collie/vdi.c
index baf978f..dc2c082 100644
--- a/collie/vdi.c
+++ b/collie/vdi.c
@@ -18,8 +18,8 @@
/* TODO: should be performed atomically */
-static int create_vdi_obj(char *name, uint64_t new_oid, uint64_t size,
- uint64_t base_oid, uint64_t cur_oid, uint32_t copies,
+static int create_vdi_obj(char *name, uint32_t new_vid, uint64_t size,
+ uint32_t base_vid, uint32_t cur_vid, uint32_t copies,
uint32_t snapid, int is_snapshot)
{
struct sheepdog_node_list_entry entries[SD_MAX_NODES];
@@ -31,10 +31,10 @@ static int create_vdi_obj(char *name, uint64_t new_oid, uint64_t size,
nr_nodes = get_ordered_sd_node_list(entries);
- if (base_oid) {
+ if (base_vid) {
ret = read_object(entries, nr_nodes, sys->epoch,
- base_oid, (char *)&base, sizeof(base), 0,
- copies);
+ vid_to_vdi_oid(base_vid), (char *)&base,
+ sizeof(base), 0, copies);
if (ret < 0)
return SD_RES_BASE_VDI_READ;
}
@@ -42,13 +42,13 @@ static int create_vdi_obj(char *name, uint64_t new_oid, uint64_t size,
gettimeofday(&tv, NULL);
if (is_snapshot) {
- if (cur_oid != base_oid) {
- vprintf(SDOG_INFO "tree snapshot %s %" PRIx64 " %" PRIx64 "\n",
- name, cur_oid, base_oid);
+ if (cur_vid != base_vid) {
+ vprintf(SDOG_INFO "tree snapshot %s %" PRIx32 " %" PRIx32 "\n",
+ name, cur_vid, base_vid);
ret = read_object(entries, nr_nodes, sys->epoch,
- cur_oid, (char *)&cur, sizeof(cur), 0,
- copies);
+ vid_to_vdi_oid(cur_vid), (char *)&cur,
+ sizeof(cur), 0, copies);
if (ret < 0) {
vprintf(SDOG_ERR "failed\n");
return SD_RES_BASE_VDI_READ;
@@ -62,7 +62,7 @@ static int create_vdi_obj(char *name, uint64_t new_oid, uint64_t size,
memset(&new, 0, sizeof(new));
strncpy(new.name, name, sizeof(new.name));
- new.oid = new_oid;
+ new.vdi_id = new_vid;
new.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000;
new.vdi_size = size;
new.copy_policy = 0;
@@ -70,38 +70,37 @@ static int create_vdi_obj(char *name, uint64_t new_oid, uint64_t size,
new.block_size_shift = find_next_bit(&block_size, BITS_PER_LONG, 0);
new.snap_id = snapid;
- if (base_oid) {
+ if (base_vid) {
int i;
- new.parent_oid = base_oid;
- memcpy(new.data_oid, base.data_oid,
- MAX_DATA_OBJS * sizeof(uint64_t));
+ new.parent_vdi_id = base_vid;
+ memcpy(new.data_vdi_id, base.data_vdi_id, sizeof(new.data_vdi_id));
- for (i = 0; i < ARRAY_SIZE(base.child_oid); i++) {
- if (!base.child_oid[i]) {
- base.child_oid[i] = new_oid;
+ for (i = 0; i < ARRAY_SIZE(base.child_vdi_id); i++) {
+ if (!base.child_vdi_id[i]) {
+ base.child_vdi_id[i] = new_vid;
break;
}
}
- if (i == ARRAY_SIZE(base.child_oid))
+ if (i == ARRAY_SIZE(base.child_vdi_id))
return SD_RES_NO_BASE_VDI;
}
- if (is_snapshot && cur_oid != base_oid) {
+ if (is_snapshot && cur_vid != base_vid) {
ret = write_object(entries, nr_nodes, sys->epoch,
- cur_oid, (char *)&cur, sizeof(cur), 0,
- copies, 0);
+ vid_to_vdi_oid(cur_vid), (char *)&cur,
+ sizeof(cur), 0, copies, 0);
if (ret < 0) {
vprintf(SDOG_ERR "failed\n");
return SD_RES_BASE_VDI_READ;
}
}
- if (base_oid) {
- ret = write_object(entries, nr_nodes,
- sys->epoch, base_oid, (char *)&base,
+ if (base_vid) {
+ ret = write_object(entries, nr_nodes, sys->epoch,
+ vid_to_vdi_oid(base_vid), (char *)&base,
sizeof(base), 0, copies, 0);
if (ret < 0) {
vprintf(SDOG_ERR "failed\n");
@@ -110,7 +109,8 @@ static int create_vdi_obj(char *name, uint64_t new_oid, uint64_t size,
}
ret = write_object(entries, nr_nodes, sys->epoch,
- new_oid, (char *)&new, sizeof(new), 0, copies, 1);
+ vid_to_vdi_oid(new_vid), (char *)&new, sizeof(new),
+ 0, copies, 1);
if (ret < 0)
return SD_RES_VDI_WRITE;
@@ -118,7 +118,7 @@ static int create_vdi_obj(char *name, uint64_t new_oid, uint64_t size,
}
static int find_first_vdi(unsigned long start, unsigned long end,
- char *name, int namelen, uint32_t snapid, uint64_t *oid,
+ char *name, int namelen, uint32_t snapid, uint32_t *vid,
unsigned long *deleted_nr, uint32_t *next_snap)
{
struct sheepdog_node_list_entry entries[SD_MAX_NODES];
@@ -135,8 +135,8 @@ static int find_first_vdi(unsigned long start, unsigned long end,
for (i = start; i >= end; i--) {
ret = read_object(entries, nr_nodes, sys->epoch,
- bit_to_oid(i), (char *)&inode, sizeof(inode), 0,
- nr_reqs);
+ vid_to_vdi_oid(i), (char *)&inode,
+ sizeof(inode), 0, nr_reqs);
if (ret < 0)
return SD_RES_EIO;
@@ -150,7 +150,7 @@ static int find_first_vdi(unsigned long start, unsigned long end,
continue;
*next_snap = inode.snap_id + 1;
- *oid = inode.oid;
+ *vid = inode.vdi_id;
return SD_RES_SUCCESS;
}
}
@@ -158,7 +158,7 @@ static int find_first_vdi(unsigned long start, unsigned long end,
}
-static int do_lookup_vdi(char *name, int namelen, uint64_t *oid, uint32_t snapid,
+static int do_lookup_vdi(char *name, int namelen, uint32_t *vid, uint32_t snapid,
uint32_t *next_snapid,
unsigned long *right_nr, unsigned long *deleted_nr)
{
@@ -177,7 +177,7 @@ static int do_lookup_vdi(char *name, int namelen, uint64_t *oid, uint32_t snapid
} else if (nr < SD_NR_VDIS) {
right_side:
/* look up on the right side of the hash point */
- ret = find_first_vdi(nr - 1, start_nr, name, namelen, snapid, oid,
+ ret = find_first_vdi(nr - 1, start_nr, name, namelen, snapid, vid,
deleted_nr, next_snapid);
return ret;
} else {
@@ -188,7 +188,7 @@ static int do_lookup_vdi(char *name, int namelen, uint64_t *oid, uint32_t snapid
return SD_RES_FULL_VDI;
else if (nr) {
/* look up on the left side of the hash point */
- ret = find_first_vdi(nr - 1, 0, name, namelen, snapid, oid,
+ ret = find_first_vdi(nr - 1, 0, name, namelen, snapid, vid,
deleted_nr, next_snapid);
if (ret == SD_RES_NO_VDI)
; /* we need to go to the right side */
@@ -201,7 +201,7 @@ static int do_lookup_vdi(char *name, int namelen, uint64_t *oid, uint32_t snapid
}
}
-int lookup_vdi(char *data, int data_len, uint64_t *oid, uint32_t snapid)
+int lookup_vdi(char *data, int data_len, uint32_t *vid, uint32_t snapid)
{
char *name = data;
uint32_t dummy0;
@@ -210,14 +210,14 @@ int lookup_vdi(char *data, int data_len, uint64_t *oid, uint32_t snapid)
if (data_len != SD_MAX_VDI_LEN)
return SD_RES_INVALID_PARMS;
- return do_lookup_vdi(name, strlen(name), oid, snapid,
+ return do_lookup_vdi(name, strlen(name), vid, snapid,
&dummy0, &dummy1, &dummy2);
}
int add_vdi(char *data, int data_len, uint64_t size,
- uint64_t *new_oid, uint64_t base_oid, uint32_t copies, int is_snapshot)
+ uint32_t *new_vid, uint32_t base_vid, uint32_t copies, int is_snapshot)
{
- uint64_t cur_oid;
+ uint32_t cur_vid;
uint32_t next_snapid;
unsigned long nr, deleted_nr = SD_NR_VDIS, right_nr = SD_NR_VDIS;
int ret;
@@ -228,7 +228,7 @@ int add_vdi(char *data, int data_len, uint64_t size,
name = data;
- ret = do_lookup_vdi(name, strlen(name), &cur_oid, 0, &next_snapid,
+ ret = do_lookup_vdi(name, strlen(name), &cur_vid, 0, &next_snapid,
&right_nr, &deleted_nr);
if (is_snapshot) {
@@ -254,11 +254,11 @@ int add_vdi(char *data, int data_len, uint64_t size,
next_snapid = 1;
}
- *new_oid = bit_to_oid(nr);
+ *new_vid = nr;
- vprintf(SDOG_INFO "we create a new vdi, %d %s (%zd) %" PRIu64 ", oid: %"
- PRIx64 ", base %" PRIx64 ", cur %" PRIx64 " \n",
- is_snapshot, name, strlen(name), size, *new_oid, base_oid, cur_oid);
+ vprintf(SDOG_INFO "we create a new vdi, %d %s (%zd) %" PRIu64 ", vid: %"
+ PRIx32 ", base %" PRIx32 ", cur %" PRIx32 " \n",
+ is_snapshot, name, strlen(name), size, *new_vid, base_vid, cur_vid);
if (!copies) {
vprintf(SDOG_WARNING "qemu doesn't specify the copies... %d\n",
@@ -266,7 +266,7 @@ int add_vdi(char *data, int data_len, uint64_t size,
copies = sys->nr_sobjs;
}
- ret = create_vdi_obj(name, *new_oid, size, base_oid, cur_oid, copies,
+ ret = create_vdi_obj(name, *new_vid, size, base_vid, cur_vid, copies,
next_snapid, is_snapshot);
return ret;
@@ -275,7 +275,7 @@ int add_vdi(char *data, int data_len, uint64_t size,
int del_vdi(char *data, int data_len, uint32_t snapid)
{
char *name = data;
- uint64_t oid;
+ uint32_t vid;
uint32_t dummy0;
unsigned long dummy1, dummy2;
int ret;
@@ -286,7 +286,7 @@ int del_vdi(char *data, int data_len, uint32_t snapid)
if (data_len != SD_MAX_VDI_LEN)
return SD_RES_INVALID_PARMS;
- ret = do_lookup_vdi(name, strlen(name), &oid, snapid,
+ ret = do_lookup_vdi(name, strlen(name), &vid, snapid,
&dummy0, &dummy1, &dummy2);
if (ret != SD_RES_SUCCESS)
return ret;
@@ -297,7 +297,7 @@ int del_vdi(char *data, int data_len, uint32_t snapid)
nr_reqs = nr_nodes;
ret = read_object(entries, nr_nodes, sys->epoch,
- oid, (char *)&inode, sizeof(inode), 0,
+ vid_to_vdi_oid(vid), (char *)&inode, sizeof(inode), 0,
nr_reqs);
if (ret < 0)
return SD_RES_EIO;
@@ -305,12 +305,12 @@ int del_vdi(char *data, int data_len, uint32_t snapid)
memset(inode.name, 0, sizeof(inode.name));
ret = write_object(entries, nr_nodes, sys->epoch,
- oid, (char *)&inode, sizeof(inode), 0,
+ vid_to_vdi_oid(vid), (char *)&inode, sizeof(inode), 0,
nr_reqs, 0);
if (ret < 0)
return SD_RES_EIO;
- ret = start_deletion(oid);
+ ret = start_deletion(vid);
if (ret < 0)
return SD_RES_NO_MEM;
@@ -334,7 +334,7 @@ struct deletion_work {
struct work work;
struct list_head dw_siblings;
- uint64_t oid;
+ uint32_t vid;
int count;
char *buf;
@@ -346,18 +346,19 @@ static int deleting;
static void delete_one(struct work *work, int idx)
{
struct deletion_work *dw = container_of(work, struct deletion_work, work);
- uint64_t vdi_oid = *(((uint64_t *)dw->buf) + dw->count - dw->done - 1);
+ uint32_t vdi_id = *(((uint32_t *)dw->buf) + dw->count - dw->done - 1);
struct sheepdog_node_list_entry entries[SD_MAX_NODES];
int nr_nodes;
int ret, i;
static struct sheepdog_inode inode;
- eprintf("%d %d, %16lx\n", dw->done, dw->count, vdi_oid);
+ eprintf("%d %d, %16x\n", dw->done, dw->count, vdi_id);
nr_nodes = get_ordered_sd_node_list(entries);
ret = read_object(entries, nr_nodes, sys->epoch,
- vdi_oid, (void *)&inode, sizeof(inode), 0, sys->nr_sobjs);
+ vid_to_vdi_oid(vdi_id), (void *)&inode, sizeof(inode),
+ 0, sys->nr_sobjs);
if (ret != sizeof(inode)) {
eprintf("cannot find vdi object\n");
@@ -365,15 +366,17 @@ static void delete_one(struct work *work, int idx)
}
for (i = 0; i < MAX_DATA_OBJS; i++) {
- if (!inode.data_oid[i])
+ if (!inode.data_vdi_id[i])
continue;
remove_object(entries, nr_nodes, sys->epoch,
- inode.data_oid[i], inode.nr_copies);
+ vid_to_data_oid(inode.data_vdi_id[i], i),
+ inode.nr_copies);
}
- if (remove_object(entries, nr_nodes, sys->epoch, vdi_oid, sys->nr_sobjs))
- eprintf("failed to remove vdi objects %lx\n", vdi_oid);
+ if (remove_object(entries, nr_nodes, sys->epoch, vid_to_vdi_oid(vdi_id),
+ sys->nr_sobjs))
+ eprintf("failed to remove vdi objects %x\n", vdi_id);
}
static void __start_deletion(struct work *work, int idx);
@@ -407,18 +410,19 @@ static void delete_one_done(struct work *work, int idx)
static int fill_vdi_list(struct deletion_work *dw,
struct sheepdog_node_list_entry *entries,
- int nr_entries, uint64_t root_oid)
+ int nr_entries, uint32_t root_vid)
{
int ret, i;
static struct sheepdog_inode inode;
int done = dw->count;
- uint64_t oid;
+ uint32_t vid;
- ((uint64_t *)dw->buf)[dw->count++] = root_oid;
+ ((uint32_t *)dw->buf)[dw->count++] = root_vid;
again:
- oid = ((uint64_t *)dw->buf)[done++];
+ vid = ((uint32_t *)dw->buf)[done++];
ret = read_object(entries, nr_entries, sys->epoch,
- oid, (void *)&inode, sizeof(inode), 0, nr_entries);
+ vid_to_vdi_oid(vid), (void *)&inode, sizeof(inode),
+ 0, nr_entries);
if (ret != sizeof(inode)) {
eprintf("cannot find vdi object\n");
@@ -428,27 +432,28 @@ again:
if (inode.name[0] != '\0')
return 1;
- for (i = 0; i < ARRAY_SIZE(inode.child_oid); i++) {
- if (!inode.child_oid[i])
+ for (i = 0; i < ARRAY_SIZE(inode.child_vdi_id); i++) {
+ if (!inode.child_vdi_id[i])
continue;
- ((uint64_t *)dw->buf)[dw->count++] = inode.child_oid[i];
+ ((uint32_t *)dw->buf)[dw->count++] = inode.child_vdi_id[i];
}
- if (((uint64_t *)dw->buf)[done])
+ if (((uint32_t *)dw->buf)[done])
goto again;
return 0;
}
static uint64_t get_vdi_root(struct sheepdog_node_list_entry *entries,
- int nr_entries, uint64_t oid)
+ int nr_entries, uint32_t vid)
{
int ret;
static struct sheepdog_inode inode;
next:
- ret = read_object(entries, nr_entries, sys->epoch, oid,
+ ret = read_object(entries, nr_entries, sys->epoch,
+ vid_to_vdi_oid(vid),
(void *)&inode, sizeof(inode), 0, nr_entries);
if (ret != sizeof(inode)) {
@@ -456,10 +461,10 @@ next:
return 0;
}
- if (!inode.parent_oid)
- return oid;
+ if (!inode.parent_vdi_id)
+ return vid;
- oid = inode.parent_oid;
+ vid = inode.parent_vdi_id;
goto next;
}
@@ -469,15 +474,15 @@ static void __start_deletion(struct work *work, int idx)
struct deletion_work *dw = container_of(work, struct deletion_work, work);
struct sheepdog_node_list_entry entries[SD_MAX_NODES];
int nr_nodes, ret;
- uint64_t root_oid;
+ uint32_t root_vid;
nr_nodes = get_ordered_sd_node_list(entries);
- root_oid = get_vdi_root(entries, nr_nodes, dw->oid);
- if (!root_oid)
+ root_vid = get_vdi_root(entries, nr_nodes, dw->vid);
+ if (!root_vid)
goto fail;
- ret = fill_vdi_list(dw, entries, nr_nodes, root_oid);
+ ret = fill_vdi_list(dw, entries, nr_nodes, root_vid);
if (ret)
goto fail;
@@ -518,7 +523,7 @@ static void __start_deletion_done(struct work *work, int idx)
}
}
-int start_deletion(uint64_t oid)
+int start_deletion(uint32_t vid)
{
struct deletion_work *dw;
@@ -533,7 +538,7 @@ int start_deletion(uint64_t oid)
}
dw->count = 0;
- dw->oid = oid;
+ dw->vid = vid;
dw->work.fn = __start_deletion;
dw->work.done = __start_deletion_done;
diff --git a/include/meta.h b/include/meta.h
index 74bdfcf..53fa085 100644
--- a/include/meta.h
+++ b/include/meta.h
@@ -23,13 +23,14 @@
* 0 - 19 (20 bits): data object space
* 20 - 31 (12 bits): reserved data object space
* 32 - 55 (24 bits): vdi object space
- * 56 - 62 (17 bits): reserved vdi object space
- * 63 - 63 ( 1 bit ): set if vdi
+ * 56 - 59 ( 4 bits): reserved vdi object space
+ * 60 - 63 ( 4 bits): object type identifier space
*/
#define VDI_SPACE 24
#define VDI_SPACE_SHIFT 32
#define VDI_BIT (UINT64_C(1) << 63)
+#define VMSTATE_BIT (UINT64_C(1) << 62)
#define DEAFAULT_NR_COPIES 1
#define MAX_DATA_OBJS (1ULL << 20)
#define MAX_CHILDREN 1024
@@ -38,7 +39,6 @@
struct sheepdog_inode {
char name[SD_MAX_VDI_LEN];
- uint64_t oid;
uint64_t ctime;
uint64_t snap_ctime;
uint64_t vm_clock_nsec;
@@ -48,15 +48,15 @@ struct sheepdog_inode {
uint8_t nr_copies;
uint8_t block_size_shift;
uint32_t snap_id;
- uint64_t parent_oid;
- uint64_t child_oid[MAX_CHILDREN];
- uint64_t data_oid[MAX_DATA_OBJS];
+ uint32_t vdi_id;
+ uint32_t parent_vdi_id;
+ uint32_t child_vdi_id[MAX_CHILDREN];
+ uint32_t data_vdi_id[MAX_DATA_OBJS];
};
static inline int is_data_obj_writeable(struct sheepdog_inode *inode, int idx)
{
- return ((inode->oid & ~VDI_BIT) >> VDI_SPACE_SHIFT) ==
- (inode->data_oid[idx] >> VDI_SPACE_SHIFT);
+ return inode->vdi_id == inode->data_vdi_id[idx];
}
static inline int is_data_obj(uint64_t oid)
@@ -64,16 +64,21 @@ static inline int is_data_obj(uint64_t oid)
return !(VDI_BIT & oid);
}
-#define NR_VDIS (1U << DATA_SPECE_SHIFT)
+static inline uint64_t vid_to_vdi_oid(uint32_t vid)
+{
+ return VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT);
+}
-static inline uint64_t bit_to_oid(unsigned long nr)
+static inline uint64_t vid_to_data_oid(uint32_t vid, uint32_t idx)
{
- return ((unsigned long long)nr << VDI_SPACE_SHIFT) | VDI_BIT;
+ return ((uint64_t)vid << VDI_SPACE_SHIFT) | idx;
}
-static inline unsigned long oid_to_bit(uint64_t oid)
+static inline uint32_t oid_to_vid(uint64_t oid)
{
- return (oid & ~VDI_BIT) >> VDI_SPACE_SHIFT;
+ return (~VDI_BIT & oid) >> VDI_SPACE_SHIFT;
}
+#define NR_VDIS (1U << DATA_SPECE_SHIFT)
+
#endif
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index c4f6a13..c59aa74 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -222,11 +222,11 @@ struct sd_vdi_req {
uint32_t epoch;
uint32_t id;
uint32_t data_length;
- uint64_t base_oid;
uint64_t vdi_size;
+ uint32_t base_vdi_id;
uint32_t copies;
uint32_t snapid;
- uint32_t pad[2];
+ uint32_t pad[3];
};
struct sd_vdi_rsp {
@@ -238,9 +238,9 @@ struct sd_vdi_rsp {
uint32_t data_length;
uint32_t result;
uint32_t rsvd;
- uint64_t oid;
+ uint32_t vdi_id;
uint32_t vdi_epoch;
- uint32_t pad[3];
+ uint32_t pad[4];
};
struct sd_node_req {
diff --git a/shepherd/shepherd.c b/shepherd/shepherd.c
index da1263b..911aa00 100644
--- a/shepherd/shepherd.c
+++ b/shepherd/shepherd.c
@@ -316,7 +316,7 @@ static int debug(char *op, char *arg)
char name[128];
unsigned rlen, wlen;
unsigned opcode, flags;
- uint64_t oid = 0;
+ uint32_t vid = 0;
char vdiname[SD_MAX_VDI_LEN];
if (!op)
@@ -360,8 +360,8 @@ static int debug(char *op, char *arg)
if (!arg)
return 1;
rlen = 0;
- oid = strtoul(arg, NULL, 10);
- if (oid == 0) {
+ vid = strtoul(arg, NULL, 10);
+ if (vid == 0) {
wlen = strlen(arg) + 1;
opcode = SD_OP_GET_VDI_INFO;
flags = SD_FLAG_CMD_WRITE;
@@ -393,8 +393,8 @@ static int debug(char *op, char *arg)
hdr.data_length = wlen;
hdr.flags = flags;
hdr.epoch = node_list_version;
- if (oid > 0) {
- ((struct sd_vdi_req *)&hdr)->base_oid = oid;
+ if (vid > 0) {
+ ((struct sd_vdi_req *)&hdr)->base_vdi_id = vid;
}
ret = exec_req(fd, &hdr, arg, &wlen, &rlen);
@@ -425,8 +425,8 @@ static int debug(char *op, char *arg)
if (!strcasecmp(op, "vdi_info")) {
struct sd_vdi_rsp *vdi_rsp = (struct sd_vdi_rsp *)rsp;
- printf("name = %s, oid = %"PRIu64", epoch = %d\n",
- arg, vdi_rsp->oid, vdi_rsp->vdi_epoch);
+ printf("name = %s, vid = %"PRIu32", epoch = %d\n",
+ arg, vdi_rsp->vdi_id, vdi_rsp->vdi_epoch);
}
return ret;
}
@@ -473,7 +473,7 @@ static int shutdown_sheepdog(void)
#define DIR_BUF_LEN (UINT64_C(1) << 20)
-typedef void (*vdi_parser_func_t)(uint64_t oid, char *name, uint32_t tag, uint32_t flags,
+typedef void (*vdi_parser_func_t)(uint32_t vid, char *name, uint32_t tag, uint32_t flags,
struct sheepdog_inode *i, void *data);
@@ -509,14 +509,15 @@ static int parse_vdi(vdi_parser_func_t func, void *data)
continue;
ret = read_object(node_list_entries, nr_nodes, node_list_version,
- bit_to_oid(nr), (void *)&i, sizeof(i), 0, nr_nodes);
+ vid_to_vdi_oid(nr), (void *)&i,
+ sizeof(i), 0, nr_nodes);
if (ret == sizeof(i)) {
if (i.name[0] == '\0') /* deleted */
continue;
- func(i.oid, i.name, i.snap_id, 0, &i, data);
+ func(i.vdi_id, i.name, i.snap_id, 0, &i, data);
} else
- printf("error %lu %" PRIx64 ", %d\n", nr, bit_to_oid(nr), ret);
+ printf("error %lu, %d\n", nr, ret);
}
@@ -529,7 +530,7 @@ struct graph_info {
int highlight;
};
-static void print_graph_tree(uint64_t oid, char *name, uint32_t tag,
+static void print_graph_tree(uint32_t vid, char *name, uint32_t tag,
uint32_t flags, struct sheepdog_inode *i, void *data)
{
struct graph_info *info = (struct graph_info *)data;
@@ -543,7 +544,7 @@ static void print_graph_tree(uint64_t oid, char *name, uint32_t tag,
return;
if (info->root < 0)
- info->root = i->parent_oid;
+ info->root = vid_to_vdi_oid(i->parent_vdi_id);
ti = i->ctime >> 32;
localtime_r(&ti, &tm);
@@ -552,12 +553,12 @@ static void print_graph_tree(uint64_t oid, char *name, uint32_t tag,
strftime(time, sizeof(time), "%H:%M:%S", &tm);
size_to_str(i->vdi_size, size_str, sizeof(size_str));
- printf(" \"%" PRIu64 "\" [shape = \"box\","
+ printf(" \"%" PRIu32 "\" [shape = \"box\","
"fontname = \"Courier\","
"fontsize = \"12\","
"group = \"%s\","
"label = \"",
- oid, name);
+ vid, name);
printf("name: %8s\\n"
"tag : %8x\\n"
"size: %8s\\n"
@@ -570,7 +571,7 @@ static void print_graph_tree(uint64_t oid, char *name, uint32_t tag,
else
printf("\"];\n");
- printf(" \"%" PRIu64 "\" -> \"%" PRIu64 "\";\n", i->parent_oid, oid);
+ printf(" \"%" PRIu32 "\" -> \"%" PRIu32 "\";\n", i->parent_vdi_id, vid);
}
static int graphview_vdi(char *vdiname, int highlight)
@@ -603,7 +604,7 @@ struct tree_info {
char *name;
};
-static void print_vdi_tree(uint64_t oid, char *name, uint32_t tag,
+static void print_vdi_tree(uint32_t vid, char *name, uint32_t tag,
uint32_t flags, struct sheepdog_inode *i, void *data)
{
struct tree_info *info = (struct tree_info *)data;
@@ -624,7 +625,7 @@ static void print_vdi_tree(uint64_t oid, char *name, uint32_t tag,
"[%y-%m-%d %H:%M]", &tm);
}
- add_vdi_tree(name, buf, oid, i->parent_oid,
+ add_vdi_tree(name, buf, vid, i->parent_vdi_id,
info->highlight && is_current(i));
}
@@ -644,7 +645,7 @@ static int treeview_vdi(char *vdiname, int highlight)
return 0;
}
-static void print_vdi_list(uint64_t oid, char *name, uint32_t tag,
+static void print_vdi_list(uint32_t vid, char *name, uint32_t tag,
uint32_t flags, struct sheepdog_inode *i, void *data)
{
int idx;
@@ -663,7 +664,7 @@ static void print_vdi_list(uint64_t oid, char *name, uint32_t tag,
my_objs = 0;
cow_objs = 0;
for (idx = 0; idx < MAX_DATA_OBJS; idx++) {
- if (!i->data_oid[idx])
+ if (!i->data_vdi_id[idx])
continue;
if (is_data_obj_writeable(i, idx))
my_objs++;
@@ -676,9 +677,9 @@ static void print_vdi_list(uint64_t oid, char *name, uint32_t tag,
size_to_str(cow_objs * SD_DATA_OBJ_SIZE, cow_objs_str, sizeof(cow_objs_str));
if (!data || strcmp(name, data) == 0) {
- printf("%c %-8s %5d %7s %7s %7s %s %9" PRIx64 "\n",
+ printf("%c %-8s %5d %7s %7s %7s %s %7" PRIx32 "\n",
is_current(i) ? ' ' : 's', name, tag,
- vdi_size_str, my_objs_str, cow_objs_str, dbuf, oid);
+ vdi_size_str, my_objs_str, cow_objs_str, dbuf, vid);
}
}
@@ -688,7 +689,7 @@ struct vm_list_info {
int highlight;
};
-static void print_vm_list(uint64_t oid, char *name, uint32_t tag,
+static void print_vm_list(uint32_t vid, char *name, uint32_t tag,
uint32_t flags, struct sheepdog_inode *inode, void *data)
{
int i, j;
@@ -707,7 +708,7 @@ static void print_vm_list(uint64_t oid, char *name, uint32_t tag,
my_objs = 0;
cow_objs = 0;
for (j = 0; j < MAX_DATA_OBJS; j++) {
- if (!inode->data_oid[j])
+ if (!inode->data_vdi_id[j])
continue;
if (is_data_obj_writeable(inode, j))
my_objs++;
@@ -737,8 +738,8 @@ static void print_vm_list(uint64_t oid, char *name, uint32_t tag,
vdi_size_str, my_objs_str, cow_objs_str);
}
-static void cal_total_vdi_size(uint64_t oid, char *name, uint32_t tag,
- uint32_t flags, struct sheepdog_inode *i, void *data)
+static void cal_total_vdi_size(uint32_t vid, char *name, uint32_t tag,
+ uint32_t flags, struct sheepdog_inode *i, void *data)
{
uint64_t *size = data;
@@ -746,15 +747,15 @@ static void cal_total_vdi_size(uint64_t oid, char *name, uint32_t tag,
*size += i->vdi_size;
}
-struct get_oid_info {
+struct get_vid_info {
char *name;
- uint64_t oid;
+ uint32_t vid;
};
-static void get_oid(uint64_t oid, char *name, uint32_t tag,
+static void get_oid(uint32_t vid, char *name, uint32_t tag,
uint32_t flags, struct sheepdog_inode *i, void *data)
{
- struct get_oid_info *info = data;
+ struct get_vid_info *info = data;
char *p;
if (info->name) {
@@ -763,10 +764,10 @@ static void get_oid(uint64_t oid, char *name, uint32_t tag,
if (p) {
if (!strncmp(name, info->name, p - info->name) &&
tag == strtoul(p + 1, NULL, 16))
- info->oid = oid;
+ info->vid = vid;
} else {
if (!strcmp(name, info->name))
- info->oid = oid;
+ info->vid = vid;
}
}
}
@@ -813,7 +814,7 @@ static void get_data_oid(char *sheep, uint64_t oid, struct sd_obj_rsp *rsp,
if (info->success)
break;
info->success = 1;
- info->data_oid = inode->data_oid[info->idx];
+ info->data_oid = vid_to_data_oid(inode->data_vdi_id[info->idx], info->idx);
break;
case SD_RES_NO_OBJ:
break;
@@ -876,24 +877,24 @@ static void parse_objs(uint64_t oid, obj_parser_func_t func, void *data)
static void print_obj(char *vdiname, unsigned index)
{
int ret;
- struct get_oid_info info;
- uint64_t oid;
+ struct get_vid_info info;
+ uint32_t vid;
info.name = vdiname;
- info.oid = 0;
+ info.vid = 0;
ret = parse_vdi(get_oid, &info);
- oid = info.oid;
- if (oid == 0) {
+ vid = info.vid;
+ if (vid == 0) {
printf("No such vdi\n");
return;
}
if (index == ~0) {
- printf("Looking for the inode object 0x%" PRIx64 " with %d nodes\n\n",
- oid, nr_nodes);
- parse_objs(oid, do_print_obj, NULL);
+ printf("Looking for the inode object 0x%" PRIx32 " with %d nodes\n\n",
+ vid, nr_nodes);
+ parse_objs(vid_to_vdi_oid(vid), do_print_obj, NULL);
} else {
struct get_data_oid_info info;
@@ -905,20 +906,20 @@ static void print_obj(char *vdiname, unsigned index)
exit(1);
}
- parse_objs(oid, get_data_oid, &info);
+ parse_objs(vid_to_vdi_oid(vid), get_data_oid, &info);
if (info.success) {
if (info.data_oid) {
printf("Looking for the object 0x%" PRIx64
- " (the inode oid 0x%" PRIx64 " index %u) with %d nodes\n\n",
- info.data_oid, oid, index, nr_nodes);
+ " (the inode vid 0x%" PRIx32 " index %u) with %d nodes\n\n",
+ info.data_oid, vid, index, nr_nodes);
parse_objs(info.data_oid, do_print_obj, NULL);
} else
- printf("The inode object 0x%" PRIx64 " index %u is not allocated\n",
- oid, index);
+ printf("The inode object 0x%" PRIx32 " index %u is not allocated\n",
+ vid, index);
} else
- printf("failed to read the inode object 0x%" PRIx64 "\n", oid);
+ printf("failed to read the inode object 0x%" PRIx32 "\n", vid);
}
}
@@ -947,8 +948,8 @@ rerun:
case INFO_VDI:
switch (format) {
case FORMAT_LIST:
- printf(" name id size used shared creation time object id\n");
- printf("--------------------------------------------------------------------\n");
+ printf(" name id size used shared creation time vdi id\n");
+ printf("------------------------------------------------------------------\n");
ret = parse_vdi(print_vdi_list, name);
break;
case FORMAT_TREE:
diff --git a/shepherd/treeview.c b/shepherd/treeview.c
index 7bbdef6..c5adb96 100644
--- a/shepherd/treeview.c
+++ b/shepherd/treeview.c
@@ -24,8 +24,8 @@
struct vdi_tree {
char name[1024];
char label[256];
- uint64_t oid;
- uint64_t poid;
+ uint32_t vid;
+ uint32_t pvid;
int highlight;
struct list_head children;
struct list_head siblings;
@@ -34,16 +34,16 @@ struct vdi_tree {
static int *width, *more;
static struct vdi_tree *root;
-static struct vdi_tree *find_vdi(struct vdi_tree *parent, uint64_t oid,
+static struct vdi_tree *find_vdi(struct vdi_tree *parent, uint32_t vid,
const char *name)
{
struct vdi_tree *vdi, *ret;
list_for_each_entry(vdi, &parent->children, siblings) {
- if (vdi->oid == oid && !strcmp(vdi->name, name))
+ if (vdi->vid == vid && !strcmp(vdi->name, name))
return vdi;
- ret = find_vdi(vdi, oid, name);
+ ret = find_vdi(vdi, vid, name);
if (ret)
return ret;
}
@@ -51,7 +51,7 @@ static struct vdi_tree *find_vdi(struct vdi_tree *parent, uint64_t oid,
}
static struct vdi_tree *new_vdi(const char *name, const char *label,
- uint64_t oid, uint64_t poid, int highlight)
+ uint64_t vid, uint64_t pvid, int highlight)
{
struct vdi_tree *vdi;
@@ -62,8 +62,8 @@ static struct vdi_tree *new_vdi(const char *name, const char *label,
}
strcpy(vdi->name, name);
strcpy(vdi->label, label);
- vdi->oid = oid;
- vdi->poid = poid;
+ vdi->vid = vid;
+ vdi->pvid = pvid;
vdi->highlight = highlight;
INIT_LIST_HEAD(&vdi->children);
return vdi;
@@ -74,16 +74,16 @@ void init_tree(void)
root = new_vdi("", "", 0, 0, 0);
}
-void add_vdi_tree(const char *name, const char *label, uint64_t oid,
- uint64_t poid, int highlight)
+void add_vdi_tree(const char *name, const char *label, uint32_t vid,
+ uint32_t pvid, int highlight)
{
struct vdi_tree *vdi, *parent;
- vdi = new_vdi(name, label, oid, poid, highlight);
+ vdi = new_vdi(name, label, vid, pvid, highlight);
if (!vdi)
return;
- parent = find_vdi(root, poid, name);
+ parent = find_vdi(root, pvid, name);
if (!parent)
parent = root;
@@ -95,7 +95,7 @@ static void compaction(struct vdi_tree *parent)
struct vdi_tree *vdi, *e, *new_parent;
list_for_each_entry_safe(vdi, e, &parent->children, siblings) {
- new_parent = find_vdi(root, vdi->poid, vdi->name);
+ new_parent = find_vdi(root, vdi->pvid, vdi->name);
if (new_parent && parent != new_parent) {
list_del(&vdi->siblings);
list_add_tail(&vdi->siblings, &new_parent->children);
diff --git a/shepherd/treeview.h b/shepherd/treeview.h
index d18fe67..fa54e6e 100644
--- a/shepherd/treeview.h
+++ b/shepherd/treeview.h
@@ -12,8 +12,8 @@
#define __TREEVIEW__
void init_tree(void);
-void add_vdi_tree(const char *label, const char *tag, uint64_t pid,
- uint64_t ppid, int highlight);
+void add_vdi_tree(const char *label, const char *tag, uint32_t vid,
+ uint32_t pvid, int highlight);
void dump_tree(void);
#endif
--
1.5.6.5
More information about the sheepdog
mailing list