[Sheepdog] [PATCH] shrink vdi object size

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Fri Apr 30 02:03:15 CEST 2010


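The 64-bit oid fields in struct sheepdog_inode are redundant: only the
vdi id in the upper 32 bits needs to be stored, because the lower 32
bits can be derived (they are the data object index, or zero for the
vdi object itself). This patch stores 32-bit vdi ids instead, which
reduces the vdi object size and improves metadata operation performance.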

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 collie/collie.h          |    8 +-
 collie/group.c           |   24 ++++----
 collie/store.c           |    9 +--
 collie/vdi.c             |  157 ++++++++++++++++++++++++----------------------
 include/meta.h           |   31 +++++----
 include/sheepdog_proto.h |    8 +-
 shepherd/shepherd.c      |   99 +++++++++++++++--------------
 shepherd/treeview.c      |   26 ++++----
 shepherd/treeview.h      |    4 +-
 9 files changed, 188 insertions(+), 178 deletions(-)

diff --git a/collie/collie.h b/collie/collie.h
index d827eb4..db4fd5c 100644
--- a/collie/collie.h
+++ b/collie/collie.h
@@ -92,12 +92,12 @@ int create_listen_port(int port, void *data);
 int init_store(char *dir);
 
 int add_vdi(char *data, int data_len, uint64_t size,
-	    uint64_t *new_oid, uint64_t base_oid, uint32_t copies,
+	    uint32_t *new_vid, uint32_t base_vid, uint32_t copies,
 	    int is_snapshot);
 
 int del_vdi(char *data, int data_len, uint32_t snapid);
 
-int lookup_vdi(char *data, int data_len, uint64_t *oid, uint32_t snapid);
+int lookup_vdi(char *data, int data_len, uint32_t *vid, uint32_t snapid);
 
 int read_vdis(char *data, int len, unsigned int *rsp_len);
 
@@ -129,8 +129,8 @@ int remove_epoch(int epoch);
 int set_cluster_ctime(uint64_t ctime);
 uint64_t get_cluster_ctime(void);
 
-int start_recovery(uint32_t epoch, unsigned long *failed_vdis, int nr_failed_vdis);
-int start_deletion(uint64_t oid);
+int start_recovery(uint32_t epoch, uint32_t *failed_vdis, int nr_failed_vdis);
+int start_deletion(uint32_t vid);
 
 static inline int is_myself(struct sheepdog_node_list_entry *e)
 {
diff --git a/collie/group.c b/collie/group.c
index 9247438..b9f87b1 100644
--- a/collie/group.c
+++ b/collie/group.c
@@ -85,7 +85,7 @@ struct work_confchg {
 	struct cpg_address *joined_list;
 	size_t joined_list_entries;
 
-	unsigned long *failed_vdis;
+	uint32_t *failed_vdis;
 	int nr_failed_vdis;
 	int first_cpg_node;
 	int sd_node_left;
@@ -634,12 +634,12 @@ static void vdi_op(struct vdi_op_message *msg)
 	struct sd_vdi_rsp *rsp = &msg->rsp;
 	void *data = msg->data;
 	int ret = SD_RES_SUCCESS;
-	uint64_t oid = 0;
+	uint32_t vid = 0;
 
 	switch (hdr->opcode) {
 	case SD_OP_NEW_VDI:
-		ret = add_vdi(data, hdr->data_length, hdr->vdi_size, &oid,
-			      hdr->base_oid, hdr->copies,
+		ret = add_vdi(data, hdr->data_length, hdr->vdi_size, &vid,
+			      hdr->base_vdi_id, hdr->copies,
 			      hdr->snapid);
 		break;
 	case SD_OP_DEL_VDI:
@@ -651,7 +651,7 @@ static void vdi_op(struct vdi_op_message *msg)
 		break;
 	case SD_OP_LOCK_VDI:
 	case SD_OP_GET_VDI_INFO:
-		ret = lookup_vdi(data, hdr->data_length, &oid, hdr->snapid);
+		ret = lookup_vdi(data, hdr->data_length, &vid, hdr->snapid);
 		if (ret != SD_RES_SUCCESS)
 			break;
 		break;
@@ -668,7 +668,7 @@ static void vdi_op(struct vdi_op_message *msg)
 		break;
 	}
 
-	rsp->oid = oid;
+	rsp->vdi_id = vid;
 	rsp->result = ret;
 }
 
@@ -690,8 +690,8 @@ static void vdi_op_done(struct vdi_op_message *msg)
 	switch (hdr->opcode) {
 	case SD_OP_NEW_VDI:
 	{
-		unsigned long nr = (rsp->oid & ~VDI_BIT) >> VDI_SPACE_SHIFT;
-		vprintf(SDOG_INFO "done %d %ld %" PRIx64 "\n", ret, nr, rsp->oid);
+		unsigned long nr = rsp->vdi_id;
+		vprintf(SDOG_INFO "done %d %ld\n", ret, nr);
 		set_bit(nr, sys->vdi_inuse);
 		break;
 	}
@@ -728,7 +728,7 @@ static void vdi_op_done(struct vdi_op_message *msg)
 	case SD_OP_GET_VDI_INFO:
 		break;
 	case SD_OP_MAKE_FS:
-		sys->nr_sobjs = ((struct sd_vdi_req *)hdr)->copies;
+		sys->nr_sobjs = ((struct sd_so_req *)hdr)->copies;
 
 		ctime = ((struct sd_so_req *)hdr)->ctime;
 		set_cluster_ctime(ctime);
@@ -1028,7 +1028,7 @@ static void del_node(struct cpg_address *addr, struct work_confchg *w)
 		struct sheepdog_node_list_entry e[SD_MAX_NODES];
 		struct vm *vm, *n;
 		int ret, size;
-		uint64_t oid;
+		uint32_t vid;
 		void *buf;
 
 		w->sd_node_left++;
@@ -1053,9 +1053,9 @@ static void del_node(struct cpg_address *addr, struct work_confchg *w)
 			}
 
 			ret = lookup_vdi((char *)vm->ent.name,
-					 sizeof(vm->ent.name), &oid, 0);
+					 sizeof(vm->ent.name), &vid, 0);
 			if (ret == SD_RES_SUCCESS)
-				w->failed_vdis[w->nr_failed_vdis++] = oid_to_bit(oid);
+				w->failed_vdis[w->nr_failed_vdis++] = vid;
 			else
 				eprintf("cannot find vdi %s\n", vm->ent.name);
 
diff --git a/collie/store.c b/collie/store.c
index 3495674..bf5824f 100644
--- a/collie/store.c
+++ b/collie/store.c
@@ -1254,7 +1254,7 @@ static void recover_one(struct work *work, int idx)
 	cur_idx = obj_to_sheep(cur_entry, cur_nr, oid, 0);
 
 	for (i = 0; i < rw->nr_failed_vdis; i++) {
-		if (rw->failed_vdis[i] == oid_to_bit(oid))
+		if (rw->failed_vdis[i] == oid_to_vid(oid))
 			is_failed_oid = 1;
 	}
 
@@ -1506,7 +1506,7 @@ fail:
 	return;
 }
 
-int start_recovery(uint32_t epoch, unsigned long *failed_vdis, int nr_failed_vdis)
+int start_recovery(uint32_t epoch, uint32_t *failed_vdis, int nr_failed_vdis)
 {
 	struct recovery_work *rw;
 
@@ -1677,10 +1677,9 @@ static int init_epoch_path(char *base_path)
 		if (is_data_obj(oid))
 			continue;
 
-		vprintf(SDOG_DEBUG "found the vdi obj, %" PRIx64 " %lu\n",
-			oid, oid_to_bit(oid));
+		vprintf(SDOG_DEBUG "found the vdi obj, %" PRIx64 "\n", oid);
 
-		set_bit(oid_to_bit(oid), sys->vdi_inuse);
+		set_bit(oid_to_vid(oid), sys->vdi_inuse);
 	}
 	closedir(dir);
 
diff --git a/collie/vdi.c b/collie/vdi.c
index baf978f..dc2c082 100644
--- a/collie/vdi.c
+++ b/collie/vdi.c
@@ -18,8 +18,8 @@
 
 
 /* TODO: should be performed atomically */
-static int create_vdi_obj(char *name, uint64_t new_oid, uint64_t size,
-			  uint64_t base_oid, uint64_t cur_oid, uint32_t copies,
+static int create_vdi_obj(char *name, uint32_t new_vid, uint64_t size,
+			  uint32_t base_vid, uint32_t cur_vid, uint32_t copies,
 			  uint32_t snapid, int is_snapshot)
 {
 	struct sheepdog_node_list_entry entries[SD_MAX_NODES];
@@ -31,10 +31,10 @@ static int create_vdi_obj(char *name, uint64_t new_oid, uint64_t size,
 
 	nr_nodes = get_ordered_sd_node_list(entries);
 
-	if (base_oid) {
+	if (base_vid) {
 		ret = read_object(entries, nr_nodes, sys->epoch,
-				  base_oid, (char *)&base, sizeof(base), 0,
-				  copies);
+				  vid_to_vdi_oid(base_vid), (char *)&base,
+				  sizeof(base), 0, copies);
 		if (ret < 0)
 			return SD_RES_BASE_VDI_READ;
 	}
@@ -42,13 +42,13 @@ static int create_vdi_obj(char *name, uint64_t new_oid, uint64_t size,
 	gettimeofday(&tv, NULL);
 
 	if (is_snapshot) {
-		if (cur_oid != base_oid) {
-			vprintf(SDOG_INFO "tree snapshot %s %" PRIx64 " %" PRIx64 "\n",
-				name, cur_oid, base_oid);
+		if (cur_vid != base_vid) {
+			vprintf(SDOG_INFO "tree snapshot %s %" PRIx32 " %" PRIx32 "\n",
+				name, cur_vid, base_vid);
 
 			ret = read_object(entries, nr_nodes, sys->epoch,
-					  cur_oid, (char *)&cur, sizeof(cur), 0,
-					  copies);
+					  vid_to_vdi_oid(cur_vid), (char *)&cur,
+					  sizeof(cur), 0, copies);
 			if (ret < 0) {
 				vprintf(SDOG_ERR "failed\n");
 				return SD_RES_BASE_VDI_READ;
@@ -62,7 +62,7 @@ static int create_vdi_obj(char *name, uint64_t new_oid, uint64_t size,
 	memset(&new, 0, sizeof(new));
 
 	strncpy(new.name, name, sizeof(new.name));
-	new.oid = new_oid;
+	new.vdi_id = new_vid;
 	new.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000;
 	new.vdi_size = size;
 	new.copy_policy = 0;
@@ -70,38 +70,37 @@ static int create_vdi_obj(char *name, uint64_t new_oid, uint64_t size,
 	new.block_size_shift = find_next_bit(&block_size, BITS_PER_LONG, 0);
 	new.snap_id = snapid;
 
-	if (base_oid) {
+	if (base_vid) {
 		int i;
 
-		new.parent_oid = base_oid;
-		memcpy(new.data_oid, base.data_oid,
-		       MAX_DATA_OBJS * sizeof(uint64_t));
+		new.parent_vdi_id = base_vid;
+		memcpy(new.data_vdi_id, base.data_vdi_id, sizeof(new.data_vdi_id));
 
-		for (i = 0; i < ARRAY_SIZE(base.child_oid); i++) {
-			if (!base.child_oid[i]) {
-				base.child_oid[i] = new_oid;
+		for (i = 0; i < ARRAY_SIZE(base.child_vdi_id); i++) {
+			if (!base.child_vdi_id[i]) {
+				base.child_vdi_id[i] = new_vid;
 				break;
 			}
 		}
 
-		if (i == ARRAY_SIZE(base.child_oid))
+		if (i == ARRAY_SIZE(base.child_vdi_id))
 			return SD_RES_NO_BASE_VDI;
 
 	}
 
-	if (is_snapshot && cur_oid != base_oid) {
+	if (is_snapshot && cur_vid != base_vid) {
 		ret = write_object(entries, nr_nodes, sys->epoch,
-				   cur_oid, (char *)&cur, sizeof(cur), 0,
-				   copies, 0);
+				   vid_to_vdi_oid(cur_vid), (char *)&cur,
+				   sizeof(cur), 0, copies, 0);
 		if (ret < 0) {
 			vprintf(SDOG_ERR "failed\n");
 			return SD_RES_BASE_VDI_READ;
 		}
 	}
 
-	if (base_oid) {
-		ret = write_object(entries, nr_nodes,
-				   sys->epoch, base_oid, (char *)&base,
+	if (base_vid) {
+		ret = write_object(entries, nr_nodes, sys->epoch,
+				   vid_to_vdi_oid(base_vid), (char *)&base,
 				   sizeof(base), 0, copies, 0);
 		if (ret < 0) {
 			vprintf(SDOG_ERR "failed\n");
@@ -110,7 +109,8 @@ static int create_vdi_obj(char *name, uint64_t new_oid, uint64_t size,
 	}
 
 	ret = write_object(entries, nr_nodes, sys->epoch,
-			   new_oid, (char *)&new, sizeof(new), 0, copies, 1);
+			   vid_to_vdi_oid(new_vid), (char *)&new, sizeof(new),
+			   0, copies, 1);
 	if (ret < 0)
 		return SD_RES_VDI_WRITE;
 
@@ -118,7 +118,7 @@ static int create_vdi_obj(char *name, uint64_t new_oid, uint64_t size,
 }
 
 static int find_first_vdi(unsigned long start, unsigned long end,
-			  char *name, int namelen, uint32_t snapid, uint64_t *oid,
+			  char *name, int namelen, uint32_t snapid, uint32_t *vid,
 			  unsigned long *deleted_nr, uint32_t *next_snap)
 {
 	struct sheepdog_node_list_entry entries[SD_MAX_NODES];
@@ -135,8 +135,8 @@ static int find_first_vdi(unsigned long start, unsigned long end,
 
 	for (i = start; i >= end; i--) {
 		ret = read_object(entries, nr_nodes, sys->epoch,
-				  bit_to_oid(i), (char *)&inode, sizeof(inode), 0,
-				  nr_reqs);
+				  vid_to_vdi_oid(i), (char *)&inode,
+				  sizeof(inode), 0, nr_reqs);
 		if (ret < 0)
 			return SD_RES_EIO;
 
@@ -150,7 +150,7 @@ static int find_first_vdi(unsigned long start, unsigned long end,
 				continue;
 
 			*next_snap = inode.snap_id + 1;
-			*oid = inode.oid;
+			*vid = inode.vdi_id;
 			return SD_RES_SUCCESS;
 		}
 	}
@@ -158,7 +158,7 @@ static int find_first_vdi(unsigned long start, unsigned long end,
 }
 
 
-static int do_lookup_vdi(char *name, int namelen, uint64_t *oid, uint32_t snapid,
+static int do_lookup_vdi(char *name, int namelen, uint32_t *vid, uint32_t snapid,
 			 uint32_t *next_snapid,
 			 unsigned long *right_nr,  unsigned long *deleted_nr)
 {
@@ -177,7 +177,7 @@ static int do_lookup_vdi(char *name, int namelen, uint64_t *oid, uint32_t snapid
 	} else if (nr < SD_NR_VDIS) {
 	right_side:
 		/* look up on the right side of the hash point */
-		ret = find_first_vdi(nr - 1, start_nr, name, namelen, snapid, oid,
+		ret = find_first_vdi(nr - 1, start_nr, name, namelen, snapid, vid,
 				     deleted_nr, next_snapid);
 		return ret;
 	} else {
@@ -188,7 +188,7 @@ static int do_lookup_vdi(char *name, int namelen, uint64_t *oid, uint32_t snapid
 			return SD_RES_FULL_VDI;
 		else if (nr) {
 			/* look up on the left side of the hash point */
-			ret = find_first_vdi(nr - 1, 0, name, namelen, snapid, oid,
+			ret = find_first_vdi(nr - 1, 0, name, namelen, snapid, vid,
 					     deleted_nr, next_snapid);
 			if (ret == SD_RES_NO_VDI)
 				; /* we need to go to the right side */
@@ -201,7 +201,7 @@ static int do_lookup_vdi(char *name, int namelen, uint64_t *oid, uint32_t snapid
 	}
 }
 
-int lookup_vdi(char *data, int data_len, uint64_t *oid, uint32_t snapid)
+int lookup_vdi(char *data, int data_len, uint32_t *vid, uint32_t snapid)
 {
 	char *name = data;
 	uint32_t dummy0;
@@ -210,14 +210,14 @@ int lookup_vdi(char *data, int data_len, uint64_t *oid, uint32_t snapid)
 	if (data_len != SD_MAX_VDI_LEN)
 		return SD_RES_INVALID_PARMS;
 
-	return do_lookup_vdi(name, strlen(name), oid, snapid,
+	return do_lookup_vdi(name, strlen(name), vid, snapid,
 			     &dummy0, &dummy1, &dummy2);
 }
 
 int add_vdi(char *data, int data_len, uint64_t size,
-	    uint64_t *new_oid, uint64_t base_oid, uint32_t copies, int is_snapshot)
+	    uint32_t *new_vid, uint32_t base_vid, uint32_t copies, int is_snapshot)
 {
-	uint64_t cur_oid;
+	uint32_t cur_vid;
 	uint32_t next_snapid;
 	unsigned long nr, deleted_nr = SD_NR_VDIS, right_nr = SD_NR_VDIS;
 	int ret;
@@ -228,7 +228,7 @@ int add_vdi(char *data, int data_len, uint64_t size,
 
 	name = data;
 
-	ret = do_lookup_vdi(name, strlen(name), &cur_oid, 0, &next_snapid,
+	ret = do_lookup_vdi(name, strlen(name), &cur_vid, 0, &next_snapid,
 			    &right_nr, &deleted_nr);
 
 	if (is_snapshot) {
@@ -254,11 +254,11 @@ int add_vdi(char *data, int data_len, uint64_t size,
 		next_snapid = 1;
 	}
 
-	*new_oid = bit_to_oid(nr);
+	*new_vid = nr;
 
-	vprintf(SDOG_INFO "we create a new vdi, %d %s (%zd) %" PRIu64 ", oid: %"
-		PRIx64 ", base %" PRIx64 ", cur %" PRIx64 " \n",
-		is_snapshot, name, strlen(name), size, *new_oid, base_oid, cur_oid);
+	vprintf(SDOG_INFO "we create a new vdi, %d %s (%zd) %" PRIu64 ", vid: %"
+		PRIx32 ", base %" PRIx32 ", cur %" PRIx32 " \n",
+		is_snapshot, name, strlen(name), size, *new_vid, base_vid, cur_vid);
 
 	if (!copies) {
 		vprintf(SDOG_WARNING "qemu doesn't specify the copies... %d\n",
@@ -266,7 +266,7 @@ int add_vdi(char *data, int data_len, uint64_t size,
 		copies = sys->nr_sobjs;
 	}
 
-	ret = create_vdi_obj(name, *new_oid, size, base_oid, cur_oid, copies,
+	ret = create_vdi_obj(name, *new_vid, size, base_vid, cur_vid, copies,
 			     next_snapid, is_snapshot);
 
 	return ret;
@@ -275,7 +275,7 @@ int add_vdi(char *data, int data_len, uint64_t size,
 int del_vdi(char *data, int data_len, uint32_t snapid)
 {
 	char *name = data;
-	uint64_t oid;
+	uint32_t vid;
 	uint32_t dummy0;
 	unsigned long dummy1, dummy2;
 	int ret;
@@ -286,7 +286,7 @@ int del_vdi(char *data, int data_len, uint32_t snapid)
 	if (data_len != SD_MAX_VDI_LEN)
 		return SD_RES_INVALID_PARMS;
 
-	ret = do_lookup_vdi(name, strlen(name), &oid, snapid,
+	ret = do_lookup_vdi(name, strlen(name), &vid, snapid,
 			     &dummy0, &dummy1, &dummy2);
 	if (ret != SD_RES_SUCCESS)
 		return ret;
@@ -297,7 +297,7 @@ int del_vdi(char *data, int data_len, uint32_t snapid)
 		nr_reqs = nr_nodes;
 
 	ret = read_object(entries, nr_nodes, sys->epoch,
-			  oid, (char *)&inode, sizeof(inode), 0,
+			  vid_to_vdi_oid(vid), (char *)&inode, sizeof(inode), 0,
 			  nr_reqs);
 	if (ret < 0)
 		return SD_RES_EIO;
@@ -305,12 +305,12 @@ int del_vdi(char *data, int data_len, uint32_t snapid)
 	memset(inode.name, 0, sizeof(inode.name));
 
 	ret = write_object(entries, nr_nodes, sys->epoch,
-			  oid, (char *)&inode, sizeof(inode), 0,
+			   vid_to_vdi_oid(vid), (char *)&inode, sizeof(inode), 0,
 			   nr_reqs, 0);
 	if (ret < 0)
 		return SD_RES_EIO;
 
-	ret = start_deletion(oid);
+	ret = start_deletion(vid);
 	if (ret < 0)
 		return SD_RES_NO_MEM;
 
@@ -334,7 +334,7 @@ struct deletion_work {
 	struct work work;
 	struct list_head dw_siblings;
 
-	uint64_t oid;
+	uint32_t vid;
 
 	int count;
 	char *buf;
@@ -346,18 +346,19 @@ static int deleting;
 static void delete_one(struct work *work, int idx)
 {
 	struct deletion_work *dw = container_of(work, struct deletion_work, work);
-	uint64_t vdi_oid = *(((uint64_t *)dw->buf) + dw->count - dw->done - 1);
+	uint32_t vdi_id = *(((uint32_t *)dw->buf) + dw->count - dw->done - 1);
 	struct sheepdog_node_list_entry entries[SD_MAX_NODES];
 	int nr_nodes;
 	int ret, i;
 	static struct sheepdog_inode inode;
 
-	eprintf("%d %d, %16lx\n", dw->done, dw->count, vdi_oid);
+	eprintf("%d %d, %16x\n", dw->done, dw->count, vdi_id);
 
 	nr_nodes = get_ordered_sd_node_list(entries);
 
 	ret = read_object(entries, nr_nodes, sys->epoch,
-			  vdi_oid, (void *)&inode, sizeof(inode), 0, sys->nr_sobjs);
+			  vid_to_vdi_oid(vdi_id), (void *)&inode, sizeof(inode),
+			  0, sys->nr_sobjs);
 
 	if (ret != sizeof(inode)) {
 		eprintf("cannot find vdi object\n");
@@ -365,15 +366,17 @@ static void delete_one(struct work *work, int idx)
 	}
 
 	for (i = 0; i < MAX_DATA_OBJS; i++) {
-		if (!inode.data_oid[i])
+		if (!inode.data_vdi_id[i])
 			continue;
 
 		remove_object(entries, nr_nodes, sys->epoch,
-			      inode.data_oid[i], inode.nr_copies);
+			      vid_to_data_oid(inode.data_vdi_id[i], i),
+			      inode.nr_copies);
 	}
 
-	if (remove_object(entries, nr_nodes, sys->epoch, vdi_oid, sys->nr_sobjs))
-		eprintf("failed to remove vdi objects %lx\n", vdi_oid);
+	if (remove_object(entries, nr_nodes, sys->epoch, vid_to_vdi_oid(vdi_id),
+			  sys->nr_sobjs))
+		eprintf("failed to remove vdi objects %x\n", vdi_id);
 }
 
 static void __start_deletion(struct work *work, int idx);
@@ -407,18 +410,19 @@ static void delete_one_done(struct work *work, int idx)
 
 static int fill_vdi_list(struct deletion_work *dw,
 			 struct sheepdog_node_list_entry *entries,
-			 int nr_entries, uint64_t root_oid)
+			 int nr_entries, uint32_t root_vid)
 {
 	int ret, i;
 	static struct sheepdog_inode inode;
 	int done = dw->count;
-	uint64_t oid;
+	uint32_t vid;
 
-	((uint64_t *)dw->buf)[dw->count++] = root_oid;
+	((uint32_t *)dw->buf)[dw->count++] = root_vid;
 again:
-	oid = ((uint64_t *)dw->buf)[done++];
+	vid = ((uint32_t *)dw->buf)[done++];
 	ret = read_object(entries, nr_entries, sys->epoch,
-			  oid, (void *)&inode, sizeof(inode), 0, nr_entries);
+			  vid_to_vdi_oid(vid), (void *)&inode, sizeof(inode),
+			  0, nr_entries);
 
 	if (ret != sizeof(inode)) {
 		eprintf("cannot find vdi object\n");
@@ -428,27 +432,28 @@ again:
 	if (inode.name[0] != '\0')
 		return 1;
 
-	for (i = 0; i < ARRAY_SIZE(inode.child_oid); i++) {
-		if (!inode.child_oid[i])
+	for (i = 0; i < ARRAY_SIZE(inode.child_vdi_id); i++) {
+		if (!inode.child_vdi_id[i])
 			continue;
 
-		((uint64_t *)dw->buf)[dw->count++] = inode.child_oid[i];
+		((uint32_t *)dw->buf)[dw->count++] = inode.child_vdi_id[i];
 	}
 
-	if (((uint64_t *)dw->buf)[done])
+	if (((uint32_t *)dw->buf)[done])
 		goto again;
 
 	return 0;
 }
 
 static uint64_t get_vdi_root(struct sheepdog_node_list_entry *entries,
-			     int nr_entries, uint64_t oid)
+			     int nr_entries, uint32_t vid)
 {
 	int ret;
 	static struct sheepdog_inode inode;
 
 next:
-	ret = read_object(entries, nr_entries, sys->epoch, oid,
+	ret = read_object(entries, nr_entries, sys->epoch,
+			  vid_to_vdi_oid(vid),
 			  (void *)&inode, sizeof(inode), 0, nr_entries);
 
 	if (ret != sizeof(inode)) {
@@ -456,10 +461,10 @@ next:
 		return 0;
 	}
 
-	if (!inode.parent_oid)
-		return oid;
+	if (!inode.parent_vdi_id)
+		return vid;
 
-	oid = inode.parent_oid;
+	vid = inode.parent_vdi_id;
 
 	goto next;
 }
@@ -469,15 +474,15 @@ static void __start_deletion(struct work *work, int idx)
 	struct deletion_work *dw = container_of(work, struct deletion_work, work);
 	struct sheepdog_node_list_entry entries[SD_MAX_NODES];
 	int nr_nodes, ret;
-	uint64_t root_oid;
+	uint32_t root_vid;
 
 	nr_nodes = get_ordered_sd_node_list(entries);
 
-	root_oid = get_vdi_root(entries, nr_nodes, dw->oid);
-	if (!root_oid)
+	root_vid = get_vdi_root(entries, nr_nodes, dw->vid);
+	if (!root_vid)
 		goto fail;
 
-	ret = fill_vdi_list(dw, entries, nr_nodes, root_oid);
+	ret = fill_vdi_list(dw, entries, nr_nodes, root_vid);
 	if (ret)
 		goto fail;
 
@@ -518,7 +523,7 @@ static void __start_deletion_done(struct work *work, int idx)
 	}
 }
 
-int start_deletion(uint64_t oid)
+int start_deletion(uint32_t vid)
 {
 	struct deletion_work *dw;
 
@@ -533,7 +538,7 @@ int start_deletion(uint64_t oid)
 	}
 
 	dw->count = 0;
-	dw->oid = oid;
+	dw->vid = vid;
 
 	dw->work.fn = __start_deletion;
 	dw->work.done = __start_deletion_done;
diff --git a/include/meta.h b/include/meta.h
index 74bdfcf..53fa085 100644
--- a/include/meta.h
+++ b/include/meta.h
@@ -23,13 +23,14 @@
  *  0 - 19 (20 bits): data object space
  * 20 - 31 (12 bits): reserved data object space
  * 32 - 55 (24 bits): vdi object space
- * 56 - 62 (17 bits): reserved vdi object space
- * 63 - 63 ( 1 bit ): set if vdi
+ * 56 - 59 ( 4 bits): reserved vdi object space
+ * 60 - 63 ( 4 bits): object type identifier space
  */
 
 #define VDI_SPACE   24
 #define VDI_SPACE_SHIFT   32
 #define VDI_BIT (UINT64_C(1) << 63)
+#define VMSTATE_BIT (UINT64_C(1) << 62)
 #define DEAFAULT_NR_COPIES 1
 #define MAX_DATA_OBJS (1ULL << 20)
 #define MAX_CHILDREN 1024
@@ -38,7 +39,6 @@
 
 struct sheepdog_inode {
 	char name[SD_MAX_VDI_LEN];
-	uint64_t oid;
 	uint64_t ctime;
 	uint64_t snap_ctime;
 	uint64_t vm_clock_nsec;
@@ -48,15 +48,15 @@ struct sheepdog_inode {
 	uint8_t  nr_copies;
 	uint8_t  block_size_shift;
 	uint32_t snap_id;
-	uint64_t parent_oid;
-	uint64_t child_oid[MAX_CHILDREN];
-	uint64_t data_oid[MAX_DATA_OBJS];
+	uint32_t vdi_id;
+	uint32_t parent_vdi_id;
+	uint32_t child_vdi_id[MAX_CHILDREN];
+	uint32_t data_vdi_id[MAX_DATA_OBJS];
 };
 
 static inline int is_data_obj_writeable(struct sheepdog_inode *inode, int idx)
 {
-	return ((inode->oid & ~VDI_BIT) >> VDI_SPACE_SHIFT) ==
-		(inode->data_oid[idx] >> VDI_SPACE_SHIFT);
+	return inode->vdi_id == inode->data_vdi_id[idx];
 }
 
 static inline int is_data_obj(uint64_t oid)
@@ -64,16 +64,21 @@ static inline int is_data_obj(uint64_t oid)
 	return !(VDI_BIT & oid);
 }
 
-#define NR_VDIS (1U << DATA_SPECE_SHIFT)
+static inline uint64_t vid_to_vdi_oid(uint32_t vid)
+{
+	return VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT);
+}
 
-static inline uint64_t bit_to_oid(unsigned long nr)
+static inline uint64_t vid_to_data_oid(uint32_t vid, uint32_t idx)
 {
-	return ((unsigned long long)nr << VDI_SPACE_SHIFT) | VDI_BIT;
+	return ((uint64_t)vid << VDI_SPACE_SHIFT) | idx;
 }
 
-static inline unsigned long oid_to_bit(uint64_t oid)
+static inline uint32_t oid_to_vid(uint64_t oid)
 {
-	return (oid & ~VDI_BIT) >> VDI_SPACE_SHIFT;
+	return (~VDI_BIT & oid) >> VDI_SPACE_SHIFT;
 }
 
+#define NR_VDIS (1U << DATA_SPECE_SHIFT)
+
 #endif
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index c4f6a13..c59aa74 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -222,11 +222,11 @@ struct sd_vdi_req {
 	uint32_t	epoch;
 	uint32_t        id;
 	uint32_t        data_length;
-	uint64_t        base_oid;
 	uint64_t	vdi_size;
+	uint32_t        base_vdi_id;
 	uint32_t	copies;
 	uint32_t        snapid;
-	uint32_t        pad[2];
+	uint32_t        pad[3];
 };
 
 struct sd_vdi_rsp {
@@ -238,9 +238,9 @@ struct sd_vdi_rsp {
 	uint32_t        data_length;
 	uint32_t        result;
 	uint32_t        rsvd;
-	uint64_t        oid;
+	uint32_t        vdi_id;
 	uint32_t	vdi_epoch;
-	uint32_t        pad[3];
+	uint32_t        pad[4];
 };
 
 struct sd_node_req {
diff --git a/shepherd/shepherd.c b/shepherd/shepherd.c
index da1263b..911aa00 100644
--- a/shepherd/shepherd.c
+++ b/shepherd/shepherd.c
@@ -316,7 +316,7 @@ static int debug(char *op, char *arg)
 	char name[128];
 	unsigned rlen, wlen;
 	unsigned opcode, flags;
-	uint64_t oid = 0;
+	uint32_t vid = 0;
 	char vdiname[SD_MAX_VDI_LEN];
 
 	if (!op)
@@ -360,8 +360,8 @@ static int debug(char *op, char *arg)
 		if (!arg)
 			return 1;
 		rlen = 0;
-		oid = strtoul(arg, NULL, 10);
-		if (oid == 0) {
+		vid = strtoul(arg, NULL, 10);
+		if (vid == 0) {
 			wlen = strlen(arg) + 1;
 			opcode = SD_OP_GET_VDI_INFO;
 			flags = SD_FLAG_CMD_WRITE;
@@ -393,8 +393,8 @@ static int debug(char *op, char *arg)
 	hdr.data_length = wlen;
 	hdr.flags = flags;
 	hdr.epoch = node_list_version;
-	if (oid > 0) {
-		((struct sd_vdi_req *)&hdr)->base_oid = oid;
+	if (vid > 0) {
+		((struct sd_vdi_req *)&hdr)->base_vdi_id = vid;
 	}
 
 	ret = exec_req(fd, &hdr, arg, &wlen, &rlen);
@@ -425,8 +425,8 @@ static int debug(char *op, char *arg)
 
 	if (!strcasecmp(op, "vdi_info")) {
 		struct sd_vdi_rsp *vdi_rsp = (struct sd_vdi_rsp *)rsp;
-		printf("name = %s, oid = %"PRIu64", epoch = %d\n",
-		       arg, vdi_rsp->oid, vdi_rsp->vdi_epoch);
+		printf("name = %s, vid = %"PRIu32", epoch = %d\n",
+		       arg, vdi_rsp->vdi_id, vdi_rsp->vdi_epoch);
 	}
 	return ret;
 }
@@ -473,7 +473,7 @@ static int shutdown_sheepdog(void)
 
 #define DIR_BUF_LEN (UINT64_C(1) << 20)
 
-typedef void (*vdi_parser_func_t)(uint64_t oid, char *name, uint32_t tag, uint32_t flags,
+typedef void (*vdi_parser_func_t)(uint32_t vid, char *name, uint32_t tag, uint32_t flags,
 				  struct sheepdog_inode *i, void *data);
 
 
@@ -509,14 +509,15 @@ static int parse_vdi(vdi_parser_func_t func, void *data)
 			continue;
 
 		ret = read_object(node_list_entries, nr_nodes, node_list_version,
-				  bit_to_oid(nr), (void *)&i, sizeof(i), 0, nr_nodes);
+				  vid_to_vdi_oid(nr), (void *)&i,
+				  sizeof(i), 0, nr_nodes);
 
 		if (ret == sizeof(i)) {
 			if (i.name[0] == '\0') /* deleted */
 				continue;
-			func(i.oid, i.name, i.snap_id, 0, &i, data);
+			func(i.vdi_id, i.name, i.snap_id, 0, &i, data);
 		} else
-			printf("error %lu %" PRIx64 ", %d\n", nr, bit_to_oid(nr), ret);
+			printf("error %lu, %d\n", nr, ret);
 
 	}
 
@@ -529,7 +530,7 @@ struct graph_info {
 	int highlight;
 };
 
-static void print_graph_tree(uint64_t oid, char *name, uint32_t tag,
+static void print_graph_tree(uint32_t vid, char *name, uint32_t tag,
 			     uint32_t flags, struct sheepdog_inode *i, void *data)
 {
 	struct graph_info *info = (struct graph_info *)data;
@@ -543,7 +544,7 @@ static void print_graph_tree(uint64_t oid, char *name, uint32_t tag,
 		return;
 
 	if (info->root < 0)
-		info->root = i->parent_oid;
+		info->root = vid_to_vdi_oid(i->parent_vdi_id);
 
 	ti = i->ctime >> 32;
 	localtime_r(&ti, &tm);
@@ -552,12 +553,12 @@ static void print_graph_tree(uint64_t oid, char *name, uint32_t tag,
 	strftime(time, sizeof(time), "%H:%M:%S", &tm);
 	size_to_str(i->vdi_size, size_str, sizeof(size_str));
 
-	printf("  \"%" PRIu64 "\" [shape = \"box\","
+	printf("  \"%" PRIu32 "\" [shape = \"box\","
 	       "fontname = \"Courier\","
 	       "fontsize = \"12\","
 	       "group = \"%s\","
 	       "label = \"",
-	       oid, name);
+	       vid, name);
 	printf("name: %8s\\n"
 	       "tag : %8x\\n"
 	       "size: %8s\\n"
@@ -570,7 +571,7 @@ static void print_graph_tree(uint64_t oid, char *name, uint32_t tag,
 	else
 		printf("\"];\n");
 
-	printf("  \"%" PRIu64 "\" -> \"%" PRIu64 "\";\n", i->parent_oid, oid);
+	printf("  \"%" PRIu32 "\" -> \"%" PRIu32 "\";\n", i->parent_vdi_id, vid);
 }
 
 static int graphview_vdi(char *vdiname, int highlight)
@@ -603,7 +604,7 @@ struct tree_info {
 	char *name;
 };
 
-static void print_vdi_tree(uint64_t oid, char *name, uint32_t tag,
+static void print_vdi_tree(uint32_t vid, char *name, uint32_t tag,
 			   uint32_t flags, struct sheepdog_inode *i, void *data)
 {
 	struct tree_info *info = (struct tree_info *)data;
@@ -624,7 +625,7 @@ static void print_vdi_tree(uint64_t oid, char *name, uint32_t tag,
 			 "[%y-%m-%d %H:%M]", &tm);
 	}
 
-	add_vdi_tree(name, buf, oid, i->parent_oid,
+	add_vdi_tree(name, buf, vid, i->parent_vdi_id,
 		     info->highlight && is_current(i));
 }
 
@@ -644,7 +645,7 @@ static int treeview_vdi(char *vdiname, int highlight)
 	return 0;
 }
 
-static void print_vdi_list(uint64_t oid, char *name, uint32_t tag,
+static void print_vdi_list(uint32_t vid, char *name, uint32_t tag,
 			   uint32_t flags, struct sheepdog_inode *i, void *data)
 {
 	int idx;
@@ -663,7 +664,7 @@ static void print_vdi_list(uint64_t oid, char *name, uint32_t tag,
 	my_objs = 0;
 	cow_objs = 0;
 	for (idx = 0; idx < MAX_DATA_OBJS; idx++) {
-		if (!i->data_oid[idx])
+		if (!i->data_vdi_id[idx])
 			continue;
 		if (is_data_obj_writeable(i, idx))
 			my_objs++;
@@ -676,9 +677,9 @@ static void print_vdi_list(uint64_t oid, char *name, uint32_t tag,
 	size_to_str(cow_objs * SD_DATA_OBJ_SIZE, cow_objs_str, sizeof(cow_objs_str));
 
 	if (!data || strcmp(name, data) == 0) {
-		printf("%c %-8s %5d %7s %7s %7s %s  %9" PRIx64 "\n",
+		printf("%c %-8s %5d %7s %7s %7s %s  %7" PRIx32 "\n",
 		       is_current(i) ? ' ' : 's', name, tag,
-		       vdi_size_str, my_objs_str, cow_objs_str, dbuf, oid);
+		       vdi_size_str, my_objs_str, cow_objs_str, dbuf, vid);
 	}
 }
 
@@ -688,7 +689,7 @@ struct vm_list_info {
 	int highlight;
 };
 
-static void print_vm_list(uint64_t oid, char *name, uint32_t tag,
+static void print_vm_list(uint32_t vid, char *name, uint32_t tag,
 			  uint32_t flags, struct sheepdog_inode *inode, void *data)
 {
 	int i, j;
@@ -707,7 +708,7 @@ static void print_vm_list(uint64_t oid, char *name, uint32_t tag,
 	my_objs = 0;
 	cow_objs = 0;
 	for (j = 0; j < MAX_DATA_OBJS; j++) {
-		if (!inode->data_oid[j])
+		if (!inode->data_vdi_id[j])
 			continue;
 		if (is_data_obj_writeable(inode, j))
 			my_objs++;
@@ -737,8 +738,8 @@ static void print_vm_list(uint64_t oid, char *name, uint32_t tag,
 		       vdi_size_str, my_objs_str, cow_objs_str);
 }
 
-static void cal_total_vdi_size(uint64_t oid, char *name, uint32_t tag,
-			   uint32_t flags, struct sheepdog_inode *i, void *data)
+static void cal_total_vdi_size(uint32_t vid, char *name, uint32_t tag,
+			       uint32_t flags, struct sheepdog_inode *i, void *data)
 {
 	uint64_t *size = data;
 
@@ -746,15 +747,15 @@ static void cal_total_vdi_size(uint64_t oid, char *name, uint32_t tag,
 		*size += i->vdi_size;
 }
 
-struct get_oid_info {
+struct get_vid_info {
 	char *name;
-	uint64_t oid;
+	uint32_t vid;
 };
 
-static void get_oid(uint64_t oid, char *name, uint32_t tag,
+static void get_oid(uint32_t vid, char *name, uint32_t tag,
 		    uint32_t flags, struct sheepdog_inode *i, void *data)
 {
-	struct get_oid_info *info = data;
+	struct get_vid_info *info = data;
 	char *p;
 
 	if (info->name) {
@@ -763,10 +764,10 @@ static void get_oid(uint64_t oid, char *name, uint32_t tag,
 		if (p) {
 			if (!strncmp(name, info->name, p - info->name) &&
 			    tag == strtoul(p + 1, NULL, 16))
-				info->oid = oid;
+				info->vid = vid;
 		} else {
 			if (!strcmp(name, info->name))
-				info->oid = oid;
+				info->vid = vid;
 		}
 	}
 }
@@ -813,7 +814,7 @@ static void get_data_oid(char *sheep, uint64_t oid, struct sd_obj_rsp *rsp,
 		if (info->success)
 			break;
 		info->success = 1;
-		info->data_oid = inode->data_oid[info->idx];
+		info->data_oid = vid_to_data_oid(inode->data_vdi_id[info->idx], info->idx);
 		break;
 	case SD_RES_NO_OBJ:
 		break;
@@ -876,24 +877,24 @@ static void parse_objs(uint64_t oid, obj_parser_func_t func, void *data)
 static void print_obj(char *vdiname, unsigned index)
 {
 	int ret;
-	struct get_oid_info info;
-	uint64_t oid;
+	struct get_vid_info info;
+	uint32_t vid;
 
 	info.name = vdiname;
-	info.oid = 0;
+	info.vid = 0;
 
 	ret = parse_vdi(get_oid, &info);
 
-	oid = info.oid;
-	if (oid == 0) {
+	vid = info.vid;
+	if (vid == 0) {
 		printf("No such vdi\n");
 		return;
 	}
 
 	if (index == ~0) {
-		printf("Looking for the inode object 0x%" PRIx64 " with %d nodes\n\n",
-		       oid, nr_nodes);
-		parse_objs(oid, do_print_obj, NULL);
+		printf("Looking for the inode object 0x%" PRIx32 " with %d nodes\n\n",
+		       vid, nr_nodes);
+		parse_objs(vid_to_vdi_oid(vid), do_print_obj, NULL);
 	} else {
 		struct get_data_oid_info info;
 
@@ -905,20 +906,20 @@ static void print_obj(char *vdiname, unsigned index)
 			exit(1);
 		}
 
-		parse_objs(oid, get_data_oid, &info);
+		parse_objs(vid_to_vdi_oid(vid), get_data_oid, &info);
 
 		if (info.success) {
 			if (info.data_oid) {
 				printf("Looking for the object 0x%" PRIx64
-				       " (the inode oid 0x%" PRIx64 " index %u) with %d nodes\n\n",
-				       info.data_oid, oid, index, nr_nodes);
+				       " (the inode vid 0x%" PRIx32 " index %u) with %d nodes\n\n",
+				       info.data_oid, vid, index, nr_nodes);
 
 				parse_objs(info.data_oid, do_print_obj, NULL);
 			} else
-				printf("The inode object 0x%" PRIx64 " index %u is not allocated\n",
-				       oid, index);
+				printf("The inode object 0x%" PRIx32 " index %u is not allocated\n",
+				       vid, index);
 		} else
-			printf("failed to read the inode object 0x%" PRIx64 "\n", oid);
+			printf("failed to read the inode object 0x%" PRIx32 "\n", vid);
 	}
 }
 
@@ -947,8 +948,8 @@ rerun:
 	case INFO_VDI:
 		switch (format) {
 		case FORMAT_LIST:
-			printf("  name        id    size    used  shared    creation time  object id\n");
-			printf("--------------------------------------------------------------------\n");
+			printf("  name        id    size    used  shared    creation time   vdi id\n");
+			printf("------------------------------------------------------------------\n");
 			ret = parse_vdi(print_vdi_list, name);
 			break;
 		case FORMAT_TREE:
diff --git a/shepherd/treeview.c b/shepherd/treeview.c
index 7bbdef6..c5adb96 100644
--- a/shepherd/treeview.c
+++ b/shepherd/treeview.c
@@ -24,8 +24,8 @@
 struct vdi_tree {
 	char name[1024];
 	char label[256];
-	uint64_t oid;
-	uint64_t poid;
+	uint32_t vid;
+	uint32_t pvid;
 	int highlight;
 	struct list_head children;
 	struct list_head siblings;
@@ -34,16 +34,16 @@ struct vdi_tree {
 static int *width, *more;
 static struct vdi_tree *root;
 
-static struct vdi_tree *find_vdi(struct vdi_tree *parent, uint64_t oid,
+static struct vdi_tree *find_vdi(struct vdi_tree *parent, uint32_t vid,
 				 const char *name)
 {
 	struct vdi_tree *vdi, *ret;
 
 	list_for_each_entry(vdi, &parent->children, siblings) {
-		if (vdi->oid == oid && !strcmp(vdi->name, name))
+		if (vdi->vid == vid && !strcmp(vdi->name, name))
 			return vdi;
 
-		ret = find_vdi(vdi, oid, name);
+		ret = find_vdi(vdi, vid, name);
 		if (ret)
 			return ret;
 	}
@@ -51,7 +51,7 @@ static struct vdi_tree *find_vdi(struct vdi_tree *parent, uint64_t oid,
 }
 
 static struct vdi_tree *new_vdi(const char *name, const char *label,
-		uint64_t oid, uint64_t poid, int highlight)
+				uint64_t vid, uint64_t pvid, int highlight)
 {
 	struct vdi_tree *vdi;
 
@@ -62,8 +62,8 @@ static struct vdi_tree *new_vdi(const char *name, const char *label,
 	}
 	strcpy(vdi->name, name);
 	strcpy(vdi->label, label);
-	vdi->oid = oid;
-	vdi->poid = poid;
+	vdi->vid = vid;
+	vdi->pvid = pvid;
 	vdi->highlight = highlight;
 	INIT_LIST_HEAD(&vdi->children);
 	return vdi;
@@ -74,16 +74,16 @@ void init_tree(void)
 	root = new_vdi("", "", 0, 0, 0);
 }
 
-void add_vdi_tree(const char *name, const char *label, uint64_t oid,
-		uint64_t poid, int highlight)
+void add_vdi_tree(const char *name, const char *label, uint32_t vid,
+		  uint32_t pvid, int highlight)
 {
 	struct vdi_tree *vdi, *parent;
 
-	vdi = new_vdi(name, label, oid, poid, highlight);
+	vdi = new_vdi(name, label, vid, pvid, highlight);
 	if (!vdi)
 		return;
 
-	parent = find_vdi(root, poid, name);
+	parent = find_vdi(root, pvid, name);
 	if (!parent)
 		parent = root;
 
@@ -95,7 +95,7 @@ static void compaction(struct vdi_tree *parent)
 	struct vdi_tree *vdi, *e, *new_parent;
 
 	list_for_each_entry_safe(vdi, e, &parent->children, siblings) {
-		new_parent = find_vdi(root, vdi->poid, vdi->name);
+		new_parent = find_vdi(root, vdi->pvid, vdi->name);
 		if (new_parent && parent != new_parent) {
 			list_del(&vdi->siblings);
 			list_add_tail(&vdi->siblings, &new_parent->children);
diff --git a/shepherd/treeview.h b/shepherd/treeview.h
index d18fe67..fa54e6e 100644
--- a/shepherd/treeview.h
+++ b/shepherd/treeview.h
@@ -12,8 +12,8 @@
 #define __TREEVIEW__
 
 void init_tree(void);
-void add_vdi_tree(const char *label, const char *tag, uint64_t pid,
-		  uint64_t ppid, int highlight);
+void add_vdi_tree(const char *label, const char *tag, uint32_t vid,
+		  uint32_t pvid, int highlight);
 void dump_tree(void);
 
 #endif
-- 
1.5.6.5




More information about the sheepdog mailing list