[sheepdog] [PATCH v3 4/5] modify interface to write inode meta data

Robin Dong robin.k.dong at gmail.com
Thu Oct 24 11:46:21 CEST 2013


After changing inode->data_vdi_id[] from an array to a B-tree, we can no
longer write a vid to data_vdi_id[] directly. So add a new interface,
sd_inode_write_vdi(), to update the index of an inode.

Signed-off-by: Robin Dong <sanbai at taobao.com>
---
 dog/dog.h                |    4 ++--
 dog/vdi.c                |   41 +++++++++++++++++++++--------------------
 include/sheepdog_proto.h |    8 ++++++--
 lib/sd_inode.c           |   44 ++++++++++++++++++++++++++++++++++----------
 sheep/ops.c              |   27 ++++++++++++++++++++-------
 sheep/sheep_priv.h       |    4 ++--
 sheep/vdi.c              |    8 ++++----
 sheepfs/volume.c         |   19 +++++++++----------
 8 files changed, 98 insertions(+), 57 deletions(-)

diff --git a/dog/dog.h b/dog/dog.h
index c2832bb..cc64d59 100644
--- a/dog/dog.h
+++ b/dog/dog.h
@@ -85,9 +85,9 @@ void show_progress(uint64_t done, uint64_t total, bool raw);
 size_t get_store_objsize(uint8_t copy_policy, uint64_t oid);
 bool is_erasure_oid(uint64_t oid, uint8_t policy);
 
-int write_btree_node(uint64_t id, void *mem, unsigned int len,
+int write_btree_node(uint64_t id, void *mem, unsigned int len, uint64_t offset,
 				int copies, int copy_policy, int create);
-int read_btree_node(uint64_t id, void **mem, unsigned int len);
+int read_btree_node(uint64_t id, void **mem, unsigned int len, uint64_t offset);
 
 #define INODE_GET_VDI(inode, idx) (sd_inode_get_vdi(read_btree_node, \
 							inode, idx))
diff --git a/dog/vdi.c b/dog/vdi.c
index 960e2a0..fe11671 100644
--- a/dog/vdi.c
+++ b/dog/vdi.c
@@ -58,16 +58,16 @@ struct get_vdi_info {
 	uint8_t copy_policy;
 };
 
-int write_btree_node(uint64_t id, void *mem, unsigned int len,
+int write_btree_node(uint64_t id, void *mem, unsigned int len, uint64_t offset,
 				int copies, int copy_policy, int create)
 {
-	return sd_write_object(id, 0, mem, len, 0, 0, copies, copy_policy,
-			true, true);
+	return sd_write_object(id, 0, mem, len, offset, 0, copies,
+			copy_policy, create != 0, true);
 }
 
-int read_btree_node(uint64_t id, void **mem, unsigned int len)
+int read_btree_node(uint64_t id, void **mem, unsigned int len, uint64_t offset)
 {
-	return sd_read_object(id, *mem, len, 0, true);
+	return sd_read_object(id, *mem, len, offset, true);
 }
 
 static inline bool is_data_obj_writeable(const struct sd_inode *inode,
@@ -559,10 +559,8 @@ static int vdi_create(int argc, char **argv)
 		}
 
 		INODE_SET_VDI(inode, idx, vid);
-		ret = sd_write_object(vid_to_vdi_oid(vid), 0, &vid, sizeof(vid),
-				      SD_INODE_HEADER_SIZE + sizeof(vid) * idx,
-				      0, inode->nr_copies, inode->copy_policy,
-				      false, true);
+		ret = sd_inode_write_vdi(write_btree_node, inode, idx,
+				vid, false);
 		if (ret) {
 			ret = EXIT_FAILURE;
 			goto out;
@@ -628,7 +626,7 @@ static int vdi_clone(int argc, char **argv)
 	uint32_t base_vid, new_vid, vdi_id;
 	uint64_t oid;
 	int idx, max_idx, ret;
-	struct sd_inode *inode = NULL;
+	struct sd_inode *inode = NULL, *new_inode = NULL;
 	char *buf = NULL;
 
 	dst_vdi = argv[optind];
@@ -658,6 +656,12 @@ static int vdi_clone(int argc, char **argv)
 	if (ret != EXIT_SUCCESS || !vdi_cmd_data.prealloc)
 		goto out;
 
+	new_inode = xmalloc(sizeof(*inode));
+	ret = read_vdi_obj(dst_vdi, 0, "", NULL, new_inode,
+			SD_INODE_HEADER_SIZE);
+	if (ret != EXIT_SUCCESS)
+		goto out;
+
 	buf = xzalloc(SD_DATA_OBJ_SIZE);
 	max_idx = count_data_objs(inode);
 
@@ -685,11 +689,9 @@ static int vdi_clone(int argc, char **argv)
 			goto out;
 		}
 
-		ret = sd_write_object(vid_to_vdi_oid(new_vid), 0, &new_vid,
-				      sizeof(new_vid),
-				SD_INODE_HEADER_SIZE + sizeof(new_vid) * idx, 0,
-				      inode->nr_copies, inode->copy_policy,
-				      false, true);
+		INODE_SET_VDI(new_inode, idx, new_vid);
+		ret = sd_inode_write_vdi(write_btree_node, new_inode, idx,
+				new_vid, false);
 		if (ret) {
 			ret = EXIT_FAILURE;
 			goto out;
@@ -706,6 +708,8 @@ static int vdi_clone(int argc, char **argv)
 	}
 out:
 	free(inode);
+	if (new_inode)
+		free(new_inode);
 	free(buf);
 	return ret;
 }
@@ -1335,11 +1339,8 @@ static int vdi_write(int argc, char **argv)
 		}
 
 		if (create) {
-			ret = sd_write_object(vid_to_vdi_oid(vid), 0, &vid,
-					      sizeof(vid),
-				SD_INODE_HEADER_SIZE + sizeof(vid) * idx,
-					      flags, inode->nr_copies,
-					      inode->copy_policy, false, false);
+			ret = sd_inode_write_vdi(write_btree_node, inode,
+					idx, vid, false);
 			if (ret) {
 				ret = EXIT_FAILURE;
 				goto out;
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index c338efa..8c9be31 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -249,8 +249,10 @@ struct sd_extent_header {
 };
 
 typedef int (*write_node_fn)(uint64_t id, void *mem, unsigned int len,
-				int copies, int copy_policy, int create);
-typedef int (*read_node_fn)(uint64_t id, void **mem, unsigned int len);
+				uint64_t offset, int copies,
+				int copy_policy, int create);
+typedef int (*read_node_fn)(uint64_t id, void **mem, unsigned int len,
+				uint64_t offset);
 
 struct sheepdog_vdi_attr {
 	char name[SD_MAX_VDI_LEN];
@@ -266,6 +268,8 @@ extern uint32_t sd_inode_get_vdi(read_node_fn reader,
 		const struct sd_inode *inode, int idx);
 extern void sd_inode_set_vdi(write_node_fn writer, read_node_fn reader,
 		struct sd_inode *inode, int idx, uint32_t vdi_id);
+extern int sd_inode_write_vdi(write_node_fn writer, struct sd_inode *inode,
+		int idx, uint32_t vid, bool create);
 extern void sd_inode_copy_vdis(struct sd_inode *oldi, struct sd_inode *newi);
 
 /* 64 bit FNV-1a non-zero initial basis */
diff --git a/lib/sd_inode.c b/lib/sd_inode.c
index 426e00c..60976d6 100644
--- a/lib/sd_inode.c
+++ b/lib/sd_inode.c
@@ -146,7 +146,7 @@ static void dump_btree(read_node_fn reader, struct sd_inode *inode)
 		tmp = (void *)leaf_node;
 
 		while (itor_idx != last_idx) {
-			reader(itor_idx->oid, &tmp, SD_INODE_INDEX_SIZE);
+			reader(itor_idx->oid, &tmp, SD_INODE_INDEX_SIZE, 0);
 
 			sd_info("btree> %p idx: %d, %lu, %u",
 					itor_idx, itor_idx->idx, itor_idx->oid,
@@ -306,9 +306,9 @@ static void transfer_to_idx_root(write_node_fn writer, struct sd_inode *inode)
 	left_oid = vid_to_btree_oid(inode->vdi_id, inode->btree_counter++);
 	right_oid = vid_to_btree_oid(inode->vdi_id, inode->btree_counter++);
 
-	writer(left_oid, left, SD_INODE_INDEX_SIZE, inode->nr_copies,
+	writer(left_oid, left, SD_INODE_INDEX_SIZE, 0, inode->nr_copies,
 			inode->copy_policy, 1);
-	writer(right_oid, right, SD_INODE_INDEX_SIZE, inode->nr_copies,
+	writer(right_oid, right, SD_INODE_INDEX_SIZE, 0, inode->nr_copies,
 			inode->copy_policy, 1);
 
 	/* change root from ext-node to idx-node */
@@ -340,7 +340,7 @@ static int search_whole_btree(read_node_fn reader, const struct sd_inode *inode,
 
 		if (idx_in_range(header, path->p_idx)) {
 			oid = path->p_idx->oid;
-			ret = reader(oid, &tmp, SD_INODE_INDEX_SIZE);
+			ret = reader(oid, &tmp, SD_INODE_INDEX_SIZE, 0);
 			if (ret != SD_RES_SUCCESS)
 				goto out;
 			path->p_ext = search_ext_entry(leaf_node, idx);
@@ -351,7 +351,7 @@ static int search_whole_btree(read_node_fn reader, const struct sd_inode *inode,
 		} else {
 			/* check if last idx-node has space */
 			oid = (path->p_idx - 1)->oid;
-			ret = reader(oid, &tmp, SD_INODE_INDEX_SIZE);
+			ret = reader(oid, &tmp, SD_INODE_INDEX_SIZE, 0);
 			if (ret != SD_RES_SUCCESS)
 				goto out;
 			if (leaf_node->entries < EXT_MAX_ENTRIES) {
@@ -408,9 +408,9 @@ static void split_ext_node(write_node_fn writer, struct sd_inode *inode,
 	split_to_nodes(old, new_ext, old, num);
 
 	new_oid = vid_to_btree_oid(inode->vdi_id, inode->btree_counter++);
-	writer(new_oid, new_ext, SD_INODE_INDEX_SIZE, inode->nr_copies,
+	writer(new_oid, new_ext, SD_INODE_INDEX_SIZE, 0, inode->nr_copies,
 			inode->copy_policy, 1);
-	writer(path->p_idx->oid, old, SD_INODE_INDEX_SIZE, inode->nr_copies,
+	writer(path->p_idx->oid, old, SD_INODE_INDEX_SIZE, 0, inode->nr_copies,
 			inode->copy_policy, 0);
 
 	/* write new index */
@@ -451,7 +451,7 @@ static int insert_new_node(write_node_fn writer, read_node_fn reader,
 			insert_ext_entry_nosearch(path->p_ext_header,
 					path->p_ext, idx, vdi_id);
 			writer(path->p_idx->oid, path->p_ext_header,
-				SD_INODE_INDEX_SIZE, inode->nr_copies,
+				SD_INODE_INDEX_SIZE, 0, inode->nr_copies,
 				inode->copy_policy, 1);
 		} else if (path->p_ext_header) {
 			/* the last idx-node */
@@ -461,7 +461,7 @@ static int insert_new_node(write_node_fn writer, read_node_fn reader,
 			path->p_idx->idx =
 				(LAST_EXT(path->p_ext_header) - 1)->idx;
 			writer(path->p_idx->oid, path->p_ext_header,
-				SD_INODE_INDEX_SIZE, inode->nr_copies,
+				SD_INODE_INDEX_SIZE, 0, inode->nr_copies,
 				inode->copy_policy, 1);
 		} else {
 			/* create a new ext-node */
@@ -472,7 +472,7 @@ static int insert_new_node(write_node_fn writer, read_node_fn reader,
 			insert_ext_entry_nosearch(leaf_node,
 					FIRST_EXT(leaf_node), idx, vdi_id);
 			writer(oid, leaf_node, SD_INODE_INDEX_SIZE,
-					inode->nr_copies,
+					0, inode->nr_copies,
 					inode->copy_policy, 1);
 			insert_idx_entry_nosearch(header, path->p_idx,
 					idx, oid);
@@ -524,6 +524,30 @@ out:
 	dump_btree(reader, inode);
 }
 
+int sd_inode_write_vdi(write_node_fn writer, struct sd_inode *inode, int idx,
+		uint32_t vid, bool create)
+{
+	struct sd_extent_header *header = EXT_HEADER(inode->data_vdi_id);
+	int len;
+
+	/* flat array: store only @vid in slot @idx of this inode's object */
+	if (inode->store_policy == 0)
+		return writer(vid_to_vdi_oid(inode->vdi_id), &vid, sizeof(vid),
+				SD_INODE_HEADER_SIZE + sizeof(vid) * idx,
+				inode->nr_copies, inode->copy_policy, create);
+
+	/* B-tree: rewrite the inode header plus the used root entries */
+	len = SD_INODE_HEADER_SIZE + sizeof(struct sd_extent_header);
+	if (header->depth == 1)
+		len += sizeof(struct sd_extent) * header->entries;
+	else if (header->depth == 2)
+		len += sizeof(struct sd_extent_idx) * header->entries;
+	else
+		assert(0);
+	return writer(vid_to_vdi_oid(inode->vdi_id), inode, len, 0,
+			inode->nr_copies, inode->copy_policy, create);
+}
+
 void sd_inode_copy_vdis(struct sd_inode *oldi, struct sd_inode *newi)
 {
 	memcpy(newi->data_vdi_id, oldi->data_vdi_id, sizeof(newi->data_vdi_id));
diff --git a/sheep/ops.c b/sheep/ops.c
index 7f73ab1..3638a49 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -811,20 +811,33 @@ static int local_flush_vdi(struct request *req)
 static int local_discard_obj(struct request *req)
 {
 	uint64_t oid = req->rq.obj.oid;
-	uint32_t vid = oid_to_vid(oid), zero = 0;
-	int ret, idx = data_oid_to_idx(oid);
+	uint32_t vid = oid_to_vid(oid), zero = 0, tmp_vid;
+	int ret = SD_RES_SUCCESS, idx = data_oid_to_idx(oid);
+	struct sd_inode *inode = xmalloc(sizeof(*inode));
 
 	sd_debug("%"PRIx64, oid);
-	ret = write_object(vid_to_vdi_oid(vid), (char *)&zero, sizeof(zero),
-			   SD_INODE_HEADER_SIZE + sizeof(vid) * idx, false);
+	ret = read_object(vid_to_vdi_oid(vid), (char *)inode,
+			sizeof(struct sd_inode), 0);
 	if (ret != SD_RES_SUCCESS)
-		return ret;
-	if (remove_object(oid) != SD_RES_SUCCESS)
-		sd_err("failed to remove %"PRIx64, oid);
+		goto out;
+
+	tmp_vid = INODE_GET_VDI(inode, idx);
+	/* an index with no vid recorded has no data object to discard */
+	if (tmp_vid) {
+		INODE_SET_VDI(inode, idx, zero);
+		ret = sd_inode_write_vdi(write_btree_node, inode, idx,
+				zero, false);
+		if (ret != SD_RES_SUCCESS)
+			goto out;
+		if (remove_object(oid) != SD_RES_SUCCESS)
+			sd_err("failed to remove %"PRIx64, oid);
+	}
 	/*
 	 * Return success even if remove_object fails because we have updated
 	 * inode successfully.
 	 */
+out:
+	free(inode);
-	return SD_RES_SUCCESS;
+	return ret;
 }
 
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index e28e1b1..f2d696b 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -375,9 +375,9 @@ void objlist_cache_remove(uint64_t oid);
 
 void put_request(struct request *req);
 
-int write_btree_node(uint64_t id, void *mem, unsigned int len,
+int write_btree_node(uint64_t id, void *mem, unsigned int len, uint64_t offset,
 				int copies, int copy_policy, int create);
-int read_btree_node(uint64_t id, void **mem, unsigned int len);
+int read_btree_node(uint64_t id, void **mem, unsigned int len, uint64_t offset);
 
 #define INODE_GET_VDI(inode, idx) (sd_inode_get_vdi(read_btree_node, \
 					inode, idx))
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 203472a..43db614 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -22,15 +22,15 @@ struct vdi_state_entry {
 static struct rb_root vdi_state_root = RB_ROOT;
 static struct sd_lock vdi_state_lock = SD_LOCK_INITIALIZER;
 
-int write_btree_node(uint64_t id, void *mem, unsigned int len,
+int write_btree_node(uint64_t id, void *mem, unsigned int len, uint64_t offset,
 				int copies, int copy_policy, int create)
 {
-	return write_object(id, mem, len, 0, create == 1);
+	return write_object(id, mem, len, offset, create == 1);
 }
 
-int read_btree_node(uint64_t id, void **mem, unsigned int len)
+int read_btree_node(uint64_t id, void **mem, unsigned int len, uint64_t offset)
 {
-	return read_object(id, *mem, len, 0);
+	return read_object(id, *mem, len, offset);
 }
 
 static int vdi_state_cmp(const struct vdi_state_entry *a,
diff --git a/sheepfs/volume.c b/sheepfs/volume.c
index 3fbc4a9..f94a517 100644
--- a/sheepfs/volume.c
+++ b/sheepfs/volume.c
@@ -66,8 +66,9 @@ static struct sd_lock vdi_inode_tree_lock = SD_LOCK_INITIALIZER;
 
 
 static int write_btree_node(uint64_t id, void *mem, unsigned int len,
-				int copies, int copy_policy, int create);
-static int read_btree_node(uint64_t id, void **mem, unsigned int len);
+		uint64_t offset, int copies, int copy_policy, int create);
+static int read_btree_node(uint64_t id, void **mem, unsigned int len,
+		uint64_t offset);
 
 #define INODE_GET_VDI(inode, idx) (sd_inode_get_vdi(\
 					read_btree_node, inode, idx))
@@ -194,10 +195,8 @@ static int volume_rw_object(char *buf, uint64_t oid, size_t size,
 	if (create) {
 		INODE_SET_VDI(vdi->inode, idx, vid);
 		/* writeback inode update */
-		if (volume_rw_object((char *)&vid, vid_to_vdi_oid(vid),
-				     sizeof(vid),
-				     SD_INODE_HEADER_SIZE + sizeof(vid) * idx,
-				     VOLUME_WRITE) < 0)
+		if (sd_inode_write_vdi(write_btree_node, vdi->inode, idx,
+					vid, false) < 0)
 			return -1;
 	}
 done:
@@ -247,20 +246,20 @@ static int volume_do_rw(const char *path, char *buf, size_t size,
 	return 0;
 }
 
-int write_btree_node(uint64_t id, void *mem, unsigned int len,
+int write_btree_node(uint64_t id, void *mem, unsigned int len, uint64_t offset,
 				int copies, int copy_policy, int create)
 {
 	int ret;
-	ret = volume_rw_object(mem, id, len, 0, VOLUME_WRITE);
+	ret = volume_rw_object(mem, id, len, offset, VOLUME_WRITE);
 	if (ret == len)
 		return SD_RES_SUCCESS;
 	return ret;
 }
 
-int read_btree_node(uint64_t id, void **mem, unsigned int len)
+int read_btree_node(uint64_t id, void **mem, unsigned int len, uint64_t offset)
 {
 	int ret;
-	ret = volume_rw_object(*mem, id, len, 0, VOLUME_READ);
+	ret = volume_rw_object(*mem, id, len, offset, VOLUME_READ);
 	if (ret == len)
 		return SD_RES_SUCCESS;
 	return ret;
-- 
1.7.1




More information about the sheepdog mailing list