[Sheepdog] [PATCH v2 1/2] add oid_to_vnodes() and obj_to_sheeps() to avoid too much vnodes traverse

levin li levin108 at gmail.com
Tue May 15 08:48:53 CEST 2012


oid_to_vnode() and obj_to_sheep() traverse the vnode list to find the
target vnode, many times, sheep needs to call these two functions nr_copies
times, it means we also need to traverse the vnode list nr_copies times,
it's absolutely a waste, so I add these two functions to make it only
traverse one time in stead of calling obj_to_sheep()/oid_to_vnode() nr_copies
time.

Signed-off-by: levin li <xingke.lwp at taobao.com>
---
 collie/vdi.c         |    7 +++++--
 include/sheep.h      |   25 +++++++++++++++++++++++++
 sheep/farm/trunk.c   |    5 ++++-
 sheep/group.c        |   17 ++++++++++-------
 sheep/object_cache.c |    7 +++++--
 sheep/ops.c          |    6 ++++--
 sheep/recovery.c     |   11 +++++++++--
 sheep/sdnet.c        |    6 +++---
 sheep/sheep_priv.h   |    2 ++
 sheep/store.c        |   26 +++++++++++++++++---------
 10 files changed, 84 insertions(+), 28 deletions(-)

diff --git a/collie/vdi.c b/collie/vdi.c
index 7e6a309..87bbc7f 100644
--- a/collie/vdi.c
+++ b/collie/vdi.c
@@ -810,6 +810,7 @@ static int print_obj_epoch(uint64_t oid)
 	struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr;
 	unsigned rlen, wlen;
 	struct sd_vnode vnodes[SD_MAX_VNODES];
+	int idx_buf[SD_MAX_COPIES];
 	struct epoch_log *logs;
 	int vnodes_nr, nr_logs, log_length;
 	char host[128];
@@ -853,10 +854,12 @@ again:
 		printf("\nobj %"PRIx64" locations at epoch %d, copies = %d\n",
 				oid, logs[i].epoch, logs[i].nr_copies);
 		printf("---------------------------------------------------\n");
+		obj_to_sheeps(vnodes, vnodes_nr, oid,
+				logs[i].nr_copies, idx_buf);
 		for (j = 0; j < logs[i].nr_copies; j++) {
-			idx = obj_to_sheep(vnodes, vnodes_nr, oid, j);
+			idx = idx_buf[j];
 			addr_to_str(host, sizeof(host), vnodes[idx].addr,
-						vnodes[idx].port);
+					vnodes[idx].port);
 			printf("%s\n", host);
 		}
 	}
diff --git a/include/sheep.h b/include/sheep.h
index 7e287c4..4fd05f7 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -22,6 +22,7 @@
 #define SD_DEFAULT_REDUNDANCY 3
 #define SD_MAX_REDUNDANCY 8
 
+#define SD_MAX_COPIES 16
 #define SD_MAX_NODES 1024
 #define SD_DEFAULT_VNODES 64
 #define SD_MAX_VNODES 65536
@@ -246,6 +247,30 @@ static inline int obj_to_sheep(struct sd_vnode *entries,
 	return hval_to_sheep(entries, nr_entries, id, idx);
 }
 
+static inline void hval_to_sheeps(struct sd_vnode *entries,
+			int nr_entries, uint64_t id, int nr_copies, int *idxs)
+{
+	int i, idx;
+	struct sd_vnode *e = entries, *n;
+
+	for (i = 0; i < nr_entries - 1; i++, e++) {
+		n = e + 1;
+		if (id > e->id && id <= n->id)
+			break;
+	}
+	for (idx = 0; idx < nr_copies; idx++)
+		idxs[idx] = get_nth_node(entries, nr_entries,
+				(i + 1) % nr_entries, idx);
+}
+
+static inline void obj_to_sheeps(struct sd_vnode *entries,
+		  int nr_entries, uint64_t oid, int nr_copies, int *idxs)
+{
+	uint64_t id = fnv_64a_buf(&oid, sizeof(oid), FNV1A_64_INIT);
+
+	hval_to_sheeps(entries, nr_entries, id, nr_copies, idxs);
+}
+
 static inline int is_sheep_op(uint8_t op)
 {
 	return op & SD_OP_SHEEP;
diff --git a/sheep/farm/trunk.c b/sheep/farm/trunk.c
index 881f37d..d47c31b 100644
--- a/sheep/farm/trunk.c
+++ b/sheep/farm/trunk.c
@@ -239,11 +239,14 @@ static int oid_stale(uint64_t oid)
 	struct vnode_info *vnodes;
 	struct sd_vnode *v;
 	int ret = 1;
+	struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
 
 	vnodes = get_vnode_info();
 	nr_copies = get_nr_copies(vnodes);
+
+	oid_to_vnodes(vnodes, oid, nr_copies, obj_vnodes);
 	for (i = 0; i < nr_copies; i++) {
-		v = oid_to_vnode(vnodes, oid, i);
+		v = obj_vnodes[i];
 		if (vnode_is_local(v)) {
 			ret = 0;
 			break;
diff --git a/sheep/group.c b/sheep/group.c
index 73a5ba7..f0fee2d 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -166,16 +166,19 @@ void put_vnode_info(struct vnode_info *vnodes)
 		free(vnodes);
 }
 
-struct sd_vnode *oid_to_vnode(struct vnode_info *vnode_info, uint64_t oid,
-		int copy_idx)
+void oid_to_vnodes(struct vnode_info *vnode_info, uint64_t oid, int nr_copies,
+		struct sd_vnode **vnodes)
 {
-	int n;
+	int idx_buf[SD_MAX_COPIES], i, n;
 
-	n = obj_to_sheep(vnode_info->entries, vnode_info->nr_vnodes,
-			 oid, copy_idx);
-	return &vnode_info->entries[n];
-}
+	obj_to_sheeps(vnode_info->entries, vnode_info->nr_vnodes,
+			oid, nr_copies, idx_buf);
 
+	for (i = 0; i < nr_copies; i++) {
+		n = idx_buf[i];
+		vnodes[i] = &vnode_info->entries[n];
+	}
+}
 
 static int update_vnode_info(void)
 {
diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index 0b3c911..1b7ee5c 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -385,6 +385,7 @@ int object_cache_pull(struct object_cache *oc, uint32_t idx)
 	struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
 	struct vnode_info *vnodes = get_vnode_info();
 	struct sd_vnode *v;
+	struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
 	void *buf;
 	int nr_copies;
 
@@ -404,8 +405,10 @@ int object_cache_pull(struct object_cache *oc, uint32_t idx)
 
 	/* Check if we can read locally */
 	nr_copies = get_nr_copies(vnodes);
+	oid_to_vnodes(vnodes, oid, nr_copies, obj_vnodes);
+
 	for (i = 0; i < nr_copies; i++) {
-		v = oid_to_vnode(vnodes, oid, i);
+		v = obj_vnodes[i];
 		if (vnode_is_local(v)) {
 			struct siocb iocb = { 0 };
 			iocb.epoch = sys->epoch;
@@ -430,7 +433,7 @@ int object_cache_pull(struct object_cache *oc, uint32_t idx)
 pull_remote:
 	/* Okay, no luck, let's read remotely */
 	for (i = 0; i < nr_copies; i++) {
-		v = oid_to_vnode(vnodes, oid, i);
+		v = obj_vnodes[i];
 
 		if (vnode_is_local(v))
 			continue;
diff --git a/sheep/ops.c b/sheep/ops.c
index b6f8eb2..ee704c6 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -644,15 +644,17 @@ static int read_copy_from_replica(struct request *req, uint32_t epoch,
 	unsigned wlen, rlen;
 	char name[128];
 	struct sd_vnode *v;
+	struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
 	struct sd_obj_req hdr;
 	struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
 	struct siocb iocb;
 	int fd;
 
 	nr_copies = get_nr_copies(req->vnodes);
-	for (i = 0; i < nr_copies; i++) {
-		v = oid_to_vnode(req->vnodes, oid, i);
+	oid_to_vnodes(req->vnodes, oid, nr_copies, obj_vnodes);
 
+	for (i = 0; i < nr_copies; i++) {
+		v = obj_vnodes[i];
 		addr_to_str(name, sizeof(name), v->addr, 0);
 
 		if (vnode_is_local(v)) {
diff --git a/sheep/recovery.c b/sheep/recovery.c
index 50f7764..3eb56e9 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -378,9 +378,14 @@ err:
 static int get_replica_idx(struct recovery_work *rw, uint64_t oid, int *copy_nr)
 {
 	int i, ret = -1;
+	int idx_buf[SD_MAX_COPIES];
+
 	*copy_nr = get_max_nr_copies_from(rw->cur_nodes, rw->cur_nr_nodes);
+	obj_to_sheeps(rw->cur_vnodes, rw->cur_nr_vnodes, oid,
+				*copy_nr, idx_buf);
+
 	for (i = 0; i < *copy_nr; i++) {
-		int n = obj_to_sheep(rw->cur_vnodes, rw->cur_nr_vnodes, oid, i);
+		int n = idx_buf[i];
 		if (vnode_is_local(&rw->cur_vnodes[n])) {
 			ret = i;
 			break;
@@ -657,10 +662,12 @@ static int screen_obj_list(struct recovery_work *rw,  uint64_t *list, int list_n
 	struct sd_vnode *nodes = rw->cur_vnodes;
 	int nodes_nr = rw->cur_nr_vnodes;
 	int nr_objs = get_max_nr_copies_from(rw->cur_nodes, rw->cur_nr_nodes);
+	int idx_buf[SD_MAX_COPIES];
 
 	for (i = 0; i < list_nr; i++) {
+		obj_to_sheeps(nodes, nodes_nr, list[i], nr_objs, idx_buf);
 		for (cp = 0; cp < nr_objs; cp++) {
-			idx = obj_to_sheep(nodes, nodes_nr, list[i], cp);
+			idx = idx_buf[cp];
 			if (vnode_is_local(&nodes[idx]))
 				break;
 		}
diff --git a/sheep/sdnet.c b/sheep/sdnet.c
index 8aad8f9..6544d4a 100644
--- a/sheep/sdnet.c
+++ b/sheep/sdnet.c
@@ -38,15 +38,15 @@ void resume_pending_requests(void)
 
 static int is_access_local(struct request *req, uint64_t oid)
 {
-	struct sd_vnode *v;
+	struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
 	int nr_copies;
 	int i;
 
 	nr_copies = get_nr_copies(req->vnodes);
+	oid_to_vnodes(req->vnodes, oid, nr_copies, obj_vnodes);
 
 	for (i = 0; i < nr_copies; i++) {
-		v = oid_to_vnode(req->vnodes, oid, i);
-		if (vnode_is_local(v))
+		if (vnode_is_local(obj_vnodes[i]))
 			return 1;
 	}
 
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 2275a93..8b941dc 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -249,6 +249,8 @@ void put_vnode_info(struct vnode_info *vnodes);
 
 struct sd_vnode *oid_to_vnode(struct vnode_info *vnode_info, uint64_t oid,
 		int copy_idx);
+void oid_to_vnodes(struct vnode_info *vnode_info, uint64_t oid, int nr_copies,
+		struct sd_vnode **vnodes);
 int get_nr_copies(struct vnode_info *vnode_info);
 
 int is_access_to_busy_objects(uint64_t oid);
diff --git a/sheep/store.c b/sheep/store.c
index 8ca71dc..0fd3a48 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -63,6 +63,7 @@ static int forward_read_obj_req(struct request *req)
 	struct sd_obj_req hdr = *(struct sd_obj_req *)&req->rq;
 	struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
 	struct sd_vnode *v;
+	struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
 	uint64_t oid = hdr.oid;
 	int nr_copies;
 
@@ -74,8 +75,9 @@ static int forward_read_obj_req(struct request *req)
 		nr_copies = get_nr_copies(req->vnodes);
 
 	/* TODO: we can do better; we need to check this first */
+	oid_to_vnodes(req->vnodes, oid, nr_copies, obj_vnodes);
 	for (i = 0; i < nr_copies; i++) {
-		v = oid_to_vnode(req->vnodes, oid, i);
+		v = obj_vnodes[i];
 		if (vnode_is_local(v)) {
 			ret = do_local_io(req, hdr.epoch);
 			if (ret != SD_RES_SUCCESS)
@@ -86,7 +88,7 @@ static int forward_read_obj_req(struct request *req)
 
 read_remote:
 	for (i = 0; i < nr_copies; i++) {
-		v = oid_to_vnode(req->vnodes, oid, i);
+		v = obj_vnodes[i];
 		if (vnode_is_local(v))
 			continue;
 
@@ -122,6 +124,7 @@ int forward_write_obj_req(struct request *req)
 	struct sd_obj_req hdr = *(struct sd_obj_req *)&req->rq;
 	struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&req->rp;
 	struct sd_vnode *v;
+	struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
 	uint64_t oid = hdr.oid;
 	int nr_copies;
 	struct pollfd pfds[SD_MAX_REDUNDANCY];
@@ -139,8 +142,9 @@ int forward_write_obj_req(struct request *req)
 	wlen = hdr.data_length;
 
 	nr_copies = get_nr_copies(req->vnodes);
+	oid_to_vnodes(req->vnodes, oid, nr_copies, obj_vnodes);
 	for (i = 0; i < nr_copies; i++) {
-		v = oid_to_vnode(req->vnodes, oid, i);
+		v = obj_vnodes[i];
 
 		addr_to_str(name, sizeof(name), v->addr, 0);
 
@@ -945,6 +949,7 @@ int write_object(struct vnode_info *vnodes, uint32_t node_version,
 {
 	struct sd_obj_req hdr;
 	struct sd_vnode *v;
+	struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
 	int i, fd, ret;
 	char name[128];
 
@@ -957,10 +962,11 @@ int write_object(struct vnode_info *vnodes, uint32_t node_version,
 		}
 	}
 
+	oid_to_vnodes(vnodes, oid, nr_copies, obj_vnodes);
 	for (i = 0; i < nr_copies; i++) {
 		unsigned rlen = 0, wlen = datalen;
 
-		v = oid_to_vnode(vnodes, oid, i);
+		v = obj_vnodes[i];
 		if (vnode_is_local(v)) {
 			ret = write_object_local(oid, data, datalen, offset,
 						 flags, nr_copies, node_version,
@@ -1041,12 +1047,14 @@ int read_object(struct vnode_info *vnodes, uint32_t node_version,
 	struct sd_obj_req hdr;
 	struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
 	struct sd_vnode *v;
+	struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
 	char name[128];
 	int i = 0, fd, ret, last_error = SD_RES_SUCCESS;
 
 	/* search a local object first */
+	oid_to_vnodes(vnodes, oid, nr_copies, obj_vnodes);
 	for (i = 0; i < nr_copies; i++) {
-		v = oid_to_vnode(vnodes, oid, i);
+		v = obj_vnodes[i];
 		if (vnode_is_local(v)) {
 			ret = read_object_local(oid, data, datalen, offset,
 						nr_copies, node_version);
@@ -1064,8 +1072,7 @@ int read_object(struct vnode_info *vnodes, uint32_t node_version,
 	for (i = 0; i < nr_copies; i++) {
 		unsigned wlen = 0, rlen = datalen;
 
-		v = oid_to_vnode(vnodes, oid, i);
-
+		v = obj_vnodes[i];
 		addr_to_str(name, sizeof(name), v->addr, 0);
 
 		fd = connect_to(name, v->port);
@@ -1108,13 +1115,14 @@ int remove_object(struct vnode_info *vnodes, uint32_t node_version,
 	struct sd_obj_req hdr;
 	struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
 	struct sd_vnode *v;
+	struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
 	int i = 0, fd, ret, err = 0;
 
+	oid_to_vnodes(vnodes, oid, nr, obj_vnodes);
 	for (i = 0; i < nr; i++) {
 		unsigned wlen = 0, rlen = 0;
 
-		v = oid_to_vnode(vnodes, oid, i);
-
+		v = obj_vnodes[i];
 		addr_to_str(name, sizeof(name), v->addr, 0);
 
 		fd = connect_to(name, v->port);
-- 
1.7.10




More information about the sheepdog mailing list