[sheepdog] [PATCH 3/3] sheep: use struct vnode_info in recovery

Christoph Hellwig hch at infradead.org
Mon May 28 14:57:09 CEST 2012


Use struct vnode_info to hold vnode lists in recovery instead of opencoding
the data structures.  This helps to keep the code simpler, especially in
do_recover_object which did mind-boggling copy games before.

Signed-off-by: Christoph Hellwig <hch at lst.de>

---
 sheep/recovery.c |  138 ++++++++++++++++++++++---------------------------------
 1 file changed, 56 insertions(+), 82 deletions(-)

Index: sheepdog/sheep/recovery.c
===================================================================
--- sheepdog.orig/sheep/recovery.c	2012-05-28 14:40:21.440682850 +0200
+++ sheepdog/sheep/recovery.c	2012-05-28 14:43:05.072687040 +0200
@@ -41,10 +41,9 @@ struct recovery_work {
 	struct sd_node old_nodes[SD_MAX_NODES];
 	int cur_nr_nodes;
 	struct sd_node cur_nodes[SD_MAX_NODES];
-	int old_nr_vnodes;
-	struct sd_vnode old_vnodes[SD_MAX_VNODES];
-	int cur_nr_vnodes;
-	struct sd_vnode cur_vnodes[SD_MAX_VNODES];
+
+	struct vnode_info *old_vnodes;
+	struct vnode_info *cur_vnodes;
 };
 
 static struct recovery_work *next_rw;
@@ -62,25 +61,20 @@ static int obj_cmp(const void *oid1, con
 	return 0;
 }
 
-static void *get_vnodes_from_epoch(uint32_t epoch, int *nr, int *copies)
+static struct vnode_info *get_vnodes_from_epoch(uint32_t epoch)
 {
-	int nodes_nr, len = sizeof(struct sd_vnode) * SD_MAX_VNODES;
 	struct sd_node nodes[SD_MAX_NODES];
-	void *buf = xmalloc(len);
+	int nr_nodes;
 
-	nodes_nr = epoch_log_read_nr(epoch, (void *)nodes, sizeof(nodes));
-	if (nodes_nr < 0) {
-		nodes_nr = epoch_log_read_remote(epoch, (void *)nodes, sizeof(nodes));
-		if (nodes_nr == 0) {
-			free(buf);
+	nr_nodes = epoch_log_read_nr(epoch, (void *)nodes, sizeof(nodes));
+	if (nr_nodes < 0) {
+		nr_nodes = epoch_log_read_remote(epoch, (void *)nodes, sizeof(nodes));
+		if (nr_nodes == 0)
 			return NULL;
-		}
-		nodes_nr /= sizeof(nodes[0]);
+		nr_nodes /= sizeof(nodes[0]);
 	}
-	*nr = nodes_to_vnodes(nodes, nodes_nr, buf);
-	*copies = get_max_nr_copies_from(nodes, nodes_nr);
 
-	return buf;
+	return alloc_vnode_info(nodes, nr_nodes);
 }
 
 static int recover_object_from_replica(uint64_t oid,
@@ -180,22 +174,6 @@ out:
 	return ret;
 }
 
-static void rollback_old_cur(struct sd_vnode *old, int *old_nr, int *old_copies,
-			     struct sd_vnode *cur, int *cur_nr, int *cur_copies,
-			     struct sd_vnode *new_old, int new_old_nr, int new_old_copies)
-{
-	int nr_old = *old_nr;
-	int copies_old = *old_copies;
-
-	memcpy(cur, old, sizeof(*old) * nr_old);
-	*cur_nr = nr_old;
-	*cur_copies = copies_old;
-	memcpy(old, new_old, sizeof(*new_old) * new_old_nr);
-	*old_nr = new_old_nr;
-	*old_copies = new_old_copies;
-}
-
-
 /*
  * A virtual node that does not match any node in current node list
  * means the node has left the cluster, then it's an invalid virtual node.
@@ -216,33 +194,28 @@ static int is_invalid_vnode(struct sd_vn
  */
 static int do_recover_object(struct recovery_work *rw)
 {
-	struct sd_vnode *old, *cur;
+	struct vnode_info *old, *cur;
 	uint64_t oid = rw->oids[rw->done];
-	int old_nr = rw->old_nr_vnodes, cur_nr = rw->cur_nr_vnodes;
 	uint32_t epoch = rw->epoch, tgt_epoch = rw->epoch - 1;
-	struct sd_vnode *tgt_entry;
-	int old_copies, cur_copies, ret, i, retry;
+	int nr_copies, ret, i, retry;
 
-	old = xmalloc(sizeof(*old) * SD_MAX_VNODES);
-	cur = xmalloc(sizeof(*cur) * SD_MAX_VNODES);
-	memcpy(old, rw->old_vnodes, sizeof(*old) * old_nr);
-	memcpy(cur, rw->cur_vnodes, sizeof(*cur) * cur_nr);
-	old_copies = get_max_nr_copies_from(rw->old_nodes, rw->old_nr_nodes);
-	cur_copies = get_max_nr_copies_from(rw->cur_nodes, rw->cur_nr_nodes);
+	old = grab_vnode_info(rw->old_vnodes);
+	cur = grab_vnode_info(rw->cur_vnodes);
 
 again:
 	dprintf("try recover object %"PRIx64" from epoch %"PRIu32"\n",
 		oid, tgt_epoch);
-	retry = 0;
+
 	/* Let's do a breadth-first search */
-	for (i = 0; i < old_copies; i++) {
-		int idx;
-		idx = obj_to_sheep(old, old_nr, oid, i);
-		tgt_entry = old + idx;
-		if (is_invalid_vnode(tgt_entry, rw->cur_nodes,
+	retry = 0;
+	nr_copies = get_nr_copies(old);
+	for (i = 0; i < nr_copies; i++) {
+		struct sd_vnode *tgt_vnode = oid_to_vnode(old, oid, i);
+
+		if (is_invalid_vnode(tgt_vnode, rw->cur_nodes,
 				     rw->cur_nr_nodes))
 			continue;
-		ret = recover_object_from_replica(oid, tgt_entry,
+		ret = recover_object_from_replica(oid, tgt_vnode,
 						  epoch, tgt_epoch);
 		if (ret == 0) {
 			/* Succeed */
@@ -253,16 +226,17 @@ again:
 			continue;
 		}
 	}
+
 	/* If not succeed but someone orders us to retry it, serve the order */
 	if (ret != 0 && retry == 1) {
 		ret = 0;
 		rw->retry = 1;
 		goto err;
 	}
+
 	/* No luck, roll back to an older configuration and try again */
 	if (ret < 0) {
-		struct sd_vnode *new_old;
-		int new_old_nr, new_old_copies;
+		struct vnode_info *new_old;
 
 		tgt_epoch--;
 		if (tgt_epoch < 1) {
@@ -271,19 +245,20 @@ again:
 			goto err;
 		}
 
-		new_old = get_vnodes_from_epoch(tgt_epoch, &new_old_nr, &new_old_copies);
+		new_old = get_vnodes_from_epoch(tgt_epoch);
 		if (!new_old) {
 			ret = -1;
 			goto err;
 		}
-		rollback_old_cur(old, &old_nr, &old_copies, cur, &cur_nr, &cur_copies,
-				new_old, new_old_nr, new_old_copies);
-		free(new_old);
+
+		put_vnode_info(cur);
+		cur = old;
+		old = new_old;
 		goto again;
 	}
 err:
-	free(old);
-	free(cur);
+	put_vnode_info(old);
+	put_vnode_info(cur);
 	return ret;
 }
 
@@ -432,6 +407,14 @@ static void flush_wait_obj_requests(void
 	process_request_event_queues();
 }
 
+static void free_recovery_work(struct recovery_work *rw)
+{
+	put_vnode_info(rw->cur_vnodes);
+	put_vnode_info(rw->old_vnodes);
+	free(rw->oids);
+	free(rw);
+}
+
 static void do_recover_main(struct work *work)
 {
 	struct recovery_work *rw = container_of(work, struct recovery_work, work);
@@ -477,9 +460,7 @@ static void do_recover_main(struct work
 	recovering_work = NULL;
 
 	sys->recovered_epoch = rw->epoch;
-
-	free(rw->oids);
-	free(rw);
+	free_recovery_work(rw);
 
 	if (next_rw) {
 		rw = next_rw;
@@ -563,23 +544,20 @@ int merge_objlist(uint64_t *list1, int n
 
 static int screen_obj_list(struct recovery_work *rw,  uint64_t *list, int list_nr)
 {
-	int ret, i, cp, idx;
+	int ret, i, j;
 	struct strbuf buf = STRBUF_INIT;
-	struct sd_vnode *nodes = rw->cur_vnodes;
-	int nodes_nr = rw->cur_nr_vnodes;
-	int nr_objs = get_max_nr_copies_from(rw->cur_nodes, rw->cur_nr_nodes);
-	int idx_buf[SD_MAX_COPIES];
+	struct sd_vnode *vnodes[SD_MAX_COPIES];
+	int nr_objs;
 
+	nr_objs = get_nr_copies(rw->cur_vnodes);
 	for (i = 0; i < list_nr; i++) {
-		obj_to_sheeps(nodes, nodes_nr, list[i], nr_objs, idx_buf);
-		for (cp = 0; cp < nr_objs; cp++) {
-			idx = idx_buf[cp];
-			if (vnode_is_local(&nodes[idx]))
+		oid_to_vnodes(rw->cur_vnodes, list[i], nr_objs, vnodes);
+		for (j = 0; j < nr_objs; j++) {
+			if (vnode_is_local(vnodes[j])) {
+				strbuf_add(&buf, &list[i], sizeof(uint64_t));
 				break;
+			}
 		}
-		if (cp == nr_objs)
-			continue;
-		strbuf_add(&buf, &list[i], sizeof(uint64_t));
 	}
 	memcpy(list, buf.buf, buf.len);
 
@@ -689,11 +667,9 @@ static int init_rw(struct recovery_work
 		eprintf("failed to read epoch log for epoch %"PRIu32"\n", epoch - 1);
 		return -1;
 	}
-	rw->old_nr_vnodes = nodes_to_vnodes(rw->old_nodes, rw->old_nr_nodes,
-					    rw->old_vnodes);
-	rw->cur_nr_vnodes = nodes_to_vnodes(rw->cur_nodes, rw->cur_nr_nodes,
-					    rw->cur_vnodes);
 
+	rw->old_vnodes = alloc_vnode_info(rw->old_nodes, rw->old_nr_nodes);
+	rw->cur_vnodes = alloc_vnode_info(rw->cur_nodes, rw->cur_nr_nodes);
 	return 0;
 }
 
@@ -739,11 +715,9 @@ int start_recovery(uint32_t epoch)
 	}
 
 	if (recovering_work != NULL) {
-		if (next_rw) {
-			/* skip the previous epoch recovery */
-			free(next_rw->oids);
-			free(next_rw);
-		}
+		/* skip the previous epoch recovery */
+		if (next_rw)
+			free_recovery_work(next_rw);
 		next_rw = rw;
 	} else {
 		recovering_work = rw;



More information about the sheepdog mailing list