[sheepdog] [PATCH v4] recovery: avoid recovering object from node left

levin li levin108 at gmail.com
Thu May 24 11:32:10 CEST 2012


From: levin li <xingke.lwp at taobao.com>

v3 -- > v4:
fix some coding style issues and add a vnode_node_cmp() function for the
bsearch() call in is_invalid_vnode()

v2 -- > v3:
sort the node list first in init_rw(), and then use binary search
to check whether a vnode is valid.

v1 -- > v2:
rebased to the current master branch
----------------------------------------------- >8
In the recovery path, sheep may walk back to an old epoch whose node
list contains nodes that have since left the cluster. We shouldn't try
to recover objects from these nodes, so I add a function that checks
whether the target node is still a valid node at the current epoch.

Signed-off-by: levin li <xingke.lwp at taobao.com>
---
 include/sheep.h  |   17 +++++++++++++++++
 sheep/recovery.c |   20 ++++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/include/sheep.h b/include/sheep.h
index 010e213..ac9179c 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -326,6 +326,23 @@ static inline const char *sd_strerror(int err)
 	return "Invalid error code";
 }
 
+static inline int vnode_node_cmp(const void *a, const void *b)
+{
+	const struct sd_vnode *node1 = a;
+	const struct sd_node *node2 = b;
+	int cmp;
+
+	cmp = memcmp(node1->addr, node2->addr, sizeof(node1->addr));
+	if (cmp != 0)
+		return cmp;
+
+	if (node1->port < node2->port)
+		return -1;
+	if (node1->port > node2->port)
+		return 1;
+	return 0;
+}
+
 static inline int node_cmp(const void *a, const void *b)
 {
 	const struct sd_node *node1 = a;
diff --git a/sheep/recovery.c b/sheep/recovery.c
index f341fc6..f4241f0 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -193,6 +193,20 @@ static void rollback_old_cur(struct sd_vnode *old, int *old_nr, int *old_copies,
 	*old_copies = new_old_copies;
 }
 
+
+/*
+ * A virtual node that does not match any node in the current node list
+ * belongs to a node that has left the cluster, so it is treated as invalid.
+ */
+static int is_invalid_vnode(struct sd_vnode *entry, struct sd_node *nodes,
+				int nr_nodes)
+{
+	if (bsearch(entry, nodes, nr_nodes, sizeof(struct sd_node),
+				vnode_node_cmp))
+		return 0;
+	return 1;
+}
+
 /*
  * Recover the object from its track in epoch history. That is,
  * the routine will try to recovery it from the nodes it has stayed,
@@ -223,6 +237,9 @@ again:
 		int idx;
 		idx = obj_to_sheep(old, old_nr, oid, i);
 		tgt_entry = old + idx;
+		if (is_invalid_vnode(tgt_entry, rw->cur_nodes,
+				   rw->cur_nr_nodes))
+			continue;
 		ret = recover_object_from_replica(oid, tgt_entry,
 						  epoch, tgt_epoch);
 		if (ret == 0) {
@@ -620,6 +637,9 @@ static int init_rw(struct recovery_work *rw)
 
 	rw->cur_nr_nodes = epoch_log_read_nr(epoch, (char *)rw->cur_nodes,
 					     sizeof(rw->cur_nodes));
+	qsort(rw->cur_nodes, rw->cur_nr_nodes, sizeof(struct sd_node),
+		  node_cmp);
+
 	if (rw->cur_nr_nodes <= 0) {
 		eprintf("failed to read epoch log for epoch %"PRIu32"\n", epoch);
 		return -1;
-- 
1.7.10




More information about the sheepdog mailing list