From: levin li <xingke.lwp at taobao.com>

v3 --> v4:
change some coding style, add vnode_node_cmp() function for bsearch in
is_invalid_vnode()
v2 --> v3:
sort the node list first in init_rw(), and then use binary search to check
whether a vnode is valid.
v1 --> v2:
rebased to the current master branch
----------------------------------------------- >8
In the recovery path, sheep may get to an old epoch at which some nodes have
left the cluster. We shouldn't try to recover objects from these nodes, so I
add a check function to check whether the target node is a valid node at the
current epoch.

Signed-off-by: levin li <xingke.lwp at taobao.com>
---
 include/sheep.h  |   17 +++++++++++++++++
 sheep/recovery.c |   20 ++++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/include/sheep.h b/include/sheep.h
index 010e213..ac9179c 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -326,6 +326,23 @@ static inline const char *sd_strerror(int err)
 	return "Invalid error code";
 }
 
+static inline int vnode_node_cmp(const void *a, const void *b)
+{
+	const struct sd_vnode *node1 = a;
+	const struct sd_node *node2 = b;
+	int cmp;
+
+	cmp = memcmp(node1->addr, node2->addr, sizeof(node1->addr));
+	if (cmp != 0)
+		return cmp;
+
+	if (node1->port < node2->port)
+		return -1;
+	if (node1->port > node2->port)
+		return 1;
+	return 0;
+}
+
 static inline int node_cmp(const void *a, const void *b)
 {
 	const struct sd_node *node1 = a;
diff --git a/sheep/recovery.c b/sheep/recovery.c
index f341fc6..f4241f0 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -193,6 +193,20 @@ static void rollback_old_cur(struct sd_vnode *old, int *old_nr, int *old_copies,
 	*old_copies = new_old_copies;
 }
 
+
+/*
+ * A virtual node that does not match any node in current node list
+ * means the node has left the cluster, then it's an invalid virtual node.
+ */
+static int is_invalid_vnode(struct sd_vnode *entry, struct sd_node *nodes,
+			    int nr_nodes)
+{
+	if (bsearch(entry, nodes, nr_nodes, sizeof(struct sd_node),
+		    vnode_node_cmp))
+		return 0;
+	return 1;
+}
+
 /*
  * Recover the object from its track in epoch history. That is,
  * the routine will try to recovery it from the nodes it has stayed,
@@ -223,6 +237,9 @@ again:
 		int idx;
 		idx = obj_to_sheep(old, old_nr, oid, i);
 		tgt_entry = old + idx;
+		if (is_invalid_vnode(tgt_entry, rw->cur_nodes,
+				     rw->cur_nr_nodes))
+			continue;
 		ret = recover_object_from_replica(oid, tgt_entry,
 						  epoch, tgt_epoch);
 		if (ret == 0) {
@@ -620,6 +637,9 @@ static int init_rw(struct recovery_work *rw)
 	rw->cur_nr_nodes = epoch_log_read_nr(epoch, (char *)rw->cur_nodes,
 					     sizeof(rw->cur_nodes));
 
+	qsort(rw->cur_nodes, rw->cur_nr_nodes, sizeof(struct sd_node),
+	      node_cmp);
+
 	if (rw->cur_nr_nodes <= 0) {
 		eprintf("failed to read epoch log for epoch %"PRIu32"\n", epoch);
 		return -1;
-- 
1.7.10