[sheepdog] [PATCH v3] recovery: avoid recovering object from node left

Liu Yuan namei.unix at gmail.com
Thu May 24 10:38:12 CEST 2012


On 05/24/2012 04:27 PM, levin li wrote:

> From: levin li <xingke.lwp at taobao.com>
> 
> v2 -- > v3:
> sort the node list first in init_rw(), and then use binary search
> to check whether a vnode is valid.
> 
> v1 -- > v2:
> rebased to the current master branch
> ---------------------------------------------------- >8
> In the recovery path, sheep may get to old epoch at which
> some nodes have left the cluster, we shouldn't try to recover
> objects from these nodes, so I add a check function to check
> whether the target node is a valid node at current epoch.
> 
> Signed-off-by: levin li <xingke.lwp at taobao.com>
> ---
>  include/sheep.h  |    2 +-
>  sheep/recovery.c |   15 +++++++++++++++
>  2 files changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/include/sheep.h b/include/sheep.h
> index 010e213..4e47c6f 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -155,9 +155,9 @@ struct sd_node {
>  };
>  
>  struct sd_vnode {
> -	uint64_t        id;
>  	uint8_t         addr[16];
>  	uint16_t        port;
> +	uint64_t        id;
>  	uint16_t	node_idx;
>  	uint32_t	zone;
>  };
> diff --git a/sheep/recovery.c b/sheep/recovery.c
> index f341fc6..7552034 100644
> --- a/sheep/recovery.c
> +++ b/sheep/recovery.c
> @@ -193,6 +193,14 @@ static void rollback_old_cur(struct sd_vnode *old, int *old_nr, int *old_copies,
>  	*old_copies = new_old_copies;
>  }
>  

> +static int is_invalid_vnode(struct sd_vnode *entry, struct sd_node *nodes,
> +				int nr_nodes)


Please add a comment in the source file explaining what kind of vnode is considered invalid.

> +{
> +	if (bsearch(entry, nodes, nr_nodes, sizeof(struct sd_node), node_cmp))
> +		return 0;
> +	return -1;
> +}
> +


is_invalid_vnode() should return 1 if the vnode is invalid, and 0 if not.

>  /*
>   * Recover the object from its track in epoch history. That is,
>   * the routine will try to recovery it from the nodes it has stayed,
> @@ -223,6 +231,10 @@ again:
>  		int idx;
>  		idx = obj_to_sheep(old, old_nr, oid, i);
>  		tgt_entry = old + idx;
> +		ret = is_invalid_vnode(tgt_entry, rw->cur_nodes,
> +				   rw->cur_nr_nodes);
> +		if (ret < 0)
> +			continue;
>  		ret = recover_object_from_replica(oid, tgt_entry,
>  

then
		if (is_invalid_vnode(...))
			continue;


						  epoch, tgt_epoch);
>  		if (ret == 0) {
> @@ -620,6 +632,9 @@ static int init_rw(struct recovery_work *rw)
>  
>  	rw->cur_nr_nodes = epoch_log_read_nr(epoch, (char *)rw->cur_nodes,
>  					     sizeof(rw->cur_nodes));
> +	qsort(rw->cur_nodes, rw->cur_nr_nodes, sizeof(struct sd_node),
> +		  node_cmp);
> +


When the line is not over 80 characters, we don't need to wrap it.

>  	if (rw->cur_nr_nodes <= 0) {
>  		eprintf("failed to read epoch log for epoch %"PRIu32"\n", epoch);
>  		return -1;





More information about the sheepdog mailing list