[sheepdog] [PATCH v5 5/6] sheep: cache vnode_info when doing recovery

Liu Yuan namei.unix at gmail.com
Wed May 21 07:38:37 CEST 2014


On Wed, May 21, 2014 at 11:41:58AM +0800, Robin Dong wrote:
> From: Robin Dong <sanbai at taobao.com>
> 
> When sheepdog does recovery on some low-performance machines, the CPU usage
> is very high. After using perf tools to find the performance hot spots in the
> sheep daemon, we found that the "alloc_vnode_info()" function costs lots
> of CPU cycles because rollback_vnode_info() rebuilds the vnode_info
> by calling alloc_vnode_info() too frequently.
> 
> The solution is to cache the result of alloc_vnode_info() for each specific
> 'epoch' in the recovery context.
> 
> Signed-off-by: Robin Dong <sanbai at taobao.com>
> ---
>  sheep/group.c      | 12 ++++++++++++
>  sheep/recovery.c   | 43 +++++++++++++++++++++++++++++++++++++------
>  sheep/sheep_priv.h |  4 ++++
>  3 files changed, 53 insertions(+), 6 deletions(-)
> 
> diff --git a/sheep/group.c b/sheep/group.c
> index 63e9ab9..360771d 100644
> --- a/sheep/group.c
> +++ b/sheep/group.c
> @@ -175,6 +175,18 @@ struct vnode_info *get_vnode_info_epoch(uint32_t epoch,
>  	return alloc_vnode_info(&nroot);
>  }
>  
> +int get_nodes_epoch(uint32_t epoch, struct vnode_info *cur_vinfo,
> +		    struct sd_node *nodes, int len)
> +{
> +	int nr_nodes;
> +
> +	nr_nodes = epoch_log_read(epoch, nodes, len);
> +	if (nr_nodes < 0)
> +		nr_nodes = epoch_log_read_remote(epoch, nodes, len,
> +						 NULL, cur_vinfo);
> +	return nr_nodes;
> +}
> +
>  int local_get_node_list(const struct sd_req *req, struct sd_rsp *rsp,
>  			void *data)
>  {
> diff --git a/sheep/recovery.c b/sheep/recovery.c
> index 6008a0b..21fc0a6 100644
> --- a/sheep/recovery.c
> +++ b/sheep/recovery.c
> @@ -17,6 +17,7 @@ struct recovery_work {
>  	uint32_t epoch;
>  	uint32_t tgt_epoch;
>  
> +	struct recovery_info *rinfo;
>  	struct vnode_info *old_vinfo;
>  	struct vnode_info *cur_vinfo;
>  
> @@ -71,6 +72,10 @@ struct recovery_info {
>  
>  	struct vnode_info *old_vinfo;
>  	struct vnode_info *cur_vinfo;
> +
> +	int max_epoch;
> +	struct vnode_info **vinfo_array;
> +	struct sd_mutex vinfo_lock;
>  };
>  
>  static struct recovery_info *next_rinfo;
> @@ -98,22 +103,37 @@ static inline bool node_is_gateway_only(void)
>  }
>  
>  static struct vnode_info *rollback_vnode_info(uint32_t *epoch,
> +					      struct recovery_info *rinfo,
>  					      struct vnode_info *cur)
>  {
> -	struct vnode_info *vinfo;
> +	struct sd_node nodes[SD_MAX_NODES];
> +	int nr_nodes;
> +	struct rb_root nroot = RB_ROOT;
> +
>  rollback:
>  	*epoch -= 1;
>  	if (*epoch < last_gathered_epoch)
>  		return NULL;
>  
> -	vinfo = get_vnode_info_epoch(*epoch, cur);
> -	if (!vinfo) {
> +	nr_nodes = get_nodes_epoch(*epoch, cur, nodes, sizeof(nodes));
> +	if (!nr_nodes) {
>  		/* We rollback in case we don't get a valid epoch */
>  		sd_alert("cannot get epoch %d", *epoch);
>  		sd_alert("clients may see old data");
>  		goto rollback;
>  	}
> -	return vinfo;
> +	/* double check */
> +	if (rinfo->vinfo_array[*epoch] == NULL) {
> +		sd_mutex_lock(&rinfo->vinfo_lock);
> +		if (rinfo->vinfo_array[*epoch] == NULL) {
> +			for (int i = 0; i < nr_nodes; i++)
> +				rb_insert(&nroot, &nodes[i], rb, node_cmp);
> +			rinfo->vinfo_array[*epoch] = alloc_vnode_info(&nroot);
> +		}
> +		sd_mutex_unlock(&rinfo->vinfo_lock);
> +	}
> +	grab_vnode_info(rinfo->vinfo_array[*epoch]);
> +	return rinfo->vinfo_array[*epoch];
>  }
>  
>  /*
> @@ -204,7 +224,8 @@ again:
>  		break;
>  	default:
>  rollback:
> -		new_old = rollback_vnode_info(&tgt_epoch, rw->cur_vinfo);
> +		new_old = rollback_vnode_info(&tgt_epoch, rw->rinfo,
> +					      rw->cur_vinfo);
>  		if (!new_old) {
>  			sd_err("can not read %"PRIx64" idx %d", oid, idx);
>  			free(buf);
> @@ -387,7 +408,8 @@ again:
>  		/* fall through */
>  	default:
>  		/* No luck, roll back to an older configuration and try again */
> -		new_old = rollback_vnode_info(&tgt_epoch, rw->cur_vinfo);
> +		new_old = rollback_vnode_info(&tgt_epoch, rw->rinfo,
> +					      rw->cur_vinfo);
>  		if (!new_old) {
>  			sd_err("can not recover oid %"PRIx64, oid);
>  			ret = -1;
> @@ -675,6 +697,10 @@ static void free_recovery_info(struct recovery_info *rinfo)
>  	put_vnode_info(rinfo->old_vinfo);
>  	free(rinfo->oids);
>  	free(rinfo->prio_oids);
> +	for (int i = 0; i < rinfo->max_epoch; i++)
> +		put_vnode_info(rinfo->vinfo_array[i]);
> +	free(rinfo->vinfo_array);
> +	sd_destroy_mutex(&rinfo->vinfo_lock);
>  	free(rinfo);
>  }
>  
> @@ -1071,6 +1097,10 @@ int start_recovery(struct vnode_info *cur_vinfo, struct vnode_info *old_vinfo,
>  	rinfo->tgt_epoch = epoch_lifted ? sys->cinfo.epoch - 1 :
>  		sys->cinfo.epoch;
>  	rinfo->count = 0;
> +	rinfo->max_epoch = sys->cinfo.epoch;
> +	rinfo->vinfo_array = xzalloc(sizeof(struct vnode_info *) *
> +				     rinfo->max_epoch);
> +	sd_init_mutex(&rinfo->vinfo_lock);
>  	if (epoch_lifted)
>  		rinfo->notify_complete = true; /* Reweight or node recovery */
>  	else
> @@ -1138,6 +1168,7 @@ static void queue_recovery_work(struct recovery_info *rinfo)
>  
>  	rw->epoch = rinfo->epoch;
>  	rw->tgt_epoch = rinfo->tgt_epoch;
> +	rw->rinfo = rinfo;
>  	rw->cur_vinfo = grab_vnode_info(rinfo->cur_vinfo);
>  	rw->old_vinfo = grab_vnode_info(rinfo->old_vinfo);
>  
> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
> index 693171c..f405b75 100644
> --- a/sheep/sheep_priv.h
> +++ b/sheep/sheep_priv.h
> @@ -346,12 +346,16 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int data_len, uint32_t vid,
>  int local_get_node_list(const struct sd_req *req, struct sd_rsp *rsp,
>  		void *data);
>  
> +void reset_vinfo_array(void);

What is reset_vinfo_array() for? No one calls it.

Thanks
Yuan



More information about the sheepdog mailing list