[sheepdog] [PATCH RFC 1/2] sheep: enhance STAT_RECOVERY for prividing information of recovery progress

Liu Yuan namei.unix at gmail.com
Mon Jul 29 10:02:30 CEST 2013


On Mon, Jul 29, 2013 at 04:39:26PM +0900, Hitoshi Mitake wrote:
> Current SD_OP_STAT_RECOVERY only provides an information that
> indicates the node is doing recovery or not. For providing more useful
> information about progress of recovery, this patch enhances this
> request.
> 
> The enhanced SD_OP_STAT_RECOVERY returns the information with struct
> recovery_progress. The struct contains these information:
> 1. state of recovery. With this information, collie can know which
>    phase of progress sheep is in.
> 2. Number of copied object during a recovery process.
> 3. Number of entire objects which should be copied during a recovery
>    process.
> 
> Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
> ---
>  include/internal_proto.h |   12 ++++++++++++
>  sheep/ops.c              |   11 ++++++++---
>  sheep/recovery.c         |   33 ++++++++++++++++++++++++---------
>  sheep/sheep_priv.h       |    1 +
>  4 files changed, 45 insertions(+), 12 deletions(-)
> 
> diff --git a/include/internal_proto.h b/include/internal_proto.h
> index 0463eae..6d7a0e9 100644
> --- a/include/internal_proto.h
> +++ b/include/internal_proto.h
> @@ -184,6 +184,18 @@ struct sd_md_info {
>  	int nr;
>  };
>  
> +enum rw_state {
> +	RW_PREPARE_LIST, /* the recovery thread is preparing object list */
> +	RW_RECOVER_OBJ, /* the thread is recoering objects */
> +	RW_NOTIFY_COMPLETION, /* the thread is notifying recovery completion */
> +};
> +
> +struct recovery_progress {
> +	uint32_t state;		/* actual type is enum rw_state */

enum rw_state state; then you don't need comment.

> +	uint32_t nr_recovered_objects;
> +	uint32_t nr_entire_checked_objects;
> +};

it's recovery information, so better name it as recovery_info. And

nr_recovered_objects --> finished_count
nr_entire_checked_objects --> total

>  static inline __attribute__((used)) void __sd_epoch_format_build_bug_ons(void)
>  {
>  	/* never called, only for checking BUILD_BUG_ON()s */
> diff --git a/sheep/ops.c b/sheep/ops.c
> index 5d7686c..1436664 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -390,10 +390,15 @@ static int local_stat_sheep(struct request *req)
>  static int local_stat_recovery(const struct sd_req *req, struct sd_rsp *rsp,
>  					void *data)
>  {
> -	if (node_in_recovery())
> -		return SD_RES_NODE_IN_RECOVERY;
> +	if (!node_in_recovery())
> +		return SD_RES_SUCCESS;
>  
> -	return SD_RES_SUCCESS;
> +	if (req->data_length == sizeof(struct recovery_progress)) {
> +		fill_recovery_progress(data);
> +		rsp->data_length = sizeof(struct recovery_progress);
> +	}
> +
> +	return SD_RES_NODE_IN_RECOVERY;
>  }
>  
>  static int local_stat_cluster(struct request *req)
> diff --git a/sheep/recovery.c b/sheep/recovery.c
> index ea3101a..69b07cd 100644
> --- a/sheep/recovery.c
> +++ b/sheep/recovery.c
> @@ -11,12 +11,6 @@
>  
>  #include "sheep_priv.h"
>  
> -enum rw_state {
> -	RW_PREPARE_LIST, /* the recovery thread is preparing object list */
> -	RW_RECOVER_OBJ, /* the thread is recoering objects */
> -	RW_NOTIFY_COMPLETION, /* the thread is notifying recovery completion */
> -};
> -
>  /* base structure for the recovery thread */
>  struct recovery_work {
>  	uint32_t epoch;
> @@ -86,6 +80,9 @@ static void queue_recovery_work(struct recovery_info *rinfo);
>  #define DEFAULT_LIST_BUFFER_SIZE (UINT64_C(1) << 22)
>  static size_t list_buffer_size = DEFAULT_LIST_BUFFER_SIZE;
>  
> +static enum rw_state state;
> +static uint32_t nr_recovered_objects, nr_entire_checked_objects;
> +
>  static int obj_cmp(const uint64_t *oid1, const uint64_t *oid2)
>  {
>  	const uint64_t hval1 = fnv_64a_buf(oid1, sizeof(*oid1), FNV1A_64_INIT);
> @@ -316,7 +313,7 @@ static void recover_object_work(struct work *work)
>  
>  	if (sd_store->exist(oid)) {
>  		sd_dprintf("the object is already recovered");
> -		return;
> +		goto end;
>  	}
>  
>  	/* find object in the stale directory */
> @@ -332,6 +329,9 @@ static void recover_object_work(struct work *work)
>  	ret = do_recover_object(row);
>  	if (ret < 0)
>  		sd_eprintf("failed to recover object %"PRIx64, oid);
> +
> +end:

It is better use 'out' lable as other code.

> +	uatomic_inc(&nr_recovered_objects);
>  }
>  
>  bool node_in_recovery(void)
> @@ -499,12 +499,12 @@ static void notify_recovery_completion_main(struct work *work)
>  static inline void finish_recovery(struct recovery_info *rinfo)
>  {
>  	uint32_t recovered_epoch = rinfo->epoch;
> -	main_thread_set(current_rinfo, NULL);
>  
> +	main_thread_set(current_rinfo, NULL);
>  	wakeup_all_requests();
>  
>  	if (rinfo->notify_complete) {
> -		rinfo->state = RW_NOTIFY_COMPLETION;
> +		state = rinfo->state = RW_NOTIFY_COMPLETION;
>  		queue_recovery_work(rinfo);
>  	}
>  
> @@ -668,6 +668,9 @@ static void finish_object_list(struct work *work)
>  		return;
>  	}
>  
> +	state = RW_RECOVER_OBJ;
> +	uatomic_set(&nr_entire_checked_objects, rinfo->count);
> +
>  	recover_next_object(rinfo);
>  	return;
>  }
> @@ -853,6 +856,10 @@ static void queue_recovery_work(struct recovery_info *rinfo)
>  		rlw = xzalloc(sizeof(*rlw));
>  		rlw->oids = xmalloc(list_buffer_size);
>  
> +		state = RW_PREPARE_LIST;
> +		uatomic_set(&nr_recovered_objects, 0);
> +		uatomic_set(&nr_entire_checked_objects, 0);
> +
>  		rw = &rlw->base;
>  		rw->work.fn = prepare_object_list;
>  		rw->work.done = finish_object_list;
> @@ -882,3 +889,11 @@ static void queue_recovery_work(struct recovery_info *rinfo)
>  
>  	queue_work(sys->recovery_wqueue, &rw->work);
>  }
> +
> +void fill_recovery_progress(struct recovery_progress *prog)
> +{
> +	prog->state = state;
> +	prog->nr_recovered_objects = uatomic_read(&nr_recovered_objects);
> +	prog->nr_entire_checked_objects =
> +		uatomic_read(&nr_entire_checked_objects);
> +}

just ->nr_recovered_objects = rinfo->done isn't enough?
the same goes for nr_entire_checked_objects.

global variables aren't necessary.

Better name the function as get_recovery_info()

Thanks
Yuan



More information about the sheepdog mailing list