[sheepdog] [PATCH RFC 1/2] sheep: enhance STAT_RECOVERY for prividing information of recovery progress

Wed Jul 31 05:10:44 CEST 2013

On Wed, Jul 31, 2013 at 11:09:52AM +0900, Hitoshi Mitake wrote:
> At Mon, 29 Jul 2013 16:02:30 +0800,
> Liu Yuan wrote:
> > 
> > On Mon, Jul 29, 2013 at 04:39:26PM +0900, Hitoshi Mitake wrote:
> > > Current SD_OP_STAT_RECOVERY only provides an information that
> > > indicates the node is doing recovery or not. For providing more useful
> > > information about progress of recovery, this patch enhances this
> > > request.
> > > 
> > > The enhanced SD_OP_STAT_RECOVERY returns the information with struct
> > > recovery_progress. The struct contains these information:
> > > 1. state of recovery. With this information, collie can know which
> > >    phase of progress sheep is in.
> > > 2. Number of copied object during a recovery process.
> > > 3. Number of entire objects which should be copied during a recovery
> > >    process.
> > > 
> > > Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
> > > ---
> > >  include/internal_proto.h |   12 ++++++++++++
> > >  sheep/ops.c              |   11 ++++++++---
> > >  sheep/recovery.c         |   33 ++++++++++++++++++++++++---------
> > >  sheep/sheep_priv.h       |    1 +
> > >  4 files changed, 45 insertions(+), 12 deletions(-)
> > > 
> > > diff --git a/include/internal_proto.h b/include/internal_proto.h
> > > index 0463eae..6d7a0e9 100644
> > > --- a/include/internal_proto.h
> > > +++ b/include/internal_proto.h
> > > @@ -184,6 +184,18 @@ struct sd_md_info {
> > >  	int nr;
> > >  };
> > >  
> > > +enum rw_state {
> > > +	RW_PREPARE_LIST, /* the recovery thread is preparing object list */
> > > +	RW_RECOVER_OBJ, /* the thread is recoering objects */
> > > +	RW_NOTIFY_COMPLETION, /* the thread is notifying recovery completion */
> > > +};
> > > +
> > > +struct recovery_progress {
> > > +	uint32_t state;		/* actual type is enum rw_state */
> > 
> > enum rw_state state; then you don't need comment.
> 
> I agree.
> 
> > 
> > > +	uint32_t nr_recovered_objects;
> > > +	uint32_t nr_entire_checked_objects;
> > > +};
> > 
> > it's recovery information, so better name it as recovery_info. And
> > 
> > nr_recovered_objects --> finished_count
> > nr_entire_checked_objects --> total
> 
> The names seems to be good. I agree.
> 
> > 
> > >  static inline __attribute__((used)) void __sd_epoch_format_build_bug_ons(void)
> > >  {
> > >  	/* never called, only for checking BUILD_BUG_ON()s */
> > > diff --git a/sheep/ops.c b/sheep/ops.c
> > > index 5d7686c..1436664 100644
> > > --- a/sheep/ops.c
> > > +++ b/sheep/ops.c
> > > @@ -390,10 +390,15 @@ static int local_stat_sheep(struct request *req)
> > >  static int local_stat_recovery(const struct sd_req *req, struct sd_rsp *rsp,
> > >  					void *data)
> > >  {
> > > -	if (node_in_recovery())
> > > -		return SD_RES_NODE_IN_RECOVERY;
> > > +	if (!node_in_recovery())
> > > +		return SD_RES_SUCCESS;
> > >  
> > > -	return SD_RES_SUCCESS;
> > > +	if (req->data_length == sizeof(struct recovery_progress)) {
> > > +		fill_recovery_progress(data);
> > > +		rsp->data_length = sizeof(struct recovery_progress);
> > > +	}
> > > +
> > > +	return SD_RES_NODE_IN_RECOVERY;
> > >  }
> > >  
> > >  static int local_stat_cluster(struct request *req)
> > > diff --git a/sheep/recovery.c b/sheep/recovery.c
> > > index ea3101a..69b07cd 100644
> > > --- a/sheep/recovery.c
> > > +++ b/sheep/recovery.c
> > > @@ -11,12 +11,6 @@
> > >  
> > >  #include "sheep_priv.h"
> > >  
> > > -enum rw_state {
> > > -	RW_PREPARE_LIST, /* the recovery thread is preparing object list */
> > > -	RW_RECOVER_OBJ, /* the thread is recoering objects */
> > > -	RW_NOTIFY_COMPLETION, /* the thread is notifying recovery completion */
> > > -};
> > > -
> > >  /* base structure for the recovery thread */
> > >  struct recovery_work {
> > >  	uint32_t epoch;
> > > @@ -86,6 +80,9 @@ static void queue_recovery_work(struct recovery_info *rinfo);
> > >  #define DEFAULT_LIST_BUFFER_SIZE (UINT64_C(1) << 22)
> > >  static size_t list_buffer_size = DEFAULT_LIST_BUFFER_SIZE;
> > >  
> > > +static enum rw_state state;
> > > +static uint32_t nr_recovered_objects, nr_entire_checked_objects;
> > > +
> > >  static int obj_cmp(const uint64_t *oid1, const uint64_t *oid2)
> > >  {
> > >  	const uint64_t hval1 = fnv_64a_buf(oid1, sizeof(*oid1), FNV1A_64_INIT);
> > > @@ -316,7 +313,7 @@ static void recover_object_work(struct work *work)
> > >  
> > >  	if (sd_store->exist(oid)) {
> > >  		sd_dprintf("the object is already recovered");
> > > -		return;
> > > +		goto end;
> > >  	}
> > >  
> > >  	/* find object in the stale directory */
> > > @@ -332,6 +329,9 @@ static void recover_object_work(struct work *work)
> > >  	ret = do_recover_object(row);
> > >  	if (ret < 0)
> > >  		sd_eprintf("failed to recover object %"PRIx64, oid);
> > > +
> > > +end:
> > 
> > It is better use 'out' lable as other code.
> 
> OK.
> 
> > 
> > > +	uatomic_inc(&nr_recovered_objects);
> > >  }
> > >  
> > >  bool node_in_recovery(void)
> > > @@ -499,12 +499,12 @@ static void notify_recovery_completion_main(struct work *work)
> > >  static inline void finish_recovery(struct recovery_info *rinfo)
> > >  {
> > >  	uint32_t recovered_epoch = rinfo->epoch;
> > > -	main_thread_set(current_rinfo, NULL);
> > >  
> > > +	main_thread_set(current_rinfo, NULL);
> > >  	wakeup_all_requests();
> > >  
> > >  	if (rinfo->notify_complete) {
> > > -		rinfo->state = RW_NOTIFY_COMPLETION;
> > > +		state = rinfo->state = RW_NOTIFY_COMPLETION;
> > >  		queue_recovery_work(rinfo);
> > >  	}
> > >  
> > > @@ -668,6 +668,9 @@ static void finish_object_list(struct work *work)
> > >  		return;
> > >  	}
> > >  
> > > +	state = RW_RECOVER_OBJ;
> > > +	uatomic_set(&nr_entire_checked_objects, rinfo->count);
> > > +
> > >  	recover_next_object(rinfo);
> > >  	return;
> > >  }
> > > @@ -853,6 +856,10 @@ static void queue_recovery_work(struct recovery_info *rinfo)
> > >  		rlw = xzalloc(sizeof(*rlw));
> > >  		rlw->oids = xmalloc(list_buffer_size);
> > >  
> > > +		state = RW_PREPARE_LIST;
> > > +		uatomic_set(&nr_recovered_objects, 0);
> > > +		uatomic_set(&nr_entire_checked_objects, 0);
> > > +
> > >  		rw = &rlw->base;
> > >  		rw->work.fn = prepare_object_list;
> > >  		rw->work.done = finish_object_list;
> > > @@ -882,3 +889,11 @@ static void queue_recovery_work(struct recovery_info *rinfo)
> > >  
> > >  	queue_work(sys->recovery_wqueue, &rw->work);
> > >  }
> > > +
> > > +void fill_recovery_progress(struct recovery_progress *prog)
> > > +{
> > > +	prog->state = state;
> > > +	prog->nr_recovered_objects = uatomic_read(&nr_recovered_objects);
> > > +	prog->nr_entire_checked_objects =
> > > +		uatomic_read(&nr_entire_checked_objects);
> > > +}
> > 
> > just ->nr_recovered_objects = rinfo->done isn't enough?
> > the same goes for nr_entire_checked_objects.
> > 
> > global variables aren't necessary.
> > 
> > Better name the function as get_recovery_info()
> 
> If we make a constraint that fill_recovery_progress() (or
> get_recovery_info()) is only callable from main thread, the global
> variable isn't needed. I'll do so in the next version.

Since the filling recovery info is very light, so I think we can make this
constraint.

Thanks
Yuan