[sheepdog] [PATCH] sheep/recovery: multi-threading recovery process

Liu Yuan namei.unix at gmail.com
Thu Feb 6 10:15:22 CET 2014


On Thu, Feb 06, 2014 at 05:55:59PM +0900, MORITA Kazutaka wrote:
> At Wed,  5 Feb 2014 15:41:25 +0800,
> Liu Yuan wrote:
> > 
> > +
> > +	/*
> > +	 * If running_threads_nr == 0, it means run_next_rw() is called in the
> > +	 * finish_object_list() and no recovery threads so it is safe to run
> > +	 * into next recovery.
> > +	 *
> > +	 * If running_threads_nr >= 1, it means we are now in the process of
> > +	 * recovering objects. We have to await all the threads to completion
> > +	 * before running into next recovery.
> > +	 */
> > +	if (cur->running_threads_nr > 1) {
> > +		cur->running_threads_nr--;
> > +		return true;
> > +	}
> > +
> 
> Counting the number of recovery threads looks a bit complex to me.
> This patch decreases running_threads_nr in both run_next_rw() and
> recover_object_main(), and I didn't understand how it works at glance.
> 
> Can we achieve the similar thing by comparing rinfo->done and
> rinfo->next?  I tried the following change on top of your patch, and
> it looks working properly.
> 

Looks a smarter move and we can remove running_threads_nr completely. 
I'll merge your changes into my patch.

Thanks
Yuan

> ==
> diff --git a/sheep/recovery.c b/sheep/recovery.c
> index 0faf71a..08be884 100644
> --- a/sheep/recovery.c
> +++ b/sheep/recovery.c
> @@ -680,19 +680,9 @@ static inline bool run_next_rw(void)
>  
>  	sd_debug("running threads nr %"PRIu32, cur->running_threads_nr);
>  
> -	/*
> -	 * If running_threads_nr == 0, it means run_next_rw() is called in the
> -	 * finish_object_list() and no recovery threads so it is safe to run
> -	 * into next recovery.
> -	 *
> -	 * If running_threads_nr >= 1, it means we are now in the process of
> -	 * recovering objects. We have to await all the threads to completion
> -	 * before running into next recovery.
> -	 */
> -	if (cur->running_threads_nr > 1) {
> -		cur->running_threads_nr--;
> +	/* Some objects are still in recovery. */
> +	if (cur->done < cur->next)
>  		return true;
> -	}
>  
>  	nrinfo = uatomic_xchg_ptr(&next_rinfo, NULL);
>  	/*
> @@ -862,6 +852,16 @@ static void recover_object_main(struct work *work)
>  						     base);
>  	struct recovery_info *rinfo = main_thread_get(current_rinfo);
>  
> +	/* ->oids[done, next] is out of order since finish order is random */
> +	if (rinfo->oids[rinfo->done] != row->oid) {
> +		uint64_t *p = xlfind(&row->oid, rinfo->oids + rinfo->done,
> +				     rinfo->next - rinfo->done, oid_cmp);
> +
> +		*p = rinfo->oids[rinfo->done];
> +		rinfo->oids[rinfo->done] = row->oid;
> +	}
> +	rinfo->done++;
> +
>  	if (run_next_rw()) {
>  		free_recovery_obj_work(row);
>  		return;
> @@ -879,15 +879,6 @@ static void recover_object_main(struct work *work)
>  	}
>  
>  	wakeup_requests_on_oid(row->oid);
> -	/* ->oids[done, next] is out of order since finish order is random */
> -	if (rinfo->oids[rinfo->done] != row->oid) {
> -		uint64_t *p = xlfind(&row->oid, rinfo->oids + rinfo->done,
> -				     rinfo->next - rinfo->done, oid_cmp);
> -
> -		*p = rinfo->oids[rinfo->done];
> -		rinfo->oids[rinfo->done] = row->oid;
> -	}
> -	rinfo->done++;
>  
>  	sd_info("object %"PRIx64" is recovered (%"PRIu64"/%"PRIu64")", row->oid,
>  		rinfo->done, rinfo->count);
> ==
> 
> Thanks,
> 
> Kazutaka



More information about the sheepdog mailing list