[sheepdog] [PATCH v3] sheep/recovery: multi-threading recovery process

Tue Feb 4 16:08:17 CET 2014

At Tue,  4 Feb 2014 15:14:44 +0800,
Liu Yuan wrote:
> 
> @@ -666,12 +671,19 @@ static void free_recovery_info(struct recovery_info *rinfo)
>  /* Return true if next recovery work is queued. */
>  static inline bool run_next_rw(void)
>  {
> -	struct recovery_info *nrinfo = uatomic_xchg_ptr(&next_rinfo, NULL);
> +	struct recovery_info *nrinfo = uatomic_read(&next_rinfo);
>  	struct recovery_info *cur = main_thread_get(current_rinfo);
>  
>  	if (nrinfo == NULL)
>  		return false;
>  
> +	sd_debug("running threads nr %"PRIu32, cur->running_threads_nr);
> +	if (cur->running_threads_nr > 1) {
> +		cur->running_threads_nr--;
> +		return true;
> +	}
> +

This looks wrong.  running_threads_nr is incremented only in
recover_next_object(), but run_next_rw() can be called before calling
recover_next_object(), e.g. in finish_object_list().

> @@ -843,24 +863,33 @@ static void recover_object_main(struct work *work)
>  		 * requests
>  		 */
>  		rinfo->notify_complete = false;
> -		finish_recovery(rinfo);
>  		sd_debug("recovery is stopped");
> -		goto out;
> +		goto finish_recovery;
>  	}
>  
>  	wakeup_requests_on_oid(row->oid);
> +	/* ->oids[done, next] is out of order since finish order is random */
> +	if (rinfo->oids[rinfo->done] != row->oid) {
> +		uint64_t *p = xlfind(&row->oid, rinfo->oids + rinfo->done,
> +				     rinfo->next - rinfo->done, oid_cmp);
> +
> +		*p = rinfo->oids[rinfo->done];
> +		rinfo->oids[rinfo->done] = row->oid;
> +	}
>  	rinfo->done++;
>  
>  	sd_info("object %"PRIx64" is recovered (%"PRIu64"/%"PRIu64")", row->oid,
>  		rinfo->done, rinfo->count);
>  
> -	if (rinfo->done < rinfo->count) {
> -		recover_next_object(rinfo);
> -		goto out;
> -	}
> +	if (rinfo->done >= rinfo->count)
> +		goto finish_recovery;
>  
> +	recover_next_object(rinfo);
> +	rinfo->running_threads_nr--;
> +	free_recovery_obj_work(row);
> +	return;
> +finish_recovery:
>  	finish_recovery(rinfo);
> -out:
>  	free_recovery_obj_work(row);
>  }

No need to decrement running_threads_nr when finishing recovery?


In addition,

 1. this change cannot pass ./check 10 on my environment.

==
# ./check 10
PLATFORM      -- Linux/x86_64 ubuntu 3.8.0-30-generic

010 Last Used:8s.  Test manual recovery command - output mismatch (see 010.out.bad)
--- 010.out     2014-01-23 12:40:15.415051967 +0900
+++ 010.out.bad 2014-02-04 23:49:45.952976066 +0900
@@ -81,9 +81,13 @@
 STORE/7/obj/807c2b2500000000
 Cluster recovery: disable
 e0b27e7466a3c21d0a4dedfed8bb9184  -
+Failed to write object 7c2b2500000000: No object found
+Failed to write VDI
 f35835c0a25be5ee75a536d1816c1db4  -
 0faf5f38c28a38a6db1e6dfcdf259141  -
 83bffbfb00dbcb7d6b4a2fa9274175b9  -
+Failed to write object 7c2b2500000003: No object found
+Failed to write VDI
 Cluster recovery: enable
 Cluster status: running, auto-recovery enabled

Failures: 010
Failed 1 of 1 tests
==

 2. this patch seems to need at least the following change.

==

diff --git a/sheep/recovery.c b/sheep/recovery.c
index 525d93b..af79ea0 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -618,8 +618,8 @@ main_fn bool oid_in_recovery(uint64_t oid)
 		 *
 		 * FIXME: do we need more efficient yet complex data structure?
 		 */
-		if (xlfind(&oid, rinfo->oids + rinfo->done + 1,
-			   rinfo->count - (rinfo->done + 1), oid_cmp))
+		if (xlfind(&oid, rinfo->oids + rinfo->next + 1,
+			   rinfo->count - (rinfo->next + 1), oid_cmp))
 			break;
 
 		/*

==

Thanks,

Kazutaka