At Tue, 4 Feb 2014 15:14:44 +0800, Liu Yuan wrote: > > @@ -666,12 +671,19 @@ static void free_recovery_info(struct recovery_info *rinfo) > /* Return true if next recovery work is queued. */ > static inline bool run_next_rw(void) > { > - struct recovery_info *nrinfo = uatomic_xchg_ptr(&next_rinfo, NULL); > + struct recovery_info *nrinfo = uatomic_read(&next_rinfo); > struct recovery_info *cur = main_thread_get(current_rinfo); > > if (nrinfo == NULL) > return false; > > + sd_debug("running threads nr %"PRIu32, cur->running_threads_nr); > + if (cur->running_threads_nr > 1) { > + cur->running_threads_nr--; > + return true; > + } > + This looks wrong. running_threads_nr is incremented only in recover_next_object(), but run_next_rw() can be called before calling recover_next_object(), e.g. in finish_object_list(). > @@ -843,24 +863,33 @@ static void recover_object_main(struct work *work) > * requests > */ > rinfo->notify_complete = false; > - finish_recovery(rinfo); > sd_debug("recovery is stopped"); > - goto out; > + goto finish_recovery; > } > > wakeup_requests_on_oid(row->oid); > + /* ->oids[done, next] is out of order since finish order is random */ > + if (rinfo->oids[rinfo->done] != row->oid) { > + uint64_t *p = xlfind(&row->oid, rinfo->oids + rinfo->done, > + rinfo->next - rinfo->done, oid_cmp); > + > + *p = rinfo->oids[rinfo->done]; > + rinfo->oids[rinfo->done] = row->oid; > + } > rinfo->done++; > > sd_info("object %"PRIx64" is recovered (%"PRIu64"/%"PRIu64")", row->oid, > rinfo->done, rinfo->count); > > - if (rinfo->done < rinfo->count) { > - recover_next_object(rinfo); > - goto out; > - } > + if (rinfo->done >= rinfo->count) > + goto finish_recovery; > > + recover_next_object(rinfo); > + rinfo->running_threads_nr--; > + free_recovery_obj_work(row); > + return; > +finish_recovery: > finish_recovery(rinfo); > -out: > free_recovery_obj_work(row); > } No need to decrement running_threads_nr when finishing recovery? In addition, 1. this change cannot pass ./check 10 on my environment. == # ./check 10 PLATFORM -- Linux/x86_64 ubuntu 3.8.0-30-generic 010 Last Used:8s. Test manual recovery command - output mismatch (see 010.out.bad) --- 010.out 2014-01-23 12:40:15.415051967 +0900 +++ 010.out.bad 2014-02-04 23:49:45.952976066 +0900 @@ -81,9 +81,13 @@ STORE/7/obj/807c2b2500000000 Cluster recovery: disable e0b27e7466a3c21d0a4dedfed8bb9184 - +Failed to write object 7c2b2500000000: No object found +Failed to write VDI f35835c0a25be5ee75a536d1816c1db4 - 0faf5f38c28a38a6db1e6dfcdf259141 - 83bffbfb00dbcb7d6b4a2fa9274175b9 - +Failed to write object 7c2b2500000003: No object found +Failed to write VDI Cluster recovery: enable Cluster status: running, auto-recovery enabled Failures: 010 Failed 1 of 1 tests == 2. this patch seems to need at least the following change. == diff --git a/sheep/recovery.c b/sheep/recovery.c index 525d93b..af79ea0 100644 --- a/sheep/recovery.c +++ b/sheep/recovery.c @@ -618,8 +618,8 @@ main_fn bool oid_in_recovery(uint64_t oid) * * FIXME: do we need more efficient yet complex data structure? */ - if (xlfind(&oid, rinfo->oids + rinfo->done + 1, - rinfo->count - (rinfo->done + 1), oid_cmp)) + if (xlfind(&oid, rinfo->oids + rinfo->next + 1, + rinfo->count - (rinfo->next + 1), oid_cmp)) break; /* == Thanks, Kazutaka |