At Wed, 5 Feb 2014 15:41:25 +0800, Liu Yuan wrote: > > + > + /* > + * If running_threads_nr == 0, it means run_next_rw() is called in the > + * finish_object_list() and no recovery threads so it is safe to run > + * into next recovery. > + * > + * If running_threads_nr >= 1, it means we are now in the process of > + * recovering objects. We have to await all the threads to completion > + * before running into next recovery. > + */ > + if (cur->running_threads_nr > 1) { > + cur->running_threads_nr--; > + return true; > + } > + Counting the number of recovery threads looks a bit complex to me. This patch decreases running_threads_nr in both run_next_rw() and recover_object_main(), and I didn't understand how it works at glance. Can we achieve the similar thing by comparing rinfo->done and rinfo->next? I tried the following change on top of your patch, and it looks working properly. == diff --git a/sheep/recovery.c b/sheep/recovery.c index 0faf71a..08be884 100644 --- a/sheep/recovery.c +++ b/sheep/recovery.c @@ -680,19 +680,9 @@ static inline bool run_next_rw(void) sd_debug("running threads nr %"PRIu32, cur->running_threads_nr); - /* - * If running_threads_nr == 0, it means run_next_rw() is called in the - * finish_object_list() and no recovery threads so it is safe to run - * into next recovery. - * - * If running_threads_nr >= 1, it means we are now in the process of - * recovering objects. We have to await all the threads to completion - * before running into next recovery. - */ - if (cur->running_threads_nr > 1) { - cur->running_threads_nr--; + /* Some objects are still in recovery. */ + if (cur->done < cur->next) return true; - } nrinfo = uatomic_xchg_ptr(&next_rinfo, NULL); /* @@ -862,6 +852,16 @@ static void recover_object_main(struct work *work) base); struct recovery_info *rinfo = main_thread_get(current_rinfo); + /* ->oids[done, next] is out of order since finish order is random */ + if (rinfo->oids[rinfo->done] != row->oid) { + uint64_t *p = xlfind(&row->oid, rinfo->oids + rinfo->done, + rinfo->next - rinfo->done, oid_cmp); + + *p = rinfo->oids[rinfo->done]; + rinfo->oids[rinfo->done] = row->oid; + } + rinfo->done++; + if (run_next_rw()) { free_recovery_obj_work(row); return; @@ -879,15 +879,6 @@ static void recover_object_main(struct work *work) } wakeup_requests_on_oid(row->oid); - /* ->oids[done, next] is out of order since finish order is random */ - if (rinfo->oids[rinfo->done] != row->oid) { - uint64_t *p = xlfind(&row->oid, rinfo->oids + rinfo->done, - rinfo->next - rinfo->done, oid_cmp); - - *p = rinfo->oids[rinfo->done]; - rinfo->oids[rinfo->done] = row->oid; - } - rinfo->done++; sd_info("object %"PRIx64" is recovered (%"PRIu64"/%"PRIu64")", row->oid, rinfo->done, rinfo->count); == Thanks, Kazutaka |