[sheepdog] [PATCH 8/9] recovery: don't notify completion for md recovery

Sun May 26 15:40:46 CEST 2013

If we do, stale objects will be removed before the recovery that runs after the
md recovery gets a chance to recover objects from other nodes.

Signed-off-by: Liu Yuan <namei.unix at gmail.com>
---
 sheep/recovery.c |   22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/sheep/recovery.c b/sheep/recovery.c
index ab99244..5bc2ef0 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -69,6 +69,7 @@ struct recovery_info {
 	 * and no recovery work is running
 	 */
 	bool suspended;
+	bool notify_complete;
 
 	int count;
 	uint64_t *oids;
@@ -432,11 +433,19 @@ static void free_recovery_info(struct recovery_info *rinfo)
 static inline bool run_next_rw(void)
 {
 	struct recovery_info *nrinfo = uatomic_xchg_ptr(&next_rinfo, NULL);
+	struct recovery_info *cur = main_thread_get(current_rinfo);
 
 	if (nrinfo == NULL)
 		return false;
 
-	free_recovery_info(main_thread_get(current_rinfo));
+	/*
+	 * When md recovery supersedes the reweight or node recovery, we need to
+	 * notify completion.
+	 */
+	if (!nrinfo->notify_complete && cur->notify_complete)
+		nrinfo->notify_complete = true;
+
+	free_recovery_info(cur);
 
 	sd_store->update_epoch(nrinfo->epoch);
 
@@ -479,10 +488,11 @@ static inline void finish_recovery(struct recovery_info *rinfo)
 
 	wakeup_all_requests();
 
-	rinfo->state = RW_NOTIFY_COMPLETION;
+	if (rinfo->notify_complete) {
+		rinfo->state = RW_NOTIFY_COMPLETION;
+		queue_recovery_work(rinfo);
+	}
 
-	/* notify recovery completion to other nodes */
-	queue_recovery_work(rinfo);
 	free_recovery_info(rinfo);
 
 	sd_dprintf("recovery complete: new epoch %"PRIu32, recovered_epoch);
@@ -789,6 +799,10 @@ int start_recovery(struct vnode_info *cur_vinfo, struct vnode_info *old_vinfo,
 	rinfo->state = RW_PREPARE_LIST;
 	rinfo->epoch = sys->epoch;
 	rinfo->count = 0;
+	if (epoch_lifted || cur_vinfo != old_vinfo)
+		rinfo->notify_complete = true; /* Reweight or node recovery */
+	else
+		rinfo->notify_complete = false; /* MD recovery */
 
 	rinfo->cur_vinfo = grab_vnode_info(cur_vinfo);
 	rinfo->old_vinfo = grab_vnode_info(old_vinfo);
-- 
1.7.9.5