[sheepdog] [PATCH v3 10/13] recovery: don't notify completion for md recovery
Liu Yuan
namei.unix at gmail.com
Mon May 27 13:36:13 CEST 2013
If we do, stale objects will be removed before the recovery that runs after the
md recovery tries to recover objects from other nodes.
Signed-off-by: Liu Yuan <namei.unix at gmail.com>
---
sheep/ops.c | 12 ------------
sheep/recovery.c | 22 ++++++++++++++++++----
2 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/sheep/ops.c b/sheep/ops.c
index 9911afb..0d35dad 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -661,18 +661,6 @@ static int cluster_recovery_completion(const struct sd_req *req,
nr_recovereds = 0;
}
- /*
- * Disk failure might send duplicate notification, ingore it.
- *
- * We can't simply stop disk recovery from sending notication because
- * disk recovery might supersede node recovery, which indeed need
- * to send notification
- */
- for (i = 0; i < nr_recovereds; i++)
- if (node_eq(node, recovereds + i)) {
- sd_dprintf("duplicate %s", node_to_str(node));
- return SD_RES_SUCCESS;
- }
recovereds[nr_recovereds++] = *node;
xqsort(recovereds, nr_recovereds, node_cmp);
diff --git a/sheep/recovery.c b/sheep/recovery.c
index 21d76e2..46a7baa 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -69,6 +69,7 @@ struct recovery_info {
* and no recovery work is running
*/
bool suspended;
+ bool notify_complete;
int count;
uint64_t *oids;
@@ -432,11 +433,19 @@ static void free_recovery_info(struct recovery_info *rinfo)
static inline bool run_next_rw(void)
{
struct recovery_info *nrinfo = uatomic_xchg_ptr(&next_rinfo, NULL);
+ struct recovery_info *cur = main_thread_get(current_rinfo);
if (nrinfo == NULL)
return false;
- free_recovery_info(main_thread_get(current_rinfo));
+ /*
+ * When md recovery supersedes the reweight or node recovery, we need to
+ * notify completion.
+ */
+ if (!nrinfo->notify_complete && cur->notify_complete)
+ nrinfo->notify_complete = true;
+
+ free_recovery_info(cur);
sd_store->update_epoch(nrinfo->epoch);
@@ -479,10 +488,11 @@ static inline void finish_recovery(struct recovery_info *rinfo)
wakeup_all_requests();
- rinfo->state = RW_NOTIFY_COMPLETION;
+ if (rinfo->notify_complete) {
+ rinfo->state = RW_NOTIFY_COMPLETION;
+ queue_recovery_work(rinfo);
+ }
- /* notify recovery completion to other nodes */
- queue_recovery_work(rinfo);
free_recovery_info(rinfo);
sd_dprintf("recovery complete: new epoch %"PRIu32, recovered_epoch);
@@ -786,6 +796,10 @@ int start_recovery(struct vnode_info *cur_vinfo, struct vnode_info *old_vinfo,
rinfo->state = RW_PREPARE_LIST;
rinfo->epoch = sys->epoch;
rinfo->count = 0;
+ if (epoch_lifted)
+ rinfo->notify_complete = true; /* Reweight or node recovery */
+ else
+ rinfo->notify_complete = false; /* MD recovery */
rinfo->cur_vinfo = grab_vnode_info(cur_vinfo);
rinfo->old_vinfo = grab_vnode_info(old_vinfo);
--
1.7.9.5
More information about the sheepdog
mailing list