[sheepdog] [PATCH] recovery: notify completion only when all objects are fresh
MORITA Kazutaka
morita.kazutaka at gmail.com
Fri May 31 14:55:41 CEST 2013
From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
To reduce the risk of data loss, we shouldn't remove stale objects if
any sheep has failed to recover its objects.
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
sheep/recovery.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/sheep/recovery.c b/sheep/recovery.c
index dba89fb..e8edbca 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -85,6 +85,7 @@ struct recovery_info {
struct recovery_info *next_rinfo;
static main_thread(struct recovery_info *) current_rinfo;
+static bool safe_mode; /* set true when something critical happens */
static void queue_recovery_work(struct recovery_info *rinfo);
@@ -260,6 +261,7 @@ again:
sd_printf(SDOG_ALERT, "cannot access any replicas of "
"%"PRIx64" at epoch %d", oid, tgt_epoch);
sd_printf(SDOG_ALERT, "clients may see old data");
+ safe_mode = true;
/* fall through */
default:
/* No luck, roll back to an older configuration and try again */
@@ -276,6 +278,7 @@ rollback:
/* We rollback in case we don't get a valid epoch */
sd_printf(SDOG_ALERT, "cannot get epoch %d", tgt_epoch);
sd_printf(SDOG_ALERT, "clients may see old data");
+ safe_mode = true;
goto rollback;
}
@@ -461,6 +464,11 @@ static void notify_recovery_completion_work(struct work *work)
struct sd_req hdr;
int ret;
+ if (safe_mode) {
+ sd_iprintf("skip notifying recovery completion");
+ return;
+ }
+
sd_init_req(&hdr, SD_OP_COMPLETE_RECOVERY);
hdr.obj.tgt_epoch = rw->epoch;
hdr.flags = SD_FLAG_CMD_WRITE;
@@ -687,6 +695,7 @@ retry:
e->nid.port);
sd_printf(SDOG_ALERT, "some objects may be not recovered at "
"epoch %d", epoch);
+ safe_mode = true;
free(buf);
return NULL;
}
--
1.7.9.5
More information about the sheepdog mailing list