From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp> To reduce the risk of data loss, we shouldn't remove stale objects if some sheep daemons have failed to recover objects. Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp> --- sheep/recovery.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sheep/recovery.c b/sheep/recovery.c index dba89fb..e8edbca 100644 --- a/sheep/recovery.c +++ b/sheep/recovery.c @@ -85,6 +85,7 @@ struct recovery_info { struct recovery_info *next_rinfo; static main_thread(struct recovery_info *) current_rinfo; +static bool safe_mode; /* set true when something critical happens */ static void queue_recovery_work(struct recovery_info *rinfo); @@ -260,6 +261,7 @@ again: sd_printf(SDOG_ALERT, "cannot access any replicas of " "%"PRIx64" at epoch %d", oid, tgt_epoch); sd_printf(SDOG_ALERT, "clients may see old data"); + safe_mode = true; /* fall through */ default: /* No luck, roll back to an older configuration and try again */ @@ -276,6 +278,7 @@ rollback: /* We rollback in case we don't get a valid epoch */ sd_printf(SDOG_ALERT, "cannot get epoch %d", tgt_epoch); sd_printf(SDOG_ALERT, "clients may see old data"); + safe_mode = true; goto rollback; } @@ -461,6 +464,11 @@ static void notify_recovery_completion_work(struct work *work) struct sd_req hdr; int ret; + if (safe_mode) { + sd_iprintf("skip notifying recovery completion"); + return; + } + sd_init_req(&hdr, SD_OP_COMPLETE_RECOVERY); hdr.obj.tgt_epoch = rw->epoch; hdr.flags = SD_FLAG_CMD_WRITE; @@ -687,6 +695,7 @@ retry: e->nid.port); sd_printf(SDOG_ALERT, "some objects may be not recovered at " "epoch %d", epoch); + safe_mode = true; free(buf); return NULL; } -- 1.7.9.5 |