[sheepdog] [PATCH 2/2] sheep: check joining nodes with newer but invalid epoch
MORITA Kazutaka
morita.kazutaka at gmail.com
Tue May 14 17:20:38 CEST 2013
From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
Currently, cluster_wait_for_join_check() always returns
CJ_RES_MASTER_TRANSFER if the joining node has a newer epoch.
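However, we also have to account for the case where the joining node's
epoch is newer but invalid (e.g. the node comes from a different cluster).
Move the epoch comparison to the end of cluster_sanity_check() so that
mastership is transferred only when the other sanity checks have passed
and the cluster is in the wait-for-join state; otherwise the join fails.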
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
sheep/group.c | 29 ++++++++++++++++-------------
1 file changed, 16 insertions(+), 13 deletions(-)
diff --git a/sheep/group.c b/sheep/group.c
index a78dd15..920b4a1 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -525,12 +525,6 @@ static int cluster_sanity_check(struct join_message *jm)
return CJ_RES_FAIL;
}
- if (jm->epoch > local_epoch) {
- sd_eprintf("joining node epoch too large: %"
- PRIu32 " vs %" PRIu32, jm->epoch, local_epoch);
- return CJ_RES_FAIL;
- }
-
if (jm->nr_copies != local_nr_copies) {
sd_eprintf("joining node nr_copies doesn't match: %u vs %u",
jm->nr_copies, local_nr_copies);
@@ -543,6 +537,21 @@ static int cluster_sanity_check(struct join_message *jm)
return CJ_RES_FAIL;
}
+ if (jm->epoch > local_epoch) {
+ if (sys->status == SD_STATUS_WAIT_FOR_JOIN) {
+ /* The joining node will be a master */
+ sd_eprintf("transfer mastership (%d, %d)", jm->epoch,
+ local_epoch);
+ return CJ_RES_MASTER_TRANSFER;
+ } else {
+ /* Something goes wrong with sheepdog */
+ sd_printf(SDOG_ALERT, "joining node epoch too large: %"
+ PRIu32 " vs %" PRIu32, jm->epoch,
+ local_epoch);
+ return CJ_RES_FAIL;
+ }
+ }
+
return CJ_RES_SUCCESS;
}
@@ -559,14 +568,8 @@ static int cluster_wait_for_join_check(const struct sd_node *joined,
return CJ_RES_JOIN_LATER;
ret = cluster_sanity_check(jm);
- if (ret != CJ_RES_SUCCESS) {
- if (jm->epoch > sys->epoch) {
- sd_eprintf("transfer mastership (%d, %d)", jm->epoch,
- sys->epoch);
- return CJ_RES_MASTER_TRANSFER;
- }
+ if (ret != CJ_RES_SUCCESS)
return ret;
- }
nr_local_entries = epoch_log_read(jm->epoch, local_entries,
sizeof(local_entries));
--
1.7.9.5
More information about the sheepdog
mailing list