[sheepdog] [PATCH 2/2] sheep: check joining nodes with newer but invalid epoch

MORITA Kazutaka morita.kazutaka at gmail.com
Tue May 14 17:20:38 CEST 2013


From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>

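Currently, cluster_wait_for_join_check() always returns
CJ_RES_MASTER_TRANSFER if the joining node has a newer epoch.
However, we have to take into account the possibility that the node
has a wrong epoch (e.g. the node comes from a different cluster).

To handle this, move the epoch comparison to the end of
cluster_sanity_check(), so that it runs only after the other sanity
checks have passed.  A newer epoch then results in
CJ_RES_MASTER_TRANSFER only while the cluster status is
SD_STATUS_WAIT_FOR_JOIN; otherwise the join fails with CJ_RES_FAIL.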

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 sheep/group.c |   29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/sheep/group.c b/sheep/group.c
index a78dd15..920b4a1 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -525,12 +525,6 @@ static int cluster_sanity_check(struct join_message *jm)
 		return CJ_RES_FAIL;
 	}
 
-	if (jm->epoch > local_epoch) {
-		sd_eprintf("joining node epoch too large: %"
-			   PRIu32 " vs %" PRIu32, jm->epoch, local_epoch);
-		return CJ_RES_FAIL;
-	}
-
 	if (jm->nr_copies != local_nr_copies) {
 		sd_eprintf("joining node nr_copies doesn't match: %u vs %u",
 			   jm->nr_copies, local_nr_copies);
@@ -543,6 +537,21 @@ static int cluster_sanity_check(struct join_message *jm)
 		return CJ_RES_FAIL;
 	}
 
+	if (jm->epoch > local_epoch) {
+		if (sys->status == SD_STATUS_WAIT_FOR_JOIN) {
+			/* The joining node will be a master */
+			sd_eprintf("transfer mastership (%d, %d)", jm->epoch,
+				   local_epoch);
+			return CJ_RES_MASTER_TRANSFER;
+		} else {
+			/* Something goes wrong with sheepdog */
+			sd_printf(SDOG_ALERT, "joining node epoch too large: %"
+				   PRIu32 " vs %" PRIu32, jm->epoch,
+				   local_epoch);
+			return CJ_RES_FAIL;
+		}
+	}
+
 	return CJ_RES_SUCCESS;
 }
 
@@ -559,14 +568,8 @@ static int cluster_wait_for_join_check(const struct sd_node *joined,
 		return CJ_RES_JOIN_LATER;
 
 	ret = cluster_sanity_check(jm);
-	if (ret != CJ_RES_SUCCESS)  {
-		if (jm->epoch > sys->epoch) {
-			sd_eprintf("transfer mastership (%d, %d)", jm->epoch,
-				   sys->epoch);
-			return CJ_RES_MASTER_TRANSFER;
-		}
+	if (ret != CJ_RES_SUCCESS)
 		return ret;
-	}
 
 	nr_local_entries = epoch_log_read(jm->epoch, local_entries,
 					  sizeof(local_entries));
-- 
1.7.9.5




More information about the sheepdog mailing list