[Sheepdog] [PATCH] collie: reject invalid node joining to the sheepdog

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Fri May 7 13:43:10 CEST 2010


When a joining node has wrong epoch information, sheepdog
should reject the node and return an error result.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 collie/group.c           |  138 +++++++++++++++++++++++++++++++--------------
 include/sheepdog_proto.h |    4 +
 2 files changed, 99 insertions(+), 43 deletions(-)

diff --git a/collie/group.c b/collie/group.c
index 59ee86f..9e57656 100644
--- a/collie/group.c
+++ b/collie/group.c
@@ -60,6 +60,8 @@ struct join_message {
 	uint32_t cluster_status;
 	uint32_t epoch;
 	uint64_t ctime;
+	uint32_t result;
+	uint32_t pad;
 	struct {
 		uint32_t nodeid;
 		uint32_t pid;
@@ -352,53 +354,99 @@ static int is_master(void)
 
 static int get_cluster_status(struct sheepdog_node_list_entry *from,
 			      struct sheepdog_node_list_entry *entries,
-			      int nr_entries, uint64_t ctime, uint32_t epoch)
+			      int nr_entries, uint64_t ctime, uint32_t epoch,
+			      uint32_t *status)
 {
 	int i;
 	int nr_local_entries;
 	struct sheepdog_node_list_entry local_entries[SD_MAX_NODES];
 	struct node *node;
+	uint32_t local_epoch;
 
-	if (sys->status == SD_STATUS_INCONSISTENT_EPOCHS)
-		return SD_STATUS_INCONSISTENT_EPOCHS;
+	*status = sys->status;
 
-	if (epoch != get_latest_epoch())
-		return SD_STATUS_INCONSISTENT_EPOCHS;
-
-	if (ctime != get_cluster_ctime())
-		return SD_STATUS_INCONSISTENT_EPOCHS;
+	switch (sys->status) {
+	case SD_STATUS_OK:
+		if (ctime != get_cluster_ctime()) {
+			eprintf("joining node has invalid ctime, %ld\n", from->id);
+			return SD_RES_INVALID_CTIME;
+		}
 
-	nr_local_entries = epoch_log_read(epoch, (char *)local_entries,
-					  sizeof(local_entries));
-	nr_local_entries /= sizeof(local_entries[0]);
+		local_epoch = get_latest_epoch();
+		if (epoch > local_epoch) {
+			eprintf("sheepdog is running with older epoch, %d %d %ld\n",
+				epoch, local_epoch, from->id);
+			return SD_RES_OLD_NODE_VER;
+		}
+		break;
+	case SD_STATUS_WAIT_FOR_FORMAT:
+		if (nr_entries != 0) {
+			eprintf("joining node is not clean, %ld\n", from->id);
+			return SD_RES_NOT_FORMATTED;
+		}
+		break;
+	case SD_STATUS_WAIT_FOR_JOIN:
+		if (ctime != get_cluster_ctime()) {
+			eprintf("joining node has invalid ctime, %ld\n", from->id);
+			return SD_RES_INVALID_CTIME;
+		}
 
-	if (nr_entries != nr_local_entries)
-		return SD_STATUS_INCONSISTENT_EPOCHS;
+		local_epoch = get_latest_epoch();
+		if (epoch > local_epoch) {
+			eprintf("sheepdog is waiting with older epoch, %d %d %ld\n",
+				epoch, local_epoch, from->id);
+			return SD_RES_OLD_NODE_VER;
+		} else if (epoch < local_epoch) {
+			eprintf("sheepdog is waiting with newer epoch, %d %d %ld\n",
+				epoch, local_epoch, from->id);
+			return SD_RES_NEW_NODE_VER;
+		}
 
-	if (memcmp(entries, local_entries, sizeof(entries[0]) * nr_entries) != 0)
-		return SD_STATUS_INCONSISTENT_EPOCHS;
+		nr_local_entries = epoch_log_read(epoch, (char *)local_entries,
+						  sizeof(local_entries));
+		nr_local_entries /= sizeof(local_entries[0]);
 
-	nr_entries = 1;
-	list_for_each_entry(node, &sys->sd_node_list, list) {
-		nr_entries++;
-	}
+		if (nr_entries != nr_local_entries) {
+			eprintf("joining node has invalid epoch, %d %ld\n",
+				epoch, from->id);
+			return SD_RES_INVALID_EPOCH;
+		}
 
-	if (nr_entries != nr_local_entries)
-		return SD_STATUS_WAIT_FOR_JOIN;
+		if (memcmp(entries, local_entries, sizeof(entries[0]) * nr_entries) != 0) {
+			eprintf("joining node has invalid epoch, %ld\n", from->id);
+			return SD_RES_INVALID_EPOCH;
+		}
 
-	for (i = 0; i < nr_local_entries; i++) {
-		if (local_entries[i].id == from->id)
-			goto next;
+		nr_entries = 1;
 		list_for_each_entry(node, &sys->sd_node_list, list) {
-			if (local_entries[i].id == node->ent.id)
+			nr_entries++;
+		}
+
+		if (nr_entries != nr_local_entries)
+			return SD_RES_SUCCESS;
+
+		for (i = 0; i < nr_local_entries; i++) {
+			if (local_entries[i].id == from->id)
 				goto next;
+			list_for_each_entry(node, &sys->sd_node_list, list) {
+				if (local_entries[i].id == node->ent.id)
+					goto next;
+			}
+			return SD_RES_SUCCESS;
+		next:
+			;
 		}
-		return SD_STATUS_WAIT_FOR_JOIN;
-	next:
-		;
-	}
 
-	return SD_STATUS_OK;
+		*status = SD_STATUS_OK;
+		break;
+	case SD_STATUS_SHUTDOWN:
+		return SD_RES_SHUTDOWN;
+	case SD_STATUS_INCONSISTENT_EPOCHS:
+		return SD_RES_INCONSISTENT_EPOCHS;
+	default:
+		break;
+	}
+	return SD_RES_SUCCESS;
 }
 
 static void join(struct join_message *msg)
@@ -417,15 +465,12 @@ static void join(struct join_message *msg)
 	else
 		msg->epoch = 0;
 
-	if (sys->status == SD_STATUS_WAIT_FOR_JOIN) {
-		for (i = 0; i < msg->nr_nodes; i++)
-			entry[i] = msg->nodes[i].ent;
+	for (i = 0; i < msg->nr_nodes; i++)
+		entry[i] = msg->nodes[i].ent;
 
-		msg->cluster_status = get_cluster_status(&msg->header.from,
-							 entry, msg->nr_nodes,
-							 msg->ctime, msg->epoch);
-	} else
-		msg->cluster_status = sys->status;
+	msg->result = get_cluster_status(&msg->header.from, entry,
+					 msg->nr_nodes, msg->ctime,
+					 msg->epoch, &msg->cluster_status);
 
 	msg->nr_nodes = 0;
 	list_for_each_entry(node, &sys->sd_node_list, list) {
@@ -512,6 +557,15 @@ static void update_cluster_info(struct join_message *msg)
 	int ret, nr_nodes = msg->nr_nodes;
 	struct sheepdog_node_list_entry entry[SD_MAX_NODES];
 
+	if (msg->result != SD_RES_SUCCESS) {
+		if (is_myself(&msg->header.from)) {
+			eprintf("failed to join sheepdog, %d\n", msg->result);
+			sys->status = SD_STATUS_JOIN_FAILED;
+			return;
+		} else
+			return;
+	}
+
 	if (!sys->nr_sobjs)
 		sys->nr_sobjs = msg->nr_sobjs;
 
@@ -1059,7 +1113,7 @@ static int is_my_cpg_addr(struct cpg_address *addr)
 static void __sd_confchg(struct cpg_event *cevent)
 {
 	struct work_confchg *w = container_of(cevent, struct work_confchg, cev);
-	int ret, status;
+	int ret;
 
 	if (w->member_list_entries ==
 	    w->joined_list_entries - w->left_list_entries &&
@@ -1102,10 +1156,8 @@ static void __sd_confchg(struct cpg_event *cevent)
 		nr_entries = ARRAY_SIZE(entries);
 		ret = read_epoch(&epoch, &ctime, entries, &nr_entries);
 		if (ret == SD_RES_SUCCESS) {
-			status = get_cluster_status(&msg.header.from,
-						    entries, nr_entries,
-						    ctime, epoch);
-			msg.cluster_status = status;
+			get_cluster_status(&msg.header.from, entries, nr_entries,
+					   ctime, epoch, &msg.cluster_status);
 		} else
 			msg.cluster_status = SD_STATUS_WAIT_FOR_FORMAT;
 
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 2fa217a..521432a 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -61,6 +61,7 @@
 #define SD_STATUS_WAIT_FOR_JOIN     0x02
 #define SD_STATUS_SHUTDOWN          0x03
 #define SD_STATUS_INCONSISTENT_EPOCHS   0x04
+#define SD_STATUS_JOIN_FAILED   0x05
 
 #define SD_RES_SUCCESS       0x00 /* Success */
 #define SD_RES_UNKNOWN       0x01 /* Unknown error */
@@ -92,6 +93,9 @@
 #define SD_RES_FULL_VDI      0x1B /* we already have the maximum vdis */
 #define SD_RES_WAIT_FOR_FORMAT      0x1C /* Sheepdog is waiting for a format operation */
 #define SD_RES_WAIT_FOR_JOIN        0x1D /* Sheepdog is waiting for other nodes joining */
+#define SD_RES_NOT_FORMATTED 0x1E /* Sheepdog is not formatted yet */
+#define SD_RES_INVALID_CTIME 0x1F /* Creation time of sheepdog is different */
+#define SD_RES_INVALID_EPOCH 0x20 /* Invalid epoch */
 
 struct sd_req {
 	uint8_t		proto_ver;
-- 
1.5.6.5




More information about the sheepdog mailing list