[sheepdog] [PATCH v2 7/9] sheep: merge wait_for_format and wait_for_join

Fri Jul 12 22:25:57 CEST 2013

From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>

We have a simple rule for cluster start-up now, so we no longer have to
distinguish WAIT_FOR_JOIN and WAIT_FOR_FORMAT strictly.

With this patch, a sheep with old data can join a cluster
whose status is WAIT_FOR_FORMAT.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 include/internal_proto.h |    3 +--
 sheep/config.c           |    9 +++++++
 sheep/group.c            |   60 +++++++++++++---------------------------------
 sheep/ops.c              |   15 ++++++++----
 sheep/request.c          |   13 ++++------
 sheep/sheep_priv.h       |    1 +
 6 files changed, 43 insertions(+), 58 deletions(-)

diff --git a/include/internal_proto.h b/include/internal_proto.h
index d1fe0d2..617556e 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -106,8 +106,7 @@
 #define SD_FLAG_QUORUM       0x0008 /* Serve the IO rquest as long we are quorate */
 
 #define SD_STATUS_OK                0x00000001
-#define SD_STATUS_WAIT_FOR_FORMAT   0x00000002
-#define SD_STATUS_WAIT_FOR_JOIN     0x00000004
+#define SD_STATUS_WAIT              0x00000004
 #define SD_STATUS_SHUTDOWN          0x00000008
 #define SD_STATUS_HALT              0x00000020
 #define SD_STATUS_KILLED            0x00000040
diff --git a/sheep/config.c b/sheep/config.c
index 384e711..e6fc35d 100644
--- a/sheep/config.c
+++ b/sheep/config.c
@@ -162,6 +162,15 @@ int get_node_space(uint64_t *space)
 	return SD_RES_SUCCESS;
 }
 
+bool is_cluster_formatted(void)
+{
+	struct cluster_info cinfo;
+
+	get_cluster_config(&cinfo); /* NOTE(review): return value ignored */
+
+	return cinfo.ctime != 0; /* zero ctime is treated as unformatted */
+}
+
 static inline __attribute__((used)) void __sd_config_format_build_bug_ons(void)
 {
 	/* never called, only for checking BUILD_BUG_ON()s */
diff --git a/sheep/group.c b/sheep/group.c
index bec0b74..d86edc5 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -424,9 +424,9 @@ static bool enough_nodes_gathered(struct join_message *jm,
 	return true;
 }
 
-static int cluster_wait_for_join_check(const struct sd_node *joining,
-				       const struct sd_node *nodes,
-				       size_t nr_nodes, struct join_message *jm)
+static int cluster_wait_check(const struct sd_node *joining,
+			      const struct sd_node *nodes, size_t nr_nodes,
+			      struct join_message *jm)
 {
 	int ret;
 
@@ -595,6 +595,9 @@ static void setup_backend_store(const struct join_message *jm)
 {
 	int ret;
 
+	if (jm->cinfo.store[0] == '\0')
+		return;
+
 	if (!sd_store) {
 		sd_store = find_store_driver((char *)jm->cinfo.store);
 		if (!sd_store)
@@ -623,12 +626,6 @@ static void finish_join(const struct join_message *msg,
 			const struct sd_node *nodes, size_t nr_nodes)
 {
 	sys->join_finished = true;
-	sys->cinfo.epoch = msg->cinfo.epoch;
-
-	if (msg->cinfo.store[0]) {
-		if (!sys->gateway_only)
-			setup_backend_store(msg);
-	}
 
 	sockfd_cache_add_group(nodes, nr_nodes);
 }
@@ -700,6 +697,9 @@ static void update_cluster_info(const struct join_message *msg,
 	sd_dprintf("status = %d, epoch = %d, finished: %d",
 		   msg->cluster_status, msg->cinfo.epoch, sys->join_finished);
 
+	if (!sys->gateway_only)
+		setup_backend_store(msg);
+
 	if (!sys->join_finished)
 		finish_join(msg, joined, nodes, nr_nodes);
 
@@ -712,19 +712,10 @@ static void update_cluster_info(const struct join_message *msg,
 	switch (msg->cluster_status) {
 	case SD_STATUS_OK:
 	case SD_STATUS_HALT:
-		switch (sys->status) {
-		case SD_STATUS_WAIT_FOR_FORMAT:
-			sys->cinfo.nr_copies = msg->cinfo.nr_copies;
-			sys->cinfo.flags = msg->cinfo.flags;
-
-			set_cluster_config(&sys->cinfo);
-			/*FALLTHROUGH*/
-		case SD_STATUS_WAIT_FOR_JOIN:
-			sys->cinfo.disable_recovery =
-				msg->cinfo.disable_recovery;
-			break;
-		default:
-			break;
+		if (sys->status == SD_STATUS_WAIT) {
+			if (!is_cluster_formatted())
+				/* initialize config file */
+				set_cluster_config(&sys->cinfo);
 		}
 
 		sys->status = msg->cluster_status;
@@ -844,12 +835,6 @@ enum cluster_join_result sd_check_join_cb(const struct sd_node *joining,
 		if (!epoch)
 			return CJ_RES_SUCCESS;
 
-		if (sys->status != SD_STATUS_WAIT_FOR_JOIN) {
-			sd_eprintf("unexpected cluster status 0x%x",
-				   sys->status);
-			return CJ_RES_FAIL;
-		}
-
 		nr_entries = epoch_log_read(epoch, entries, sizeof(entries));
 		if (nr_entries == -1)
 			return CJ_RES_FAIL;
@@ -868,16 +853,8 @@ enum cluster_join_result sd_check_join_cb(const struct sd_node *joining,
 	case SD_STATUS_SHUTDOWN:
 		ret = CJ_RES_FAIL;
 		break;
-	case SD_STATUS_WAIT_FOR_FORMAT:
-		if (jm->cinfo.nr_nodes != 0) {
-			ret = CJ_RES_FAIL;
-			break;
-		}
-
-		ret = CJ_RES_SUCCESS;
-		break;
-	case SD_STATUS_WAIT_FOR_JOIN:
-		ret = cluster_wait_for_join_check(joining, nodes, nr_nodes, jm);
+	case SD_STATUS_WAIT:
+		ret = cluster_wait_check(joining, nodes, nr_nodes, jm);
 		break;
 	case SD_STATUS_OK:
 	case SD_STATUS_HALT:
@@ -982,7 +959,7 @@ static void requeue_cluster_request(void)
 
 int sd_reconnect_handler(void)
 {
-	sys->status = SD_STATUS_WAIT_FOR_JOIN;
+	sys->status = SD_STATUS_WAIT;
 	sys->join_finished = false;
 	if (sys->cdrv->init(sys->cdrv_option) != 0)
 		return -1;
@@ -1138,16 +1115,13 @@ int create_cluster(int port, int64_t zone, int nr_vnodes,
 
 	sys->cinfo.epoch = get_latest_epoch();
 	if (sys->cinfo.epoch) {
-		sys->status = SD_STATUS_WAIT_FOR_JOIN;
-
 		sys->cinfo.nr_nodes = epoch_log_read(sys->cinfo.epoch,
 						     sys->cinfo.nodes,
 						     sizeof(sys->cinfo.nodes));
 		if (sys->cinfo.nr_nodes == -1)
 			return -1;
-	} else {
-		sys->status = SD_STATUS_WAIT_FOR_FORMAT;
 	}
+	sys->status = SD_STATUS_WAIT;
 
 	main_thread_set(pending_block_list,
 			  xzalloc(sizeof(struct list_head)));
diff --git a/sheep/ops.c b/sheep/ops.c
index bded4c9..5d7686c 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -441,10 +441,11 @@ out:
 	switch (sys->status) {
 	case SD_STATUS_OK:
 		return SD_RES_SUCCESS;
-	case SD_STATUS_WAIT_FOR_FORMAT:
-		return SD_RES_WAIT_FOR_FORMAT;
-	case SD_STATUS_WAIT_FOR_JOIN:
-		return SD_RES_WAIT_FOR_JOIN;
+	case SD_STATUS_WAIT:
+		if (sys->cinfo.ctime == 0)
+			return SD_RES_WAIT_FOR_FORMAT;
+		else
+			return SD_RES_WAIT_FOR_JOIN;
 	case SD_STATUS_SHUTDOWN:
 		return SD_RES_SHUTDOWN;
 	case SD_STATUS_HALT:
@@ -492,7 +493,7 @@ static int cluster_force_recover_work(struct request *req)
 	 * 2) some nodes are physically down (same epoch condition).
 	 * In both case, the nodes(s) stat is WAIT_FOR_JOIN.
 	 */
-	if (sys->status != SD_STATUS_WAIT_FOR_JOIN || req->vinfo == NULL)
+	if (sys->status != SD_STATUS_WAIT || req->vinfo == NULL)
 		return SD_RES_FORCE_RECOVER;
 
 	old_vnode_info = get_vnode_info_epoch(epoch, req->vinfo);
@@ -539,6 +540,10 @@ static int cluster_force_recover_main(const struct sd_req *req,
 		goto err;
 	}
 
+	if (!is_cluster_formatted())
+		/* initialize config file */
+		set_cluster_config(&sys->cinfo);
+
 	if (have_enough_zones())
 		sys->status = SD_STATUS_OK;
 	else
diff --git a/sheep/request.c b/sheep/request.c
index 3b43c76..ed4b3aa 100644
--- a/sheep/request.c
+++ b/sheep/request.c
@@ -353,15 +353,12 @@ static void queue_request(struct request *req)
 	case SD_STATUS_SHUTDOWN:
 		rsp->result = SD_RES_SHUTDOWN;
 		goto done;
-	case SD_STATUS_WAIT_FOR_FORMAT:
+	case SD_STATUS_WAIT:
 		if (!is_force_op(req->op)) {
-			rsp->result = SD_RES_WAIT_FOR_FORMAT;
-			goto done;
-		}
-		break;
-	case SD_STATUS_WAIT_FOR_JOIN:
-		if (!is_force_op(req->op)) {
-			rsp->result = SD_RES_WAIT_FOR_JOIN;
+			if (sys->cinfo.ctime == 0)
+				rsp->result = SD_RES_WAIT_FOR_FORMAT;
+			else
+				rsp->result = SD_RES_WAIT_FOR_JOIN;
 			goto done;
 		}
 		break;
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index f9d0e57..4cd30ee 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -303,6 +303,7 @@ int set_cluster_config(const struct cluster_info *cinfo);
 int get_cluster_config(struct cluster_info *cinfo);
 int set_node_space(uint64_t space);
 int get_node_space(uint64_t *space);
+bool is_cluster_formatted(void);
 
 int store_file_write(void *buffer, size_t len);
 void *store_file_read(void);
-- 
1.7.9.5