[Sheepdog] [PATCH 6/6] split SD_STATUS_STARTUP

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Tue May 4 21:20:10 CEST 2010


There are two independent states in SD_STATUS_STARTUP:

- collie is waiting for a format operation
- collie is waiting for other nodes to join the cluster

so we should split it.  This patch introduces
SD_STATUS_WAIT_FOR_FORMAT and SD_STATUS_WAIT_FOR_JOIN.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 collie/group.c           |   17 ++++++++++-------
 collie/net.c             |    9 ++++++---
 include/sheepdog_proto.h |    9 ++++++---
 shepherd/shepherd.c      |   15 +++++++++++----
 4 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/collie/group.c b/collie/group.c
index 56caa33..452fe83 100644
--- a/collie/group.c
+++ b/collie/group.c
@@ -406,7 +406,7 @@ static int get_cluster_status(struct sheepdog_node_list_entry *node)
 
 	if (ret != SD_RES_SUCCESS) {
 		eprintf("failed to read epoch, %x\n", ret);
-		return SD_STATUS_STARTUP;
+		return SD_STATUS_WAIT_FOR_FORMAT;
 	}
 
 	if (epoch != get_latest_epoch())
@@ -427,7 +427,7 @@ static int get_cluster_status(struct sheepdog_node_list_entry *node)
 
 	nr_entries = get_ordered_sd_node_list(entries);
 	if (nr_entries + 1 != nr_local_entries)
-		return SD_STATUS_STARTUP;
+		return SD_STATUS_WAIT_FOR_JOIN;
 
 	for (i = 0; i < nr_local_entries; i++) {
 		if (local_entries[i].id == node->id)
@@ -436,7 +436,7 @@ static int get_cluster_status(struct sheepdog_node_list_entry *node)
 			if (local_entries[i].id == entries[j].id)
 				goto next;
 		}
-		return SD_STATUS_STARTUP;
+		return SD_STATUS_WAIT_FOR_JOIN;
 	next:
 		;
 	}
@@ -465,7 +465,7 @@ static void join(struct join_message *msg)
 		msg->nr_nodes++;
 	}
 
-	if (sys->status == SD_STATUS_STARTUP)
+	if (sys->status == SD_STATUS_WAIT_FOR_JOIN)
 		msg->cluster_status = get_cluster_status(&msg->header.from);
 	else
 		msg->cluster_status = sys->status;
@@ -568,7 +568,7 @@ static void update_cluster_info(struct join_message *msg)
 
 	sys->join_finished = 1;
 
-	if (sys->status == SD_STATUS_STARTUP && msg->cluster_status == SD_STATUS_OK) {
+	if (sys->status == SD_STATUS_WAIT_FOR_JOIN && msg->cluster_status == SD_STATUS_OK) {
 		if (msg->epoch > 0) {
 			sys->epoch = msg->epoch;
 			sys->status = SD_STATUS_OK;
@@ -612,7 +612,7 @@ out:
 
 	print_node_list(&sys->sd_node_list);
 
-	if (sys->status == SD_STATUS_STARTUP && msg->cluster_status == SD_STATUS_OK) {
+	if (sys->status == SD_STATUS_WAIT_FOR_JOIN && msg->cluster_status == SD_STATUS_OK) {
 		if (msg->epoch == 0)
 			sys->epoch = get_latest_epoch();
 	}
@@ -1496,7 +1496,10 @@ join_retry:
 		sys->this_node.id = hval;
 	}
 
-	sys->status = SD_STATUS_STARTUP;
+	if (get_latest_epoch() == 0)
+		sys->status = SD_STATUS_WAIT_FOR_FORMAT;
+	else
+		sys->status = SD_STATUS_WAIT_FOR_JOIN;
 	INIT_LIST_HEAD(&sys->sd_node_list);
 	INIT_LIST_HEAD(&sys->cpg_node_list);
 	INIT_LIST_HEAD(&sys->vm_list);
diff --git a/collie/net.c b/collie/net.c
index 8877535..7f60ffd 100644
--- a/collie/net.c
+++ b/collie/net.c
@@ -51,7 +51,8 @@ static void queue_request(struct request *req)
 		return;
 	}
 
-	if (sys->status == SD_STATUS_STARTUP ||
+	if (sys->status == SD_STATUS_WAIT_FOR_FORMAT ||
+	    sys->status == SD_STATUS_WAIT_FOR_JOIN ||
 	    sys->status == SD_STATUS_INCONSISTENT_EPOCHS) {
 		/* TODO: cleanup */
 		switch (hdr->opcode) {
@@ -62,8 +63,10 @@ static void queue_request(struct request *req)
 		case SD_OP_READ_VDIS:
 			break;
 		default:
-			if (sys->status == SD_STATUS_STARTUP)
-				rsp->result = SD_RES_STARTUP;
+			if (sys->status == SD_STATUS_WAIT_FOR_FORMAT)
+				rsp->result = SD_RES_WAIT_FOR_FORMAT;
+			else if (sys->status == SD_STATUS_WAIT_FOR_JOIN)
+				rsp->result = SD_RES_WAIT_FOR_JOIN;
 			else
 				rsp->result = SD_RES_INCONSISTENT_EPOCHS;
 			req->done(req);
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index bb14929..02c45a7 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -58,9 +58,10 @@
 #define SD_FLAG_CMD_RECOVERY 0x08
 
 #define SD_STATUS_OK            0x00
-#define SD_STATUS_STARTUP       0x01
-#define SD_STATUS_SHUTDOWN      0x02
-#define SD_STATUS_INCONSISTENT_EPOCHS   0x03
+#define SD_STATUS_WAIT_FOR_FORMAT   0x01
+#define SD_STATUS_WAIT_FOR_JOIN     0x02
+#define SD_STATUS_SHUTDOWN          0x03
+#define SD_STATUS_INCONSISTENT_EPOCHS   0x04
 
 #define SD_RES_SUCCESS       0x00 /* Success */
 #define SD_RES_UNKNOWN       0x01 /* Unknown error */
@@ -90,6 +91,8 @@
 #define SD_RES_NO_MEM        0x19 /* Cannot allocate memory */
 #define SD_RES_INCONSISTENT_EPOCHS  0x1A /* There is inconsistency between epochs */
 #define SD_RES_FULL_VDI      0x1B /* we already have the maximum vdis */
+#define SD_RES_WAIT_FOR_FORMAT      0x1C /* Sheepdog is waiting for a format operation */
+#define SD_RES_WAIT_FOR_JOIN        0x1D /* Sheepdog is waiting for other nodes joining */
 
 struct sd_req {
 	uint8_t		proto_ver;
diff --git a/shepherd/shepherd.c b/shepherd/shepherd.c
index 911aa00..85a77a6 100644
--- a/shepherd/shepherd.c
+++ b/shepherd/shepherd.c
@@ -168,8 +168,12 @@ static int update_node_list(int max_nodes, int epoch)
 		fprintf(stderr, "cannot read directory object\n");
 		ret = -1;
 		goto out;
-	case SD_RES_STARTUP:
-		fprintf(stderr, "sheepdog is not ready\n");
+	case SD_RES_WAIT_FOR_FORMAT:
+		fprintf(stderr, "sheepdog is not formatted yet\n");
+		ret = -1;
+		goto out;
+	case SD_RES_WAIT_FOR_JOIN:
+		fprintf(stderr, "there is not enough nodes to start sheepdog\n");
 		ret = -1;
 		goto out;
 	case SD_RES_SHUTDOWN:
@@ -1126,8 +1130,11 @@ rerun:
 		case SD_STATUS_OK:
 			printf("running\n");
 			break;
-		case SD_STATUS_STARTUP:
-			printf("startup\n");
+		case SD_STATUS_WAIT_FOR_FORMAT:
+			printf("sheepdog is not formatted yet\n");
+			break;
+		case SD_STATUS_WAIT_FOR_JOIN:
+			printf("sheepdog is waiting for other nodes joining\n");
 			break;
 		case SD_STATUS_INCONSISTENT_EPOCHS:
 			printf("there is inconsistency between epochs\n");
-- 
1.5.6.5




More information about the sheepdog mailing list