[Sheepdog] [PATCH v2 5/5] sheep: use SD_STATUS_HALT to stop serving IO

Liu Yuan namei.unix at gmail.com
Sun Oct 16 12:35:15 CEST 2011


From: Liu Yuan <tailai.ly at taobao.com>

We use SD_STATUS_HALT to identify the cluster state when it should not serve
IO requests.

This is optional; users may turn off this HALT behavior at their own risk,
using the command below:

$ collie cluster format -H
or
$ collie cluster format --nohalt

By default, this is enabled.

[Test Case]

[1]
steps:

for i in 0 1 2 3; do ./sheep/sheep -d /store/$i -z $i -p 700$i; sleep 1; done
./collie/collie cluster format --copies=3;
for i in 0 1; do pkill -f "sheep -d /store/$i"; sleep 1; done
for i in 2 3; do ./collie/collie cluster info -p 700$i; done
for i in 0 1; do ./sheep/sheep -d /store/$i -z $i -p 700$i; sleep 1; done
for i in 0 1 2 3; do ./collie/collie cluster info -p 700$i; done

output:

Cluster status: The sheepdog is stopped doing IO, short of living nodes

Creation time        Epoch Nodes
2011-10-11 16:26:02      3 [192.168.0.1:7002, 192.168.0.1:7003]
2011-10-11 16:26:02      2 [192.168.0.1:7001, 192.168.0.1:7002, 192.168.0.1:7003]
2011-10-11 16:26:02      1 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002, 192.168.0.1:7003]
Cluster status: The sheepdog is stopped doing IO, short of living nodes

Creation time        Epoch Nodes
2011-10-11 16:26:02      3 [192.168.0.1:7002, 192.168.0.1:7003]
2011-10-11 16:26:02      2 [192.168.0.1:7001, 192.168.0.1:7002, 192.168.0.1:7003]
2011-10-11 16:26:02      1 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002, 192.168.0.1:7003]
Cluster status: running

Creation time        Epoch Nodes
2011-10-11 16:26:02      5 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002, 192.168.0.1:7003]
2011-10-11 16:26:02      4 [192.168.0.1:7000, 192.168.0.1:7002, 192.168.0.1:7003]
2011-10-11 16:26:02      3 [192.168.0.1:7002, 192.168.0.1:7003]
2011-10-11 16:26:02      2 [192.168.0.1:7001, 192.168.0.1:7002, 192.168.0.1:7003]
2011-10-11 16:26:02      1 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002, 192.168.0.1:7003]

...

[2]
steps:
for i in 0 1; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done
collie/collie cluster format
for i in 0 1; do collie/collie cluster info -p 700$i;done
for i in 0; do pkill -f "sheep/sheep -d /store/$i"; sleep 1; done
for i in 2; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done
for i in 1 2; do pkill -f "sheep/sheep -d /store/$i"; sleep 1; done
for i in 0 1 2; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done
for i in 0 1 2; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done
for i in 0 1 2; do collie/collie cluster info -p 700$i;done

output:
Cluster status: The sheepdog is stopped doing IO, short of living nodes

Creation time        Epoch Nodes
2011-10-16 18:11:07      1 [192.168.0.1:7000, 192.168.0.1:7001]
Cluster status: The sheepdog is stopped doing IO, short of living nodes

Creation time        Epoch Nodes
2011-10-16 18:11:07      1 [192.168.0.1:7000, 192.168.0.1:7001]
Cluster status: running

Creation time        Epoch Nodes
2011-10-16 18:11:07      6 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002]
2011-10-16 18:11:07      5 [192.168.0.1:7000, 192.168.0.1:7002]
2011-10-16 18:11:07      4 [192.168.0.1:7002]
2011-10-16 18:11:07      3 [192.168.0.1:7001, 192.168.0.1:7002]
2011-10-16 18:11:07      2 [192.168.0.1:7001]
2011-10-16 18:11:07      1 [192.168.0.1:7000, 192.168.0.1:7001]

...

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 collie/cluster.c   |   14 +++++++++++++-
 collie/collie.c    |    1 +
 sheep/group.c      |   30 +++++++++++++++++++++++++++++-
 sheep/sheep_priv.h |    2 ++
 4 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/collie/cluster.c b/collie/cluster.c
index 0d5dfbe..3b16308 100644
--- a/collie/cluster.c
+++ b/collie/cluster.c
@@ -16,8 +16,15 @@
 
 struct cluster_cmd_data {
 	int copies;
+	int nohalt;
 } cluster_cmd_data;
 
+static void set_nohalt(uint32_t *p)
+{
+	if (p)
+		*p |= 1 << 31;
+}
+
 static int cluster_format(int argc, char **argv)
 {
 	int fd, ret;
@@ -36,6 +43,8 @@ static int cluster_format(int argc, char **argv)
 
 	hdr.opcode = SD_OP_MAKE_FS;
 	hdr.copies = cluster_cmd_data.copies;
+	if (cluster_cmd_data.nohalt)
+		set_nohalt(&hdr.copies);
 	hdr.epoch = node_list_version;
 	hdr.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000;
 
@@ -163,7 +172,7 @@ static int cluster_shutdown(int argc, char **argv)
 static struct subcommand cluster_cmd[] = {
 	{"info", NULL, "aprh", "show cluster information",
 	 0, cluster_info},
-	{"format", NULL, "caph", "create a Sheepdog storage",
+	{"format", NULL, "cHaph", "create a Sheepdog storage",
 	 0, cluster_format},
 	{"shutdown", NULL, "aph", "stop Sheepdog",
 	 SUBCMD_FLAG_NEED_NODELIST, cluster_shutdown},
@@ -176,6 +185,9 @@ static int cluster_parser(int ch, char *opt)
 	case 'c':
 		cluster_cmd_data.copies = atoi(opt);
 		break;
+	case 'H':
+		cluster_cmd_data.nohalt = 1;
+		break;
 	}
 
 	return 0;
diff --git a/collie/collie.c b/collie/collie.c
index e064a0a..df5dca4 100644
--- a/collie/collie.c
+++ b/collie/collie.c
@@ -41,6 +41,7 @@ static const struct sd_option collie_options[] = {
 
 	/* cluster options */
 	{'c', "copies", 1, "set the number of data redundancy"},
+	{'H', "nohalt", 0, "serve IO requests even if there are not enough redundant nodes"},
 
 	{ 0, NULL, 0, NULL },
 };
diff --git a/sheep/group.c b/sheep/group.c
index 5d06745..103a647 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -983,7 +983,16 @@ static void vdi_op_done(struct vdi_op_message *msg)
 
 		set_global_nr_copies(sys->nr_sobjs);
 
-		sys->status = SD_STATUS_OK;
+		if (sys_nohalt())
+			sys->status = SD_STATUS_OK;
+		else {
+			int nr_zones = get_zones_nr_from(&sys->sd_node_list);
+
+			if (nr_zones >= sys->nr_sobjs)
+				sys->status = SD_STATUS_OK;
+			else
+				sys->status = SD_STATUS_HALT;
+		}
 		break;
 	case SD_OP_SHUTDOWN:
 		sys->status = SD_STATUS_SHUTDOWN;
@@ -1210,6 +1219,13 @@ static void __sd_notify_done(struct cpg_event *cevent)
 		}
 		start_recovery(sys->epoch);
 	}
+
+	if (sys->status == SD_STATUS_HALT) {
+		int nr_zones = get_zones_nr_from(&sys->sd_node_list);
+
+		if (nr_zones >= sys->nr_sobjs)
+			sys->status = SD_STATUS_OK;
+	}
 }
 
 static void sd_notify_handler(struct sheepid *sender, void *msg, size_t msg_len)
@@ -1438,6 +1454,11 @@ static void __sd_join_done(struct cpg_event *cevent)
 		send_join_request(&w->joined);
 }
 
+int sys_nohalt()
+{
+	return sys->nr_sobjs & (1 << 31);
+}
+
 static void __sd_leave_done(struct cpg_event *cevent)
 {
 	struct work_leave *w = container_of(cevent, struct work_leave, cev);
@@ -1450,6 +1471,13 @@ static void __sd_leave_done(struct cpg_event *cevent)
 	if (node_left &&
 	    (sys->status == SD_STATUS_OK || sys->status == SD_STATUS_HALT))
 		start_recovery(sys->epoch);
+
+	if (sys->status == SD_STATUS_OK && !sys_nohalt()) {
+		int nr_zones = get_zones_nr_from(&sys->sd_node_list);
+
+		if (nr_zones < sys->nr_sobjs)
+			sys->status = SD_STATUS_HALT;
+	}
 }
 
 static void cpg_event_free(struct cpg_event *cevent)
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 355cd93..762a673 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -212,6 +212,8 @@ int update_epoch_store(uint32_t epoch);
 int set_global_nr_copies(uint32_t copies);
 int get_global_nr_copies(uint32_t *copies);
 
+int sys_nohalt(void);
+
 #define NR_GW_WORKER_THREAD 4
 #define NR_IO_WORKER_THREAD 4
 
-- 
1.7.6.1




More information about the sheepdog mailing list