[sheepdog] [PATCH] change nohalt flag into a switch to control which mode to run

Dietmar Maurer dietmar at proxmox.com
Thu Jul 19 09:50:44 CEST 2012


This removes the --nohalt switch, and adds a mode switch

	--mode safe|quorum|unsafe

safe mode: halt cluster when nr_nodes < nr_copies (default)
quorum mode: halt cluster when nr_nodes < nr_copies / 2 + 1
unsafe mode: never halt the cluster.

Signed-off-by: Dietmar Maurer <dietmar at proxmox.com>
---
 collie/cluster.c         |   30 ++++++++++++++++++++----------
 collie/collie.c          |    3 +--
 include/internal_proto.h |    1 +
 sheep/group.c            |   17 ++++++++++-------
 sheep/sheep_priv.h       |    5 +++++
 5 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/collie/cluster.c b/collie/cluster.c
index a0c099c..2913f6d 100644
--- a/collie/cluster.c
+++ b/collie/cluster.c
@@ -21,18 +21,13 @@ struct cluster_cmd_data {
 	int list;
 	int copies;
 	int nohalt;
+	int quorum;
 	int force;
 	char name[STORE_LEN];
 } cluster_cmd_data;
 
 #define DEFAULT_STORE	"farm"
 
-static void set_nohalt(uint16_t *p)
-{
-	if (p)
-		*p |= SD_FLAG_NOHALT;
-}
-
 static int list_store(void)
 {
 	int fd, ret;
@@ -89,7 +84,10 @@ static int cluster_format(int argc, char **argv)
 	sd_init_req((struct sd_req *)&hdr, SD_OP_MAKE_FS);
 	hdr.copies = cluster_cmd_data.copies;
 	if (cluster_cmd_data.nohalt)
-		set_nohalt(&hdr.flags);
+		hdr.flags |= SD_FLAG_NOHALT;
+	if (cluster_cmd_data.quorum)
+		hdr.flags |= SD_FLAG_QUORUM;
+
 	hdr.epoch = sd_epoch;
 	hdr.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000;
 
@@ -461,7 +459,7 @@ static int cluster_recover(int argc, char **argv)
 static struct subcommand cluster_cmd[] = {
 	{"info", NULL, "aprh", "show cluster information",
 	 SUBCMD_FLAG_NEED_NODELIST, cluster_info},
-	{"format", NULL, "bcHaph", "create a Sheepdog store",
+	{"format", NULL, "bcmaph", "create a Sheepdog store",
 	 0, cluster_format},
 	{"shutdown", NULL, "aph", "stop Sheepdog",
 	 SUBCMD_FLAG_NEED_NODELIST, cluster_shutdown},
@@ -495,8 +493,20 @@ static int cluster_parser(int ch, char *opt)
 		}
 		cluster_cmd_data.copies = copies;
 		break;
-	case 'H':
-		cluster_cmd_data.nohalt = 1;
+	case 'm':
+		if (strcmp(opt, "safe") == 0) {
+			cluster_cmd_data.nohalt = 0;
+			cluster_cmd_data.quorum = 0;
+		} else if (strcmp(opt, "quorum") == 0) {
+			cluster_cmd_data.nohalt = 0;
+			cluster_cmd_data.quorum = 1;
+		} else if (strcmp(opt, "unsafe") == 0) {
+			cluster_cmd_data.nohalt = 1;
+			cluster_cmd_data.quorum = 0;
+		} else {
+			fprintf(stderr, "Unknown mode '%s'\n", opt);
+			exit(EXIT_FAILURE);
+		}
 		break;
 	case 'f':
 		cluster_cmd_data.force = 1;
diff --git a/collie/collie.c b/collie/collie.c
index ccf87c5..fda9a59 100644
--- a/collie/collie.c
+++ b/collie/collie.c
@@ -42,8 +42,7 @@ static const struct sd_option collie_options[] = {
 	/* cluster options */
 	{'b', "store", 1, "specify backend store"},
 	{'c', "copies", 1, "specify the data redundancy (number of copies)"},
-	{'H', "nohalt", 0, "serve IO requests even if there are too few\n\
-                          nodes for the configured redundancy"},
+	{'m', "mode", 1, "mode (safe, quorum, unsafe)"},
 	{'f', "force", 0, "do not prompt for confirmation"},
 	{'R', "restore", 1, "restore the cluster"},
 	{'l', "list", 0, "list the user epoch information"},
diff --git a/include/internal_proto.h b/include/internal_proto.h
index a523093..584f41e 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -76,6 +76,7 @@
 #define SD_RES_NETWORK_ERROR 0x86 /* Network error between sheep */
 
 #define SD_FLAG_NOHALT       0x0004 /* Serve the IO rquest even lack of nodes */
+#define SD_FLAG_QUORUM       0x0008 /* Serve the IO request as long as we are quorate */
 
 #define SD_STATUS_OK                0x00000001
 #define SD_STATUS_WAIT_FOR_FORMAT   0x00000002
diff --git a/sheep/group.c b/sheep/group.c
index 059656e..1961ae9 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -88,8 +88,13 @@ bool have_enough_zones(void)
 	dprintf("flags %d, nr_zones %d, copies %d\n",
 		sys->flags, current_vnode_info->nr_zones, sys->nr_copies);
 
-	if (current_vnode_info->nr_zones >= sys->nr_copies)
-		return true;
+	if (sys_flag_quorum()) {
+		if (current_vnode_info->nr_zones > (sys->nr_copies/2))
+			return true;
+	} else {
+		if (current_vnode_info->nr_zones >= sys->nr_copies)
+			return true;
+	}
 	return false;
 }
 
@@ -1076,11 +1081,9 @@ void sd_leave_handler(struct sd_node *left, struct sd_node *members,
 	}
 	put_vnode_info(old_vnode_info);
 
-	if (sys_can_halt()) {
-		if (current_vnode_info->nr_zones < sys->nr_copies)
-			sys_stat_set(SD_STATUS_HALT);
-	}
-
+	if (!have_enough_zones())
+		sys_stat_set(SD_STATUS_HALT);
+
 	sockfd_cache_del(&left->nid);
 }
 
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 7d5700c..648f485 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -323,6 +323,11 @@ static inline int sys_flag_nohalt(void)
 	return sys->flags & SD_FLAG_NOHALT;
 }
 
+static inline int sys_flag_quorum(void)
+{
+	return sys->flags & SD_FLAG_QUORUM;
+}
+
 static inline int sys_stat_ok(void)
 {
 	return sys->status & SD_STATUS_OK;
-- 
1.7.2.5





More information about the sheepdog mailing list