[sheepdog] [PATCH] change nohalt flag into a switch to control which mode to run
Dietmar Maurer
dietmar at proxmox.com
Thu Jul 19 09:50:44 CEST 2012
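This removes the --nohalt switch and adds a --mode switch:
--mode safe|quorum|unsafe
safe mode: halt the cluster when nr_nodes < nr_copies (default)
quorum mode: halt the cluster when nr_nodes < nr_copies / 2 + 1 (integer division), i.e. when fewer than a majority of the copies can be served
unsafe mode: never halt the cluster (the behaviour of the old --nohalt switch)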
Signed-off-by: Dietmar Maurer <dietmar at proxmox.com>
---
collie/cluster.c | 30 ++++++++++++++++++++----------
collie/collie.c | 3 +--
include/internal_proto.h | 1 +
sheep/group.c | 17 ++++++++++-------
sheep/sheep_priv.h | 5 +++++
5 files changed, 37 insertions(+), 19 deletions(-)
diff --git a/collie/cluster.c b/collie/cluster.c
index a0c099c..2913f6d 100644
--- a/collie/cluster.c
+++ b/collie/cluster.c
@@ -21,18 +21,13 @@ struct cluster_cmd_data {
int list;
int copies;
int nohalt;
+ int quorum;
int force;
char name[STORE_LEN];
} cluster_cmd_data;
#define DEFAULT_STORE "farm"
-static void set_nohalt(uint16_t *p)
-{
- if (p)
- *p |= SD_FLAG_NOHALT;
-}
-
static int list_store(void)
{
int fd, ret;
@@ -89,7 +84,10 @@ static int cluster_format(int argc, char **argv)
sd_init_req((struct sd_req *)&hdr, SD_OP_MAKE_FS);
hdr.copies = cluster_cmd_data.copies;
if (cluster_cmd_data.nohalt)
- set_nohalt(&hdr.flags);
+ hdr.flags |= SD_FLAG_NOHALT;
+ if (cluster_cmd_data.quorum)
+ hdr.flags |= SD_FLAG_QUORUM;
+
hdr.epoch = sd_epoch;
hdr.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000;
@@ -461,7 +459,7 @@ static int cluster_recover(int argc, char **argv)
static struct subcommand cluster_cmd[] = {
{"info", NULL, "aprh", "show cluster information",
SUBCMD_FLAG_NEED_NODELIST, cluster_info},
- {"format", NULL, "bcHaph", "create a Sheepdog store",
+ {"format", NULL, "bcmaph", "create a Sheepdog store",
0, cluster_format},
{"shutdown", NULL, "aph", "stop Sheepdog",
SUBCMD_FLAG_NEED_NODELIST, cluster_shutdown},
@@ -495,8 +493,20 @@ static int cluster_parser(int ch, char *opt)
}
cluster_cmd_data.copies = copies;
break;
- case 'H':
- cluster_cmd_data.nohalt = 1;
+ case 'm':
+ if (strcmp(opt, "safe") == 0) {
+ cluster_cmd_data.nohalt = 0;
+ cluster_cmd_data.quorum = 0;
+ } else if (strcmp(opt, "quorum") == 0) {
+ cluster_cmd_data.nohalt = 0;
+ cluster_cmd_data.quorum = 1;
+ } else if (strcmp(opt, "unsafe") == 0) {
+ cluster_cmd_data.nohalt = 1;
+ cluster_cmd_data.quorum = 0;
+ } else {
+ fprintf(stderr, "Unknown mode '%s'\n", opt);
+ exit(EXIT_FAILURE);
+ }
break;
case 'f':
cluster_cmd_data.force = 1;
diff --git a/collie/collie.c b/collie/collie.c
index ccf87c5..fda9a59 100644
--- a/collie/collie.c
+++ b/collie/collie.c
@@ -42,8 +42,7 @@ static const struct sd_option collie_options[] = {
/* cluster options */
{'b', "store", 1, "specify backend store"},
{'c', "copies", 1, "specify the data redundancy (number of copies)"},
- {'H', "nohalt", 0, "serve IO requests even if there are too few\n\
- nodes for the configured redundancy"},
+ {'m', "mode", 1, "mode (safe, quorum, unsafe)"},
{'f', "force", 0, "do not prompt for confirmation"},
{'R', "restore", 1, "restore the cluster"},
{'l', "list", 0, "list the user epoch information"},
diff --git a/include/internal_proto.h b/include/internal_proto.h
index a523093..584f41e 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -76,6 +76,7 @@
#define SD_RES_NETWORK_ERROR 0x86 /* Network error between sheep */
#define SD_FLAG_NOHALT 0x0004 /* Serve the IO rquest even lack of nodes */
+#define SD_FLAG_QUORUM 0x0008 /* Serve the IO request as long as we are quorate */
#define SD_STATUS_OK 0x00000001
#define SD_STATUS_WAIT_FOR_FORMAT 0x00000002
diff --git a/sheep/group.c b/sheep/group.c
index 059656e..1961ae9 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -88,8 +88,13 @@ bool have_enough_zones(void)
dprintf("flags %d, nr_zones %d, copies %d\n",
sys->flags, current_vnode_info->nr_zones, sys->nr_copies);
- if (current_vnode_info->nr_zones >= sys->nr_copies)
- return true;
+ if (sys_flag_quorum()) {
+ if (current_vnode_info->nr_zones > (sys->nr_copies/2))
+ return true;
+ } else {
+ if (current_vnode_info->nr_zones >= sys->nr_copies)
+ return true;
+ }
return false;
}
@@ -1076,11 +1081,9 @@ void sd_leave_handler(struct sd_node *left, struct sd_node *members,
}
put_vnode_info(old_vnode_info);
- if (sys_can_halt()) {
- if (current_vnode_info->nr_zones < sys->nr_copies)
- sys_stat_set(SD_STATUS_HALT);
- }
-
+ if (!have_enough_zones())
+ sys_stat_set(SD_STATUS_HALT);
+
sockfd_cache_del(&left->nid);
}
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 7d5700c..648f485 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -323,6 +323,11 @@ static inline int sys_flag_nohalt(void)
return sys->flags & SD_FLAG_NOHALT;
}
+static inline int sys_flag_quorum(void)
+{
+ return sys->flags & SD_FLAG_QUORUM;
+}
+
static inline int sys_stat_ok(void)
{
return sys->status & SD_STATUS_OK;
--
1.7.2.5
More information about the sheepdog
mailing list