This removes the --nohalt switch and adds a mode switch: --mode safe|quorum|unsafe. Safe mode: halt the cluster when nr_nodes < nr_copies (default). Quorum mode: halt the cluster when nr_nodes < nr_copies / 2 + 1. Unsafe mode: never halt the cluster. Signed-off-by: Dietmar Maurer <dietmar at proxmox.com> --- collie/cluster.c | 30 ++++++++++++++++++++---------- collie/collie.c | 3 +-- include/internal_proto.h | 1 + sheep/group.c | 17 ++++++++++------- sheep/sheep_priv.h | 5 +++++ 5 files changed, 37 insertions(+), 19 deletions(-) diff --git a/collie/cluster.c b/collie/cluster.c index a0c099c..2913f6d 100644 --- a/collie/cluster.c +++ b/collie/cluster.c @@ -21,18 +21,13 @@ struct cluster_cmd_data { int list; int copies; int nohalt; + int quorum; int force; char name[STORE_LEN]; } cluster_cmd_data; #define DEFAULT_STORE "farm" -static void set_nohalt(uint16_t *p) -{ - if (p) - *p |= SD_FLAG_NOHALT; -} - static int list_store(void) { int fd, ret; @@ -89,7 +84,10 @@ static int cluster_format(int argc, char **argv) sd_init_req((struct sd_req *)&hdr, SD_OP_MAKE_FS); hdr.copies = cluster_cmd_data.copies; if (cluster_cmd_data.nohalt) - set_nohalt(&hdr.flags); + hdr.flags |= SD_FLAG_NOHALT; + if (cluster_cmd_data.quorum) + hdr.flags |= SD_FLAG_QUORUM; + hdr.epoch = sd_epoch; hdr.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000; @@ -461,7 +459,7 @@ static int cluster_recover(int argc, char **argv) static struct subcommand cluster_cmd[] = { {"info", NULL, "aprh", "show cluster information", SUBCMD_FLAG_NEED_NODELIST, cluster_info}, - {"format", NULL, "bcHaph", "create a Sheepdog store", + {"format", NULL, "bcmaph", "create a Sheepdog store", 0, cluster_format}, {"shutdown", NULL, "aph", "stop Sheepdog", SUBCMD_FLAG_NEED_NODELIST, cluster_shutdown}, @@ -495,8 +493,20 @@ static int cluster_parser(int ch, char *opt) } cluster_cmd_data.copies = copies; break; - case 'H': - cluster_cmd_data.nohalt = 1; + case 'm': + if (strcmp(opt, "safe") == 0) { + cluster_cmd_data.nohalt = 0; + 
cluster_cmd_data.quorum = 0; + } else if (strcmp(opt, "quorum") == 0) { + cluster_cmd_data.nohalt = 0; + cluster_cmd_data.quorum = 1; + } else if (strcmp(opt, "unsafe") == 0) { + cluster_cmd_data.nohalt = 1; + cluster_cmd_data.quorum = 0; + } else { + fprintf(stderr, "Unknown mode '%s'\n", opt); + exit(EXIT_FAILURE); + } break; case 'f': cluster_cmd_data.force = 1; diff --git a/collie/collie.c b/collie/collie.c index ccf87c5..fda9a59 100644 --- a/collie/collie.c +++ b/collie/collie.c @@ -42,8 +42,7 @@ static const struct sd_option collie_options[] = { /* cluster options */ {'b', "store", 1, "specify backend store"}, {'c', "copies", 1, "specify the data redundancy (number of copies)"}, - {'H', "nohalt", 0, "serve IO requests even if there are too few\n\ - nodes for the configured redundancy"}, + {'m', "mode", 1, "mode (safe, quorum, unsafe)"}, {'f', "force", 0, "do not prompt for confirmation"}, {'R', "restore", 1, "restore the cluster"}, {'l', "list", 0, "list the user epoch information"}, diff --git a/include/internal_proto.h b/include/internal_proto.h index a523093..584f41e 100644 --- a/include/internal_proto.h +++ b/include/internal_proto.h @@ -76,6 +76,7 @@ #define SD_RES_NETWORK_ERROR 0x86 /* Network error between sheep */ #define SD_FLAG_NOHALT 0x0004 /* Serve the IO rquest even lack of nodes */ +#define SD_FLAG_QUORUM 0x0008 /* Serve the IO rquest as long we are quorate */ #define SD_STATUS_OK 0x00000001 #define SD_STATUS_WAIT_FOR_FORMAT 0x00000002 diff --git a/sheep/group.c b/sheep/group.c index 059656e..1961ae9 100644 --- a/sheep/group.c +++ b/sheep/group.c @@ -88,8 +88,13 @@ bool have_enough_zones(void) dprintf("flags %d, nr_zones %d, copies %d\n", sys->flags, current_vnode_info->nr_zones, sys->nr_copies); - if (current_vnode_info->nr_zones >= sys->nr_copies) - return true; + if (sys_flag_quorum()) { + if (current_vnode_info->nr_zones > (sys->nr_copies/2)) + return true; + } else { + if (current_vnode_info->nr_zones >= sys->nr_copies) + return true; + } 
return false; } @@ -1076,11 +1081,9 @@ void sd_leave_handler(struct sd_node *left, struct sd_node *members, } put_vnode_info(old_vnode_info); - if (sys_can_halt()) { - if (current_vnode_info->nr_zones < sys->nr_copies) - sys_stat_set(SD_STATUS_HALT); - } - + if (!have_enough_zones()) + sys_stat_set(SD_STATUS_HALT); + sockfd_cache_del(&left->nid); } diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 7d5700c..648f485 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -323,6 +323,11 @@ static inline int sys_flag_nohalt(void) return sys->flags & SD_FLAG_NOHALT; } +static inline int sys_flag_quorum(void) +{ + return sys->flags & SD_FLAG_QUORUM; +} + static inline int sys_stat_ok(void) { return sys->status & SD_STATUS_OK; -- 1.7.2.5 |