I also considered 'collie cluster delay_recovery {start|stop|info}', but it seems too long to type. Do you like it? On Mon, Jul 30, 2012 at 12:58 PM, Yunkai Zhang <yunkai.me at gmail.com> wrote: > On Mon, Jul 30, 2012 at 12:43 PM, MORITA Kazutaka > <morita.kazutaka at lab.ntt.co.jp> wrote: >> At Sun, 29 Jul 2012 22:29:20 +0800, >> Yunkai Zhang wrote: >>> >>> From: Yunkai Zhang <qiushu.zyk at taobao.com> >>> >>> = What is delay recovery = >>> With delay recovery, we can add(kill) multiple nodes into(from) cluster >>> leisurely, only one recovery operation will be triggered at the end of >>> delay recovery, so that it can help me to reduce cluster's fluctuation. >>> >>> = Usage = >>> 1) Start delay recovery transaction: >>> $ collie delay_recovery start >>> Delay recovery start ... >>> >>> 2) Add(kill) multiple nodes into(from) cluster >>> ... >>> >>> 3) Stop delay recovery transaction: >>> $ collie delay_recovery stop >>> Delay recovery stop. >>> >>> This patch only implements the command-line tools to update inner status, >>> next patch will do the real work of delay recovery. >>> >>> Signed-off-by: Yunkai Zhang <qiushu.zyk at taobao.com> >>> --- >>> collie/Makefile.am | 3 ++- >>> collie/collie.c | 1 + >>> collie/collie.h | 1 + >>> collie/delay_recovery.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++ >>> include/internal_proto.h | 2 ++ >>> include/sheepdog_proto.h | 4 +++ >>> sheep/ops.c | 26 +++++++++++++++++++ >>> sheep/sheep_priv.h | 1 + >>> 8 files changed, 102 insertions(+), 1 deletion(-) >>> create mode 100644 collie/delay_recovery.c >> >> We use the first argument of the collie command as a target of the >> operation. I prefer 'collie cluster recovery <operation>'. > > Yes, I wanted to use 'collie cluster recovery xxx' originally, but I > found that we already have another command, 'collie cluster recover'. I > worry that it will confuse users. What do you think? 
> > >> >> Thanks, >> >> Kazutaka >> >> >>> >>> diff --git a/collie/Makefile.am b/collie/Makefile.am >>> index d98b709..e7821b1 100644 >>> --- a/collie/Makefile.am >>> +++ b/collie/Makefile.am >>> @@ -23,7 +23,8 @@ INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include >>> >>> sbin_PROGRAMS = collie >>> >>> -collie_SOURCES = collie.c common.c treeview.c vdi.c node.c cluster.c >>> +collie_SOURCES = collie.c common.c treeview.c vdi.c node.c cluster.c \ >>> + delay_recovery.c >>> >>> if BUILD_TRACE >>> collie_SOURCES += debug.c >>> diff --git a/collie/collie.c b/collie/collie.c >>> index fda9a59..7d30537 100644 >>> --- a/collie/collie.c >>> +++ b/collie/collie.c >>> @@ -285,6 +285,7 @@ int main(int argc, char **argv) >>> vdi_command, >>> node_command, >>> cluster_command, >>> + delay_recovery_command, >>> debug_command, >>> {NULL,} >>> }; >>> diff --git a/collie/collie.h b/collie/collie.h >>> index 75a675c..32a0368 100644 >>> --- a/collie/collie.h >>> +++ b/collie/collie.h >>> @@ -76,6 +76,7 @@ int send_light_req(struct sd_req *hdr, const char *host, int port); >>> extern struct command vdi_command; >>> extern struct command node_command; >>> extern struct command cluster_command; >>> +extern struct command delay_recovery_command; >>> >>> #ifdef ENABLE_TRACE >>> extern struct command debug_command; >>> diff --git a/collie/delay_recovery.c b/collie/delay_recovery.c >>> new file mode 100644 >>> index 0000000..2cd4841 >>> --- /dev/null >>> +++ b/collie/delay_recovery.c >>> @@ -0,0 +1,65 @@ >>> +/* >>> + * Copyright (C) 2011 Taobao Inc. >>> + * >>> + * Yunkai Zhang <yunkai.me at gmail.com> >>> + * >>> + * This program is free software; you can redistribute it and/or >>> + * modify it under the terms of the GNU General Public License version >>> + * 2 as published by the Free Software Foundation. >>> + * >>> + * You should have received a copy of the GNU General Public License >>> + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
>>> + */ >>> + >>> +#include <time.h> >>> +#include <string.h> >>> +#include <ctype.h> >>> +#include <sys/time.h> >>> + >>> +#include "collie.h" >>> + >>> +static int delay_recovery_start(int argc, char **argv) >>> +{ >>> + int ret; >>> + struct sd_req hdr; >>> + >>> + sd_init_req(&hdr, SD_OP_START_DELAY_RECOVERY); >>> + hdr.epoch = sd_epoch; >>> + >>> + ret = send_light_req(&hdr, sdhost, sdport); >>> + if (ret) >>> + return EXIT_FAILURE; >>> + >>> + printf("Delay recovery start ...\n"); >>> + return EXIT_SUCCESS; >>> +} >>> + >>> +static int delay_recovery_stop(int argc, char **argv) >>> +{ >>> + int ret; >>> + struct sd_req hdr; >>> + >>> + sd_init_req(&hdr, SD_OP_STOP_DELAY_RECOVERY); >>> + hdr.epoch = sd_epoch; >>> + >>> + ret = send_light_req(&hdr, sdhost, sdport); >>> + if (ret) >>> + return EXIT_FAILURE; >>> + >>> + printf("Delay recovery stop.\n"); >>> + return EXIT_SUCCESS; >>> +} >>> + >>> +static struct subcommand delay_recovery_cmd[] = { >>> + {"start", NULL, "aph", "start delay recovery", >>> + 0, delay_recovery_start}, >>> + {"stop", NULL, "aph", "stop delay recovery", >>> + 0, delay_recovery_stop}, >>> + {NULL,}, >>> +}; >>> + >>> +struct command delay_recovery_command = { >>> + "delay_recovery", >>> + delay_recovery_cmd, >>> + NULL >>> +}; >>> diff --git a/include/internal_proto.h b/include/internal_proto.h >>> index abc14dc..d6a7989 100644 >>> --- a/include/internal_proto.h >>> +++ b/include/internal_proto.h >>> @@ -60,6 +60,8 @@ >>> #define SD_OP_READ_PEER 0xA4 >>> #define SD_OP_WRITE_PEER 0xA5 >>> #define SD_OP_REMOVE_PEER 0xA6 >>> +#define SD_OP_STOP_DELAY_RECOVERY 0xA7 >>> +#define SD_OP_START_DELAY_RECOVERY 0xA8 >>> >>> /* internal flags for hdr.flags, must be above 0x80 */ >>> #define SD_FLAG_CMD_RECOVERY 0x0080 >>> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h >>> index 45a4b81..5a306a6 100644 >>> --- a/include/sheepdog_proto.h >>> +++ b/include/sheepdog_proto.h >>> @@ -104,6 +104,10 @@ >>> >>> #define STORE_LEN 16 >>> >>> 
+/* status for delay recovery */ >>> +#define SD_DELAY_RECOVERY_STOP 0 >>> +#define SD_DELAY_RECOVERY_START 1 >>> + >>> struct sd_req { >>> uint8_t proto_ver; >>> uint8_t opcode; >>> diff --git a/sheep/ops.c b/sheep/ops.c >>> index 8aa6b34..c0fa98e 100644 >>> --- a/sheep/ops.c >>> +++ b/sheep/ops.c >>> @@ -260,6 +260,20 @@ static int cluster_shutdown(const struct sd_req *req, struct sd_rsp *rsp, >>> return SD_RES_SUCCESS; >>> } >>> >>> +static int cluster_start_delay_recovery(const struct sd_req *req, >>> + struct sd_rsp *rsp, void *data) >>> +{ >>> + sys->delay_recovery = SD_DELAY_RECOVERY_START; >>> + return SD_RES_SUCCESS; >>> +} >>> + >>> +static int cluster_stop_delay_recovery(const struct sd_req *req, >>> + struct sd_rsp *rsp, void *data) >>> +{ >>> + sys->delay_recovery = SD_DELAY_RECOVERY_STOP; >>> + return SD_RES_SUCCESS; >>> +} >>> + >>> static int cluster_get_vdi_attr(struct request *req) >>> { >>> const struct sd_req *hdr = &req->rq; >>> @@ -1000,6 +1014,18 @@ static struct sd_op_template sd_ops[] = { >>> .type = SD_OP_TYPE_PEER, >>> .process_work = peer_remove_obj, >>> }, >>> + >>> + [SD_OP_START_DELAY_RECOVERY] = { >>> + .name = "START_DELAY_RECOVERY", >>> + .type = SD_OP_TYPE_CLUSTER, >>> + .process_main = cluster_start_delay_recovery, >>> + }, >>> + >>> + [SD_OP_STOP_DELAY_RECOVERY] = { >>> + .name = "STOP_DELAY_RECOVERY", >>> + .type = SD_OP_TYPE_CLUSTER, >>> + .process_main = cluster_stop_delay_recovery, >>> + }, >>> }; >>> >>> struct sd_op_template *get_sd_op(uint8_t opcode) >>> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h >>> index 530fe14..22d9d6c 100644 >>> --- a/sheep/sheep_priv.h >>> +++ b/sheep/sheep_priv.h >>> @@ -116,6 +116,7 @@ struct cluster_info { >>> >>> int use_directio; >>> uint8_t gateway_only; >>> + uint8_t delay_recovery; >>> >>> struct work_queue *gateway_wqueue; >>> struct work_queue *io_wqueue; >>> -- >>> 1.7.11.2 >>> >>> -- >>> sheepdog mailing list >>> sheepdog at lists.wpkg.org >>> 
http://lists.wpkg.org/mailman/listinfo/sheepdog > > > > -- > Yunkai Zhang > Work at Taobao -- Yunkai Zhang Work at Taobao |