On Mon, Jul 30, 2012 at 12:43 PM, MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp> wrote: > At Sun, 29 Jul 2012 22:29:20 +0800, > Yunkai Zhang wrote: >> >> From: Yunkai Zhang <qiushu.zyk at taobao.com> >> >> = What is delay recovery = >> With delay recovery, we can add(kill) multiple nodes into(from) cluster >> leisurely, only one recovery operation will be triggered at the end of >> delay recovery, so that it can help me to reduce cluster's fluctuation. >> >> = Usage = >> 1) Start delay recovery transaction: >> $ collie delay_recovery start >> Delay recovery start ... >> >> 2) Add(kill) multiple nodes into(from) cluster >> ... >> >> 3) Stop delay recovery transaction: >> $ collie delay_recovery stop >> Delay recovery stop. >> >> This patch only implements the command-line tools to update inner status, >> next patch will do the real work of delay recovery. >> >> Signed-off-by: Yunkai Zhang <qiushu.zyk at taobao.com> >> --- >> collie/Makefile.am | 3 ++- >> collie/collie.c | 1 + >> collie/collie.h | 1 + >> collie/delay_recovery.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++ >> include/internal_proto.h | 2 ++ >> include/sheepdog_proto.h | 4 +++ >> sheep/ops.c | 26 +++++++++++++++++++ >> sheep/sheep_priv.h | 1 + >> 8 files changed, 102 insertions(+), 1 deletion(-) >> create mode 100644 collie/delay_recovery.c > > We use the first argument of the collie command as a target of the > operation. I prefer 'collie cluster recovery <operation>'. Yes, I originally wanted to use 'collie cluster recovery xxx', but I found that we already have another command, 'collie cluster recover'. I worry that having both will confuse users. What do you think?
> > Thanks, > > Kazutaka > > >> >> diff --git a/collie/Makefile.am b/collie/Makefile.am >> index d98b709..e7821b1 100644 >> --- a/collie/Makefile.am >> +++ b/collie/Makefile.am >> @@ -23,7 +23,8 @@ INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include >> >> sbin_PROGRAMS = collie >> >> -collie_SOURCES = collie.c common.c treeview.c vdi.c node.c cluster.c >> +collie_SOURCES = collie.c common.c treeview.c vdi.c node.c cluster.c \ >> + delay_recovery.c >> >> if BUILD_TRACE >> collie_SOURCES += debug.c >> diff --git a/collie/collie.c b/collie/collie.c >> index fda9a59..7d30537 100644 >> --- a/collie/collie.c >> +++ b/collie/collie.c >> @@ -285,6 +285,7 @@ int main(int argc, char **argv) >> vdi_command, >> node_command, >> cluster_command, >> + delay_recovery_command, >> debug_command, >> {NULL,} >> }; >> diff --git a/collie/collie.h b/collie/collie.h >> index 75a675c..32a0368 100644 >> --- a/collie/collie.h >> +++ b/collie/collie.h >> @@ -76,6 +76,7 @@ int send_light_req(struct sd_req *hdr, const char *host, int port); >> extern struct command vdi_command; >> extern struct command node_command; >> extern struct command cluster_command; >> +extern struct command delay_recovery_command; >> >> #ifdef ENABLE_TRACE >> extern struct command debug_command; >> diff --git a/collie/delay_recovery.c b/collie/delay_recovery.c >> new file mode 100644 >> index 0000000..2cd4841 >> --- /dev/null >> +++ b/collie/delay_recovery.c >> @@ -0,0 +1,65 @@ >> +/* >> + * Copyright (C) 2011 Taobao Inc. >> + * >> + * Yunkai Zhang <yunkai.me at gmail.com> >> + * >> + * This program is free software; you can redistribute it and/or >> + * modify it under the terms of the GNU General Public License version >> + * 2 as published by the Free Software Foundation. >> + * >> + * You should have received a copy of the GNU General Public License >> + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
>> + */ >> + >> +#include <time.h> >> +#include <string.h> >> +#include <ctype.h> >> +#include <sys/time.h> >> + >> +#include "collie.h" >> + >> +static int delay_recovery_start(int argc, char **argv) >> +{ >> + int ret; >> + struct sd_req hdr; >> + >> + sd_init_req(&hdr, SD_OP_START_DELAY_RECOVERY); >> + hdr.epoch = sd_epoch; >> + >> + ret = send_light_req(&hdr, sdhost, sdport); >> + if (ret) >> + return EXIT_FAILURE; >> + >> + printf("Delay recovery start ...\n"); >> + return EXIT_SUCCESS; >> +} >> + >> +static int delay_recovery_stop(int argc, char **argv) >> +{ >> + int ret; >> + struct sd_req hdr; >> + >> + sd_init_req(&hdr, SD_OP_STOP_DELAY_RECOVERY); >> + hdr.epoch = sd_epoch; >> + >> + ret = send_light_req(&hdr, sdhost, sdport); >> + if (ret) >> + return EXIT_FAILURE; >> + >> + printf("Delay recovery stop.\n"); >> + return EXIT_SUCCESS; >> +} >> + >> +static struct subcommand delay_recovery_cmd[] = { >> + {"start", NULL, "aph", "start delay recovery", >> + 0, delay_recovery_start}, >> + {"stop", NULL, "aph", "stop delay recovery", >> + 0, delay_recovery_stop}, >> + {NULL,}, >> +}; >> + >> +struct command delay_recovery_command = { >> + "delay_recovery", >> + delay_recovery_cmd, >> + NULL >> +}; >> diff --git a/include/internal_proto.h b/include/internal_proto.h >> index abc14dc..d6a7989 100644 >> --- a/include/internal_proto.h >> +++ b/include/internal_proto.h >> @@ -60,6 +60,8 @@ >> #define SD_OP_READ_PEER 0xA4 >> #define SD_OP_WRITE_PEER 0xA5 >> #define SD_OP_REMOVE_PEER 0xA6 >> +#define SD_OP_STOP_DELAY_RECOVERY 0xA7 >> +#define SD_OP_START_DELAY_RECOVERY 0xA8 >> >> /* internal flags for hdr.flags, must be above 0x80 */ >> #define SD_FLAG_CMD_RECOVERY 0x0080 >> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h >> index 45a4b81..5a306a6 100644 >> --- a/include/sheepdog_proto.h >> +++ b/include/sheepdog_proto.h >> @@ -104,6 +104,10 @@ >> >> #define STORE_LEN 16 >> >> +/* status for delay recovery */ >> +#define SD_DELAY_RECOVERY_STOP 0 >> 
+#define SD_DELAY_RECOVERY_START 1 >> + >> struct sd_req { >> uint8_t proto_ver; >> uint8_t opcode; >> diff --git a/sheep/ops.c b/sheep/ops.c >> index 8aa6b34..c0fa98e 100644 >> --- a/sheep/ops.c >> +++ b/sheep/ops.c >> @@ -260,6 +260,20 @@ static int cluster_shutdown(const struct sd_req *req, struct sd_rsp *rsp, >> return SD_RES_SUCCESS; >> } >> >> +static int cluster_start_delay_recovery(const struct sd_req *req, >> + struct sd_rsp *rsp, void *data) >> +{ >> + sys->delay_recovery = SD_DELAY_RECOVERY_START; >> + return SD_RES_SUCCESS; >> +} >> + >> +static int cluster_stop_delay_recovery(const struct sd_req *req, >> + struct sd_rsp *rsp, void *data) >> +{ >> + sys->delay_recovery = SD_DELAY_RECOVERY_STOP; >> + return SD_RES_SUCCESS; >> +} >> + >> static int cluster_get_vdi_attr(struct request *req) >> { >> const struct sd_req *hdr = &req->rq; >> @@ -1000,6 +1014,18 @@ static struct sd_op_template sd_ops[] = { >> .type = SD_OP_TYPE_PEER, >> .process_work = peer_remove_obj, >> }, >> + >> + [SD_OP_START_DELAY_RECOVERY] = { >> + .name = "START_DELAY_RECOVERY", >> + .type = SD_OP_TYPE_CLUSTER, >> + .process_main = cluster_start_delay_recovery, >> + }, >> + >> + [SD_OP_STOP_DELAY_RECOVERY] = { >> + .name = "STOP_DELAY_RECOVERY", >> + .type = SD_OP_TYPE_CLUSTER, >> + .process_main = cluster_stop_delay_recovery, >> + }, >> }; >> >> struct sd_op_template *get_sd_op(uint8_t opcode) >> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h >> index 530fe14..22d9d6c 100644 >> --- a/sheep/sheep_priv.h >> +++ b/sheep/sheep_priv.h >> @@ -116,6 +116,7 @@ struct cluster_info { >> >> int use_directio; >> uint8_t gateway_only; >> + uint8_t delay_recovery; >> >> struct work_queue *gateway_wqueue; >> struct work_queue *io_wqueue; >> -- >> 1.7.11.2 >> >> -- >> sheepdog mailing list >> sheepdog at lists.wpkg.org >> http://lists.wpkg.org/mailman/listinfo/sheepdog -- Yunkai Zhang Work at Taobao |