At Sun, 29 Jul 2012 22:29:20 +0800, Yunkai Zhang wrote: > > From: Yunkai Zhang <qiushu.zyk at taobao.com> > > = What is delay recovery = > With delay recovery, we can add(kill) multiple nodes into(from) cluster > leisurely, only one recovery operation will be triggered at the end of > delay recovery, so that it can help me to reduce cluster's fluctuation. > > = Usage = > 1) Start delay recovery transaction: > $ collie delay_recovery start > Delay recovery start ... > > 2) Add(kill) multiple nodes into(from) cluster > ... > > 3) Stop delay recovery transaction: > $ collie delay_recovery stop > Delay recovery stop. > > This patch only implements the command-line tools to update inner status, > next patch will do the real work of delay recovery. > > Signed-off-by: Yunkai Zhang <qiushu.zyk at taobao.com> > --- > collie/Makefile.am | 3 ++- > collie/collie.c | 1 + > collie/collie.h | 1 + > collie/delay_recovery.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++ > include/internal_proto.h | 2 ++ > include/sheepdog_proto.h | 4 +++ > sheep/ops.c | 26 +++++++++++++++++++ > sheep/sheep_priv.h | 1 + > 8 files changed, 102 insertions(+), 1 deletion(-) > create mode 100644 collie/delay_recovery.c We use the first argument of the collie command as a target of the operation. I prefer 'collie cluster recovery <operation>'. 
Thanks, Kazutaka > > diff --git a/collie/Makefile.am b/collie/Makefile.am > index d98b709..e7821b1 100644 > --- a/collie/Makefile.am > +++ b/collie/Makefile.am > @@ -23,7 +23,8 @@ INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include > > sbin_PROGRAMS = collie > > -collie_SOURCES = collie.c common.c treeview.c vdi.c node.c cluster.c > +collie_SOURCES = collie.c common.c treeview.c vdi.c node.c cluster.c \ > + delay_recovery.c > > if BUILD_TRACE > collie_SOURCES += debug.c > diff --git a/collie/collie.c b/collie/collie.c > index fda9a59..7d30537 100644 > --- a/collie/collie.c > +++ b/collie/collie.c > @@ -285,6 +285,7 @@ int main(int argc, char **argv) > vdi_command, > node_command, > cluster_command, > + delay_recovery_command, > debug_command, > {NULL,} > }; > diff --git a/collie/collie.h b/collie/collie.h > index 75a675c..32a0368 100644 > --- a/collie/collie.h > +++ b/collie/collie.h > @@ -76,6 +76,7 @@ int send_light_req(struct sd_req *hdr, const char *host, int port); > extern struct command vdi_command; > extern struct command node_command; > extern struct command cluster_command; > +extern struct command delay_recovery_command; > > #ifdef ENABLE_TRACE > extern struct command debug_command; > diff --git a/collie/delay_recovery.c b/collie/delay_recovery.c > new file mode 100644 > index 0000000..2cd4841 > --- /dev/null > +++ b/collie/delay_recovery.c > @@ -0,0 +1,65 @@ > +/* > + * Copyright (C) 2011 Taobao Inc. > + * > + * Yunkai Zhang <yunkai.me at gmail.com> > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License version > + * 2 as published by the Free Software Foundation. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
> + */ > + > +#include <time.h> > +#include <string.h> > +#include <ctype.h> > +#include <sys/time.h> > + > +#include "collie.h" > + > +static int delay_recovery_start(int argc, char **argv) > +{ > + int ret; > + struct sd_req hdr; > + > + sd_init_req(&hdr, SD_OP_START_DELAY_RECOVERY); > + hdr.epoch = sd_epoch; > + > + ret = send_light_req(&hdr, sdhost, sdport); > + if (ret) > + return EXIT_FAILURE; > + > + printf("Delay recovery start ...\n"); > + return EXIT_SUCCESS; > +} > + > +static int delay_recovery_stop(int argc, char **argv) > +{ > + int ret; > + struct sd_req hdr; > + > + sd_init_req(&hdr, SD_OP_STOP_DELAY_RECOVERY); > + hdr.epoch = sd_epoch; > + > + ret = send_light_req(&hdr, sdhost, sdport); > + if (ret) > + return EXIT_FAILURE; > + > + printf("Delay recovery stop.\n"); > + return EXIT_SUCCESS; > +} > + > +static struct subcommand delay_recovery_cmd[] = { > + {"start", NULL, "aph", "start delay recovery", > + 0, delay_recovery_start}, > + {"stop", NULL, "aph", "stop delay recovery", > + 0, delay_recovery_stop}, > + {NULL,}, > +}; > + > +struct command delay_recovery_command = { > + "delay_recovery", > + delay_recovery_cmd, > + NULL > +}; > diff --git a/include/internal_proto.h b/include/internal_proto.h > index abc14dc..d6a7989 100644 > --- a/include/internal_proto.h > +++ b/include/internal_proto.h > @@ -60,6 +60,8 @@ > #define SD_OP_READ_PEER 0xA4 > #define SD_OP_WRITE_PEER 0xA5 > #define SD_OP_REMOVE_PEER 0xA6 > +#define SD_OP_STOP_DELAY_RECOVERY 0xA7 > +#define SD_OP_START_DELAY_RECOVERY 0xA8 > > /* internal flags for hdr.flags, must be above 0x80 */ > #define SD_FLAG_CMD_RECOVERY 0x0080 > diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h > index 45a4b81..5a306a6 100644 > --- a/include/sheepdog_proto.h > +++ b/include/sheepdog_proto.h > @@ -104,6 +104,10 @@ > > #define STORE_LEN 16 > > +/* status for delay recovery */ > +#define SD_DELAY_RECOVERY_STOP 0 > +#define SD_DELAY_RECOVERY_START 1 > + > struct sd_req { > uint8_t proto_ver; > 
uint8_t opcode; > diff --git a/sheep/ops.c b/sheep/ops.c > index 8aa6b34..c0fa98e 100644 > --- a/sheep/ops.c > +++ b/sheep/ops.c > @@ -260,6 +260,20 @@ static int cluster_shutdown(const struct sd_req *req, struct sd_rsp *rsp, > return SD_RES_SUCCESS; > } > > +static int cluster_start_delay_recovery(const struct sd_req *req, > + struct sd_rsp *rsp, void *data) > +{ > + sys->delay_recovery = SD_DELAY_RECOVERY_START; > + return SD_RES_SUCCESS; > +} > + > +static int cluster_stop_delay_recovery(const struct sd_req *req, > + struct sd_rsp *rsp, void *data) > +{ > + sys->delay_recovery = SD_DELAY_RECOVERY_STOP; > + return SD_RES_SUCCESS; > +} > + > static int cluster_get_vdi_attr(struct request *req) > { > const struct sd_req *hdr = &req->rq; > @@ -1000,6 +1014,18 @@ static struct sd_op_template sd_ops[] = { > .type = SD_OP_TYPE_PEER, > .process_work = peer_remove_obj, > }, > + > + [SD_OP_START_DELAY_RECOVERY] = { > + .name = "START_DELAY_RECOVERY", > + .type = SD_OP_TYPE_CLUSTER, > + .process_main = cluster_start_delay_recovery, > + }, > + > + [SD_OP_STOP_DELAY_RECOVERY] = { > + .name = "STOP_DELAY_RECOVERY", > + .type = SD_OP_TYPE_CLUSTER, > + .process_main = cluster_stop_delay_recovery, > + }, > }; > > struct sd_op_template *get_sd_op(uint8_t opcode) > diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h > index 530fe14..22d9d6c 100644 > --- a/sheep/sheep_priv.h > +++ b/sheep/sheep_priv.h > @@ -116,6 +116,7 @@ struct cluster_info { > > int use_directio; > uint8_t gateway_only; > + uint8_t delay_recovery; > > struct work_queue *gateway_wqueue; > struct work_queue *io_wqueue; > -- > 1.7.11.2 > > -- > sheepdog mailing list > sheepdog at lists.wpkg.org > http://lists.wpkg.org/mailman/listinfo/sheepdog |