[sheepdog] [PATCH 1/3] collie: add delay_recovery {start|stop} command
Yunkai Zhang
yunkai.me at gmail.com
Mon Jul 30 06:58:10 CEST 2012
On Mon, Jul 30, 2012 at 12:43 PM, MORITA Kazutaka
<morita.kazutaka at lab.ntt.co.jp> wrote:
> At Sun, 29 Jul 2012 22:29:20 +0800,
> Yunkai Zhang wrote:
>>
>> From: Yunkai Zhang <qiushu.zyk at taobao.com>
>>
>> = What is delay recovery =
>> With delay recovery, we can add (or kill) multiple nodes to (or from) the
>> cluster at leisure; only one recovery operation will be triggered at the end
>> of the delay recovery, which helps reduce the cluster's fluctuation.
>>
>> = Usage =
>> 1) Start delay recovery transaction:
>> $ collie delay_recovery start
>> Delay recovery start ...
>>
>> 2) Add(kill) multiple nodes into(from) cluster
>> ...
>>
>> 3) Stop delay recovery transaction:
>> $ collie delay_recovery stop
>> Delay recovery stop.
>>
>> This patch only implements the command-line tools to update the internal
>> status; the next patch will do the real work of delay recovery.
>>
>> Signed-off-by: Yunkai Zhang <qiushu.zyk at taobao.com>
>> ---
>> collie/Makefile.am | 3 ++-
>> collie/collie.c | 1 +
>> collie/collie.h | 1 +
>> collie/delay_recovery.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++
>> include/internal_proto.h | 2 ++
>> include/sheepdog_proto.h | 4 +++
>> sheep/ops.c | 26 +++++++++++++++++++
>> sheep/sheep_priv.h | 1 +
>> 8 files changed, 102 insertions(+), 1 deletion(-)
>> create mode 100644 collie/delay_recovery.c
>
> We use the first argument of the collie command as a target of the
> operation. I prefer 'collie cluster recovery <operation>'.
Yes, I originally wanted to use 'collie cluster recovery xxx', but I
found that we already have another command, 'collie cluster recover'. I
worry that it will confuse users. What do you think?
>
> Thanks,
>
> Kazutaka
>
>
>>
>> diff --git a/collie/Makefile.am b/collie/Makefile.am
>> index d98b709..e7821b1 100644
>> --- a/collie/Makefile.am
>> +++ b/collie/Makefile.am
>> @@ -23,7 +23,8 @@ INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include
>>
>> sbin_PROGRAMS = collie
>>
>> -collie_SOURCES = collie.c common.c treeview.c vdi.c node.c cluster.c
>> +collie_SOURCES = collie.c common.c treeview.c vdi.c node.c cluster.c \
>> + delay_recovery.c
>>
>> if BUILD_TRACE
>> collie_SOURCES += debug.c
>> diff --git a/collie/collie.c b/collie/collie.c
>> index fda9a59..7d30537 100644
>> --- a/collie/collie.c
>> +++ b/collie/collie.c
>> @@ -285,6 +285,7 @@ int main(int argc, char **argv)
>> vdi_command,
>> node_command,
>> cluster_command,
>> + delay_recovery_command,
>> debug_command,
>> {NULL,}
>> };
>> diff --git a/collie/collie.h b/collie/collie.h
>> index 75a675c..32a0368 100644
>> --- a/collie/collie.h
>> +++ b/collie/collie.h
>> @@ -76,6 +76,7 @@ int send_light_req(struct sd_req *hdr, const char *host, int port);
>> extern struct command vdi_command;
>> extern struct command node_command;
>> extern struct command cluster_command;
>> +extern struct command delay_recovery_command;
>>
>> #ifdef ENABLE_TRACE
>> extern struct command debug_command;
>> diff --git a/collie/delay_recovery.c b/collie/delay_recovery.c
>> new file mode 100644
>> index 0000000..2cd4841
>> --- /dev/null
>> +++ b/collie/delay_recovery.c
>> @@ -0,0 +1,65 @@
>> +/*
>> + * Copyright (C) 2011 Taobao Inc.
>> + *
>> + * Yunkai Zhang <yunkai.me at gmail.com>
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU General Public License version
>> + * 2 as published by the Free Software Foundation.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program. If not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +#include <time.h>
>> +#include <string.h>
>> +#include <ctype.h>
>> +#include <sys/time.h>
>> +
>> +#include "collie.h"
>> +
>> +static int delay_recovery_start(int argc, char **argv)
>> +{
>> + int ret;
>> + struct sd_req hdr;
>> +
>> + sd_init_req(&hdr, SD_OP_START_DELAY_RECOVERY);
>> + hdr.epoch = sd_epoch;
>> +
>> + ret = send_light_req(&hdr, sdhost, sdport);
>> + if (ret)
>> + return EXIT_FAILURE;
>> +
>> + printf("Delay recovery start ...\n");
>> + return EXIT_SUCCESS;
>> +}
>> +
>> +static int delay_recovery_stop(int argc, char **argv)
>> +{
>> + int ret;
>> + struct sd_req hdr;
>> +
>> + sd_init_req(&hdr, SD_OP_STOP_DELAY_RECOVERY);
>> + hdr.epoch = sd_epoch;
>> +
>> + ret = send_light_req(&hdr, sdhost, sdport);
>> + if (ret)
>> + return EXIT_FAILURE;
>> +
>> + printf("Delay recovery stop.\n");
>> + return EXIT_SUCCESS;
>> +}
>> +
>> +static struct subcommand delay_recovery_cmd[] = {
>> + {"start", NULL, "aph", "start delay recovery",
>> + 0, delay_recovery_start},
>> + {"stop", NULL, "aph", "stop delay recovery",
>> + 0, delay_recovery_stop},
>> + {NULL,},
>> +};
>> +
>> +struct command delay_recovery_command = {
>> + "delay_recovery",
>> + delay_recovery_cmd,
>> + NULL
>> +};
>> diff --git a/include/internal_proto.h b/include/internal_proto.h
>> index abc14dc..d6a7989 100644
>> --- a/include/internal_proto.h
>> +++ b/include/internal_proto.h
>> @@ -60,6 +60,8 @@
>> #define SD_OP_READ_PEER 0xA4
>> #define SD_OP_WRITE_PEER 0xA5
>> #define SD_OP_REMOVE_PEER 0xA6
>> +#define SD_OP_STOP_DELAY_RECOVERY 0xA7
>> +#define SD_OP_START_DELAY_RECOVERY 0xA8
>>
>> /* internal flags for hdr.flags, must be above 0x80 */
>> #define SD_FLAG_CMD_RECOVERY 0x0080
>> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
>> index 45a4b81..5a306a6 100644
>> --- a/include/sheepdog_proto.h
>> +++ b/include/sheepdog_proto.h
>> @@ -104,6 +104,10 @@
>>
>> #define STORE_LEN 16
>>
>> +/* status for delay recovery */
>> +#define SD_DELAY_RECOVERY_STOP 0
>> +#define SD_DELAY_RECOVERY_START 1
>> +
>> struct sd_req {
>> uint8_t proto_ver;
>> uint8_t opcode;
>> diff --git a/sheep/ops.c b/sheep/ops.c
>> index 8aa6b34..c0fa98e 100644
>> --- a/sheep/ops.c
>> +++ b/sheep/ops.c
>> @@ -260,6 +260,20 @@ static int cluster_shutdown(const struct sd_req *req, struct sd_rsp *rsp,
>> return SD_RES_SUCCESS;
>> }
>>
>> +static int cluster_start_delay_recovery(const struct sd_req *req,
>> + struct sd_rsp *rsp, void *data)
>> +{
>> + sys->delay_recovery = SD_DELAY_RECOVERY_START;
>> + return SD_RES_SUCCESS;
>> +}
>> +
>> +static int cluster_stop_delay_recovery(const struct sd_req *req,
>> + struct sd_rsp *rsp, void *data)
>> +{
>> + sys->delay_recovery = SD_DELAY_RECOVERY_STOP;
>> + return SD_RES_SUCCESS;
>> +}
>> +
>> static int cluster_get_vdi_attr(struct request *req)
>> {
>> const struct sd_req *hdr = &req->rq;
>> @@ -1000,6 +1014,18 @@ static struct sd_op_template sd_ops[] = {
>> .type = SD_OP_TYPE_PEER,
>> .process_work = peer_remove_obj,
>> },
>> +
>> + [SD_OP_START_DELAY_RECOVERY] = {
>> + .name = "START_DELAY_RECOVERY",
>> + .type = SD_OP_TYPE_CLUSTER,
>> + .process_main = cluster_start_delay_recovery,
>> + },
>> +
>> + [SD_OP_STOP_DELAY_RECOVERY] = {
>> + .name = "STOP_DELAY_RECOVERY",
>> + .type = SD_OP_TYPE_CLUSTER,
>> + .process_main = cluster_stop_delay_recovery,
>> + },
>> };
>>
>> struct sd_op_template *get_sd_op(uint8_t opcode)
>> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
>> index 530fe14..22d9d6c 100644
>> --- a/sheep/sheep_priv.h
>> +++ b/sheep/sheep_priv.h
>> @@ -116,6 +116,7 @@ struct cluster_info {
>>
>> int use_directio;
>> uint8_t gateway_only;
>> + uint8_t delay_recovery;
>>
>> struct work_queue *gateway_wqueue;
>> struct work_queue *io_wqueue;
>> --
>> 1.7.11.2
>>
>> --
>> sheepdog mailing list
>> sheepdog at lists.wpkg.org
>> http://lists.wpkg.org/mailman/listinfo/sheepdog
--
Yunkai Zhang
Work at Taobao
More information about the sheepdog
mailing list