[sheepdog] [PATCH 1/3] collie: add delay_recovery {start|stop} command

Yunkai Zhang yunkai.me at gmail.com
Mon Jul 30 07:05:41 CEST 2012


I also considered 'collie cluster delay_recovery {start|stop|info}',
but it seems too long to type. What do you think?

On Mon, Jul 30, 2012 at 12:58 PM, Yunkai Zhang <yunkai.me at gmail.com> wrote:
> On Mon, Jul 30, 2012 at 12:43 PM, MORITA Kazutaka
> <morita.kazutaka at lab.ntt.co.jp> wrote:
>> At Sun, 29 Jul 2012 22:29:20 +0800,
>> Yunkai Zhang wrote:
>>>
>>> From: Yunkai Zhang <qiushu.zyk at taobao.com>
>>>
>>> = What is delay recovery =
>>> With delay recovery, we can add (or kill) multiple nodes to (or from) the
>>> cluster at leisure; only one recovery operation is triggered at the end of
>>> delay recovery, which helps reduce the cluster's fluctuation.
>>>
>>> = Usage =
>>> 1) Start delay recovery transaction:
>>>  $ collie delay_recovery start
>>>  Delay recovery start ...
>>>
>>> 2) Add(kill) multiple nodes into(from) cluster
>>>  ...
>>>
>>> 3) Stop delay recovery transaction:
>>>  $ collie delay_recovery stop
>>>  Delay recovery stop.
>>>
>>> This patch only implements the command-line tools to update the internal
>>> status; the next patch will do the real work of delay recovery.
>>>
>>> Signed-off-by: Yunkai Zhang <qiushu.zyk at taobao.com>
>>> ---
>>>  collie/Makefile.am       |  3 ++-
>>>  collie/collie.c          |  1 +
>>>  collie/collie.h          |  1 +
>>>  collie/delay_recovery.c  | 65 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>  include/internal_proto.h |  2 ++
>>>  include/sheepdog_proto.h |  4 +++
>>>  sheep/ops.c              | 26 +++++++++++++++++++
>>>  sheep/sheep_priv.h       |  1 +
>>>  8 files changed, 102 insertions(+), 1 deletion(-)
>>>  create mode 100644 collie/delay_recovery.c
>>
>> We use the first argument of the collie command as a target of the
>> operation.  I prefer 'collie cluster recovery <operation>'.
>
> Yes, I originally wanted to use 'collie cluster recovery xxx', but I
> found that we already have another command, 'collie cluster recover'. I
> worry that it will confuse users. What do you think?
>
>
>>
>> Thanks,
>>
>> Kazutaka
>>
>>
>>>
>>> diff --git a/collie/Makefile.am b/collie/Makefile.am
>>> index d98b709..e7821b1 100644
>>> --- a/collie/Makefile.am
>>> +++ b/collie/Makefile.am
>>> @@ -23,7 +23,8 @@ INCLUDES            = -I$(top_builddir)/include -I$(top_srcdir)/include
>>>
>>>  sbin_PROGRAMS                = collie
>>>
>>> -collie_SOURCES               = collie.c common.c treeview.c vdi.c node.c cluster.c
>>> +collie_SOURCES               = collie.c common.c treeview.c vdi.c node.c cluster.c \
>>> +                       delay_recovery.c
>>>
>>>  if BUILD_TRACE
>>>  collie_SOURCES          += debug.c
>>> diff --git a/collie/collie.c b/collie/collie.c
>>> index fda9a59..7d30537 100644
>>> --- a/collie/collie.c
>>> +++ b/collie/collie.c
>>> @@ -285,6 +285,7 @@ int main(int argc, char **argv)
>>>               vdi_command,
>>>               node_command,
>>>               cluster_command,
>>> +             delay_recovery_command,
>>>               debug_command,
>>>               {NULL,}
>>>       };
>>> diff --git a/collie/collie.h b/collie/collie.h
>>> index 75a675c..32a0368 100644
>>> --- a/collie/collie.h
>>> +++ b/collie/collie.h
>>> @@ -76,6 +76,7 @@ int send_light_req(struct sd_req *hdr, const char *host, int port);
>>>  extern struct command vdi_command;
>>>  extern struct command node_command;
>>>  extern struct command cluster_command;
>>> +extern struct command delay_recovery_command;
>>>
>>>  #ifdef ENABLE_TRACE
>>>    extern struct command debug_command;
>>> diff --git a/collie/delay_recovery.c b/collie/delay_recovery.c
>>> new file mode 100644
>>> index 0000000..2cd4841
>>> --- /dev/null
>>> +++ b/collie/delay_recovery.c
>>> @@ -0,0 +1,65 @@
>>> +/*
>>> + * Copyright (C) 2011 Taobao Inc.
>>> + *
>>> + * Yunkai Zhang <yunkai.me at gmail.com>
>>> + *
>>> + * This program is free software; you can redistribute it and/or
>>> + * modify it under the terms of the GNU General Public License version
>>> + * 2 as published by the Free Software Foundation.
>>> + *
>>> + * You should have received a copy of the GNU General Public License
>>> + * along with this program. If not, see <http://www.gnu.org/licenses/>.
>>> + */
>>> +
>>> +#include <time.h>
>>> +#include <string.h>
>>> +#include <ctype.h>
>>> +#include <sys/time.h>
>>> +
>>> +#include "collie.h"
>>> +
>>> +static int delay_recovery_start(int argc, char **argv)
>>> +{
>>> +     int ret;
>>> +     struct sd_req hdr;
>>> +
>>> +     sd_init_req(&hdr, SD_OP_START_DELAY_RECOVERY);
>>> +     hdr.epoch = sd_epoch;
>>> +
>>> +     ret = send_light_req(&hdr, sdhost, sdport);
>>> +     if (ret)
>>> +             return EXIT_FAILURE;
>>> +
>>> +     printf("Delay recovery start ...\n");
>>> +     return EXIT_SUCCESS;
>>> +}
>>> +
>>> +static int delay_recovery_stop(int argc, char **argv)
>>> +{
>>> +     int ret;
>>> +     struct sd_req hdr;
>>> +
>>> +     sd_init_req(&hdr, SD_OP_STOP_DELAY_RECOVERY);
>>> +     hdr.epoch = sd_epoch;
>>> +
>>> +     ret = send_light_req(&hdr, sdhost, sdport);
>>> +     if (ret)
>>> +             return EXIT_FAILURE;
>>> +
>>> +     printf("Delay recovery stop.\n");
>>> +     return EXIT_SUCCESS;
>>> +}
>>> +
>>> +static struct subcommand delay_recovery_cmd[] = {
>>> +     {"start", NULL, "aph", "start delay recovery",
>>> +      0, delay_recovery_start},
>>> +     {"stop", NULL, "aph", "stop delay recovery",
>>> +      0, delay_recovery_stop},
>>> +     {NULL,},
>>> +};
>>> +
>>> +struct command delay_recovery_command = {
>>> +     "delay_recovery",
>>> +     delay_recovery_cmd,
>>> +     NULL
>>> +};
>>> diff --git a/include/internal_proto.h b/include/internal_proto.h
>>> index abc14dc..d6a7989 100644
>>> --- a/include/internal_proto.h
>>> +++ b/include/internal_proto.h
>>> @@ -60,6 +60,8 @@
>>>  #define SD_OP_READ_PEER      0xA4
>>>  #define SD_OP_WRITE_PEER     0xA5
>>>  #define SD_OP_REMOVE_PEER    0xA6
>>> +#define SD_OP_STOP_DELAY_RECOVERY 0xA7
>>> +#define SD_OP_START_DELAY_RECOVERY 0xA8
>>>
>>>  /* internal flags for hdr.flags, must be above 0x80 */
>>>  #define SD_FLAG_CMD_RECOVERY 0x0080
>>> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
>>> index 45a4b81..5a306a6 100644
>>> --- a/include/sheepdog_proto.h
>>> +++ b/include/sheepdog_proto.h
>>> @@ -104,6 +104,10 @@
>>>
>>>  #define STORE_LEN 16
>>>
>>> +/* status for delay recovery */
>>> +#define SD_DELAY_RECOVERY_STOP 0
>>> +#define SD_DELAY_RECOVERY_START 1
>>> +
>>>  struct sd_req {
>>>       uint8_t         proto_ver;
>>>       uint8_t         opcode;
>>> diff --git a/sheep/ops.c b/sheep/ops.c
>>> index 8aa6b34..c0fa98e 100644
>>> --- a/sheep/ops.c
>>> +++ b/sheep/ops.c
>>> @@ -260,6 +260,20 @@ static int cluster_shutdown(const struct sd_req *req, struct sd_rsp *rsp,
>>>       return SD_RES_SUCCESS;
>>>  }
>>>
>>> +static int cluster_start_delay_recovery(const struct sd_req *req,
>>> +                                 struct sd_rsp *rsp, void *data)
>>> +{
>>> +     sys->delay_recovery = SD_DELAY_RECOVERY_START;
>>> +     return SD_RES_SUCCESS;
>>> +}
>>> +
>>> +static int cluster_stop_delay_recovery(const struct sd_req *req,
>>> +                                struct sd_rsp *rsp, void *data)
>>> +{
>>> +     sys->delay_recovery = SD_DELAY_RECOVERY_STOP;
>>> +     return SD_RES_SUCCESS;
>>> +}
>>> +
>>>  static int cluster_get_vdi_attr(struct request *req)
>>>  {
>>>       const struct sd_req *hdr = &req->rq;
>>> @@ -1000,6 +1014,18 @@ static struct sd_op_template sd_ops[] = {
>>>               .type = SD_OP_TYPE_PEER,
>>>               .process_work = peer_remove_obj,
>>>       },
>>> +
>>> +     [SD_OP_START_DELAY_RECOVERY] = {
>>> +             .name = "START_DELAY_RECOVERY",
>>> +             .type = SD_OP_TYPE_CLUSTER,
>>> +             .process_main = cluster_start_delay_recovery,
>>> +     },
>>> +
>>> +     [SD_OP_STOP_DELAY_RECOVERY] = {
>>> +             .name = "STOP_DELAY_RECOVERY",
>>> +             .type = SD_OP_TYPE_CLUSTER,
>>> +             .process_main = cluster_stop_delay_recovery,
>>> +     },
>>>  };
>>>
>>>  struct sd_op_template *get_sd_op(uint8_t opcode)
>>> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
>>> index 530fe14..22d9d6c 100644
>>> --- a/sheep/sheep_priv.h
>>> +++ b/sheep/sheep_priv.h
>>> @@ -116,6 +116,7 @@ struct cluster_info {
>>>
>>>       int use_directio;
>>>       uint8_t gateway_only;
>>> +     uint8_t delay_recovery;
>>>
>>>       struct work_queue *gateway_wqueue;
>>>       struct work_queue *io_wqueue;
>>> --
>>> 1.7.11.2
>>>
>>> --
>>> sheepdog mailing list
>>> sheepdog at lists.wpkg.org
>>> http://lists.wpkg.org/mailman/listinfo/sheepdog
>
>
>
> --
> Yunkai Zhang
> Work at Taobao



-- 
Yunkai Zhang
Work at Taobao



More information about the sheepdog mailing list