[sheepdog] [UPDATE PATCH] zookeeper: add a timeout option to detect membership change

Yunkai Zhang yunkai.me at gmail.com
Fri Aug 31 08:58:58 CEST 2012


On Fri, Aug 31, 2012 at 2:46 PM, Liu Yuan <namei.unix at gmail.com> wrote:
> From: Liu Yuan <tailai.ly at taobao.com>
>
> The current 30-second timeout is hardcoded, which means we need to wait 30s
> to get the notification that some node is down. To adapt to various networking
> conditions, we'd better allow users to decide.
>
> This option is also useful for test scripts, which normally expect instant
> membership change notification.
>
> Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
> ---
>  sheep/cluster/zookeeper.c |   11 +++++------
>  sheep/sheep.c             |   23 +++++++++++++++++++----
>  2 files changed, 24 insertions(+), 10 deletions(-)
>
> diff --git a/sheep/cluster/zookeeper.c b/sheep/cluster/zookeeper.c
> index 4cd5be8..ee77bc0 100644
> --- a/sheep/cluster/zookeeper.c
> +++ b/sheep/cluster/zookeeper.c
> @@ -22,8 +22,6 @@
>  #include "event.h"
>  #include "work.h"
>
> -#define SESSION_TIMEOUT 30000          /* millisecond */
> -#define MEMBER_CREATE_TIMEOUT SESSION_TIMEOUT
>  #define MEMBER_CREATE_INTERVAL 10      /* millisecond */
>
>  #define BASE_ZNODE "/sheepdog"
> @@ -40,6 +38,8 @@
>                      (free((strs)->data), 0);                          \
>              free(*(strs)->data))
>
> +extern int zk_timeout;
> +
>  enum zk_event_type {
>         EVENT_JOIN_REQUEST = 1,
>         EVENT_JOIN_RESPONSE,
> @@ -702,8 +702,7 @@ static void zk_handler(int listen_fd, int events, void *data)
>                 if (is_master(zhandle, &this_node) &&
>                     !node_eq(&ev.sender.node, &this_node.node)) {
>                         /* wait util the member node has been created */
> -                       int retry =
> -                               MEMBER_CREATE_TIMEOUT / MEMBER_CREATE_INTERVAL;
> +                       int retry = zk_timeout / MEMBER_CREATE_INTERVAL;
>
>                         sprintf(path, MEMBER_ZNODE "/%s",
>                                 node_to_str(&ev.sender.node));
> @@ -801,14 +800,14 @@ static int zk_init(const char *option)
>                 return -1;
>         }
>
> -       zhandle = zookeeper_init(option, watcher, SESSION_TIMEOUT, 0, NULL, 0);
> +       zhandle = zookeeper_init(option, watcher, zk_timeout, 0, NULL, 0);
>         if (!zhandle) {
>                 eprintf("failed to connect to zk server %s\n", option);
>                 return -1;
>         }
>         dprintf("request session timeout:%dms, "
>                 "negotiated session timeout:%dms\n",
> -               SESSION_TIMEOUT, zoo_recv_timeout(zhandle));
> +               zk_timeout, zoo_recv_timeout(zhandle));
>
>         zk_queue_init(zhandle);
>
> diff --git a/sheep/sheep.c b/sheep/sheep.c
> index e1434cf..228b1f9 100644
> --- a/sheep/sheep.c
> +++ b/sheep/sheep.c
> @@ -34,9 +34,13 @@
>  #define DEFAULT_OBJECT_DIR "/tmp"
>  #define LOG_FILE_NAME "sheep.log"
>
> +#define DEFAULT_TIMEOUT 5000 /* 5s for Zookeeper timeout */
> +
>  LIST_HEAD(cluster_drivers);
>  static char program_name[] = "sheep";
>
> +int zk_timeout = DEFAULT_TIMEOUT;
> +
>  static struct option const long_options[] = {
>         {"cluster", required_argument, NULL, 'c'},
>         {"debug", no_argument, NULL, 'd'},
> @@ -47,6 +51,7 @@ static struct option const long_options[] = {
>         {"loglevel", required_argument, NULL, 'l'},
>         {"myaddr", required_argument, NULL, 'y'},
>         {"stdout", no_argument, NULL, 'o'},
> +       {"timeout", required_argument, NULL, 't'},
>         {"port", required_argument, NULL, 'p'},
>         {"disk-space", required_argument, NULL, 's'},
>         {"enable-cache", required_argument, NULL, 'w'},
> @@ -55,7 +60,7 @@ static struct option const long_options[] = {
>         {NULL, 0, NULL, 0},
>  };
>
> -static const char *short_options = "c:dDfghl:op:P:s:w:y:z:";
> +static const char *short_options = "c:dDfghl:op:P:s:t:w:y:z:";
>
>  static void usage(int status)
>  {
> @@ -78,7 +83,8 @@ Options:\n\
>    -p, --port              specify the TCP port on which to listen\n\
>    -P, --pidfile           create a pid file\n\
>    -s, --disk-space        specify the free disk space in megabytes\n\
> -  -w, --enable-cache      enable object cache and specify the max size (M) and mode\n\
> +  -t, --timeout           specify timeout to detect membership change for Zookeeper\n\
> +  -w, --enable-cache      enable object cache and specify the max size (M)\n\
>    -y, --myaddr            specify the address advertised to other sheep\n\
>    -z, --zone              specify the zone id\n\
>  ", PACKAGE_VERSION, program_name);
> @@ -183,8 +189,7 @@ int main(int argc, char **argv)
>         int ch, longindex;
>         int ret, port = SD_LISTEN_PORT;
>         const char *dir = DEFAULT_OBJECT_DIR;
> -       int is_daemon = 1;
> -       int to_stdout = 0;
> +       int is_daemon = 1, to_stdout = 0, timeout = DEFAULT_TIMEOUT;
>         int log_level = SDOG_INFO;
>         char path[PATH_MAX];
>         int64_t zone = -1;
> @@ -289,6 +294,16 @@ int main(int argc, char **argv)
>                         }
>                         sys->disk_space = free_space * 1024 * 1024;
>                         break;
> +               case 't':
> +                       timeout = strtoll(optarg, &p, 10);
> +                       if (optarg == p || timeout <= 0 || timeout > 120) {
> +                               fprintf(stderr, "Invalid timeout value '%s': "
> +                                       "must be an integer between 0 and 120\n",
> +                                       optarg);
> +                               exit(1);
> +                       }
> +                       zk_timeout = timeout * 1000;
> +                       break;
>                 case 'c':
>                         sys->cdrv = find_cdrv(optarg);
>                         if (!sys->cdrv) {
> --
> 1.7.10.2
>

Good for me.


-- 
Yunkai Zhang
Work at Taobao



More information about the sheepdog mailing list