[sheepdog] [RFC PATCH] zookeeper: add a timeout option to detect membership change

Liu Yuan namei.unix at gmail.com
Fri Aug 31 08:05:27 CEST 2012


From: Liu Yuan <tailai.ly at taobao.com>

The current 30-second timeout is hardcoded, which means we need to wait 30s to
get the notification that some node is down. To adapt to more varied networking
conditions, we'd better allow users to decide.

This option is also useful for test scripts, which normally expect instant
membership change notification.

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 sheep/cluster/zookeeper.c |   11 +++++------
 sheep/sheep.c             |   23 +++++++++++++++++++----
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/sheep/cluster/zookeeper.c b/sheep/cluster/zookeeper.c
index 4cd5be8..ee77bc0 100644
--- a/sheep/cluster/zookeeper.c
+++ b/sheep/cluster/zookeeper.c
@@ -22,8 +22,6 @@
 #include "event.h"
 #include "work.h"
 
-#define SESSION_TIMEOUT 30000		/* millisecond */
-#define MEMBER_CREATE_TIMEOUT SESSION_TIMEOUT
 #define MEMBER_CREATE_INTERVAL 10	/* millisecond */
 
 #define BASE_ZNODE "/sheepdog"
@@ -40,6 +38,8 @@
 		     (free((strs)->data), 0);			       \
 	     free(*(strs)->data))
 
+extern int zk_timeout;
+
 enum zk_event_type {
 	EVENT_JOIN_REQUEST = 1,
 	EVENT_JOIN_RESPONSE,
@@ -702,8 +702,7 @@ static void zk_handler(int listen_fd, int events, void *data)
 		if (is_master(zhandle, &this_node) &&
 		    !node_eq(&ev.sender.node, &this_node.node)) {
 			/* wait util the member node has been created */
-			int retry =
-				MEMBER_CREATE_TIMEOUT / MEMBER_CREATE_INTERVAL;
+			int retry = zk_timeout / MEMBER_CREATE_INTERVAL;
 
 			sprintf(path, MEMBER_ZNODE "/%s",
 				node_to_str(&ev.sender.node));
@@ -801,14 +800,14 @@ static int zk_init(const char *option)
 		return -1;
 	}
 
-	zhandle = zookeeper_init(option, watcher, SESSION_TIMEOUT, 0, NULL, 0);
+	zhandle = zookeeper_init(option, watcher, zk_timeout, 0, NULL, 0);
 	if (!zhandle) {
 		eprintf("failed to connect to zk server %s\n", option);
 		return -1;
 	}
 	dprintf("request session timeout:%dms, "
 		"negotiated session timeout:%dms\n",
-		SESSION_TIMEOUT, zoo_recv_timeout(zhandle));
+		zk_timeout, zoo_recv_timeout(zhandle));
 
 	zk_queue_init(zhandle);
 
diff --git a/sheep/sheep.c b/sheep/sheep.c
index e1434cf..c7da2ed 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -34,9 +34,13 @@
 #define DEFAULT_OBJECT_DIR "/tmp"
 #define LOG_FILE_NAME "sheep.log"
 
+#define DEFAULT_TIMEOUT 5000 /* 5s for Zookeeper timeout */
+
 LIST_HEAD(cluster_drivers);
 static char program_name[] = "sheep";
 
+int zk_timeout = DEFAULT_TIMEOUT;
+
 static struct option const long_options[] = {
 	{"cluster", required_argument, NULL, 'c'},
 	{"debug", no_argument, NULL, 'd'},
@@ -47,6 +51,7 @@ static struct option const long_options[] = {
 	{"loglevel", required_argument, NULL, 'l'},
 	{"myaddr", required_argument, NULL, 'y'},
 	{"stdout", no_argument, NULL, 'o'},
+	{"timeout", required_argument, NULL, 't'},
 	{"port", required_argument, NULL, 'p'},
 	{"disk-space", required_argument, NULL, 's'},
 	{"enable-cache", required_argument, NULL, 'w'},
@@ -55,7 +60,7 @@ static struct option const long_options[] = {
 	{NULL, 0, NULL, 0},
 };
 
-static const char *short_options = "c:dDfghl:op:P:s:w:y:z:";
+static const char *short_options = "c:dDfghl:op:P:s:t:w:y:z:";
 
 static void usage(int status)
 {
@@ -78,7 +83,8 @@ Options:\n\
   -p, --port              specify the TCP port on which to listen\n\
   -P, --pidfile           create a pid file\n\
   -s, --disk-space        specify the free disk space in megabytes\n\
-  -w, --enable-cache      enable object cache and specify the max size (M) and mode\n\
+  -t, --timeout           specify timeout to detect membership change for Zookeeper\n\
+  -w, --enable-cache      enable object cache and specify the max size (M)\n\
   -y, --myaddr            specify the address advertised to other sheep\n\
   -z, --zone              specify the zone id\n\
 ", PACKAGE_VERSION, program_name);
@@ -183,8 +189,7 @@ int main(int argc, char **argv)
 	int ch, longindex;
 	int ret, port = SD_LISTEN_PORT;
 	const char *dir = DEFAULT_OBJECT_DIR;
-	int is_daemon = 1;
-	int to_stdout = 0;
+	int is_daemon = 1, to_stdout = 0, timeout = DEFAULT_TIMEOUT;
 	int log_level = SDOG_INFO;
 	char path[PATH_MAX];
 	int64_t zone = -1;
@@ -289,6 +294,16 @@ int main(int argc, char **argv)
 			}
 			sys->disk_space = free_space * 1024 * 1024;
 			break;
+		case 't':
+			timeout = strtoll(optarg, &p, 10);
+			if (optarg == p || timeout <= 0 || timeout > 120) {
+				fprintf(stderr, "Invalid free space size '%s': "
+					"must be an integer between 0 and 120\n",
+					optarg);
+				exit(1);
+			}
+			zk_timeout = timeout * 1000;
+			break;
 		case 'c':
 			sys->cdrv = find_cdrv(optarg);
 			if (!sys->cdrv) {
-- 
1.7.10.2




More information about the sheepdog mailing list