[sheepdog] [PATCH] zookeeper: exit program on unrecoverable error

MORITA Kazutaka morita.kazutaka at gmail.com
Fri Jun 7 07:08:59 CEST 2013


From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>

This adds panic() where sheep receives unrecoverable error from
ZooKeeper.  It's not good to abort the program on error, but much
better than destroying membership information on each node.  It is
future work to remove those panic().

This also removes the ZOOAPI modifiers from the function definitions,
which are not ZooKeeper APIs.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 sheep/cluster/zookeeper.c |  109 ++++++++++++++++++++-------------------------
 1 file changed, 48 insertions(+), 61 deletions(-)

diff --git a/sheep/cluster/zookeeper.c b/sheep/cluster/zookeeper.c
index 45db10a..b9759ab 100644
--- a/sheep/cluster/zookeeper.c
+++ b/sheep/cluster/zookeeper.c
@@ -135,19 +135,18 @@ static inline struct zk_node *zk_tree_search(const struct node_id *nid)
 static zhandle_t *zhandle;
 static struct zk_node this_node;
 
-static inline ZOOAPI int zk_delete_node(const char *path, int version)
+static inline void zk_delete_node(const char *path, int version)
 {
 	int rc;
 	do {
 		rc = zoo_delete(zhandle, path, version);
 	} while (rc == ZOPERATIONTIMEOUT || rc == ZCONNECTIONLOSS);
+
 	if (rc != ZOK)
-		sd_eprintf("failed, path:%s, %s", path, zerror(rc));
-	return rc;
+		panic("failed, path:%s, %s", path, zerror(rc));
 }
 
-static inline ZOOAPI void
-zk_init_node(const char *path)
+static inline void zk_init_node(const char *path)
 {
 	int rc;
 	do {
@@ -159,7 +158,8 @@ zk_init_node(const char *path)
 		panic("failed, path:%s, %s", path, zerror(rc));
 }
 
-static inline ZOOAPI int
+/* return zero on success, -1 if path already exists */
+static inline int
 zk_create_node(const char *path, const char *value, int valuelen,
 	       const struct ACL_vector *acl, int flags, char *path_buffer,
 	       int path_buffer_len)
@@ -168,70 +168,74 @@ zk_create_node(const char *path, const char *value, int valuelen,
 	do {
 		rc = zoo_create(zhandle, path, value, valuelen, acl,
 				flags, path_buffer, path_buffer_len);
-		if (rc != ZOK && rc != ZNODEEXISTS)
-			sd_eprintf("failed, path:%s, %s", path, zerror(rc));
 	} while (rc == ZOPERATIONTIMEOUT || rc == ZCONNECTIONLOSS);
-	return rc;
+
+	if (rc != ZOK && rc != ZNODEEXISTS)
+		panic("failed, path:%s, %s", path, zerror(rc));
+
+	return rc == ZOK ? 0 : -1;
 }
 
 /*
  * Create a znode after adding a unique monotonically increasing sequence number
  * to the path name.
  *
- * Note that the caller has to retry this function when this returns
- * ZOPERATIONTIMEOUT or ZCONNECTIONLOSS and the znode is not created.
+ * return zero on success, -1 if it is not sure whether znode is created.
  */
-static inline ZOOAPI int
+static inline int
 zk_create_seq_node(const char *path, const char *value, int valuelen,
 		   char *path_buffer, int path_buffer_len)
 {
 	int rc;
 	rc = zoo_create(zhandle, path, value, valuelen, &ZOO_OPEN_ACL_UNSAFE,
 			ZOO_SEQUENCE, path_buffer, path_buffer_len);
-	if (rc != ZOK)
-		sd_iprintf("failed, path:%s, %s", path, zerror(rc));
+	if (rc != ZOK && rc != ZOPERATIONTIMEOUT && rc != ZCONNECTIONLOSS)
+		panic("failed, path:%s, %s", path, zerror(rc));
 
-	return rc;
+	return rc == ZOK ? 0 : -1;
 }
 
-static inline ZOOAPI int zk_get_data(const char *path, void *buffer,
-				     int *buffer_len)
+/* return zero on success, -1 if path doesn't exist */
+static inline int zk_get_data(const char *path, void *buffer, int *buffer_len)
 {
 	int rc;
 	do {
 		rc = zoo_get(zhandle, path, 1, (char *)buffer,
 			     buffer_len, NULL);
-		if (rc != ZOK)
-			sd_eprintf("failed, path:%s, %s", path, zerror(rc));
 	} while (rc == ZOPERATIONTIMEOUT || rc == ZCONNECTIONLOSS);
-	return rc;
+
+	if (rc != ZOK && rc != ZNONODE)
+		panic("failed, path:%s, %s", path, zerror(rc));
+
+	return rc == ZOK ? 0 : -1;
 }
 
-static inline ZOOAPI int
+static inline void
 zk_set_data(const char *path, const char *buffer, int buflen, int version)
 {
 	int rc;
 	do {
 		rc = zoo_set(zhandle, path, buffer, buflen, version);
 	} while (rc == ZOPERATIONTIMEOUT || rc == ZCONNECTIONLOSS);
+
 	if (rc != ZOK)
 		panic("failed, path:%s, %s", path, zerror(rc));
-	return rc;
 }
 
-static inline ZOOAPI int zk_node_exists(const char *path)
+static inline bool zk_node_exists(const char *path)
 {
 	int rc;
 	do {
 		rc = zoo_exists(zhandle, path, 1, NULL);
-		if (rc != ZOK && rc != ZNONODE)
-			sd_eprintf("failed, path:%s, %s", path, zerror(rc));
 	} while (rc == ZOPERATIONTIMEOUT || rc == ZCONNECTIONLOSS);
 
-	return rc;
+	if (rc != ZOK && rc != ZNONODE)
+		panic("failed, path:%s, %s", path, zerror(rc));
+
+	return rc == ZOK;
 }
 
-static inline ZOOAPI void zk_get_children(const char *path,
+static inline void zk_get_children(const char *path,
 					  struct String_vector *strings)
 {
 	int rc;
@@ -248,16 +252,11 @@ static int32_t queue_pos;
 
 static bool zk_queue_peek(void)
 {
-	int rc;
 	char path[MAX_NODE_STR_LEN];
 
 	snprintf(path, sizeof(path), QUEUE_ZNODE "/%010"PRId32, queue_pos);
 
-	rc = zk_node_exists(path);
-	if (rc == ZOK)
-		return true;
-
-	return false;
+	return zk_node_exists(path);
 }
 
 /* return true if there is a node with 'id' in the queue. */
@@ -271,20 +270,15 @@ static bool zk_find_seq_node(uint64_t id, char *seq_path, int seq_path_len)
 		snprintf(seq_path, seq_path_len, QUEUE_ZNODE"/%010"PRId32, seq);
 		len = offsetof(typeof(ev), id) + sizeof(ev.id);
 		rc = zk_get_data(seq_path, &ev, &len);
-		switch (rc) {
-		case ZOK:
-			if (ev.id == id) {
-				sd_dprintf("id %"PRIx64" is found in %s", id,
-					   seq_path);
-				return true;
-			}
-			break;
-		case ZNONODE:
+		if (rc < 0) {
 			sd_dprintf("id %"PRIx64" is not found", id);
 			return false;
-		default:
-			panic("failed, %s", zerror(rc));
-			break;
+		}
+
+		if (ev.id == id) {
+			sd_dprintf("id %"PRIx64" is found in %s", id,
+				   seq_path);
+			return true;
 		}
 	}
 }
@@ -299,19 +293,12 @@ static void zk_queue_push(struct zk_event *ev)
 	snprintf(path, sizeof(path), "%s/", QUEUE_ZNODE);
 again:
 	rc = zk_create_seq_node(path, (char *)ev, len, buf, sizeof(buf));
-	switch (rc) {
-	case ZOK:
-		/* Success */
-		break;
-	case ZOPERATIONTIMEOUT:
-	case ZCONNECTIONLOSS:
+	if (rc < 0) {
 		if (!zk_find_seq_node(ev->id, buf, sizeof(buf)))
 			/* retry if seq_node was not created */
 			goto again;
-		break;
-	default:
-		panic("failed, path:%s, %s", path, zerror(rc));
 	}
+
 	if (first_push) {
 		int32_t seq;
 
@@ -357,8 +344,8 @@ static void zk_queue_pop_advance(struct zk_event *ev)
 	len = sizeof(*ev);
 	snprintf(path, sizeof(path), QUEUE_ZNODE "/%010"PRId32, queue_pos);
 	rc = zk_get_data(path, ev, &len);
-	if (rc != ZOK)
-		panic("failed to get data from %s, %s", path, zerror(rc));
+	if (rc < 0)
+		panic("failed to get data from %s", path);
 	sd_dprintf("%s, type:%d, len:%d, pos:%"PRId32, path, ev->type, len,
 		   queue_pos);
 	queue_pos++;
@@ -553,20 +540,18 @@ static int add_join_event(void *msg, size_t msg_len)
 static int zk_join(const struct sd_node *myself,
 		   void *opaque, size_t opaque_len)
 {
-	int rc;
 	char path[MAX_NODE_STR_LEN];
 
 	this_node.node = *myself;
 
 	snprintf(path, sizeof(path), MEMBER_ZNODE "/%s", node_to_str(myself));
-	rc = zk_node_exists(path);
-	if (rc == ZOK) {
+	if (zk_node_exists(path)) {
 		sd_eprintf("Previous zookeeper session exist, shoot myself.");
 		exit(1);
 	}
 
 	/* For concurrent nodes setup, we allow only one to continue */
-	while (zk_member_empty() && zk_master_create() != ZOK)
+	while (zk_member_empty() && zk_master_create() < 0)
 		;/* wait */
 
 	return add_join_event(opaque, opaque_len);
@@ -631,7 +616,9 @@ static void watch_all_nodes(void)
 		return;
 
 	FOR_EACH_ZNODE(MEMBER_ZNODE, path, &strs) {
-		zk_get_data(path, &znode, &len);
+		int rc = zk_get_data(path, &znode, &len);
+		if (rc < 0)
+			panic("failed to get data from %s", path);
 	}
 }
 
-- 
1.7.9.5




More information about the sheepdog mailing list