[sheepdog] [PATCH 1/2] zookeeper: remove master from zookeeper

MORITA Kazutaka morita.kazutaka at gmail.com
Thu Aug 8 01:08:13 CEST 2013


>  
> -static int zk_get_least_seq(const char *parent, char *least_seq_path,
> -			    int path_len, void *buf, int *buf_len)
> +static int zk_acquire_lock(const char *path)
>  {
> -	char path[MAX_NODE_STR_LEN], *p, *tmp;
> -	struct String_vector strs;
> -	int rc, least_seq = INT_MAX , seq;
> -
> +	int rc;
>  	while (true) {
> -		RETURN_IF_ERROR(zk_get_children(parent, &strs), "");
> -
> -		FOR_EACH_ZNODE(parent, path, &strs) {
> -			p = strrchr(path, '/');
> -			seq = strtol(++p, &tmp, 10);
> -			if (seq < least_seq)
> -				least_seq = seq;
> -		}
> -
> -		snprintf(path, MAX_NODE_STR_LEN, "%s/%010"PRId32,
> -			 parent, least_seq);
> -		rc = zk_get_data(path, buf, buf_len);
> +		rc = zk_create_node(path, "", 0, &ZOO_OPEN_ACL_UNSAFE,
> +				    ZOO_EPHEMERAL, NULL, 0);
>  		switch (rc) {
>  		case ZOK:
> -			strncpy(least_seq_path, path, path_len);
> -			return ZOK;
> -		case ZNONODE:
> +			return rc;
> +		case ZNODEEXISTS:
> +			sleep(1);

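Won't this take a very long time if we start many sheep daemons at the
same time?  Every daemon that finds the lock znode already present
sleeps for a second before trying again.  I wonder if we should
implement a complete distributed lock based on the ZooKeeper recipes: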

http://zookeeper.apache.org/doc/trunk/recipes.html#sc_recipes_Locks


>  static int zk_join(const struct sd_node *myself,
>  		   void *opaque, size_t opaque_len)
>  {
> @@ -800,7 +649,7 @@ static int zk_join(const struct sd_node *myself,
>  		exit(1);
>  	}
>  
> -	zk_compete_master();
> +	RETURN_IF_ERROR(zk_acquire_lock(JOIN_LOCK_ZNODE), "");
>  	RETURN_IF_ERROR(add_join_event(opaque, opaque_len), "");
>  
>  	return ZOK;

Shouldn't zk_join() return 0 on success and -1 on error?


> @@ -837,17 +686,24 @@ static int zk_unblock(void *msg, size_t msg_len)
>  
>  static void zk_handle_join(struct zk_event *ev)
>  {
> +	bool member_empty = false;
>  	sd_dprintf("sender: %s", node_to_str(&ev->sender.node));
> -	if (!uatomic_is_true(&is_master)) {
> -		/* Let's await master acking the join-request */
> -		queue_pos--;
> -		return;
> -	}
>  
> -	sd_join_handler(&ev->sender.node, sd_nodes, nr_sd_nodes, ev->buf);
> -	push_join_response(ev);
> +	/*
> +	 * If the join request is local and there is no joined node, sender node
> +	 * handle the join request by itself. Otherwise, it wait other nodes to
> +	 * handle it.
> +	 */
> +	if (node_eq(&ev->sender.node, &this_node.node)) {
> +		RETURN_VOID_IF_ERROR(zk_member_empty(&member_empty), "");
> +		if (!member_empty) {
> +			queue_pos--;
> +			return;
> +		}
> +	}
>  
> -	sd_dprintf("I'm the master now");
> +	if (sd_join_handler(&ev->sender.node, sd_nodes, nr_sd_nodes, ev->buf))
> +		push_join_response(ev);

I think queue_pos needs to be decremented if sd_join_handler() returns
false.

Thanks,

Kazutaka



More information about the sheepdog mailing list