[sheepdog] [PATCH] sheep: rework update node logic
MORITA Kazutaka
morita.kazutaka at gmail.com
Tue Jul 9 02:28:18 CEST 2013
At Sat, 6 Jul 2013 01:23:41 +0800,
Liu Yuan wrote:
>
> @@ -343,6 +346,13 @@ static bool __corosync_dispatch_one(struct corosync_event *cevent)
> sd_notify_handler(&cevent->sender.ent, cevent->msg,
> cevent->msg_len);
> break;
> + case COROSYNC_EVENT_TYPE_UPDATE_NODE:
> + node = &cevent->sender.ent;
> + idx = find_sd_node(cpg_nodes, nr_cpg_nodes, node);
> + assert(idx != 0);
assert(idx >= 0)?
idx can be zero if the node is the first member of cpg_nodes.
Actually, tests/functional 63 and 64 fail in my environment.
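Something like the following (an untested sketch) keeps the
first-member case valid while still catching a missing node, since
find_sd_node() returns a negative value only when the node is not
found:

  case COROSYNC_EVENT_TYPE_UPDATE_NODE:
          node = &cevent->sender.ent;
          idx = find_sd_node(cpg_nodes, nr_cpg_nodes, node);
          /* idx == 0 is valid: the sender may be the first member */
          assert(idx >= 0);
          cpg_nodes[idx].ent = *node;
          sd_update_node_handler(node);
          break;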
> + cpg_nodes[idx].ent = *node;
> + sd_update_node_handler(node);
> + break;
> }
>
> return true;
> @@ -806,12 +824,9 @@ again:
>
> static void corosync_update_node(struct sd_node *node)
> {
> - int idx;
> -
> - idx = find_sd_node(cpg_nodes, nr_cpg_nodes, node);
> - if (idx < 0)
> - return;
> - cpg_nodes[idx].ent = *node;
> + this_node.ent = *node;
For all the nodes to see the same node information, we shouldn't
update this_node here but in __corosync_dispatch_one().
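For example (a rough sketch; I'm assuming node_eq() or a similar
helper is usable here to compare two sd_nodes):

  case COROSYNC_EVENT_TYPE_UPDATE_NODE:
          node = &cevent->sender.ent;
          /* every node refreshes its own copy when it is the sender */
          if (node_eq(node, &this_node.ent))
                  this_node.ent = *node;
          idx = find_sd_node(cpg_nodes, nr_cpg_nodes, node);
          ...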
> + send_message(COROSYNC_MSG_TYPE_UPDATE_NODE, 0, &this_node,
> + NULL, 0, NULL, 0);
send_message() can fail. Return the result to the caller.
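That means changing update_node in struct cluster_driver to return
int.  Assuming send_message() reports failure through its return
value, a sketch that also leaves this_node untouched until the event
is dispatched:

  static int corosync_update_node(struct sd_node *node)
  {
          struct cpg_node cnode = this_node;

          cnode.ent = *node;
          /* propagate the result instead of silently dropping it */
          return send_message(COROSYNC_MSG_TYPE_UPDATE_NODE, 0, &cnode,
                              NULL, 0, NULL, 0);
  }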
> }
>
> static struct cluster_driver cdrv_corosync = {
> diff --git a/sheep/cluster/local.c b/sheep/cluster/local.c
> index 307a69e..2c5d197 100644
> --- a/sheep/cluster/local.c
> +++ b/sheep/cluster/local.c
> @@ -474,6 +474,7 @@ static bool local_process_event(void)
> sd_notify_handler(&ev->sender.node, ev->buf, ev->buf_len);
> break;
> case EVENT_UPDATE_NODE:
> + sd_update_node_handler(&ev->sender.node);
> break;
> }
> out:
> @@ -551,16 +552,12 @@ static int local_init(const char *option)
> return 0;
> }
>
> -/* FIXME: we have to call nr of nodes times to update nodes information */
> static void local_update_node(struct sd_node *node)
> {
> - struct local_node n = {
> - .node = *node,
> - };
> -
> + this_node.node = *node;
Update this_node in local_process_event().
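E.g. (sketch, again assuming node_eq()); local_update_node would then
queue a copy carrying the new node, as in the corosync case:

  case EVENT_UPDATE_NODE:
          /* all nodes run this; the sender refreshes its own copy too */
          if (node_eq(&ev->sender.node, &this_node.node))
                  this_node = ev->sender;
          sd_update_node_handler(&ev->sender.node);
          break;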
> shm_queue_lock();
>
> - add_event(EVENT_UPDATE_NODE, &n, NULL, 0);
> + add_event(EVENT_UPDATE_NODE, &this_node, NULL, 0);
>
> shm_queue_unlock();
> }
> diff --git a/sheep/cluster/shepherd.c b/sheep/cluster/shepherd.c
> index fba329c..e0aa6ea 100644
> --- a/sheep/cluster/shepherd.c
> +++ b/sheep/cluster/shepherd.c
> @@ -664,8 +664,7 @@ static void shepherd_unblock(void *msg, size_t msg_len)
> /* FIXME: shepherd server also has to update node information */
> static void shepherd_update_node(struct sd_node *node)
> {
> - struct sd_node *n = xlfind(node, nodes, nr_nodes, node_cmp);
> - *n = *node;
> + panic("not implemented");
panic() is not good at all. Please at least return SD_RES_NO_SUPPORT
to the caller.
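If the update_node callback is changed to return int as suggested
above, a minimal version would be:

  static int shepherd_update_node(struct sd_node *node)
  {
          /* FIXME: shepherd server also has to update node information */
          return SD_RES_NO_SUPPORT;
  }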
> }
>
> static struct cluster_driver cdrv_shepherd = {
> diff --git a/sheep/cluster/zookeeper.c b/sheep/cluster/zookeeper.c
> index 0feb8cf..6c40b5f 100644
> --- a/sheep/cluster/zookeeper.c
> +++ b/sheep/cluster/zookeeper.c
> @@ -47,6 +47,7 @@ enum zk_event_type {
> EVENT_BLOCK,
> EVENT_UNBLOCK,
> EVENT_NOTIFY,
> + EVENT_UPDATE_NODE,
> };
>
> struct zk_node {
> @@ -993,6 +994,21 @@ static void zk_handle_notify(struct zk_event *ev)
> sd_notify_handler(&ev->sender.node, ev->buf, ev->buf_len);
> }
>
> +static void zk_handle_update_node(struct zk_event *ev)
> +{
> + struct zk_node *t, *n = &ev->sender;
> +
> + sd_dprintf("%s", node_to_str(&n->node));
> +
> + pthread_rwlock_rdlock(&zk_tree_lock);
> + t = zk_tree_search_nolock(&n->node.nid);
> + assert(t);
> + t->node = n->node;
> + build_node_list();
> + pthread_rwlock_unlock(&zk_tree_lock);
> + sd_update_node_handler(&n->node);
> +}
> +
> static void (*const zk_event_handlers[])(struct zk_event *ev) = {
> [EVENT_JOIN_REQUEST] = zk_handle_join_request,
> [EVENT_JOIN_RESPONSE] = zk_handle_join_response,
> @@ -1000,6 +1016,7 @@ static void (*const zk_event_handlers[])(struct zk_event *ev) = {
> [EVENT_BLOCK] = zk_handle_block,
> [EVENT_UNBLOCK] = zk_handle_unblock,
> [EVENT_NOTIFY] = zk_handle_notify,
> + [EVENT_UPDATE_NODE] = zk_handle_update_node,
> };
>
> static const int zk_max_event_handlers = ARRAY_SIZE(zk_event_handlers);
> @@ -1135,20 +1152,8 @@ static int zk_init(const char *option)
>
> static void zk_update_node(struct sd_node *node)
> {
> - struct zk_node n = {
> - .node = *node,
> - };
> - struct zk_node *t;
> -
> - sd_dprintf("%s", node_to_str(&n.node));
> -
> - pthread_rwlock_rdlock(&zk_tree_lock);
> - t = zk_tree_search_nolock(&n.node.nid);
> - if (t) {
> - t->node = n.node;
> - build_node_list();
> - }
> - pthread_rwlock_unlock(&zk_tree_lock);
> + this_node.node = *node;
Update this_node in zk_handle_update_node().
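E.g. (sketch, assuming node_eq() as above):

  static void zk_handle_update_node(struct zk_event *ev)
  {
          struct zk_node *t, *n = &ev->sender;

          sd_dprintf("%s", node_to_str(&n->node));

          /* the sender also refreshes its own copy here */
          if (node_eq(&n->node, &this_node.node))
                  this_node.node = n->node;

          pthread_rwlock_rdlock(&zk_tree_lock);
          t = zk_tree_search_nolock(&n->node.nid);
          assert(t);
          t->node = n->node;
          build_node_list();
          pthread_rwlock_unlock(&zk_tree_lock);
          sd_update_node_handler(&n->node);
  }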
> + add_event(EVENT_UPDATE_NODE, &this_node, NULL, 0);
> }
>
> static struct cluster_driver cdrv_zookeeper = {
> diff --git a/sheep/group.c b/sheep/group.c
> index 6e01a8d..73a9ed4 100644
> --- a/sheep/group.c
> +++ b/sheep/group.c
> @@ -985,16 +985,15 @@ void sd_leave_handler(const struct sd_node *left, const struct sd_node *members,
> sockfd_cache_del(&left->nid);
> }
>
> -void update_node_size(struct sd_node *node)
> +static void update_node_size(struct sd_node *node)
> {
> struct vnode_info *cur_vinfo = main_thread_get(current_vnode_info);
> int idx = get_node_idx(cur_vinfo, node);
> assert(idx != -1);
> cur_vinfo->nodes[idx].space = node->space;
> - sys->cdrv->update_node(node);
> }
>
> -void kick_node_recover(void)
> +static void kick_node_recover(void)
> {
> struct vnode_info *old = main_thread_get(current_vnode_info);
>
> @@ -1006,6 +1005,12 @@ void kick_node_recover(void)
> put_vnode_info(old);
> }
>
> +void sd_update_node_handler(struct sd_node *node)
> +{
> + update_node_size(node);
> + kick_node_recover();
> +}
> +
> int create_cluster(int port, int64_t zone, int nr_vnodes,
> bool explicit_addr)
> {
> diff --git a/sheep/ops.c b/sheep/ops.c
> index bf975bf..49630a9 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -662,33 +662,9 @@ static int cluster_recovery_completion(const struct sd_req *req,
> return SD_RES_SUCCESS;
> }
>
> -static void do_reweight(struct work *work)
> +static void reweight_this_node(void)
> {
> - struct sd_req hdr;
> - int ret;
> -
> - sd_init_req(&hdr, SD_OP_UPDATE_SIZE);
> - hdr.flags = SD_FLAG_CMD_WRITE;
> - hdr.data_length = sizeof(sys->this_node);
> -
> - ret = exec_local_req(&hdr, &sys->this_node);
> - if (ret != SD_RES_SUCCESS)
> - sd_eprintf("failed to update node size");
> -}
> -
> -static void reweight_done(struct work *work)
> -{
> - free(work);
> -}
> -
> -static void reweight_node(void)
> -{
> - struct work *rw = xzalloc(sizeof(*rw));
> -
> - rw->fn = do_reweight;
> - rw->done = reweight_done;
> -
> - queue_work(sys->recovery_wqueue, rw);
> + sys->cdrv->update_node(&sys->this_node);
> }
>
> static bool node_size_varied(void)
> @@ -724,18 +700,7 @@ static int cluster_reweight(const struct sd_req *req, struct sd_rsp *rsp,
> void *data)
> {
> if (node_size_varied())
> - reweight_node();
> -
> - return SD_RES_SUCCESS;
> -}
> -
> -static int cluster_update_size(const struct sd_req *req, struct sd_rsp *rsp,
> - void *data)
> -{
> - struct sd_node *node = (struct sd_node *)data;
> -
> - update_node_size(node);
> - kick_node_recover();
> + reweight_this_node();
Calling sys->cdrv->update_node() directly looks simpler and easier to
read.
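I.e. simply:

  static int cluster_reweight(const struct sd_req *req, struct sd_rsp *rsp,
                              void *data)
  {
          if (node_size_varied())
                  sys->cdrv->update_node(&sys->this_node);

          return SD_RES_SUCCESS;
  }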
Thanks,
Kazutaka