[Sheepdog] [PATCH v3 1/7] sheep: add transient failure detection

MORITA Kazutaka morita.kazutaka at gmail.com
Wed May 2 21:37:36 CEST 2012


At Wed,  2 May 2012 15:12:49 +0800,
yaohaiting.wujue at gmail.com wrote:
> 
> From: HaiTing Yao <wujue.yht at taobao.com>
> 
> Sometimes we need to allow a node to come back within a short while.
> 
> When we need this:
> 
> 1, restart sheepdog daemon for upgrade or other purpose
> 
> 2, the corosync driver loses its token for a short while

This is a corosync specific problem, and should be handled by changing
parameters in corosync.conf, I think.

So I think the main benefit of this patchset is to allow us to restart
sheep daemons without changing node membership, but what's the reason
you want to avoid temporary membership changes?  Sheepdog blocks write
I/Os when it cannot create full replicas, so basically we should
remove the failed nodes from node membership ASAP.

Thanks,

Kazutaka

> 
> How to implement this:
> 
> Assume the node's departure is a transient failure and mark the node's
> status as failed. Wait a short while for the node to come back. If the
> time is up and the node has not come back, change the transient failure
> to a permanent one.
> 
> Here are some changes in header file.
> 
> Signed-off-by: HaiTing Yao <wujue.yht at taobao.com>
> ---
>  include/sheep.h    |   14 ++++++++++++++
>  sheep/cluster.h    |    8 ++++++++
>  sheep/sheep_priv.h |   10 ++++++++++
>  3 files changed, 32 insertions(+), 0 deletions(-)
> 
> diff --git a/include/sheep.h b/include/sheep.h
> index 7e287c4..b5a5d5d 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -66,6 +66,10 @@
>  #define SD_RES_NOT_FORMATTED 0x43 /* Sheepdog is not formatted yet */
>  #define SD_RES_INVALID_CTIME 0x44 /* Creation time of sheepdog is different */
>  #define SD_RES_INVALID_EPOCH 0x45 /* Invalid epoch */
> +/* node comes back again after temporary failure */
> +#define SD_RES_NODE_COME_BACK 0x46
> +/* cluster has temporarily failed node */
> +#define SD_RES_CLUSTER_TEMP_FAILURE 0x47
>  
>  #define SD_FLAG_NOHALT       0x0004 /* Serve the IO rquest even lack of nodes */
>  
> @@ -145,11 +149,17 @@ struct sd_node_rsp {
>  	uint64_t	store_free;
>  };
>  
> +enum node_vnode_status {
> +	NODE_STATUS_NORMAL,
> +	NODE_STATUS_FAIL,
> +};
> +
>  struct sd_node {
>  	uint8_t         addr[16];
>  	uint16_t        port;
>  	uint16_t	nr_vnodes;
>  	uint32_t	zone;
> +	uint32_t	status;
>  };
>  
>  struct sd_vnode {
> @@ -158,6 +168,7 @@ struct sd_vnode {
>  	uint16_t        port;
>  	uint16_t	node_idx;
>  	uint32_t	zone;
> +	uint32_t	status;
>  };
>  
>  struct epoch_log {
> @@ -295,6 +306,7 @@ static inline const char *sd_strerror(int err)
>  		{SD_RES_NOT_FORMATTED, "Cluster has not been formatted"},
>  		{SD_RES_INVALID_CTIME, "Creation times differ"},
>  		{SD_RES_INVALID_EPOCH, "Invalid epoch"},
> +		{SD_RES_CLUSTER_TEMP_FAILURE, "Nodes occur temporary failure"},
>  	};
>  
>  	for (i = 0; i < ARRAY_SIZE(errors); ++i)
> @@ -368,4 +380,6 @@ static inline int nodes_to_vnodes(struct sd_node *nodes, int nr,
>  	return nr_vnodes;
>  }
>  
> +int temp_failure_enabled(void);
> +
>  #endif
> diff --git a/sheep/cluster.h b/sheep/cluster.h
> index d543e99..12d141f 100644
> --- a/sheep/cluster.h
> +++ b/sheep/cluster.h
> @@ -30,6 +30,7 @@ enum cluster_join_result {
>  	CJ_RES_MASTER_TRANSFER, /* Transfer mastership.  The joining
>  				 * node has a newer epoch, so this node
>  				 * will leave the cluster (restart later). */
> +	CJ_RES_COME_BACK, /* node come back after reboot */
>  };
>  
>  struct cluster_driver {
> @@ -99,6 +100,13 @@ struct cluster_driver {
>  	 */
>  	int (*dispatch)(void);
>  
> +	/*
> +	 * remove one useless node
> +	 *
> +	 * Returns zero on success, -1 on error
> +	 */
> +	int (*remove_node)(void *node);
> +
>  	struct list_head list;
>  };
>  
> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
> index fd36ea4..83d2a9b 100644
> --- a/sheep/sheep_priv.h
> +++ b/sheep/sheep_priv.h
> @@ -49,6 +49,14 @@ struct event_struct {
>  	struct list_head event_list;
>  };
>  
> +#define SD_MAX_NODES_TEMP_FAIL (SD_MAX_NODES - 1)
> +
> +struct temp_failure {
> +	int busy;
> +	struct sd_node node;
> +	struct timer leave_timer;
> +};
> +
>  struct client_info {
>  	struct connection conn;
>  
> @@ -150,6 +158,8 @@ struct cluster_info {
>  	int use_directio;
>  	uint8_t async_flush;
>  
> +	uint32_t templeft_time;
> +
>  	struct work_queue *event_wqueue;
>  	struct work_queue *gateway_wqueue;
>  	struct work_queue *io_wqueue;
> -- 
> 1.7.1
> 
> -- 
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog



More information about the sheepdog mailing list