[Sheepdog] [PATCH v2 1/6] sheep: add transient failure detection
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Wed Apr 25 19:20:25 CEST 2012
Hi,
Could you rebase this patchset onto the master branch? Sorry for my
late review.
checkpatch.pl reports that this patchset contains some coding style
problems. I hope you will fix them in the next patchset. :)
Thanks,
Kazutaka
At Thu, 12 Apr 2012 10:37:58 +0800,
yaohaiting.wujue at gmail.com wrote:
>
> From: HaiTing Yao <wujue.yht at taobao.com>
>
> Sometimes we need to allow a node to come back after a short while.
>
> When we need this:
>
> 1, the sheepdog daemon is restarted for an upgrade or another purpose
>
> 2, the corosync driver loses its token for a short while
>
> How to implement this:
>
> Treat the node's departure as a transient failure and mark the node's
> status as failed. Wait a short while for the node to come back. If the
> time is up and the node has not come back, change the transient failure
> to a permanent one.
>
> This patch contains the header file changes.
>
> Signed-off-by: HaiTing Yao <wujue.yht at taobao.com>
> ---
> include/sheep.h | 12 ++++++++++++
> sheep/cluster.h | 8 ++++++++
> sheep/sheep_priv.h | 10 ++++++++++
> 3 files changed, 30 insertions(+), 0 deletions(-)
>
> diff --git a/include/sheep.h b/include/sheep.h
> index d010fdf..c4166f4 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -65,6 +65,8 @@
> #define SD_RES_NOT_FORMATTED 0x43 /* Sheepdog is not formatted yet */
> #define SD_RES_INVALID_CTIME 0x44 /* Creation time of sheepdog is different */
> #define SD_RES_INVALID_EPOCH 0x45 /* Invalid epoch */
> +#define SD_RES_NODE_COME_BACK 0x46 /* node come back again after temprorary failure */
> +#define SD_RES_CLUSTER_TEMP_FAILURE 0x47 /* cluster has temporarily failed node */
>
> #define SD_FLAG_NOHALT 0x0004 /* Serve the IO rquest even lack of nodes */
>
> @@ -144,11 +146,17 @@ struct sd_node_rsp {
> uint64_t store_free;
> };
>
> +enum node_vnode_status {
> + NODE_STATUS_NORMAL,
> + NODE_STATUS_FAIL,
> +};
> +
> struct sd_node {
> uint8_t addr[16];
> uint16_t port;
> uint16_t nr_vnodes;
> uint32_t zone;
> + uint32_t status;
> };
>
> struct sd_vnode {
> @@ -157,6 +165,7 @@ struct sd_vnode {
> uint16_t port;
> uint16_t node_idx;
> uint32_t zone;
> + uint32_t status;
> };
>
> struct epoch_log {
> @@ -293,6 +302,7 @@ static inline const char *sd_strerror(int err)
> {SD_RES_NOT_FORMATTED, "Cluster has not been formatted"},
> {SD_RES_INVALID_CTIME, "Creation times differ"},
> {SD_RES_INVALID_EPOCH, "Invalid epoch"},
> + {SD_RES_CLUSTER_TEMP_FAILURE, "Nodes occur temporary failure"},
> };
>
> for (i = 0; i < ARRAY_SIZE(errors); ++i)
> @@ -366,4 +376,6 @@ static inline int nodes_to_vnodes(struct sd_node *nodes, int nr,
> return nr_vnodes;
> }
>
> +int temp_failure_enabled(void);
> +
> #endif
> diff --git a/sheep/cluster.h b/sheep/cluster.h
> index b50dbb2..58811e6 100644
> --- a/sheep/cluster.h
> +++ b/sheep/cluster.h
> @@ -29,6 +29,7 @@ enum cluster_join_result {
> CJ_RES_MASTER_TRANSFER, /* Transfer mastership. The joining
> * node has a newer epoch, so this node
> * will leave the cluster (restart later). */
> + CJ_RES_COME_BACK, /* node come back after reboot */
> };
>
> struct cdrv_handlers {
> @@ -114,6 +115,13 @@ struct cluster_driver {
> */
> int (*dispatch)(void);
>
> + /*
> + * remove one useless node
> + *
> + * Returns zero on success, -1 on error
> + */
> + int (*remove_node)(void *node);
> +
> struct list_head list;
> };
>
> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
> index a9e8440..e18c696 100644
> --- a/sheep/sheep_priv.h
> +++ b/sheep/sheep_priv.h
> @@ -49,6 +49,14 @@ struct cpg_event {
> struct list_head cpg_event_list;
> };
>
> +#define SD_MAX_NODES_TEMP_FAIL (SD_MAX_NODES - 1)
> +
> +struct temp_failure {
> + int busy;
> + struct sd_node node;
> + struct timer leave_timer;
> +};
> +
> struct client_info {
> struct connection conn;
>
> @@ -150,6 +158,8 @@ struct cluster_info {
> int use_directio;
> uint8_t sync_flush;
>
> + uint32_t templeft_time;
> +
> struct work_queue *cpg_wqueue;
> struct work_queue *gateway_wqueue;
> struct work_queue *io_wqueue;
> --
> 1.7.1
>
> --
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog
More information about the sheepdog
mailing list