Hi, Could you rebase this patchset onto the master branch? Sorry for my late review. checkpatch.pl says that this patchset contains some coding style problems. I hope you will fix them in the next patchset. :) Thanks, Kazutaka At Thu, 12 Apr 2012 10:37:58 +0800, yaohaiting.wujue at gmail.com wrote: > > From: HaiTing Yao <wujue.yht at taobao.com> > > Sometimes we need a node to be able to come back after a while. > > When we need this: > > 1, restart sheepdog daemon for upgrade or other purposes > > 2, the corosync driver loses its token for a short while > > How to implement this: > > Assume the node's departure is a transient failure and mark the node's status as > failed. Wait a short while for the node to come back. If the time is up > and the node has not come back, change the transient failure to > a permanent one. > > Here are some changes in the header files. > > Signed-off-by: HaiTing Yao <wujue.yht at taobao.com> > --- > include/sheep.h | 12 ++++++++++++ > sheep/cluster.h | 8 ++++++++ > sheep/sheep_priv.h | 10 ++++++++++ > 3 files changed, 30 insertions(+), 0 deletions(-) > > diff --git a/include/sheep.h b/include/sheep.h > index d010fdf..c4166f4 100644 > --- a/include/sheep.h > +++ b/include/sheep.h > @@ -65,6 +65,8 @@ > #define SD_RES_NOT_FORMATTED 0x43 /* Sheepdog is not formatted yet */ > #define SD_RES_INVALID_CTIME 0x44 /* Creation time of sheepdog is different */ > #define SD_RES_INVALID_EPOCH 0x45 /* Invalid epoch */ > +#define SD_RES_NODE_COME_BACK 0x46 /* node come back again after temprorary failure */ > +#define SD_RES_CLUSTER_TEMP_FAILURE 0x47 /* cluster has temporarily failed node */ > > #define SD_FLAG_NOHALT 0x0004 /* Serve the IO rquest even lack of nodes */ > > @@ -144,11 +146,17 @@ struct sd_node_rsp { > uint64_t store_free; > }; > > +enum node_vnode_status { > + NODE_STATUS_NORMAL, > + NODE_STATUS_FAIL, > +}; > + > struct sd_node { > uint8_t addr[16]; > uint16_t port; > uint16_t nr_vnodes; > uint32_t zone; > + uint32_t status; > }; > > struct sd_vnode { > @@ -157,6 
+165,7 @@ struct sd_vnode { > uint16_t port; > uint16_t node_idx; > uint32_t zone; > + uint32_t status; > }; > > struct epoch_log { > @@ -293,6 +302,7 @@ static inline const char *sd_strerror(int err) > {SD_RES_NOT_FORMATTED, "Cluster has not been formatted"}, > {SD_RES_INVALID_CTIME, "Creation times differ"}, > {SD_RES_INVALID_EPOCH, "Invalid epoch"}, > + {SD_RES_CLUSTER_TEMP_FAILURE, "Nodes occur temporary failure"}, > }; > > for (i = 0; i < ARRAY_SIZE(errors); ++i) > @@ -366,4 +376,6 @@ static inline int nodes_to_vnodes(struct sd_node *nodes, int nr, > return nr_vnodes; > } > > +int temp_failure_enabled(void); > + > #endif > diff --git a/sheep/cluster.h b/sheep/cluster.h > index b50dbb2..58811e6 100644 > --- a/sheep/cluster.h > +++ b/sheep/cluster.h > @@ -29,6 +29,7 @@ enum cluster_join_result { > CJ_RES_MASTER_TRANSFER, /* Transfer mastership. The joining > * node has a newer epoch, so this node > * will leave the cluster (restart later). */ > + CJ_RES_COME_BACK, /* node come back after reboot */ > }; > > struct cdrv_handlers { > @@ -114,6 +115,13 @@ struct cluster_driver { > */ > int (*dispatch)(void); > > + /* > + * remove one useless node > + * > + * Returns zero on success, -1 on error > + */ > + int (*remove_node)(void *node); > + > struct list_head list; > }; > > diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h > index a9e8440..e18c696 100644 > --- a/sheep/sheep_priv.h > +++ b/sheep/sheep_priv.h > @@ -49,6 +49,14 @@ struct cpg_event { > struct list_head cpg_event_list; > }; > > +#define SD_MAX_NODES_TEMP_FAIL (SD_MAX_NODES - 1) > + > +struct temp_failure { > + int busy; > + struct sd_node node; > + struct timer leave_timer; > +}; > + > struct client_info { > struct connection conn; > > @@ -150,6 +158,8 @@ struct cluster_info { > int use_directio; > uint8_t sync_flush; > > + uint32_t templeft_time; > + > struct work_queue *cpg_wqueue; > struct work_queue *gateway_wqueue; > struct work_queue *io_wqueue; > -- > 1.7.1 > > -- > sheepdog mailing list > 
sheepdog at lists.wpkg.org > http://lists.wpkg.org/mailman/listinfo/sheepdog |