<br><br>
<div class="gmail_quote">On Thu, Apr 26, 2012 at 1:20 AM, MORITA Kazutaka <span dir="ltr">&lt;<a href="mailto:morita.kazutaka@lab.ntt.co.jp" target="_blank">morita.kazutaka@lab.ntt.co.jp</a>&gt;</span> wrote:<br>
<blockquote style="BORDER-LEFT:#ccc 1px solid;MARGIN:0px 0px 0px 0.8ex;PADDING-LEFT:1ex" class="gmail_quote">Hi,<br><br>Could you rebase this patchset onto the master branch? Sorry for my<br>late review.<br><br><a href="http://checkpatch.pl/" target="_blank">checkpatch.pl</a> says that this patchset contains some coding style<br>
problems. I hope you would fix them in the next patchset. :)<br><br>Thanks,<br><br>Kazutaka<br></blockquote>
<div> </div>
<div>Sorry for the delay. I have rebased the code and submitted a new patch.</div>
<div> </div>
<div>I ran <a href="http://checkpatch.pl">checkpatch.pl</a> on my patch and fixed the coding style problems it reported this time.</div>
<div> </div>
<div>Thanks</div>
<div>Haiti</div>
<div> </div>
<blockquote style="BORDER-LEFT:#ccc 1px solid;MARGIN:0px 0px 0px 0.8ex;PADDING-LEFT:1ex" class="gmail_quote"><br><br>At Thu, 12 Apr 2012 10:37:58 +0800,<br>
<div class="HOEnZb">
<div class="h5"><a href="mailto:yaohaiting.wujue@gmail.com">yaohaiting.wujue@gmail.com</a> wrote:<br>><br>> From: HaiTing Yao <<a href="mailto:wujue.yht@taobao.com">wujue.yht@taobao.com</a>><br>><br>> Sometimes we need node can be back in a while.<br>
><br>> When we need this:<br>><br>> 1, restart sheepdog daemon for upgrade or other purpose<br>><br>> 2, the corosync driver lose its token for a short while<br>><br>> How to implement this:<br>><br>
> Assume the node leave as transient failure and mark the node's status as<br>> failed. Wait to the node come back for a short while. If the time is up<br>> and the node has not been back, change the transient failure to<br>
> permanent one.<br>><br>> Here are some changes in header file.<br>><br>> Signed-off-by: HaiTing Yao <<a href="mailto:wujue.yht@taobao.com">wujue.yht@taobao.com</a>><br>> ---<br>> include/sheep.h | 12 ++++++++++++<br>
> sheep/cluster.h | 8 ++++++++<br>> sheep/sheep_priv.h | 10 ++++++++++<br>> 3 files changed, 30 insertions(+), 0 deletions(-)<br>><br>> diff --git a/include/sheep.h b/include/sheep.h<br>> index d010fdf..c4166f4 100644<br>
> --- a/include/sheep.h<br>> +++ b/include/sheep.h<br>> @@ -65,6 +65,8 @@<br>> #define SD_RES_NOT_FORMATTED 0x43 /* Sheepdog is not formatted yet */<br>> #define SD_RES_INVALID_CTIME 0x44 /* Creation time of sheepdog is different */<br>
> #define SD_RES_INVALID_EPOCH 0x45 /* Invalid epoch */<br>> +#define SD_RES_NODE_COME_BACK 0x46 /* node come back again after temprorary failure */<br>> +#define SD_RES_CLUSTER_TEMP_FAILURE 0x47 /* cluster has temporarily failed node */<br>
><br>> #define SD_FLAG_NOHALT 0x0004 /* Serve the IO rquest even lack of nodes */<br>><br>> @@ -144,11 +146,17 @@ struct sd_node_rsp {<br>> uint64_t store_free;<br>> };<br>><br>> +enum node_vnode_status {<br>
> + NODE_STATUS_NORMAL,<br>> + NODE_STATUS_FAIL,<br>> +};<br>> +<br>> struct sd_node {<br>> uint8_t addr[16];<br>> uint16_t port;<br>> uint16_t nr_vnodes;<br>
> uint32_t zone;<br>> + uint32_t status;<br>> };<br>><br>> struct sd_vnode {<br>> @@ -157,6 +165,7 @@ struct sd_vnode {<br>> uint16_t port;<br>> uint16_t node_idx;<br>
> uint32_t zone;<br>> + uint32_t status;<br>> };<br>><br>> struct epoch_log {<br>> @@ -293,6 +302,7 @@ static inline const char *sd_strerror(int err)<br>> {SD_RES_NOT_FORMATTED, "Cluster has not been formatted"},<br>
> {SD_RES_INVALID_CTIME, "Creation times differ"},<br>> {SD_RES_INVALID_EPOCH, "Invalid epoch"},<br>> + {SD_RES_CLUSTER_TEMP_FAILURE, "Nodes occur temporary failure"},<br>
> };<br>><br>> for (i = 0; i < ARRAY_SIZE(errors); ++i)<br>> @@ -366,4 +376,6 @@ static inline int nodes_to_vnodes(struct sd_node *nodes, int nr,<br>> return nr_vnodes;<br>> }<br>><br>
> +int temp_failure_enabled(void);<br>> +<br>> #endif<br>> diff --git a/sheep/cluster.h b/sheep/cluster.h<br>> index b50dbb2..58811e6 100644<br>> --- a/sheep/cluster.h<br>> +++ b/sheep/cluster.h<br>> @@ -29,6 +29,7 @@ enum cluster_join_result {<br>
> CJ_RES_MASTER_TRANSFER, /* Transfer mastership. The joining<br>> * node has a newer epoch, so this node<br>> * will leave the cluster (restart later). */<br>
> + CJ_RES_COME_BACK, /* node come back after reboot */<br>> };<br>><br>> struct cdrv_handlers {<br>> @@ -114,6 +115,13 @@ struct cluster_driver {<br>> */<br>> int (*dispatch)(void);<br>
><br>> + /*<br>> + * remove one useless node<br>> + *<br>> + * Returns zero on success, -1 on error<br>> + */<br>> + int (*remove_node)(void *node);<br>> +<br>> struct list_head list;<br>
> };<br>><br>> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h<br>> index a9e8440..e18c696 100644<br>> --- a/sheep/sheep_priv.h<br>> +++ b/sheep/sheep_priv.h<br>> @@ -49,6 +49,14 @@ struct cpg_event {<br>
> struct list_head cpg_event_list;<br>> };<br>><br>> +#define SD_MAX_NODES_TEMP_FAIL (SD_MAX_NODES - 1)<br>> +<br>> +struct temp_failure {<br>> + int busy;<br>> + struct sd_node node;<br>
> + struct timer leave_timer;<br>> +};<br>> +<br>> struct client_info {<br>> struct connection conn;<br>><br>> @@ -150,6 +158,8 @@ struct cluster_info {<br>> int use_directio;<br>> uint8_t sync_flush;<br>
><br>> + uint32_t templeft_time;<br>> +<br>> struct work_queue *cpg_wqueue;<br>> struct work_queue *gateway_wqueue;<br>> struct work_queue *io_wqueue;<br>> --<br>> 1.7.1<br>><br>
</div></div><span class="HOEnZb"><font color="#888888">> --<br>> sheepdog mailing list<br>> <a href="mailto:sheepdog@lists.wpkg.org">sheepdog@lists.wpkg.org</a><br>> <a href="http://lists.wpkg.org/mailman/listinfo/sheepdog" target="_blank">http://lists.wpkg.org/mailman/listinfo/sheepdog</a><br>
</font></span></blockquote></div><br>