[Sheepdog] [PATCH v2 1/5] sheep: refactor get_cluster_status()
Liu Yuan
namei.unix at gmail.com
Mon Oct 17 07:49:30 CEST 2011
On 10/17/2011 01:27 PM, MORITA Kazutaka wrote:
> At Sun, 16 Oct 2011 18:35:11 +0800,
> Liu Yuan wrote:
>>
>> From: Liu Yuan <tailai.ly at taobao.com>
>>
>> And add a helper func to do the sanity check for cluster status.
>>
>> Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
>> ---
>> sheep/group.c | 138 +++++++++++++++++++++++++++++----------------------------
>> 1 files changed, 70 insertions(+), 68 deletions(-)
>>
>> diff --git a/sheep/group.c b/sheep/group.c
>> index eb64207..a054c22 100644
>> --- a/sheep/group.c
>> +++ b/sheep/group.c
>> @@ -526,101 +526,97 @@ err:
>> return ret;
>> }
>>
>> +static int cluster_sanity_check(struct sheepdog_node_list_entry *entries,
>> + int nr_entries, uint64_t ctime, uint32_t epoch)
>> +{
>> + int ret = SD_RES_SUCCESS, nr_local_entries;
>> + struct sheepdog_node_list_entry local_entries[SD_MAX_NODES];
>> + uint32_t lepoch;
>> +
>> + if (sys->status == SD_STATUS_WAIT_FOR_FORMAT ||
>> + sys->status == SD_STATUS_SHUTDOWN ||
>> + goto out;
>
> Syntax error.
>
>> + /* When the joinning node is newly created, we need to check nothing. */
>> + if (nr_entries == 0)
>> + goto out;
>> +
>> + if (ctime != get_cluster_ctime()) {
>> + ret = SD_RES_INVALID_CTIME;
>> + goto out;
>> + }
>> +
>> + lepoch = get_latest_epoch();
>> + if (epoch > lepoch) {
>> + ret = SD_RES_OLD_NODE_VER;
>> + goto out;
>> + }
>> +
>> + if (sys->status == SD_STATUS_OK)
>> + goto out;
>> +
>> + if (epoch < lepoch) {
>> + ret = SD_RES_NEW_NODE_VER;
>> + goto out;
>> + }
>> +
>> + nr_local_entries = epoch_log_read(epoch, (char *)local_entries,
>> + sizeof(local_entries));
>> + nr_local_entries /= sizeof(local_entries[0]);
>> +
>> + if (nr_entries != nr_local_entries ||
>> + memcmp(entries, local_entries, sizeof(entries[0]) * nr_entries) != 0) {
>> + ret = SD_RES_INVALID_EPOCH;
>> + goto out;
>> + }
>> +
>> +out:
>> + return ret;
>> +}
>> +
>> static int get_cluster_status(struct sheepdog_node_list_entry *from,
>> struct sheepdog_node_list_entry *entries,
>> int nr_entries, uint64_t ctime, uint32_t epoch,
>> uint32_t *status, uint8_t *inc_epoch)
>> {
>> - int i;
>> - int nr_local_entries, nr_leave_entries;
>> + int i, ret = SD_RES_SUCCESS;
>> + int nr, nr_local_entries, nr_leave_entries;
>> struct sheepdog_node_list_entry local_entries[SD_MAX_NODES];
>> struct node *node;
>> - uint32_t local_epoch;
>> char str[256];
>>
>> *status = sys->status;
>> if (inc_epoch)
>> *inc_epoch = 0;
>>
>> + ret = cluster_sanity_check(entries, nr_entries, ctime, epoch);
>> + if (ret)
>> + goto out;
>> +
>> switch (sys->status) {
>> case SD_STATUS_OK:
>> if (inc_epoch)
>> *inc_epoch = 1;
>> -
>> - if (nr_entries == 0)
>> - break;
>> -
>> - if (ctime != get_cluster_ctime()) {
>> - eprintf("joining node has invalid ctime, %s\n",
>> - addr_to_str(str, sizeof(str), from->addr, from->port));
>> - return SD_RES_INVALID_CTIME;
>> - }
>> -
>> - local_epoch = get_latest_epoch();
>> - if (epoch > local_epoch) {
>> - eprintf("sheepdog is running with older epoch, %"PRIu32" %"PRIu32" %s\n",
>> - epoch, local_epoch,
>> - addr_to_str(str, sizeof(str), from->addr, from->port));
>> - return SD_RES_OLD_NODE_VER;
>> - }
>> break;
>> case SD_STATUS_WAIT_FOR_FORMAT:
>> - if (nr_entries != 0) {
>> - eprintf("joining node is not clean, %s\n",
>> - addr_to_str(str, sizeof(str), from->addr, from->port));
>> - return SD_RES_NOT_FORMATTED;
>> - }
>> + if (nr_entries != 0)
>> + ret = SD_RES_NOT_FORMATTED;
>> break;
>> case SD_STATUS_WAIT_FOR_JOIN:
>> - if (ctime != get_cluster_ctime()) {
>> - eprintf("joining node has invalid ctime, %s\n",
>> - addr_to_str(str, sizeof(str), from->addr, from->port));
>> - return SD_RES_INVALID_CTIME;
>> - }
>> -
>> - local_epoch = get_latest_epoch();
>> - if (epoch > local_epoch) {
>> - eprintf("sheepdog is waiting with older epoch, %"PRIu32" %"PRIu32" %s\n",
>> - epoch, local_epoch,
>> - addr_to_str(str, sizeof(str), from->addr, from->port));
>> - return SD_RES_OLD_NODE_VER;
>> - } else if (epoch < local_epoch) {
>> - eprintf("sheepdog is waiting with newer epoch, %"PRIu32" %"PRIu32" %s\n",
>> - epoch, local_epoch,
>> - addr_to_str(str, sizeof(str), from->addr, from->port));
>> - return SD_RES_NEW_NODE_VER;
>> - }
>> -
>> + nr = get_nodes_nr_from(&sys->sd_node_list) + 1;
>> nr_local_entries = epoch_log_read(epoch, (char *)local_entries,
>> sizeof(local_entries));
>> - nr_local_entries /= sizeof(local_entries[0]);
>
> We can't remove this line because epoch_log_read() returns the number
> of bytes read. Perhaps, should we change epoch_log_read() to return
> the number of nodes?
>
Hmm, it was removed accidentally by me. So this proves it quite
error-prone. So yeah, I think so, return the number of nodes. I am going
to prepare it in this patch series.
Thanks,
Yuan
More information about the sheepdog
mailing list