[Sheepdog] [PATCH v2 1/5] sheep: refactor get_cluster_status()

Liu Yuan namei.unix at gmail.com
Mon Oct 17 07:49:30 CEST 2011


On 10/17/2011 01:27 PM, MORITA Kazutaka wrote:

> At Sun, 16 Oct 2011 18:35:11 +0800,
> Liu Yuan wrote:
>>
>> From: Liu Yuan <tailai.ly at taobao.com>
>>
>> And add a helper func to do the sanity check for cluster status.
>>
>> Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
>> ---
>>  sheep/group.c |  138 +++++++++++++++++++++++++++++----------------------------
>>  1 files changed, 70 insertions(+), 68 deletions(-)
>>
>> diff --git a/sheep/group.c b/sheep/group.c
>> index eb64207..a054c22 100644
>> --- a/sheep/group.c
>> +++ b/sheep/group.c
>> @@ -526,101 +526,97 @@ err:
>>  	return ret;
>>  }
>>  
>> +static int cluster_sanity_check(struct sheepdog_node_list_entry *entries,
>> +			     int nr_entries, uint64_t ctime, uint32_t epoch)
>> +{
>> +	int ret = SD_RES_SUCCESS, nr_local_entries;
>> +	struct sheepdog_node_list_entry local_entries[SD_MAX_NODES];
>> +	uint32_t lepoch;
>> +
>> +	if (sys->status == SD_STATUS_WAIT_FOR_FORMAT ||
>> +	    sys->status == SD_STATUS_SHUTDOWN ||
>> +		goto out;
> 
> Syntax error.
> 
>> +	/* When the joinning node is newly created, we need to check nothing. */
>> +	if (nr_entries == 0)
>> +		goto out;
>> +
>> +	if (ctime != get_cluster_ctime()) {
>> +		ret = SD_RES_INVALID_CTIME;
>> +		goto out;
>> +	}
>> +
>> +	lepoch = get_latest_epoch();
>> +	if (epoch > lepoch) {
>> +		ret = SD_RES_OLD_NODE_VER;
>> +		goto out;
>> +	}
>> +
>> +	if (sys->status == SD_STATUS_OK)
>> +		goto out;
>> +
>> +	if (epoch < lepoch) {
>> +		ret = SD_RES_NEW_NODE_VER;
>> +		goto out;
>> +	}
>> +
>> +	nr_local_entries = epoch_log_read(epoch, (char *)local_entries,
>> +			sizeof(local_entries));
>> +	nr_local_entries /= sizeof(local_entries[0]);
>> +
>> +	if (nr_entries != nr_local_entries ||
>> +	    memcmp(entries, local_entries, sizeof(entries[0]) * nr_entries) != 0) {
>> +		ret = SD_RES_INVALID_EPOCH;
>> +		goto out;
>> +	}
>> +
>> +out:
>> +	return ret;
>> +}
>> +
>>  static int get_cluster_status(struct sheepdog_node_list_entry *from,
>>  			      struct sheepdog_node_list_entry *entries,
>>  			      int nr_entries, uint64_t ctime, uint32_t epoch,
>>  			      uint32_t *status, uint8_t *inc_epoch)
>>  {
>> -	int i;
>> -	int nr_local_entries, nr_leave_entries;
>> +	int i, ret = SD_RES_SUCCESS;
>> +	int nr, nr_local_entries, nr_leave_entries;
>>  	struct sheepdog_node_list_entry local_entries[SD_MAX_NODES];
>>  	struct node *node;
>> -	uint32_t local_epoch;
>>  	char str[256];
>>  
>>  	*status = sys->status;
>>  	if (inc_epoch)
>>  		*inc_epoch = 0;
>>  
>> +	ret = cluster_sanity_check(entries, nr_entries, ctime, epoch);
>> +	if (ret)
>> +		goto out;
>> +
>>  	switch (sys->status) {
>>  	case SD_STATUS_OK:
>>  		if (inc_epoch)
>>  			*inc_epoch = 1;
>> -
>> -		if (nr_entries == 0)
>> -			break;
>> -
>> -		if (ctime != get_cluster_ctime()) {
>> -			eprintf("joining node has invalid ctime, %s\n",
>> -				addr_to_str(str, sizeof(str), from->addr, from->port));
>> -			return SD_RES_INVALID_CTIME;
>> -		}
>> -
>> -		local_epoch = get_latest_epoch();
>> -		if (epoch > local_epoch) {
>> -			eprintf("sheepdog is running with older epoch, %"PRIu32" %"PRIu32" %s\n",
>> -				epoch, local_epoch,
>> -				addr_to_str(str, sizeof(str), from->addr, from->port));
>> -			return SD_RES_OLD_NODE_VER;
>> -		}
>>  		break;
>>  	case SD_STATUS_WAIT_FOR_FORMAT:
>> -		if (nr_entries != 0) {
>> -			eprintf("joining node is not clean, %s\n",
>> -				addr_to_str(str, sizeof(str), from->addr, from->port));
>> -			return SD_RES_NOT_FORMATTED;
>> -		}
>> +		if (nr_entries != 0)
>> +			ret = SD_RES_NOT_FORMATTED;
>>  		break;
>>  	case SD_STATUS_WAIT_FOR_JOIN:
>> -		if (ctime != get_cluster_ctime()) {
>> -			eprintf("joining node has invalid ctime, %s\n",
>> -				addr_to_str(str, sizeof(str), from->addr, from->port));
>> -			return SD_RES_INVALID_CTIME;
>> -		}
>> -
>> -		local_epoch = get_latest_epoch();
>> -		if (epoch > local_epoch) {
>> -			eprintf("sheepdog is waiting with older epoch, %"PRIu32" %"PRIu32" %s\n",
>> -				epoch, local_epoch,
>> -				addr_to_str(str, sizeof(str), from->addr, from->port));
>> -			return SD_RES_OLD_NODE_VER;
>> -		} else if (epoch < local_epoch) {
>> -			eprintf("sheepdog is waiting with newer epoch, %"PRIu32" %"PRIu32" %s\n",
>> -				epoch, local_epoch,
>> -				addr_to_str(str, sizeof(str), from->addr, from->port));
>> -			return SD_RES_NEW_NODE_VER;
>> -		}
>> -
>> +		nr = get_nodes_nr_from(&sys->sd_node_list) + 1;
>>  		nr_local_entries = epoch_log_read(epoch, (char *)local_entries,
>>  						  sizeof(local_entries));
>> -		nr_local_entries /= sizeof(local_entries[0]);
> 
> We can't remove this line because epoch_log_read() returns the number
> of bytes read.  Perhaps, should we change epoch_log_read() to return
> the number of nodes?
> 


Hmm, it was removed accidentally by me. So this proves it quite
error-prone. So yeah, I think so, return the number of nodes. I am going
to prepare it in this patch series.

Thanks,
Yuan



More information about the sheepdog mailing list