At Sun, 16 Oct 2011 18:35:11 +0800, Liu Yuan wrote: > > From: Liu Yuan <tailai.ly at taobao.com> > > And add a helper func to do the sanity check for cluster status. > > Signed-off-by: Liu Yuan <tailai.ly at taobao.com> > --- > sheep/group.c | 138 +++++++++++++++++++++++++++++---------------------------- > 1 files changed, 70 insertions(+), 68 deletions(-) > > diff --git a/sheep/group.c b/sheep/group.c > index eb64207..a054c22 100644 > --- a/sheep/group.c > +++ b/sheep/group.c > @@ -526,101 +526,97 @@ err: > return ret; > } > > +static int cluster_sanity_check(struct sheepdog_node_list_entry *entries, > + int nr_entries, uint64_t ctime, uint32_t epoch) > +{ > + int ret = SD_RES_SUCCESS, nr_local_entries; > + struct sheepdog_node_list_entry local_entries[SD_MAX_NODES]; > + uint32_t lepoch; > + > + if (sys->status == SD_STATUS_WAIT_FOR_FORMAT || > + sys->status == SD_STATUS_SHUTDOWN || > + goto out; Syntax error. > + /* When the joinning node is newly created, we need to check nothing. */ > + if (nr_entries == 0) > + goto out; > + > + if (ctime != get_cluster_ctime()) { > + ret = SD_RES_INVALID_CTIME; > + goto out; > + } > + > + lepoch = get_latest_epoch(); > + if (epoch > lepoch) { > + ret = SD_RES_OLD_NODE_VER; > + goto out; > + } > + > + if (sys->status == SD_STATUS_OK) > + goto out; > + > + if (epoch < lepoch) { > + ret = SD_RES_NEW_NODE_VER; > + goto out; > + } > + > + nr_local_entries = epoch_log_read(epoch, (char *)local_entries, > + sizeof(local_entries)); > + nr_local_entries /= sizeof(local_entries[0]); > + > + if (nr_entries != nr_local_entries || > + memcmp(entries, local_entries, sizeof(entries[0]) * nr_entries) != 0) { > + ret = SD_RES_INVALID_EPOCH; > + goto out; > + } > + > +out: > + return ret; > +} > + > static int get_cluster_status(struct sheepdog_node_list_entry *from, > struct sheepdog_node_list_entry *entries, > int nr_entries, uint64_t ctime, uint32_t epoch, > uint32_t *status, uint8_t *inc_epoch) > { > - int i; > - int nr_local_entries, nr_leave_entries; > + int i, ret = SD_RES_SUCCESS; > + int nr, nr_local_entries, nr_leave_entries; > struct sheepdog_node_list_entry local_entries[SD_MAX_NODES]; > struct node *node; > - uint32_t local_epoch; > char str[256]; > > *status = sys->status; > if (inc_epoch) > *inc_epoch = 0; > > + ret = cluster_sanity_check(entries, nr_entries, ctime, epoch); > + if (ret) > + goto out; > + > switch (sys->status) { > case SD_STATUS_OK: > if (inc_epoch) > *inc_epoch = 1; > - > - if (nr_entries == 0) > - break; > - > - if (ctime != get_cluster_ctime()) { > - eprintf("joining node has invalid ctime, %s\n", > - addr_to_str(str, sizeof(str), from->addr, from->port)); > - return SD_RES_INVALID_CTIME; > - } > - > - local_epoch = get_latest_epoch(); > - if (epoch > local_epoch) { > - eprintf("sheepdog is running with older epoch, %"PRIu32" %"PRIu32" %s\n", > - epoch, local_epoch, > - addr_to_str(str, sizeof(str), from->addr, from->port)); > - return SD_RES_OLD_NODE_VER; > - } > break; > case SD_STATUS_WAIT_FOR_FORMAT: > - if (nr_entries != 0) { > - eprintf("joining node is not clean, %s\n", > - addr_to_str(str, sizeof(str), from->addr, from->port)); > - return SD_RES_NOT_FORMATTED; > - } > + if (nr_entries != 0) > + ret = SD_RES_NOT_FORMATTED; > break; > case SD_STATUS_WAIT_FOR_JOIN: > - if (ctime != get_cluster_ctime()) { > - eprintf("joining node has invalid ctime, %s\n", > - addr_to_str(str, sizeof(str), from->addr, from->port)); > - return SD_RES_INVALID_CTIME; > - } > - > - local_epoch = get_latest_epoch(); > - if (epoch > local_epoch) { > - eprintf("sheepdog is waiting with older epoch, %"PRIu32" %"PRIu32" %s\n", > - epoch, local_epoch, > - addr_to_str(str, sizeof(str), from->addr, from->port)); > - return SD_RES_OLD_NODE_VER; > - } else if (epoch < local_epoch) { > - eprintf("sheepdog is waiting with newer epoch, %"PRIu32" %"PRIu32" %s\n", > - epoch, local_epoch, > - addr_to_str(str, sizeof(str), from->addr, from->port)); > - return SD_RES_NEW_NODE_VER; > - } > - > + nr = get_nodes_nr_from(&sys->sd_node_list) + 1; > nr_local_entries = epoch_log_read(epoch, (char *)local_entries, > sizeof(local_entries)); > - nr_local_entries /= sizeof(local_entries[0]); We can't remove this line because epoch_log_read() returns the number of bytes read. Perhaps, should we change epoch_log_read() to return the number of nodes? Thanks, Kazutaka |