[Sheepdog] [PATCH v2 2/3] sheep: timestamp the epoch log

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Wed Oct 26 09:12:18 CEST 2011


At Wed, 26 Oct 2011 14:36:16 +0900,
MORITA Kazutaka wrote:
> 
> At Wed, 26 Oct 2011 11:35:30 +0800,
> Liu Yuan wrote:
> > 
> > From: Liu Yuan <tailai.ly at taobao.com>
> > 
> > A timestamped epoch is supposed to help users get more information about the cluster.
> > 
> > Currently, sheepdog just writes the epoch log without a timestamp. This patch stores
> > a timestamp at the end of the epoch log file. These timestamps will be shown by
> > the collie cluster info command as follows:
> > 
> > Cluster status: running
> > 
> > Cluster created at Wed Oct 26 10:32:44 2011
> > 
> > Epoch Time           Version
> > 2011-10-26 11:09:38      5 [192.168.0.1:7001, 192.168.0.1:7002, 192.168.0.1:7004]
> > 2011-10-26 11:09:32      4 [192.168.0.1:7001, 192.168.0.1:7002]
> > 2011-10-26 10:38:14      3 [192.168.0.1:7002]
> > 2011-10-26 10:38:13      2 [192.168.0.1:7001, 192.168.0.1:7002]
> > 2011-10-26 10:32:44      1 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002]
> > 
> > Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
> > ---
> >  collie/cluster.c   |   11 ++++++---
> >  include/sheep.h    |    1 +
> >  sheep/ops.c        |    1 +
> >  sheep/sheep_priv.h |    1 -
> >  sheep/store.c      |   58 ++++++++++++++++++++++++++-------------------------
> >  5 files changed, 39 insertions(+), 33 deletions(-)
> > 
> > diff --git a/collie/cluster.c b/collie/cluster.c
> > index 2f0e66f..162eb8a 100644
> > --- a/collie/cluster.c
> > +++ b/collie/cluster.c
> > @@ -77,7 +77,7 @@ static int cluster_info(int argc, char **argv)
> >  	unsigned rlen, wlen;
> >  	struct epoch_log logs[8];
> >  	int nr_logs;
> > -	time_t ti;
> > +	time_t ti, ct;
> >  	struct tm tm;
> >  	char time_str[128];
> >  
> > @@ -106,8 +106,11 @@ static int cluster_info(int argc, char **argv)
> >  	else
> >  		printf("%s\n", sd_strerror(rsp->result));
> >  
> > -	if (!raw_output)
> > -		printf("\nCreation time        Epoch Nodes\n");
> > +	if (!raw_output) {
> > +		ct = logs[0].ctime >> 32;
> > +		printf("\nCluster created at %s\n", ctime(&ct));
> > +		printf("Epoch Time           Version\n");
> > +	}
> >  
> >  	nr_logs = rsp->data_length / sizeof(struct epoch_log);
> >  	for (i = 0; i < nr_logs; i++) {
> > @@ -115,7 +118,7 @@ static int cluster_info(int argc, char **argv)
> >  		char name[128];
> >  		struct sheepdog_node_list_entry *entry;
> >  
> > -		ti = logs[i].ctime >> 32;
> > +		ti = logs[i].time;
> >  		if (raw_output) {
> >  			snprintf(time_str, sizeof(time_str), "%" PRIu64, (uint64_t) ti);
> >  		} else {
> > diff --git a/include/sheep.h b/include/sheep.h
> > index 072ea7a..033bef5 100644
> > --- a/include/sheep.h
> > +++ b/include/sheep.h
> > @@ -147,6 +147,7 @@ struct sheepdog_vnode_list_entry {
> >  
> >  struct epoch_log {
> >  	uint64_t ctime;
> > +	time_t time;
> >  	uint32_t epoch;
> >  	uint32_t nr_nodes;
> >  	struct sheepdog_node_list_entry nodes[SD_MAX_NODES];

This is not 64-bit aligned.


> > diff --git a/sheep/ops.c b/sheep/ops.c
> > index 743eb0f..e2d7fb1 100644
> > --- a/sheep/ops.c
> > +++ b/sheep/ops.c
> > @@ -279,6 +279,7 @@ static int local_stat_cluster(const struct sd_req *req, struct sd_rsp *rsp,
> >  
> >  		rsp->data_length += sizeof(*log);
> >  		log->nr_nodes /= sizeof(log->nodes[0]);
> > +		log->time = *(time_t *)(&log->nodes[log->nr_nodes]);

This looks a bit hacky to me.  In addition, this doesn't work if
sizeof(log->nodes[0]) < sizeof(time_t).


> >  		epoch--;
> >  	}
> >  
> > diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
> > index 418b69f..16f5672 100644
> > --- a/sheep/sheep_priv.h
> > +++ b/sheep/sheep_priv.h
> > @@ -209,7 +209,6 @@ int get_cluster_flags(uint16_t *flags);
> >  #define NR_GW_WORKER_THREAD 4
> >  #define NR_IO_WORKER_THREAD 4
> >  
> > -int epoch_log_write(uint32_t epoch, char *buf, int len);
> >  int epoch_log_read(uint32_t epoch, char *buf, int len);
> >  int epoch_log_read_nr(uint32_t epoch, char *buf, int len);
> >  int epoch_log_read_remote(uint32_t epoch, char *buf, int len);
> > diff --git a/sheep/store.c b/sheep/store.c
> > index 4e66920..ebf514c 100644
> > --- a/sheep/store.c
> > +++ b/sheep/store.c
> > @@ -21,6 +21,7 @@
> >  #include <sys/types.h>
> >  #include <sys/stat.h>
> >  #include <fcntl.h>
> > +#include <time.h>
> >  
> >  #include "sheep_priv.h"
> >  
> > @@ -474,17 +475,38 @@ int update_epoch_store(uint32_t epoch)
> >  
> >  int update_epoch_log(int epoch)
> >  {
> > -        int ret;
> > +	int fd, ret, len;
> > +	time_t t;
> > +	char path[PATH_MAX];
> >  
> > -        dprintf("update epoch, %d, %d\n", epoch, sys->nr_nodes);
> > -        ret = epoch_log_write(epoch, (char *)sys->nodes,
> > -                              sys->nr_nodes * sizeof(struct sheepdog_node_list_entry));
> > -        if (ret < 0)
> > -                eprintf("can't write epoch %u\n", epoch);
> > +	dprintf("update epoch, %d, %d\n", epoch, sys->nr_nodes);
> >  
> > -        return ret;
> > -}
> > +	snprintf(path, sizeof(path), "%s%08u", epoch_path, epoch);
> > +	fd = open(path, O_RDWR | O_CREAT | O_SYNC, def_fmode);
> > +	if (fd < 0) {
> > +		ret = fd;
> > +		goto err_open;
> > +	}
> >  
> > +	len = sys->nr_nodes * sizeof(struct sheepdog_node_list_entry);
> > +	ret = write(fd, (char *)sys->nodes, len);
> > +	if (ret != len)
> > +		goto err;
> > +
> > +	time(&t);
> 
> This forces us to synchronize clocks on all of the physical nodes.  It
> is not a good constraint.
> 
> I think the cluster driver is in charge of time synchronization.  How
> about passing time as arguments of cdrv_handlers (join/leave/notify)?

It seems that passing the same timestamp in leave_handler() would
require many changes.

Let's leave it as future work.


Thanks,

Kazutaka



More information about the sheepdog mailing list