[Sheepdog] [PATCH v2 2/3] sheep: timestamp the epoch log
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Wed Oct 26 09:12:18 CEST 2011
At Wed, 26 Oct 2011 14:36:16 +0900,
MORITA Kazutaka wrote:
>
> At Wed, 26 Oct 2011 11:35:30 +0800,
> Liu Yuan wrote:
> >
> > From: Liu Yuan <tailai.ly at taobao.com>
> >
> > Timestamped epoch is supposed to help users get more information about cluster.
> >
> > Currently, sheepdog just write epoch log without timestamp. This patch stores
> > timestamp at the end of the epoch log file. These timestamps will be seen by
> > collie cluster info command as follows:
> >
> > Cluster status: running
> >
> > Cluster created at Wed Oct 26 10:32:44 2011
> >
> > Epoch Time Version
> > 2011-10-26 11:09:38 5 [192.168.0.1:7001, 192.168.0.1:7002, 192.168.0.1:7004]
> > 2011-10-26 11:09:32 4 [192.168.0.1:7001, 192.168.0.1:7002]
> > 2011-10-26 10:38:14 3 [192.168.0.1:7002]
> > 2011-10-26 10:38:13 2 [192.168.0.1:7001, 192.168.0.1:7002]
> > 2011-10-26 10:32:44 1 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002]
> >
> > Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
> > ---
> > collie/cluster.c | 11 ++++++---
> > include/sheep.h | 1 +
> > sheep/ops.c | 1 +
> > sheep/sheep_priv.h | 1 -
> > sheep/store.c | 58 ++++++++++++++++++++++++++-------------------------
> > 5 files changed, 39 insertions(+), 33 deletions(-)
> >
> > diff --git a/collie/cluster.c b/collie/cluster.c
> > index 2f0e66f..162eb8a 100644
> > --- a/collie/cluster.c
> > +++ b/collie/cluster.c
> > @@ -77,7 +77,7 @@ static int cluster_info(int argc, char **argv)
> > unsigned rlen, wlen;
> > struct epoch_log logs[8];
> > int nr_logs;
> > - time_t ti;
> > + time_t ti, ct;
> > struct tm tm;
> > char time_str[128];
> >
> > @@ -106,8 +106,11 @@ static int cluster_info(int argc, char **argv)
> > else
> > printf("%s\n", sd_strerror(rsp->result));
> >
> > - if (!raw_output)
> > - printf("\nCreation time Epoch Nodes\n");
> > + if (!raw_output) {
> > + ct = logs[0].ctime >> 32;
> > + printf("\nCluster created at %s\n", ctime(&ct));
> > + printf("Epoch Time Version\n");
> > + }
> >
> > nr_logs = rsp->data_length / sizeof(struct epoch_log);
> > for (i = 0; i < nr_logs; i++) {
> > @@ -115,7 +118,7 @@ static int cluster_info(int argc, char **argv)
> > char name[128];
> > struct sheepdog_node_list_entry *entry;
> >
> > - ti = logs[i].ctime >> 32;
> > + ti = logs[i].time;
> > if (raw_output) {
> > snprintf(time_str, sizeof(time_str), "%" PRIu64, (uint64_t) ti);
> > } else {
> > diff --git a/include/sheep.h b/include/sheep.h
> > index 072ea7a..033bef5 100644
> > --- a/include/sheep.h
> > +++ b/include/sheep.h
> > @@ -147,6 +147,7 @@ struct sheepdog_vnode_list_entry {
> >
> > struct epoch_log {
> > uint64_t ctime;
> > + time_t time;
> > uint32_t epoch;
> > uint32_t nr_nodes;
> > struct sheepdog_node_list_entry nodes[SD_MAX_NODES];
This is not 64-bit aligned.
> > diff --git a/sheep/ops.c b/sheep/ops.c
> > index 743eb0f..e2d7fb1 100644
> > --- a/sheep/ops.c
> > +++ b/sheep/ops.c
> > @@ -279,6 +279,7 @@ static int local_stat_cluster(const struct sd_req *req, struct sd_rsp *rsp,
> >
> > rsp->data_length += sizeof(*log);
> > log->nr_nodes /= sizeof(log->nodes[0]);
> > + log->time = *(time_t *)(&log->nodes[log->nr_nodes]);
Looks a bit hacky to me. In addition, this doesn't work if
sizeof(log->nodes[0]) < sizeof(time_t).
> > epoch--;
> > }
> >
> > diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
> > index 418b69f..16f5672 100644
> > --- a/sheep/sheep_priv.h
> > +++ b/sheep/sheep_priv.h
> > @@ -209,7 +209,6 @@ int get_cluster_flags(uint16_t *flags);
> > #define NR_GW_WORKER_THREAD 4
> > #define NR_IO_WORKER_THREAD 4
> >
> > -int epoch_log_write(uint32_t epoch, char *buf, int len);
> > int epoch_log_read(uint32_t epoch, char *buf, int len);
> > int epoch_log_read_nr(uint32_t epoch, char *buf, int len);
> > int epoch_log_read_remote(uint32_t epoch, char *buf, int len);
> > diff --git a/sheep/store.c b/sheep/store.c
> > index 4e66920..ebf514c 100644
> > --- a/sheep/store.c
> > +++ b/sheep/store.c
> > @@ -21,6 +21,7 @@
> > #include <sys/types.h>
> > #include <sys/stat.h>
> > #include <fcntl.h>
> > +#include <time.h>
> >
> > #include "sheep_priv.h"
> >
> > @@ -474,17 +475,38 @@ int update_epoch_store(uint32_t epoch)
> >
> > int update_epoch_log(int epoch)
> > {
> > - int ret;
> > + int fd, ret, len;
> > + time_t t;
> > + char path[PATH_MAX];
> >
> > - dprintf("update epoch, %d, %d\n", epoch, sys->nr_nodes);
> > - ret = epoch_log_write(epoch, (char *)sys->nodes,
> > - sys->nr_nodes * sizeof(struct sheepdog_node_list_entry));
> > - if (ret < 0)
> > - eprintf("can't write epoch %u\n", epoch);
> > + dprintf("update epoch, %d, %d\n", epoch, sys->nr_nodes);
> >
> > - return ret;
> > -}
> > + snprintf(path, sizeof(path), "%s%08u", epoch_path, epoch);
> > + fd = open(path, O_RDWR | O_CREAT | O_SYNC, def_fmode);
> > + if (fd < 0) {
> > + ret = fd;
> > + goto err_open;
> > + }
> >
> > + len = sys->nr_nodes * sizeof(struct sheepdog_node_list_entry);
> > + ret = write(fd, (char *)sys->nodes, len);
> > + if (ret != len)
> > + goto err;
> > +
> > + time(&t);
>
> This forces us to synchronize clocks on all of the physical nodes. It
> is not a good constraint.
>
> I think the cluster driver is in charge of time synchronization. How
> about passing time as arguments of cdrv_handlers (join/leave/notify)?
It seems that passing the same timestamp to leave_handler() would
require many changes.
Let's leave it as future work.
Thanks,
Kazutaka
More information about the sheepdog
mailing list