[Sheepdog] [PATCH v2 2/3] sheep: timestamp the epoch log
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Wed Oct 26 07:36:16 CEST 2011
At Wed, 26 Oct 2011 11:35:30 +0800,
Liu Yuan wrote:
>
> From: Liu Yuan <tailai.ly at taobao.com>
>
> Timestamping the epoch log is meant to give users more information about the cluster.
>
> Currently, sheepdog just writes the epoch log without a timestamp. This patch
> stores a timestamp at the end of each epoch log file. These timestamps are shown
> by the collie cluster info command as follows:
>
> Cluster status: running
>
> Cluster created at Wed Oct 26 10:32:44 2011
>
> Epoch Time Version
> 2011-10-26 11:09:38 5 [192.168.0.1:7001, 192.168.0.1:7002, 192.168.0.1:7004]
> 2011-10-26 11:09:32 4 [192.168.0.1:7001, 192.168.0.1:7002]
> 2011-10-26 10:38:14 3 [192.168.0.1:7002]
> 2011-10-26 10:38:13 2 [192.168.0.1:7001, 192.168.0.1:7002]
> 2011-10-26 10:32:44 1 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002]
>
> Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
> ---
> collie/cluster.c | 11 ++++++---
> include/sheep.h | 1 +
> sheep/ops.c | 1 +
> sheep/sheep_priv.h | 1 -
> sheep/store.c | 58 ++++++++++++++++++++++++++-------------------------
> 5 files changed, 39 insertions(+), 33 deletions(-)
>
> diff --git a/collie/cluster.c b/collie/cluster.c
> index 2f0e66f..162eb8a 100644
> --- a/collie/cluster.c
> +++ b/collie/cluster.c
> @@ -77,7 +77,7 @@ static int cluster_info(int argc, char **argv)
> unsigned rlen, wlen;
> struct epoch_log logs[8];
> int nr_logs;
> - time_t ti;
> + time_t ti, ct;
> struct tm tm;
> char time_str[128];
>
> @@ -106,8 +106,11 @@ static int cluster_info(int argc, char **argv)
> else
> printf("%s\n", sd_strerror(rsp->result));
>
> - if (!raw_output)
> - printf("\nCreation time Epoch Nodes\n");
> + if (!raw_output) {
> + ct = logs[0].ctime >> 32;
> + printf("\nCluster created at %s\n", ctime(&ct));
> + printf("Epoch Time Version\n");
> + }
>
> nr_logs = rsp->data_length / sizeof(struct epoch_log);
> for (i = 0; i < nr_logs; i++) {
> @@ -115,7 +118,7 @@ static int cluster_info(int argc, char **argv)
> char name[128];
> struct sheepdog_node_list_entry *entry;
>
> - ti = logs[i].ctime >> 32;
> + ti = logs[i].time;
> if (raw_output) {
> snprintf(time_str, sizeof(time_str), "%" PRIu64, (uint64_t) ti);
> } else {
> diff --git a/include/sheep.h b/include/sheep.h
> index 072ea7a..033bef5 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -147,6 +147,7 @@ struct sheepdog_vnode_list_entry {
>
> struct epoch_log {
> uint64_t ctime;
> + time_t time;
> uint32_t epoch;
> uint32_t nr_nodes;
> struct sheepdog_node_list_entry nodes[SD_MAX_NODES];
> diff --git a/sheep/ops.c b/sheep/ops.c
> index 743eb0f..e2d7fb1 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -279,6 +279,7 @@ static int local_stat_cluster(const struct sd_req *req, struct sd_rsp *rsp,
>
> rsp->data_length += sizeof(*log);
> log->nr_nodes /= sizeof(log->nodes[0]);
> + log->time = *(time_t *)(&log->nodes[log->nr_nodes]);
> epoch--;
> }
>
> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
> index 418b69f..16f5672 100644
> --- a/sheep/sheep_priv.h
> +++ b/sheep/sheep_priv.h
> @@ -209,7 +209,6 @@ int get_cluster_flags(uint16_t *flags);
> #define NR_GW_WORKER_THREAD 4
> #define NR_IO_WORKER_THREAD 4
>
> -int epoch_log_write(uint32_t epoch, char *buf, int len);
> int epoch_log_read(uint32_t epoch, char *buf, int len);
> int epoch_log_read_nr(uint32_t epoch, char *buf, int len);
> int epoch_log_read_remote(uint32_t epoch, char *buf, int len);
> diff --git a/sheep/store.c b/sheep/store.c
> index 4e66920..ebf514c 100644
> --- a/sheep/store.c
> +++ b/sheep/store.c
> @@ -21,6 +21,7 @@
> #include <sys/types.h>
> #include <sys/stat.h>
> #include <fcntl.h>
> +#include <time.h>
>
> #include "sheep_priv.h"
>
> @@ -474,17 +475,38 @@ int update_epoch_store(uint32_t epoch)
>
> int update_epoch_log(int epoch)
> {
> - int ret;
> + int fd, ret, len;
> + time_t t;
> + char path[PATH_MAX];
>
> - dprintf("update epoch, %d, %d\n", epoch, sys->nr_nodes);
> - ret = epoch_log_write(epoch, (char *)sys->nodes,
> - sys->nr_nodes * sizeof(struct sheepdog_node_list_entry));
> - if (ret < 0)
> - eprintf("can't write epoch %u\n", epoch);
> + dprintf("update epoch, %d, %d\n", epoch, sys->nr_nodes);
>
> - return ret;
> -}
> + snprintf(path, sizeof(path), "%s%08u", epoch_path, epoch);
> + fd = open(path, O_RDWR | O_CREAT | O_SYNC, def_fmode);
> + if (fd < 0) {
> + ret = fd;
> + goto err_open;
> + }
>
> + len = sys->nr_nodes * sizeof(struct sheepdog_node_list_entry);
> + ret = write(fd, (char *)sys->nodes, len);
> + if (ret != len)
> + goto err;
> +
> + time(&t);
This forces us to synchronize the clocks on all of the physical nodes,
which is not a good constraint.
I think the cluster driver should be in charge of time synchronization. How
about passing the time as an argument to the cdrv_handlers (join/leave/notify)?
Thanks,
Kazutaka
> + len = sizeof(t);
> + ret = write(fd, (char *)&t, len);
> + if (ret != len)
> + goto err;
> +
> + close(fd);
> + return 0;
> +err:
> + close(fd);
> +err_open:
> + dprintf("%s\n", strerror(errno));
> + return -1;
> +}
>
> int write_object_local(uint64_t oid, char *data, unsigned int datalen,
> uint64_t offset, uint16_t flags, int copies,
> @@ -827,26 +849,6 @@ out:
> rsp->result = ret;
> }
>
> -int epoch_log_write(uint32_t epoch, char *buf, int len)
> -{
> - int fd, ret;
> - char path[PATH_MAX];
> -
> - snprintf(path, sizeof(path), "%s%08u", epoch_path, epoch);
> - fd = open(path, O_RDWR | O_CREAT |O_SYNC, def_fmode);
> - if (fd < 0)
> - return -1;
> -
> - ret = write(fd, buf, len);
> -
> - close(fd);
> -
> - if (ret != len)
> - return -1;
> -
> - return 0;
> -}
> -
> int epoch_log_read_remote(uint32_t epoch, char *buf, int len)
> {
> struct sd_obj_req hdr;
> --
> 1.7.6.1
>
> --
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog
More information about the sheepdog
mailing list