[Sheepdog] [PATCH v4 11/12] sheep: add cluster snapshot/restore support

Fri Dec 30 07:50:15 CET 2011

> -----Original Message-----
> From: sheepdog-bounces at lists.wpkg.org [mailto:sheepdog-bounces at lists.wpkg.org] On Behalf Of Liu Yuan
> Sent: Sunday, December 25, 2011 11:43 PM
> To: morita.kazutaka at lab.ntt.co.jp
> Cc: sheepdog at lists.wpkg.org
> Subject: [Sheepdog] [PATCH v4 11/12] sheep: add cluster snapshot/restore support
> 
> From: Liu Yuan <tailai.ly at taobao.com>
> 
> This kind of snapshot is supposed to be triggered by the user, _not_ by the
> recovery code. I don't think we need to restore to the state at the beginning
> of the recovery. So this work only permits us to restore the cluster to a
> snapshot initiated by end users, though it would be quite easy to implement
> restoring to the snapshots forcibly taken by the recovery path.
> 
> TODO:
> - check the nodes state consistency besides object recovery.
> 
> Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
> ---
>  include/sheep.h    |    5 ++-
>  sheep/farm/farm.c  |  134 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  sheep/ops.c        |   58 ++++++++++++++++++++++
>  sheep/sheep_priv.h |    4 ++
>  4 files changed, 200 insertions(+), 1 deletions(-)
> 
> diff --git a/include/sheep.h b/include/sheep.h
> index 906c1f5..f90b968 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -37,7 +37,10 @@
>  #define SD_OP_STAT_CLUSTER   0x87
>  #define SD_OP_KILL_NODE      0x88
>  #define SD_OP_GET_VDI_ATTR   0x89
> -#define SD_OP_RECOVER	     0x8A
> +#define SD_OP_RECOVER        0x8a
> +#define SD_OP_SNAPSHOT       0x90
> +#define SD_OP_RESTORE        0x91
> +#define SD_OP_SNAP_FILE      0x92
> 
>  #define SD_FLAG_CMD_IO_LOCAL   0x0010
>  #define SD_FLAG_CMD_RECOVERY 0x0020
> diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c
> index 813e389..1c26d76 100644
> --- a/sheep/farm/farm.c
> +++ b/sheep/farm/farm.c
> @@ -410,6 +410,137 @@ out:
>  	return ret;
>  }
> 
> +static int farm_snapshot(struct siocb *iocb)
> +{
> +	unsigned char snap_sha1[SHA1_LEN];
> +	void *buffer;
> +	int log_nr, ret = SD_RES_EIO, epoch;
> +
> +	buffer = snap_log_read(&log_nr, 1);
> +	if (!buffer)
> +		goto out;
> +
> +	epoch = log_nr + 1;
> +	dprintf("user epoch %d\n", epoch);
Will there be concurrent snapshot creators? If so, two of them could read the same log_nr and end up with the same epoch.

> +	if (snap_file_write(epoch, snap_sha1, 1) < 0)
> +		goto out;
> +
> +	if (snap_log_write(epoch, snap_sha1, 1) < 0)
> +		goto out;
> +
> +	ret = SD_RES_SUCCESS;
> +out:
> +	free(buffer);
> +	return ret;
> +}
> +
> +static int cleanup_working_dir(void)
> +{
> +	DIR *dir;
> +	struct dirent *d;
> +
> +	dprintf("try clean up working dir\n");
> +	dir = opendir(obj_path);
> +	if (!dir)
> +		return -1;
> +
> +	while ((d = readdir(dir))) {
> +		char p[PATH_MAX];
> +		if (!strncmp(d->d_name, ".", 1))
> +			continue;
> +		snprintf(p, sizeof(p), "%s%s", obj_path, d->d_name);
> +		if (unlink(p) < 0) {
> +			eprintf("%s:%m\n", p);
> +			continue;
> +		}
> +		dprintf("remove file %s\n", d->d_name);
> +	}
> +	closedir(dir);
> +	return 0;
> +}
> +
> +static int restore_objects_from_snap(int epoch)
> +{
> +	struct sha1_file_hdr hdr;
> +	struct trunk_entry *trunk_buf, *trunk_free = NULL;
> +	unsigned char trunk_sha1[SHA1_LEN];
> +	uint64_t nr_trunks, i;
> +	int ret = SD_RES_EIO;
> +
> +	if (get_trunk_sha1(epoch, trunk_sha1, 1) < 0)
> +		goto out;
> +
> +	trunk_free = trunk_buf = trunk_file_read(trunk_sha1, &hdr);
> +	if (!trunk_buf)
> +		goto out;
> +
> +	nr_trunks = hdr.priv;
> +	for (i = 0; i < nr_trunks; i++, trunk_buf++) {
> +		struct sha1_file_hdr h;
> +		struct siocb io = { 0 };
> +		uint64_t oid;
> +		void *buffer = NULL;
> +
> +		oid = trunk_buf->oid;
> +		buffer = sha1_file_read(trunk_buf->sha1, &h);
> +		if (!buffer) {
> +			eprintf("oid %"PRIx64" not restored\n", oid);
> +			goto out;
> +		}
> +		io.length = h.size;
> +		io.buf = buffer;
> +		ret = farm_atomic_put(oid, &io);
> +		if (ret != SD_RES_SUCCESS) {
> +			eprintf("oid %"PRIx64" not restored\n", oid);
> +			goto out;
> +		} else
> +			dprintf("oid %"PRIx64" restored\n", oid);
Nit: if one branch of an "if...else" has braces, it is better to add them to the other branch as well :)

> +
> +		free(buffer);
> +	}
> +out:
> +	free(trunk_free);
> +	return ret;
> +}
> +
> +static int farm_restore(struct siocb *iocb)
> +{
> +	int ret = SD_RES_EIO, epoch = iocb->epoch;
> +
> +	dprintf("try recover user epoch %d\n", epoch);
> +
> +	if (cleanup_working_dir() < 0) {
> +		eprintf("failed to clean up the working dir %m\n");
> +		goto out;
> +	}
> +
> +	ret = restore_objects_from_snap(epoch);
> +	if (ret != SD_RES_SUCCESS)
> +		goto out;
> +out:
> +	return ret;
> +}
> +
> +static int farm_get_snap_file(struct siocb *iocb)
> +{
> +	int ret = SD_RES_EIO;
> +	void *buffer = NULL;
> +	size_t size;
> +	int nr;
> +
> +	dprintf("try get snap file\n");
> +	buffer = snap_log_read(&nr, 1);
> +	if (!buffer)
> +		goto out;
> +	size = nr * sizeof(struct snap_log);
> +	memcpy(iocb->buf, buffer, size);
> +	iocb->length = size;
> +	ret = SD_RES_SUCCESS;
> +out:
> +	free(buffer);
> +	return ret;
> +}
> +
>  struct store_driver farm = {
>  	.driver_name = "farm",
>  	.init = farm_init,
> @@ -422,4 +553,7 @@ struct store_driver farm = {
>  	.atomic_put = farm_atomic_put,
>  	.begin_recover = farm_begin_recover,
>  	.end_recover = farm_end_recover,
> +	.snapshot = farm_snapshot,
> +	.restore = farm_restore,
> +	.get_snap_file = farm_get_snap_file,
>  };
> diff --git a/sheep/ops.c b/sheep/ops.c
> index 13ecdf2..5300039 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -13,6 +13,8 @@
> 
>  #include "sheep_priv.h"
> 
> +extern struct store_driver store;
> +
>  enum sd_op_type {
>  	SD_OP_TYPE_CLUSTER = 1, /* cluster operations */
>  	SD_OP_TYPE_LOCAL,       /* local operations */
> @@ -383,6 +385,45 @@ out:
>  	return ret;
>  }
> 
> +static int cluster_snapshot(const struct sd_req *req, struct sd_rsp *rsp,
> +			    void *data)
> +{
> +	int ret = SD_RES_SUCCESS;
It would be better to return an error instead of success if snapshot is not supported.

> +	struct siocb iocb = { 0 };
> +
> +	if (store.snapshot)
> +		ret = store.snapshot(&iocb);
How does the above become a cluster-wide snapshot? From what I read, farm.snapshot only ensures that a single node's dirty objects are snapshotted. Or am I missing something?

> +
> +	return ret;
> +}
> +
> +static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp,
> +			   void *data)
> +{
> +	const struct sd_vdi_req *hdr = (const struct sd_vdi_req *)req;
> +	int ret = SD_RES_SUCCESS;
Ditto: better to return an error instead of success if restore is not supported.

> +	struct siocb iocb = { .epoch = hdr->epoch };
> +
> +	if (store.restore)
> +		ret = store.restore(&iocb);
Same here: farm.restore looks to be single-node only...

Cheers,
Tao