[Sheepdog] [PATCH v4 11/12] sheep: add cluster snapshot/restore support
tao.peng at emc.com
tao.peng at emc.com
Fri Dec 30 07:50:15 CET 2011
> -----Original Message-----
> From: sheepdog-bounces at lists.wpkg.org [mailto:sheepdog-bounces at lists.wpkg.org] On Behalf Of Liu Yuan
> Sent: Sunday, December 25, 2011 11:43 PM
> To: morita.kazutaka at lab.ntt.co.jp
> Cc: sheepdog at lists.wpkg.org
> Subject: [Sheepdog] [PATCH v4 11/12] sheep: add cluster snapshot/restore support
>
> From: Liu Yuan <tailai.ly at taobao.com>
>
> This kind of snapshot is supposed to be triggered by the user, _not_ by recovery
> code. I don't think we need to restore to the state at the beginning of
> the recovery. So this work only permits us to restore the cluster to the
> snapshot initiated by end users, though it would be quite easy to implement
> restoring to the snapshots forcibly taken by the recovery path.
>
> TODO:
> - check the nodes state consistency besides object recovery.
>
> Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
> ---
> include/sheep.h | 5 ++-
> sheep/farm/farm.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> sheep/ops.c | 58 ++++++++++++++++++++++
> sheep/sheep_priv.h | 4 ++
> 4 files changed, 200 insertions(+), 1 deletions(-)
>
> diff --git a/include/sheep.h b/include/sheep.h
> index 906c1f5..f90b968 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -37,7 +37,10 @@
> #define SD_OP_STAT_CLUSTER 0x87
> #define SD_OP_KILL_NODE 0x88
> #define SD_OP_GET_VDI_ATTR 0x89
> -#define SD_OP_RECOVER 0x8A
> +#define SD_OP_RECOVER 0x8a
> +#define SD_OP_SNAPSHOT 0x90
> +#define SD_OP_RESTORE 0x91
> +#define SD_OP_SNAP_FILE 0x92
>
> #define SD_FLAG_CMD_IO_LOCAL 0x0010
> #define SD_FLAG_CMD_RECOVERY 0x0020
> diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c
> index 813e389..1c26d76 100644
> --- a/sheep/farm/farm.c
> +++ b/sheep/farm/farm.c
> @@ -410,6 +410,137 @@ out:
> return ret;
> }
>
> +static int farm_snapshot(struct siocb *iocb)
> +{
> + unsigned char snap_sha1[SHA1_LEN];
> + void *buffer;
> + int log_nr, ret = SD_RES_EIO, epoch;
> +
> + buffer = snap_log_read(&log_nr, 1);
> + if (!buffer)
> + goto out;
> +
> + epoch = log_nr + 1;
> + dprintf("user epoch %d\n", epoch);
Will there be concurrent snapshot creators? If so, two callers could read the same log_nr and end up using the same epoch.
> + if (snap_file_write(epoch, snap_sha1, 1) < 0)
> + goto out;
> +
> + if (snap_log_write(epoch, snap_sha1, 1) < 0)
> + goto out;
> +
> + ret = SD_RES_SUCCESS;
> +out:
> + free(buffer);
> + return ret;
> +}
> +
> +static int cleanup_working_dir(void)
> +{
> + DIR *dir;
> + struct dirent *d;
> +
> + dprintf("try clean up working dir\n");
> + dir = opendir(obj_path);
> + if (!dir)
> + return -1;
> +
> + while ((d = readdir(dir))) {
> + char p[PATH_MAX];
> + if (!strncmp(d->d_name, ".", 1))
> + continue;
> + snprintf(p, sizeof(p), "%s%s", obj_path, d->d_name);
> + if (unlink(p) < 0) {
> + eprintf("%s:%m\n", p);
> + continue;
> + }
> + dprintf("remove file %s\n", d->d_name);
> + }
> + closedir(dir);
> + return 0;
> +}
> +
> +static int restore_objects_from_snap(int epoch)
> +{
> + struct sha1_file_hdr hdr;
> + struct trunk_entry *trunk_buf, *trunk_free = NULL;
> + unsigned char trunk_sha1[SHA1_LEN];
> + uint64_t nr_trunks, i;
> + int ret = SD_RES_EIO;
> +
> + if (get_trunk_sha1(epoch, trunk_sha1, 1) < 0)
> + goto out;
> +
> + trunk_free = trunk_buf = trunk_file_read(trunk_sha1, &hdr);
> + if (!trunk_buf)
> + goto out;
> +
> + nr_trunks = hdr.priv;
> + for (i = 0; i < nr_trunks; i++, trunk_buf++) {
> + struct sha1_file_hdr h;
> + struct siocb io = { 0 };
> + uint64_t oid;
> + void *buffer = NULL;
> +
> + oid = trunk_buf->oid;
> + buffer = sha1_file_read(trunk_buf->sha1, &h);
> + if (!buffer) {
> + eprintf("oid %"PRIx64" not restored\n", oid);
> + goto out;
> + }
> + io.length = h.size;
> + io.buf = buffer;
> + ret = farm_atomic_put(oid, &io);
> + if (ret != SD_RES_SUCCESS) {
> + eprintf("oid %"PRIx64" not restored\n", oid);
> + goto out;
> + } else
> + dprintf("oid %"PRIx64" restored\n", oid);
nit: if one branch of an "if...else" has braces, better add them to the other branch as well :)
> +
> + free(buffer);
> + }
> +out:
> + free(trunk_free);
> + return ret;
> +}
> +
> +static int farm_restore(struct siocb *iocb)
> +{
> + int ret = SD_RES_EIO, epoch = iocb->epoch;
> +
> + dprintf("try recover user epoch %d\n", epoch);
> +
> + if (cleanup_working_dir() < 0) {
> + eprintf("failed to clean up the working dir %m\n");
> + goto out;
> + }
> +
> + ret = restore_objects_from_snap(epoch);
> + if (ret != SD_RES_SUCCESS)
> + goto out;
> +out:
> + return ret;
> +}
> +
> +static int farm_get_snap_file(struct siocb *iocb)
> +{
> + int ret = SD_RES_EIO;
> + void *buffer = NULL;
> + size_t size;
> + int nr;
> +
> + dprintf("try get snap file\n");
> + buffer = snap_log_read(&nr, 1);
> + if (!buffer)
> + goto out;
> + size = nr * sizeof(struct snap_log);
> + memcpy(iocb->buf, buffer, size);
> + iocb->length = size;
> + ret = SD_RES_SUCCESS;
> +out:
> + free(buffer);
> + return ret;
> +}
> +
> struct store_driver farm = {
> .driver_name = "farm",
> .init = farm_init,
> @@ -422,4 +553,7 @@ struct store_driver farm = {
> .atomic_put = farm_atomic_put,
> .begin_recover = farm_begin_recover,
> .end_recover = farm_end_recover,
> + .snapshot = farm_snapshot,
> + .restore = farm_restore,
> + .get_snap_file = farm_get_snap_file,
> };
> diff --git a/sheep/ops.c b/sheep/ops.c
> index 13ecdf2..5300039 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -13,6 +13,8 @@
>
> #include "sheep_priv.h"
>
> +extern struct store_driver store;
> +
> enum sd_op_type {
> SD_OP_TYPE_CLUSTER = 1, /* cluster operations */
> SD_OP_TYPE_LOCAL, /* local operations */
> @@ -383,6 +385,45 @@ out:
> return ret;
> }
>
> +static int cluster_snapshot(const struct sd_req *req, struct sd_rsp *rsp,
> + void *data)
> +{
> + int ret = SD_RES_SUCCESS;
Better return some error instead of success if snapshot is not supported.
> + struct siocb iocb = { 0 };
> +
> + if (store.snapshot)
> + ret = store.snapshot(&iocb);
How does the above become a cluster-wide snapshot? From what I read, farm.snapshot only ensures that a single node's dirty objects are snapshotted. Or am I missing something?
> +
> + return ret;
> +}
> +
> +static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp,
> + void *data)
> +{
> + const struct sd_vdi_req *hdr = (const struct sd_vdi_req *)req;
> + int ret = SD_RES_SUCCESS;
Ditto: better to return an error instead of success if restore is not supported.
> + struct siocb iocb = { .epoch = hdr->epoch };
> +
> + if (store.restore)
> + ret = store.restore(&iocb);
Same here: farm.restore looks to be single-node only...
Cheers,
Tao
More information about the sheepdog
mailing list