> -----Original Message----- > From: sheepdog-bounces at lists.wpkg.org [mailto:sheepdog-bounces at lists.wpkg.org] On Behalf Of Liu Yuan > Sent: Sunday, December 25, 2011 11:43 PM > To: morita.kazutaka at lab.ntt.co.jp > Cc: sheepdog at lists.wpkg.org > Subject: [Sheepdog] [PATCH v4 11/12] sheep: add cluster snapshot/restore support > > From: Liu Yuan <tailai.ly at taobao.com> > > This kind of snapshot is supposed be triggered by user, _not_ by recovery > code. I don't think we need to restore to the state at the beginning of > the recovery. So this work only permits us to restore cluster to the > snapshot initiated by end users, thought it is quite easy to implement > to restore to the snapshots forcibly taken by recovery path. > > TODO: > - check the nodes state consistency besides object recovery. > > Signed-off-by: Liu Yuan <tailai.ly at taobao.com> > --- > include/sheep.h | 5 ++- > sheep/farm/farm.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > sheep/ops.c | 58 ++++++++++++++++++++++ > sheep/sheep_priv.h | 4 ++ > 4 files changed, 200 insertions(+), 1 deletions(-) > > diff --git a/include/sheep.h b/include/sheep.h > index 906c1f5..f90b968 100644 > --- a/include/sheep.h > +++ b/include/sheep.h > @@ -37,7 +37,10 @@ > #define SD_OP_STAT_CLUSTER 0x87 > #define SD_OP_KILL_NODE 0x88 > #define SD_OP_GET_VDI_ATTR 0x89 > -#define SD_OP_RECOVER 0x8A > +#define SD_OP_RECOVER 0x8a > +#define SD_OP_SNAPSHOT 0x90 > +#define SD_OP_RESTORE 0x91 > +#define SD_OP_SNAP_FILE 0x92 > > #define SD_FLAG_CMD_IO_LOCAL 0x0010 > #define SD_FLAG_CMD_RECOVERY 0x0020 > diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c > index 813e389..1c26d76 100644 > --- a/sheep/farm/farm.c > +++ b/sheep/farm/farm.c > @@ -410,6 +410,137 @@ out: > return ret; > } > > +static int farm_snapshot(struct siocb *iocb) > +{ > + unsigned char snap_sha1[SHA1_LEN]; > + void *buffer; > + int log_nr, ret = SD_RES_EIO, epoch; > + > + buffer = snap_log_read(&log_nr, 1); > + if (!buffer) > + goto out; > + 
> + epoch = log_nr + 1; > + dprintf("user epoch %d\n", epoch); Will there be concurrent snapshot creators? > + if (snap_file_write(epoch, snap_sha1, 1) < 0) > + goto out; > + > + if (snap_log_write(epoch, snap_sha1, 1) < 0) > + goto out; > + > + ret = SD_RES_SUCCESS; > +out: > + free(buffer); > + return ret; > +} > + > +static int cleanup_working_dir(void) > +{ > + DIR *dir; > + struct dirent *d; > + > + dprintf("try clean up working dir\n"); > + dir = opendir(obj_path); > + if (!dir) > + return -1; > + > + while ((d = readdir(dir))) { > + char p[PATH_MAX]; > + if (!strncmp(d->d_name, ".", 1)) > + continue; > + snprintf(p, sizeof(p), "%s%s", obj_path, d->d_name); > + if (unlink(p) < 0) { > + eprintf("%s:%m\n", p); > + continue; > + } > + dprintf("remove file %s\n", d->d_name); > + } > + closedir(dir); > + return 0; > +} > + > +static int restore_objects_from_snap(int epoch) > +{ > + struct sha1_file_hdr hdr; > + struct trunk_entry *trunk_buf, *trunk_free = NULL; > + unsigned char trunk_sha1[SHA1_LEN]; > + uint64_t nr_trunks, i; > + int ret = SD_RES_EIO; > + > + if (get_trunk_sha1(epoch, trunk_sha1, 1) < 0) > + goto out; > + > + trunk_free = trunk_buf = trunk_file_read(trunk_sha1, &hdr); > + if (!trunk_buf) > + goto out; > + > + nr_trunks = hdr.priv; > + for (i = 0; i < nr_trunks; i++, trunk_buf++) { > + struct sha1_file_hdr h; > + struct siocb io = { 0 }; > + uint64_t oid; > + void *buffer = NULL; > + > + oid = trunk_buf->oid; > + buffer = sha1_file_read(trunk_buf->sha1, &h); > + if (!buffer) { > + eprintf("oid %"PRIx64" not restored\n", oid); > + goto out; > + } > + io.length = h.size; > + io.buf = buffer; > + ret = farm_atomic_put(oid, &io); > + if (ret != SD_RES_SUCCESS) { > + eprintf("oid %"PRIx64" not restored\n", oid); > + goto out; > + } else > + dprintf("oid %"PRIx64" restored\n", oid); nit... 
if one section of "if...else" has brackets, better add them to the other section as well :) > + > + free(buffer); > + } > +out: > + free(trunk_free); > + return ret; > +} > + > +static int farm_restore(struct siocb *iocb) > +{ > + int ret = SD_RES_EIO, epoch = iocb->epoch; > + > + dprintf("try recover user epoch %d\n", epoch); > + > + if (cleanup_working_dir() < 0) { > + eprintf("failed to clean up the working dir %m\n"); > + goto out; > + } > + > + ret = restore_objects_from_snap(epoch); > + if (ret != SD_RES_SUCCESS) > + goto out; > +out: > + return ret; > +} > + > +static int farm_get_snap_file(struct siocb *iocb) > +{ > + int ret = SD_RES_EIO; > + void *buffer = NULL; > + size_t size; > + int nr; > + > + dprintf("try get snap file\n"); > + buffer = snap_log_read(&nr, 1); > + if (!buffer) > + goto out; > + size = nr * sizeof(struct snap_log); > + memcpy(iocb->buf, buffer, size); > + iocb->length = size; > + ret = SD_RES_SUCCESS; > +out: > + free(buffer); > + return ret; > +} > + > struct store_driver farm = { > .driver_name = "farm", > .init = farm_init, > @@ -422,4 +553,7 @@ struct store_driver farm = { > .atomic_put = farm_atomic_put, > .begin_recover = farm_begin_recover, > .end_recover = farm_end_recover, > + .snapshot = farm_snapshot, > + .restore = farm_restore, > + .get_snap_file = farm_get_snap_file, > }; > diff --git a/sheep/ops.c b/sheep/ops.c > index 13ecdf2..5300039 100644 > --- a/sheep/ops.c > +++ b/sheep/ops.c > @@ -13,6 +13,8 @@ > > #include "sheep_priv.h" > > +extern struct store_driver store; > + > enum sd_op_type { > SD_OP_TYPE_CLUSTER = 1, /* cluster operations */ > SD_OP_TYPE_LOCAL, /* local operations */ > @@ -383,6 +385,45 @@ out: > return ret; > } > > +static int cluster_snapshot(const struct sd_req *req, struct sd_rsp *rsp, > + void *data) > +{ > + int ret = SD_RES_SUCCESS; Better return some error instead of success if snapshot is not supported. 
> + struct siocb iocb = { 0 }; > + > + if (store.snapshot) > + ret = store.snapshot(&iocb); How does the above become a cluster-wide snapshot? From what I read, farm.snapshot only ensures that a single node's dirty objects get snapshotted. Or am I missing something? > + > + return ret; > +} > + > +static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp, > + void *data) > +{ > + const struct sd_vdi_req *hdr = (const struct sd_vdi_req *)req; > + int ret = SD_RES_SUCCESS; Ditto... better to return an error instead of success if restore is not supported. > + struct siocb iocb = { .epoch = hdr->epoch }; > + > + if (store.restore) > + ret = store.restore(&iocb); Same here: farm.restore looks to be single-node only... Cheers, Tao |