[Sheepdog] [PATCH 2/3] cleanup the data object in system snapshot by user
levin li
levin108 at gmail.com
Thu Apr 5 07:17:04 CEST 2012
On 2012年04月05日 10:46, Li Wenpeng wrote:
> From: levin li<xingke.lwp at taobao.com>
>
> When recovery is over, there is no need to keep the system
> snapshot data any more, since it is only used for recovery.
> So once recovery is complete, we can clean up the
> system snapshot to save disk space. I implement this
> by adding a new subcommand 'cluster cleanup' to collie.
>
> Signed-off-by: levin li<xingke.lwp at taobao.com>
> ---
> collie/cluster.c | 36 +++++++++++++++++++++++++
> include/sheep.h | 2 +
> include/sheepdog_proto.h | 1 +
> sheep/farm/farm.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++
> sheep/farm/farm.h | 1 +
> sheep/farm/snap.c | 22 +++++++++++++++
> sheep/ops.c | 24 +++++++++++++++++
> sheep/sheep_priv.h | 1 +
> 8 files changed, 152 insertions(+), 0 deletions(-)
>
> diff --git a/collie/cluster.c b/collie/cluster.c
> index b846a9a..f28e08f 100644
> --- a/collie/cluster.c
> +++ b/collie/cluster.c
> @@ -367,6 +367,40 @@ static int cluster_snapshot(int argc, char **argv)
> return ret;
> }
>
> +static int cluster_cleanup(int argc, char **argv)
> +{
> + int fd, ret;
> + struct sd_req hdr;
> + struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
> + unsigned rlen, wlen;
> +
> + fd = connect_to(sdhost, sdport);
> + if (fd< 0)
> + return EXIT_SYSFAIL;
> +
> + memset(&hdr, 0, sizeof(hdr));
> +
> + hdr.opcode = SD_OP_CLEANUP;
> +
> + rlen = 0;
> + wlen = 0;
> + ret = exec_req(fd,&hdr, NULL,&wlen,&rlen);
> + close(fd);
> +
> + if (ret) {
> + fprintf(stderr, "Failed to connect\n");
> + return EXIT_SYSFAIL;
> + }
> +
> + if (rsp->result != SD_RES_SUCCESS) {
> + fprintf(stderr, "Cleanup failed: %s\n",
> + sd_strerror(rsp->result));
> + return EXIT_FAILURE;
> + }
> +
> + return EXIT_SUCCESS;
> +}
> +
> #define RECOVER_PRINT \
> "Caution! Please try starting all the cluster nodes normally before\n\
> running this command.\n\n\
> @@ -435,6 +469,8 @@ static struct subcommand cluster_cmd[] = {
> 0, cluster_recover},
> {"snapshot", NULL, "aRlph", "snapshot/restore the cluster",
> 0, cluster_snapshot},
> + {"cleanup", NULL, "aph", "cleanup the useless snapshot data",
> + 0, cluster_cleanup},
> {NULL,},
> };
>
> diff --git a/include/sheep.h b/include/sheep.h
> index e435b63..aa9ad8b 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -42,6 +42,7 @@
> #define SD_OP_SNAPSHOT 0x91
> #define SD_OP_RESTORE 0x92
> #define SD_OP_GET_SNAP_FILE 0x93
> +#define SD_OP_CLEANUP 0x94
>
> #define SD_FLAG_CMD_IO_LOCAL 0x0010
> #define SD_FLAG_CMD_RECOVERY 0x0020
> @@ -269,6 +270,7 @@ static inline const char *sd_strerror(int err)
> {SD_RES_MANUAL_RECOVER, "Cluster is running/halted and cannot be manually recovered"},
> {SD_RES_NO_STORE, "Targeted backend store is not found"},
> {SD_RES_NO_SUPPORT, "Operation is not supported"},
> + {SD_RES_CLUSTER_RECOVERING, "Cluster is recovering"},
>
> {SD_RES_OLD_NODE_VER, "Remote node has an old epoch"},
> {SD_RES_NEW_NODE_VER, "Remote node has a new epoch"},
> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
> index 6138108..11c2c7c 100644
> --- a/include/sheepdog_proto.h
> +++ b/include/sheepdog_proto.h
> @@ -64,6 +64,7 @@
> #define SD_RES_MANUAL_RECOVER 0x1A /* Users should not manually recover this cluster */
> #define SD_RES_NO_STORE 0x20 /* No targeted backend store */
> #define SD_RES_NO_SUPPORT 0x21 /* Operation is not supported by backend store */
> +#define SD_RES_CLUSTER_RECOVERING 0x22 /* Cluster is recovering. */
>
> /*
> * Object ID rules
> diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c
> index 6a74214..49bb8e1 100644
> --- a/sheep/farm/farm.c
> +++ b/sheep/farm/farm.c
> @@ -491,6 +491,70 @@ out:
> return ret;
> }
>
> +static int trunk_cleanup(int epoch)
> +{
> + struct sha1_file_hdr hdr;
> + struct trunk_entry *trunk_buf, *trunk_free = NULL;
> + unsigned char trunk_sha1[SHA1_LEN];
> + uint64_t nr_trunks, i;
> + int ret = SD_RES_EIO;
> +
> + if (get_trunk_sha1(epoch, trunk_sha1, 0)< 0)
> + goto out;
> +
> + trunk_free = trunk_buf = trunk_file_read(trunk_sha1,&hdr);
> + if (!trunk_buf)
> + goto out;
> +
> + nr_trunks = hdr.priv;
> + for (i = 0; i< nr_trunks; i++, trunk_buf++)
> + sha1_file_try_delete(trunk_buf->sha1);
> +
> + if (sha1_file_try_delete(trunk_sha1)< 0)
> + goto out;
> +
> + ret = SD_RES_SUCCESS;
> +
> +out:
> + return ret;
> +}
> +
> +static int farm_cleanup_sys_obj(struct siocb *iocb)
> +{
> + int i, ret = SD_RES_SUCCESS;
> + int epoch = iocb->epoch;
> + struct snap_log *log_pos, *log_free = NULL;
> + int nr_logs;
> +
> + if (iocb<= 0)
> + return ret;
> +
> + for (i = 1; i<= epoch; i++)
> + trunk_cleanup(i);
> +
> + log_free = log_pos = snap_log_read(&nr_logs, 0);
> + if (snap_log_truncate()< 0) {
> + dprintf("snap reset fail\n");
> + ret = SD_RES_EIO;
> + goto out;
> + }
> +
> + for (i = 0; i< nr_logs; i++, log_pos++) {
> + if (log_pos->epoch> epoch) {
> + if (snap_log_write(log_pos->epoch, log_pos->sha1, 0)< 0) {
> + dprintf("snap write fail %d, %s\n",
> + log_pos->epoch, sha1_to_hex(log_pos->sha1));
> + ret = SD_RES_EIO;
> + goto out;
> + }
> + }
> + }
> +
> +out:
> + free(log_free);
> + return ret;
> +}
> +
> static int cleanup_working_dir(void)
> {
> DIR *dir;
> @@ -637,6 +701,7 @@ struct store_driver farm = {
> .atomic_put = farm_atomic_put,
> .end_recover = farm_end_recover,
> .snapshot = farm_snapshot,
> + .cleanup = farm_cleanup_sys_obj,
> .restore = farm_restore,
> .get_snap_file = farm_get_snap_file,
> .format = farm_format,
> diff --git a/sheep/farm/farm.h b/sheep/farm/farm.h
> index e7978b8..aa73737 100644
> --- a/sheep/farm/farm.h
> +++ b/sheep/farm/farm.h
> @@ -75,6 +75,7 @@ extern int trunk_get_working_objlist(uint64_t *list);
> extern int snap_init(void);
> extern void *snap_file_read(unsigned char *sha1, struct sha1_file_hdr *outhdr);
> extern int snap_file_write(int epoch, unsigned char *trunksha1, unsigned char *outsha1, int user);
> +extern int snap_log_truncate(void);
> extern void *snap_log_read(int *, int user);
> extern int snap_log_write(int epoch, unsigned char *sha1, int user);
>
> diff --git a/sheep/farm/snap.c b/sheep/farm/snap.c
> index 65fcc0c..e86eaaf 100644
> --- a/sheep/farm/snap.c
> +++ b/sheep/farm/snap.c
> @@ -57,6 +57,28 @@ out:
> return ret;
> }
>
> +int snap_log_truncate(void)
> +{
> + int fd, ret = 0;
> + struct strbuf buf = STRBUF_INIT;
> +
> + strbuf_addstr(&buf, farm_dir);
> + strbuf_addf(&buf, "/%s", "sys_snap");
> +
> + fd = open(buf.buf, O_CREAT | O_TRUNC, 0666);
> + if (fd< 0) {
> + if (errno != EEXIST) {
> + ret = -1;
> + goto out;
> + }
> + }
> + close(fd);
> +
> +out:
> + strbuf_release(&buf);
> + return ret;
> +}
> +
> int snap_log_write(int epoch, unsigned char *sha1, int user)
> {
> int fd, ret = -1;
> diff --git a/sheep/ops.c b/sheep/ops.c
> index ec40986..028edc5 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -427,6 +427,24 @@ static int cluster_snapshot(const struct sd_req *req, struct sd_rsp *rsp,
> return ret;
> }
>
> +static int cluster_cleanup(const struct sd_req *req, struct sd_rsp *rsp,
> + void *data)
> +{
> + int ret;
> + struct siocb iocb = { 0 };
> + iocb.epoch = sys->epoch;
> +
> + if (node_in_recovery())
> + return SD_RES_CLUSTER_RECOVERING;
> +
> + if (sd_store->cleanup)
> + ret = sd_store->cleanup(&iocb);
> + else
> + ret = SD_RES_NO_SUPPORT;
> +
> + return ret;
> +}
> +
> static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp,
> void *data)
> {
> @@ -531,6 +549,12 @@ static struct sd_op_template sd_ops[] = {
> .process_main = cluster_restore,
> },
>
> + [SD_OP_CLEANUP] = {
> + .type = SD_OP_TYPE_CLUSTER,
> + .force = 1,
> + .process_main = cluster_cleanup,
> + },
> +
> /* local operations */
> [SD_OP_GET_STORE_LIST] = {
> .type = SD_OP_TYPE_LOCAL,
> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
> index 8046516..c48b147 100644
> --- a/sheep/sheep_priv.h
> +++ b/sheep/sheep_priv.h
> @@ -182,6 +182,7 @@ struct store_driver {
> int (*end_recover)(struct siocb *);
> /* Operations for snapshot */
> int (*snapshot)(struct siocb *);
> + int (*cleanup)(struct siocb *);
> int (*restore)(struct siocb *);
> int (*get_snap_file)(struct siocb *);
> };
Patch update:
In farm_cleanup_sys_obj(), write the snapshot log back starting from the end,
so that only the existing snap log entries whose epoch is newer than the epoch
being deleted are traversed.
---
collie/cluster.c | 36 +++++++++++++++++++++++++
include/sheep.h | 2 +
include/sheepdog_proto.h | 1 +
sheep/farm/farm.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++
sheep/farm/farm.h | 1 +
sheep/farm/snap.c | 22 +++++++++++++++
sheep/ops.c | 24 +++++++++++++++++
sheep/sheep_priv.h | 1 +
8 files changed, 152 insertions(+), 0 deletions(-)
diff --git a/collie/cluster.c b/collie/cluster.c
index b846a9a..f28e08f 100644
--- a/collie/cluster.c
+++ b/collie/cluster.c
@@ -367,6 +367,40 @@ static int cluster_snapshot(int argc, char **argv)
return ret;
}
+/*
+ * 'cluster cleanup' subcommand: send SD_OP_CLEANUP to the node at
+ * sdhost:sdport and report the outcome on stderr.
+ *
+ * argc/argv are not used.
+ *
+ * Returns EXIT_SYSFAIL when the connection or the request fails,
+ * EXIT_FAILURE when the reply carries a result other than
+ * SD_RES_SUCCESS, and EXIT_SUCCESS otherwise.
+ */
+static int cluster_cleanup(int argc, char **argv)
+{
+	struct sd_req hdr;
+	/* rsp aliases hdr: the result is read from the same buffer after exec_req() */
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	unsigned wlen = 0, rlen = 0;
+	int fd, err;
+
+	fd = connect_to(sdhost, sdport);
+	if (fd < 0)
+		return EXIT_SYSFAIL;
+
+	/* the request carries no payload, only the opcode */
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.opcode = SD_OP_CLEANUP;
+
+	err = exec_req(fd, &hdr, NULL, &wlen, &rlen);
+	close(fd);
+	if (err) {
+		fprintf(stderr, "Failed to connect\n");
+		return EXIT_SYSFAIL;
+	}
+
+	if (rsp->result == SD_RES_SUCCESS)
+		return EXIT_SUCCESS;
+
+	fprintf(stderr, "Cleanup failed: %s\n", sd_strerror(rsp->result));
+	return EXIT_FAILURE;
+}
+
#define RECOVER_PRINT \
"Caution! Please try starting all the cluster nodes normally before\n\
running this command.\n\n\
@@ -435,6 +469,8 @@ static struct subcommand cluster_cmd[] = {
0, cluster_recover},
{"snapshot", NULL, "aRlph", "snapshot/restore the cluster",
0, cluster_snapshot},
+ {"cleanup", NULL, "aph", "cleanup the useless snapshot data",
+ 0, cluster_cleanup},
{NULL,},
};
diff --git a/include/sheep.h b/include/sheep.h
index e435b63..aa9ad8b 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -42,6 +42,7 @@
#define SD_OP_SNAPSHOT 0x91
#define SD_OP_RESTORE 0x92
#define SD_OP_GET_SNAP_FILE 0x93
+#define SD_OP_CLEANUP 0x94
#define SD_FLAG_CMD_IO_LOCAL 0x0010
#define SD_FLAG_CMD_RECOVERY 0x0020
@@ -269,6 +270,7 @@ static inline const char *sd_strerror(int err)
{SD_RES_MANUAL_RECOVER, "Cluster is running/halted and cannot be
manually recovered"},
{SD_RES_NO_STORE, "Targeted backend store is not found"},
{SD_RES_NO_SUPPORT, "Operation is not supported"},
+ {SD_RES_CLUSTER_RECOVERING, "Cluster is recovering"},
{SD_RES_OLD_NODE_VER, "Remote node has an old epoch"},
{SD_RES_NEW_NODE_VER, "Remote node has a new epoch"},
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 6138108..11c2c7c 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -64,6 +64,7 @@
#define SD_RES_MANUAL_RECOVER 0x1A /* Users should not manually recover
this cluster */
#define SD_RES_NO_STORE 0x20 /* No targeted backend store */
#define SD_RES_NO_SUPPORT 0x21 /* Operation is not supported by backend
store */
+#define SD_RES_CLUSTER_RECOVERING 0x22 /* Cluster is recovering. */
/*
* Object ID rules
diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c
index 6a74214..9500e07 100644
--- a/sheep/farm/farm.c
+++ b/sheep/farm/farm.c
@@ -491,6 +491,70 @@ out:
return ret;
}
+/*
+ * Try to delete every sha1 object listed in the trunk recorded for
+ * @epoch, then the trunk file itself.
+ *
+ * Returns SD_RES_SUCCESS when the trunk was looked up, read and its own
+ * file deleted; SD_RES_EIO when any of those three steps fails.
+ */
+static int trunk_cleanup(int epoch)
+{
+	struct sha1_file_hdr hdr;
+	struct trunk_entry *trunk_buf, *trunk_free = NULL;
+	unsigned char trunk_sha1[SHA1_LEN];
+	uint64_t nr_trunks, i;
+	int ret = SD_RES_EIO;
+
+	if (get_trunk_sha1(epoch, trunk_sha1, 0) < 0)
+		goto out;
+
+	/* keep the start of the buffer in trunk_free; trunk_buf is advanced below */
+	trunk_free = trunk_buf = trunk_file_read(trunk_sha1, &hdr);
+	if (!trunk_buf)
+		goto out;
+
+	/* hdr.priv is used as the number of entries in the trunk */
+	nr_trunks = hdr.priv;
+	/* per-entry deletion is best effort: its result is not checked */
+	for (i = 0; i < nr_trunks; i++, trunk_buf++)
+		sha1_file_try_delete(trunk_buf->sha1);
+
+	if (sha1_file_try_delete(trunk_sha1) < 0)
+		goto out;
+
+	ret = SD_RES_SUCCESS;
+
+out:
+	/*
+	 * The buffer was previously leaked on every path.
+	 * NOTE(review): assumes trunk_file_read() returns heap memory owned
+	 * by the caller, as the trunk_free name suggests -- confirm.
+	 */
+	free(trunk_free);
+	return ret;
+}
+
+/*
+ * Clean up the snapshot data of every epoch from 1 to iocb->epoch and
+ * rewrite the snapshot log so that only entries with a newer epoch
+ * remain.
+ *
+ * Returns SD_RES_SUCCESS (also when iocb->epoch is not positive and
+ * nothing is done), or SD_RES_EIO when the snapshot log cannot be read,
+ * truncated or rewritten.
+ */
+static int farm_cleanup_sys_obj(struct siocb *iocb)
+{
+	int i, ret = SD_RES_SUCCESS;
+	int epoch = iocb->epoch;
+	struct snap_log *logs;
+	int nr_logs = 0;
+
+	/*
+	 * The guard used to read 'iocb <= 0', an ordered comparison of a
+	 * pointer that had already been dereferenced; the epoch is what
+	 * has to be positive.
+	 */
+	if (epoch <= 0)
+		return ret;
+
+	/* best effort: the result of each per-epoch cleanup is ignored */
+	for (i = 1; i <= epoch; i++)
+		trunk_cleanup(i);
+
+	/* do not truncate a log that could not be read back first */
+	logs = snap_log_read(&nr_logs, 0);
+	if (!logs) {
+		dprintf("snap log read fail\n");
+		return SD_RES_EIO;
+	}
+
+	if (snap_log_truncate() < 0) {
+		dprintf("snap reset fail\n");
+		ret = SD_RES_EIO;
+		goto out;
+	}
+
+	/*
+	 * Walk back from the last entry and stop at the first one that is
+	 * not newer than @epoch.  Indexing (instead of a moving pointer)
+	 * avoids forming a pointer before the buffer when nr_logs is 0.
+	 * NOTE(review): entries are written back newest first; if
+	 * snap_log_write() appends, the log order ends up reversed --
+	 * confirm that readers do not depend on the order.
+	 */
+	for (i = nr_logs - 1; i >= 0 && logs[i].epoch > epoch; i--) {
+		if (snap_log_write(logs[i].epoch, logs[i].sha1, 0) < 0) {
+			dprintf("snap write fail %d, %s\n",
+				logs[i].epoch, sha1_to_hex(logs[i].sha1));
+			ret = SD_RES_EIO;
+			goto out;
+		}
+	}
+
+out:
+	free(logs);
+	return ret;
+}
+
static int cleanup_working_dir(void)
{
DIR *dir;
@@ -637,6 +701,7 @@ struct store_driver farm = {
.atomic_put = farm_atomic_put,
.end_recover = farm_end_recover,
.snapshot = farm_snapshot,
+ .cleanup = farm_cleanup_sys_obj,
.restore = farm_restore,
.get_snap_file = farm_get_snap_file,
.format = farm_format,
diff --git a/sheep/farm/farm.h b/sheep/farm/farm.h
index e7978b8..aa73737 100644
--- a/sheep/farm/farm.h
+++ b/sheep/farm/farm.h
@@ -75,6 +75,7 @@ extern int trunk_get_working_objlist(uint64_t *list);
extern int snap_init(void);
extern void *snap_file_read(unsigned char *sha1, struct sha1_file_hdr
*outhdr);
extern int snap_file_write(int epoch, unsigned char *trunksha1, unsigned
char *outsha1, int user);
+extern int snap_log_truncate(void);
extern void *snap_log_read(int *, int user);
extern int snap_log_write(int epoch, unsigned char *sha1, int user);
diff --git a/sheep/farm/snap.c b/sheep/farm/snap.c
index 65fcc0c..e86eaaf 100644
--- a/sheep/farm/snap.c
+++ b/sheep/farm/snap.c
@@ -57,6 +57,28 @@ out:
return ret;
}
+/*
+ * Truncate the "sys_snap" file under farm_dir to zero length, creating
+ * it when it does not exist.
+ *
+ * Returns 0 on success, -1 when the file cannot be opened.
+ */
+int snap_log_truncate(void)
+{
+	int fd, ret = 0;
+	struct strbuf buf = STRBUF_INIT;
+
+	strbuf_addstr(&buf, farm_dir);
+	strbuf_addf(&buf, "/%s", "sys_snap");
+
+	/*
+	 * O_TRUNC is only defined by POSIX together with a write access
+	 * mode, so O_WRONLY is requested explicitly.
+	 */
+	fd = open(buf.buf, O_WRONLY | O_CREAT | O_TRUNC, 0666);
+	if (fd < 0) {
+		/*
+		 * EEXIST cannot be reported without O_EXCL, so every
+		 * failure is an error; this also keeps close() away from
+		 * an invalid descriptor.
+		 */
+		ret = -1;
+		goto out;
+	}
+	close(fd);
+
+out:
+	strbuf_release(&buf);
+	return ret;
+}
+
int snap_log_write(int epoch, unsigned char *sha1, int user)
{
int fd, ret = -1;
diff --git a/sheep/ops.c b/sheep/ops.c
index ec40986..028edc5 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -427,6 +427,24 @@ static int cluster_snapshot(const struct sd_req
*req, struct sd_rsp *rsp,
return ret;
}
+/*
+ * Handler for SD_OP_CLEANUP: hand the current epoch to the store
+ * driver's cleanup hook.
+ *
+ * req, rsp and data are not used.
+ *
+ * Returns SD_RES_CLUSTER_RECOVERING while node_in_recovery() is true,
+ * SD_RES_NO_SUPPORT when the store driver has no cleanup hook, and the
+ * hook's own result otherwise.
+ */
+static int cluster_cleanup(const struct sd_req *req, struct sd_rsp *rsp,
+			   void *data)
+{
+	struct siocb iocb = { 0 };
+
+	iocb.epoch = sys->epoch;
+
+	if (node_in_recovery())
+		return SD_RES_CLUSTER_RECOVERING;
+
+	if (!sd_store->cleanup)
+		return SD_RES_NO_SUPPORT;
+
+	return sd_store->cleanup(&iocb);
+}
+
static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp,
void *data)
{
@@ -531,6 +549,12 @@ static struct sd_op_template sd_ops[] = {
.process_main = cluster_restore,
},
+ [SD_OP_CLEANUP] = {
+ .type = SD_OP_TYPE_CLUSTER,
+ .force = 1,
+ .process_main = cluster_cleanup,
+ },
+
/* local operations */
[SD_OP_GET_STORE_LIST] = {
.type = SD_OP_TYPE_LOCAL,
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 8046516..c48b147 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -182,6 +182,7 @@ struct store_driver {
int (*end_recover)(struct siocb *);
/* Operations for snapshot */
int (*snapshot)(struct siocb *);
+ int (*cleanup)(struct siocb *);
int (*restore)(struct siocb *);
int (*get_snap_file)(struct siocb *);
};
--
1.7.1
More information about the sheepdog
mailing list