[Sheepdog] [PATCH 3/4] cleanup the data object in system snapshot by user

Fri Mar 30 05:12:44 CEST 2012

From: levin li <xingke.lwp at taobao.com>

Once recovery is over, there is no need to keep the system
snapshot data any more, since it exists only for recovery.
So when recovery is complete, we can clean up the system
snapshot to save disk space. I implement this by adding a
new subcommand 'cluster cleanup' to collie.

Signed-off-by: levin li <xingke.lwp at taobao.com>
---
 collie/cluster.c         |   36 ++++++++++++++++++++++++++
 include/sheep.h          |    2 +
 include/sheepdog_proto.h |    1 +
 sheep/farm/farm.c        |   62 ++++++++++++++++++++++++++++++++++++++++++++++
 sheep/farm/farm.h        |    1 +
 sheep/farm/snap.c        |   22 ++++++++++++++++
 sheep/ops.c              |   26 +++++++++++++++++++
 sheep/sheep_priv.h       |    1 +
 sheep/store.c            |    5 +++
 9 files changed, 156 insertions(+), 0 deletions(-)

diff --git a/collie/cluster.c b/collie/cluster.c
index b846a9a..f28e08f 100644
--- a/collie/cluster.c
+++ b/collie/cluster.c
@@ -367,6 +367,44 @@ static int cluster_snapshot(int argc, char **argv)
 	return ret;
 }
 
+/*
+ * 'cluster cleanup': send an SD_OP_CLEANUP request to the sheep at
+ * sdhost:sdport and report the outcome.  No payload is sent or received.
+ *
+ * Returns EXIT_SUCCESS when the response result is SD_RES_SUCCESS,
+ * EXIT_SYSFAIL if connecting or executing the request fails, and
+ * EXIT_FAILURE for any other response result.
+ */
+static int cluster_cleanup(int argc, char **argv)
+{
+	struct sd_req hdr;
+	/* rsp aliases hdr: the result is read from it after exec_req() */
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	unsigned rlen = 0, wlen = 0;
+	int fd, ret;
+
+	fd = connect_to(sdhost, sdport);
+	if (fd < 0)
+		return EXIT_SYSFAIL;
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.opcode = SD_OP_CLEANUP;
+
+	ret = exec_req(fd, &hdr, NULL, &wlen, &rlen);
+	close(fd);
+
+	if (ret) {
+		fprintf(stderr, "Failed to connect\n");
+		return EXIT_SYSFAIL;
+	}
+
+	if (rsp->result == SD_RES_SUCCESS)
+		return EXIT_SUCCESS;
+
+	fprintf(stderr, "Cleanup failed: %s\n", sd_strerror(rsp->result));
+	return EXIT_FAILURE;
+}
+
 #define RECOVER_PRINT \
 "Caution! Please try starting all the cluster nodes normally before\n\
 running this command.\n\n\
@@ -435,6 +469,8 @@ static struct subcommand cluster_cmd[] = {
 	0, cluster_recover},
 	{"snapshot", NULL, "aRlph", "snapshot/restore the cluster",
 	0, cluster_snapshot},
+	{"cleanup", NULL, "aph", "cleanup the useless snapshot data",
+	0, cluster_cleanup},
 	{NULL,},
 };
 
diff --git a/include/sheep.h b/include/sheep.h
index e435b63..aa9ad8b 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -42,6 +42,7 @@
 #define SD_OP_SNAPSHOT       0x91
 #define SD_OP_RESTORE        0x92
 #define SD_OP_GET_SNAP_FILE  0x93
+#define SD_OP_CLEANUP        0x94
 
 #define SD_FLAG_CMD_IO_LOCAL   0x0010
 #define SD_FLAG_CMD_RECOVERY 0x0020
@@ -269,6 +270,7 @@ static inline const char *sd_strerror(int err)
 		{SD_RES_MANUAL_RECOVER, "Cluster is running/halted and cannot be manually recovered"},
 		{SD_RES_NO_STORE, "Targeted backend store is not found"},
 		{SD_RES_NO_SUPPORT, "Operation is not supported"},
+		{SD_RES_CLUSTER_RECOVERING, "Cluster is recovering"},
 
 		{SD_RES_OLD_NODE_VER, "Remote node has an old epoch"},
 		{SD_RES_NEW_NODE_VER, "Remote node has a new epoch"},
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 6138108..11c2c7c 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -64,6 +64,7 @@
 #define SD_RES_MANUAL_RECOVER   0x1A /* Users should not manually recover this cluster */
 #define SD_RES_NO_STORE         0x20 /* No targeted backend store */
 #define SD_RES_NO_SUPPORT       0x21 /* Operation is not supported by backend store */
+#define SD_RES_CLUSTER_RECOVERING 0x22 /* Cluster is recovering. */
 
 /*
  * Object ID rules
diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c
index 8453de4..861a9b6 100644
--- a/sheep/farm/farm.c
+++ b/sheep/farm/farm.c
@@ -491,6 +491,98 @@ out:
 	return ret;
 }
 
+/*
+ * Delete the snapshot objects recorded for one epoch: every object listed
+ * in the epoch's trunk, then the trunk file itself.
+ *
+ * Failures to delete individual objects are ignored.  Returns
+ * SD_RES_SUCCESS once the trunk itself has been deleted and SD_RES_EIO
+ * if the trunk cannot be looked up, read or deleted.
+ */
+static int trunk_cleanup(int epoch)
+{
+	struct sha1_file_hdr hdr;
+	struct trunk_entry *trunk_buf, *trunk_free = NULL;
+	unsigned char trunk_sha1[SHA1_LEN];
+	uint64_t nr_trunks, i;
+	int ret = SD_RES_EIO;
+
+	if (get_trunk_sha1(epoch, trunk_sha1, 0) < 0)
+		goto out;
+
+	trunk_free = trunk_buf = trunk_file_read(trunk_sha1, &hdr);
+	if (!trunk_buf)
+		goto out;
+
+	/* hdr.priv is used as the number of entries in the trunk */
+	nr_trunks = hdr.priv;
+	for (i = 0; i < nr_trunks; i++, trunk_buf++)
+		sha1_file_try_delete(trunk_buf->sha1);
+
+	if (sha1_file_try_delete(trunk_sha1) < 0)
+		goto out;
+
+	ret = SD_RES_SUCCESS;
+
+out:
+	/* trunk_buf has been advanced by the loop; free the original pointer */
+	free(trunk_free);
+	return ret;
+}
+
+/*
+ * Drop system snapshot data for every epoch up to and including
+ * iocb->epoch, then rewrite the snapshot log so that it only keeps the
+ * entries of later epochs.
+ *
+ * Per-epoch trunk cleanup is best effort: its result is not checked.
+ * Returns SD_RES_SUCCESS, or SD_RES_EIO if the log cannot be read, reset
+ * or rewritten.
+ */
+static int farm_cleanup(struct siocb *iocb)
+{
+	int i, ret = SD_RES_SUCCESS;
+	int epoch = iocb->epoch;
+	struct snap_log *log_pos, *log_free = NULL;
+	int nr_logs = 0;
+
+	for (i = 1; i <= epoch; i++)
+		trunk_cleanup(i);
+
+	log_free = log_pos = snap_log_read(&nr_logs, 0);
+	if (!log_pos) {
+		/*
+		 * Assumes snap_log_read() returns NULL on failure, in which
+		 * case nr_logs may not have been filled in.  Leave the log
+		 * alone rather than truncating something we could not read.
+		 */
+		dprintf("snap log read fail\n");
+		ret = SD_RES_EIO;
+		goto out;
+	}
+
+	if (snap_log_reset() < 0) {
+		dprintf("snap reset fail\n");
+		ret = SD_RES_EIO;
+		goto out;
+	}
+
+	for (i = 0; i < nr_logs; i++, log_pos++) {
+		if (log_pos->epoch > epoch) {
+			if (snap_log_write(log_pos->epoch, log_pos->sha1, 0) < 0) {
+				dprintf("snap write fail %d, %s\n",
+						log_pos->epoch, sha1_to_hex(log_pos->sha1));
+				ret = SD_RES_EIO;
+				goto out;
+			}
+		}
+	}
+
+out:
+	free(log_free);
+	return ret;
+}
+
 static int cleanup_working_dir(void)
 {
 	DIR *dir;
@@ -637,6 +698,7 @@ struct store_driver farm = {
 	.atomic_put = farm_atomic_put,
 	.end_recover = farm_end_recover,
 	.snapshot = farm_snapshot,
+	.cleanup = farm_cleanup,
 	.restore = farm_restore,
 	.get_snap_file = farm_get_snap_file,
 	.format = farm_format,
diff --git a/sheep/farm/farm.h b/sheep/farm/farm.h
index e7978b8..4483ca0 100644
--- a/sheep/farm/farm.h
+++ b/sheep/farm/farm.h
@@ -75,6 +75,7 @@ extern int trunk_get_working_objlist(uint64_t *list);
 extern int snap_init(void);
 extern void *snap_file_read(unsigned char *sha1, struct sha1_file_hdr *outhdr);
 extern int snap_file_write(int epoch, unsigned char *trunksha1, unsigned char *outsha1, int user);
+extern int snap_log_reset(void);
 extern void *snap_log_read(int *, int user);
 extern int snap_log_write(int epoch, unsigned char *sha1, int user);
 
diff --git a/sheep/farm/snap.c b/sheep/farm/snap.c
index 65fcc0c..89851ea 100644
--- a/sheep/farm/snap.c
+++ b/sheep/farm/snap.c
@@ -57,6 +57,36 @@ out:
 	return ret;
 }
 
+/*
+ * Truncate the system snapshot log (<farm_dir>/sys_snap) to zero length,
+ * creating it if it does not exist yet.
+ *
+ * Returns 0 on success and -1 if the file cannot be opened.
+ */
+int snap_log_reset(void)
+{
+	int fd, ret = 0;
+	struct strbuf buf = STRBUF_INIT;
+
+	strbuf_addstr(&buf, farm_dir);
+	strbuf_addf(&buf, "/%s", "sys_snap");
+
+	/*
+	 * O_TRUNC is only specified for files opened for writing, so ask
+	 * for write access explicitly.
+	 */
+	fd = open(buf.buf, O_WRONLY | O_CREAT | O_TRUNC, 0666);
+	if (fd < 0) {
+		ret = -1;
+		goto out;
+	}
+	close(fd);
+
+out:
+	strbuf_release(&buf);
+	return ret;
+}
+
 int snap_log_write(int epoch, unsigned char *sha1, int user)
 {
 	int fd, ret = -1;
diff --git a/sheep/ops.c b/sheep/ops.c
index ec40986..bedafbe 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -427,6 +427,30 @@ static int cluster_snapshot(const struct sd_req *req, struct sd_rsp *rsp,
 	return ret;
 }
 
+/*
+ * process_main handler for SD_OP_CLEANUP: invoke the store driver's
+ * cleanup hook with the current epoch (sys->epoch).
+ *
+ * Returns SD_RES_CLUSTER_RECOVERING while cluster_recovering is set,
+ * SD_RES_NO_SUPPORT if the store driver has no cleanup hook, otherwise
+ * whatever the driver's cleanup hook returns.
+ */
+static int cluster_cleanup(const struct sd_req *req, struct sd_rsp *rsp,
+				void *data)
+{
+	extern int cluster_recovering;
+	struct siocb iocb = { 0 };
+
+	if (cluster_recovering)
+		return SD_RES_CLUSTER_RECOVERING;
+
+	if (!sd_store->cleanup)
+		return SD_RES_NO_SUPPORT;
+
+	iocb.epoch = sys->epoch;
+	return sd_store->cleanup(&iocb);
+}
+
 static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp,
 			   void *data)
 {
@@ -531,6 +551,12 @@ static struct sd_op_template sd_ops[] = {
 		.process_main = cluster_restore,
 	},
 
+	[SD_OP_CLEANUP] = {
+		.type = SD_OP_TYPE_CLUSTER,
+		.force = 1,
+		.process_main = cluster_cleanup,
+	},
+
 	/* local operations */
 	[SD_OP_GET_STORE_LIST] = {
 		.type = SD_OP_TYPE_LOCAL,
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 8046516..c48b147 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -182,6 +182,7 @@ struct store_driver {
 	int (*end_recover)(struct siocb *);
 	/* Operations for snapshot */
 	int (*snapshot)(struct siocb *);
+	int (*cleanup)(struct siocb *);
 	int (*restore)(struct siocb *);
 	int (*get_snap_file)(struct siocb *);
 };
diff --git a/sheep/store.c b/sheep/store.c
index 3471c76..67e883b 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -53,6 +53,8 @@ struct objlist_cache_entry {
 	struct rb_node node;
 };
 
+int cluster_recovering = 0;
+
 static struct objlist_cache obj_list_cache;
 
 mode_t def_dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP;
@@ -1691,6 +1693,7 @@ static void do_recover_main(struct work *work)
 			iocb.epoch = sys->epoch;
 			sd_store->end_recover(&iocb);
 		}
+		cluster_recovering = 0;
 	}
 
 	resume_pending_requests();
@@ -1905,6 +1908,8 @@ int start_recovery(uint32_t epoch)
 	if (!rw)
 		return -1;
 
+	cluster_recovering = 1;
+
 	rw->state = RW_INIT;
 	rw->oids = malloc(1 << 20); /* FIXME */
 	rw->epoch = epoch;
-- 
1.7.1