[sheepdog] [PATCH RESEND 1/6] farm: do cluster-wide snapshot by work queue

Kai Zhang kyle at zelin.io
Wed Jun 5 12:35:09 CEST 2013


Currently, loading a snapshot uses a dynamic work queue, whereas saving a snapshot uses an ordered queue.
This is because parallelized snapshot saving risks slowing down the cluster's performance.
If needed, saving can easily be parallelized as well by switching it to a dynamic queue.

Signed-off-by: Kai Zhang <kyle at zelin.io>
---
 collie/farm/farm.c        |  188 +++++++++++++++++++++++++++++++++------------
 collie/farm/farm.h        |   11 +--
 collie/farm/object_tree.c |   15 +---
 collie/farm/trunk.c       |   15 +---
 4 files changed, 149 insertions(+), 80 deletions(-)

diff --git a/collie/farm/farm.c b/collie/farm/farm.c
index cb4aaa9..072ee16 100644
--- a/collie/farm/farm.c
+++ b/collie/farm/farm.c
@@ -13,12 +13,15 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <pthread.h>
+
 #include "farm.h"
 #include "list.h"
 
 static char farm_object_dir[PATH_MAX];
 static char farm_dir[PATH_MAX];
 
+static pthread_rwlock_t vdi_list_lock = PTHREAD_RWLOCK_INITIALIZER;
 struct vdi_entry {
 	char name[SD_MAX_VDI_LEN];
 	uint64_t vdi_size;
@@ -29,6 +32,14 @@ struct vdi_entry {
 };
 static LIST_HEAD(last_vdi_list);
 
+struct snapshot_work {
+	struct trunk_entry entry;
+	struct strbuf *trunk_buf;
+	struct work work;
+};
+struct work_queue *wq;
+static uatomic_bool work_error;
+
 static struct vdi_entry *find_vdi(const char *name)
 {
 	struct vdi_entry *vdi;
@@ -186,39 +197,6 @@ static int notify_vdi_add(uint32_t vdi_id, uint32_t nr_copies)
 	return ret;
 }
 
-static int fill_trunk_entry(uint64_t oid, int nr_copies,
-			    void *buf, size_t size, void *data)
-{
-	int ret = -1;
-
-	struct strbuf *trunk_entries = data;
-	struct trunk_entry new_entry = {};
-	struct sha1_file_hdr hdr = { .priv = 0 };
-	struct strbuf object_strbuf = STRBUF_INIT;
-
-	memcpy(hdr.tag, TAG_DATA, TAG_LEN);
-	hdr.size = size;
-
-	strbuf_add(&object_strbuf, buf, size);
-	strbuf_insert(&object_strbuf, 0, &hdr, sizeof(hdr));
-
-	if (sha1_file_write((void *)object_strbuf.buf,
-			    object_strbuf.len,
-			    new_entry.sha1) != 0)
-		goto out;
-
-	new_entry.oid = oid;
-	new_entry.nr_copies = nr_copies;
-	strbuf_add(trunk_entries, &new_entry, sizeof(struct trunk_entry));
-
-	ret = 0;
-out:
-	if (ret)
-		fprintf(stderr, "Fail to fill trunk entry\n.");
-	strbuf_release(&object_strbuf);
-	return ret;
-}
-
 int farm_init(const char *path)
 {
 	int ret = -1;
@@ -240,14 +218,75 @@ bool farm_contain_snapshot(uint32_t idx, const char *tag)
 	return (get_trunk_sha1(idx, tag, trunk_sha1) == 0);
 }
 
+static void do_save_object(struct work *work)
+{
+	void *sha1_buf, *data_buf;
+	size_t sha1_size, data_size;
+	struct snapshot_work *sw;
+	struct sha1_file_hdr *hdr;
+
+	if (uatomic_is_true(&work_error))
+		return;
+
+	sw = container_of(work, struct snapshot_work, work);
+	data_size = get_objsize(sw->entry.oid);
+	sha1_size = data_size + sizeof(struct sha1_file_hdr);
+	hdr = sha1_buf = xmalloc(sha1_size);
+	data_buf = (char *)sha1_buf + sizeof(struct sha1_file_hdr);
+
+	if (sd_read_object(sw->entry.oid, data_buf, data_size, 0, true) < 0)
+		goto error;
+
+	memcpy(hdr->tag, TAG_DATA, TAG_LEN);
+	hdr->size = data_size;
+
+	if (sha1_file_write(sha1_buf, sha1_size, sw->entry.sha1) < 0)
+		goto error;
+
+	free(sha1_buf);
+	return;
+error:
+	free(sha1_buf);
+	fprintf(stderr, "Fail to save object, oid %"PRIu64"\n",
+		sw->entry.oid);
+	uatomic_set_true(&work_error);
+}
+
+static void save_object_done(struct work *work)
+{
+	struct snapshot_work *sw = container_of(work, struct snapshot_work,
+						work);
+
+	if (uatomic_is_true(&work_error))
+		goto out;
+
+	strbuf_add(sw->trunk_buf, &sw->entry, sizeof(struct trunk_entry));
+out:
+	free(sw);
+}
+
+static int queue_save_snapshot_work(uint64_t oid, int nr_copies, void *data)
+{
+	struct snapshot_work *sw = xzalloc(sizeof(struct snapshot_work));
+	struct strbuf *trunk_buf = data;
+
+	sw->entry.oid = oid;
+	sw->entry.nr_copies = nr_copies;
+	sw->trunk_buf = trunk_buf;
+	sw->work.fn = do_save_object;
+	sw->work.done = save_object_done;
+	queue_work(wq, &sw->work);
+
+	return 0;
+}
+
 int farm_save_snapshot(const char *tag)
 {
 	unsigned char snap_sha1[SHA1_LEN];
 	unsigned char trunk_sha1[SHA1_LEN];
 	struct strbuf trunk_entries = STRBUF_INIT;
 	void *snap_log = NULL;
-	int log_nr, idx;
-	int ret = -1;
+	int log_nr, idx, ret = -1;
 
 	snap_log = snap_log_read(&log_nr);
 	if (!snap_log)
@@ -255,7 +294,13 @@ int farm_save_snapshot(const char *tag)
 
 	idx = log_nr + 1;
 
-	if (for_each_object_in_tree(fill_trunk_entry, &trunk_entries) < 0)
+	wq = create_work_queue("save snapshot", WQ_ORDERED);
+	if (for_each_object_in_tree(queue_save_snapshot_work,
+				    (void *)&trunk_entries) < 0)
+		goto out;
+
+	work_queue_wait(wq);
+	if (uatomic_is_true(&work_error))
 		goto out;
 
 	if (trunk_file_write(trunk_sha1, &trunk_entries) < 0)
@@ -264,7 +309,7 @@ int farm_save_snapshot(const char *tag)
 	if (snap_file_write(idx, trunk_sha1, snap_sha1) < 0)
 		goto out;
 
-	if (snap_log_write(idx, tag, snap_sha1) != 0)
+	if (snap_log_write(idx, tag, snap_sha1) < 0)
 		goto out;
 
 	ret = 0;
@@ -274,26 +319,61 @@ out:
 	return ret;
 }
 
-static int restore_object(uint64_t oid, int nr_copies,
-			void *buffer, size_t size, void *data)
+static void do_load_object(struct work *work)
 {
-	int ret = -1;
+	void *buffer = NULL;
+	struct sha1_file_hdr hdr;
+	struct snapshot_work *sw;
 
-	if (sd_write_object(oid, 0, buffer, size, 0, 0,
-			    nr_copies, true, true) != 0)
-		goto out;
+	if (uatomic_is_true(&work_error))
+		return;
 
-	if (is_vdi_obj(oid)) {
-		if (notify_vdi_add(oid_to_vid(oid), nr_copies) < 0)
-			goto out;
+	sw = container_of(work, struct snapshot_work, work);
+
+	buffer = sha1_file_read(sw->entry.sha1, &hdr);
+
+	if (!buffer)
+		goto error;
+
+	if (sd_write_object(sw->entry.oid, 0, buffer, hdr.size, 0, 0,
+			    sw->entry.nr_copies, true, true) != 0)
+		goto error;
+
+	if (is_vdi_obj(sw->entry.oid)) {
+		if (notify_vdi_add(oid_to_vid(sw->entry.oid),
+				   sw->entry.nr_copies) < 0)
+			goto error;
 
+		pthread_rwlock_wrlock(&vdi_list_lock);
 		insert_vdi(buffer);
+		pthread_rwlock_unlock(&vdi_list_lock);
 	}
 
-	ret = 0;
-out:
-	if (ret)
-		fprintf(stderr, "Fail to restore object, oid %"PRIu64"\n", oid);
+	free(buffer);
+	return;
+error:
+	free(buffer);
+	fprintf(stderr, "Fail to load object, oid %"PRIu64"\n", sw->entry.oid);
+	uatomic_set_true(&work_error);
+}
+
+static void load_object_done(struct work *work)
+{
+	struct snapshot_work *sw = container_of(work, struct snapshot_work,
+						work);
+
+	free(sw);
+}
+
+static int queue_load_snapshot_work(struct trunk_entry *entry, void *data)
+{
+	struct snapshot_work *sw = xzalloc(sizeof(struct snapshot_work));
+
+	memcpy(&sw->entry, entry, sizeof(struct trunk_entry));
+	sw->work.fn = do_load_object;
+	sw->work.done = load_object_done;
+	queue_work(wq, &sw->work);
+
 	return 0;
 }
 
@@ -305,7 +385,13 @@ int farm_load_snapshot(uint32_t idx, const char *tag)
 	if (get_trunk_sha1(idx, tag, trunk_sha1) < 0)
 		goto out;
 
-	if (for_each_object_in_trunk(trunk_sha1, restore_object, NULL) < 0)
+	wq = create_work_queue("load snapshot", WQ_DYNAMIC);
+	if (for_each_entry_in_trunk(trunk_sha1, queue_load_snapshot_work,
+				    NULL) < 0)
+		goto out;
+
+	work_queue_wait(wq);
+	if (uatomic_is_true(&work_error))
 		goto out;
 
 	if (create_active_vdis() < 0)
diff --git a/collie/farm/farm.h b/collie/farm/farm.h
index 77ac59e..10ba597 100644
--- a/collie/farm/farm.h
+++ b/collie/farm/farm.h
@@ -38,9 +38,6 @@ struct sha1_file_hdr {
 	uint64_t reserved;
 };
 
-typedef int (*object_handler_func_t)(uint64_t oid, int nr_copies, void *buf,
-				     size_t size, void *data);
-
 /* farm.c */
 int farm_init(const char *path);
 bool farm_contain_snapshot(uint32_t idx, const char *tag);
@@ -52,8 +49,9 @@ char *get_object_directory(void);
 int trunk_init(void);
 int trunk_file_write(unsigned char *trunk_sha1, struct strbuf *trunk_entries);
 void *trunk_file_read(unsigned char *sha1, struct sha1_file_hdr *);
-int for_each_object_in_trunk(unsigned char *trunk_sha1,
-			     object_handler_func_t func, void *data);
+int for_each_entry_in_trunk(unsigned char *trunk_sha1,
+			    int (*func)(struct trunk_entry *entry, void *data),
+			    void *data);
 
 /* snap.c */
 int snap_init(const char *path);
@@ -74,6 +72,7 @@ int object_tree_size(void);
 void object_tree_insert(uint64_t oid, int nr_copies);
 void object_tree_free(void);
 void object_tree_print(void);
-int for_each_object_in_tree(object_handler_func_t func, void *data);
+int for_each_object_in_tree(int (*func)(uint64_t oid, int nr_copies,
+					void *data), void *data);
 
 #endif
diff --git a/collie/farm/object_tree.c b/collie/farm/object_tree.c
index f719af6..97cb3e7 100644
--- a/collie/farm/object_tree.c
+++ b/collie/farm/object_tree.c
@@ -103,31 +103,22 @@ int object_tree_size()
 	return tree.nr_objs;
 }
 
-int for_each_object_in_tree(object_handler_func_t func, void *data)
+int for_each_object_in_tree(int (*func)(uint64_t oid, int nr_copies,
+					void *data), void *data)
 {
 	struct rb_node *p = rb_first(&tree.root);
 	struct object_tree_entry *entry;
-	uint64_t oid;
-	size_t size;
-	void *buf = xmalloc(max(SD_INODE_SIZE, SD_DATA_OBJ_SIZE));
 	int ret = -1;
 
 	while (p) {
 		entry = rb_entry(p, struct object_tree_entry, node);
-		oid = entry->oid;
-		size = get_objsize(oid);
-
-		if (sd_read_object(oid, buf, size, 0, true) < 0)
-			goto out;
 
-		if (func(oid, entry->nr_copies,
-			 buf, size, data) < 0)
+		if (func(entry->oid, entry->nr_copies, data) < 0)
 			goto out;
 
 		p = rb_next(p);
 	}
 	ret = 0;
 out:
-	free(buf);
 	return ret;
 }
diff --git a/collie/farm/trunk.c b/collie/farm/trunk.c
index c0b416f..c2f5bbf 100644
--- a/collie/farm/trunk.c
+++ b/collie/farm/trunk.c
@@ -72,8 +72,9 @@ void *trunk_file_read(unsigned char *sha1, struct sha1_file_hdr *outhdr)
 	return buffer;
 }
 
-int for_each_object_in_trunk(unsigned char *trunk_sha1,
-			     object_handler_func_t func, void *data)
+int for_each_entry_in_trunk(unsigned char *trunk_sha1,
+			    int (*func)(struct trunk_entry *entry, void *data),
+			    void *data)
 {
 	struct trunk_entry *trunk_entry, *trunk_free = NULL;
 	struct sha1_file_hdr trunk_hdr;
@@ -87,15 +88,7 @@ int for_each_object_in_trunk(unsigned char *trunk_sha1,
 
 	nr_trunks = trunk_hdr.priv;
 	for (uint64_t i = 0; i < nr_trunks; i++, trunk_entry++) {
-		struct sha1_file_hdr hdr;
-		void *buffer = NULL;
-
-		buffer = sha1_file_read(trunk_entry->sha1, &hdr);
-		if (!buffer)
-			goto out;
-
-		if (func(trunk_entry->oid, trunk_entry->nr_copies,
-			 buffer, hdr.size, data) < 0)
+		if (func(trunk_entry, data) < 0)
 			goto out;
 	}
 
-- 
1.7.1




More information about the sheepdog mailing list