[sheepdog] [PATCH v6 13/16] collie/farm: implement farm

Kai Zhang kyle at zelin.io
Tue May 21 12:11:15 CEST 2013


Move farm.c from sheep/farm to collie/farm.

* farm.c has changed a lot *

Farm is responsible for saving and loading cluster snapshots.

When saving a snapshot:
0. Construct object_rb_tree (this is done in cluster.c).
1. Walk object_rb_tree and call fill_trunk_entry() for each object.
2. Write the trunk entries to the trunk file.
3. Write the trunk sha1 to the snap file.
4. Write the snap sha1 and the snapshot tag to the snap log file.

When loading a snapshot:
1. Read the snap log file.
2. Find the snap sha1 by tag or index.
3. Read the snap file and get the trunk sha1.
4. Read the trunk file.
5. For each entry in the trunk, read the object from its sha1 file and write it to the new cluster.
6. Create an active vdi for each vdi chain, based on its last snapshot.

Signed-off-by: Kai Zhang <kyle at zelin.io>
---
 collie/Makefile.am            |    2 +-
 collie/collie.h               |    4 +
 {sheep => collie}/farm/farm.c |  380 +++++++++++++++++++++++------------------
 collie/farm/farm.h            |   15 +-
 collie/vdi.c                  |    2 +-
 5 files changed, 226 insertions(+), 177 deletions(-)
 rename {sheep => collie}/farm/farm.c (23%)

diff --git a/collie/Makefile.am b/collie/Makefile.am
index cc8a283..bfbf660 100644
--- a/collie/Makefile.am
+++ b/collie/Makefile.am
@@ -24,7 +24,7 @@ INCLUDES		= -I$(top_builddir)/include -I$(top_srcdir)/include
 sbin_PROGRAMS		= collie
 
 collie_SOURCES		= farm/object_tree.c farm/sha1_file.c farm/snap.c \
-			  farm/trunk.c \
+			  farm/trunk.c farm/farm.c \
 			  collie.c common.c treeview.c vdi.c node.c cluster.c
 
 if BUILD_TRACE
diff --git a/collie/collie.h b/collie/collie.h
index ec8d667..1d89558 100644
--- a/collie/collie.h
+++ b/collie/collie.h
@@ -76,6 +76,10 @@ int do_generic_subcommand(struct subcommand *sub, int argc, char **argv);
 int update_node_list(int max_nodes);
 void confirm(const char *message);
 void work_queue_wait(struct work_queue *q);
+int do_vdi_create(const char *vdiname, int64_t vdi_size,
+		  uint32_t base_vid, uint32_t *vdi_id, bool snapshot,
+		  int nr_copies);
+
 
 extern struct command vdi_command;
 extern struct command node_command;
diff --git a/sheep/farm/farm.c b/collie/farm/farm.c
similarity index 23%
rename from sheep/farm/farm.c
rename to collie/farm/farm.c
index c31c501..cb4aaa9 100644
--- a/sheep/farm/farm.c
+++ b/collie/farm/farm.c
@@ -1,7 +1,9 @@
 /*
  * Copyright (C) 2011 Taobao Inc.
+ * Copyright (C) 2013 Zelin.io
  *
  * Liu Yuan <namei.unix at gmail.com>
+ * Kai Zhang <kyle at zelin.io>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License version
@@ -11,80 +13,151 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <pthread.h>
-#include <linux/limits.h>
-#include <sys/file.h>
-#include <sys/types.h>
-#include <sys/xattr.h>
-
 #include "farm.h"
-#include "sheep_priv.h"
-#include "sheepdog_proto.h"
-#include "sheep.h"
+#include "list.h"
+
+static char farm_object_dir[PATH_MAX];
+static char farm_dir[PATH_MAX];
+
+struct vdi_entry {	/* one node of last_vdi_list, looked up by name */
+	char name[SD_MAX_VDI_LEN];
+	uint64_t vdi_size;
+	uint32_t vdi_id;
+	uint32_t snap_id;	/* insert_vdi() keeps the largest one per name */
+	uint8_t  nr_copies;
+	struct list_head list;	/* link into last_vdi_list */
+};
+static LIST_HEAD(last_vdi_list);
+
+/* Return the last_vdi_list entry whose name equals @name, or NULL if none. */
+static struct vdi_entry *find_vdi(const char *name)
+{
+	struct vdi_entry *entry;
+
+	list_for_each_entry(entry, &last_vdi_list, list)
+		if (strcmp(entry->name, name) == 0)
+			return entry;
+	return NULL;
+}
+
+/* Allocate a vdi_entry filled from the arguments; the caller links it. */
+static struct vdi_entry *new_vdi(const char *name, uint64_t vdi_size,
+				 uint32_t vdi_id, uint32_t snap_id,
+				 uint8_t nr_copies)
+{
+	struct vdi_entry *entry = xmalloc(sizeof(struct vdi_entry));
+	pstrcpy(entry->name, sizeof(entry->name), name);
+	entry->vdi_size = vdi_size;
+	entry->vdi_id = vdi_id;
+	entry->snap_id = snap_id;
+	entry->nr_copies = nr_copies;
+	INIT_LIST_HEAD(&entry->list);
+	return entry;
+}
+
+/* Record @new in last_vdi_list, keeping per name the largest snap_id seen. */
+static void insert_vdi(struct sd_inode *new)
+{
+	struct vdi_entry *entry = find_vdi(new->name);
+
+	if (entry == NULL) {
+		entry = new_vdi(new->name, new->vdi_size, new->vdi_id,
+				new->snap_id, new->nr_copies);
+		list_add(&entry->list, &last_vdi_list);
+		return;
+	}
+	if (entry->snap_id >= new->snap_id)
+		return;	/* recorded entry is not older than @new */
+	entry->vdi_size = new->vdi_size;
+	entry->vdi_id = new->vdi_id;
+	entry->snap_id = new->snap_id;
+	entry->nr_copies = new->nr_copies;
+}
+
+/* Call do_vdi_create() for each last_vdi_list entry; -1 on first failure. */
+static int create_active_vdis(void)
+{
+	struct vdi_entry *entry;
+	uint32_t created_vid;
+
+	list_for_each_entry(entry, &last_vdi_list, list)
+		if (do_vdi_create(entry->name, entry->vdi_size,
+				  entry->vdi_id, &created_vid, false,
+				  entry->nr_copies) < 0)
+			return -1;
+	return 0;
+}
+
+static void free_vdi_list(void)	/* free every entry on last_vdi_list */
+{
+	struct vdi_entry *cur, *tmp;	/* tmp: lookahead for the safe walk */
+	list_for_each_entry_safe(cur, tmp, &last_vdi_list, list)
+		free(cur);	/* NOTE(review): list head is not reset here */
+}
 
-char farm_obj_dir[PATH_MAX];
-char farm_dir[PATH_MAX];
+char *get_object_directory(void)	/* accessor for farm_object_dir */
+{
+	return farm_object_dir;	/* file-static buffer, set in create_directory() */
+}
 
 static int create_directory(const char *p)
 {
-	int i, ret = 0;
+	int ret = -1;
 	struct strbuf buf = STRBUF_INIT;
 
 	strbuf_addstr(&buf, p);
-	strbuf_addstr(&buf, "/.farm");
 	if (xmkdir(buf.buf, 0755) < 0) {
-		sd_eprintf("%m");
-		ret = -1;
-		goto err;
+		if (errno == EEXIST)
+			fprintf(stderr, "Path is not a directory: %s\n", p);
+		goto out;
 	}
 
 	if (!strlen(farm_dir))
 		strbuf_copyout(&buf, farm_dir, sizeof(farm_dir));
 
 	strbuf_addstr(&buf, "/objects");
-	if (xmkdir(buf.buf, 0755) < 0) {
-		sd_eprintf("%m");
-		ret = -1;
-		goto err;
-	}
-	for (i = 0; i < 256; i++) {
+	if (xmkdir(buf.buf, 0755) < 0)
+		goto out;
+
+	for (int i = 0; i < 256; i++) {
 		strbuf_addf(&buf, "/%02x", i);
-		if (xmkdir(buf.buf, 0755) < 0) {
-			sd_eprintf("%m");
-			ret = -1;
-			goto err;
-		}
+		if (xmkdir(buf.buf, 0755) < 0)
+			goto out;
+
 		strbuf_remove(&buf, buf.len - 3, 3);
 	}
 
-	if (!strlen(farm_obj_dir))
-		strbuf_copyout(&buf, farm_obj_dir, sizeof(farm_obj_dir));
-err:
+	if (!strlen(farm_object_dir))
+		strbuf_copyout(&buf, farm_object_dir, sizeof(farm_object_dir));
+
+	ret = 0;
+out:
+	if (ret)
+		fprintf(stderr, "Fail to create directory: %m\n");
 	strbuf_release(&buf);
 	return ret;
 }
 
-static int get_trunk_sha1(uint32_t epoch, unsigned char *outsha1)
+static int get_trunk_sha1(uint32_t idx, const char *tag, unsigned char *outsha1)
 {
-	int i, nr_logs = -1, ret = -1;
+	int nr_logs = -1, ret = -1;
 	struct snap_log *log_buf, *log_free = NULL;
 	void *snap_buf = NULL;
 	struct sha1_file_hdr hdr;
 
 	log_free = log_buf = snap_log_read(&nr_logs);
-	sd_dprintf("%d", nr_logs);
 	if (nr_logs < 0)
 		goto out;
 
-	for (i = 0; i < nr_logs; i++, log_buf++) {
-		if (log_buf->epoch != epoch)
+	for (int i = 0; i < nr_logs; i++, log_buf++) {
+		if (log_buf->idx != idx && strcmp(log_buf->tag, tag))
 			continue;
 		snap_buf = snap_file_read(log_buf->sha1, &hdr);
 		if (!snap_buf)
 			goto out;
 		memcpy(outsha1, snap_buf, SHA1_LEN);
 		ret = 0;
-		break;
+		goto out;
 	}
 out:
 	free(log_free);
@@ -92,181 +165,154 @@ out:
 	return ret;
 }
 
-static int farm_init(void)
+static int notify_vdi_add(uint32_t vdi_id, uint32_t nr_copies)
 {
-	sd_dprintf("use farm store driver");
-	if (create_directory(obj_path) < 0)
-		goto err;
+	int ret = -1;
+	struct sd_req hdr;
+	char *buf = NULL;
 
-	if (!is_xattr_enabled(obj_path)) {
-		sd_eprintf("xattrs are not enabled on %s", obj_path);
-		goto err;
-	}
+	sd_init_req(&hdr, SD_OP_NOTIFY_VDI_ADD);
+	hdr.vdi_state.new_vid = vdi_id;
+	hdr.vdi_state.copies = nr_copies;
+	hdr.vdi_state.set_bitmap = true;
 
-	if (snap_init() < 0)
-		goto err;
+	ret = collie_exec_req(sdhost, sdport, &hdr, buf);
 
-	if (default_init() < 0)
-		goto err;
+	if (ret)
+		fprintf(stderr, "Fail to notify vdi add event(%"PRIx32", %d)\n",
+			vdi_id, nr_copies);
 
-	return SD_RES_SUCCESS;
-err:
-	return SD_RES_EIO;
+	free(buf);
+	return ret;
 }
 
-static int farm_snapshot(const struct siocb *iocb)
+static int fill_trunk_entry(uint64_t oid, int nr_copies,
+			    void *buf, size_t size, void *data)
 {
-	unsigned char snap_sha1[SHA1_LEN];
-	unsigned char trunk_sha1[SHA1_LEN];
-	struct sd_node nodes[SD_MAX_NODES];
-	int nr_nodes;
-	void *buffer;
-	int log_nr, ret = SD_RES_EIO, epoch;
+	int ret = -1;	/* set to 0 only after the entry is appended */
 
-	buffer = snap_log_read(&log_nr);
-	if (!buffer)
-		goto out;
+	struct strbuf *trunk_entries = data;	/* entries are appended here */
+	struct trunk_entry new_entry = {};
+	struct sha1_file_hdr hdr = { .priv = 0 };
+	struct strbuf object_strbuf = STRBUF_INIT;
 
-	epoch = log_nr + 1;
-	sd_dprintf("user epoch %d", epoch);
+	memcpy(hdr.tag, TAG_DATA, TAG_LEN);
+	hdr.size = size;
 
-	nr_nodes = epoch_log_read(sys->epoch, nodes, sizeof(nodes));
-	if (nr_nodes < 0)
-		goto out;
+	strbuf_add(&object_strbuf, buf, size);
+	strbuf_insert(&object_strbuf, 0, &hdr, sizeof(hdr)); /* hdr at pos 0 */
 
-	if (trunk_file_write(trunk_sha1) < 0)
+	if (sha1_file_write((void *)object_strbuf.buf,
+			    object_strbuf.len,
+			    new_entry.sha1) != 0)
 		goto out;
 
-	if (snap_file_write(sys->epoch, nodes, nr_nodes,
-			    trunk_sha1, snap_sha1) < 0)
-		goto out;
+	new_entry.oid = oid;
+	new_entry.nr_copies = nr_copies;
+	strbuf_add(trunk_entries, &new_entry, sizeof(struct trunk_entry));
 
-	if (snap_log_write(epoch, snap_sha1) < 0)
-		goto out;
+	ret = 0;
+out:
+	if (ret)	/* period belongs before the newline, not after it */
+		fprintf(stderr, "Fail to fill trunk entry.\n");
+	strbuf_release(&object_strbuf);
+	return ret;
+}
+
+int farm_init(const char *path)
+{
+	int ret = -1;
 
-	ret = SD_RES_SUCCESS;
+	if (create_directory(path) < 0)
+		goto out;
+	if (snap_init(farm_dir) < 0)
+		goto out;
+	return 0;
 out:
-	free(buffer);
+	if (ret)
+		fprintf(stderr, "Fail to init farm.\n");
 	return ret;
 }
 
-static int restore_objects_from_snap(uint32_t epoch)
+bool farm_contain_snapshot(uint32_t idx, const char *tag)
 {
-	struct sha1_file_hdr hdr;
-	struct trunk_entry *trunk_buf, *trunk_free = NULL;
 	unsigned char trunk_sha1[SHA1_LEN];
-	uint64_t nr_trunks, i;
-	int ret = SD_RES_EIO;
+	return (get_trunk_sha1(idx, tag, trunk_sha1) == 0);
+}
+
+int farm_save_snapshot(const char *tag)
+{
+	unsigned char snap_sha1[SHA1_LEN];
+	unsigned char trunk_sha1[SHA1_LEN];
+	struct strbuf trunk_entries = STRBUF_INIT;
+	void *snap_log = NULL;
+	int log_nr, idx;
+	int ret = -1;
 
-	if (get_trunk_sha1(epoch, trunk_sha1) < 0)
+	snap_log = snap_log_read(&log_nr);
+	if (!snap_log)
 		goto out;
 
-	trunk_free = trunk_buf = trunk_file_read(trunk_sha1, &hdr);
-	if (!trunk_buf)
+	idx = log_nr + 1;
+
+	if (for_each_object_in_tree(fill_trunk_entry, &trunk_entries) < 0)
 		goto out;
 
-	nr_trunks = hdr.priv;
-	ret = SD_RES_SUCCESS;
-	for (i = 0; i < nr_trunks; i++, trunk_buf++) {
-		struct sha1_file_hdr h;
-		struct siocb io = { 0 };
-		uint64_t oid;
-		void *buffer = NULL;
-
-		oid = trunk_buf->oid;
-		buffer = sha1_file_read(trunk_buf->sha1, &h);
-		if (!buffer) {
-			sd_eprintf("oid %"PRIx64" not restored", oid);
-			goto out;
-		}
-		io.length = h.size;
-		io.buf = buffer;
-		ret = default_create_and_write(oid, &io);
-		if (ret != SD_RES_SUCCESS) {
-			sd_eprintf("oid %"PRIx64" not restored", oid);
-			goto out;
-		} else
-			sd_dprintf("oid %"PRIx64" restored", oid);
+	if (trunk_file_write(trunk_sha1, &trunk_entries) < 0)
+		goto out;
 
-		free(buffer);
-	}
+	if (snap_file_write(idx, trunk_sha1, snap_sha1) < 0)
+		goto out;
+
+	if (snap_log_write(idx, tag, snap_sha1) != 0)
+		goto out;
+
+	ret = 0;
 out:
-	free(trunk_free);
+	free(snap_log);
+	strbuf_release(&trunk_entries);
 	return ret;
 }
 
-static int rm_object(uint64_t oid, char *path, void *arg)
+static int restore_object(uint64_t oid, int nr_copies,
+			void *buffer, size_t size, void *data)
 {
-	char p[PATH_MAX];
-	int ret = SD_RES_SUCCESS;
-
-	snprintf(p, sizeof(p), "%s/%"PRIx64, path, oid);
-	if (unlink(path) < 0) {
-		sd_eprintf("failed to remove cached object %m");
-		if (errno == ENOENT)
-			return SD_RES_SUCCESS;
-		ret = SD_RES_EIO;
+	int ret = -1;	/* set to 0 only after every step below succeeds */
+
+	if (sd_write_object(oid, 0, buffer, size, 0, 0,
+			    nr_copies, true, true) != 0)
 		goto out;
+
+	if (is_vdi_obj(oid)) {
+		if (notify_vdi_add(oid_to_vid(oid), nr_copies) < 0)
+			goto out;
+
+		insert_vdi(buffer);	/* buffer is read as a struct sd_inode */
 	}
+
+	ret = 0;
 out:
-	return ret;
+	if (ret)
+		fprintf(stderr, "Fail to restore object, oid %"PRIu64"\n", oid);
+	return ret;	/* was "return 0", which reported failures as success */
 }
 
-static int farm_restore(const struct siocb *iocb)
+int farm_load_snapshot(uint32_t idx, const char *tag)
 {
-	int ret = SD_RES_EIO, epoch = iocb->epoch;
-
-	sd_dprintf("try recover user epoch %d", epoch);
+	int ret = -1;
+	unsigned char trunk_sha1[SHA1_LEN];
 
-	/* Remove all the objects of WD and object cache */
-	for_each_object_in_wd(rm_object, true, NULL);
-	if (sys->enable_object_cache)
-		object_cache_format();
+	if (get_trunk_sha1(idx, tag, trunk_sha1) < 0)
+		goto out;
 
-	ret = restore_objects_from_snap(epoch);
-	if (ret != SD_RES_SUCCESS)
+	if (for_each_object_in_trunk(trunk_sha1, restore_object, NULL) < 0)
 		goto out;
-out:
-	return ret;
-}
 
-static int farm_get_snap_file(struct siocb *iocb)
-{
-	int ret = SD_RES_EIO;
-	void *buffer = NULL;
-	size_t size;
-	int nr;
-
-	sd_dprintf("try get snap file");
-	buffer = snap_log_read(&nr);
-	if (!buffer)
+	if (create_active_vdis() < 0)
 		goto out;
-	size = nr * sizeof(struct snap_log);
-	memcpy(iocb->buf, buffer, size);
-	iocb->length = size;
-	ret = SD_RES_SUCCESS;
+
+	ret = 0;
 out:
-	free(buffer);
+	free_vdi_list();
 	return ret;
 }
-
-static struct store_driver farm = {
-	.name = "farm",
-	.init = farm_init,
-	.exist = default_exist,
-	.create_and_write = default_create_and_write,
-	.write = default_write,
-	.read = default_read,
-	.link = default_link,
-	.update_epoch = default_update_epoch,
-	.snapshot = farm_snapshot,
-	.cleanup = default_cleanup,
-	.restore = farm_restore,
-	.get_snap_file = farm_get_snap_file,
-	.format = default_format,
-	.purge_obj = default_purge_obj,
-	.remove_object = default_remove_object,
-	.get_hash = default_get_hash,
-};
-
-add_store_driver(farm);
diff --git a/collie/farm/farm.h b/collie/farm/farm.h
index d2ee35c..77ac59e 100644
--- a/collie/farm/farm.h
+++ b/collie/farm/farm.h
@@ -38,17 +38,16 @@ struct sha1_file_hdr {
 	uint64_t reserved;
 };
 
-static char farm_obj_dir[PATH_MAX];
-static char farm_dir[PATH_MAX];
-
-static inline char *get_object_directory(void)
-{
-	return farm_obj_dir;
-}
-
 typedef int (*object_handler_func_t)(uint64_t oid, int nr_copies, void *buf,
 				     size_t size, void *data);
 
+/* farm.c */
+int farm_init(const char *path);
+bool farm_contain_snapshot(uint32_t idx, const char *tag);
+int farm_save_snapshot(const char *tag);
+int farm_load_snapshot(uint32_t idx, const char *tag);
+char *get_object_directory(void);
+
 /* trunk.c */
 int trunk_init(void);
 int trunk_file_write(unsigned char *trunk_sha1, struct strbuf *trunk_entries);
diff --git a/collie/vdi.c b/collie/vdi.c
index 27a8418..087ec80 100644
--- a/collie/vdi.c
+++ b/collie/vdi.c
@@ -505,7 +505,7 @@ static int read_vdi_obj(const char *vdiname, int snapid, const char *tag,
 	return EXIT_SUCCESS;
 }
 
-static int do_vdi_create(const char *vdiname, int64_t vdi_size,
+int do_vdi_create(const char *vdiname, int64_t vdi_size,
 			 uint32_t base_vid, uint32_t *vdi_id, bool snapshot,
 			 int nr_copies)
 {
-- 
1.7.1





More information about the sheepdog mailing list