[sheepdog] [PATCH v5 13/16] collie/farm: implement farm

Kai Zhang kyle at zelin.io
Mon May 20 09:50:43 CEST 2013


Moved farm.c from sheep/farm to collie/farm.

* farm.c has changed a lot *

Farm is responsible for saving and loading cluster snapshots.

When saving a snapshot:
0. construct object_rb_tree (this is done in cluster.c).
1. walk object_rb_tree and call fill_trunk_entry() for each object.
2. write the trunk entries to the trunk file.
3. write the trunk sha1 to the snap file.
4. write the snap sha1 and the snapshot tag to the snap log file.

When loading a snapshot:
1. read the snap log file.
2. find the snap sha1 by tag or index.
3. read the snap file and get the trunk sha1.
4. read the trunk file.
5. for each entry in the trunk, read the object from its sha1 file and write
   it to the new cluster.
6. create an active vdi for each vdi chain, based on its last snapshot.

Signed-off-by: Kai Zhang <kyle at zelin.io>
---
 collie/Makefile.am            |    2 +-
 collie/collie.h               |    4 +
 {sheep => collie}/farm/farm.c |  372 +++++++++++++++++++++++------------------
 collie/farm/farm.h            |   13 +-
 collie/vdi.c                  |    2 +-
 5 files changed, 224 insertions(+), 169 deletions(-)
 rename {sheep => collie}/farm/farm.c (26%)

diff --git a/collie/Makefile.am b/collie/Makefile.am
index cc8a283..bfbf660 100644
--- a/collie/Makefile.am
+++ b/collie/Makefile.am
@@ -24,7 +24,7 @@ INCLUDES		= -I$(top_builddir)/include -I$(top_srcdir)/include
 sbin_PROGRAMS		= collie
 
 collie_SOURCES		= farm/object_tree.c farm/sha1_file.c farm/snap.c \
-			  farm/trunk.c \
+			  farm/trunk.c farm/farm.c \
 			  collie.c common.c treeview.c vdi.c node.c cluster.c
 
 if BUILD_TRACE
diff --git a/collie/collie.h b/collie/collie.h
index ec8d667..1d89558 100644
--- a/collie/collie.h
+++ b/collie/collie.h
@@ -76,6 +76,10 @@ int do_generic_subcommand(struct subcommand *sub, int argc, char **argv);
 int update_node_list(int max_nodes);
 void confirm(const char *message);
 void work_queue_wait(struct work_queue *q);
+int do_vdi_create(const char *vdiname, int64_t vdi_size,
+		  uint32_t base_vid, uint32_t *vdi_id, bool snapshot,
+		  int nr_copies);
+
 
 extern struct command vdi_command;
 extern struct command node_command;
diff --git a/sheep/farm/farm.c b/collie/farm/farm.c
similarity index 26%
rename from sheep/farm/farm.c
rename to collie/farm/farm.c
index c31c501..63e8e56 100644
--- a/sheep/farm/farm.c
+++ b/collie/farm/farm.c
@@ -1,7 +1,9 @@
 /*
  * Copyright (C) 2011 Taobao Inc.
+ * Copyright (C) 2013 Zelin.io
  *
  * Liu Yuan <namei.unix at gmail.com>
+ * Kai Zhang <kyle at zelin.io>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License version
@@ -11,60 +13,111 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <pthread.h>
-#include <linux/limits.h>
-#include <sys/file.h>
-#include <sys/types.h>
-#include <sys/xattr.h>
-
+#include "list.h"
 #include "farm.h"
-#include "sheep_priv.h"
-#include "sheepdog_proto.h"
-#include "sheep.h"
 
-char farm_obj_dir[PATH_MAX];
-char farm_dir[PATH_MAX];
+static char farm_obj_dir[PATH_MAX];
+static char farm_dir[PATH_MAX];
+
+struct vdi_entry {
+	char name[SD_MAX_VDI_LEN];
+	uint64_t vdi_size;
+	uint32_t vdi_id;
+	uint32_t snap_id;
+	uint8_t  nr_copies;
+	struct list_head list;
+};
+static LIST_HEAD(last_vdi_list);
+
+static struct vdi_entry *find_vdi(const char *name)
+{
+	struct vdi_entry *vdi;
+
+	list_for_each_entry(vdi, &last_vdi_list, list) {
+		if (!strcmp(vdi->name, name))
+			return vdi;
+	}
+	return NULL;
+}
+
+static struct vdi_entry *new_vdi(const char *name, uint64_t vdi_size,
+				 uint32_t vdi_id, uint32_t snap_id,
+				 uint8_t nr_copies)
+{
+	struct vdi_entry *vdi;
+	vdi = xmalloc(sizeof(struct vdi_entry));
+	pstrcpy(vdi->name, sizeof(vdi->name), name);
+	vdi->vdi_size = vdi_size;
+	vdi->vdi_id = vdi_id;
+	vdi->snap_id = snap_id;
+	vdi->nr_copies = nr_copies;
+	INIT_LIST_HEAD(&vdi->list);
+	return vdi;
+}
+
+static void insert_vdi(struct sd_inode *new)
+{
+	struct vdi_entry *vdi;
+	vdi = find_vdi(new->name);
+	if (!vdi) {
+		vdi = new_vdi(new->name,
+			      new->vdi_size,
+			      new->vdi_id,
+			      new->snap_id,
+			      new->nr_copies);
+		list_add(&vdi->list, &last_vdi_list);
+	} else if (vdi->snap_id < new->snap_id) {
+		vdi->vdi_size = new->vdi_size;
+		vdi->vdi_id = new->vdi_id;
+		vdi->snap_id = new->snap_id;
+		vdi->nr_copies = new->nr_copies;
+	}
+}
+
+char *get_object_directory(void)
+{
+	return farm_obj_dir;
+}
 
 static int create_directory(const char *p)
 {
-	int i, ret = 0;
+	int i, ret = -1;
 	struct strbuf buf = STRBUF_INIT;
 
 	strbuf_addstr(&buf, p);
-	strbuf_addstr(&buf, "/.farm");
 	if (xmkdir(buf.buf, 0755) < 0) {
-		sd_eprintf("%m");
-		ret = -1;
-		goto err;
+		if (errno == EEXIST)
+			fprintf(stderr, "Path is not a directory: %s\n", p);
+		goto out;
 	}
 
 	if (!strlen(farm_dir))
 		strbuf_copyout(&buf, farm_dir, sizeof(farm_dir));
 
 	strbuf_addstr(&buf, "/objects");
-	if (xmkdir(buf.buf, 0755) < 0) {
-		sd_eprintf("%m");
-		ret = -1;
-		goto err;
-	}
+	if (xmkdir(buf.buf, 0755) < 0)
+		goto out;
+
 	for (i = 0; i < 256; i++) {
 		strbuf_addf(&buf, "/%02x", i);
-		if (xmkdir(buf.buf, 0755) < 0) {
-			sd_eprintf("%m");
-			ret = -1;
-			goto err;
-		}
+		if (xmkdir(buf.buf, 0755) < 0)
+			goto out;
+
 		strbuf_remove(&buf, buf.len - 3, 3);
 	}
 
 	if (!strlen(farm_obj_dir))
 		strbuf_copyout(&buf, farm_obj_dir, sizeof(farm_obj_dir));
-err:
+
+	ret = 0;
+out:
+	if (ret)
+		fprintf(stderr, "Fail to create directory: %m\n");
 	strbuf_release(&buf);
 	return ret;
 }
 
-static int get_trunk_sha1(uint32_t epoch, unsigned char *outsha1)
+static int get_trunk_sha1(uint32_t idx, const char *tag, unsigned char *outsha1)
 {
 	int i, nr_logs = -1, ret = -1;
 	struct snap_log *log_buf, *log_free = NULL;
@@ -72,19 +125,18 @@ static int get_trunk_sha1(uint32_t epoch, unsigned char *outsha1)
 	struct sha1_file_hdr hdr;
 
 	log_free = log_buf = snap_log_read(&nr_logs);
-	sd_dprintf("%d", nr_logs);
 	if (nr_logs < 0)
 		goto out;
 
 	for (i = 0; i < nr_logs; i++, log_buf++) {
-		if (log_buf->epoch != epoch)
+		if (log_buf->idx != idx && strcmp(log_buf->tag, tag))
 			continue;
 		snap_buf = snap_file_read(log_buf->sha1, &hdr);
 		if (!snap_buf)
 			goto out;
 		memcpy(outsha1, snap_buf, SHA1_LEN);
 		ret = 0;
-		break;
+		goto out;
 	}
 out:
 	free(log_free);
@@ -92,181 +144,181 @@ out:
 	return ret;
 }
 
-static int farm_init(void)
+static int notify_vdi_add(uint32_t vdi_id, uint32_t nr_copies)
 {
-	sd_dprintf("use farm store driver");
-	if (create_directory(obj_path) < 0)
-		goto err;
+	int ret = -1;
+	struct sd_req hdr;
+	char *buf = NULL;
 
-	if (!is_xattr_enabled(obj_path)) {
-		sd_eprintf("xattrs are not enabled on %s", obj_path);
-		goto err;
-	}
+	sd_init_req(&hdr, SD_OP_NOTIFY_VDI_ADD);
+	hdr.vdi_state.new_vid = vdi_id;
+	hdr.vdi_state.copies = nr_copies;
+	hdr.vdi_state.set_bitmap = true;
+
+	ret = collie_exec_req(sdhost, sdport, &hdr, buf);
+
+	if (ret)
+		fprintf(stderr, "Fail to notify vdi add event(%"PRIx32", %d)\n",
+			vdi_id, nr_copies);
+
+	free(buf);
+	return ret;
+}
+
+static int fill_trunk_entry(uint64_t oid, int nr_copies,
+			    void *buf, size_t size, void *data)
+{
+	int ret = -1;
+
+	struct strbuf *trunk_entries = data;
+	struct trunk_entry new_entry = {};
+	struct sha1_file_hdr hdr = { .priv = 0 };
+	struct strbuf obj_strbuf = STRBUF_INIT;
+
+	/* fill sha1 file */
+	memcpy(hdr.tag, TAG_DATA, TAG_LEN);
+	hdr.size = size;
+
+	/* fill obj_strbuf */
+	strbuf_add(&obj_strbuf, buf, size);
+	strbuf_insert(&obj_strbuf, 0, &hdr, sizeof(hdr));
+
+	/* write sha1 file and fill trunk entry */
+	if (sha1_file_write((void *)obj_strbuf.buf,
+			    obj_strbuf.len,
+			    new_entry.sha1) != 0)
+		goto out;
+
+	new_entry.oid = oid;
+	new_entry.nr_copies = nr_copies;
+	strbuf_add(trunk_entries, &new_entry, sizeof(struct trunk_entry));
 
-	if (snap_init() < 0)
-		goto err;
+	ret = 0;
+out:
+	if (ret)
+		fprintf(stderr, "Fail to fill trunk entry\n.");
+	strbuf_release(&obj_strbuf);
+	return ret;
+}
 
-	if (default_init() < 0)
-		goto err;
+int farm_init(const char *path)
+{
+	int ret = -1;
 
-	return SD_RES_SUCCESS;
-err:
-	return SD_RES_EIO;
+	if (create_directory(path) < 0)
+		goto out;
+	if (snap_init(farm_dir) < 0)
+		goto out;
+	return 0;
+out:
+	if (ret)
+		fprintf(stderr, "Fail to init farm.\n");
+	return ret;
+}
+
+bool farm_contains_snapshot(uint32_t idx, const char *tag)
+{
+	unsigned char trunk_sha1[SHA1_LEN];
+	return (get_trunk_sha1(idx, tag, trunk_sha1) == 0);
 }
 
-static int farm_snapshot(const struct siocb *iocb)
+int farm_save_snapshot(const char *tag)
 {
 	unsigned char snap_sha1[SHA1_LEN];
 	unsigned char trunk_sha1[SHA1_LEN];
-	struct sd_node nodes[SD_MAX_NODES];
-	int nr_nodes;
-	void *buffer;
-	int log_nr, ret = SD_RES_EIO, epoch;
+	struct strbuf trunk_entries = STRBUF_INIT;
+	void *snap_log = NULL;
+	int log_nr, idx;
+	int ret = -1;
 
-	buffer = snap_log_read(&log_nr);
-	if (!buffer)
+	snap_log = snap_log_read(&log_nr);
+	if (!snap_log)
 		goto out;
 
-	epoch = log_nr + 1;
-	sd_dprintf("user epoch %d", epoch);
+	idx = log_nr + 1;
 
-	nr_nodes = epoch_log_read(sys->epoch, nodes, sizeof(nodes));
-	if (nr_nodes < 0)
+	if (for_each_object_in_tree(fill_trunk_entry, &trunk_entries) < 0)
 		goto out;
 
-	if (trunk_file_write(trunk_sha1) < 0)
+	if (trunk_file_write(trunk_sha1, &trunk_entries) < 0)
 		goto out;
 
-	if (snap_file_write(sys->epoch, nodes, nr_nodes,
-			    trunk_sha1, snap_sha1) < 0)
+	if (snap_file_write(idx, trunk_sha1, snap_sha1) < 0)
 		goto out;
 
-	if (snap_log_write(epoch, snap_sha1) < 0)
+	if (snap_log_write(idx, tag, snap_sha1) != 0)
 		goto out;
 
-	ret = SD_RES_SUCCESS;
+	ret = 0;
 out:
-	free(buffer);
+	free(snap_log);
+	strbuf_release(&trunk_entries);
 	return ret;
 }
 
-static int restore_objects_from_snap(uint32_t epoch)
+int farm_load_snapshot(uint32_t idx, const char *tag)
 {
-	struct sha1_file_hdr hdr;
-	struct trunk_entry *trunk_buf, *trunk_free = NULL;
+	int ret = -1;
+	struct trunk_entry *trunk_entry, *trunk_free = NULL;
+	struct sha1_file_hdr trunk_hdr;
+	struct sd_inode *inode;
 	unsigned char trunk_sha1[SHA1_LEN];
 	uint64_t nr_trunks, i;
-	int ret = SD_RES_EIO;
 
-	if (get_trunk_sha1(epoch, trunk_sha1) < 0)
+	if (get_trunk_sha1(idx, tag, trunk_sha1) < 0)
 		goto out;
 
-	trunk_free = trunk_buf = trunk_file_read(trunk_sha1, &hdr);
-	if (!trunk_buf)
+	trunk_free = trunk_entry = trunk_file_read(trunk_sha1, &trunk_hdr);
+
+	if (!trunk_entry)
 		goto out;
 
-	nr_trunks = hdr.priv;
-	ret = SD_RES_SUCCESS;
-	for (i = 0; i < nr_trunks; i++, trunk_buf++) {
-		struct sha1_file_hdr h;
-		struct siocb io = { 0 };
-		uint64_t oid;
+	nr_trunks = trunk_hdr.priv;
+	for (i = 0; i < nr_trunks; i++, trunk_entry++) {
+		struct sha1_file_hdr hdr;
 		void *buffer = NULL;
+		uint64_t oid = trunk_entry->oid;
 
-		oid = trunk_buf->oid;
-		buffer = sha1_file_read(trunk_buf->sha1, &h);
+		buffer = sha1_file_read(trunk_entry->sha1, &hdr);
 		if (!buffer) {
-			sd_eprintf("oid %"PRIx64" not restored", oid);
+			fprintf(stderr, "oid %"PRIx64" not restored\n", oid);
 			goto out;
 		}
-		io.length = h.size;
-		io.buf = buffer;
-		ret = default_create_and_write(oid, &io);
-		if (ret != SD_RES_SUCCESS) {
-			sd_eprintf("oid %"PRIx64" not restored", oid);
+
+		if (sd_write_object(oid, 0, buffer, hdr.size, 0, 0,
+				    trunk_entry->nr_copies,
+				    true, true) != 0) {
+			fprintf(stderr, "oid %"PRIx64" not restored\n", oid);
 			goto out;
-		} else
-			sd_dprintf("oid %"PRIx64" restored", oid);
+		}
 
-		free(buffer);
-	}
-out:
-	free(trunk_free);
-	return ret;
-}
+		if (is_vdi_obj(oid)) {
+			inode = buffer;
 
-static int rm_object(uint64_t oid, char *path, void *arg)
-{
-	char p[PATH_MAX];
-	int ret = SD_RES_SUCCESS;
-
-	snprintf(p, sizeof(p), "%s/%"PRIx64, path, oid);
-	if (unlink(path) < 0) {
-		sd_eprintf("failed to remove cached object %m");
-		if (errno == ENOENT)
-			return SD_RES_SUCCESS;
-		ret = SD_RES_EIO;
-		goto out;
-	}
-out:
-	return ret;
-}
+			if (notify_vdi_add(oid_to_vid(oid),
+					   trunk_entry->nr_copies) < 0)
+				goto out;
 
-static int farm_restore(const struct siocb *iocb)
-{
-	int ret = SD_RES_EIO, epoch = iocb->epoch;
+			insert_vdi(inode);
+		}
+	}
 
-	sd_dprintf("try recover user epoch %d", epoch);
+	/* create active vdi based on last vdi snapshot */
+	struct vdi_entry *vdi, *next;
+	uint32_t new_vid;
+	list_for_each_entry(vdi, &last_vdi_list, list) {
+		if (do_vdi_create(vdi->name,
+				  vdi->vdi_size,
+				  vdi->vdi_id, &new_vid,
+				  false, vdi->nr_copies) < 0)
+			goto out;
+	}
 
-	/* Remove all the objects of WD and object cache */
-	for_each_object_in_wd(rm_object, true, NULL);
-	if (sys->enable_object_cache)
-		object_cache_format();
+	list_for_each_entry_safe(vdi, next, &last_vdi_list, list)
+		free(vdi);
 
-	ret = restore_objects_from_snap(epoch);
-	if (ret != SD_RES_SUCCESS)
-		goto out;
+	ret = 0;
 out:
+	free(trunk_free);
 	return ret;
 }
-
-static int farm_get_snap_file(struct siocb *iocb)
-{
-	int ret = SD_RES_EIO;
-	void *buffer = NULL;
-	size_t size;
-	int nr;
-
-	sd_dprintf("try get snap file");
-	buffer = snap_log_read(&nr);
-	if (!buffer)
-		goto out;
-	size = nr * sizeof(struct snap_log);
-	memcpy(iocb->buf, buffer, size);
-	iocb->length = size;
-	ret = SD_RES_SUCCESS;
-out:
-	free(buffer);
-	return ret;
-}
-
-static struct store_driver farm = {
-	.name = "farm",
-	.init = farm_init,
-	.exist = default_exist,
-	.create_and_write = default_create_and_write,
-	.write = default_write,
-	.read = default_read,
-	.link = default_link,
-	.update_epoch = default_update_epoch,
-	.snapshot = farm_snapshot,
-	.cleanup = default_cleanup,
-	.restore = farm_restore,
-	.get_snap_file = farm_get_snap_file,
-	.format = default_format,
-	.purge_obj = default_purge_obj,
-	.remove_object = default_remove_object,
-	.get_hash = default_get_hash,
-};
-
-add_store_driver(farm);
diff --git a/collie/farm/farm.h b/collie/farm/farm.h
index 2628c17..7331ec1 100644
--- a/collie/farm/farm.h
+++ b/collie/farm/farm.h
@@ -38,13 +38,12 @@ struct sha1_file_hdr {
 	uint64_t reserved;
 };
 
-static char farm_obj_dir[PATH_MAX];
-static char farm_dir[PATH_MAX];
-
-static inline char *get_object_directory(void)
-{
-	return farm_obj_dir;
-}
+/* farm.c */
+int farm_init(const char *path);
+bool farm_contains_snapshot(uint32_t idx, const char *tag);
+int farm_save_snapshot(const char *tag);
+int farm_load_snapshot(uint32_t idx, const char *tag);
+char *get_object_directory(void);
 
 /* trunk.c */
 int trunk_init(void);
diff --git a/collie/vdi.c b/collie/vdi.c
index 27a8418..087ec80 100644
--- a/collie/vdi.c
+++ b/collie/vdi.c
@@ -505,7 +505,7 @@ static int read_vdi_obj(const char *vdiname, int snapid, const char *tag,
 	return EXIT_SUCCESS;
 }
 
-static int do_vdi_create(const char *vdiname, int64_t vdi_size,
+int do_vdi_create(const char *vdiname, int64_t vdi_size,
 			 uint32_t base_vid, uint32_t *vdi_id, bool snapshot,
 			 int nr_copies)
 {
-- 
1.7.1




More information about the sheepdog mailing list