[sheepdog] [PATCH v4 15/18] collie/farm: implement farm

Kai Zhang kyle at zelin.io
Fri May 17 08:27:23 CEST 2013


Move farm.c from sheep/farm to collie/farm.

* farm.c has changed a lot *

Farm is responsible for saving and loading cluster snapshots.

When saving a snapshot:
0. construct object_rb_tree (this is done in cluster.c).
1. parse object_rb_tree and call fill_trunk_entry() for each object.
2. write the trunk entries to the trunk file.
3. write the trunk sha1 to the snap file.
4. write the snap sha1 and the snapshot tag to the snap log file.

When loading a snapshot:
1. read the snap log file.
2. find the snap sha1 by tag or index.
3. read the snap file and get the trunk sha1.
4. read the trunk file.
5. for each trunk entry, read the object from its sha1 file and write it to the new cluster.
6. create an active vdi for each vdi chain, based on its last snapshot.

Signed-off-by: Kai Zhang <kyle at zelin.io>
---
 collie/Makefile.am |    2 +-
 collie/collie.h    |    4 +
 collie/farm/farm.c |  612 +++++++++++++++++++++++++++++-----------------------
 collie/farm/farm.h |   12 +-
 collie/vdi.c       |    2 +-
 5 files changed, 351 insertions(+), 281 deletions(-)
 rewrite collie/farm/farm.c (66%)

diff --git a/collie/Makefile.am b/collie/Makefile.am
index 5943cd9..50a3da3 100644
--- a/collie/Makefile.am
+++ b/collie/Makefile.am
@@ -24,7 +24,7 @@ INCLUDES		= -I$(top_builddir)/include -I$(top_srcdir)/include
 sbin_PROGRAMS		= collie
 
 collie_SOURCES		= farm/object_rb_tree.c farm/sha1_file.c farm/snap.c \
-			  farm/trunk.c \
+			  farm/trunk.c farm/farm.c \
 			  collie.c common.c treeview.c vdi.c node.c cluster.c
 
 if BUILD_TRACE
diff --git a/collie/collie.h b/collie/collie.h
index ec8d667..1d89558 100644
--- a/collie/collie.h
+++ b/collie/collie.h
@@ -76,6 +76,10 @@ int do_generic_subcommand(struct subcommand *sub, int argc, char **argv);
 int update_node_list(int max_nodes);
 void confirm(const char *message);
 void work_queue_wait(struct work_queue *q);
+int do_vdi_create(const char *vdiname, int64_t vdi_size,
+		  uint32_t base_vid, uint32_t *vdi_id, bool snapshot,
+		  int nr_copies);
+
 
 extern struct command vdi_command;
 extern struct command node_command;
diff --git a/collie/farm/farm.c b/collie/farm/farm.c
dissimilarity index 66%
index c31c501..08a5402 100644
--- a/collie/farm/farm.c
+++ b/collie/farm/farm.c
@@ -1,272 +1,340 @@
-/*
- * Copyright (C) 2011 Taobao Inc.
- *
- * Liu Yuan <namei.unix at gmail.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <pthread.h>
-#include <linux/limits.h>
-#include <sys/file.h>
-#include <sys/types.h>
-#include <sys/xattr.h>
-
-#include "farm.h"
-#include "sheep_priv.h"
-#include "sheepdog_proto.h"
-#include "sheep.h"
-
-char farm_obj_dir[PATH_MAX];
-char farm_dir[PATH_MAX];
-
-static int create_directory(const char *p)
-{
-	int i, ret = 0;
-	struct strbuf buf = STRBUF_INIT;
-
-	strbuf_addstr(&buf, p);
-	strbuf_addstr(&buf, "/.farm");
-	if (xmkdir(buf.buf, 0755) < 0) {
-		sd_eprintf("%m");
-		ret = -1;
-		goto err;
-	}
-
-	if (!strlen(farm_dir))
-		strbuf_copyout(&buf, farm_dir, sizeof(farm_dir));
-
-	strbuf_addstr(&buf, "/objects");
-	if (xmkdir(buf.buf, 0755) < 0) {
-		sd_eprintf("%m");
-		ret = -1;
-		goto err;
-	}
-	for (i = 0; i < 256; i++) {
-		strbuf_addf(&buf, "/%02x", i);
-		if (xmkdir(buf.buf, 0755) < 0) {
-			sd_eprintf("%m");
-			ret = -1;
-			goto err;
-		}
-		strbuf_remove(&buf, buf.len - 3, 3);
-	}
-
-	if (!strlen(farm_obj_dir))
-		strbuf_copyout(&buf, farm_obj_dir, sizeof(farm_obj_dir));
-err:
-	strbuf_release(&buf);
-	return ret;
-}
-
-static int get_trunk_sha1(uint32_t epoch, unsigned char *outsha1)
-{
-	int i, nr_logs = -1, ret = -1;
-	struct snap_log *log_buf, *log_free = NULL;
-	void *snap_buf = NULL;
-	struct sha1_file_hdr hdr;
-
-	log_free = log_buf = snap_log_read(&nr_logs);
-	sd_dprintf("%d", nr_logs);
-	if (nr_logs < 0)
-		goto out;
-
-	for (i = 0; i < nr_logs; i++, log_buf++) {
-		if (log_buf->epoch != epoch)
-			continue;
-		snap_buf = snap_file_read(log_buf->sha1, &hdr);
-		if (!snap_buf)
-			goto out;
-		memcpy(outsha1, snap_buf, SHA1_LEN);
-		ret = 0;
-		break;
-	}
-out:
-	free(log_free);
-	free(snap_buf);
-	return ret;
-}
-
-static int farm_init(void)
-{
-	sd_dprintf("use farm store driver");
-	if (create_directory(obj_path) < 0)
-		goto err;
-
-	if (!is_xattr_enabled(obj_path)) {
-		sd_eprintf("xattrs are not enabled on %s", obj_path);
-		goto err;
-	}
-
-	if (snap_init() < 0)
-		goto err;
-
-	if (default_init() < 0)
-		goto err;
-
-	return SD_RES_SUCCESS;
-err:
-	return SD_RES_EIO;
-}
-
-static int farm_snapshot(const struct siocb *iocb)
-{
-	unsigned char snap_sha1[SHA1_LEN];
-	unsigned char trunk_sha1[SHA1_LEN];
-	struct sd_node nodes[SD_MAX_NODES];
-	int nr_nodes;
-	void *buffer;
-	int log_nr, ret = SD_RES_EIO, epoch;
-
-	buffer = snap_log_read(&log_nr);
-	if (!buffer)
-		goto out;
-
-	epoch = log_nr + 1;
-	sd_dprintf("user epoch %d", epoch);
-
-	nr_nodes = epoch_log_read(sys->epoch, nodes, sizeof(nodes));
-	if (nr_nodes < 0)
-		goto out;
-
-	if (trunk_file_write(trunk_sha1) < 0)
-		goto out;
-
-	if (snap_file_write(sys->epoch, nodes, nr_nodes,
-			    trunk_sha1, snap_sha1) < 0)
-		goto out;
-
-	if (snap_log_write(epoch, snap_sha1) < 0)
-		goto out;
-
-	ret = SD_RES_SUCCESS;
-out:
-	free(buffer);
-	return ret;
-}
-
-static int restore_objects_from_snap(uint32_t epoch)
-{
-	struct sha1_file_hdr hdr;
-	struct trunk_entry *trunk_buf, *trunk_free = NULL;
-	unsigned char trunk_sha1[SHA1_LEN];
-	uint64_t nr_trunks, i;
-	int ret = SD_RES_EIO;
-
-	if (get_trunk_sha1(epoch, trunk_sha1) < 0)
-		goto out;
-
-	trunk_free = trunk_buf = trunk_file_read(trunk_sha1, &hdr);
-	if (!trunk_buf)
-		goto out;
-
-	nr_trunks = hdr.priv;
-	ret = SD_RES_SUCCESS;
-	for (i = 0; i < nr_trunks; i++, trunk_buf++) {
-		struct sha1_file_hdr h;
-		struct siocb io = { 0 };
-		uint64_t oid;
-		void *buffer = NULL;
-
-		oid = trunk_buf->oid;
-		buffer = sha1_file_read(trunk_buf->sha1, &h);
-		if (!buffer) {
-			sd_eprintf("oid %"PRIx64" not restored", oid);
-			goto out;
-		}
-		io.length = h.size;
-		io.buf = buffer;
-		ret = default_create_and_write(oid, &io);
-		if (ret != SD_RES_SUCCESS) {
-			sd_eprintf("oid %"PRIx64" not restored", oid);
-			goto out;
-		} else
-			sd_dprintf("oid %"PRIx64" restored", oid);
-
-		free(buffer);
-	}
-out:
-	free(trunk_free);
-	return ret;
-}
-
-static int rm_object(uint64_t oid, char *path, void *arg)
-{
-	char p[PATH_MAX];
-	int ret = SD_RES_SUCCESS;
-
-	snprintf(p, sizeof(p), "%s/%"PRIx64, path, oid);
-	if (unlink(path) < 0) {
-		sd_eprintf("failed to remove cached object %m");
-		if (errno == ENOENT)
-			return SD_RES_SUCCESS;
-		ret = SD_RES_EIO;
-		goto out;
-	}
-out:
-	return ret;
-}
-
-static int farm_restore(const struct siocb *iocb)
-{
-	int ret = SD_RES_EIO, epoch = iocb->epoch;
-
-	sd_dprintf("try recover user epoch %d", epoch);
-
-	/* Remove all the objects of WD and object cache */
-	for_each_object_in_wd(rm_object, true, NULL);
-	if (sys->enable_object_cache)
-		object_cache_format();
-
-	ret = restore_objects_from_snap(epoch);
-	if (ret != SD_RES_SUCCESS)
-		goto out;
-out:
-	return ret;
-}
-
-static int farm_get_snap_file(struct siocb *iocb)
-{
-	int ret = SD_RES_EIO;
-	void *buffer = NULL;
-	size_t size;
-	int nr;
-
-	sd_dprintf("try get snap file");
-	buffer = snap_log_read(&nr);
-	if (!buffer)
-		goto out;
-	size = nr * sizeof(struct snap_log);
-	memcpy(iocb->buf, buffer, size);
-	iocb->length = size;
-	ret = SD_RES_SUCCESS;
-out:
-	free(buffer);
-	return ret;
-}
-
-static struct store_driver farm = {
-	.name = "farm",
-	.init = farm_init,
-	.exist = default_exist,
-	.create_and_write = default_create_and_write,
-	.write = default_write,
-	.read = default_read,
-	.link = default_link,
-	.update_epoch = default_update_epoch,
-	.snapshot = farm_snapshot,
-	.cleanup = default_cleanup,
-	.restore = farm_restore,
-	.get_snap_file = farm_get_snap_file,
-	.format = default_format,
-	.purge_obj = default_purge_obj,
-	.remove_object = default_remove_object,
-	.get_hash = default_get_hash,
-};
-
-add_store_driver(farm);
+/*
+ * Copyright (C) 2011 Taobao Inc.
+ * Copyright (C) 2013 Zelin.io
+ *
+ * Liu Yuan <namei.unix at gmail.com>
+ * Kai Zhang <kyle at zelin.io>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "list.h"
+#include "farm.h"
+
+static char farm_obj_dir[PATH_MAX];
+static char farm_dir[PATH_MAX];
+
+inline char *get_object_directory(void)
+{
+	return farm_obj_dir;
+}
+
+static int create_directory(const char *p)
+{
+	int i, ret = -1;
+	struct strbuf buf = STRBUF_INIT;
+
+	strbuf_addstr(&buf, p);
+	if (xmkdir(buf.buf, 0755) < 0) {
+		if (errno == EEXIST)
+			sd_dprintf("path is not a directory: %s", p);
+		else
+			sd_dprintf("fail to create directory: %s", p);
+		goto out;
+	}
+
+	if (!strlen(farm_dir))
+		strbuf_copyout(&buf, farm_dir, sizeof(farm_dir));
+
+	strbuf_addstr(&buf, "/objects");
+	if (xmkdir(buf.buf, 0755) < 0)
+		goto out;
+
+	for (i = 0; i < 256; i++) {
+		strbuf_addf(&buf, "/%02x", i);
+		if (xmkdir(buf.buf, 0755) < 0)
+			goto out;
+
+		strbuf_remove(&buf, buf.len - 3, 3);
+	}
+
+	if (!strlen(farm_obj_dir))
+		strbuf_copyout(&buf, farm_obj_dir, sizeof(farm_obj_dir));
+
+	ret = 0;
+out:
+	if (ret)
+		sd_eprintf("fail to create directory %m");
+	strbuf_release(&buf);
+	return ret;
+}
+
+static int get_trunk_sha1(uint32_t idx, const char *tag, unsigned char *outsha1)
+{
+	int i, nr_logs = -1, ret = -1;
+	struct snap_log *log_buf, *log_free = NULL;
+	void *snap_buf = NULL;
+	struct sha1_file_hdr hdr;
+
+	log_free = log_buf = snap_log_read(&nr_logs);
+	if (nr_logs < 0)
+		goto out;
+
+	for (i = 0; i < nr_logs; i++, log_buf++) {
+		if (log_buf->idx != idx && strcmp(log_buf->tag, tag))
+			continue;
+		snap_buf = snap_file_read(log_buf->sha1, &hdr);
+		if (!snap_buf)
+			goto out;
+		memcpy(outsha1, snap_buf, SHA1_LEN);
+		ret = 0;
+		goto out;
+	}
+	if (idx)
+		sd_eprintf("could not find index %"PRIu32, idx);
+	if (tag)
+		sd_eprintf("could not find tag %s", tag);
+out:
+	if (log_free)
+		free(log_free);
+	if (snap_buf)
+		free(snap_buf);
+	return ret;
+}
+
+static int notify_vdi_add(uint32_t vdi_id, uint32_t nr_copies)
+{
+	int ret = -1;
+	struct sd_req hdr;
+	char *buf = NULL;
+
+	sd_init_req(&hdr, SD_OP_NOTIFY_VDI_ADD);
+	hdr.vdi_state.new_vid = vdi_id;
+	hdr.vdi_state.copies = nr_copies;
+	hdr.vdi_state.set_bitmap = true;
+
+	ret = collie_exec_req(sdhost, sdport, &hdr, buf);
+
+	if (ret)
+		sd_eprintf("fail to notify vdi add event(%" PRIx32 ", %d)",
+			   vdi_id, nr_copies);
+	if (buf)
+		free(buf);
+	return ret;
+}
+
+static int fill_trunk_entry(uint64_t oid, enum obj_type type,
+			    int nr_copies, void *buf, size_t size, void *data)
+{
+	int ret = -1;
+
+	struct strbuf *trunk_entries = data;
+	struct trunk_entry new_entry = {};
+	struct sha1_file_hdr hdr = { .priv = 0 };
+	struct strbuf obj_strbuf = STRBUF_INIT;
+
+	/* fill sha1 file */
+	memcpy(hdr.tag, TAG_DATA, TAG_LEN);
+	hdr.size = size;
+
+	/* fill obj_strbuf */
+	strbuf_add(&obj_strbuf, buf, size);
+	strbuf_insert(&obj_strbuf, 0, &hdr, sizeof(hdr));
+
+	/* write sha1 file and fill trunk entry */
+	if (sha1_file_write((void *)obj_strbuf.buf,
+			    obj_strbuf.len,
+			    new_entry.sha1) != 0)
+		goto out;
+
+	new_entry.oid = oid;
+	new_entry.type = type;
+	new_entry.nr_copies = nr_copies;
+	strbuf_add(trunk_entries, &new_entry, sizeof(struct trunk_entry));
+
+	ret = 0;
+out:
+	if (ret)
+		sd_eprintf("fail to fill trunk entry");
+	strbuf_release(&obj_strbuf);
+	return ret;
+}
+
+int farm_init(const char *path)
+{
+	int ret = -1;
+
+	if (create_directory(path) < 0)
+		goto out;
+	if (snap_init(farm_dir) < 0)
+		goto out;
+	return 0;
+out:
+	if (ret)
+		sd_eprintf("fail to init farm");
+	return ret;
+}
+
+int farm_save_snapshot(const char *tag)
+{
+	unsigned char snap_sha1[SHA1_LEN];
+	unsigned char trunk_sha1[SHA1_LEN];
+	struct strbuf trunk_entries = STRBUF_INIT;
+	void *snap_log = NULL;
+	int log_nr, idx;
+	int ret = -1;
+
+	snap_log = snap_log_read(&log_nr);
+	if (!snap_log)
+		goto out;
+
+	idx = log_nr + 1;
+
+	if (parse_obj(fill_trunk_entry, &trunk_entries) < 0)
+		goto out;
+
+	if (trunk_file_write(trunk_sha1, &trunk_entries) < 0)
+		goto out;
+
+	if (snap_file_write(idx, trunk_sha1, snap_sha1) < 0)
+		goto out;
+
+	if (snap_log_write(idx, tag, snap_sha1) != 0)
+		goto out;
+
+	ret = 0;
+out:
+	if (snap_log)
+		free(snap_log);
+	strbuf_release(&trunk_entries);
+	return ret;
+}
+
+struct vdi_entry {
+	char name[SD_MAX_VDI_LEN];
+	uint64_t vdi_size;
+	uint32_t vdi_id;
+	uint32_t snap_id;
+	uint8_t  nr_copies;
+	struct list_head list;
+};
+/* this is used to track the last snapshot of each vdi*/
+static struct vdi_entry last_snapshots = {
+	.name = "",
+	.vdi_size = 0,
+	.vdi_id = 0,
+	.snap_id = 0,
+	.nr_copies = 0,
+	.list = LIST_HEAD_INIT(last_snapshots.list)
+
+};
+
+static struct vdi_entry *find_vdi(const char *name)
+{
+	struct vdi_entry *vdi;
+
+	list_for_each_entry(vdi, &last_snapshots.list, list) {
+		if (!strcmp(vdi->name, name))
+			return vdi;
+	}
+	return NULL;
+}
+
+static struct vdi_entry *new_vdi(const char *name, uint64_t vdi_size,
+				 uint32_t vdi_id, uint32_t snap_id,
+				 uint8_t nr_copies)
+{
+	struct vdi_entry *vdi;
+	vdi = xmalloc(sizeof(struct vdi_entry));
+	pstrcpy(vdi->name, sizeof(vdi->name), name);
+	vdi->vdi_size = vdi_size;
+	vdi->vdi_id = vdi_id;
+	vdi->snap_id = snap_id;
+	vdi->nr_copies = nr_copies;
+	INIT_LIST_HEAD(&vdi->list);
+	return vdi;
+}
+
+static void insert_vdi(const char *name, uint64_t vdi_size,
+		       uint32_t vdi_id, uint32_t snap_id,
+		       uint8_t nr_copies)
+{
+	struct vdi_entry *vdi;
+	vdi = find_vdi(name);
+	if (!vdi) {
+		vdi = new_vdi(name, vdi_size, vdi_id, snap_id, nr_copies);
+		list_add(&vdi->list, &last_snapshots.list);
+	} else if (vdi->snap_id < snap_id) {
+		vdi->vdi_size = vdi_size;
+		vdi->vdi_id = vdi_id;
+		vdi->snap_id = snap_id;
+		vdi->nr_copies = nr_copies;
+	}
+}
+
+int farm_load_snapshot(uint32_t idx, const char *tag)
+{
+	int ret = -1;
+	struct trunk_entry *trunk_entry, *trunk_free = NULL;
+	struct sha1_file_hdr trunk_hdr;
+	struct sd_inode *inode;
+	unsigned char trunk_sha1[SHA1_LEN];
+	uint64_t nr_trunks, i;
+
+	if (get_trunk_sha1(idx, tag, trunk_sha1) < 0)
+		goto out;
+
+	trunk_free = trunk_entry = trunk_file_read(trunk_sha1, &trunk_hdr);
+
+	if (!trunk_entry)
+		goto out;
+
+	nr_trunks = trunk_hdr.priv;
+	for (i = 0; i < nr_trunks; i++, trunk_entry++) {
+		struct sha1_file_hdr hdr;
+		void *buffer = NULL;
+		uint64_t oid = trunk_entry->oid;
+
+		buffer = sha1_file_read(trunk_entry->sha1, &hdr);
+		if (!buffer) {
+			sd_eprintf("oid %"PRIx64" not restored", oid);
+			goto out;
+		}
+
+		if (sd_write_object(oid, 0, buffer, hdr.size, 0, 0,
+				    trunk_entry->nr_copies,
+				    true, true) != 0) {
+			sd_eprintf("oid %"PRIx64" not restored", oid);
+			goto out;
+		}
+
+		if (trunk_entry->type == VDI) {
+			inode = buffer;
+
+			if (notify_vdi_add(oid_to_vid(oid),
+					   trunk_entry->nr_copies) < 0)
+				goto out;
+
+			insert_vdi(inode->name,
+				   inode->vdi_size,
+				   inode->vdi_id,
+				   inode->snap_id,
+				   inode->nr_copies);
+		}
+	}
+
+	/* create active vdi based on last vdi snapshot */
+	struct vdi_entry *vdi, *next;
+	uint32_t new_vid;
+	list_for_each_entry(vdi, &last_snapshots.list, list) {
+		if (do_vdi_create(vdi->name,
+				  vdi->vdi_size,
+				  vdi->vdi_id, &new_vid,
+				  false, vdi->nr_copies) < 0)
+			goto out;
+	}
+
+	list_for_each_entry_safe(vdi, next, &last_snapshots.list, list)
+		free(vdi);
+
+	ret = 0;
+out:
+	if (trunk_free)
+		free(trunk_free);
+	return ret;
+}
diff --git a/collie/farm/farm.h b/collie/farm/farm.h
index e08d3e8..6de8585 100644
--- a/collie/farm/farm.h
+++ b/collie/farm/farm.h
@@ -44,13 +44,11 @@ struct sha1_file_hdr {
 	uint64_t reserved;
 };
 
-static char farm_obj_dir[PATH_MAX];
-static char farm_dir[PATH_MAX];
-
-static inline char *get_object_directory(void)
-{
-	return farm_obj_dir;
-}
+/* farm.c */
+int farm_init(const char *path);
+int farm_save_snapshot(const char *tag);
+int farm_load_snapshot(uint32_t idx, const char *tag);
+char *get_object_directory(void);
 
 /* trunk.c */
 int trunk_init(void);
diff --git a/collie/vdi.c b/collie/vdi.c
index 27a8418..087ec80 100644
--- a/collie/vdi.c
+++ b/collie/vdi.c
@@ -505,7 +505,7 @@ static int read_vdi_obj(const char *vdiname, int snapid, const char *tag,
 	return EXIT_SUCCESS;
 }
 
-static int do_vdi_create(const char *vdiname, int64_t vdi_size,
+int do_vdi_create(const char *vdiname, int64_t vdi_size,
 			 uint32_t base_vid, uint32_t *vdi_id, bool snapshot,
 			 int nr_copies)
 {
-- 
1.7.1




More information about the sheepdog mailing list