[sheepdog] [PATCH 5/6] farm: rework trunk logic

Liu Yuan namei.unix at gmail.com
Fri Aug 24 14:10:16 CEST 2012


From: Liu Yuan <tailai.ly at taobao.com>

Since recovery handling has been moved out of the farm backend, we no longer
need to track IO requests on objects. This leaves most of the functions in
trunk.c useless for now.

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 sheep/farm/farm.c   |    5 +-
 sheep/farm/farm.h   |   14 +-
 sheep/farm/trunk.c  |  362 ++++++---------------------------------------------
 sheep/plain_store.c |    8 +-
 sheep/sheep_priv.h  |    1 +
 5 files changed, 48 insertions(+), 342 deletions(-)

diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c
index 69198a1..4ccd629 100644
--- a/sheep/farm/farm.c
+++ b/sheep/farm/farm.c
@@ -160,9 +160,6 @@ static int farm_init(char *p)
 		goto err;
 	}
 
-	if (trunk_init() < 0)
-		goto err;
-
 	if (snap_init() < 0)
 		goto err;
 
@@ -194,7 +191,7 @@ static int farm_snapshot(struct siocb *iocb)
 	if (nr_nodes < 0)
 		goto out;
 
-	if (trunk_file_write_user(trunk_sha1) < 0)
+	if (trunk_file_write(trunk_sha1) < 0)
 		goto out;
 
 	if (snap_file_write(sys->epoch, nodes, nr_nodes,
diff --git a/sheep/farm/farm.h b/sheep/farm/farm.h
index 27e65cd..af803b6 100644
--- a/sheep/farm/farm.h
+++ b/sheep/farm/farm.h
@@ -39,13 +39,6 @@ struct trunk_entry {
 	unsigned char sha1[SHA1_LEN];
 };
 
-struct trunk_entry_incore {
-	struct trunk_entry raw;
-	int flags;
-	struct list_head active_list;
-	struct hlist_node hash;
-};
-
 /* farm.c */
 extern char farm_dir[PATH_MAX];
 extern char farm_obj_dir[PATH_MAX];
@@ -59,13 +52,8 @@ extern int sha1_file_try_delete(const unsigned char *sha1);
 
 /* trunk.c */
 extern int trunk_init(void);
-extern int trunk_file_write_recovery(unsigned char *outsha1);
-extern int trunk_file_write_user(unsigned char *outsha1);
+extern int trunk_file_write(unsigned char *outsha1);
 extern void *trunk_file_read(unsigned char *sha1, struct sha1_file_hdr *);
-extern int trunk_update_entry(uint64_t oid);
-extern void trunk_reset(void);
-extern void trunk_put_entry(uint64_t oid);
-extern void trunk_get_entry(uint64_t oid);
 
 /* snap.c */
 extern int snap_init(void);
diff --git a/sheep/farm/trunk.c b/sheep/farm/trunk.c
index 36dd86e..0385e54 100644
--- a/sheep/farm/trunk.c
+++ b/sheep/farm/trunk.c
@@ -13,8 +13,8 @@
 
 /*
  * Trunk object is meta data that describes the structure of the data objects
- * at the time of snapshot being taken. It ties data objects together into a flat
- * directory structure.
+ * at the time of snapshot being taken. It ties data objects together into a
+ * flat directory structure.
  */
 #include <pthread.h>
 #include <dirent.h>
@@ -28,108 +28,7 @@
 #include "sheepdog_proto.h"
 #include "sheep_priv.h"
 
-#define TRUNK_ENTRY_DIRTY	0x00000001
-
-#define HASH_BITS	10
-#define HASH_SIZE	(1 << HASH_BITS)
-
-static LIST_HEAD(trunk_active_list);
-static pthread_mutex_t active_list_lock = PTHREAD_MUTEX_INITIALIZER;
-static struct hlist_head trunk_hashtable[HASH_SIZE];
-static pthread_mutex_t hashtable_lock[HASH_SIZE] = { [0 ... HASH_SIZE - 1] = PTHREAD_MUTEX_INITIALIZER };
-static unsigned int trunk_entry_active_nr;
-
-struct omap_entry {
-	uint64_t oid;
-	unsigned char sha1[SHA1_LEN];
-	struct rb_node node;
-};
-
-struct rb_root omap_tree;
-
-static inline int trunk_entry_is_dirty(struct trunk_entry_incore *entry)
-{
-	return entry->flags & TRUNK_ENTRY_DIRTY;
-}
-
-static inline void dirty_trunk_entry(struct trunk_entry_incore *entry)
-{
-	entry->flags |= TRUNK_ENTRY_DIRTY;
-}
-
-static inline void undirty_trunk_entry(struct trunk_entry_incore *entry)
-{
-	entry->flags &= ~TRUNK_ENTRY_DIRTY;
-}
-
-static inline int hash(uint64_t oid)
-{
-	return hash_64(oid, HASH_BITS);
-}
-
-static inline void get_entry(struct trunk_entry_incore *entry, struct hlist_head *head)
-{
-	hlist_add_head(&entry->hash, head);
-	pthread_mutex_lock(&active_list_lock);
-	list_add(&entry->active_list, &trunk_active_list);
-	trunk_entry_active_nr++;
-	pthread_mutex_unlock(&active_list_lock);
-}
-
-static struct trunk_entry_incore *lookup_trunk_entry(uint64_t oid, int create)
-{
-	int h = hash(oid);
-	struct hlist_head *head = trunk_hashtable + h;
-	struct trunk_entry_incore *entry = NULL;
-	struct hlist_node *node;
-
-	pthread_mutex_lock(&hashtable_lock[h]);
-	if (hlist_empty(head))
-		goto not_found;
-
-	hlist_for_each_entry(entry, node, head, hash) {
-		if (entry->raw.oid == oid)
-			goto out;
-	}
-not_found:
-	if (create) {
-		entry = xzalloc(sizeof(*entry));
-		entry->raw.oid = oid;
-		get_entry(entry, head);
-	} else
-		entry = NULL;
-out:
-	pthread_mutex_unlock(&hashtable_lock[h]);
-	return entry;
-}
-
-int trunk_init(void)
-{
-	DIR *dir;
-	struct dirent *d;
-	uint64_t oid;
-
-	dir = opendir(obj_path);
-	if (!dir)
-		return -1;
-
-	while ((d = readdir(dir))) {
-		if (!strncmp(d->d_name, ".", 1))
-			continue;
-		oid = strtoull(d->d_name, NULL, 16);
-		if (oid == 0 || oid == ULLONG_MAX)
-			continue;
-		objlist_cache_insert(oid);
-		lookup_trunk_entry(oid, 1);
-	}
-
-	omap_tree = RB_ROOT;
-
-	closedir(dir);
-	return 0;
-}
-
-static int fill_entry_new_sha1(struct trunk_entry_incore *entry)
+static int fill_entry_new_sha1(struct trunk_entry *entry)
 {
 	struct strbuf buf = STRBUF_INIT;
 	int fd, ret = 0;
@@ -137,7 +36,7 @@ static int fill_entry_new_sha1(struct trunk_entry_incore *entry)
 
 	memcpy(hdr.tag, TAG_DATA, TAG_LEN);
 	strbuf_addstr(&buf, obj_path);
-	strbuf_addf(&buf, "%016" PRIx64, entry->raw.oid);
+	strbuf_addf(&buf, "%016" PRIx64, entry->oid);
 	fd = open(buf.buf, O_RDONLY);
 	strbuf_reset(&buf);
 
@@ -154,12 +53,12 @@ static int fill_entry_new_sha1(struct trunk_entry_incore *entry)
 	hdr.size = buf.len;
 	strbuf_insert(&buf, 0, &hdr, sizeof(hdr));
 
-	if (sha1_file_write((void *)buf.buf, buf.len, entry->raw.sha1) < 0) {
+	if (sha1_file_write((void *)buf.buf, buf.len, entry->sha1) < 0) {
 		ret = -1;
 		goto out_close;
 	}
-	dprintf("data sha1:%s, %"PRIx64"\n", sha1_to_hex(entry->raw.sha1),
-		entry->raw.oid);
+	dprintf("data sha1:%s, %"PRIx64"\n", sha1_to_hex(entry->sha1),
+		entry->oid);
 out_close:
 	close(fd);
 out:
@@ -167,199 +66,53 @@ out:
 	return ret;
 }
 
-static inline int trunk_entry_no_sha1(struct trunk_entry_incore *entry)
-{
-	unsigned char empty[SHA1_LEN] = {0};
-
-	return memcmp(entry->raw.sha1, empty, SHA1_LEN) == 0;
-}
-
-static inline void put_entry(struct trunk_entry_incore *entry)
-{
-	int h = hash(entry->raw.oid);
-
-	pthread_mutex_lock(&hashtable_lock[h]);
-	hlist_del(&entry->hash);
-	pthread_mutex_unlock(&hashtable_lock[h]);
-
-	pthread_mutex_lock(&active_list_lock);
-	list_del(&entry->active_list);
-	trunk_entry_active_nr--;
-	pthread_mutex_unlock(&active_list_lock);
-	free(entry);
-}
-
-static struct omap_entry *omap_tree_rb_insert(struct rb_root *root,
-			struct omap_entry *new)
-{
-	struct rb_node **p = &root->rb_node;
-	struct rb_node *parent = NULL;
-	struct omap_entry *entry;
-
-	while (*p) {
-		parent = *p;
-		entry = rb_entry(parent, struct omap_entry, node);
-
-		if (new->oid < entry->oid)
-			p = &(*p)->rb_left;
-		else if (new->oid > entry->oid)
-			p = &(*p)->rb_right;
-		else
-			return entry; /* already has this entry */
-	}
-	rb_link_node(&new->node, parent, p);
-	rb_insert_color(&new->node, root);
-
-	return NULL; /* insert successfully */
-}
-
-static unsigned char *omap_tree_insert(uint64_t oid, unsigned char *sha1)
-{
-	struct omap_entry *existing_entry, *new;
-	static unsigned char old_sha1[SHA1_LEN];
-
-	new = xmalloc(sizeof(*new));
-	new->oid = oid;
-	memcpy(new->sha1, sha1, SHA1_LEN);
-	rb_init_node(&new->node);
-
-	existing_entry = omap_tree_rb_insert(&omap_tree, new);
-	if (existing_entry) {
-		free(new);
-		if (memcmp(existing_entry->sha1, sha1, SHA1_LEN) == 0) {
-			return NULL;
-		} else {
-			memcpy(old_sha1, existing_entry->sha1, SHA1_LEN);
-			memcpy(existing_entry->sha1, sha1, SHA1_LEN);
-			return old_sha1;
-		}
-	}
-
-	return NULL;
-}
+static uint64_t object_nr;
 
-static int oid_stale(uint64_t oid)
+static int inc_object_nr(uint64_t oid)
 {
-	int i, nr_copies;
-	struct vnode_info *vinfo;
-	struct sd_vnode *v;
-	int ret = 1;
-	struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
-
-	vinfo = get_vnode_info();
-	nr_copies = get_obj_copy_number(oid);
-	if (!nr_copies)
-		return 0;
-
-	oid_to_vnodes(vinfo->vnodes, vinfo->nr_vnodes, oid,
-		      nr_copies, obj_vnodes);
-	for (i = 0; i < nr_copies; i++) {
-		v = obj_vnodes[i];
-		if (vnode_is_local(v)) {
-			ret = 0;
-			break;
-		}
-	}
-
-	put_vnode_info(vinfo);
-	return ret;
+	object_nr++;
+	return 0;
 }
 
-int trunk_file_write_recovery(unsigned char *outsha1)
+int trunk_file_write(unsigned char *outsha1)
 {
-	struct trunk_entry_incore *entry, *t;
-	struct strbuf buf = STRBUF_INIT;
-	char p[PATH_MAX];
-	struct sha1_file_hdr hdr, *h;
-	int ret = -1, active_nr = 0;
-	uint64_t oid;
-	unsigned char *old_sha1;
+	struct strbuf buf;
+	struct sha1_file_hdr hdr;
+	struct trunk_entry entry;
+	struct dirent *d;
+	DIR *dir;
+	uint64_t data_size, oid;
+	int ret = 0;
 
+	/* Add the hdr first */
+	for_each_object_in_wd(inc_object_nr);
+	data_size = sizeof(struct trunk_entry) * object_nr;
+	hdr.size = data_size;
+	hdr.priv = object_nr;
 	memcpy(hdr.tag, TAG_TRUNK, TAG_LEN);
+	strbuf_init(&buf, sizeof(hdr) + data_size);
 	strbuf_add(&buf, &hdr, sizeof(hdr));
 
-	list_for_each_entry_safe(entry, t, &trunk_active_list, active_list) {
-		oid = entry->raw.oid;
-		if (!oid_stale(oid))
-			continue;
-
-		dprintf("stale oid %"PRIx64"\n", oid);
-		if (trunk_entry_no_sha1(entry) || trunk_entry_is_dirty(entry)) {
-			if (fill_entry_new_sha1(entry) < 0) {
-				eprintf("fill sha1 fail\n");
-				goto out;
-			}
-		}
-
-		old_sha1 = omap_tree_insert(oid, entry->raw.sha1);
-		if (old_sha1)
-			sha1_file_try_delete(old_sha1);
-
-		strbuf_add(&buf, &entry->raw, sizeof(struct trunk_entry));
-		active_nr++;
-
-		/*
-		 * We remove object from the working directory, but can not
-		 * remove the objlist cache entry.
-		 *
-		 * Consider the following case:
-		 *
-		 * If node A ends recovery before some other nodes, and then it
-		 * delete the stale object from the farm working directory, but
-		 * if it also deletes the objlist entry, it may causes problem,
-		 * try thinking of another node B which issues a get_obj_list()
-		 * request after the objlist entry is deleted on the original
-		 * node A, but still not added to the target node C, then
-		 * node B would not find the objlist entry, then for node B,
-		 * this object is ignored to recovery, so it's lost.
-		 */
-		snprintf(p, sizeof(p), "%s%016"PRIx64, obj_path, entry->raw.oid);
-		if (unlink(p) < 0) {
-			eprintf("%s:%m\n", p);
-			goto out;
-		}
-		dprintf("remove file %"PRIx64"\n", entry->raw.oid);
-		put_entry(entry);
-	}
-
-	h = (struct sha1_file_hdr*)buf.buf;
-	h->size = sizeof(struct trunk_entry) * active_nr;
-	h->priv = active_nr;
-
-	if (sha1_file_write((void *)buf.buf, buf.len, outsha1) < 0) {
-		dprintf("sha1 file write fail.\n");
+	dir = opendir(obj_path);
+	if (!dir) {
+		ret = -1;
 		goto out;
 	}
 
-	ret = SD_RES_SUCCESS;
-out:
-	strbuf_release(&buf);
-	return ret;
-}
-
-int trunk_file_write_user(unsigned char *outsha1)
-{
-	struct strbuf buf;
-	uint64_t data_size = sizeof(struct trunk_entry) * trunk_entry_active_nr;
-	struct sha1_file_hdr hdr = { .size = data_size,
-				     .priv = trunk_entry_active_nr };
-	struct trunk_entry_incore *entry, *t;
-	int ret = 0;
+	while ((d = readdir(dir))) {
+		if (!strncmp(d->d_name, ".", 1))
+			continue;
 
-	memcpy(hdr.tag, TAG_TRUNK, TAG_LEN);
-	strbuf_init(&buf, sizeof(hdr) + data_size);
+		oid = strtoull(d->d_name, NULL, 16);
+		if (oid == 0 || oid == ULLONG_MAX)
+			continue;
 
-	strbuf_add(&buf, &hdr, sizeof(hdr));
-	list_for_each_entry_safe(entry, t, &trunk_active_list, active_list) {
-		if (trunk_entry_no_sha1(entry) || trunk_entry_is_dirty(entry)) {
-			if (fill_entry_new_sha1(entry) < 0) {
-				ret = -1;
-				goto out;
-			}
+		entry.oid = oid;
+		if (fill_entry_new_sha1(&entry) < 0) {
+			ret = -1;
+			goto out;
 		}
-		strbuf_add(&buf, &entry->raw, sizeof(struct trunk_entry));
-
-		undirty_trunk_entry(entry);
+		strbuf_add(&buf, &entry, sizeof(struct trunk_entry));
 	}
 
 	if (sha1_file_write((void *)buf.buf, buf.len, outsha1) < 0) {
@@ -368,6 +121,8 @@ int trunk_file_write_user(unsigned char *outsha1)
 	}
 	dprintf("trunk sha1: %s\n", sha1_to_hex(outsha1));
 out:
+	object_nr = 0;
+	closedir(dir);
 	strbuf_release(&buf);
 	return ret;
 }
@@ -387,38 +142,3 @@ void *trunk_file_read(unsigned char *sha1, struct sha1_file_hdr *outhdr)
 
 	return buffer;
 }
-
-int trunk_update_entry(uint64_t oid)
-{
-	struct trunk_entry_incore *entry;
-
-	entry = lookup_trunk_entry(oid, 1);
-	if (!trunk_entry_is_dirty(entry))
-		dirty_trunk_entry(entry);
-
-	return 0;
-}
-
-void trunk_put_entry(uint64_t oid)
-{
-	struct trunk_entry_incore *entry;
-
-	entry = lookup_trunk_entry(oid, 0);
-	if (entry)
-		put_entry(entry);
-}
-
-void trunk_get_entry(uint64_t oid)
-{
-	lookup_trunk_entry(oid, 1);
-}
-
-void trunk_reset(void)
-{
-	struct trunk_entry_incore *entry, *t;
-	list_for_each_entry_safe(entry, t, &trunk_active_list, active_list) {
-		put_entry(entry);
-	}
-	eprintf("%s\n", trunk_entry_active_nr ? "WARN: active_list not clean" :
-						"clean");
-}
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index 324a139..ecf8304 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -36,7 +36,7 @@ static int get_stale_obj_path(uint64_t oid, char *path)
 	return sprintf(path, "%s/%016"PRIx64, stale_dir, oid);
 }
 
-static int for_each_objects(int (*func)(uint64_t oid))
+int for_each_object_in_wd(int (*func)(uint64_t oid))
 {
 	DIR *dir;
 	struct dirent *d;
@@ -199,7 +199,7 @@ int default_init(char *p)
 	/* When we start up, the objects in .stale is useless */
 	default_cleanup();
 
-	return for_each_objects(init_objlist_and_vdi_bitmap);
+	return for_each_object_in_wd(init_objlist_and_vdi_bitmap);
 }
 
 static int default_read_from_path(uint64_t oid, char *path,
@@ -353,7 +353,7 @@ int default_end_recover(uint32_t old_epoch, struct vnode_info *old_vnode_info)
 	if (old_epoch == 0)
 		return SD_RES_SUCCESS;
 
-	return for_each_objects(move_object_to_stale_dir);
+	return for_each_object_in_wd(move_object_to_stale_dir);
 }
 
 int default_format(char *name)
@@ -396,7 +396,7 @@ int default_remove_object(uint64_t oid)
 
 int default_purge_obj(void)
 {
-	return for_each_objects(default_remove_object);
+	return for_each_object_in_wd(default_remove_object);
 }
 
 struct store_driver plain_store = {
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 7a54268..92ccdd2 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -170,6 +170,7 @@ int default_cleanup(void);
 int default_format(char *name);
 int default_remove_object(uint64_t oid);
 int default_purge_obj(void);
+int for_each_object_in_wd(int (*func)(uint64_t oid));
 
 extern struct list_head store_drivers;
 #define add_store_driver(driver)                                 \
-- 
1.7.10.2




More information about the sheepdog mailing list