[sheepdog] [PATCH 5/6] farm: rework trunk logic
Liu Yuan
namei.unix at gmail.com
Fri Aug 24 14:10:16 CEST 2012
From: Liu Yuan <tailai.ly at taobao.com>
Since we moved recovery handling out of the farm backend, we no longer need
to track IO requests on objects; this leaves most of the functions in trunk.c
useless for now.
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
sheep/farm/farm.c | 5 +-
sheep/farm/farm.h | 14 +-
sheep/farm/trunk.c | 362 ++++++---------------------------------------------
sheep/plain_store.c | 8 +-
sheep/sheep_priv.h | 1 +
5 files changed, 48 insertions(+), 342 deletions(-)
diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c
index 69198a1..4ccd629 100644
--- a/sheep/farm/farm.c
+++ b/sheep/farm/farm.c
@@ -160,9 +160,6 @@ static int farm_init(char *p)
goto err;
}
- if (trunk_init() < 0)
- goto err;
-
if (snap_init() < 0)
goto err;
@@ -194,7 +191,7 @@ static int farm_snapshot(struct siocb *iocb)
if (nr_nodes < 0)
goto out;
- if (trunk_file_write_user(trunk_sha1) < 0)
+ if (trunk_file_write(trunk_sha1) < 0)
goto out;
if (snap_file_write(sys->epoch, nodes, nr_nodes,
diff --git a/sheep/farm/farm.h b/sheep/farm/farm.h
index 27e65cd..af803b6 100644
--- a/sheep/farm/farm.h
+++ b/sheep/farm/farm.h
@@ -39,13 +39,6 @@ struct trunk_entry {
unsigned char sha1[SHA1_LEN];
};
-struct trunk_entry_incore {
- struct trunk_entry raw;
- int flags;
- struct list_head active_list;
- struct hlist_node hash;
-};
-
/* farm.c */
extern char farm_dir[PATH_MAX];
extern char farm_obj_dir[PATH_MAX];
@@ -59,13 +52,8 @@ extern int sha1_file_try_delete(const unsigned char *sha1);
/* trunk.c */
extern int trunk_init(void);
-extern int trunk_file_write_recovery(unsigned char *outsha1);
-extern int trunk_file_write_user(unsigned char *outsha1);
+extern int trunk_file_write(unsigned char *outsha1);
extern void *trunk_file_read(unsigned char *sha1, struct sha1_file_hdr *);
-extern int trunk_update_entry(uint64_t oid);
-extern void trunk_reset(void);
-extern void trunk_put_entry(uint64_t oid);
-extern void trunk_get_entry(uint64_t oid);
/* snap.c */
extern int snap_init(void);
diff --git a/sheep/farm/trunk.c b/sheep/farm/trunk.c
index 36dd86e..0385e54 100644
--- a/sheep/farm/trunk.c
+++ b/sheep/farm/trunk.c
@@ -13,8 +13,8 @@
/*
* Trunk object is meta data that describes the structure of the data objects
- * at the time of snapshot being taken. It ties data objects together into a flat
- * directory structure.
+ * at the time of snapshot being taken. It ties data objects together into a
+ * flat directory structure.
*/
#include <pthread.h>
#include <dirent.h>
@@ -28,108 +28,7 @@
#include "sheepdog_proto.h"
#include "sheep_priv.h"
-#define TRUNK_ENTRY_DIRTY 0x00000001
-
-#define HASH_BITS 10
-#define HASH_SIZE (1 << HASH_BITS)
-
-static LIST_HEAD(trunk_active_list);
-static pthread_mutex_t active_list_lock = PTHREAD_MUTEX_INITIALIZER;
-static struct hlist_head trunk_hashtable[HASH_SIZE];
-static pthread_mutex_t hashtable_lock[HASH_SIZE] = { [0 ... HASH_SIZE - 1] = PTHREAD_MUTEX_INITIALIZER };
-static unsigned int trunk_entry_active_nr;
-
-struct omap_entry {
- uint64_t oid;
- unsigned char sha1[SHA1_LEN];
- struct rb_node node;
-};
-
-struct rb_root omap_tree;
-
-static inline int trunk_entry_is_dirty(struct trunk_entry_incore *entry)
-{
- return entry->flags & TRUNK_ENTRY_DIRTY;
-}
-
-static inline void dirty_trunk_entry(struct trunk_entry_incore *entry)
-{
- entry->flags |= TRUNK_ENTRY_DIRTY;
-}
-
-static inline void undirty_trunk_entry(struct trunk_entry_incore *entry)
-{
- entry->flags &= ~TRUNK_ENTRY_DIRTY;
-}
-
-static inline int hash(uint64_t oid)
-{
- return hash_64(oid, HASH_BITS);
-}
-
-static inline void get_entry(struct trunk_entry_incore *entry, struct hlist_head *head)
-{
- hlist_add_head(&entry->hash, head);
- pthread_mutex_lock(&active_list_lock);
- list_add(&entry->active_list, &trunk_active_list);
- trunk_entry_active_nr++;
- pthread_mutex_unlock(&active_list_lock);
-}
-
-static struct trunk_entry_incore *lookup_trunk_entry(uint64_t oid, int create)
-{
- int h = hash(oid);
- struct hlist_head *head = trunk_hashtable + h;
- struct trunk_entry_incore *entry = NULL;
- struct hlist_node *node;
-
- pthread_mutex_lock(&hashtable_lock[h]);
- if (hlist_empty(head))
- goto not_found;
-
- hlist_for_each_entry(entry, node, head, hash) {
- if (entry->raw.oid == oid)
- goto out;
- }
-not_found:
- if (create) {
- entry = xzalloc(sizeof(*entry));
- entry->raw.oid = oid;
- get_entry(entry, head);
- } else
- entry = NULL;
-out:
- pthread_mutex_unlock(&hashtable_lock[h]);
- return entry;
-}
-
-int trunk_init(void)
-{
- DIR *dir;
- struct dirent *d;
- uint64_t oid;
-
- dir = opendir(obj_path);
- if (!dir)
- return -1;
-
- while ((d = readdir(dir))) {
- if (!strncmp(d->d_name, ".", 1))
- continue;
- oid = strtoull(d->d_name, NULL, 16);
- if (oid == 0 || oid == ULLONG_MAX)
- continue;
- objlist_cache_insert(oid);
- lookup_trunk_entry(oid, 1);
- }
-
- omap_tree = RB_ROOT;
-
- closedir(dir);
- return 0;
-}
-
-static int fill_entry_new_sha1(struct trunk_entry_incore *entry)
+static int fill_entry_new_sha1(struct trunk_entry *entry)
{
struct strbuf buf = STRBUF_INIT;
int fd, ret = 0;
@@ -137,7 +36,7 @@ static int fill_entry_new_sha1(struct trunk_entry_incore *entry)
memcpy(hdr.tag, TAG_DATA, TAG_LEN);
strbuf_addstr(&buf, obj_path);
- strbuf_addf(&buf, "%016" PRIx64, entry->raw.oid);
+ strbuf_addf(&buf, "%016" PRIx64, entry->oid);
fd = open(buf.buf, O_RDONLY);
strbuf_reset(&buf);
@@ -154,12 +53,12 @@ static int fill_entry_new_sha1(struct trunk_entry_incore *entry)
hdr.size = buf.len;
strbuf_insert(&buf, 0, &hdr, sizeof(hdr));
- if (sha1_file_write((void *)buf.buf, buf.len, entry->raw.sha1) < 0) {
+ if (sha1_file_write((void *)buf.buf, buf.len, entry->sha1) < 0) {
ret = -1;
goto out_close;
}
- dprintf("data sha1:%s, %"PRIx64"\n", sha1_to_hex(entry->raw.sha1),
- entry->raw.oid);
+ dprintf("data sha1:%s, %"PRIx64"\n", sha1_to_hex(entry->sha1),
+ entry->oid);
out_close:
close(fd);
out:
@@ -167,199 +66,53 @@ out:
return ret;
}
-static inline int trunk_entry_no_sha1(struct trunk_entry_incore *entry)
-{
- unsigned char empty[SHA1_LEN] = {0};
-
- return memcmp(entry->raw.sha1, empty, SHA1_LEN) == 0;
-}
-
-static inline void put_entry(struct trunk_entry_incore *entry)
-{
- int h = hash(entry->raw.oid);
-
- pthread_mutex_lock(&hashtable_lock[h]);
- hlist_del(&entry->hash);
- pthread_mutex_unlock(&hashtable_lock[h]);
-
- pthread_mutex_lock(&active_list_lock);
- list_del(&entry->active_list);
- trunk_entry_active_nr--;
- pthread_mutex_unlock(&active_list_lock);
- free(entry);
-}
-
-static struct omap_entry *omap_tree_rb_insert(struct rb_root *root,
- struct omap_entry *new)
-{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
- struct omap_entry *entry;
-
- while (*p) {
- parent = *p;
- entry = rb_entry(parent, struct omap_entry, node);
-
- if (new->oid < entry->oid)
- p = &(*p)->rb_left;
- else if (new->oid > entry->oid)
- p = &(*p)->rb_right;
- else
- return entry; /* already has this entry */
- }
- rb_link_node(&new->node, parent, p);
- rb_insert_color(&new->node, root);
-
- return NULL; /* insert successfully */
-}
-
-static unsigned char *omap_tree_insert(uint64_t oid, unsigned char *sha1)
-{
- struct omap_entry *existing_entry, *new;
- static unsigned char old_sha1[SHA1_LEN];
-
- new = xmalloc(sizeof(*new));
- new->oid = oid;
- memcpy(new->sha1, sha1, SHA1_LEN);
- rb_init_node(&new->node);
-
- existing_entry = omap_tree_rb_insert(&omap_tree, new);
- if (existing_entry) {
- free(new);
- if (memcmp(existing_entry->sha1, sha1, SHA1_LEN) == 0) {
- return NULL;
- } else {
- memcpy(old_sha1, existing_entry->sha1, SHA1_LEN);
- memcpy(existing_entry->sha1, sha1, SHA1_LEN);
- return old_sha1;
- }
- }
-
- return NULL;
-}
+static uint64_t object_nr;
-static int oid_stale(uint64_t oid)
+static int inc_object_nr(uint64_t oid)
{
- int i, nr_copies;
- struct vnode_info *vinfo;
- struct sd_vnode *v;
- int ret = 1;
- struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
-
- vinfo = get_vnode_info();
- nr_copies = get_obj_copy_number(oid);
- if (!nr_copies)
- return 0;
-
- oid_to_vnodes(vinfo->vnodes, vinfo->nr_vnodes, oid,
- nr_copies, obj_vnodes);
- for (i = 0; i < nr_copies; i++) {
- v = obj_vnodes[i];
- if (vnode_is_local(v)) {
- ret = 0;
- break;
- }
- }
-
- put_vnode_info(vinfo);
- return ret;
+ object_nr++;
+ return 0;
}
-int trunk_file_write_recovery(unsigned char *outsha1)
+int trunk_file_write(unsigned char *outsha1)
{
- struct trunk_entry_incore *entry, *t;
- struct strbuf buf = STRBUF_INIT;
- char p[PATH_MAX];
- struct sha1_file_hdr hdr, *h;
- int ret = -1, active_nr = 0;
- uint64_t oid;
- unsigned char *old_sha1;
+ struct strbuf buf;
+ struct sha1_file_hdr hdr;
+ struct trunk_entry entry;
+ struct dirent *d;
+ DIR *dir;
+ uint64_t data_size, oid;
+ int ret = 0;
+ /* Add the hdr first */
+ for_each_object_in_wd(inc_object_nr);
+ data_size = sizeof(struct trunk_entry) * object_nr;
+ hdr.size = data_size;
+ hdr.priv = object_nr;
memcpy(hdr.tag, TAG_TRUNK, TAG_LEN);
+ strbuf_init(&buf, sizeof(hdr) + data_size);
strbuf_add(&buf, &hdr, sizeof(hdr));
- list_for_each_entry_safe(entry, t, &trunk_active_list, active_list) {
- oid = entry->raw.oid;
- if (!oid_stale(oid))
- continue;
-
- dprintf("stale oid %"PRIx64"\n", oid);
- if (trunk_entry_no_sha1(entry) || trunk_entry_is_dirty(entry)) {
- if (fill_entry_new_sha1(entry) < 0) {
- eprintf("fill sha1 fail\n");
- goto out;
- }
- }
-
- old_sha1 = omap_tree_insert(oid, entry->raw.sha1);
- if (old_sha1)
- sha1_file_try_delete(old_sha1);
-
- strbuf_add(&buf, &entry->raw, sizeof(struct trunk_entry));
- active_nr++;
-
- /*
- * We remove object from the working directory, but can not
- * remove the objlist cache entry.
- *
- * Consider the following case:
- *
- * If node A ends recovery before some other nodes, and then it
- * delete the stale object from the farm working directory, but
- * if it also deletes the objlist entry, it may causes problem,
- * try thinking of another node B which issues a get_obj_list()
- * request after the objlist entry is deleted on the original
- * node A, but still not added to the target node C, then
- * node B would not find the objlist entry, then for node B,
- * this object is ignored to recovery, so it's lost.
- */
- snprintf(p, sizeof(p), "%s%016"PRIx64, obj_path, entry->raw.oid);
- if (unlink(p) < 0) {
- eprintf("%s:%m\n", p);
- goto out;
- }
- dprintf("remove file %"PRIx64"\n", entry->raw.oid);
- put_entry(entry);
- }
-
- h = (struct sha1_file_hdr*)buf.buf;
- h->size = sizeof(struct trunk_entry) * active_nr;
- h->priv = active_nr;
-
- if (sha1_file_write((void *)buf.buf, buf.len, outsha1) < 0) {
- dprintf("sha1 file write fail.\n");
+ dir = opendir(obj_path);
+ if (!dir) {
+ ret = -1;
goto out;
}
- ret = SD_RES_SUCCESS;
-out:
- strbuf_release(&buf);
- return ret;
-}
-
-int trunk_file_write_user(unsigned char *outsha1)
-{
- struct strbuf buf;
- uint64_t data_size = sizeof(struct trunk_entry) * trunk_entry_active_nr;
- struct sha1_file_hdr hdr = { .size = data_size,
- .priv = trunk_entry_active_nr };
- struct trunk_entry_incore *entry, *t;
- int ret = 0;
+ while ((d = readdir(dir))) {
+ if (!strncmp(d->d_name, ".", 1))
+ continue;
- memcpy(hdr.tag, TAG_TRUNK, TAG_LEN);
- strbuf_init(&buf, sizeof(hdr) + data_size);
+ oid = strtoull(d->d_name, NULL, 16);
+ if (oid == 0 || oid == ULLONG_MAX)
+ continue;
- strbuf_add(&buf, &hdr, sizeof(hdr));
- list_for_each_entry_safe(entry, t, &trunk_active_list, active_list) {
- if (trunk_entry_no_sha1(entry) || trunk_entry_is_dirty(entry)) {
- if (fill_entry_new_sha1(entry) < 0) {
- ret = -1;
- goto out;
- }
+ entry.oid = oid;
+ if (fill_entry_new_sha1(&entry) < 0) {
+ ret = -1;
+ goto out;
}
- strbuf_add(&buf, &entry->raw, sizeof(struct trunk_entry));
-
- undirty_trunk_entry(entry);
+ strbuf_add(&buf, &entry, sizeof(struct trunk_entry));
}
if (sha1_file_write((void *)buf.buf, buf.len, outsha1) < 0) {
@@ -368,6 +121,8 @@ int trunk_file_write_user(unsigned char *outsha1)
}
dprintf("trunk sha1: %s\n", sha1_to_hex(outsha1));
out:
+ object_nr = 0;
+ closedir(dir);
strbuf_release(&buf);
return ret;
}
@@ -387,38 +142,3 @@ void *trunk_file_read(unsigned char *sha1, struct sha1_file_hdr *outhdr)
return buffer;
}
-
-int trunk_update_entry(uint64_t oid)
-{
- struct trunk_entry_incore *entry;
-
- entry = lookup_trunk_entry(oid, 1);
- if (!trunk_entry_is_dirty(entry))
- dirty_trunk_entry(entry);
-
- return 0;
-}
-
-void trunk_put_entry(uint64_t oid)
-{
- struct trunk_entry_incore *entry;
-
- entry = lookup_trunk_entry(oid, 0);
- if (entry)
- put_entry(entry);
-}
-
-void trunk_get_entry(uint64_t oid)
-{
- lookup_trunk_entry(oid, 1);
-}
-
-void trunk_reset(void)
-{
- struct trunk_entry_incore *entry, *t;
- list_for_each_entry_safe(entry, t, &trunk_active_list, active_list) {
- put_entry(entry);
- }
- eprintf("%s\n", trunk_entry_active_nr ? "WARN: active_list not clean" :
- "clean");
-}
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index 324a139..ecf8304 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -36,7 +36,7 @@ static int get_stale_obj_path(uint64_t oid, char *path)
return sprintf(path, "%s/%016"PRIx64, stale_dir, oid);
}
-static int for_each_objects(int (*func)(uint64_t oid))
+int for_each_object_in_wd(int (*func)(uint64_t oid))
{
DIR *dir;
struct dirent *d;
@@ -199,7 +199,7 @@ int default_init(char *p)
/* When we start up, the objects in .stale is useless */
default_cleanup();
- return for_each_objects(init_objlist_and_vdi_bitmap);
+ return for_each_object_in_wd(init_objlist_and_vdi_bitmap);
}
static int default_read_from_path(uint64_t oid, char *path,
@@ -353,7 +353,7 @@ int default_end_recover(uint32_t old_epoch, struct vnode_info *old_vnode_info)
if (old_epoch == 0)
return SD_RES_SUCCESS;
- return for_each_objects(move_object_to_stale_dir);
+ return for_each_object_in_wd(move_object_to_stale_dir);
}
int default_format(char *name)
@@ -396,7 +396,7 @@ int default_remove_object(uint64_t oid)
int default_purge_obj(void)
{
- return for_each_objects(default_remove_object);
+ return for_each_object_in_wd(default_remove_object);
}
struct store_driver plain_store = {
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 7a54268..92ccdd2 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -170,6 +170,7 @@ int default_cleanup(void);
int default_format(char *name);
int default_remove_object(uint64_t oid);
int default_purge_obj(void);
+int for_each_object_in_wd(int (*func)(uint64_t oid));
extern struct list_head store_drivers;
#define add_store_driver(driver) \
--
1.7.10.2
More information about the sheepdog
mailing list