From: Liu Yuan <tailai.ly at taobao.com> Since we moved recovery handling out of the farm backend, we no longer need to track IO requests on objects. This leaves most of the functions in trunk.c useless for now. Signed-off-by: Liu Yuan <tailai.ly at taobao.com> --- sheep/farm/farm.c | 5 +- sheep/farm/farm.h | 14 +- sheep/farm/trunk.c | 362 ++++++--------------------------------------------- sheep/plain_store.c | 8 +- sheep/sheep_priv.h | 1 + 5 files changed, 48 insertions(+), 342 deletions(-) diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c index 69198a1..4ccd629 100644 --- a/sheep/farm/farm.c +++ b/sheep/farm/farm.c @@ -160,9 +160,6 @@ static int farm_init(char *p) goto err; } - if (trunk_init() < 0) - goto err; - if (snap_init() < 0) goto err; @@ -194,7 +191,7 @@ static int farm_snapshot(struct siocb *iocb) if (nr_nodes < 0) goto out; - if (trunk_file_write_user(trunk_sha1) < 0) + if (trunk_file_write(trunk_sha1) < 0) goto out; if (snap_file_write(sys->epoch, nodes, nr_nodes, diff --git a/sheep/farm/farm.h b/sheep/farm/farm.h index 27e65cd..af803b6 100644 --- a/sheep/farm/farm.h +++ b/sheep/farm/farm.h @@ -39,13 +39,6 @@ struct trunk_entry { unsigned char sha1[SHA1_LEN]; }; -struct trunk_entry_incore { - struct trunk_entry raw; - int flags; - struct list_head active_list; - struct hlist_node hash; -}; - /* farm.c */ extern char farm_dir[PATH_MAX]; extern char farm_obj_dir[PATH_MAX]; @@ -59,13 +52,8 @@ extern int sha1_file_try_delete(const unsigned char *sha1); /* trunk.c */ extern int trunk_init(void); -extern int trunk_file_write_recovery(unsigned char *outsha1); -extern int trunk_file_write_user(unsigned char *outsha1); +extern int trunk_file_write(unsigned char *outsha1); extern void *trunk_file_read(unsigned char *sha1, struct sha1_file_hdr *); -extern int trunk_update_entry(uint64_t oid); -extern void trunk_reset(void); -extern void trunk_put_entry(uint64_t oid); -extern void trunk_get_entry(uint64_t oid); /* snap.c */ extern int snap_init(void); diff --git 
a/sheep/farm/trunk.c b/sheep/farm/trunk.c index 36dd86e..0385e54 100644 --- a/sheep/farm/trunk.c +++ b/sheep/farm/trunk.c @@ -13,8 +13,8 @@ /* * Trunk object is meta data that describes the structure of the data objects - * at the time of snapshot being taken. It ties data objects together into a flat - * directory structure. + * at the time of snapshot being taken. It ties data objects together into a + * flat directory structure. */ #include <pthread.h> #include <dirent.h> @@ -28,108 +28,7 @@ #include "sheepdog_proto.h" #include "sheep_priv.h" -#define TRUNK_ENTRY_DIRTY 0x00000001 - -#define HASH_BITS 10 -#define HASH_SIZE (1 << HASH_BITS) - -static LIST_HEAD(trunk_active_list); -static pthread_mutex_t active_list_lock = PTHREAD_MUTEX_INITIALIZER; -static struct hlist_head trunk_hashtable[HASH_SIZE]; -static pthread_mutex_t hashtable_lock[HASH_SIZE] = { [0 ... HASH_SIZE - 1] = PTHREAD_MUTEX_INITIALIZER }; -static unsigned int trunk_entry_active_nr; - -struct omap_entry { - uint64_t oid; - unsigned char sha1[SHA1_LEN]; - struct rb_node node; -}; - -struct rb_root omap_tree; - -static inline int trunk_entry_is_dirty(struct trunk_entry_incore *entry) -{ - return entry->flags & TRUNK_ENTRY_DIRTY; -} - -static inline void dirty_trunk_entry(struct trunk_entry_incore *entry) -{ - entry->flags |= TRUNK_ENTRY_DIRTY; -} - -static inline void undirty_trunk_entry(struct trunk_entry_incore *entry) -{ - entry->flags &= ~TRUNK_ENTRY_DIRTY; -} - -static inline int hash(uint64_t oid) -{ - return hash_64(oid, HASH_BITS); -} - -static inline void get_entry(struct trunk_entry_incore *entry, struct hlist_head *head) -{ - hlist_add_head(&entry->hash, head); - pthread_mutex_lock(&active_list_lock); - list_add(&entry->active_list, &trunk_active_list); - trunk_entry_active_nr++; - pthread_mutex_unlock(&active_list_lock); -} - -static struct trunk_entry_incore *lookup_trunk_entry(uint64_t oid, int create) -{ - int h = hash(oid); - struct hlist_head *head = trunk_hashtable + h; - struct 
trunk_entry_incore *entry = NULL; - struct hlist_node *node; - - pthread_mutex_lock(&hashtable_lock[h]); - if (hlist_empty(head)) - goto not_found; - - hlist_for_each_entry(entry, node, head, hash) { - if (entry->raw.oid == oid) - goto out; - } -not_found: - if (create) { - entry = xzalloc(sizeof(*entry)); - entry->raw.oid = oid; - get_entry(entry, head); - } else - entry = NULL; -out: - pthread_mutex_unlock(&hashtable_lock[h]); - return entry; -} - -int trunk_init(void) -{ - DIR *dir; - struct dirent *d; - uint64_t oid; - - dir = opendir(obj_path); - if (!dir) - return -1; - - while ((d = readdir(dir))) { - if (!strncmp(d->d_name, ".", 1)) - continue; - oid = strtoull(d->d_name, NULL, 16); - if (oid == 0 || oid == ULLONG_MAX) - continue; - objlist_cache_insert(oid); - lookup_trunk_entry(oid, 1); - } - - omap_tree = RB_ROOT; - - closedir(dir); - return 0; -} - -static int fill_entry_new_sha1(struct trunk_entry_incore *entry) +static int fill_entry_new_sha1(struct trunk_entry *entry) { struct strbuf buf = STRBUF_INIT; int fd, ret = 0; @@ -137,7 +36,7 @@ static int fill_entry_new_sha1(struct trunk_entry_incore *entry) memcpy(hdr.tag, TAG_DATA, TAG_LEN); strbuf_addstr(&buf, obj_path); - strbuf_addf(&buf, "%016" PRIx64, entry->raw.oid); + strbuf_addf(&buf, "%016" PRIx64, entry->oid); fd = open(buf.buf, O_RDONLY); strbuf_reset(&buf); @@ -154,12 +53,12 @@ static int fill_entry_new_sha1(struct trunk_entry_incore *entry) hdr.size = buf.len; strbuf_insert(&buf, 0, &hdr, sizeof(hdr)); - if (sha1_file_write((void *)buf.buf, buf.len, entry->raw.sha1) < 0) { + if (sha1_file_write((void *)buf.buf, buf.len, entry->sha1) < 0) { ret = -1; goto out_close; } - dprintf("data sha1:%s, %"PRIx64"\n", sha1_to_hex(entry->raw.sha1), - entry->raw.oid); + dprintf("data sha1:%s, %"PRIx64"\n", sha1_to_hex(entry->sha1), + entry->oid); out_close: close(fd); out: @@ -167,199 +66,53 @@ out: return ret; } -static inline int trunk_entry_no_sha1(struct trunk_entry_incore *entry) -{ - unsigned char 
empty[SHA1_LEN] = {0}; - - return memcmp(entry->raw.sha1, empty, SHA1_LEN) == 0; -} - -static inline void put_entry(struct trunk_entry_incore *entry) -{ - int h = hash(entry->raw.oid); - - pthread_mutex_lock(&hashtable_lock[h]); - hlist_del(&entry->hash); - pthread_mutex_unlock(&hashtable_lock[h]); - - pthread_mutex_lock(&active_list_lock); - list_del(&entry->active_list); - trunk_entry_active_nr--; - pthread_mutex_unlock(&active_list_lock); - free(entry); -} - -static struct omap_entry *omap_tree_rb_insert(struct rb_root *root, - struct omap_entry *new) -{ - struct rb_node **p = &root->rb_node; - struct rb_node *parent = NULL; - struct omap_entry *entry; - - while (*p) { - parent = *p; - entry = rb_entry(parent, struct omap_entry, node); - - if (new->oid < entry->oid) - p = &(*p)->rb_left; - else if (new->oid > entry->oid) - p = &(*p)->rb_right; - else - return entry; /* already has this entry */ - } - rb_link_node(&new->node, parent, p); - rb_insert_color(&new->node, root); - - return NULL; /* insert successfully */ -} - -static unsigned char *omap_tree_insert(uint64_t oid, unsigned char *sha1) -{ - struct omap_entry *existing_entry, *new; - static unsigned char old_sha1[SHA1_LEN]; - - new = xmalloc(sizeof(*new)); - new->oid = oid; - memcpy(new->sha1, sha1, SHA1_LEN); - rb_init_node(&new->node); - - existing_entry = omap_tree_rb_insert(&omap_tree, new); - if (existing_entry) { - free(new); - if (memcmp(existing_entry->sha1, sha1, SHA1_LEN) == 0) { - return NULL; - } else { - memcpy(old_sha1, existing_entry->sha1, SHA1_LEN); - memcpy(existing_entry->sha1, sha1, SHA1_LEN); - return old_sha1; - } - } - - return NULL; -} +static uint64_t object_nr; -static int oid_stale(uint64_t oid) +static int inc_object_nr(uint64_t oid) { - int i, nr_copies; - struct vnode_info *vinfo; - struct sd_vnode *v; - int ret = 1; - struct sd_vnode *obj_vnodes[SD_MAX_COPIES]; - - vinfo = get_vnode_info(); - nr_copies = get_obj_copy_number(oid); - if (!nr_copies) - return 0; - - 
oid_to_vnodes(vinfo->vnodes, vinfo->nr_vnodes, oid, - nr_copies, obj_vnodes); - for (i = 0; i < nr_copies; i++) { - v = obj_vnodes[i]; - if (vnode_is_local(v)) { - ret = 0; - break; - } - } - - put_vnode_info(vinfo); - return ret; + object_nr++; + return 0; } -int trunk_file_write_recovery(unsigned char *outsha1) +int trunk_file_write(unsigned char *outsha1) { - struct trunk_entry_incore *entry, *t; - struct strbuf buf = STRBUF_INIT; - char p[PATH_MAX]; - struct sha1_file_hdr hdr, *h; - int ret = -1, active_nr = 0; - uint64_t oid; - unsigned char *old_sha1; + struct strbuf buf; + struct sha1_file_hdr hdr; + struct trunk_entry entry; + struct dirent *d; + DIR *dir; + uint64_t data_size, oid; + int ret = 0; + /* Add the hdr first */ + for_each_object_in_wd(inc_object_nr); + data_size = sizeof(struct trunk_entry) * object_nr; + hdr.size = data_size; + hdr.priv = object_nr; memcpy(hdr.tag, TAG_TRUNK, TAG_LEN); + strbuf_init(&buf, sizeof(hdr) + data_size); strbuf_add(&buf, &hdr, sizeof(hdr)); - list_for_each_entry_safe(entry, t, &trunk_active_list, active_list) { - oid = entry->raw.oid; - if (!oid_stale(oid)) - continue; - - dprintf("stale oid %"PRIx64"\n", oid); - if (trunk_entry_no_sha1(entry) || trunk_entry_is_dirty(entry)) { - if (fill_entry_new_sha1(entry) < 0) { - eprintf("fill sha1 fail\n"); - goto out; - } - } - - old_sha1 = omap_tree_insert(oid, entry->raw.sha1); - if (old_sha1) - sha1_file_try_delete(old_sha1); - - strbuf_add(&buf, &entry->raw, sizeof(struct trunk_entry)); - active_nr++; - - /* - * We remove object from the working directory, but can not - * remove the objlist cache entry. 
- * - * Consider the following case: - * - * If node A ends recovery before some other nodes, and then it - * delete the stale object from the farm working directory, but - * if it also deletes the objlist entry, it may causes problem, - * try thinking of another node B which issues a get_obj_list() - * request after the objlist entry is deleted on the original - * node A, but still not added to the target node C, then - * node B would not find the objlist entry, then for node B, - * this object is ignored to recovery, so it's lost. - */ - snprintf(p, sizeof(p), "%s%016"PRIx64, obj_path, entry->raw.oid); - if (unlink(p) < 0) { - eprintf("%s:%m\n", p); - goto out; - } - dprintf("remove file %"PRIx64"\n", entry->raw.oid); - put_entry(entry); - } - - h = (struct sha1_file_hdr*)buf.buf; - h->size = sizeof(struct trunk_entry) * active_nr; - h->priv = active_nr; - - if (sha1_file_write((void *)buf.buf, buf.len, outsha1) < 0) { - dprintf("sha1 file write fail.\n"); + dir = opendir(obj_path); + if (!dir) { + ret = -1; goto out; } - ret = SD_RES_SUCCESS; -out: - strbuf_release(&buf); - return ret; -} - -int trunk_file_write_user(unsigned char *outsha1) -{ - struct strbuf buf; - uint64_t data_size = sizeof(struct trunk_entry) * trunk_entry_active_nr; - struct sha1_file_hdr hdr = { .size = data_size, - .priv = trunk_entry_active_nr }; - struct trunk_entry_incore *entry, *t; - int ret = 0; + while ((d = readdir(dir))) { + if (!strncmp(d->d_name, ".", 1)) + continue; - memcpy(hdr.tag, TAG_TRUNK, TAG_LEN); - strbuf_init(&buf, sizeof(hdr) + data_size); + oid = strtoull(d->d_name, NULL, 16); + if (oid == 0 || oid == ULLONG_MAX) + continue; - strbuf_add(&buf, &hdr, sizeof(hdr)); - list_for_each_entry_safe(entry, t, &trunk_active_list, active_list) { - if (trunk_entry_no_sha1(entry) || trunk_entry_is_dirty(entry)) { - if (fill_entry_new_sha1(entry) < 0) { - ret = -1; - goto out; - } + entry.oid = oid; + if (fill_entry_new_sha1(&entry) < 0) { + ret = -1; + goto out; } - 
strbuf_add(&buf, &entry->raw, sizeof(struct trunk_entry)); - - undirty_trunk_entry(entry); + strbuf_add(&buf, &entry, sizeof(struct trunk_entry)); } if (sha1_file_write((void *)buf.buf, buf.len, outsha1) < 0) { @@ -368,6 +121,8 @@ int trunk_file_write_user(unsigned char *outsha1) } dprintf("trunk sha1: %s\n", sha1_to_hex(outsha1)); out: + object_nr = 0; + closedir(dir); strbuf_release(&buf); return ret; } @@ -387,38 +142,3 @@ void *trunk_file_read(unsigned char *sha1, struct sha1_file_hdr *outhdr) return buffer; } - -int trunk_update_entry(uint64_t oid) -{ - struct trunk_entry_incore *entry; - - entry = lookup_trunk_entry(oid, 1); - if (!trunk_entry_is_dirty(entry)) - dirty_trunk_entry(entry); - - return 0; -} - -void trunk_put_entry(uint64_t oid) -{ - struct trunk_entry_incore *entry; - - entry = lookup_trunk_entry(oid, 0); - if (entry) - put_entry(entry); -} - -void trunk_get_entry(uint64_t oid) -{ - lookup_trunk_entry(oid, 1); -} - -void trunk_reset(void) -{ - struct trunk_entry_incore *entry, *t; - list_for_each_entry_safe(entry, t, &trunk_active_list, active_list) { - put_entry(entry); - } - eprintf("%s\n", trunk_entry_active_nr ? 
"WARN: active_list not clean" : - "clean"); -} diff --git a/sheep/plain_store.c b/sheep/plain_store.c index 324a139..ecf8304 100644 --- a/sheep/plain_store.c +++ b/sheep/plain_store.c @@ -36,7 +36,7 @@ static int get_stale_obj_path(uint64_t oid, char *path) return sprintf(path, "%s/%016"PRIx64, stale_dir, oid); } -static int for_each_objects(int (*func)(uint64_t oid)) +int for_each_object_in_wd(int (*func)(uint64_t oid)) { DIR *dir; struct dirent *d; @@ -199,7 +199,7 @@ int default_init(char *p) /* When we start up, the objects in .stale is useless */ default_cleanup(); - return for_each_objects(init_objlist_and_vdi_bitmap); + return for_each_object_in_wd(init_objlist_and_vdi_bitmap); } static int default_read_from_path(uint64_t oid, char *path, @@ -353,7 +353,7 @@ int default_end_recover(uint32_t old_epoch, struct vnode_info *old_vnode_info) if (old_epoch == 0) return SD_RES_SUCCESS; - return for_each_objects(move_object_to_stale_dir); + return for_each_object_in_wd(move_object_to_stale_dir); } int default_format(char *name) @@ -396,7 +396,7 @@ int default_remove_object(uint64_t oid) int default_purge_obj(void) { - return for_each_objects(default_remove_object); + return for_each_object_in_wd(default_remove_object); } struct store_driver plain_store = { diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 7a54268..92ccdd2 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -170,6 +170,7 @@ int default_cleanup(void); int default_format(char *name); int default_remove_object(uint64_t oid); int default_purge_obj(void); +int for_each_object_in_wd(int (*func)(uint64_t oid)); extern struct list_head store_drivers; #define add_store_driver(driver) \ -- 1.7.10.2 |