[sheepdog] [PATCH v2 3/4] object cache: clean up code and introduce cache lock helpers
Liu Yuan
namei.unix at gmail.com
Sun Jan 27 11:33:19 CET 2013
From: Liu Yuan <tailai.ly at taobao.com>
- refine the {get,put}_cache_entry helpers
- clean up misaligned lines
- use a stack buffer for path construction in create_dir_for()

This is a preparatory patch for 'optimize push phase of dirty objects'.
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
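
Below is a minimal standalone sketch of the helper pattern this patch
introduces. The struct layouts are simplified stand-ins and plain C11
atomics replace the uatomic_* wrappers used in the patch, so it
compiles on its own:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    struct object_cache {
            pthread_rwlock_t lock;  /* guards the lru tree and dirty list */
    };

    struct object_cache_entry {
            atomic_int refcnt;        /* pins the entry against the reclaimer */
            struct object_cache *oc;  /* back pointer; lets free_cache_entry()
                                       * drop its second argument */
    };

    /* Pin an entry so the reclaimer must skip it. */
    static inline void get_cache_entry(struct object_cache_entry *entry)
    {
            atomic_fetch_add(&entry->refcnt, 1);
    }

    /* Unpin; the reclaimer may free the entry once refcnt drops to zero. */
    static inline void put_cache_entry(struct object_cache_entry *entry)
    {
            atomic_fetch_sub(&entry->refcnt, 1);
    }

    static inline bool entry_in_use(struct object_cache_entry *entry)
    {
            return atomic_load(&entry->refcnt) > 0;
    }

    /* Thin wrappers naming the intent rather than the pthread primitive. */
    static inline void read_lock_cache(struct object_cache *oc)
    {
            pthread_rwlock_rdlock(&oc->lock);
    }

    static inline void write_lock_cache(struct object_cache *oc)
    {
            pthread_rwlock_wrlock(&oc->lock);
    }

    static inline void unlock_cache(struct object_cache *oc)
    {
            pthread_rwlock_unlock(&oc->lock);
    }

The lookup path then follows the shape of get_cache_entry_from(): take
the read lock, search the lru tree, pin the entry with
get_cache_entry(), and drop the lock; the reclaimer skips any entry for
which entry_in_use() is true.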
sheep/object_cache.c | 162 +++++++++++++++++++++++++++++---------------------
1 file changed, 95 insertions(+), 67 deletions(-)
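
The create_dir_for() change follows the same spirit. A sketch of the
resulting shape, with object_cache_dir and def_dmode as stand-ins for
the real globals:

    #include <errno.h>
    #include <inttypes.h>
    #include <limits.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/stat.h>
    #include <sys/types.h>

    static const char *object_cache_dir = "/var/lib/sheepdog/cache";
    static const mode_t def_dmode = 0755;

    /* Build the path in a stack buffer instead of a heap-backed strbuf. */
    static int create_dir_for(uint32_t vid)
    {
            char p[PATH_MAX];

            snprintf(p, sizeof(p), "%s/%06"PRIx32, object_cache_dir, vid);
            if (mkdir(p, def_dmode) < 0 && errno != EEXIST) {
                    fprintf(stderr, "%s, %s\n", p, strerror(errno));
                    return -1;
            }
            return 0;
    }

Since nothing is heap-allocated, the strbuf_release() cleanup path goes
away entirely.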
diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index 1ae8820..927e3f3 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -130,6 +130,48 @@ static uint64_t calc_object_bmap(size_t len, off_t offset)
return (uint64_t)bmap;
}
+static inline void get_cache_entry(struct object_cache_entry *entry)
+{
+ uatomic_inc(&entry->refcnt);
+}
+
+static inline void put_cache_entry(struct object_cache_entry *entry)
+{
+ uatomic_dec(&entry->refcnt);
+}
+
+static inline bool entry_in_use(struct object_cache_entry *entry)
+{
+ return uatomic_read(&entry->refcnt) > 0;
+}
+
+/*
+ * Mutual exclusion protection strategy:
+ *
+ * reader and writer: no need to protect since it is okay to read
+ * unacked stale data.
+ * reader, writer and pusher: cache lock, entry lock and refcnt.
+ * reader, writer and reclaimer: cache lock and entry refcnt.
+ * pusher and reclaimer: cache lock and entry refcnt.
+ *
+ * entry->bmap is protected mostly by the entry lock, sometimes by the
+ * cache lock; the dirty list is protected by the cache lock.
+ */
+static inline void read_lock_cache(struct object_cache *oc)
+{
+ pthread_rwlock_rdlock(&oc->lock);
+}
+
+static inline void write_lock_cache(struct object_cache *oc)
+{
+ pthread_rwlock_wrlock(&oc->lock);
+}
+
+static inline void unlock_cache(struct object_cache *oc)
+{
+ pthread_rwlock_unlock(&oc->lock);
+}
+
static struct object_cache_entry *
lru_tree_insert(struct rb_root *root, struct object_cache_entry *new)
{
@@ -158,7 +200,7 @@ lru_tree_insert(struct rb_root *root, struct object_cache_entry *new)
}
static struct object_cache_entry *lru_tree_search(struct rb_root *root,
- uint32_t idx)
+ uint32_t idx)
{
struct rb_node *n = root->rb_node;
struct object_cache_entry *t;
@@ -178,10 +220,11 @@ static struct object_cache_entry *lru_tree_search(struct rb_root *root,
}
static inline void
-free_cache_entry(struct object_cache_entry *entry,
- struct rb_root *lru_tree)
+free_cache_entry(struct object_cache_entry *entry)
{
- rb_erase(&entry->node, lru_tree);
+ struct object_cache *oc = entry->oc;
+
+ rb_erase(&entry->node, &oc->lru_tree);
list_del_init(&entry->lru_list);
if (!list_empty(&entry->dirty_list))
list_del_init(&entry->dirty_list);
@@ -203,8 +246,7 @@ static int remove_cache_object(struct object_cache *oc, uint32_t idx)
sprintf(path, "%s/%06"PRIx32"/%08"PRIx32, object_cache_dir,
oc->vid, idx);
- sd_dprintf("removing cache object %"PRIx64"\n",
- idx_to_oid(oc->vid, idx));
+ sd_dprintf("%"PRIx64"\n", idx_to_oid(oc->vid, idx));
if (unlink(path) < 0) {
sd_eprintf("failed to remove cached object %m\n");
if (errno == ENOENT)
@@ -239,7 +281,7 @@ static int read_cache_object_noupdate(uint32_t vid, uint32_t idx, void *buf,
if (size != count) {
sd_eprintf("size %zu, count:%zu, offset %jd %m\n",
- size, count, (intmax_t)offset);
+ size, count, (intmax_t)offset);
ret = SD_RES_EIO;
goto out_close;
}
@@ -272,7 +314,7 @@ static int write_cache_object_noupdate(uint32_t vid, uint32_t idx, void *buf,
if (size != count) {
sd_eprintf("size %zu, count:%zu, offset %jd %m\n",
- size, count, (intmax_t)offset);
+ size, count, (intmax_t)offset);
ret = SD_RES_EIO;
goto out_close;
}
@@ -293,9 +335,9 @@ static int read_cache_object(struct object_cache_entry *entry, void *buf,
ret = read_cache_object_noupdate(vid, idx, buf, count, offset);
if (ret == SD_RES_SUCCESS) {
- pthread_rwlock_wrlock(&oc->lock);
+ write_lock_cache(oc);
list_move_tail(&entry->lru_list, &oc->lru_head);
- pthread_rwlock_unlock(&oc->lock);
+ unlock_cache(oc);
}
return ret;
}
@@ -313,16 +355,14 @@ static int write_cache_object(struct object_cache_entry *entry, void *buf,
ret = write_cache_object_noupdate(vid, idx, buf, count, offset);
if (ret != SD_RES_SUCCESS)
return ret;
-
- pthread_rwlock_wrlock(&oc->lock);
+ write_lock_cache(oc);
if (writeback) {
entry->bmap |= calc_object_bmap(count, offset);
if (list_empty(&entry->dirty_list))
- list_add_tail(&entry->dirty_list,
- &oc->dirty_head);
+ list_add_tail(&entry->dirty_list, &oc->dirty_head);
}
list_move_tail(&entry->lru_list, &oc->lru_head);
- pthread_rwlock_unlock(&oc->lock);
+ unlock_cache(oc);
if (writeback)
goto out;
@@ -340,7 +380,7 @@ static int write_cache_object(struct object_cache_entry *entry, void *buf,
ret = exec_local_req(&hdr, buf);
if (ret != SD_RES_SUCCESS) {
sd_eprintf("failed to write object %" PRIx64 ", %x\n",
- oid, ret);
+ oid, ret);
return ret;
}
out:
@@ -369,7 +409,7 @@ static int push_cache_object(uint32_t vid, uint32_t idx, uint64_t bmap,
last_bit = fls64(bmap) - 1;
sd_dprintf("bmap:0x%"PRIx64", first_bit:%d, last_bit:%d\n",
- bmap, first_bit, last_bit);
+ bmap, first_bit, last_bit);
offset = first_bit * CACHE_BLOCK_SIZE;
data_length = (last_bit - first_bit + 1) * CACHE_BLOCK_SIZE;
@@ -422,34 +462,27 @@ static void do_reclaim_object(struct object_cache *oc)
uint64_t oid;
uint32_t cap;
- pthread_rwlock_wrlock(&oc->lock);
+ write_lock_cache(oc);
list_for_each_entry_safe(entry, t, &oc->lru_head, lru_list) {
oid = idx_to_oid(oc->vid, entry_idx(entry));
- if (uatomic_read(&entry->refcnt) > 0) {
- sd_dprintf("%"PRIx64" is in operation, skip...\n", oid);
+ if (entry_in_use(entry)) {
+ sd_dprintf("%"PRIx64" is in use, skip...\n", oid);
continue;
}
if (entry_is_dirty(entry)) {
sd_dprintf("%"PRIx64" is dirty, skip...\n", oid);
continue;
}
- if (remove_cache_object(oc, entry_idx(entry))
- != SD_RES_SUCCESS)
+ if (remove_cache_object(oc, entry_idx(entry)) != SD_RES_SUCCESS)
continue;
- free_cache_entry(entry, &oc->lru_tree);
+ free_cache_entry(entry);
cap = uatomic_sub_return(&gcache.capacity, CACHE_OBJECT_SIZE);
sd_dprintf("%"PRIx64" reclaimed. capacity:%"PRId32"\n",
- oid, cap);
+ oid, cap);
if (cap <= HIGH_WATERMARK)
break;
}
- pthread_rwlock_unlock(&oc->lock);
- /*
- * Reclaimer grabs a write lock, which will blocks all the IO thread of
- * this VDI. We call pthread_yield() to expect that other threads can
- * grab the lock more often.
- */
- pthread_yield();
+ unlock_cache(oc);
}
struct reclaim_work {
@@ -481,7 +514,7 @@ static void do_reclaim(struct work *work)
if (cap <= HIGH_WATERMARK) {
pthread_rwlock_unlock(&hashtable_lock[idx]);
sd_dprintf("complete, capacity %"PRIu32"\n",
- cap);
+ cap);
return;
}
}
@@ -500,18 +533,16 @@ static void reclaim_done(struct work *work)
static int create_dir_for(uint32_t vid)
{
int ret = 0;
- struct strbuf buf = STRBUF_INIT;
+ char p[PATH_MAX];
- strbuf_addstr(&buf, object_cache_dir);
- strbuf_addf(&buf, "/%06"PRIx32, vid);
- if (mkdir(buf.buf, def_dmode) < 0)
+ snprintf(p, sizeof(p), "%s/%06"PRIx32, object_cache_dir, vid);
+ if (mkdir(p, def_dmode) < 0)
if (errno != EEXIST) {
- sd_eprintf("%s, %m\n", buf.buf);
+ sd_eprintf("%s, %m\n", p);
ret = -1;
goto err;
}
err:
- strbuf_release(&buf);
return ret;
}
@@ -595,16 +626,18 @@ static void add_to_lru_cache(struct object_cache *oc, uint32_t idx, bool create)
sd_dprintf("oid %"PRIx64" added\n", idx_to_oid(oc->vid, idx));
- pthread_rwlock_wrlock(&oc->lock);
- assert(!lru_tree_insert(&oc->lru_tree, entry));
+ write_lock_cache(oc);
+ if (lru_tree_insert(&oc->lru_tree, entry))
+ panic("the object already exists\n");
uatomic_add(&gcache.capacity, CACHE_OBJECT_SIZE);
list_add_tail(&entry->lru_list, &oc->lru_head);
if (create) {
+ /* The cache lock assures we do not race with the pusher */
entry->bmap = UINT64_MAX;
entry->idx |= CACHE_CREATE_BIT;
- list_add(&entry->dirty_list, &oc->dirty_head);
+ list_add_tail(&entry->dirty_list, &oc->dirty_head);
}
- pthread_rwlock_unlock(&oc->lock);
+ unlock_cache(oc);
}
static inline int lookup_path(char *path)
@@ -688,7 +721,7 @@ static int create_cache_object(struct object_cache *oc, uint32_t idx,
if (ret != buf_size) {
ret = SD_RES_EIO;
sd_eprintf("failed, vid %"PRIx32", idx %"PRIx32"\n",
- oc->vid, idx);
+ oc->vid, idx);
goto out_close;
}
/* This is intended to take care of partial write due to crash */
@@ -765,7 +798,7 @@ static int object_cache_push(struct object_cache *oc)
int ret = SD_RES_SUCCESS;
- pthread_rwlock_wrlock(&oc->lock);
+ write_lock_cache(oc);
list_for_each_entry_safe(entry, t, &oc->dirty_head, dirty_list) {
ret = push_cache_object(oc->vid, entry_idx(entry), entry->bmap,
!!(entry->idx & CACHE_CREATE_BIT));
@@ -776,7 +809,7 @@ static int object_cache_push(struct object_cache *oc)
list_del_init(&entry->dirty_list);
}
push_failed:
- pthread_rwlock_unlock(&oc->lock);
+ unlock_cache(oc);
return ret;
}
@@ -809,12 +842,12 @@ void object_cache_delete(uint32_t vid)
hlist_del(&cache->hash);
pthread_rwlock_unlock(&hashtable_lock[h]);
- pthread_rwlock_wrlock(&cache->lock);
+ write_lock_cache(cache);
list_for_each_entry_safe(entry, t, &cache->lru_head, lru_list) {
- free_cache_entry(entry, &cache->lru_tree);
+ free_cache_entry(entry);
uatomic_sub(&gcache.capacity, CACHE_OBJECT_SIZE);
}
- pthread_rwlock_unlock(&cache->lock);
+ unlock_cache(cache);
pthread_rwlock_destroy(&cache->lock);
free(cache);
@@ -824,27 +857,22 @@ void object_cache_delete(uint32_t vid)
}
static struct object_cache_entry *
-get_cache_entry(struct object_cache *cache, uint32_t idx)
+get_cache_entry_from(struct object_cache *cache, uint32_t idx)
{
struct object_cache_entry *entry;
- pthread_rwlock_rdlock(&cache->lock);
+ read_lock_cache(cache);
entry = lru_tree_search(&cache->lru_tree, idx);
if (!entry) {
/* The cache entry may be reclaimed, so try again. */
- pthread_rwlock_unlock(&cache->lock);
+ unlock_cache(cache);
return NULL;
}
- uatomic_inc(&entry->refcnt);
- pthread_rwlock_unlock(&cache->lock);
+ get_cache_entry(entry);
+ unlock_cache(cache);
return entry;
}
-static void put_cache_entry(struct object_cache_entry *entry)
-{
- uatomic_dec(&entry->refcnt);
-}
-
static int object_cache_flush_and_delete(struct object_cache *oc)
{
DIR *dir;
@@ -878,9 +906,9 @@ static int object_cache_flush_and_delete(struct object_cache *oc)
if (idx == ULLONG_MAX)
continue;
if (push_cache_object(vid, idx, all, true) !=
- SD_RES_SUCCESS) {
+ SD_RES_SUCCESS) {
sd_dprintf("failed to push %"PRIx64"\n",
- idx_to_oid(vid, idx));
+ idx_to_oid(vid, idx));
ret = -1;
goto out_close_dir;
}
@@ -939,7 +967,7 @@ int object_cache_handle_request(struct request *req)
bool create = false;
sd_dprintf("%08"PRIx32", len %"PRIu32", off %"PRIu64"\n", idx,
- hdr->data_length, hdr->obj.offset);
+ hdr->data_length, hdr->obj.offset);
cache = find_object_cache(vid, true);
@@ -958,7 +986,7 @@ retry:
return ret;
}
- entry = get_cache_entry(cache, idx);
+ entry = get_cache_entry_from(cache, idx);
if (!entry) {
sd_dprintf("retry oid %"PRIx64"\n", oid);
/*
@@ -999,7 +1027,7 @@ int object_cache_write(uint64_t oid, char *data, unsigned int datalen,
sd_dprintf("%" PRIx64 "\n", oid);
cache = find_object_cache(vid, false);
- entry = get_cache_entry(cache, idx);
+ entry = get_cache_entry_from(cache, idx);
if (!entry) {
sd_dprintf("%" PRIx64 " doesn't exist\n", oid);
return SD_RES_NO_CACHE;
@@ -1021,7 +1049,7 @@ int object_cache_read(uint64_t oid, char *data, unsigned int datalen,
sd_dprintf("%" PRIx64 "\n", oid);
cache = find_object_cache(vid, false);
- entry = get_cache_entry(cache, idx);
+ entry = get_cache_entry_from(cache, idx);
if (!entry) {
sd_dprintf("%" PRIx64 " doesn't exist\n", oid);
return SD_RES_NO_CACHE;
@@ -1070,13 +1098,13 @@ void object_cache_remove(uint64_t oid)
if (!oc)
return;
- pthread_rwlock_wrlock(&oc->lock);
+ write_lock_cache(oc);
entry = lru_tree_search(&oc->lru_tree, idx);
if (!entry)
goto out;
- free_cache_entry(entry, &oc->lru_tree);
+ free_cache_entry(entry);
out:
- pthread_rwlock_unlock(&oc->lock);
+ unlock_cache(oc);
return;
}
--
1.7.9.5