[sheepdog] [PATCH 1/6] object cache: drop dirty tree
Liu Yuan
namei.unix at gmail.com
Mon Jan 21 13:32:26 CET 2013
From: Liu Yuan <tailai.ly at taobao.com>
We don't need it because we can rely object tree to find cache entry. And we
don't search dirty tree.
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
sheep/object_cache.c | 123 ++++++++++----------------------------------------
1 file changed, 25 insertions(+), 98 deletions(-)
diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index 8e637da..270cf76 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -59,7 +59,6 @@ struct object_cache_entry {
uint64_t bmap; /* each bit represents one dirty block in object */
struct object_cache *oc;
struct rb_node node;
- struct rb_node dirty_node;
struct list_head dirty_list;
struct list_head object_list;
struct cds_list_head lru_list;
@@ -71,7 +70,6 @@ struct object_cache {
struct list_head dirty_list;
struct list_head object_list;
- struct rb_root dirty_tree;
struct rb_root object_tree;
pthread_rwlock_t lock;
@@ -182,20 +180,15 @@ static struct object_cache_entry *object_tree_search(struct rb_root *root,
}
static inline void
-del_from_dirty_tree_and_list(struct object_cache_entry *entry,
- struct rb_root *dirty_tree)
-{
- rb_erase(&entry->dirty_node, dirty_tree);
- list_del_init(&entry->dirty_list);
-}
-
-static inline void
del_from_object_tree_and_list(struct object_cache_entry *entry,
struct rb_root *object_tree)
{
rb_erase(&entry->node, object_tree);
list_del_init(&entry->object_list);
cds_list_del_rcu(&entry->lru_list);
+ if (!list_empty(&entry->dirty_list))
+ list_del_init(&entry->dirty_list);
+ free(entry);
}
static uint64_t idx_to_oid(uint32_t vid, uint32_t idx)
@@ -214,74 +207,22 @@ static int remove_cache_object(struct object_cache *oc, uint32_t idx)
sprintf(path, "%s/%06"PRIx32"/%08"PRIx32, cache_dir, oc->vid, idx);
dprintf("removing cache object %"PRIx64"\n", idx_to_oid(oc->vid, idx));
if (unlink(path) < 0) {
- ret = SD_RES_EIO;
eprintf("failed to remove cached object %m\n");
+ if (errno == ENOENT)
+ return SD_RES_SUCCESS;
+ ret = SD_RES_EIO;
goto out;
}
out:
return ret;
}
-static struct object_cache_entry *
-dirty_tree_and_list_insert(struct object_cache *oc, uint32_t idx,
- uint64_t bmap, bool create)
-{
- struct rb_node **p = &oc->dirty_tree.rb_node;
- struct rb_node *parent = NULL;
- struct object_cache_entry *entry;
-
- while (*p) {
- parent = *p;
- entry = rb_entry(parent, struct object_cache_entry, dirty_node);
-
- if (idx < entry_idx(entry))
- p = &(*p)->rb_left;
- else if (idx > entry_idx(entry))
- p = &(*p)->rb_right;
- else {
- /* already has this entry, merge bmap */
- entry->bmap |= bmap;
- return entry;
- }
- }
-
- entry = object_tree_search(&oc->object_tree, idx);
- if (!entry)
- panic("Can not find object entry %" PRIx32 "\n", idx);
-
- entry->bmap |= bmap;
- if (create)
- entry->idx |= CACHE_CREATE_BIT;
- rb_link_node(&entry->dirty_node, parent, p);
- rb_insert_color(&entry->dirty_node, &oc->dirty_tree);
- list_add(&entry->dirty_list, &oc->dirty_list);
-
- return entry;
-}
-
static inline void lru_move_entry(struct object_cache_entry *entry)
{
cds_list_del_rcu(&entry->lru_list);
cds_list_add_rcu(&entry->lru_list, &sys_cache.cache_lru_list);
}
-static inline void update_cache_entry(struct object_cache_entry *entry,
- uint32_t idx, size_t datalen,
- off_t offset, int dirty)
-{
- struct object_cache *oc = entry->oc;
-
- if (dirty) {
- uint64_t bmap = calc_object_bmap(datalen, offset);
-
- pthread_rwlock_wrlock(&oc->lock);
- dirty_tree_and_list_insert(oc, idx, bmap, false);
- pthread_rwlock_unlock(&oc->lock);
- }
-
- lru_move_entry(entry);
-}
-
static int read_cache_object_noupdate(uint32_t vid, uint32_t idx, void *buf,
size_t count, off_t offset)
{
@@ -358,7 +299,7 @@ static int read_cache_object(struct object_cache_entry *entry, void *buf,
ret = read_cache_object_noupdate(vid, idx, buf, count, offset);
if (ret == SD_RES_SUCCESS)
- update_cache_entry(entry, idx, count, offset, 0);
+ lru_move_entry(entry);
return ret;
}
@@ -372,10 +313,17 @@ static int write_cache_object(struct object_cache_entry *entry, void *buf,
int ret;
ret = write_cache_object_noupdate(vid, idx, buf, count, offset);
-
+ if (ret == SD_RES_SUCCESS && writeback) {
+ pthread_rwlock_wrlock(&entry->oc->lock);
+ entry->bmap |= calc_object_bmap(count, offset);
+ if (list_empty(&entry->dirty_list))
+ list_add(&entry->dirty_list, &entry->oc->dirty_list);
+ pthread_rwlock_unlock(&entry->oc->lock);
+ }
if (ret != SD_RES_SUCCESS)
return ret;
+ lru_move_entry(entry);
if (writeback)
goto out;
@@ -395,7 +343,6 @@ static int write_cache_object(struct object_cache_entry *entry, void *buf,
return ret;
}
out:
- update_cache_entry(entry, idx, count, offset, writeback);
return ret;
}
@@ -454,7 +401,6 @@ static int push_cache_object(uint32_t vid, uint32_t idx, uint64_t bmap,
ret = exec_local_req(&hdr, buf);
if (ret != SD_RES_SUCCESS)
eprintf("failed to push object %x\n", ret);
-
out:
free(buf);
return ret;
@@ -474,9 +420,8 @@ static int do_reclaim_object(struct object_cache_entry *entry)
uint64_t oid;
int ret = 0;
- pthread_rwlock_wrlock(&oc->lock);
-
oid = idx_to_oid(oc->vid, entry_idx(entry));
+ pthread_rwlock_wrlock(&oc->lock);
if (uatomic_read(&entry->refcnt) > 0) {
dprintf("%"PRIx64" is in operation, skip...\n", oid);
ret = -1;
@@ -529,7 +474,6 @@ static void do_reclaim(struct work *work)
data_length = data_length / 1024 / 1024;
uatomic_sub(&sys_cache.cache_size, data_length);
- free(entry);
}
dprintf("cache reclaim complete\n");
@@ -581,7 +525,6 @@ not_found:
cache->object_tree = RB_ROOT;
create_dir_for(vid);
- cache->dirty_tree = RB_ROOT;
INIT_LIST_HEAD(&cache->dirty_list);
INIT_LIST_HEAD(&cache->object_list);
@@ -616,7 +559,7 @@ void object_cache_try_to_reclaim(void)
}
static void add_to_object_cache(struct object_cache *oc, uint32_t idx,
- bool create)
+ bool writeback)
{
struct object_cache_entry *entry, *old;
uint32_t data_length;
@@ -645,9 +588,10 @@ static void add_to_object_cache(struct object_cache *oc, uint32_t idx,
free(entry);
entry = old;
}
- if (create) {
- uint64_t all = UINT64_MAX;
- dirty_tree_and_list_insert(oc, idx, all, create);
+ if (writeback) {
+ entry->bmap = UINT64_MAX;
+ entry->idx |= CACHE_CREATE_BIT;
+ list_add(&entry->dirty_list, &oc->dirty_list);
}
pthread_rwlock_unlock(&oc->lock);
lru_move_entry(entry);
@@ -817,7 +761,7 @@ static int object_cache_push(struct object_cache *oc)
goto push_failed;
entry->idx &= ~CACHE_CREATE_BIT;
entry->bmap = 0;
- del_from_dirty_tree_and_list(entry, &oc->dirty_tree);
+ list_del_init(&entry->dirty_list);
}
push_failed:
pthread_rwlock_unlock(&oc->lock);
@@ -856,9 +800,6 @@ void object_cache_delete(uint32_t vid)
pthread_rwlock_wrlock(&cache->lock);
list_for_each_entry_safe(entry, t, &cache->object_list, object_list) {
del_from_object_tree_and_list(entry, &cache->object_tree);
- if (!list_empty(&entry->dirty_list))
- del_from_dirty_tree_and_list(entry, &cache->dirty_tree);
- free(entry);
}
pthread_rwlock_unlock(&cache->lock);
free(cache);
@@ -880,16 +821,14 @@ get_cache_entry(struct object_cache *cache, uint32_t idx)
pthread_rwlock_unlock(&cache->lock);
return NULL;
}
-
uatomic_inc(&entry->refcnt);
pthread_rwlock_unlock(&cache->lock);
-
return entry;
}
static void put_cache_entry(struct object_cache_entry *entry)
{
- uatomic_dec(&entry->refcnt);
+ uatomic_dec(&entry->refcnt);
}
static int object_cache_flush_and_delete(struct object_cache *oc)
@@ -993,7 +932,6 @@ int object_cache_handle_request(struct request *req)
if (req->rq.opcode == SD_OP_CREATE_AND_WRITE_OBJ)
create = true;
-
retry:
ret = object_cache_lookup(cache, idx, create,
hdr->flags & SD_FLAG_CMD_CACHE);
@@ -1043,10 +981,8 @@ int object_cache_write(uint64_t oid, char *data, unsigned int datalen,
struct object_cache_entry *entry;
int ret;
- cache = find_object_cache(vid, false);
-
dprintf("%" PRIx64 "\n", oid);
-
+ cache = find_object_cache(vid, false);
entry = get_cache_entry(cache, idx);
if (!entry) {
dprintf("%" PRIx64 " doesn't exist\n", oid);
@@ -1054,9 +990,7 @@ int object_cache_write(uint64_t oid, char *data, unsigned int datalen,
}
ret = write_cache_object(entry, data, datalen, offset, create, false);
-
put_cache_entry(entry);
-
return ret;
}
@@ -1069,10 +1003,8 @@ int object_cache_read(uint64_t oid, char *data, unsigned int datalen,
struct object_cache_entry *entry;
int ret;
- cache = find_object_cache(vid, false);
-
dprintf("%" PRIx64 "\n", oid);
-
+ cache = find_object_cache(vid, false);
entry = get_cache_entry(cache, idx);
if (!entry) {
dprintf("%" PRIx64 " doesn't exist\n", oid);
@@ -1080,9 +1012,7 @@ int object_cache_read(uint64_t oid, char *data, unsigned int datalen,
}
ret = read_cache_object(entry, data, datalen, offset);
-
put_cache_entry(entry);
-
return ret;
}
@@ -1128,10 +1058,7 @@ void object_cache_remove(uint64_t oid)
entry = object_tree_search(&oc->object_tree, idx);
if (!entry)
goto out;
- if (!list_empty(&entry->dirty_list))
- del_from_dirty_tree_and_list(entry, &oc->dirty_tree);
del_from_object_tree_and_list(entry, &oc->object_tree);
- free(entry);
out:
pthread_rwlock_unlock(&oc->lock);
return;
--
1.7.9.5
More information about the sheepdog
mailing list