[Sheepdog] [PATCH] object cache: incorrect lock may lead to update lost
Yunkai Zhang
yunkai.me at gmail.com
Sat Apr 14 15:35:17 CEST 2012
Currently, we use a dirty_rb tree to record which cache object
have been updated.
Two kinds of threads will operate this dirty tree concurrently:
a. mulitple io-workers: write something to cache objects
b. one flush-worker: flush all updates to cluster
In the following scenario, update will be lost:
flush-worker one io-worker
[object_cache_push] [object_cache_rw]
|-(1) get a cache object from dirty tree |
|-(2) read the data file of this object |
| modify data file of this object (3)-|
|-(4) forward_write_obj_req() |
| add this object to dirty tree (5)-|
|-(6) rb_erase: remove this object from dirty tree |
Note: io-worker generate *new update* in step (3), but flush-worker
remove this cache object from dirty tree in step (6).
I use two dirty trees to fix this bug and avoid heavy lock between
flush-worker and io-wroker threads.
There is only one *active* dirty tree for io-workers in any time.
After io-worker modify something, it operate this active dirty tree.
When flush-worker want to flush all updates to cluster, it:
1. mark another tree as *active* dirty tree.
2. get update info from *unactive* dirty tree.
3. if something wrong occur in flushing process,
merge two dirty trees into active drity tree.
Signed-off-by: Yunkai Zhang <qiushu.zyk at taobao.com>
---
sheep/object_cache.c | 73 +++++++++++++++++++++++++++++++++++++++++++-------
sheep/sheep_priv.h | 9 +++++-
2 files changed, 70 insertions(+), 12 deletions(-)
diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index df9ab49..9756fb6 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -127,9 +127,16 @@ not_found:
cache = xzalloc(sizeof(*cache));
cache->vid = vid;
create_dir_for(vid);
- cache->dirty_rb = RB_ROOT;
+
+ cache->dirty_trees[0] = RB_ROOT;
+ cache->dirty_trees[1] = RB_ROOT;
+ cache->active_dirty_tree = &cache->dirty_trees[0];
+
+ INIT_LIST_HEAD(&cache->dirty_lists[0]);
+ INIT_LIST_HEAD(&cache->dirty_lists[1]);
+ cache->active_dirty_list = &cache->dirty_lists[0];
+
pthread_mutex_init(&cache->lock, NULL);
- INIT_LIST_HEAD(&cache->dirty_list);
hlist_add_head(&cache->hash, head);
} else
cache = NULL;
@@ -144,15 +151,50 @@ static void add_to_dirty_tree_and_list(struct object_cache *oc, uint32_t idx, in
entry->idx = idx;
pthread_mutex_lock(&oc->lock);
- if (!dirty_tree_insert(&oc->dirty_rb, entry)) {
+ if (!dirty_tree_insert(oc->active_dirty_tree, entry)) {
if (create)
entry->create = 1;
- list_add(&entry->list, &oc->dirty_list);
+ list_add(&entry->list, oc->active_dirty_list);
} else
free(entry);
pthread_mutex_unlock(&oc->lock);
}
+static void active_dirty_tree_and_list_switch(struct object_cache *oc,
+ struct rb_root ** prev_dirty_tree,
+ struct list_head **prev_dirty_list)
+{
+ pthread_mutex_lock(&oc->lock);
+
+ *prev_dirty_list = oc->active_dirty_list;
+ *prev_dirty_tree = oc->active_dirty_tree;
+
+ if (oc->active_dirty_tree == &oc->dirty_trees[0]) {
+ oc->active_dirty_list = &oc->dirty_lists[1];
+ oc->active_dirty_tree = &oc->dirty_trees[1];
+ } else {
+ oc->active_dirty_list = &oc->dirty_lists[0];
+ oc->active_dirty_tree = &oc->dirty_trees[0];
+ }
+
+ pthread_mutex_unlock(&oc->lock);
+}
+
+static void active_dirty_tree_merge(struct object_cache *oc,
+ struct rb_root *prev_dirty_tree,
+ struct list_head *prev_dirty_list)
+{
+ struct object_cache_entry *entry, *t;
+
+ pthread_mutex_lock(&oc->lock);
+ list_for_each_entry_safe(entry, t, prev_dirty_list, list) {
+ dirty_tree_insert(oc->active_dirty_tree, entry);
+ rb_erase(&entry->rb, prev_dirty_tree);
+ list_del(&entry->list);
+ }
+ pthread_mutex_unlock(&oc->lock);
+}
+
int object_cache_lookup(struct object_cache *oc, uint32_t idx, int create)
{
struct strbuf buf;
@@ -464,23 +506,34 @@ out:
int object_cache_push(struct object_cache *oc)
{
struct object_cache_entry *entry, *t;
+ struct rb_root *prev_dirty_tree;
+ struct list_head *prev_dirty_list;
int ret = SD_RES_SUCCESS;
if (node_in_recovery())
/* We don't do flushing in recovery */
return SD_RES_SUCCESS;
- list_for_each_entry_safe(entry, t, &oc->dirty_list, list) {
+ active_dirty_tree_and_list_switch(oc,
+ &prev_dirty_tree,
+ &prev_dirty_list);
+
+ /* Only one flush worker thread would operate prev_dirty_list
+ * and prev_dirty_tree, needn't to use lock to protect them */
+ list_for_each_entry_safe(entry, t, prev_dirty_list, list) {
ret = push_cache_object(oc->vid, entry->idx, entry->create);
- if (ret != SD_RES_SUCCESS)
+ if (ret != SD_RES_SUCCESS) {
+ active_dirty_tree_merge(oc,
+ prev_dirty_tree,
+ prev_dirty_list);
goto out;
- pthread_mutex_lock(&oc->lock);
- rb_erase(&entry->rb, &oc->dirty_rb);
+ }
+ rb_erase(&entry->rb, prev_dirty_tree);
list_del(&entry->list);
- pthread_mutex_unlock(&oc->lock);
free(entry);
}
out:
+ assert(list_empty(prev_dirty_list));
return ret;
}
@@ -518,7 +571,7 @@ void object_cache_delete(uint32_t vid)
hlist_del(&cache->hash);
pthread_mutex_unlock(&hashtable_lock[h]);
- list_for_each_entry_safe(entry, t, &cache->dirty_list, list) {
+ list_for_each_entry_safe(entry, t, cache->active_dirty_list, list) {
free(entry);
}
free(cache);
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index a9e8440..8e982cf 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -410,9 +410,14 @@ static inline int sys_can_halt(void)
struct object_cache {
uint32_t vid;
- struct list_head dirty_list;
struct hlist_node hash;
- struct rb_root dirty_rb;
+
+ struct list_head dirty_lists[2];
+ struct list_head *active_dirty_list;
+
+ struct rb_root dirty_trees[2];
+ struct rb_root *active_dirty_tree;
+
pthread_mutex_t lock;
};
--
1.7.7.6
More information about the sheepdog
mailing list