[sheepdog] [PATCH 2/2] object cache: add lock helpers to clarify the lock order

Liu Yuan namei.unix at gmail.com
Fri Jan 25 15:14:19 CET 2013


From: Liu Yuan <tailai.ly at taobao.com>

Since we have more than one lock, use helpers to state explicitly which lock
we take and avoid careless mistakes.

- add a lock protection strategy comment to clarify correctness.
- fix a nasty bug: the cache lock was never destroyed.
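
For reference, a minimal sketch of how the helpers are meant to nest on the
write path (illustrative only, not part of the patch; example_update() is a
made-up name). It mirrors write_cache_object(): the entry lock is taken first
and released last, and the cache lock is only held for the short list update
nested inside it.

/* Illustrative sketch, not part of this patch. */
static void example_update(struct object_cache *oc,
			   struct object_cache_entry *entry)
{
	write_lock_entry(entry);   /* serialize with the async pusher */
	/* ... write the object data on disk ... */
	write_lock_cache(oc);      /* protect the lru/dirty lists */
	/* ... update lru_list/dirty_list ... */
	unlock_cache(oc);
	unlock_entry(entry);
}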

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 sheep/object_cache.c |  113 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 76 insertions(+), 37 deletions(-)
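
Note on the entry refcnt (illustrative sketch, not part of the patch;
example_pin_first_dirty() and example_reclaim_one() are made-up names): the
async pusher pins an entry by bumping entry->refcnt while still holding the
cache lock, as do_async_push() does, so the reclaimer and the sync pusher,
which only walk the lists under the cache lock, skip any pinned entry instead
of freeing or re-pushing it.

/* Illustrative sketch, not part of this patch. */
static struct object_cache_entry *
example_pin_first_dirty(struct object_cache *oc)
{
	struct object_cache_entry *entry;

	write_lock_cache(oc);
	if (list_empty(&oc->dirty_head)) {
		unlock_cache(oc);
		return NULL;
	}
	entry = list_first_entry(&oc->dirty_head, typeof(*entry), dirty_list);
	uatomic_inc(&entry->refcnt);   /* pin before dropping the cache lock */
	unlock_cache(oc);
	return entry;
}

static void example_reclaim_one(struct object_cache *oc,
				struct object_cache_entry *entry)
{
	write_lock_cache(oc);
	if (uatomic_read(&entry->refcnt) > 0) {
		/* a pusher still holds a reference, leave it alone */
		unlock_cache(oc);
		return;
	}
	free_cache_entry(entry);       /* capacity accounting omitted */
	unlock_cache(oc);
}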

diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index 9491bcd..09fd65f 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -118,7 +118,7 @@ static inline bool idx_has_vdi_bit(uint32_t idx)
 	return !!(idx & CACHE_VDI_BIT);
 }
 
-static uint64_t calc_object_bmap(size_t len, off_t offset)
+static inline uint64_t calc_object_bmap(size_t len, off_t offset)
 {
 	int start, end, nr;
 	unsigned long bmap = 0;
@@ -133,6 +133,48 @@ static uint64_t calc_object_bmap(size_t len, off_t offset)
 	return (uint64_t)bmap;
 }
 
+/*
+ * Mutual exclusion protection strategy:
+ *
+ * reader and writer:          no need to protect since it is okay to read
+ *                             unacked stale data.
+ * writer and async pusher:    entry lock.
+ * writer and sync pusher:     guaranteed by VM request sequence.
+ * writer and reclaimer:       entry refcnt.
+ * async and sync pusher:      cache lock and entry refcnt.
+ * async pusher and reclaimer: entry refcnt.
+ * sync pusher and reclaimer:  cache lock.
+ */
+static inline void read_lock_cache(struct object_cache *oc)
+{
+	pthread_rwlock_rdlock(&oc->lock);
+}
+
+static inline void write_lock_cache(struct object_cache *oc)
+{
+	pthread_rwlock_wrlock(&oc->lock);
+}
+
+static inline void unlock_cache(struct object_cache *oc)
+{
+	pthread_rwlock_unlock(&oc->lock);
+}
+
+static inline void read_lock_entry(struct object_cache_entry *entry)
+{
+	pthread_rwlock_rdlock(&entry->lock);
+}
+
+static inline void write_lock_entry(struct object_cache_entry *entry)
+{
+	pthread_rwlock_wrlock(&entry->lock);
+}
+
+static inline void unlock_entry(struct object_cache_entry *entry)
+{
+	pthread_rwlock_unlock(&entry->lock);
+}
+
 static struct object_cache_entry *
 lru_tree_insert(struct rb_root *root, struct object_cache_entry *new)
 {
@@ -298,9 +340,9 @@ static int read_cache_object(struct object_cache_entry *entry, void *buf,
 	ret = read_cache_object_noupdate(vid, idx, buf, count, offset);
 
 	if (ret == SD_RES_SUCCESS) {
-		pthread_rwlock_wrlock(&oc->lock);
+		write_lock_cache(oc);
 		list_move_tail(&entry->lru_list, &oc->lru_head);
-		pthread_rwlock_unlock(&oc->lock);
+		unlock_cache(oc);
 	}
 	return ret;
 }
@@ -385,29 +427,27 @@ static void do_async_push(struct work *work)
 
 	sd_dprintf("%"PRIx32"\n", oc->vid);
 next:
-	pthread_rwlock_wrlock(&oc->lock);
+	write_lock_cache(oc);
 	if (list_empty(&oc->dirty_head)) {
-	    pthread_rwlock_unlock(&oc->lock);
-	    return;
+		unlock_cache(oc);
+		return;
 	}
 	entry = list_first_entry(&oc->dirty_head, typeof(*entry), dirty_list);
 	uatomic_inc(&entry->refcnt);
 	uatomic_dec(&oc->dirty_count);
-	pthread_rwlock_unlock(&oc->lock);
-
-	pthread_rwlock_rdlock(&entry->lock);
+	unlock_cache(oc);
 
+	read_lock_entry(entry);
 	assert(push_cache_object(oc->vid, entry_idx(entry), entry->bmap,
 				 !!(entry->idx & CACHE_CREATE_BIT))
 	       == SD_RES_SUCCESS);
 	entry->idx &= ~CACHE_CREATE_BIT;
 	entry->bmap = 0;
 
-	pthread_rwlock_wrlock(&oc->lock);
+	write_lock_cache(oc);
 	list_del_init(&entry->dirty_list);
-	pthread_rwlock_unlock(&oc->lock);
-
-	pthread_rwlock_unlock(&entry->lock);
+	unlock_cache(oc);
+	unlock_entry(entry);
 
 	uatomic_dec(&entry->refcnt);
 	goto next;
@@ -453,23 +493,23 @@ static int write_cache_object(struct object_cache_entry *entry, void *buf,
 	struct sd_req hdr;
 	int ret;
 
-	pthread_rwlock_wrlock(&entry->lock);
+	write_lock_entry(entry);
 	ret = write_cache_object_noupdate(vid, idx, buf, count, offset);
 	if (ret != SD_RES_SUCCESS) {
-		pthread_rwlock_unlock(&entry->lock);
+		unlock_entry(entry);
 		return ret;
 	}
 
-	pthread_rwlock_wrlock(&oc->lock);
+	write_lock_cache(oc);
 	if (writeback) {
 		entry->bmap |= calc_object_bmap(count, offset);
 		if (list_empty(&entry->dirty_list))
 			add_to_dirty_list(entry);
 	}
 	list_move_tail(&entry->lru_list, &oc->lru_head);
-	pthread_rwlock_unlock(&oc->lock);
+	unlock_cache(oc);
+	unlock_entry(entry);
 
-	pthread_rwlock_unlock(&entry->lock);
 	if (writeback)
 		goto out;
 
@@ -513,7 +553,7 @@ static void do_reclaim_object(struct object_cache *oc)
 	uint64_t oid;
 	uint32_t cap;
 
-	pthread_rwlock_wrlock(&oc->lock);
+	write_lock_cache(oc);
 	list_for_each_entry_safe(entry, t, &oc->lru_head, lru_list) {
 		oid = idx_to_oid(oc->vid, entry_idx(entry));
 		if (uatomic_read(&entry->refcnt) > 0) {
@@ -534,7 +574,7 @@ static void do_reclaim_object(struct object_cache *oc)
 		if (cap <= HIGH_WATERMARK)
 			break;
 	}
-	pthread_rwlock_unlock(&oc->lock);
+	unlock_cache(oc);
 	/*
 	 * Reclaimer grabs a write lock, which will block all the IO threads of
 	 * this VDI. We call pthread_yield() to expect that other threads can
@@ -591,18 +631,16 @@ static void reclaim_done(struct work *work)
 static int create_dir_for(uint32_t vid)
 {
 	int ret = 0;
-	struct strbuf buf = STRBUF_INIT;
+	char p[PATH_MAX];
 
-	strbuf_addstr(&buf, object_cache_dir);
-	strbuf_addf(&buf, "/%06"PRIx32, vid);
-	if (mkdir(buf.buf, def_dmode) < 0)
+	sprintf(p, "%s/%06"PRIx32, object_cache_dir, vid);
+	if (mkdir(p, def_dmode) < 0)
 		if (errno != EEXIST) {
-			sd_eprintf("%s, %m\n", buf.buf);
+			sd_eprintf("%s, %m\n", p);
 			ret = -1;
 			goto err;
 		}
 err:
-	strbuf_release(&buf);
 	return ret;
 }
 
@@ -687,7 +725,7 @@ static void add_to_lru_cache(struct object_cache *oc, uint32_t idx, bool create)
 
 	sd_dprintf("oid %"PRIx64" added\n", idx_to_oid(oc->vid, idx));
 
-	pthread_rwlock_wrlock(&oc->lock);
+	write_lock_cache(oc);
 	assert(!lru_tree_insert(&oc->lru_tree, entry));
 	uatomic_add(&gcache.capacity, CACHE_OBJECT_SIZE);
 	list_add_tail(&entry->lru_list, &oc->lru_head);
@@ -696,7 +734,7 @@ static void add_to_lru_cache(struct object_cache *oc, uint32_t idx, bool create)
 		entry->idx |= CACHE_CREATE_BIT;
 		add_to_dirty_list(entry);
 	}
-	pthread_rwlock_unlock(&oc->lock);
+	unlock_cache(oc);
 }
 
 static inline int lookup_path(char *path)
@@ -859,7 +897,7 @@ static int object_cache_sync_push(struct object_cache *oc)
 	struct object_cache_entry *entry, *t;
 	int ret = SD_RES_SUCCESS;
 
-	pthread_rwlock_wrlock(&oc->lock);
+	write_lock_cache(oc);
 	list_for_each_entry_safe(entry, t, &oc->dirty_head, dirty_list) {
 		if (uatomic_read(&entry->refcnt) > 0)
 			/* async pusher has been handling it */
@@ -873,7 +911,7 @@ static int object_cache_sync_push(struct object_cache *oc)
 		uatomic_dec(&oc->dirty_count);
 		list_del_init(&entry->dirty_list);
 	}
-	pthread_rwlock_unlock(&oc->lock);
+	unlock_cache(oc);
 	return ret;
 }
 
@@ -906,12 +944,13 @@ void object_cache_delete(uint32_t vid)
 	hlist_del(&cache->hash);
 	pthread_rwlock_unlock(&hashtable_lock[h]);
 
-	pthread_rwlock_wrlock(&cache->lock);
+	write_lock_cache(cache);
 	list_for_each_entry_safe(entry, t, &cache->lru_head, lru_list) {
 		free_cache_entry(entry);
 		uatomic_sub(&gcache.capacity, CACHE_OBJECT_SIZE);
 	}
-	pthread_rwlock_unlock(&cache->lock);
+	unlock_cache(cache);
+	pthread_rwlock_destroy(&cache->lock);
 	free(cache);
 
 	/* Then we free disk */
@@ -924,15 +963,15 @@ get_cache_entry(struct object_cache *cache, uint32_t idx)
 {
 	struct object_cache_entry *entry;
 
-	pthread_rwlock_rdlock(&cache->lock);
+	read_lock_cache(cache);
 	entry = lru_tree_search(&cache->lru_tree, idx);
 	if (!entry) {
 		/* The cache entry may be reclaimed, so try again. */
-		pthread_rwlock_unlock(&cache->lock);
+		unlock_cache(cache);
 		return NULL;
 	}
 	uatomic_inc(&entry->refcnt);
-	pthread_rwlock_unlock(&cache->lock);
+	unlock_cache(cache);
 	return entry;
 }
 
@@ -1166,13 +1205,13 @@ void object_cache_remove(uint64_t oid)
 	if (!oc)
 		return;
 
-	pthread_rwlock_wrlock(&oc->lock);
+	write_lock_cache(oc);
 	entry = lru_tree_search(&oc->lru_tree, idx);
 	if (!entry)
 		goto out;
 	free_cache_entry(entry);
 out:
-	pthread_rwlock_unlock(&oc->lock);
+	unlock_cache(oc);
 	return;
 }
 
-- 
1.7.9.5