[sheepdog] [PATCH 1/2] sheep: move all object cache code into object_cache.c

Christoph Hellwig hch at infradead.org
Thu May 31 10:46:52 CEST 2012


Move all code handling the object cache into object_cache.c, and thus allow
keeping its implementation details private.  Also add a missing
sys->enable_write_cache check for the SD_OP_FLUSH_DEL_CACHE command.

Signed-off-by: Christoph Hellwig <hch at lst.de>

---
 sheep/gateway.c      |   60 ------------
 sheep/object_cache.c |  246 +++++++++++++++++++++++++++++++++++++++++++++++++--
 sheep/ops.c          |   60 ------------
 sheep/sheep_priv.h   |   52 ++--------
 sheep/store.c        |   77 ---------------
 5 files changed, 255 insertions(+), 240 deletions(-)

Index: sheepdog/sheep/gateway.c
===================================================================
--- sheepdog.orig/sheep/gateway.c	2012-05-31 07:38:36.095994000 +0200
+++ sheepdog/sheep/gateway.c	2012-05-31 10:20:06.907901581 +0200
@@ -16,66 +16,6 @@
 #include "sheep_priv.h"
 
 
-static int bypass_object_cache(struct request *req)
-{
-	uint64_t oid = req->rq.obj.oid;
-
-	if (!(req->rq.flags & SD_FLAG_CMD_CACHE)) {
-		uint32_t vid = oid_to_vid(oid);
-		struct object_cache *cache;
-
-		cache = find_object_cache(vid, 0);
-		if (!cache)
-			return 1;
-		if (req->rq.flags & SD_FLAG_CMD_WRITE) {
-			object_cache_flush_and_delete(req->vnodes, cache);
-			return 1;
-		} else  {
-			/* For read requet, we can read cache if any */
-			uint32_t idx = data_oid_to_idx(oid);
-			if (is_vdi_obj(oid))
-				idx |= 1 << CACHE_VDI_SHIFT;
-
-			if (object_cache_lookup(cache, idx, 0) < 0)
-				return 1;
-			else
-				return 0;
-		}
-	}
-
-	/*
-	 * For vmstate && vdi_attr object, we don't do caching
-	 */
-	if (is_vmstate_obj(oid) || is_vdi_attr_obj(oid) ||
-	    req->rq.flags & SD_FLAG_CMD_COW)
-		return 1;
-	return 0;
-}
-
-static int object_cache_handle_request(struct request *req)
-{
-	uint64_t oid = req->rq.obj.oid;
-	uint32_t vid = oid_to_vid(oid);
-	uint32_t idx = data_oid_to_idx(oid);
-	struct object_cache *cache;
-	int ret, create = 0;
-
-	if (is_vdi_obj(oid))
-		idx |= 1 << CACHE_VDI_SHIFT;
-
-	cache = find_object_cache(vid, 1);
-
-	if (req->rq.opcode == SD_OP_CREATE_AND_WRITE_OBJ)
-		create = 1;
-
-	if (object_cache_lookup(cache, idx, create) < 0) {
-		ret = object_cache_pull(req->vnodes, cache, idx);
-		if (ret != SD_RES_SUCCESS)
-			return ret;
-	}
-	return object_cache_rw(cache, idx, req);
-}
-
 int forward_read_obj_req(struct request *req)
 {
 	int i, fd, ret = SD_RES_SUCCESS;
Index: sheepdog/sheep/object_cache.c
===================================================================
--- sheepdog.orig/sheep/object_cache.c	2012-05-31 09:55:53.619915443 +0200
+++ sheepdog/sheep/object_cache.c	2012-05-31 10:30:33.551895606 +0200
@@ -27,12 +27,51 @@
 #include "strbuf.h"
 #include "rbtree.h"
 
-#define HASH_BITS	5
-#define HASH_SIZE	(1 << HASH_BITS)
+/*
+ * Object Cache ID
+ *
+ *  0 - 19 (20 bits): data object space
+ *  20 - 27 (8 bits): reserved
+ *  28 - 31 (4 bits): object type identifier space
+ */
+#define CACHE_VDI_SHIFT       31
+#define CACHE_VDI_BIT         (UINT32_C(1) << CACHE_VDI_SHIFT)
+#define CACHE_BLOCK_SIZE      ((UINT64_C(1) << 10) * 64) /* 64 KB */
+
+struct object_cache {
+	uint32_t vid;
+	struct hlist_node hash;
+
+	struct list_head dirty_lists[2];
+	struct list_head *active_dirty_list;
+
+	struct rb_root dirty_trees[2];
+	struct rb_root *active_dirty_tree;
+
+	pthread_mutex_t lock;
+};
+
+struct object_cache_entry {
+	uint32_t idx;
+	uint64_t bmap; /* each bit represents one dirty
+			* block which should be flushed */
+	struct rb_node rb;
+	struct list_head list;
+	int create;
+};
+
+struct flush_work {
+	struct object_cache *cache;
+	struct vnode_info *vnode_info;
+	struct work work;
+};
 
 static char cache_dir[PATH_MAX];
 static int def_open_flags = O_RDWR;
 
+#define HASH_BITS	5
+#define HASH_SIZE	(1 << HASH_BITS)
+
 static pthread_mutex_t hashtable_lock[HASH_SIZE] = { [0 ... HASH_SIZE - 1] = PTHREAD_MUTEX_INITIALIZER };
 static struct hlist_head cache_hashtable[HASH_SIZE];
 
@@ -122,7 +161,7 @@ err:
 	return ret;
 }
 
-struct object_cache *find_object_cache(uint32_t vid, int create)
+static struct object_cache *find_object_cache(uint32_t vid, int create)
 {
 	int h = hash(vid);
 	struct hlist_head *head = cache_hashtable + h;
@@ -219,7 +258,8 @@ static void merge_dirty_tree_and_list(st
 	pthread_mutex_unlock(&oc->lock);
 }
 
-int object_cache_lookup(struct object_cache *oc, uint32_t idx, int create)
+static int object_cache_lookup(struct object_cache *oc, uint32_t idx,
+		int create)
 {
 	struct strbuf buf;
 	int fd, ret = 0, flags = def_open_flags;
@@ -364,7 +404,8 @@ out:
 	return ret;
 }
 
-int object_cache_rw(struct object_cache *oc, uint32_t idx, struct request *req)
+static int object_cache_rw(struct object_cache *oc, uint32_t idx,
+		struct request *req)
 {
 	struct sd_req *hdr = &req->rq;
 	uint64_t bmap = 0;
@@ -446,7 +487,7 @@ out:
 }
 
 /* Fetch the object, cache it in success */
-int object_cache_pull(struct vnode_info *vnodes, struct object_cache *oc,
+static int object_cache_pull(struct vnode_info *vnodes, struct object_cache *oc,
 		      uint32_t idx)
 {
 	struct request read_req;
@@ -570,7 +611,8 @@ out:
 }
 
 /* Push back all the dirty objects to sheep cluster storage */
-int object_cache_push(struct vnode_info *vnode_info, struct object_cache *oc)
+static int object_cache_push(struct vnode_info *vnode_info,
+		struct object_cache *oc)
 {
 	struct object_cache_entry *entry, *t;
 	struct rb_root *inactive_dirty_tree;
@@ -653,7 +695,7 @@ void object_cache_delete(uint32_t vid)
 
 }
 
-int object_cache_flush_and_delete(struct vnode_info *vnode_info,
+static int object_cache_flush_and_delete(struct vnode_info *vnode_info,
 		struct object_cache *oc)
 {
 	DIR *dir;
@@ -697,6 +739,194 @@ out:
 	return ret;
 }
 
+int bypass_object_cache(struct request *req)
+{
+	uint64_t oid = req->rq.obj.oid;
+
+	if (!(req->rq.flags & SD_FLAG_CMD_CACHE)) {
+		uint32_t vid = oid_to_vid(oid);
+		struct object_cache *cache;
+
+		cache = find_object_cache(vid, 0);
+		if (!cache)
+			return 1;
+		if (req->rq.flags & SD_FLAG_CMD_WRITE) {
+			object_cache_flush_and_delete(req->vnodes, cache);
+			return 1;
+		} else  {
+			/* For read request, we can read cache if any */
+			uint32_t idx = data_oid_to_idx(oid);
+			if (is_vdi_obj(oid))
+				idx |= 1 << CACHE_VDI_SHIFT;
+
+			if (object_cache_lookup(cache, idx, 0) < 0)
+				return 1;
+			else
+				return 0;
+		}
+	}
+
+	/*
+	 * For vmstate && vdi_attr object, we don't do caching
+	 */
+	if (is_vmstate_obj(oid) || is_vdi_attr_obj(oid) ||
+	    req->rq.flags & SD_FLAG_CMD_COW)
+		return 1;
+	return 0;
+}
+
+int object_cache_handle_request(struct request *req)
+{
+	uint64_t oid = req->rq.obj.oid;
+	uint32_t vid = oid_to_vid(oid);
+	uint32_t idx = data_oid_to_idx(oid);
+	struct object_cache *cache;
+	int ret, create = 0;
+
+	if (is_vdi_obj(oid))
+		idx |= 1 << CACHE_VDI_SHIFT;
+
+	cache = find_object_cache(vid, 1);
+
+	if (req->rq.opcode == SD_OP_CREATE_AND_WRITE_OBJ)
+		create = 1;
+
+	if (object_cache_lookup(cache, idx, create) < 0) {
+		ret = object_cache_pull(req->vnodes, cache, idx);
+		if (ret != SD_RES_SUCCESS)
+			return ret;
+	}
+	return object_cache_rw(cache, idx, req);
+}
+
+int object_cache_write(uint64_t oid, char *data, unsigned int datalen,
+		uint64_t offset, uint16_t flags, int copies, uint32_t epoch,
+		int create)
+{
+	int ret;
+	struct request *req;
+	uint32_t vid = oid_to_vid(oid);
+	uint32_t idx = data_oid_to_idx(oid);
+	struct object_cache *cache;
+
+	if (is_vdi_obj(oid))
+		idx |= 1 << CACHE_VDI_SHIFT;
+
+	cache = find_object_cache(vid, 0);
+
+	req = zalloc(sizeof(*req));
+	if (!req)
+		return SD_RES_NO_MEM;
+
+	if (create)
+		req->rq.opcode = SD_OP_CREATE_AND_WRITE_OBJ;
+	else
+		req->rq.opcode = SD_OP_WRITE_OBJ;
+	req->rq.flags = flags | SD_FLAG_CMD_WRITE;
+	req->rq.data_length = datalen;
+
+	req->rq.obj.oid = oid;
+	req->rq.obj.offset = offset;
+	req->rq.obj.copies = copies;
+
+	req->data = data;
+	req->op = get_sd_op(req->rq.opcode);
+
+	ret = object_cache_rw(cache, idx, req);
+
+	free(req);
+	return ret;
+}
+
+int object_cache_read(uint64_t oid, char *data, unsigned int datalen,
+		uint64_t offset, int copies, uint32_t epoch)
+{
+	int ret;
+	struct request *req;
+	uint32_t vid = oid_to_vid(oid);
+	uint32_t idx = data_oid_to_idx(oid);
+	struct object_cache *cache;
+
+	if (is_vdi_obj(oid))
+		idx |= 1 << CACHE_VDI_SHIFT;
+
+	cache = find_object_cache(vid, 0);
+
+	req = zalloc(sizeof(*req));
+	if (!req)
+		return SD_RES_NO_MEM;
+
+	req->rq.opcode = SD_OP_READ_OBJ;
+	req->rq.data_length = datalen;
+
+	req->rq.obj.oid = oid;
+	req->rq.obj.offset = offset;
+	req->rq.obj.copies = copies;
+
+	req->data = data;
+	req->op = get_sd_op(req->rq.opcode);
+
+	ret = object_cache_rw(cache, idx, req);
+
+	free(req);
+
+	return ret;
+}
+
+static void object_cache_flush_vdi_fn(struct work *work)
+{
+	struct flush_work *fw = container_of(work, struct flush_work, work);
+
+	dprintf("flush vdi %"PRIx32"\n", fw->cache->vid);
+	if (object_cache_push(fw->vnode_info, fw->cache) != SD_RES_SUCCESS)
+		eprintf("failed to flush vdi %"PRIx32"\n", fw->cache->vid);
+}
+
+static void object_cache_flush_vdi_done(struct work *work)
+{
+	struct flush_work *fw = container_of(work, struct flush_work, work);
+
+	dprintf("flush vdi %"PRIx32" done\n", fw->cache->vid);
+
+	put_vnode_info(fw->vnode_info);
+	free(fw);
+}
+
+int object_cache_flush_vdi(struct request *req)
+{
+	uint32_t vid = oid_to_vid(req->rq.obj.oid);
+	struct object_cache *cache;
+
+	cache = find_object_cache(vid, 0);
+	if (!cache)
+		return SD_RES_SUCCESS;
+
+	if (sys->async_flush) {
+		struct flush_work *fw = xmalloc(sizeof(*fw));
+
+		fw->work.fn = object_cache_flush_vdi_fn;
+		fw->work.done = object_cache_flush_vdi_done;
+		fw->cache = cache;
+		fw->vnode_info = grab_vnode_info(req->vnodes);
+
+		queue_work(sys->flush_wqueue, &fw->work);
+		return SD_RES_SUCCESS;
+	}
+
+	return object_cache_push(req->vnodes, cache);
+}
+
+int object_cache_flush_and_del(struct request *req)
+{
+	uint32_t vid = oid_to_vid(req->rq.obj.oid);
+	struct object_cache *cache;
+
+	cache = find_object_cache(vid, 0);
+	if (cache && object_cache_flush_and_delete(req->vnodes, cache) < 0)
+		return SD_RES_EIO;
+	return SD_RES_SUCCESS;
+}
+
 int object_cache_init(const char *p)
 {
 	int ret = 0;
Index: sheepdog/sheep/ops.c
===================================================================
--- sheepdog.orig/sheep/ops.c	2012-05-31 09:55:53.619915443 +0200
+++ sheepdog/sheep/ops.c	2012-05-31 10:26:24.363897983 +0200
@@ -58,12 +58,6 @@ struct sd_op_template {
 	int (*process_main)(const struct sd_req *req, struct sd_rsp *rsp, void *data);
 };
 
-struct flush_work {
-	struct object_cache *cache;
-	struct vnode_info *vnode_info;
-	struct work work;
-};
-
 static int stat_sheep(uint64_t *store_size, uint64_t *store_free, uint32_t epoch)
 {
 	struct statvfs vs;
@@ -549,64 +543,18 @@ static int local_get_snap_file(struct re
 	return ret;
 }
 
-static void flush_vdi_fn(struct work *work)
-{
-	struct flush_work *fw = container_of(work, struct flush_work, work);
-
-	dprintf("flush vdi %"PRIx32"\n", fw->cache->vid);
-	if (object_cache_push(fw->vnode_info, fw->cache) != SD_RES_SUCCESS)
-		eprintf("failed to flush vdi %"PRIx32"\n", fw->cache->vid);
-}
-
-static void flush_vdi_done(struct work *work)
-{
-	struct flush_work *fw = container_of(work, struct flush_work, work);
-
-	dprintf("flush vdi %"PRIx32" done\n", fw->cache->vid);
-
-	put_vnode_info(fw->vnode_info);
-	free(fw);
-}
-
 static int local_flush_vdi(struct request *req)
 {
-	uint64_t oid = req->rq.obj.oid;
-	uint32_t vid = oid_to_vid(oid);
-	struct object_cache *cache;
-
 	if (!sys->enable_write_cache)
 		return SD_RES_SUCCESS;
-
-	cache = find_object_cache(vid, 0);
-	if (cache) {
-		if (!sys->async_flush)
-			return object_cache_push(req->vnodes, cache);
-		else {
-			struct flush_work *fw = xmalloc(sizeof(*fw));
-
-			fw->work.fn = flush_vdi_fn;
-			fw->work.done = flush_vdi_done;
-			fw->cache = cache;
-			fw->vnode_info = grab_vnode_info(req->vnodes);
-
-			queue_work(sys->flush_wqueue, &fw->work);
-		}
-	}
-
-	return SD_RES_SUCCESS;
+	return object_cache_flush_vdi(req);
 }
 
 static int local_flush_and_del(struct request *req)
 {
-	uint64_t oid = req->rq.obj.oid;
-	uint32_t vid = oid_to_vid(oid);
-	struct object_cache *cache = find_object_cache(vid, 0);
-
-	if (cache)
-		if (object_cache_flush_and_delete(req->vnodes, cache) < 0)
-			return SD_RES_EIO;
-
-	return SD_RES_SUCCESS;
+	if (!sys->enable_write_cache)
+		return SD_RES_SUCCESS;
+	return object_cache_flush_and_del(req);
 }
 
 static int local_trace_ops(const struct sd_req *req, struct sd_rsp *rsp, void *data)
Index: sheepdog/sheep/sheep_priv.h
===================================================================
--- sheepdog.orig/sheep/sheep_priv.h	2012-05-31 07:38:36.099994000 +0200
+++ sheepdog/sheep/sheep_priv.h	2012-05-31 10:30:25.199895685 +0200
@@ -401,50 +401,20 @@ static inline int sys_can_halt(void)
 }
 
 /* object_cache */
-/*
- * Object Cache ID
- *
- *  0 - 19 (20 bits): data object space
- *  20 - 27 (8 bits): reserved
- *  28 - 31 (4 bits): object type indentifier space
- */
 
-#define CACHE_VDI_SHIFT       31
-#define CACHE_VDI_BIT         (UINT32_C(1) << CACHE_VDI_SHIFT)
-#define CACHE_BLOCK_SIZE      ((UINT64_C(1) << 10) * 64) /* 64 KB */
-
-struct object_cache {
-	uint32_t vid;
-	struct hlist_node hash;
-
-	struct list_head dirty_lists[2];
-	struct list_head *active_dirty_list;
-
-	struct rb_root dirty_trees[2];
-	struct rb_root *active_dirty_tree;
-
-	pthread_mutex_t lock;
-};
+int bypass_object_cache(struct request *req);
+int object_is_cached(uint64_t oid);
 
-struct object_cache_entry {
-	uint32_t idx;
-	uint64_t bmap; /* each bit represents one dirty
-			* block which should be flushed */
-	struct rb_node rb;
-	struct list_head list;
-	int create;
-};
+int object_cache_handle_request(struct request *req);
+int object_cache_write(uint64_t oid, char *data, unsigned int datalen,
+		uint64_t offset, uint16_t flags, int copies, uint32_t epoch,
+		int create);
+int object_cache_read(uint64_t oid, char *data, unsigned int datalen,
+		uint64_t offset, int copies, uint32_t epoch);
+int object_cache_flush_vdi(struct request *req);
+int object_cache_flush_and_del(struct request *req);
+void object_cache_delete(uint32_t vid);
 
-struct object_cache *find_object_cache(uint32_t vid, int create);
-int object_cache_lookup(struct object_cache *oc, uint32_t index, int create);
-int object_cache_rw(struct object_cache *oc, uint32_t idx, struct request *);
-int object_cache_pull(struct vnode_info *vnode_info, struct object_cache *oc,
-		uint32_t index);
-int object_cache_push(struct vnode_info *vnode_info, struct object_cache *oc);
 int object_cache_init(const char *p);
-int object_is_cached(uint64_t oid);
-void object_cache_delete(uint32_t vid);
-int object_cache_flush_and_delete(struct vnode_info *vnode_info,
-		struct object_cache *oc);
 
 #endif
Index: sheepdog/sheep/store.c
===================================================================
--- sheepdog.orig/sheep/store.c	2012-05-31 07:38:36.099994000 +0200
+++ sheepdog/sheep/store.c	2012-05-31 10:22:29.963900215 +0200
@@ -533,45 +533,6 @@ int read_epoch(uint32_t *epoch, uint64_t
 	return SD_RES_SUCCESS;
 }
 
-static int write_object_cache(uint64_t oid, char *data, unsigned int datalen,
-			      uint64_t offset, uint16_t flags, int copies,
-			      uint32_t epoch, int create)
-{
-	int ret;
-	struct request *req;
-	uint32_t vid = oid_to_vid(oid);
-	uint32_t idx = data_oid_to_idx(oid);
-	struct object_cache *cache;
-
-	if (is_vdi_obj(oid))
-		idx |= 1 << CACHE_VDI_SHIFT;
-
-	cache = find_object_cache(vid, 0);
-
-	req = zalloc(sizeof(*req));
-	if (!req)
-		return SD_RES_NO_MEM;
-
-	if (create)
-		req->rq.opcode = SD_OP_CREATE_AND_WRITE_OBJ;
-	else
-		req->rq.opcode = SD_OP_WRITE_OBJ;
-	req->rq.flags = flags | SD_FLAG_CMD_WRITE;
-	req->rq.data_length = datalen;
-
-	req->rq.obj.oid = oid;
-	req->rq.obj.offset = offset;
-	req->rq.obj.copies = copies;
-
-	req->data = data;
-	req->op = get_sd_op(req->rq.opcode);
-
-	ret = object_cache_rw(cache, idx, req);
-
-	free(req);
-	return ret;
-}
-
 /*
  * Write data to both local object cache (if enabled) and backends
  */
@@ -584,7 +545,7 @@ int write_object(struct vnode_info *vnod
 	int ret;
 
 	if (sys->enable_write_cache && object_is_cached(oid)) {
-		ret = write_object_cache(oid, data, datalen, offset,
+		ret = object_cache_write(oid, data, datalen, offset,
 			flags, nr_copies, epoch, create);
 		if (ret != 0) {
 			eprintf("write cache failed %"PRIx64" %"PRIx32"\n",
@@ -613,40 +574,6 @@ int write_object(struct vnode_info *vnod
 	return ret;
 }
 
-static int read_object_cache(uint64_t oid, char *data, unsigned int datalen,
-			     uint64_t offset, int copies, uint32_t epoch)
-{
-	int ret;
-	struct request *req;
-	uint32_t vid = oid_to_vid(oid);
-	uint32_t idx = data_oid_to_idx(oid);
-	struct object_cache *cache;
-
-	if (is_vdi_obj(oid))
-		idx |= 1 << CACHE_VDI_SHIFT;
-
-	cache = find_object_cache(vid, 0);
-
-	req = zalloc(sizeof(*req));
-	if (!req)
-		return SD_RES_NO_MEM;
-
-	req->rq.opcode = SD_OP_READ_OBJ;
-	req->rq.data_length = datalen;
-
-	req->rq.obj.oid = oid;
-	req->rq.obj.offset = offset;
-	req->rq.obj.copies = copies;
-
-	req->data = data;
-	req->op = get_sd_op(req->rq.opcode);
-
-	ret = object_cache_rw(cache, idx, req);
-
-	free(req);
-
-	return ret;
-}
 /*
  * Read data firstly from local object cache(if enabled), if fail,
  * try read backends
@@ -660,7 +587,7 @@ int read_object(struct vnode_info *vnode
 	int ret;
 
 	if (sys->enable_write_cache && object_is_cached(oid)) {
-		ret = read_object_cache(oid, data, datalen, offset,
+		ret = object_cache_read(oid, data, datalen, offset,
 					nr_copies, epoch);
 		if (ret != SD_RES_SUCCESS) {
 			eprintf("try forward read %"PRIx64" %"PRIx32"\n",



More information about the sheepdog mailing list