[sheepdog] [PATCH v2 1/3] sheepfs: add cache for http interface

Robin Dong robin.k.dong at gmail.com
Fri Mar 7 03:49:47 CET 2014


From: Robin Dong <sanbai at taobao.com>

At present, every read operation on a file exposed through the sheepfs http
interface issues a new http request, which is bad for performance. So we add a
cache: read a big chunk of data from the http interface in one request, and
then serve the following read operations from that local copy.

To implement it, we need to add a 'release' operation to the fuse interface,
so that the cache can be freed when the file is closed.

Signed-off-by: Robin Dong <sanbai at taobao.com>
---
v1-->v2:
  1. change sd_debug() to sheepfs_pr() 

 sheepfs/cluster.c |  3 +-
 sheepfs/config.c  |  8 ++++--
 sheepfs/core.c    | 23 ++++++++++++---
 sheepfs/http.c    | 86 ++++++++++++++++++++++++++++++++++++++++++++++++-------
 sheepfs/node.c    |  6 ++--
 sheepfs/sheepfs.h | 32 ++++++++++++++-------
 sheepfs/vdi.c     |  3 +-
 sheepfs/volume.c  |  3 +-
 8 files changed, 131 insertions(+), 33 deletions(-)

diff --git a/sheepfs/cluster.c b/sheepfs/cluster.c
index 3eb1d0b..17cf2e9 100644
--- a/sheepfs/cluster.c
+++ b/sheepfs/cluster.c
@@ -39,7 +39,8 @@ int create_cluster_layout(void)
 	return 0;
 }
 
-int cluster_info_read(const char *path, char *buf, size_t size, off_t ignore)
+int cluster_info_read(const char *path, char *buf, size_t size, off_t ignore,
+		      struct fuse_file_info *fi)
 {
 	return shadow_file_read(path, buf, size, 0);
 }
diff --git a/sheepfs/config.c b/sheepfs/config.c
index f9b1ea2..3da1426 100644
--- a/sheepfs/config.c
+++ b/sheepfs/config.c
@@ -51,7 +51,8 @@ int create_config_layout(void)
 	return 0;
 }
 
-int config_pcache_read(const char *path, char *buf, size_t size, off_t ignore)
+int config_pcache_read(const char *path, char *buf, size_t size, off_t ignore,
+		       struct fuse_file_info *fi)
 {
 	snprintf(buf, size, "%d\n", sheepfs_page_cache);
 	return strlen(buf);
@@ -74,7 +75,8 @@ size_t config_pcache_get_size(const char *path)
 	return sizeof(int) + 1/* \n */;
 }
 
-int config_ocache_read(const char *path, char *buf, size_t size, off_t ignore)
+int config_ocache_read(const char *path, char *buf, size_t size, off_t ignore,
+		       struct fuse_file_info *fi)
 {
 	snprintf(buf, size, "%d\n", sheepfs_object_cache);
 	return strlen(buf);
@@ -98,7 +100,7 @@ size_t config_ocache_get_size(const char *path)
 }
 
 int config_sheep_info_read(const char *path, char *buf, size_t size,
-			   off_t ignore)
+			   off_t ignore, struct fuse_file_info *fi)
 {
 	snprintf(buf, size, "%s:%d\n", sdhost, sdport);
 	return strlen(buf);
diff --git a/sheepfs/core.c b/sheepfs/core.c
index f5ee7b2..1bcbaec 100644
--- a/sheepfs/core.c
+++ b/sheepfs/core.c
@@ -53,13 +53,15 @@ static struct option const long_options[] = {
 static const char *short_options = "a:dfhknp:";
 
 static struct sheepfs_file_operation {
-	int (*read)(const char *path, char *buf, size_t size, off_t);
+	int (*read)(const char *path, char *buf, size_t size, off_t,
+		    struct fuse_file_info *fi);
 	int (*write)(const char *path, const char *buf, size_t size, off_t);
 	size_t (*get_size)(const char *path);
 	int (*sync)(const char *path);
 	int (*open)(const char *path, struct fuse_file_info *);
 	int (*unlink)(const char *path);
 	int (*rmdir)(const char *path);
+	int (*release)(const char *path, struct fuse_file_info *);
 } sheepfs_file_ops[] = {
 	[OP_NULL]           = { NULL, NULL, NULL },
 	[OP_CLUSTER_INFO]   = { cluster_info_read, NULL,
@@ -82,8 +84,9 @@ static struct sheepfs_file_operation {
 	[OP_HTTP_ADDRESS]   = { http_address_read, http_address_write,
 				http_address_get_size },
 	[OP_HTTP_OBJECT]    = { NULL, http_object_write },
-	[OP_OBJECT]         = { object_read, NULL, object_get_size, NULL, NULL,
-				object_unlink },
+	[OP_OBJECT]         = { object_read, NULL, object_get_size, NULL,
+				object_open, object_unlink,
+				NULL, object_release },
 	[OP_CONTAINER]      = { NULL, NULL, NULL, NULL, NULL, NULL,
 				container_rmdir },
 #endif
@@ -218,7 +221,7 @@ static int sheepfs_read(const char *path, char *buf, size_t size,
 	unsigned op = sheepfs_get_op(path);
 
 	if (sheepfs_file_ops[op].read)
-		ret = sheepfs_file_ops[op].read(path, buf, size, offset);
+		ret = sheepfs_file_ops[op].read(path, buf, size, offset, fi);
 
 	return ret;
 }
@@ -274,6 +277,17 @@ static int sheepfs_open(const char *path, struct fuse_file_info *fi)
 	return ret;
 }
 
+static int sheepfs_release(const char *path, struct fuse_file_info *fi)
+{
+	int ret = 0;
+	unsigned op = sheepfs_get_op(path);
+
+	if (sheepfs_file_ops[op].release)
+		ret = sheepfs_file_ops[op].release(path, fi);
+
+	return ret;
+}
+
 static struct fuse_operations sheepfs_ops =  {
 	.getattr  = sheepfs_getattr,
 	.unlink   = sheepfs_unlink,
@@ -284,6 +298,7 @@ static struct fuse_operations sheepfs_ops =  {
 	.write    = sheepfs_write,
 	.fsync    = sheepfs_fsync,
 	.open     = sheepfs_open,
+	.release  = sheepfs_release,
 };
 
 static int sheepfs_main_loop(char *mountpoint)
diff --git a/sheepfs/http.c b/sheepfs/http.c
index 50c0a82..2f6d0b8 100644
--- a/sheepfs/http.c
+++ b/sheepfs/http.c
@@ -49,7 +49,8 @@ int create_http_layout(void)
 	return 0;
 }
 
-int http_address_read(const char *path, char *buf, size_t size, off_t ignore)
+int http_address_read(const char *path, char *buf, size_t size, off_t ignore,
+		      struct fuse_file_info *fi)
 {
 	return shadow_file_read(path, buf, size, 0);
 }
@@ -159,8 +160,11 @@ static size_t curl_read_object(const char *url, char *buf, size_t size,
 			       "content_length: %"PRIu64", get_size: %"PRIu64,
 			       (size_t)content_length, size);
 			size = 0;
-		} else
-			sd_debug("Read out %"PRIu64" data from %s", size, url);
+		} else {
+			sheepfs_pr("Read out %"PRIu64" data from %s",
+				   size, url);
+			size = (size_t)content_length;
+		}
 	} else {
 		sheepfs_pr("Failed to call libcurl res: %s, url: %s",
 		       curl_easy_strerror(res), url);
@@ -228,11 +232,22 @@ out:
 	return ret;
 }
 
-int object_read(const char *path, char *buf, size_t size, off_t offset)
+/* per-open read cache size; 64MB was picked with no particular rationale */
+#define CACHE_SIZE	(64 * 1024 * 1024)
+
+struct cache_handle {
+	char *mem;
+	off_t offset;
+	size_t size;
+};
+
+int object_read(const char *path, char *buf, size_t size, off_t offset,
+		struct fuse_file_info *fi)
 {
 	char url[PATH_MAX];
 	char *pos;
 	int ret;
+	struct cache_handle *ch;
 
 	pos = strstr(path, PATH_HTTP);
 	if (!pos) {
@@ -241,18 +256,67 @@ int object_read(const char *path, char *buf, size_t size, off_t offset)
 		goto out;
 	}
 
-	pos += strlen(PATH_HTTP);
-	/* don't need '\n' at the end of 'path' */
-	ret = generate_url(pos, strlen(path) - strlen(PATH_HTTP),
-			   url, PATH_MAX);
-	if (ret)
-		goto out;
+	ch = (struct cache_handle *)fi->fh;
+
+	while (true) {
+		/* try to read from cache first */
+		if (offset >= ch->offset && (ch->offset + ch->size) > offset) {
+			if ((ch->offset + ch->size) > (offset + size))
+				ret = size;
+			else
+				ret = (ch->offset + ch->size) - offset;
+			memcpy(buf, ch->mem + (offset - ch->offset), ret);
+			break;
+		} else { /* update cache */
+			if (!ch->mem)
+				ch->mem = xmalloc(CACHE_SIZE);
+
+			pos += strlen(PATH_HTTP);
+			/* don't need '\n' at the end of 'path' */
+			ret = generate_url(pos,
+					   strlen(path) - strlen(PATH_HTTP),
+					   url, PATH_MAX);
+			if (ret)
+				goto out;
 
-	ret = curl_read_object(url, buf, size, offset);
+			ret = curl_read_object(url, ch->mem, CACHE_SIZE,
+					       offset);
+			ch->offset = offset;
+			ch->size = ret;
+			sheepfs_pr("update cache offset %lu size %d",
+				   offset, ret);
+			if (ret <= 0)
+				break;
+		}
+	}
 out:
 	return ret;
 }
 
+int object_open(const char *path, struct fuse_file_info *fi)
+{
+	struct cache_handle *ch;
+
+	/* don't need page cache of fuse */
+	fi->direct_io = 1;
+
+	ch = xzalloc(sizeof(*ch));
+	fi->fh = (uint64_t)ch;
+
+	return 0;
+}
+
+int object_release(const char *path, struct fuse_file_info *fi)
+{
+	struct cache_handle *ch = (struct cache_handle *)fi->fh;
+
+	free(ch->mem);
+	free(ch);
+	fi->fh = 0;
+
+	return 0;
+}
+
 size_t object_get_size(const char *path)
 {
 	uint64_t object_size;
diff --git a/sheepfs/node.c b/sheepfs/node.c
index 2a7df5b..56e8dc7 100644
--- a/sheepfs/node.c
+++ b/sheepfs/node.c
@@ -46,7 +46,8 @@ int create_node_layout(void)
 	return 0;
 }
 
-int node_info_read(const char *path, char *buf, size_t size, off_t ignore)
+int node_info_read(const char *path, char *buf, size_t size, off_t ignore,
+		   struct fuse_file_info *fi)
 {
 	return shadow_file_read(path, buf, size, 0);
 }
@@ -69,7 +70,8 @@ size_t node_info_get_size(const char *path)
 	return len;
 }
 
-int node_list_read(const char *path, char *buf, size_t size, off_t ignore)
+int node_list_read(const char *path, char *buf, size_t size, off_t ignore,
+		   struct fuse_file_info *fi)
 {
 	return shadow_file_read(path, buf, size, 0);
 }
diff --git a/sheepfs/sheepfs.h b/sheepfs/sheepfs.h
index 495b2e5..5c2998b 100644
--- a/sheepfs/sheepfs.h
+++ b/sheepfs/sheepfs.h
@@ -58,7 +58,8 @@ bool shadow_file_stat(const char *path, struct stat *st);
 
 /* volume.c */
 int create_volume_layout(void);
-int volume_read(const char *path, char *buf, size_t size, off_t offset);
+int volume_read(const char *path, char *buf, size_t size, off_t offset,
+		struct fuse_file_info *fi);
 int volume_write(const char *, const char *buf, size_t size, off_t);
 size_t volume_get_size(const char *);
 int volume_create_entry(const char *entry);
@@ -73,37 +74,44 @@ int sheepfs_bnode_reader(uint64_t oid, void **mem, unsigned int len,
 			 uint64_t offset);
 
 /* cluster.c */
-int cluster_info_read(const char *path, char *buf, size_t size, off_t);
+int cluster_info_read(const char *path, char *buf, size_t size, off_t,
+		      struct fuse_file_info *fi);
 size_t cluster_info_get_size(const char *path);
 int create_cluster_layout(void);
 
 /* vdi.c */
 int create_vdi_layout(void);
-int vdi_list_read(const char *path, char *buf, size_t size, off_t);
+int vdi_list_read(const char *path, char *buf, size_t size, off_t,
+		  struct fuse_file_info *fi);
 size_t vdi_list_get_size(const char *path);
 
 int vdi_mount_write(const char *, const char *buf, size_t size, off_t);
 int vdi_unmount_write(const char *, const char *buf, size_t, off_t);
 
 /* node.c */
-int node_list_read(const char *path, char *buf, size_t size, off_t);
+int node_list_read(const char *path, char *buf, size_t size, off_t,
+		   struct fuse_file_info *fi);
 size_t node_list_get_size(const char *path);
-int node_info_read(const char *path, char *buf, size_t size, off_t);
+int node_info_read(const char *path, char *buf, size_t size, off_t,
+		   struct fuse_file_info *fi);
 size_t node_info_get_size(const char *path);
 int create_node_layout(void);
 
 /* config.c */
 int create_config_layout(void);
 
-int config_pcache_read(const char *path, char *buf, size_t size, off_t);
+int config_pcache_read(const char *path, char *buf, size_t size, off_t,
+		       struct fuse_file_info *fi);
 int config_pcache_write(const char *path, const char *, size_t, off_t);
 size_t config_pcache_get_size(const char *path);
 
-int config_ocache_read(const char *path, char *buf, size_t size, off_t);
+int config_ocache_read(const char *path, char *buf, size_t size, off_t,
+		       struct fuse_file_info *fi);
 int config_ocache_write(const char *path, const char *, size_t, off_t);
 size_t config_ocache_get_size(const char *path);
 
-int config_sheep_info_read(const char *path, char *, size_t size, off_t);
+int config_sheep_info_read(const char *path, char *, size_t size, off_t,
+			   struct fuse_file_info *fi);
 int config_sheep_info_write(const char *, const char *, size_t, off_t);
 size_t config_sheep_info_get_size(const char *path);
 
@@ -114,14 +122,18 @@ int create_http_layout(void);
 static inline int create_http_layout(void) { return 0; }
 #endif
 
-int http_address_read(const char *path, char *buf, size_t size, off_t ignore);
+int http_address_read(const char *path, char *buf, size_t size, off_t ignore,
+		      struct fuse_file_info *fi);
 int http_address_write(const char *path, const char *buf, size_t size,
 		       off_t ignore);
 size_t http_address_get_size(const char *path);
 int http_object_write(const char *path, const char *buf, size_t size,
 		      off_t ignore);
 
-int object_read(const char *path, char *buf, size_t size, off_t ignore);
+int object_open(const char *path, struct fuse_file_info *fi);
+int object_release(const char *path, struct fuse_file_info *fi);
+int object_read(const char *path, char *buf, size_t size, off_t ignore,
+		struct fuse_file_info *fi);
 size_t object_get_size(const char *path);
 int object_unlink(const char *path);
 int container_rmdir(const char *path);
diff --git a/sheepfs/vdi.c b/sheepfs/vdi.c
index f6d0639..3dad753 100644
--- a/sheepfs/vdi.c
+++ b/sheepfs/vdi.c
@@ -51,7 +51,8 @@ int create_vdi_layout(void)
 	return 0;
 }
 
-int vdi_list_read(const char *path, char *buf, size_t size, off_t ignore)
+int vdi_list_read(const char *path, char *buf, size_t size, off_t ignore,
+		  struct fuse_file_info *fi)
 {
 	return shadow_file_read(path, buf, size, 0);
 }
diff --git a/sheepfs/volume.c b/sheepfs/volume.c
index f93525a..b904b6c 100644
--- a/sheepfs/volume.c
+++ b/sheepfs/volume.c
@@ -267,7 +267,8 @@ int sheepfs_bnode_reader(uint64_t oid, void **mem, unsigned int len,
 	return ret;
 }
 
-int volume_read(const char *path, char *buf, size_t size, off_t offset)
+int volume_read(const char *path, char *buf, size_t size, off_t offset,
+		struct fuse_file_info *fi)
 {
 	ssize_t done;
 
-- 
1.7.12.4




More information about the sheepdog mailing list