[sheepdog] [PATCH v3 1/4] sheepfs: add cache for http interface
Robin Dong
robin.k.dong at gmail.com
Fri Mar 7 10:48:56 CET 2014
From: Robin Dong <sanbai at taobao.com>
At present, every read operation on a sheepfs file served through the http
interface issues a new http request, which is bad for performance. So we add a
cache: read a big chunk of data through the http interface once, and then serve
subsequent read operations from that local copy.
To implement it, we need to add a 'release' operation to the fuse interface.
Signed-off-by: Robin Dong <sanbai at taobao.com>
---
v1-->v2:
1. change sd_debug() to sheepfs_pr()
sheepfs/cluster.c | 3 +-
sheepfs/config.c | 8 ++++--
sheepfs/core.c | 23 ++++++++++++---
sheepfs/http.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++-------
sheepfs/node.c | 6 ++--
sheepfs/sheepfs.h | 32 ++++++++++++++-------
sheepfs/vdi.c | 3 +-
sheepfs/volume.c | 3 +-
8 files changed, 131 insertions(+), 33 deletions(-)
diff --git a/sheepfs/cluster.c b/sheepfs/cluster.c
index 3eb1d0b..17cf2e9 100644
--- a/sheepfs/cluster.c
+++ b/sheepfs/cluster.c
@@ -39,7 +39,8 @@ int create_cluster_layout(void)
return 0;
}
-int cluster_info_read(const char *path, char *buf, size_t size, off_t ignore)
+int cluster_info_read(const char *path, char *buf, size_t size, off_t ignore,
+ struct fuse_file_info *fi)
{
return shadow_file_read(path, buf, size, 0);
}
diff --git a/sheepfs/config.c b/sheepfs/config.c
index f9b1ea2..3da1426 100644
--- a/sheepfs/config.c
+++ b/sheepfs/config.c
@@ -51,7 +51,8 @@ int create_config_layout(void)
return 0;
}
-int config_pcache_read(const char *path, char *buf, size_t size, off_t ignore)
+int config_pcache_read(const char *path, char *buf, size_t size, off_t ignore,
+ struct fuse_file_info *fi)
{
snprintf(buf, size, "%d\n", sheepfs_page_cache);
return strlen(buf);
@@ -74,7 +75,8 @@ size_t config_pcache_get_size(const char *path)
return sizeof(int) + 1/* \n */;
}
-int config_ocache_read(const char *path, char *buf, size_t size, off_t ignore)
+int config_ocache_read(const char *path, char *buf, size_t size, off_t ignore,
+ struct fuse_file_info *fi)
{
snprintf(buf, size, "%d\n", sheepfs_object_cache);
return strlen(buf);
@@ -98,7 +100,7 @@ size_t config_ocache_get_size(const char *path)
}
int config_sheep_info_read(const char *path, char *buf, size_t size,
- off_t ignore)
+ off_t ignore, struct fuse_file_info *fi)
{
snprintf(buf, size, "%s:%d\n", sdhost, sdport);
return strlen(buf);
diff --git a/sheepfs/core.c b/sheepfs/core.c
index f5ee7b2..1bcbaec 100644
--- a/sheepfs/core.c
+++ b/sheepfs/core.c
@@ -53,13 +53,15 @@ static struct option const long_options[] = {
static const char *short_options = "a:dfhknp:";
static struct sheepfs_file_operation {
- int (*read)(const char *path, char *buf, size_t size, off_t);
+ int (*read)(const char *path, char *buf, size_t size, off_t,
+ struct fuse_file_info *fi);
int (*write)(const char *path, const char *buf, size_t size, off_t);
size_t (*get_size)(const char *path);
int (*sync)(const char *path);
int (*open)(const char *path, struct fuse_file_info *);
int (*unlink)(const char *path);
int (*rmdir)(const char *path);
+ int (*release)(const char *path, struct fuse_file_info *);
} sheepfs_file_ops[] = {
[OP_NULL] = { NULL, NULL, NULL },
[OP_CLUSTER_INFO] = { cluster_info_read, NULL,
@@ -82,8 +84,9 @@ static struct sheepfs_file_operation {
[OP_HTTP_ADDRESS] = { http_address_read, http_address_write,
http_address_get_size },
[OP_HTTP_OBJECT] = { NULL, http_object_write },
- [OP_OBJECT] = { object_read, NULL, object_get_size, NULL, NULL,
- object_unlink },
+ [OP_OBJECT] = { object_read, NULL, object_get_size, NULL,
+ object_open, object_unlink,
+ NULL, object_release },
[OP_CONTAINER] = { NULL, NULL, NULL, NULL, NULL, NULL,
container_rmdir },
#endif
@@ -218,7 +221,7 @@ static int sheepfs_read(const char *path, char *buf, size_t size,
unsigned op = sheepfs_get_op(path);
if (sheepfs_file_ops[op].read)
- ret = sheepfs_file_ops[op].read(path, buf, size, offset);
+ ret = sheepfs_file_ops[op].read(path, buf, size, offset, fi);
return ret;
}
@@ -274,6 +277,17 @@ static int sheepfs_open(const char *path, struct fuse_file_info *fi)
return ret;
}
+static int sheepfs_release(const char *path, struct fuse_file_info *fi)
+{
+ int ret = 0;
+ unsigned op = sheepfs_get_op(path);
+
+ if (sheepfs_file_ops[op].release)
+ ret = sheepfs_file_ops[op].release(path, fi);
+
+ return ret;
+}
+
static struct fuse_operations sheepfs_ops = {
.getattr = sheepfs_getattr,
.unlink = sheepfs_unlink,
@@ -284,6 +298,7 @@ static struct fuse_operations sheepfs_ops = {
.write = sheepfs_write,
.fsync = sheepfs_fsync,
.open = sheepfs_open,
+ .release = sheepfs_release,
};
static int sheepfs_main_loop(char *mountpoint)
diff --git a/sheepfs/http.c b/sheepfs/http.c
index 50c0a82..2f6d0b8 100644
--- a/sheepfs/http.c
+++ b/sheepfs/http.c
@@ -49,7 +49,8 @@ int create_http_layout(void)
return 0;
}
-int http_address_read(const char *path, char *buf, size_t size, off_t ignore)
+int http_address_read(const char *path, char *buf, size_t size, off_t ignore,
+ struct fuse_file_info *fi)
{
return shadow_file_read(path, buf, size, 0);
}
@@ -159,8 +160,11 @@ static size_t curl_read_object(const char *url, char *buf, size_t size,
"content_length: %"PRIu64", get_size: %"PRIu64,
(size_t)content_length, size);
size = 0;
- } else
- sd_debug("Read out %"PRIu64" data from %s", size, url);
+ } else {
+ sheepfs_pr("Read out %"PRIu64" data from %s",
+ size, url);
+ size = (size_t)content_length;
+ }
} else {
sheepfs_pr("Failed to call libcurl res: %s, url: %s",
curl_easy_strerror(res), url);
@@ -228,11 +232,22 @@ out:
return ret;
}
-int object_read(const char *path, char *buf, size_t size, off_t offset)
+/* size in bytes (64 MiB) of the read cache buffer allocated per open file */
+#define CACHE_SIZE (64 * 1024 * 1024)
+
+struct cache_handle {
+ char *mem;
+ off_t offset;
+ size_t size;
+};
+
+int object_read(const char *path, char *buf, size_t size, off_t offset,
+ struct fuse_file_info *fi)
{
char url[PATH_MAX];
char *pos;
int ret;
+ struct cache_handle *ch;
pos = strstr(path, PATH_HTTP);
if (!pos) {
@@ -241,18 +256,67 @@ int object_read(const char *path, char *buf, size_t size, off_t offset)
goto out;
}
- pos += strlen(PATH_HTTP);
- /* don't need '\n' at the end of 'path' */
- ret = generate_url(pos, strlen(path) - strlen(PATH_HTTP),
- url, PATH_MAX);
- if (ret)
- goto out;
+ ch = (struct cache_handle *)fi->fh;
+
+ while (true) {
+ /* try to read from cache first */
+ if (offset >= ch->offset && (ch->offset + ch->size) > offset) {
+ if ((ch->offset + ch->size) > (offset + size))
+ ret = size;
+ else
+ ret = (ch->offset + ch->size) - offset;
+ memcpy(buf, ch->mem + (offset - ch->offset), ret);
+ break;
+ } else { /* update cache */
+ if (!ch->mem)
+ ch->mem = xmalloc(CACHE_SIZE);
+
+ pos += strlen(PATH_HTTP);
+ /* don't need '\n' at the end of 'path' */
+ ret = generate_url(pos,
+ strlen(path) - strlen(PATH_HTTP),
+ url, PATH_MAX);
+ if (ret)
+ goto out;
- ret = curl_read_object(url, buf, size, offset);
+ ret = curl_read_object(url, ch->mem, CACHE_SIZE,
+ offset);
+ ch->offset = offset;
+ ch->size = ret;
+ sheepfs_pr("update cache offset %lu size %d",
+ offset, ret);
+ if (ret <= 0)
+ break;
+ }
+ }
out:
return ret;
}
+int object_open(const char *path, struct fuse_file_info *fi)
+{
+ struct cache_handle *ch;
+
+ /* don't need page cache of fuse */
+ fi->direct_io = 1;
+
+ ch = xzalloc(sizeof(*ch));
+ fi->fh = (uint64_t)ch;
+
+ return 0;
+}
+
+int object_release(const char *path, struct fuse_file_info *fi)
+{
+ struct cache_handle *ch = (struct cache_handle *)fi->fh;
+
+ free(ch->mem);
+ free(ch);
+ fi->fh = 0;
+
+ return 0;
+}
+
size_t object_get_size(const char *path)
{
uint64_t object_size;
diff --git a/sheepfs/node.c b/sheepfs/node.c
index 2a7df5b..56e8dc7 100644
--- a/sheepfs/node.c
+++ b/sheepfs/node.c
@@ -46,7 +46,8 @@ int create_node_layout(void)
return 0;
}
-int node_info_read(const char *path, char *buf, size_t size, off_t ignore)
+int node_info_read(const char *path, char *buf, size_t size, off_t ignore,
+ struct fuse_file_info *fi)
{
return shadow_file_read(path, buf, size, 0);
}
@@ -69,7 +70,8 @@ size_t node_info_get_size(const char *path)
return len;
}
-int node_list_read(const char *path, char *buf, size_t size, off_t ignore)
+int node_list_read(const char *path, char *buf, size_t size, off_t ignore,
+ struct fuse_file_info *fi)
{
return shadow_file_read(path, buf, size, 0);
}
diff --git a/sheepfs/sheepfs.h b/sheepfs/sheepfs.h
index 495b2e5..5c2998b 100644
--- a/sheepfs/sheepfs.h
+++ b/sheepfs/sheepfs.h
@@ -58,7 +58,8 @@ bool shadow_file_stat(const char *path, struct stat *st);
/* volume.c */
int create_volume_layout(void);
-int volume_read(const char *path, char *buf, size_t size, off_t offset);
+int volume_read(const char *path, char *buf, size_t size, off_t offset,
+ struct fuse_file_info *fi);
int volume_write(const char *, const char *buf, size_t size, off_t);
size_t volume_get_size(const char *);
int volume_create_entry(const char *entry);
@@ -73,37 +74,44 @@ int sheepfs_bnode_reader(uint64_t oid, void **mem, unsigned int len,
uint64_t offset);
/* cluster.c */
-int cluster_info_read(const char *path, char *buf, size_t size, off_t);
+int cluster_info_read(const char *path, char *buf, size_t size, off_t,
+ struct fuse_file_info *fi);
size_t cluster_info_get_size(const char *path);
int create_cluster_layout(void);
/* vdi.c */
int create_vdi_layout(void);
-int vdi_list_read(const char *path, char *buf, size_t size, off_t);
+int vdi_list_read(const char *path, char *buf, size_t size, off_t,
+ struct fuse_file_info *fi);
size_t vdi_list_get_size(const char *path);
int vdi_mount_write(const char *, const char *buf, size_t size, off_t);
int vdi_unmount_write(const char *, const char *buf, size_t, off_t);
/* node.c */
-int node_list_read(const char *path, char *buf, size_t size, off_t);
+int node_list_read(const char *path, char *buf, size_t size, off_t,
+ struct fuse_file_info *fi);
size_t node_list_get_size(const char *path);
-int node_info_read(const char *path, char *buf, size_t size, off_t);
+int node_info_read(const char *path, char *buf, size_t size, off_t,
+ struct fuse_file_info *fi);
size_t node_info_get_size(const char *path);
int create_node_layout(void);
/* config.c */
int create_config_layout(void);
-int config_pcache_read(const char *path, char *buf, size_t size, off_t);
+int config_pcache_read(const char *path, char *buf, size_t size, off_t,
+ struct fuse_file_info *fi);
int config_pcache_write(const char *path, const char *, size_t, off_t);
size_t config_pcache_get_size(const char *path);
-int config_ocache_read(const char *path, char *buf, size_t size, off_t);
+int config_ocache_read(const char *path, char *buf, size_t size, off_t,
+ struct fuse_file_info *fi);
int config_ocache_write(const char *path, const char *, size_t, off_t);
size_t config_ocache_get_size(const char *path);
-int config_sheep_info_read(const char *path, char *, size_t size, off_t);
+int config_sheep_info_read(const char *path, char *, size_t size, off_t,
+ struct fuse_file_info *fi);
int config_sheep_info_write(const char *, const char *, size_t, off_t);
size_t config_sheep_info_get_size(const char *path);
@@ -114,14 +122,18 @@ int create_http_layout(void);
static inline int create_http_layout(void) { return 0; }
#endif
-int http_address_read(const char *path, char *buf, size_t size, off_t ignore);
+int http_address_read(const char *path, char *buf, size_t size, off_t ignore,
+ struct fuse_file_info *fi);
int http_address_write(const char *path, const char *buf, size_t size,
off_t ignore);
size_t http_address_get_size(const char *path);
int http_object_write(const char *path, const char *buf, size_t size,
off_t ignore);
-int object_read(const char *path, char *buf, size_t size, off_t ignore);
+int object_open(const char *path, struct fuse_file_info *fi);
+int object_release(const char *path, struct fuse_file_info *fi);
+int object_read(const char *path, char *buf, size_t size, off_t ignore,
+ struct fuse_file_info *fi);
size_t object_get_size(const char *path);
int object_unlink(const char *path);
int container_rmdir(const char *path);
diff --git a/sheepfs/vdi.c b/sheepfs/vdi.c
index f6d0639..3dad753 100644
--- a/sheepfs/vdi.c
+++ b/sheepfs/vdi.c
@@ -51,7 +51,8 @@ int create_vdi_layout(void)
return 0;
}
-int vdi_list_read(const char *path, char *buf, size_t size, off_t ignore)
+int vdi_list_read(const char *path, char *buf, size_t size, off_t ignore,
+ struct fuse_file_info *fi)
{
return shadow_file_read(path, buf, size, 0);
}
diff --git a/sheepfs/volume.c b/sheepfs/volume.c
index f93525a..b904b6c 100644
--- a/sheepfs/volume.c
+++ b/sheepfs/volume.c
@@ -267,7 +267,8 @@ int sheepfs_bnode_reader(uint64_t oid, void **mem, unsigned int len,
return ret;
}
-int volume_read(const char *path, char *buf, size_t size, off_t offset)
+int volume_read(const char *path, char *buf, size_t size, off_t offset,
+ struct fuse_file_info *fi)
{
ssize_t done;
--
1.7.12.4
More information about the sheepdog
mailing list