From: Liu Yuan <tailai.ly at taobao.com> Signed-off-by: Liu Yuan <tailai.ly at taobao.com> --- include/net.h | 2 + sheep/Makefile.am | 2 +- sheep/sheep.c | 2 +- sheep/sheepfs/core.c | 19 +++- sheep/sheepfs/sheepfs.h | 11 ++- sheep/sheepfs/volume.c | 284 +++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 313 insertions(+), 7 deletions(-) create mode 100644 sheep/sheepfs/volume.c diff --git a/include/net.h b/include/net.h index 698b55e..0286ea5 100644 --- a/include/net.h +++ b/include/net.h @@ -4,6 +4,8 @@ #include <sys/socket.h> #include <arpa/inet.h> +#include "sheepdog_proto.h" + #define DEFAULT_SOCKET_TIMEOUT 5 /* seconds */ enum conn_state { diff --git a/sheep/Makefile.am b/sheep/Makefile.am index 09cb091..2e2e17a 100644 --- a/sheep/Makefile.am +++ b/sheep/Makefile.am @@ -48,7 +48,7 @@ endif if BUILD_SHEEPFS sheep_SOURCES += sheepfs/core.c sheepfs/cluster.c sheepfs/VDI.c \ - sheepfs/shadow_file.c + sheepfs/shadow_file.c sheepfs/volume.c endif sheep_LDADD = ../lib/libsheepdog.a -lpthread \ diff --git a/sheep/sheep.c b/sheep/sheep.c index 3f7ef3d..b3118f0 100644 --- a/sheep/sheep.c +++ b/sheep/sheep.c @@ -233,7 +233,7 @@ int main(int argc, char **argv) if (ret) exit(1); - ret = sheepfs_init(dir); + ret = sheepfs_init(dir, port); if (ret) exit(1); diff --git a/sheep/sheepfs/core.c b/sheep/sheepfs/core.c index b07200c..a518e83 100644 --- a/sheep/sheepfs/core.c +++ b/sheep/sheepfs/core.c @@ -8,22 +8,24 @@ #include "strbuf.h" #include "logger.h" #include "sheepfs.h" +#include "net.h" #define SH_OP_NAME "user.sheepfs.opcode" #define SH_OP_SIZE sizeof(uint32_t) char sheepfs_shadow[PATH_MAX]; +int sheep_fd; static struct sheepfs_file_operation { int (*read)(const char *path, char *buf, size_t size, off_t); int (*write)(const char *path, const char *buf, size_t size, off_t); size_t (*get_size)(const char *path); } sheepfs_file_ops[] = { - [OP_NULL = { NULL, NULL, NULL }, - [OP_CLUSTER_INFO] = { cluster_info_read, NULL, - cluster_info_get_size }, + [OP_NULL] 
= { NULL, NULL, NULL }, + [OP_CLUSTER_INFO] = { cluster_info_read, NULL, cluster_info_get_size }, [OP_VDI_LIST] = { vdi_list_read, NULL, vdi_list_get_size }, [OP_VDI_MOUNT] = { NULL, vdi_mount_write, NULL }, + [OP_VOLUME] = { volume_read, volume_write, volume_get_size }, }; int sheepfs_set_op(const char *path, unsigned opcode) @@ -164,6 +166,8 @@ static void sheepfs_main_loop(char *root) } fuse_opt_add_arg(&args, "sheepfs"); /* placeholder for argv[0] */ + fuse_opt_add_arg(&args, "-oallow_root"); + fuse_opt_add_arg(&args, "-obig_writes"); fuse_opt_add_arg(&args, "-ofsname=sheepfs"); fuse_opt_add_arg(&args, root); ret = fuse_main(args.argc, args.argv, &sheepfs_ops, NULL); @@ -177,11 +181,13 @@ static int create_sheepfs_layout(void) return -1; if (create_vdi_layout() < 0) return -1; + if (create_volume_layout() < 0) + return -1; return 0; } -int sheepfs_init(const char *dir) +int sheepfs_init(const char *dir, int port) { struct strbuf path = STRBUF_INIT; pid_t pid; @@ -207,6 +213,11 @@ int sheepfs_init(const char *dir) strbuf_release(&path); return 0; } else /* child */ { + sheep_fd = connect_to("localhost", port); + if (sheep_fd < 0) { + eprintf("failed to connect sheep\n"); + exit(-1); + } sheepfs_main_loop(path.buf); exit(0); } diff --git a/sheep/sheepfs/sheepfs.h b/sheep/sheepfs/sheepfs.h index 85c95e3..f53b74c 100644 --- a/sheep/sheepfs/sheepfs.h +++ b/sheep/sheepfs/sheepfs.h @@ -6,12 +6,14 @@ enum sheepfs_opcode { OP_CLUSTER_INFO, OP_VDI_LIST, OP_VDI_MOUNT, + OP_VOLUME, }; extern char sheepfs_shadow[]; +extern int sheep_fd; extern struct strbuf *sheepfs_run_cmd(const char *command); -extern int sheepfs_init(const char *dir); +extern int sheepfs_init(const char *dir, int port); extern int sheepfs_set_op(const char *path, unsigned opcode); /* shadow_file.c */ @@ -26,6 +28,13 @@ extern int shadow_file_getxattr(const char *path, const char *name, extern int shadow_file_delete(const char *path); extern int shadow_file_exsit(const char *path); +/* volume.c */ +extern 
int create_volume_layout(void); +extern int volume_read(const char *path, char *buf, size_t size, off_t offset); +extern int volume_write(const char *, const char *buf, size_t size, off_t); +extern size_t volume_get_size(const char *); +extern int volume_create_entry(const char *entry); + /* cluster.c */ extern int cluster_info_read(const char *path, char *buf, size_t size, off_t); extern size_t cluster_info_get_size(const char *path); diff --git a/sheep/sheepfs/volume.c b/sheep/sheepfs/volume.c new file mode 100644 index 0000000..d667952 --- /dev/null +++ b/sheep/sheepfs/volume.c @@ -0,0 +1,284 @@ +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <stdlib.h> +#include <stdio.h> +#include <time.h> +#include <assert.h> + +#include "../sheep_priv.h" +#include "sheepdog_proto.h" +#include "../strbuf.h" +#include "sheepfs.h" +#include "logger.h" +#include "net.h" + +#define PATH_VOLUME "/volume" + +#define SH_VID_NAME "user.volume.vid" +#define SH_VID_SIZE sizeof(uint32_t) + +#define SH_SIZE_NAME "user.volume.size" +#define SH_SIZE_SIZE sizeof(size_t) + +#define VOLUME_READ 0 +#define VOLUME_WRITE 1 + +struct vdi_inode { + struct rb_node rb; + uint32_t vid; + struct sheepdog_inode *inode; +}; + +static struct rb_root vdi_inode_tree = RB_ROOT; + +static struct vdi_inode *vdi_inode_tree_insert(struct vdi_inode *new) +{ + struct rb_node **p = &vdi_inode_tree.rb_node; + struct rb_node *parent = NULL; + struct vdi_inode *entry; + + while (*p) { + parent = *p; + entry = rb_entry(parent, struct vdi_inode, rb); + + if (new->vid < entry->vid) + p = &(*p)->rb_left; + else if (new->vid > entry->vid) + p = &(*p)->rb_right; + else + return entry; /* already has this entry */ + } + rb_link_node(&new->rb, parent, p); + rb_insert_color(&new->rb, &vdi_inode_tree); + + return NULL; /* insert successfully */ +} + +static struct vdi_inode *vdi_inode_tree_search(uint32_t vid) +{ + struct rb_node *n = vdi_inode_tree.rb_node; 
+ struct vdi_inode *t; + + while (n) { + t = rb_entry(n, struct vdi_inode, rb); + + if (vid < t->vid) + n = n->rb_left; + else if (vid > t->vid) + n = n->rb_right; + else + return t; /* found it */ + } + + return NULL; +} + +int create_volume_layout(void) +{ + if (shadow_dir_create(PATH_VOLUME) < 0) + return -1; + return 0; +} + +static int volume_rw_object(char *buf, uint64_t oid, size_t size, + off_t off, int rw) +{ + struct sd_obj_req hdr = { 0 }; + struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr; + int ret; + unsigned wlen = 0, rlen = 0; + int create = 0; + uint32_t vid = oid_to_vid(oid); + struct vdi_inode *vdi = vdi_inode_tree_search(vid); + unsigned long idx = 0; + + if (is_data_obj(oid)) { + idx = data_oid_to_idx(oid); + assert(vdi); + if (!vdi->inode->data_vdi_id[idx]) { + /* if object doesn't exist, we're done */ + if (rw == VOLUME_READ) + goto done; + create = 1; + } + } + + if (rw == VOLUME_READ) { + rlen = size; + hdr.opcode = SD_OP_READ_OBJ; + } else { + wlen = size; + hdr.opcode = create ? 
+ SD_OP_CREATE_AND_WRITE_OBJ : SD_OP_WRITE_OBJ; + hdr.flags |= SD_FLAG_CMD_WRITE | SD_FLAG_CMD_CACHE; + } + + hdr.oid = oid; + hdr.data_length = size; + hdr.offset = off; + + ret = exec_req(sheep_fd, (struct sd_req *)&hdr, buf, &wlen, &rlen); + + if (ret) { + eprintf("failed to operate object %" PRIx64 "\n", oid); + return -1; + } + + if (rsp->result != SD_RES_SUCCESS) { + eprintf("failed to operate object %" PRIx64 " %s\n", oid, + sd_strerror(rsp->result)); + return -1; + } + + if (create) { + vdi->inode->data_vdi_id[idx] = vid; + /* writeback inode update */ + volume_rw_object((char *)&vid, vid_to_vdi_oid(vid), + sizeof(vid), + SD_INODE_HEADER_SIZE + sizeof(vid) * idx, + VOLUME_WRITE); + } +done: + return size; +} + +/* Do sync read/write */ +static int volume_do_rw(const char *path, char *buf, size_t size, + off_t offset, int rw) +{ + uint32_t vid; + uint64_t oid; + unsigned long idx; + off_t start; + size_t len, ret; + + if (shadow_file_getxattr(path, SH_VID_NAME, &vid, SH_VID_SIZE) < 0) + return -1; + + idx = offset / SD_DATA_OBJ_SIZE; + oid = vid_to_data_oid(vid, idx); + start = offset % SD_DATA_OBJ_SIZE; + + len = SD_DATA_OBJ_SIZE - start; + if (size < len) + len = size; + + do { + ret = volume_rw_object(buf, oid, len, start, rw); + dprintf("%s oid %016"PRIx64", off %ju, len %zu," + "ret %zu, size %zu\n", + rw == VOLUME_READ ? "read" : "write", + oid, start, len, ret, size); + + if (ret != len) + return -1; + + oid++; + size -= len; + start += len; + buf += len; + len = size > SD_DATA_OBJ_SIZE ? 
SD_DATA_OBJ_SIZE : size; + } while (size > 0); + + return 0; +} + +int volume_read(const char *path, char *buf, size_t size, off_t offset) +{ + + if (volume_do_rw(path, buf, size, offset, VOLUME_READ) < 0) + return -EIO; + + return size; +} + +int volume_write(const char *path, const char *buf, size_t size, off_t offset) +{ + if (volume_do_rw(path, (char *)buf, size, offset, VOLUME_WRITE) < 0) + return -EIO; + + return size; +} + +size_t volume_get_size(const char *path) +{ + size_t size = 0; + + shadow_file_getxattr(path, SH_SIZE_NAME, &size, SH_SIZE_SIZE); + return size; +} + +static int init_vdi_info(const char *entry, uint32_t *vid, size_t *size) +{ + struct strbuf *buf; + void *inode_buf; + struct vdi_inode *inode; + char command[256] = { 0 }; + + sprintf(command, "%s %s\n", "collie vdi list -r", entry); + buf = sheepfs_run_cmd(command); + if (!buf) + return -1; + if (sscanf(buf->buf, "%*s %*s %*d %zu %*s %*s %*s %"PRIx32, + size, vid) < 2) { + dprintf("%m\n"); + return -1; + } + + inode_buf = malloc(SD_INODE_SIZE); + if (!inode_buf) { + dprintf("%m\n"); + return -1; + } + + if (volume_rw_object(inode_buf, vid_to_vdi_oid(*vid), SD_INODE_SIZE, + 0, VOLUME_READ) < 0) { + free(inode_buf); + return -1; + } + + inode = xzalloc(sizeof(*inode)); + inode->vid = *vid; + inode->inode = inode_buf; + if (vdi_inode_tree_insert(inode)) + free(inode); + + strbuf_release(buf); + return 0; +} + +int volume_create_entry(const char *entry) +{ + char path[PATH_MAX], *ch; + uint32_t vid; + size_t size; + + ch = strchr(entry, '\n'); + if (ch != NULL) + *ch = '\0'; + + sprintf(path, "%s/%s", PATH_VOLUME, entry); + if (shadow_file_exsit(path)) + return 0; + + if (shadow_file_create(path) < 0) + return -1; + + if (init_vdi_info(entry, &vid, &size) < 0) + return -1; + if (shadow_file_setxattr(path, SH_VID_NAME, &vid, SH_VID_SIZE) < 0) { + shadow_file_delete(path); + return -1; + } + if (shadow_file_setxattr(path, SH_SIZE_NAME, &size, SH_SIZE_SIZE) < 0) { + shadow_file_delete(path); + 
return -1; + } + if (sheepfs_set_op(path, OP_VOLUME) < 0) + return -1; + + return 0; +} -- 1.7.8.2 |