[sheepdog] [PATCH v3 2/9] sbd: introduce basic framework for Sheepdog Block Device

Robin Dong robin.k.dong at gmail.com
Wed May 28 11:56:18 CEST 2014


2014-05-26 13:18 GMT+08:00 Liu Yuan <namei.unix at gmail.com>:

> From: Liu Yuan <tailai.ly at taobao.com>
>
> This is similar to Ceph's RBD. The main motivation is to replace complex
> and inefficient middleware (such as iSCSI software) with a simple software
> stack
> that exposes sheepdog storage through the Linux block device interface.
>
> Usage:
>
> We control the device the same way as RBD.
>
> # associate vdi 'test' to /dev/sbd0
> $ echo 127.0.0.1 7000 test > /sys/bus/sbd/add
>
> # remove the device sbd0
> $ echo 0 > /sys/bus/sbd/remove
>
> For now we don't do actual read/write yet; only an aio framework is sketched
> out.
>
> Signed-off-by: Liu Yuan <namei.unix at gmail.com>
> ---
>  sbd/Kbuild               |   5 +
>  sbd/Makefile             |   8 +
>  sbd/sbd.h                | 119 +++++++++++++
>  sbd/sheep.c              | 436
> +++++++++++++++++++++++++++++++++++++++++++++++
>  sbd/sheep_block_device.c | 299 ++++++++++++++++++++++++++++++++
>  5 files changed, 867 insertions(+)
>  create mode 100644 sbd/Kbuild
>  create mode 100644 sbd/Makefile
>  create mode 100644 sbd/sbd.h
>  create mode 100644 sbd/sheep.c
>  create mode 100644 sbd/sheep_block_device.c
>
> diff --git a/sbd/Kbuild b/sbd/Kbuild
> new file mode 100644
> index 0000000..c1f7c07
> --- /dev/null
> +++ b/sbd/Kbuild
> @@ -0,0 +1,5 @@
> +MODULE_NAME=sbd
> +
> +ccflags-y      := -I$(PWD)/../include -DDEBUG
> +obj-m          := $(MODULE_NAME).o
> +$(MODULE_NAME)-y := sheep_block_device.o sheep.o
> diff --git a/sbd/Makefile b/sbd/Makefile
> new file mode 100644
> index 0000000..940c0c3
> --- /dev/null
> +++ b/sbd/Makefile
> @@ -0,0 +1,8 @@
> +KDIR   ?= /lib/modules/`uname -r`/build
> +
> +default:
> +       $(MAKE) -C $(KDIR) M=$$PWD modules
> +clean:
> +       $(MAKE) -C $(KDIR) M=$$PWD clean
> +install:
> +       $(MAKE) -C $(KDIR) M=$$PWD modules_install
> diff --git a/sbd/sbd.h b/sbd/sbd.h
> new file mode 100644
> index 0000000..e938561
> --- /dev/null
> +++ b/sbd/sbd.h
> @@ -0,0 +1,119 @@
> +#ifndef _SBD_H_
> +#define _SBD_H_
> +
> +#include <linux/socket.h>
> +#include <linux/in.h>
> +#include <linux/inet.h>
> +#include <linux/socket.h>
> +#include <linux/net.h>
> +#include <linux/tcp.h>
> +#include <linux/slab.h>
> +#include <linux/kernel.h>
> +#include <linux/device.h>
> +#include <linux/module.h>
> +#include <linux/fs.h>
> +#include <linux/blkdev.h>
> +#include <linux/kthread.h>
> +#include <linux/gfp.h>
> +
> +#include "sheepdog_proto.h"
> +
> +#define DRV_NAME "sbd"
> +#define DEV_NAME_LEN 32
> +#define SBD_MINORS_PER_MAJOR 32
> +#define SECTOR_SIZE 512
> +
> +struct sheep_vdi {
> +       struct sd_inode *inode;
> +       u32 vid;
> +       char ip[16];
> +       unsigned int port;
> +       char name[SD_MAX_VDI_LEN];
> +};
> +
> +struct sbd_device {
> +       struct socket *sock;
> +       int id;         /* blkdev unique id */
> +       atomic_t seq_num;
> +
> +       int major;
> +       int minor;
> +       struct gendisk *disk;
> +       struct request_queue *rq;
> +       spinlock_t queue_lock;   /* request queue lock */
> +
> +       struct sheep_vdi vdi;           /* Associated sheep image */
> +
> +       struct list_head inflight_head;
> +       wait_queue_head_t inflight_wq;
> +       struct list_head blocking_head;
> +
> +       struct list_head list;
> +       struct task_struct *reaper;
> +};
> +
> +struct sheep_aiocb {
> +       struct request *request;
> +       u64 offset;
> +       u64 length;
> +       int ret;
> +       u32 nr_requests;
> +       char *buf;
> +       int buf_iter;
> +       void (*aio_done_func)(struct sheep_aiocb *, bool);
> +};
> +
> +enum sheep_request_type {
> +       SHEEP_READ,
> +       SHEEP_WRITE,
> +       SHEEP_CREATE,
> +};
> +
> +struct sheep_request {
> +       struct list_head list;
> +       struct sheep_aiocb *aiocb;
> +       u64 oid;
> +       u32 seq_num;
> +       int type;
> +       int offset;
> +       int length;
> +       char *buf;
> +};
> +
> +void socket_shutdown(struct socket *sock);
> +int sheep_setup_vdi(struct sbd_device *dev);
> +struct sheep_aiocb *sheep_aiocb_setup(struct request *req);
> +int sheep_aiocb_submit(struct sheep_aiocb *aiocb);
> +int sheep_handle_reply(struct sbd_device *dev);
> +
> +#if defined(CONFIG_DYNAMIC_DEBUG) && defined _DPRINTK_FLAGS_INCL_MODNAME
> +
> +# define _SBD_FLAGS (_DPRINTK_FLAGS_PRINT | _DPRINTK_FLAGS_INCL_MODNAME \
> +       | _DPRINTK_FLAGS_INCL_FUNCNAME | _DPRINTK_FLAGS_INCL_LINENO)
> +
> +# define SBD_DYNAMIC_DEBUG_METADATA(name, fmt)                  \
> +       static struct _ddebug  __aligned(8)                     \
> +        __attribute__((section("__verbose"))) name = {          \
> +               .modname = KBUILD_MODNAME,                      \
> +               .function = __func__,                           \
> +               .filename = __FILE__,                           \
> +               .format = (fmt),                                \
> +               .lineno = __LINE__,                             \
> +               .flags =  _SBD_FLAGS,                           \
> +       }
> +
> +# define sbd_debug(fmt, ...)                            \
> +({                                                      \
> +       SBD_DYNAMIC_DEBUG_METADATA(descriptor, fmt);    \
> +       __dynamic_pr_debug(&descriptor, pr_fmt(fmt),    \
> +                          ##__VA_ARGS__);              \
> +})
> +
> +#else
> +
> +/* If -DDEBUG is not set, pr_debug = no_printk */
> +# define sbd_debug pr_debug
> +
> +#endif /* CONFIG_DYNAMIC_DEBUG */
> +
> +#endif /* _SBD_H_ */
> diff --git a/sbd/sheep.c b/sbd/sheep.c
> new file mode 100644
> index 0000000..33269b4
> --- /dev/null
> +++ b/sbd/sheep.c
> @@ -0,0 +1,436 @@
> +/*
> + * Copyright (C) 2014 Liu Yuan <namei.unix at gmail.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "sbd.h"
> +
> +void socket_shutdown(struct socket *sock)
> +{
> +       if (sock)
> +               kernel_sock_shutdown(sock, SHUT_RDWR);
> +}
> +
> +static struct sbd_device *sheep_aiocb_to_device(struct sheep_aiocb *aiocb)
> +{
> +       return aiocb->request->q->queuedata;
> +}
> +
> +static int socket_create(struct socket **sock, const char *ip_addr, int
> port)
> +{
> +       struct sockaddr_in addr;
> +       mm_segment_t oldmm = get_fs();
> +       struct linger linger_opt = {1, 0};
> +       int ret, nodelay = 1;
> +
> +       ret = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, sock);
> +       if (ret < 0) {
> +               pr_err("fail to create socket\n");
> +               return ret;
> +       }
> +
> +       set_fs(KERNEL_DS);
> +       ret = sock_setsockopt(*sock, SOL_SOCKET, SO_LINGER,
> +                             (char *)&linger_opt, sizeof(linger_opt));
> +       set_fs(oldmm);
> +       if (ret != 0) {
> +               pr_err("Can't set SO_LINGER: %d\n", ret);
> +               goto shutdown;
> +       }
> +
> +       set_fs(KERNEL_DS);
> +       ret = sock_setsockopt(*sock, SOL_TCP, TCP_NODELAY,
> +                             (char *)&nodelay, sizeof(nodelay));
> +       set_fs(oldmm);
> +       if (ret != 0) {
> +               pr_err("Can't set SO_LINGER: %d\n", ret);
> +               goto shutdown;
> +       }
> +
> +       memset(&addr, 0, sizeof(addr));
> +       addr.sin_family = AF_INET;
> +       addr.sin_port = htons(port);
> +       addr.sin_addr.s_addr = in_aton(ip_addr);
> +       ret = (*sock)->ops->connect(*sock, (struct sockaddr *)&addr,
> +                                   sizeof(addr), 0);
> +       if (ret < 0) {
> +               pr_err("failed connect to %s:%d\n", ip_addr, port);
> +               goto shutdown;
> +       }
> +
> +       return ret;
> +shutdown:
> +       socket_shutdown(*sock);
> +       *sock = NULL;
> +       return ret;
> +}
> +
> +static int socket_xmit(struct socket *sock, void *buf, int size, bool
> send,
> +                      int msg_flags)
> +{
> +       int result;
> +       struct msghdr msg;
> +       struct kvec iov;
> +       sigset_t blocked, oldset;
> +
> +       if (unlikely(!sock))
> +               return -EINVAL;
> +
> +       /* Don't allow signals to interrupt the transmission */
> +       siginitsetinv(&blocked, 0);
> +       sigprocmask(SIG_SETMASK, &blocked, &oldset);
> +
> +       do {
> +               sock->sk->sk_allocation = GFP_NOIO;
> +               iov.iov_base = buf;
> +               iov.iov_len = size;
> +               msg.msg_name = NULL;
> +               msg.msg_namelen = 0;
> +               msg.msg_control = NULL;
> +               msg.msg_controllen = 0;
> +               msg.msg_flags = msg_flags | MSG_NOSIGNAL;
> +
> +               if (send)
> +                       result = kernel_sendmsg(sock, &msg, &iov, 1, size);
> +               else
> +                       result = kernel_recvmsg(sock, &msg, &iov, 1, size,
> +                                               msg.msg_flags);
> +
> +               if (result <= 0) {
> +                       if (result == 0)
> +                               result = -EPIPE; /* short read */
> +                       break;
> +               }
> +               size -= result;
> +               buf += result;
> +       } while (size > 0);
> +
> +       sigprocmask(SIG_SETMASK, &oldset, NULL);
> +
> +       return result;
> +}
> +
> +static int socket_read(struct socket *sock, char *buf, int length)
> +{
> +       return socket_xmit(sock, buf, length, false, 0);
> +}
> +
> +static int socket_write(struct socket *sock, void *buf, int len)
> +{
> +       return socket_xmit(sock, buf, len, true, 0);
> +}
> +
> +static int sheep_submit_sdreq(struct socket *sock, struct sd_req *hdr,
> +                             void *data, unsigned int wlen)
> +{
> +       int ret = socket_write(sock, hdr, sizeof(*hdr));
> +
> +       if (ret < 0)
> +               return ret;
> +
> +       if (wlen)
> +               return socket_write(sock, data, wlen);
> +       return 0;
> +}
> +
> +/* Run the request synchronously */
> +static int sheep_run_sdreq(struct socket *sock, struct sd_req *hdr,
> +                          void *data)
> +{
> +       struct sd_rsp *rsp = (struct sd_rsp *)hdr;
> +       unsigned int wlen, rlen;
> +       int ret;
> +
> +       if (hdr->flags & SD_FLAG_CMD_WRITE) {
> +               wlen = hdr->data_length;
> +               rlen = 0;
> +       } else {
> +               wlen = 0;
> +               rlen = hdr->data_length;
> +       }
> +
> +       ret = sheep_submit_sdreq(sock, hdr, data, wlen);
> +       if (ret < 0) {
> +               pr_err("failed to sbumit the request\n");
> +               return ret;
> +       }
> +
> +       ret = socket_read(sock, (char *)rsp, sizeof(*rsp));
> +       if (ret < 0) {
> +               pr_err("failed to read a response hdr\n");
> +               return ret;
> +       }
> +
> +       if (rlen > rsp->data_length)
> +               rlen = rsp->data_length;
> +
> +       if (rlen) {
> +               ret = socket_read(sock, data, rlen);
> +               if (ret < 0) {
> +                       pr_err("failed to read the response data\n");
> +                       return ret;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
> +static int lookup_sheep_vdi(struct sbd_device *dev)
> +{
> +       struct sd_req hdr = {};
> +       struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
> +       int ret;
> +
> +       hdr.opcode = SD_OP_LOCK_VDI;
> +       hdr.data_length = SD_MAX_VDI_LEN;
> +       hdr.flags = SD_FLAG_CMD_WRITE;
> +       ret = sheep_run_sdreq(dev->sock, &hdr, dev->vdi.name);
> +       if (ret < 0)
> +               return ret;
> +
> +       /* XXX switch case */
> +       if (rsp->result != SD_RES_SUCCESS) {
> +               sbd_debug("Cannot get VDI info for %s\n", dev->vdi.name);
> +               return -EIO;
> +       }
> +
> +       dev->vdi.vid = rsp->vdi.vdi_id;
> +
> +       return 0;
> +}
> +
> +int sheep_setup_vdi(struct sbd_device *dev)
> +{
> +       struct sd_req hdr = {};
> +       struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
> +       struct sd_inode *inode;
> +       int ret;
> +
> +       inode = vmalloc(sizeof(*inode));
> +       if (!inode)
> +               return -ENOMEM;
> +       memset(inode, 0 , sizeof(*inode));
> +
> +       ret = socket_create(&dev->sock, dev->vdi.ip, dev->vdi.port);
> +       if (ret < 0)
> +               goto out;
> +
> +       ret = lookup_sheep_vdi(dev);
> +       if (ret < 0)
> +               goto out_release;
> +
> +       hdr.opcode = SD_OP_READ_OBJ;
> +       hdr.data_length = SD_INODE_SIZE;
> +       hdr.obj.oid = vid_to_vdi_oid(dev->vdi.vid);
> +       hdr.obj.offset = 0;
> +       ret = sheep_run_sdreq(dev->sock, &hdr, inode);
> +       if (ret < 0)
> +               goto out_release;
> +
> +       /* XXX switch case */
> +       if (rsp->result != SD_RES_SUCCESS) {
> +               ret = -EIO;
> +               goto out_release;
> +       }
> +
> +       dev->vdi.inode = inode;
> +       pr_info("%s: Associated to %s\n", DRV_NAME, inode->name);
> +       return 0;
> +out_release:
> +       socket_shutdown(dev->sock);
> +       dev->sock = NULL;
> +out:
> +       vfree(inode);
> +       return ret;
> +}
> +
> +static void submit_sheep_request(struct sheep_request *req)
> +{
> +}
>
Maybe we could remove this unused function?


> +
> +static inline void free_sheep_aiocb(struct sheep_aiocb *aiocb)
> +{
> +       kfree(aiocb->buf);
> +       kfree(aiocb);
> +}
> +
> +static void aio_write_done(struct sheep_aiocb *aiocb, bool locked)
> +{
> +       sbd_debug("off %llu, len %llu\n", aiocb->offset, aiocb->length);
> +
> +       if (locked)
> +               __blk_end_request_all(aiocb->request, aiocb->ret);
> +       else
> +               blk_end_request_all(aiocb->request, aiocb->ret);
> +       free_sheep_aiocb(aiocb);
> +}
> +
> +static void aio_read_done(struct sheep_aiocb *aiocb, bool locked)
> +{
> +       sbd_debug("off %llu, len %llu\n", aiocb->offset, aiocb->length);
> +
> +       if (locked)
> +               __blk_end_request_all(aiocb->request, aiocb->ret);
> +       else
> +               blk_end_request_all(aiocb->request, aiocb->ret);
> +       free_sheep_aiocb(aiocb);
> +}
> +
> +struct sheep_aiocb *sheep_aiocb_setup(struct request *req)
> +{
> +       struct sheep_aiocb *aiocb = kmalloc(sizeof(*aiocb), GFP_KERNEL);
> +       struct req_iterator iter;
> +       struct bio_vec *bvec;
> +       int len = 0;
> +
> +       if (!aiocb)
> +               return ERR_PTR(-ENOMEM);
> +
> +       aiocb->offset = blk_rq_pos(req) * SECTOR_SIZE;
> +       aiocb->length = blk_rq_bytes(req);
> +       aiocb->nr_requests = 0;
> +       aiocb->ret = 0;
> +       aiocb->buf_iter = 0;
> +       aiocb->request = req;
> +       aiocb->buf = kzalloc(aiocb->length, GFP_KERNEL);
> +
> +       switch (rq_data_dir(req)) {
> +       case WRITE:
> +               rq_for_each_segment(bvec, req, iter) {
> +                       unsigned long flags;
> +                       void *addr = bvec_kmap_irq(bvec, &flags);
> +
> +                       memcpy(aiocb->buf + len, addr, bvec->bv_len);
> +                       flush_dcache_page(bvec->bv_page);
> +                       bvec_kunmap_irq(addr, &flags);
> +
> +                       len += bvec->bv_len;
> +               }
> +               aiocb->aio_done_func = aio_write_done;
> +               break;
> +       case READ:
> +               aiocb->aio_done_func = aio_read_done;
> +               break;
> +       default:
> +               /* impossible case */
> +               WARN_ON(1);
> +               free_sheep_aiocb(aiocb);
> +               return ERR_PTR(-EINVAL);
> +       }
> +
> +       return aiocb;
> +}
> +
> +static struct sheep_request *alloc_sheep_request(struct sheep_aiocb
> *aiocb,
> +                                                u64 oid, int len,
> +                                                int offset)
> +{
> +       struct sheep_request *req = kmalloc(sizeof(*req), GFP_KERNEL);
> +       struct sbd_device *dev = sheep_aiocb_to_device(aiocb);
> +
> +       if (!req)
> +               return ERR_PTR(-ENOMEM);
> +
> +       req->offset = offset;
> +       req->length = len;
> +       req->oid = oid;
> +       req->aiocb = aiocb;
> +       req->buf = aiocb->buf + aiocb->buf_iter;
> +       req->seq_num = atomic_inc_return(&dev->seq_num);
> +
> +       switch (rq_data_dir(aiocb->request)) {
> +       case WRITE:
> +               req->type = SHEEP_WRITE;
> +               break;
> +       case READ:
> +               req->type = SHEEP_READ;
> +               break;
> +       default:
> +               /* impossible case */
> +               WARN_ON(1);
> +               kfree(req);
> +               return ERR_PTR(-EINVAL);
> +       }
> +
> +       aiocb->buf_iter += len;
> +       aiocb->nr_requests++;
> +
> +       return req;
> +}
> +
> +static void end_sheep_request(struct sheep_request *req, bool
> queue_locked)
> +{
> +       struct sheep_aiocb *aiocb = req->aiocb;
> +
> +       if (--aiocb->nr_requests == 0)
> +               aiocb->aio_done_func(aiocb, queue_locked);
> +
> +       sbd_debug("end oid %llx off %d, len %d, seq %u\n", req->oid,
> +                 req->offset, req->length, req->seq_num);
> +       kfree(req);
> +}
> +
> +int sheep_aiocb_submit(struct sheep_aiocb *aiocb)
> +{
> +       struct sbd_device *dev = sheep_aiocb_to_device(aiocb);
> +       u64 offset = aiocb->offset;
> +       u64 total = aiocb->length;
> +       u64 start = offset % SD_DATA_OBJ_SIZE;
> +       u32 vid = dev->vdi.vid;
> +       u64 oid = vid_to_data_oid(vid, offset / SD_DATA_OBJ_SIZE);
> +       u32 idx = data_oid_to_idx(oid);
> +       int len = SD_DATA_OBJ_SIZE - start;
> +
> +       if (total < len)
> +               len = total;
> +
> +       sbd_debug("submit oid %llx off %llu, len %llu\n", oid, offset,
> total);
> +       /*
> +        * Make sure we don't free the aiocb before we are done with all
> +        * requests.This additional reference is dropped at the end of this
> +        * function.
> +        */
> +       aiocb->nr_requests++;
> +
> +       do {
> +               struct sheep_request *req;
> +
> +               req = alloc_sheep_request(aiocb, oid, len, start);
> +               if (IS_ERR(req))
> +                       return PTR_ERR(req);
> +
> +               if (likely(dev->vdi.inode->data_vdi_id[idx]))
> +                       goto submit;
> +
> +               /* Object is not created yet... */
> +               switch (req->type) {
> +               case SHEEP_WRITE:
> +               case SHEEP_READ:
> +                       end_sheep_request(req, true);
> +                       goto done;
> +               }
> +submit:
> +               submit_sheep_request(req);
> +done:
> +               oid++;
> +               total -= len;
> +               start = (start + len) % SD_DATA_OBJ_SIZE;
> +               len = total > SD_DATA_OBJ_SIZE ? SD_DATA_OBJ_SIZE : total;
> +       } while (total > 0);
> +
> +       if (--aiocb->nr_requests == 0)
> +               aiocb->aio_done_func(aiocb, true);
> +
> +       return 0;
> +}
> +
> +int sheep_handle_reply(struct sbd_device *dev)
> +{
> +       return 0;
> +}
> diff --git a/sbd/sheep_block_device.c b/sbd/sheep_block_device.c
> new file mode 100644
> index 0000000..e7331dc
> --- /dev/null
> +++ b/sbd/sheep_block_device.c
> @@ -0,0 +1,299 @@
> +/*
> + * Copyright (C) 2014 Liu Yuan <namei.unix at gmail.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +/*
> + * SBD - Sheepdog Block Device
> + *
> + * This file implements the glue functions to export sheep vdi as Linux
> block
> + * device.
> + *
> + */
> +
> +#include "sbd.h"
> +
> +static LIST_HEAD(sbd_dev_list);
> +
> +static const struct block_device_operations sbd_bd_ops = {
> +       .owner          = THIS_MODULE,
> +};
> +
> +static int sbd_submit_request(struct request *req)
> +{
> +       struct sheep_aiocb *aiocb = sheep_aiocb_setup(req);
> +
> +       if (IS_ERR(aiocb))
> +               return PTR_ERR(aiocb);
> +
> +       return sheep_aiocb_submit(aiocb);
> +}
> +
> +static void sbd_request_submiter(struct request_queue *q)
> +{
> +       struct request *req;
> +
> +       while ((req = blk_fetch_request(q)) != NULL) {
> +               int ret;
> +
> +               /* filter out block requests we don't understand */
> +               if (req->cmd_type != REQ_TYPE_FS) {
> +                       __blk_end_request_all(req, 0);
> +                       continue;
> +               }
> +               ret = sbd_submit_request(req);
> +               if (ret < 0)
> +                       break;
> +       }
> +}
> +
> +static int sbd_add_disk(struct sbd_device *dev)
> +{
> +       struct gendisk *disk;
> +       struct request_queue *rq;
> +
> +       disk = alloc_disk(SBD_MINORS_PER_MAJOR);
> +       if (!disk)
> +               return -ENOMEM;
> +
> +       snprintf(disk->disk_name, DEV_NAME_LEN, DRV_NAME "%d", dev->id);
> +       disk->major = dev->major;
> +       disk->first_minor = 0;
> +       disk->fops = &sbd_bd_ops;
> +       disk->private_data = dev;
> +
> +       rq = blk_init_queue(sbd_request_submiter, &dev->queue_lock);
> +       if (!rq) {
> +               put_disk(disk);
> +               return -ENOMEM;
> +       }
> +
> +       blk_queue_max_hw_sectors(rq, SD_DATA_OBJ_SIZE / SECTOR_SIZE);
> +       blk_queue_max_segments(rq, SD_DATA_OBJ_SIZE / SECTOR_SIZE);
> +       blk_queue_max_segment_size(rq, SD_DATA_OBJ_SIZE);
> +       blk_queue_io_opt(rq, SD_DATA_OBJ_SIZE);
> +
> +       disk->queue = rq;
> +       rq->queuedata = dev;
> +       dev->disk = disk;
> +       dev->rq = rq;
> +
> +       set_capacity(disk, dev->vdi.inode->vdi_size / SECTOR_SIZE);
> +       add_disk(disk);
> +
> +       return 0;
> +}
> +
> +static int sbd_request_reaper(void *data)
> +{
> +       struct sbd_device *dev = data;
> +
> +       while (!kthread_should_stop() || !list_empty(&dev->inflight_head))
> {
> +               wait_event_interruptible(dev->inflight_wq,
> +                                        kthread_should_stop() ||
> +                                        !list_empty(&dev->inflight_head));
> +
> +               if (list_empty(&dev->inflight_head))
> +                       continue;
> +
> +               sheep_handle_reply(dev);
> +       }
> +       return 0;
> +}
> +
> +static inline void free_sbd_device(struct sbd_device *dev)
> +{
> +       socket_shutdown(dev->sock);
> +       vfree(dev->vdi.inode);
> +       kfree(dev);
> +}
> +
> +static ssize_t sbd_add(struct bus_type *bus, const char *buf,
> +                      size_t count)
> +{
> +       struct sbd_device *dev, *tmp;
> +       ssize_t ret;
> +       int new_id = 0;
> +       char name[DEV_NAME_LEN];
> +
> +       if (!try_module_get(THIS_MODULE))
> +               return -ENODEV;
> +
> +       dev = kzalloc(sizeof(*dev), GFP_KERNEL);
> +       if (!dev) {
> +               ret = -ENOMEM;
> +               goto err_put;
> +       }
> +
> +       if (sscanf(buf, "%s %d %s", dev->vdi.ip, &dev->vdi.port,
> +                  dev->vdi.name) != 3) {
> +               ret = -EINVAL;
> +               goto err_put;
> +       }
> +
> +       spin_lock_init(&dev->queue_lock);
> +       INIT_LIST_HEAD(&dev->inflight_head);
> +       INIT_LIST_HEAD(&dev->blocking_head);
> +       init_waitqueue_head(&dev->inflight_wq);
> +
> +       list_for_each_entry(tmp, &sbd_dev_list, list) {
> +               if (tmp->id > new_id)
> +                       new_id = tmp->id + 1;
> +       }
> +
> +       ret = sheep_setup_vdi(dev);
> +       if (ret < 0)
> +               goto err_free_dev;
> +
> +       dev->id = new_id;
> +       snprintf(name, DEV_NAME_LEN, DRV_NAME "%d", dev->id);
> +       ret = register_blkdev(0, name);
> +       if (ret < 0)
> +               goto err_free_dev;
> +       dev->major = ret;
> +       dev->minor = 0;
> +       dev->reaper = kthread_run(sbd_request_reaper, dev, "sbd_reaper");
> +
> +       ret = sbd_add_disk(dev);
> +       if (ret < 0)
> +               goto err_unreg_blkdev;
> +
> +       list_add_tail(&dev->list, &sbd_dev_list);
> +
> +       return count;
> +err_unreg_blkdev:
> +       unregister_blkdev(dev->major, name);
> +err_free_dev:
> +       free_sbd_device(dev);
> +err_put:
> +       module_put(THIS_MODULE);
> +       pr_err("%s: error adding device %s", DRV_NAME, buf);
> +       return ret;
> +}
> +
> +static void sbd_del_disk(struct sbd_device *dev)
> +{
> +       struct gendisk *disk = dev->disk;
> +
> +       if (!disk)
> +               return;
> +
> +       if (disk->flags & GENHD_FL_UP)
> +               del_gendisk(disk);
> +       if (disk->queue)
> +               blk_cleanup_queue(disk->queue);
> +       put_disk(disk);
> +}
> +
> +static ssize_t sbd_remove(struct bus_type *bus, const char *buf,
> +                         size_t count)
> +{
> +
> +       struct list_head *tmp, *n;
> +       struct sbd_device *dev;
> +       unsigned long ul;
> +       int target_id, ret;
> +
> +       ret = strict_strtoul(buf, 10, &ul);
> +       if (ret)
> +               return ret;
> +
> +       /* convert to int; abort if we lost anything in the conversion */
> +       target_id = (int)ul;
> +       if (target_id != ul)
> +               return -EINVAL;
> +
> +       list_for_each_safe(tmp, n, &sbd_dev_list) {
> +               dev = list_entry(tmp, struct sbd_device, list);
> +               if (dev->id == target_id) {
> +                       list_del(&dev->list);
> +                       break;
> +               }
> +               dev = NULL;
> +       }
> +
> +       if (!dev)
> +               return -ENOENT;
> +
> +       kthread_stop(dev->reaper);
> +       wake_up_interruptible(&dev->inflight_wq);
> +
> +       sbd_del_disk(dev);
> +       free_sbd_device(dev);
> +       module_put(THIS_MODULE);
> +
> +       return count;
> +}
> +
> +static struct bus_attribute sbd_bus_attrs[] = {
> +       __ATTR(add, S_IWUSR, NULL, sbd_add),
> +       __ATTR(remove, S_IWUSR, NULL, sbd_remove),
> +       __ATTR_NULL
> +};
> +
> +static struct bus_type sbd_bus_type = {
> +       .name           = "sbd",
> +       .bus_attrs      = sbd_bus_attrs,
> +};
> +
> +static void sbd_root_dev_release(struct device *dev)
> +{
> +}
> +
> +static struct device sbd_root_dev = {
> +       .init_name      = "sbd",
> +       .release        = sbd_root_dev_release,
> +};
> +
> +/* Create control files in /sys/bus/sbd/... */
> +static int sbd_sysfs_init(void)
> +{
> +       int ret;
> +
> +       ret = device_register(&sbd_root_dev);
> +       if (ret < 0)
> +               return ret;
> +
> +       ret = bus_register(&sbd_bus_type);
> +       if (ret < 0)
> +               device_unregister(&sbd_root_dev);
> +
> +       return ret;
> +}
> +
> +static void sbd_sysfs_cleanup(void)
> +{
> +       bus_unregister(&sbd_bus_type);
> +       device_unregister(&sbd_root_dev);
> +}
> +
> +int __init sbd_init(void)
> +{
> +       int ret;
> +
> +       ret = sbd_sysfs_init();
> +       if (ret < 0)
> +               return ret;
> +
> +       pr_info("%s: Sheepdog block device loaded\n", DRV_NAME);
> +       return 0;
> +}
> +
> +void __exit sbd_exit(void)
> +{
> +       sbd_sysfs_cleanup();
> +       pr_info("%s: Sheepdog block device unloaded\n", DRV_NAME);
> +}
> +
> +module_init(sbd_init);
> +module_exit(sbd_exit);
> +
> +MODULE_AUTHOR("Liu Yuan <namei.unix at gmail.com>");
> +MODULE_DESCRIPTION("Sheepdog Block Device");
> +MODULE_LICENSE("GPL");
> --
> 1.8.1.2
>
> --
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog
>



-- 
--
Best Regards
Robin Dong
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.wpkg.org/pipermail/sheepdog/attachments/20140528/6d802c33/attachment-0004.html>


More information about the sheepdog mailing list