[sheepdog] [PATCH v3 2/9] sbd: introduce basic framework for Sheepdog Block Device
Robin Dong
robin.k.dong at gmail.com
Wed May 28 11:56:18 CEST 2014
2014-05-26 13:18 GMT+08:00 Liu Yuan <namei.unix at gmail.com>:
> From: Liu Yuan <tailai.ly at taobao.com>
>
> This is similar to Ceph's RBD. The main motivation is to replace complex
> and inefficient middleware (such as iSCSI software) with simple software
> stacks
> to expose sheepdog storage as a Linux block device interface.
>
> Usage:
>
> We control the device the same way as RBD.
>
> # associate vdi 'test' to /dev/sbd0
> $ echo 127.0.0.1 7000 test > /sys/bus/sbd/add
>
> # remove the device sbd0
> $ echo 0 > /sys/bus/sbd/remove
>
> For now we don't do actual read/write yet, just an aio framework sketched
> out.
>
> Signed-off-by: Liu Yuan <namei.unix at gmail.com>
> ---
> sbd/Kbuild | 5 +
> sbd/Makefile | 8 +
> sbd/sbd.h | 119 +++++++++++++
> sbd/sheep.c | 436
> +++++++++++++++++++++++++++++++++++++++++++++++
> sbd/sheep_block_device.c | 299 ++++++++++++++++++++++++++++++++
> 5 files changed, 867 insertions(+)
> create mode 100644 sbd/Kbuild
> create mode 100644 sbd/Makefile
> create mode 100644 sbd/sbd.h
> create mode 100644 sbd/sheep.c
> create mode 100644 sbd/sheep_block_device.c
>
> diff --git a/sbd/Kbuild b/sbd/Kbuild
> new file mode 100644
> index 0000000..c1f7c07
> --- /dev/null
> +++ b/sbd/Kbuild
> @@ -0,0 +1,5 @@
> +MODULE_NAME=sbd
> +
> +ccflags-y := -I$(PWD)/../include -DDEBUG
> +obj-m := $(MODULE_NAME).o
> +$(MODULE_NAME)-y := sheep_block_device.o sheep.o
> diff --git a/sbd/Makefile b/sbd/Makefile
> new file mode 100644
> index 0000000..940c0c3
> --- /dev/null
> +++ b/sbd/Makefile
> @@ -0,0 +1,8 @@
> +KDIR ?= /lib/modules/`uname -r`/build
> +
> +default:
> + $(MAKE) -C $(KDIR) M=$$PWD modules
> +clean:
> + $(MAKE) -C $(KDIR) M=$$PWD clean
> +install:
> + $(MAKE) -C $(KDIR) M=$$PWD modules_install
> diff --git a/sbd/sbd.h b/sbd/sbd.h
> new file mode 100644
> index 0000000..e938561
> --- /dev/null
> +++ b/sbd/sbd.h
> @@ -0,0 +1,119 @@
> +#ifndef _SBD_H_
> +#define _SBD_H_
> +
> +#include <linux/socket.h>
> +#include <linux/in.h>
> +#include <linux/inet.h>
> +#include <linux/socket.h>
> +#include <linux/net.h>
> +#include <linux/tcp.h>
> +#include <linux/slab.h>
> +#include <linux/kernel.h>
> +#include <linux/device.h>
> +#include <linux/module.h>
> +#include <linux/fs.h>
> +#include <linux/blkdev.h>
> +#include <linux/kthread.h>
> +#include <linux/gfp.h>
> +
> +#include "sheepdog_proto.h"
> +
> +#define DRV_NAME "sbd"
> +#define DEV_NAME_LEN 32
> +#define SBD_MINORS_PER_MAJOR 32
> +#define SECTOR_SIZE 512
> +
> +struct sheep_vdi {
> + struct sd_inode *inode;
> + u32 vid;
> + char ip[16];
> + unsigned int port;
> + char name[SD_MAX_VDI_LEN];
> +};
> +
> +struct sbd_device {
> + struct socket *sock;
> + int id; /* blkdev unique id */
> + atomic_t seq_num;
> +
> + int major;
> + int minor;
> + struct gendisk *disk;
> + struct request_queue *rq;
> + spinlock_t queue_lock; /* request queue lock */
> +
> + struct sheep_vdi vdi; /* Associated sheep image */
> +
> + struct list_head inflight_head;
> + wait_queue_head_t inflight_wq;
> + struct list_head blocking_head;
> +
> + struct list_head list;
> + struct task_struct *reaper;
> +};
> +
> +struct sheep_aiocb {
> + struct request *request;
> + u64 offset;
> + u64 length;
> + int ret;
> + u32 nr_requests;
> + char *buf;
> + int buf_iter;
> + void (*aio_done_func)(struct sheep_aiocb *, bool);
> +};
> +
> +enum sheep_request_type {
> + SHEEP_READ,
> + SHEEP_WRITE,
> + SHEEP_CREATE,
> +};
> +
> +struct sheep_request {
> + struct list_head list;
> + struct sheep_aiocb *aiocb;
> + u64 oid;
> + u32 seq_num;
> + int type;
> + int offset;
> + int length;
> + char *buf;
> +};
> +
> +void socket_shutdown(struct socket *sock);
> +int sheep_setup_vdi(struct sbd_device *dev);
> +struct sheep_aiocb *sheep_aiocb_setup(struct request *req);
> +int sheep_aiocb_submit(struct sheep_aiocb *aiocb);
> +int sheep_handle_reply(struct sbd_device *dev);
> +
> +#if defined(CONFIG_DYNAMIC_DEBUG) && defined _DPRINTK_FLAGS_INCL_MODNAME
> +
> +# define _SBD_FLAGS (_DPRINTK_FLAGS_PRINT | _DPRINTK_FLAGS_INCL_MODNAME \
> + | _DPRINTK_FLAGS_INCL_FUNCNAME | _DPRINTK_FLAGS_INCL_LINENO)
> +
> +# define SBD_DYNAMIC_DEBUG_METADATA(name, fmt) \
> + static struct _ddebug __aligned(8) \
> + __attribute__((section("__verbose"))) name = { \
> + .modname = KBUILD_MODNAME, \
> + .function = __func__, \
> + .filename = __FILE__, \
> + .format = (fmt), \
> + .lineno = __LINE__, \
> + .flags = _SBD_FLAGS, \
> + }
> +
> +# define sbd_debug(fmt, ...) \
> +({ \
> + SBD_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \
> + __dynamic_pr_debug(&descriptor, pr_fmt(fmt), \
> + ##__VA_ARGS__); \
> +})
> +
> +#else
> +
> +/* If -DDEBUG is not set, pr_debug = no_printk */
> +# define sbd_debug pr_debug
> +
> +#endif /* CONFIG_DYNAMIC_DEBUG */
> +
> +#endif /* _SBD_H_ */
> diff --git a/sbd/sheep.c b/sbd/sheep.c
> new file mode 100644
> index 0000000..33269b4
> --- /dev/null
> +++ b/sbd/sheep.c
> @@ -0,0 +1,436 @@
> +/*
> + * Copyright (C) 2014 Liu Yuan <namei.unix at gmail.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "sbd.h"
> +
> +void socket_shutdown(struct socket *sock)
> +{
> + if (sock)
> + kernel_sock_shutdown(sock, SHUT_RDWR);
> +}
> +
> +static struct sbd_device *sheep_aiocb_to_device(struct sheep_aiocb *aiocb)
> +{
> + return aiocb->request->q->queuedata;
> +}
> +
> +/*
> + * Create a TCP socket, set SO_LINGER and TCP_NODELAY on it and connect it
> + * to ip_addr:port.
> + *
> + * Returns 0 on success with the connected socket stored in *sock.
> + * Returns a negative errno on failure.  If the socket had already been
> + * created it is released and *sock is reset to NULL; if sock_create()
> + * itself fails, *sock is left untouched.
> + */
> +static int socket_create(struct socket **sock, const char *ip_addr, int port)
> +{
> +	struct sockaddr_in addr;
> +	struct linger linger_opt = {1, 0};
> +	int ret, nodelay = 1;
> +
> +	ret = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, sock);
> +	if (ret < 0) {
> +		pr_err("fail to create socket\n");
> +		return ret;
> +	}
> +
> +	/*
> +	 * Use kernel_setsockopt() rather than sock_setsockopt():
> +	 * sock_setsockopt() only understands SOL_SOCKET options, so a
> +	 * SOL_TCP option such as TCP_NODELAY would be misinterpreted.
> +	 * kernel_setsockopt() dispatches on the level and accepts kernel
> +	 * pointers, so no get_fs()/set_fs() dance is needed either.
> +	 */
> +	ret = kernel_setsockopt(*sock, SOL_SOCKET, SO_LINGER,
> +				(char *)&linger_opt, sizeof(linger_opt));
> +	if (ret != 0) {
> +		pr_err("Can't set SO_LINGER: %d\n", ret);
> +		goto release;
> +	}
> +
> +	ret = kernel_setsockopt(*sock, SOL_TCP, TCP_NODELAY,
> +				(char *)&nodelay, sizeof(nodelay));
> +	if (ret != 0) {
> +		pr_err("Can't set TCP_NODELAY: %d\n", ret);
> +		goto release;
> +	}
> +
> +	memset(&addr, 0, sizeof(addr));
> +	addr.sin_family = AF_INET;
> +	addr.sin_port = htons(port);
> +	addr.sin_addr.s_addr = in_aton(ip_addr);
> +	ret = (*sock)->ops->connect(*sock, (struct sockaddr *)&addr,
> +				    sizeof(addr), 0);
> +	if (ret < 0) {
> +		pr_err("failed connect to %s:%d\n", ip_addr, port);
> +		goto release;
> +	}
> +
> +	return ret;
> +release:
> +	/* Shutting down alone would leak the socket; release frees it */
> +	sock_release(*sock);
> +	*sock = NULL;
> +	return ret;
> +}
> +
> +static int socket_xmit(struct socket *sock, void *buf, int size, bool
> send,
> + int msg_flags)
> +{
> + int result;
> + struct msghdr msg;
> + struct kvec iov;
> + sigset_t blocked, oldset;
> +
> + if (unlikely(!sock))
> + return -EINVAL;
> +
> + /* Don't allow signals to interrupt the transmission */
> + siginitsetinv(&blocked, 0);
> + sigprocmask(SIG_SETMASK, &blocked, &oldset);
> +
> + do {
> + sock->sk->sk_allocation = GFP_NOIO;
> + iov.iov_base = buf;
> + iov.iov_len = size;
> + msg.msg_name = NULL;
> + msg.msg_namelen = 0;
> + msg.msg_control = NULL;
> + msg.msg_controllen = 0;
> + msg.msg_flags = msg_flags | MSG_NOSIGNAL;
> +
> + if (send)
> + result = kernel_sendmsg(sock, &msg, &iov, 1, size);
> + else
> + result = kernel_recvmsg(sock, &msg, &iov, 1, size,
> + msg.msg_flags);
> +
> + if (result <= 0) {
> + if (result == 0)
> + result = -EPIPE; /* short read */
> + break;
> + }
> + size -= result;
> + buf += result;
> + } while (size > 0);
> +
> + sigprocmask(SIG_SETMASK, &oldset, NULL);
> +
> + return result;
> +}
> +
> +static int socket_read(struct socket *sock, char *buf, int length)
> +{
> + return socket_xmit(sock, buf, length, false, 0);
> +}
>
+
> +static int socket_write(struct socket *sock, void *buf, int len)
> +{
> + return socket_xmit(sock, buf, len, true, 0);
> +}
> +
> +static int sheep_submit_sdreq(struct socket *sock, struct sd_req *hdr,
> + void *data, unsigned int wlen)
> +{
> + int ret = socket_write(sock, hdr, sizeof(*hdr));
> +
> + if (ret < 0)
> + return ret;
> +
> + if (wlen)
> + return socket_write(sock, data, wlen);
> + return 0;
> +}
> +
> +/* Run the request synchronously */
> +static int sheep_run_sdreq(struct socket *sock, struct sd_req *hdr,
> + void *data)
> +{
> + struct sd_rsp *rsp = (struct sd_rsp *)hdr;
> + unsigned int wlen, rlen;
> + int ret;
> +
> + if (hdr->flags & SD_FLAG_CMD_WRITE) {
> + wlen = hdr->data_length;
> + rlen = 0;
> + } else {
> + wlen = 0;
> + rlen = hdr->data_length;
> + }
> +
> + ret = sheep_submit_sdreq(sock, hdr, data, wlen);
> + if (ret < 0) {
> + pr_err("failed to sbumit the request\n");
> + return ret;
> + }
> +
> + ret = socket_read(sock, (char *)rsp, sizeof(*rsp));
> + if (ret < 0) {
> + pr_err("failed to read a response hdr\n");
> + return ret;
> + }
> +
> + if (rlen > rsp->data_length)
> + rlen = rsp->data_length;
> +
> + if (rlen) {
> + ret = socket_read(sock, data, rlen);
> + if (ret < 0) {
> + pr_err("failed to read the response data\n");
> + return ret;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int lookup_sheep_vdi(struct sbd_device *dev)
> +{
> + struct sd_req hdr = {};
> + struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
> + int ret;
> +
> + hdr.opcode = SD_OP_LOCK_VDI;
> + hdr.data_length = SD_MAX_VDI_LEN;
> + hdr.flags = SD_FLAG_CMD_WRITE;
> + ret = sheep_run_sdreq(dev->sock, &hdr, dev->vdi.name);
> + if (ret < 0)
> + return ret;
> +
> + /* XXX switch case */
> + if (rsp->result != SD_RES_SUCCESS) {
> + sbd_debug("Cannot get VDI info for %s\n", dev->vdi.name);
> + return -EIO;
> + }
> +
> + dev->vdi.vid = rsp->vdi.vdi_id;
> +
> + return 0;
> +}
> +
> +/*
> + * Connect to the sheep daemon described by dev->vdi, look up the VDI by
> + * name and read its inode object into a newly allocated buffer.
> + *
> + * Returns 0 on success, with dev->sock connected and dev->vdi.inode
> + * pointing at the inode buffer.  Returns a negative errno on failure; in
> + * that case the inode buffer is freed and, if a connection had been made,
> + * the socket is released and dev->sock is reset to NULL.
> + */
> +int sheep_setup_vdi(struct sbd_device *dev)
> +{
> +	struct sd_req hdr = {};
> +	/* sheep_run_sdreq() reads the response header back into hdr */
> +	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
> +	struct sd_inode *inode;
> +	int ret;
> +
> +	inode = vmalloc(sizeof(*inode));
> +	if (!inode)
> +		return -ENOMEM;
> +	memset(inode, 0 , sizeof(*inode));
> +
> +	ret = socket_create(&dev->sock, dev->vdi.ip, dev->vdi.port);
> +	if (ret < 0)
> +		goto out;
> +
> +	ret = lookup_sheep_vdi(dev);
> +	if (ret < 0)
> +		goto out_release;
> +
> +	hdr.opcode = SD_OP_READ_OBJ;
> +	hdr.data_length = SD_INODE_SIZE;
> +	hdr.obj.oid = vid_to_vdi_oid(dev->vdi.vid);
> +	hdr.obj.offset = 0;
> +	ret = sheep_run_sdreq(dev->sock, &hdr, inode);
> +	if (ret < 0)
> +		goto out_release;
> +
> +	/* XXX switch case */
> +	if (rsp->result != SD_RES_SUCCESS) {
> +		ret = -EIO;
> +		goto out_release;
> +	}
> +
> +	dev->vdi.inode = inode;
> +	pr_info("%s: Associated to %s\n", DRV_NAME, inode->name);
> +	return 0;
> +out_release:
> +	socket_shutdown(dev->sock);
> +	/* Shutdown only closes the connection; release the socket itself */
> +	sock_release(dev->sock);
> +	dev->sock = NULL;
> +out:
> +	vfree(inode);
> +	return ret;
> +}
> +
> +static void submit_sheep_request(struct sheep_request *req)
> +{
> +}
>
Maybe we could remove this unused function?
> +
> +static inline void free_sheep_aiocb(struct sheep_aiocb *aiocb)
> +{
> + kfree(aiocb->buf);
> + kfree(aiocb);
> +}
> +
> +static void aio_write_done(struct sheep_aiocb *aiocb, bool locked)
> +{
> + sbd_debug("off %llu, len %llu\n", aiocb->offset, aiocb->length);
> +
> + if (locked)
> + __blk_end_request_all(aiocb->request, aiocb->ret);
> + else
> + blk_end_request_all(aiocb->request, aiocb->ret);
> + free_sheep_aiocb(aiocb);
> +}
> +
> +static void aio_read_done(struct sheep_aiocb *aiocb, bool locked)
> +{
> + sbd_debug("off %llu, len %llu\n", aiocb->offset, aiocb->length);
> +
> + if (locked)
> + __blk_end_request_all(aiocb->request, aiocb->ret);
> + else
> + blk_end_request_all(aiocb->request, aiocb->ret);
> + free_sheep_aiocb(aiocb);
> +}
> +
> +/*
> + * Build an aiocb describing the block request req.
> + *
> + * A bounce buffer of blk_rq_bytes(req) bytes is allocated; for a WRITE the
> + * request's segments are copied into it.  The completion callback is
> + * chosen from the request direction.
> + *
> + * Returns the new aiocb, ERR_PTR(-ENOMEM) if an allocation fails, or
> + * ERR_PTR(-EINVAL) for an unknown data direction.  Nothing is left
> + * allocated on failure.
> + */
> +struct sheep_aiocb *sheep_aiocb_setup(struct request *req)
> +{
> +	struct sheep_aiocb *aiocb;
> +	struct req_iterator iter;
> +	struct bio_vec *bvec;
> +	int len = 0;
> +
> +	/*
> +	 * This is reached from the queue's request function
> +	 * (sbd_request_submiter), which runs with the queue lock held, so
> +	 * the allocations here must not sleep.
> +	 */
> +	aiocb = kmalloc(sizeof(*aiocb), GFP_ATOMIC);
> +	if (!aiocb)
> +		return ERR_PTR(-ENOMEM);
> +
> +	/* Widen before multiplying so a 32-bit sector_t cannot overflow */
> +	aiocb->offset = (u64)blk_rq_pos(req) * SECTOR_SIZE;
> +	aiocb->length = blk_rq_bytes(req);
> +	aiocb->nr_requests = 0;
> +	aiocb->ret = 0;
> +	aiocb->buf_iter = 0;
> +	aiocb->request = req;
> +	aiocb->buf = kzalloc(aiocb->length, GFP_ATOMIC);
> +	if (!aiocb->buf) {
> +		kfree(aiocb);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	switch (rq_data_dir(req)) {
> +	case WRITE:
> +		/* Gather the scattered request pages into the flat buffer */
> +		rq_for_each_segment(bvec, req, iter) {
> +			unsigned long flags;
> +			void *addr = bvec_kmap_irq(bvec, &flags);
> +
> +			memcpy(aiocb->buf + len, addr, bvec->bv_len);
> +			flush_dcache_page(bvec->bv_page);
> +			bvec_kunmap_irq(addr, &flags);
> +
> +			len += bvec->bv_len;
> +		}
> +		aiocb->aio_done_func = aio_write_done;
> +		break;
> +	case READ:
> +		aiocb->aio_done_func = aio_read_done;
> +		break;
> +	default:
> +		/* impossible case */
> +		WARN_ON(1);
> +		free_sheep_aiocb(aiocb);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	return aiocb;
> +}
> +
> +/*
> + * Allocate one sub-request of aiocb: len bytes at offset inside object oid.
> + *
> + * The sub-request's buffer is the next unused slice of aiocb->buf.  On
> + * success aiocb->buf_iter is advanced by len and aiocb->nr_requests is
> + * incremented.
> + *
> + * Returns the new request, ERR_PTR(-ENOMEM) on allocation failure, or
> + * ERR_PTR(-EINVAL) for an unknown data direction.
> + */
> +static struct sheep_request *alloc_sheep_request(struct sheep_aiocb *aiocb,
> +						 u64 oid, int len,
> +						 int offset)
> +{
> +	struct sbd_device *dev = sheep_aiocb_to_device(aiocb);
> +	struct sheep_request *req;
> +
> +	/*
> +	 * Reached from the queue's request function via
> +	 * sheep_aiocb_submit(), so the allocation must not sleep.
> +	 */
> +	req = kmalloc(sizeof(*req), GFP_ATOMIC);
> +	if (!req)
> +		return ERR_PTR(-ENOMEM);
> +
> +	/* kmalloc() does not zero memory; give the list node a sane state */
> +	INIT_LIST_HEAD(&req->list);
> +	req->offset = offset;
> +	req->length = len;
> +	req->oid = oid;
> +	req->aiocb = aiocb;
> +	req->buf = aiocb->buf + aiocb->buf_iter;
> +	req->seq_num = atomic_inc_return(&dev->seq_num);
> +
> +	switch (rq_data_dir(aiocb->request)) {
> +	case WRITE:
> +		req->type = SHEEP_WRITE;
> +		break;
> +	case READ:
> +		req->type = SHEEP_READ;
> +		break;
> +	default:
> +		/* impossible case */
> +		WARN_ON(1);
> +		kfree(req);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	aiocb->buf_iter += len;
> +	aiocb->nr_requests++;
> +
> +	return req;
> +}
> +
> +static void end_sheep_request(struct sheep_request *req, bool
> queue_locked)
> +{
> + struct sheep_aiocb *aiocb = req->aiocb;
> +
> + if (--aiocb->nr_requests == 0)
> + aiocb->aio_done_func(aiocb, queue_locked);
> +
> + sbd_debug("end oid %llx off %d, len %d, seq %u\n", req->oid,
> + req->offset, req->length, req->seq_num);
> + kfree(req);
> +}
> +
> +/*
> + * Split aiocb into one sub-request per data object it touches and submit
> + * each of them.
> + *
> + * A sub-request whose object has no entry in the inode's data_vdi_id[]
> + * table is ended immediately instead of being submitted.
> + *
> + * Returns 0 on success.  If a sub-request cannot be allocated, the error
> + * is recorded in aiocb->ret and returned; the block request is then
> + * completed with that error as soon as the last reference to the aiocb is
> + * dropped (immediately, if no sub-request is outstanding).
> + */
> +int sheep_aiocb_submit(struct sheep_aiocb *aiocb)
> +{
> +	struct sbd_device *dev = sheep_aiocb_to_device(aiocb);
> +	u64 offset = aiocb->offset;
> +	u64 total = aiocb->length;
> +	u64 start = offset % SD_DATA_OBJ_SIZE;
> +	u32 vid = dev->vdi.vid;
> +	u64 oid = vid_to_data_oid(vid, offset / SD_DATA_OBJ_SIZE);
> +	int len = SD_DATA_OBJ_SIZE - start;
> +	int ret = 0;
> +
> +	if (total < len)
> +		len = total;
> +
> +	sbd_debug("submit oid %llx off %llu, len %llu\n", oid, offset, total);
> +	/*
> +	 * Make sure we don't free the aiocb before we are done with all
> +	 * requests.This additional reference is dropped at the end of this
> +	 * function.
> +	 */
> +	aiocb->nr_requests++;
> +
> +	do {
> +		struct sheep_request *req;
> +		/* Recompute per object: oid advances on every iteration */
> +		u32 idx = data_oid_to_idx(oid);
> +
> +		req = alloc_sheep_request(aiocb, oid, len, start);
> +		if (IS_ERR(req)) {
> +			/*
> +			 * Do not return here: the reference taken above
> +			 * still has to be dropped, otherwise the request
> +			 * would never complete and the aiocb would leak.
> +			 */
> +			ret = PTR_ERR(req);
> +			aiocb->ret = ret;
> +			break;
> +		}
> +
> +		if (likely(dev->vdi.inode->data_vdi_id[idx]))
> +			submit_sheep_request(req);
> +		else
> +			/* Object is not created yet... */
> +			end_sheep_request(req, true);
> +
> +		oid++;
> +		total -= len;
> +		start = (start + len) % SD_DATA_OBJ_SIZE;
> +		len = total > SD_DATA_OBJ_SIZE ? SD_DATA_OBJ_SIZE : total;
> +	} while (total > 0);
> +
> +	if (--aiocb->nr_requests == 0)
> +		aiocb->aio_done_func(aiocb, true);
> +
> +	return ret;
> +}
> +
> +int sheep_handle_reply(struct sbd_device *dev)
> +{
> + return 0;
> +}
> diff --git a/sbd/sheep_block_device.c b/sbd/sheep_block_device.c
> new file mode 100644
> index 0000000..e7331dc
> --- /dev/null
> +++ b/sbd/sheep_block_device.c
> @@ -0,0 +1,299 @@
> +/*
> + * Copyright (C) 2014 Liu Yuan <namei.unix at gmail.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +/*
> + * SBD - Sheepdog Block Device
> + *
> + * This file implements the glue functions to export sheep vdi as Linux
> block
> + * device.
> + *
> + */
> +
> +#include "sbd.h"
> +
> +static LIST_HEAD(sbd_dev_list);
> +
> +static const struct block_device_operations sbd_bd_ops = {
> + .owner = THIS_MODULE,
> +};
> +
> +/*
> + * Turn the block request req into an aiocb and submit it.
> + *
> + * Returns 0 on success or a negative errno.  If the aiocb cannot be set
> + * up, req is completed with that error here: the caller has already
> + * fetched it from the queue, so nobody else would ever end it.
> + */
> +static int sbd_submit_request(struct request *req)
> +{
> +	struct sheep_aiocb *aiocb = sheep_aiocb_setup(req);
> +
> +	if (IS_ERR(aiocb)) {
> +		/* Called from the request function: queue lock is held */
> +		__blk_end_request_all(req, PTR_ERR(aiocb));
> +		return PTR_ERR(aiocb);
> +	}
> +
> +	return sheep_aiocb_submit(aiocb);
> +}
> +
> +static void sbd_request_submiter(struct request_queue *q)
> +{
> + struct request *req;
> +
> + while ((req = blk_fetch_request(q)) != NULL) {
> + int ret;
> +
> + /* filter out block requests we don't understand */
> + if (req->cmd_type != REQ_TYPE_FS) {
> + __blk_end_request_all(req, 0);
> + continue;
> + }
> + ret = sbd_submit_request(req);
> + if (ret < 0)
> + break;
> + }
> +}
> +
> +static int sbd_add_disk(struct sbd_device *dev)
> +{
> + struct gendisk *disk;
> + struct request_queue *rq;
> +
> + disk = alloc_disk(SBD_MINORS_PER_MAJOR);
> + if (!disk)
> + return -ENOMEM;
> +
> + snprintf(disk->disk_name, DEV_NAME_LEN, DRV_NAME "%d", dev->id);
> + disk->major = dev->major;
> + disk->first_minor = 0;
> + disk->fops = &sbd_bd_ops;
> + disk->private_data = dev;
> +
> + rq = blk_init_queue(sbd_request_submiter, &dev->queue_lock);
> + if (!rq) {
> + put_disk(disk);
> + return -ENOMEM;
> + }
> +
> + blk_queue_max_hw_sectors(rq, SD_DATA_OBJ_SIZE / SECTOR_SIZE);
> + blk_queue_max_segments(rq, SD_DATA_OBJ_SIZE / SECTOR_SIZE);
> + blk_queue_max_segment_size(rq, SD_DATA_OBJ_SIZE);
> + blk_queue_io_opt(rq, SD_DATA_OBJ_SIZE);
> +
> + disk->queue = rq;
> + rq->queuedata = dev;
> + dev->disk = disk;
> + dev->rq = rq;
> +
> + set_capacity(disk, dev->vdi.inode->vdi_size / SECTOR_SIZE);
> + add_disk(disk);
> +
> + return 0;
> +}
> +
> +static int sbd_request_reaper(void *data)
> +{
> + struct sbd_device *dev = data;
> +
> + while (!kthread_should_stop() || !list_empty(&dev->inflight_head))
> {
> + wait_event_interruptible(dev->inflight_wq,
> + kthread_should_stop() ||
> + !list_empty(&dev->inflight_head));
> +
> + if (list_empty(&dev->inflight_head))
> + continue;
> +
> + sheep_handle_reply(dev);
> + }
> + return 0;
> +}
> +
> +/*
> + * Tear down the connection of dev (if any), then free its inode buffer
> + * and the device structure itself.  dev must not be used afterwards.
> + */
> +static inline void free_sbd_device(struct sbd_device *dev)
> +{
> +	if (dev->sock) {
> +		socket_shutdown(dev->sock);
> +		/* Shutdown alone does not free the socket */
> +		sock_release(dev->sock);
> +	}
> +	vfree(dev->vdi.inode);
> +	kfree(dev);
> +}
> +
> +/*
> + * sysfs 'add' handler: buf holds "<ip> <port> <vdi name>".
> + *
> + * Allocates a device, connects it to the given sheep daemon, registers a
> + * block major, starts the reaper thread, adds the disk and finally links
> + * the device into sbd_dev_list.  A module reference is held for as long
> + * as the device exists.
> + *
> + * Returns count on success or a negative errno, in which case everything
> + * set up so far is undone.
> + */
> +static ssize_t sbd_add(struct bus_type *bus, const char *buf,
> +		       size_t count)
> +{
> +	struct sbd_device *dev, *tmp;
> +	ssize_t ret;
> +	int new_id = 0;
> +	char name[DEV_NAME_LEN];
> +	char fmt[32];
> +
> +	if (!try_module_get(THIS_MODULE))
> +		return -ENODEV;
> +
> +	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
> +	if (!dev) {
> +		ret = -ENOMEM;
> +		goto err_put;
> +	}
> +
> +	/*
> +	 * Build the scan format at run time so that both %s conversions
> +	 * are bounded by the size of their destination arrays; an
> +	 * unbounded %s would let user input overflow them.
> +	 */
> +	snprintf(fmt, sizeof(fmt), "%%%zus %%u %%%zus",
> +		 sizeof(dev->vdi.ip) - 1, sizeof(dev->vdi.name) - 1);
> +	if (sscanf(buf, fmt, dev->vdi.ip, &dev->vdi.port,
> +		   dev->vdi.name) != 3) {
> +		ret = -EINVAL;
> +		/* dev is already allocated and must be freed */
> +		goto err_free_dev;
> +	}
> +
> +	spin_lock_init(&dev->queue_lock);
> +	INIT_LIST_HEAD(&dev->inflight_head);
> +	INIT_LIST_HEAD(&dev->blocking_head);
> +	init_waitqueue_head(&dev->inflight_wq);
> +
> +	/* Pick an id one above the highest in use (>= so id 0 counts) */
> +	list_for_each_entry(tmp, &sbd_dev_list, list) {
> +		if (tmp->id >= new_id)
> +			new_id = tmp->id + 1;
> +	}
> +
> +	ret = sheep_setup_vdi(dev);
> +	if (ret < 0)
> +		goto err_free_dev;
> +
> +	dev->id = new_id;
> +	snprintf(name, DEV_NAME_LEN, DRV_NAME "%d", dev->id);
> +	ret = register_blkdev(0, name);
> +	if (ret < 0)
> +		goto err_free_dev;
> +	dev->major = ret;
> +	dev->minor = 0;
> +
> +	dev->reaper = kthread_run(sbd_request_reaper, dev, "sbd_reaper");
> +	if (IS_ERR(dev->reaper)) {
> +		ret = PTR_ERR(dev->reaper);
> +		goto err_unreg_blkdev;
> +	}
> +
> +	ret = sbd_add_disk(dev);
> +	if (ret < 0)
> +		goto err_stop_reaper;
> +
> +	list_add_tail(&dev->list, &sbd_dev_list);
> +
> +	return count;
> +err_stop_reaper:
> +	/* The reaper dereferences dev; stop it before dev is freed */
> +	kthread_stop(dev->reaper);
> +err_unreg_blkdev:
> +	unregister_blkdev(dev->major, name);
> +err_free_dev:
> +	free_sbd_device(dev);
> +err_put:
> +	module_put(THIS_MODULE);
> +	pr_err("%s: error adding device %s", DRV_NAME, buf);
> +	return ret;
> +}
> +
> +static void sbd_del_disk(struct sbd_device *dev)
> +{
> + struct gendisk *disk = dev->disk;
> +
> + if (!disk)
> + return;
> +
> + if (disk->flags & GENHD_FL_UP)
> + del_gendisk(disk);
> + if (disk->queue)
> + blk_cleanup_queue(disk->queue);
> + put_disk(disk);
> +}
> +
> +/*
> + * sysfs 'remove' handler: buf holds the decimal id of the device to
> + * remove.
> + *
> + * Unlinks the device from sbd_dev_list, stops its reaper thread, deletes
> + * the disk, unregisters the block major obtained in sbd_add(), frees the
> + * device and drops the module reference taken in sbd_add().
> + *
> + * Returns count on success, -ENOENT if no device has that id, -EINVAL if
> + * the id does not fit in an int, or the parse error from
> + * strict_strtoul().
> + */
> +static ssize_t sbd_remove(struct bus_type *bus, const char *buf,
> +			  size_t count)
> +{
> +	/* Must start as NULL: the loop body never runs on an empty list */
> +	struct sbd_device *dev = NULL, *tmp;
> +	char name[DEV_NAME_LEN];
> +	unsigned long ul;
> +	int target_id, ret;
> +
> +	ret = strict_strtoul(buf, 10, &ul);
> +	if (ret)
> +		return ret;
> +
> +	/* convert to int; abort if we lost anything in the conversion */
> +	target_id = (int)ul;
> +	if (target_id != ul)
> +		return -EINVAL;
> +
> +	list_for_each_entry(tmp, &sbd_dev_list, list) {
> +		if (tmp->id == target_id) {
> +			dev = tmp;
> +			break;
> +		}
> +	}
> +
> +	if (!dev)
> +		return -ENOENT;
> +
> +	list_del(&dev->list);
> +
> +	kthread_stop(dev->reaper);
> +	wake_up_interruptible(&dev->inflight_wq);
> +
> +	sbd_del_disk(dev);
> +
> +	/* Same name that sbd_add() passed to register_blkdev() */
> +	snprintf(name, DEV_NAME_LEN, DRV_NAME "%d", dev->id);
> +	unregister_blkdev(dev->major, name);
> +
> +	free_sbd_device(dev);
> +	module_put(THIS_MODULE);
> +
> +	return count;
> +}
> +
> +static struct bus_attribute sbd_bus_attrs[] = {
> + __ATTR(add, S_IWUSR, NULL, sbd_add),
> + __ATTR(remove, S_IWUSR, NULL, sbd_remove),
> + __ATTR_NULL
> +};
> +
> +static struct bus_type sbd_bus_type = {
> + .name = "sbd",
> + .bus_attrs = sbd_bus_attrs,
> +};
> +
> +static void sbd_root_dev_release(struct device *dev)
> +{
> +}
> +
> +static struct device sbd_root_dev = {
> + .init_name = "sbd",
> + .release = sbd_root_dev_release,
> +};
> +
> +/* Create control files in /sys/bus/sbd/... */
> +static int sbd_sysfs_init(void)
> +{
> + int ret;
> +
> + ret = device_register(&sbd_root_dev);
> + if (ret < 0)
> + return ret;
> +
> + ret = bus_register(&sbd_bus_type);
> + if (ret < 0)
> + device_unregister(&sbd_root_dev);
> +
> + return ret;
> +}
> +
> +static void sbd_sysfs_cleanup(void)
> +{
> + bus_unregister(&sbd_bus_type);
> + device_unregister(&sbd_root_dev);
> +}
> +
> +int __init sbd_init(void)
> +{
> + int ret;
> +
> + ret = sbd_sysfs_init();
> + if (ret < 0)
> + return ret;
> +
> + pr_info("%s: Sheepdog block device loaded\n", DRV_NAME);
> + return 0;
> +}
> +
> +void __exit sbd_exit(void)
> +{
> + sbd_sysfs_cleanup();
> + pr_info("%s: Sheepdog block device unloaded\n", DRV_NAME);
> +}
> +
> +module_init(sbd_init);
> +module_exit(sbd_exit);
> +
> +MODULE_AUTHOR("Liu Yuan <namei.unix at gmail.com>");
> +MODULE_DESCRIPTION("Sheepdog Block Device");
> +MODULE_LICENSE("GPL");
> --
> 1.8.1.2
>
> --
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog
>
--
--
Best Regards
Robin Dong
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.wpkg.org/pipermail/sheepdog/attachments/20140528/6d802c33/attachment-0004.html>
More information about the sheepdog
mailing list