[sheepdog] [PATCH v2 2/9] sbd: introduce basic framework for Sheepdog Block Device

Liu Yuan namei.unix at gmail.com
Sun May 25 09:53:14 CEST 2014


From: Liu Yuan <tailai.ly at taobao.com>

This is similar to Ceph's RBD. The main motivation is to replace complex
and inefficient middleware (such as iSCSI software) with a simple software stack
that exposes sheepdog storage through the Linux block device interface.

Usage:

We control the device the same way as RBD.

# associate vdi 'test' to /dev/sbd0
$ echo 127.0.0.1 7000 test > /sys/bus/sbd/add

# remove the device sbd0
$ echo 0 > /sys/bus/sbd/remove

For now we don't do actual read/write yet; only an aio framework is sketched out.

Signed-off-by: Liu Yuan <namei.unix at gmail.com>
---
 sbd/Kbuild               |   5 +
 sbd/Makefile             |   8 +
 sbd/sbd.h                | 119 +++++++++++++
 sbd/sheep.c              | 436 +++++++++++++++++++++++++++++++++++++++++++++++
 sbd/sheep_block_device.c | 299 ++++++++++++++++++++++++++++++++
 5 files changed, 867 insertions(+)
 create mode 100644 sbd/Kbuild
 create mode 100644 sbd/Makefile
 create mode 100644 sbd/sbd.h
 create mode 100644 sbd/sheep.c
 create mode 100644 sbd/sheep_block_device.c

diff --git a/sbd/Kbuild b/sbd/Kbuild
new file mode 100644
index 0000000..2bf714a
--- /dev/null
+++ b/sbd/Kbuild
@@ -0,0 +1,5 @@
+# kbuild description of the out-of-tree "sbd" module.
+MODULE_NAME=sbd
+
+# Adds ../include to the header search path (presumably where
+# sheepdog_proto.h lives - confirm) and defines DEBUG so that pr_debug()
+# is not compiled out.
+ccflags-y	:= -I$(PWD)/../include -std=gnu99 -DDEBUG
+obj-m		:= $(MODULE_NAME).o
+# sbd.ko is linked from these two objects.
+$(MODULE_NAME)-y := sheep_block_device.o sheep.o
diff --git a/sbd/Makefile b/sbd/Makefile
new file mode 100644
index 0000000..940c0c3
--- /dev/null
+++ b/sbd/Makefile
@@ -0,0 +1,8 @@
+# Kernel build tree to compile against; defaults to the running kernel's and
+# can be overridden on the command line or from the environment.
+KDIR	?= /lib/modules/`uname -r`/build
+
+# Each target hands over to the kernel build system with M= pointing at the
+# current directory.
+default:
+	$(MAKE) -C $(KDIR) M=$$PWD modules
+clean:
+	$(MAKE) -C $(KDIR) M=$$PWD clean
+install:
+	$(MAKE) -C $(KDIR) M=$$PWD modules_install
diff --git a/sbd/sbd.h b/sbd/sbd.h
new file mode 100644
index 0000000..a2252ff
--- /dev/null
+++ b/sbd/sbd.h
@@ -0,0 +1,119 @@
+#ifndef _SBD_H_
+#define _SBD_H_
+
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/tcp.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/kthread.h>
+#include <linux/gfp.h>
+
+#include "sheepdog_proto.h"
+
+#define DRV_NAME "sbd"
+#define DEV_NAME_LEN 32
+#define SBD_MINORS_PER_MAJOR 32
+#define SECTOR_SIZE 512
+
+/* Identity and cached metadata of the sheepdog VDI backing one device. */
+struct sheep_vdi {
+	/* inode object read from the sheep daemon by sheep_setup_vdi() */
+	struct sd_inode *inode;
+	/* VDI id taken from the SD_OP_LOCK_VDI reply */
+	u32 vid;
+	/* address and port of the sheep daemon, as written to the "add" file */
+	char ip[16];
+	unsigned int port;
+	/* VDI name, as written to the "add" file */
+	char name[SD_MAX_VDI_LEN];
+};
+
+/* Per-device state of one /dev/sbdN block device. */
+struct sbd_device {
+	/* connection to the sheep daemon, set up by sheep_setup_vdi() */
+	struct socket *sock;
+	int id;		/* blkdev unique id */
+	/* incremented once for every sheep_request that is allocated */
+	atomic_t seq_num;
+
+	/* major is the value returned by register_blkdev(); minor is set to 0 */
+	int major;
+	int minor;
+	struct gendisk *disk;
+	struct request_queue *rq;
+	spinlock_t queue_lock;   /* request queue lock */
+
+	struct sheep_vdi vdi;		/* Associated sheep image */
+
+	/* the reaper thread sleeps on inflight_wq until inflight_head is non-empty */
+	struct list_head inflight_head;
+	wait_queue_head_t inflight_wq;
+	/* only initialised so far; nothing is queued on it yet */
+	struct list_head blocking_head;
+
+	/* link in the global sbd_dev_list */
+	struct list_head list;
+	/* kthread running sbd_request_reaper() */
+	struct task_struct *reaper;
+};
+
+/*
+ * Control block for one block-layer request. It is split into one
+ * sheep_request per data object and completed when the last of them ends.
+ */
+struct sheep_aiocb {
+	/* the block-layer request being served */
+	struct request *request;
+	/* start of the request in bytes (blk_rq_pos() * SECTOR_SIZE) */
+	u64 offset;
+	/* size of the request in bytes (blk_rq_bytes()) */
+	u64 length;
+	/* status handed to [__]blk_end_request_all() on completion */
+	int ret;
+	/* number of sheep_requests (plus the submit-time reference) still pending */
+	u32 nr_requests;
+	/* bounce buffer of 'length' bytes holding the request payload */
+	char *buf;
+	/* offset into buf handed to the next sheep_request */
+	int buf_iter;
+	/* completion callback; the bool says whether the queue lock is held */
+	void (*aio_done_func)(struct sheep_aiocb *, bool);
+};
+
+/*
+ * Kind of operation carried by a sheep_request. Only SHEEP_READ and
+ * SHEEP_WRITE are assigned by alloc_sheep_request() at this point.
+ */
+enum sheep_request_type {
+	SHEEP_READ,
+	SHEEP_WRITE,
+	SHEEP_CREATE,
+};
+
+/* One per-object piece of a sheep_aiocb. */
+struct sheep_request {
+	/* list linkage; presumably for the inflight/blocking lists - not used yet */
+	struct list_head list;
+	/* parent control block */
+	struct sheep_aiocb *aiocb;
+	/* id of the data object this piece targets */
+	u64 oid;
+	/* value of dev->seq_num taken when the request was allocated */
+	u32 seq_num;
+	/* one of enum sheep_request_type */
+	int type;
+	/* byte offset inside the object */
+	int offset;
+	/* number of bytes covered by this piece */
+	int length;
+	/* points into aiocb->buf; not separately allocated */
+	char *buf;
+};
+
+void socket_shutdown(struct socket *sock);
+int sheep_setup_vdi(struct sbd_device *dev);
+struct sheep_aiocb *sheep_aiocb_setup(struct request *req);
+int sheep_aiocb_submit(struct sheep_aiocb *aiocb);
+int sheep_handle_reply(struct sbd_device *dev);
+
+/*
+ * sbd_debug(): debug logging helper.
+ *
+ * With CONFIG_DYNAMIC_DEBUG the macro defines its own _ddebug descriptor
+ * whose flags already contain _DPRINTK_FLAGS_PRINT together with the
+ * module-name, function-name and line-number flags, and passes it to
+ * __dynamic_pr_debug(). Without CONFIG_DYNAMIC_DEBUG it is plain pr_debug().
+ */
+#if defined(CONFIG_DYNAMIC_DEBUG)
+
+# define _SBD_FLAGS (_DPRINTK_FLAGS_PRINT | _DPRINTK_FLAGS_INCL_MODNAME \
+	| _DPRINTK_FLAGS_INCL_FUNCNAME | _DPRINTK_FLAGS_INCL_LINENO)
+
+# define SBD_DYNAMIC_DEBUG_METADATA(name, fmt)                  \
+	static struct _ddebug  __aligned(8)                     \
+	 __attribute__((section("__verbose"))) name = {          \
+		.modname = KBUILD_MODNAME,                      \
+		.function = __func__,                           \
+		.filename = __FILE__,                           \
+		.format = (fmt),                                \
+		.lineno = __LINE__,                             \
+		.flags =  _SBD_FLAGS,                           \
+	}
+
+# define sbd_debug(fmt, ...)                            \
+({                                                      \
+	SBD_DYNAMIC_DEBUG_METADATA(descriptor, fmt);    \
+	__dynamic_pr_debug(&descriptor, pr_fmt(fmt),    \
+			   ##__VA_ARGS__);              \
+})
+
+#else
+
+/* If -DDEBUG is not set, pr_debug = no_printk */
+# define sbd_debug pr_debug
+
+#endif /* CONFIG_DYNAMIC_DEBUG */
+
+#endif /* _SBD_H_ */
diff --git a/sbd/sheep.c b/sbd/sheep.c
new file mode 100644
index 0000000..33269b4
--- /dev/null
+++ b/sbd/sheep.c
@@ -0,0 +1,436 @@
+/*
+ * Copyright (C) 2014 Liu Yuan <namei.unix at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "sbd.h"
+
+/* Shut down both directions of @sock; a NULL socket is silently ignored. */
+void socket_shutdown(struct socket *sock)
+{
+	if (!sock)
+		return;
+
+	kernel_sock_shutdown(sock, SHUT_RDWR);
+}
+
+/* Map an aiocb to the sbd device stored in its request queue's queuedata. */
+static struct sbd_device *sheep_aiocb_to_device(struct sheep_aiocb *aiocb)
+{
+	struct request_queue *q = aiocb->request->q;
+
+	return q->queuedata;
+}
+
+/*
+ * Create a TCP socket and connect it to @ip_addr:@port.
+ *
+ * SO_LINGER is set to {on, 0 seconds} and TCP_NODELAY is enabled before
+ * connecting.
+ *
+ * Returns 0 (the connect() result) on success with *@sock holding the
+ * connected socket. On failure a negative errno is returned; if the socket
+ * had already been created it is shut down and released and *@sock is set
+ * to NULL.
+ */
+static int socket_create(struct socket **sock, const char *ip_addr, int port)
+{
+	struct sockaddr_in addr;
+	struct linger linger_opt = {1, 0};
+	int ret, nodelay = 1;
+
+	ret = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, sock);
+	if (ret < 0) {
+		pr_err("fail to create socket\n");
+		return ret;
+	}
+
+	/* kernel_setsockopt() switches to KERNEL_DS itself for kernel buffers */
+	ret = kernel_setsockopt(*sock, SOL_SOCKET, SO_LINGER,
+				(char *)&linger_opt, sizeof(linger_opt));
+	if (ret != 0) {
+		pr_err("Can't set SO_LINGER: %d\n", ret);
+		goto release;
+	}
+
+	/*
+	 * TCP_NODELAY is a protocol-level option. sock_setsockopt() only
+	 * understands SOL_SOCKET options and would misinterpret the option
+	 * number, so it has to go through the protocol's setsockopt handler.
+	 */
+	ret = kernel_setsockopt(*sock, SOL_TCP, TCP_NODELAY,
+				(char *)&nodelay, sizeof(nodelay));
+	if (ret != 0) {
+		pr_err("Can't set TCP_NODELAY: %d\n", ret);
+		goto release;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sin_family = AF_INET;
+	addr.sin_port = htons(port);
+	addr.sin_addr.s_addr = in_aton(ip_addr);
+	ret = (*sock)->ops->connect(*sock, (struct sockaddr *)&addr,
+				    sizeof(addr), 0);
+	if (ret < 0) {
+		pr_err("failed connect to %s:%d\n", ip_addr, port);
+		goto release;
+	}
+
+	return ret;
+release:
+	/* shutting down alone would leak the struct socket */
+	socket_shutdown(*sock);
+	sock_release(*sock);
+	*sock = NULL;
+	return ret;
+}
+
+/*
+ * Send (@send == true) or receive (@send == false) exactly @size bytes
+ * to/from @buf over @sock, looping over partial transfers.
+ *
+ * All signals are blocked for the duration of the transfer and the previous
+ * signal mask is restored before returning.
+ *
+ * Returns -EINVAL if @sock is NULL, the negative error of the failing
+ * kernel_sendmsg()/kernel_recvmsg() call, or -EPIPE if a call transferred
+ * 0 bytes. On success the return value is the (positive) byte count of the
+ * last chunk, not the total, so callers should only test for "< 0".
+ */
+static int socket_xmit(struct socket *sock, void *buf, int size, bool send,
+		       int msg_flags)
+{
+	int result;
+	struct msghdr msg;
+	struct kvec iov;
+	sigset_t blocked, oldset;
+
+	if (unlikely(!sock))
+		return -EINVAL;
+
+	/* Don't allow signals to interrupt the transmission */
+	siginitsetinv(&blocked, 0);
+	sigprocmask(SIG_SETMASK, &blocked, &oldset);
+
+	do {
+		/* socket-layer allocations made on our behalf must not start I/O */
+		sock->sk->sk_allocation = GFP_NOIO;
+		iov.iov_base = buf;
+		iov.iov_len = size;
+		msg.msg_name = NULL;
+		msg.msg_namelen = 0;
+		msg.msg_control = NULL;
+		msg.msg_controllen = 0;
+		msg.msg_flags = msg_flags | MSG_NOSIGNAL;
+
+		if (send)
+			result = kernel_sendmsg(sock, &msg, &iov, 1, size);
+		else
+			result = kernel_recvmsg(sock, &msg, &iov, 1, size,
+						msg.msg_flags);
+
+		if (result <= 0) {
+			if (result == 0)
+				result = -EPIPE; /* short read */
+			break;
+		}
+		/* advance past what was transferred and retry with the rest */
+		size -= result;
+		buf += result;
+	} while (size > 0);
+
+	sigprocmask(SIG_SETMASK, &oldset, NULL);
+
+	return result;
+}
+
+/* Receive @length bytes from @sock into @buf; see socket_xmit() for the result. */
+static int socket_read(struct socket *sock, char *buf, int length)
+{
+	const bool sending = false;
+
+	return socket_xmit(sock, buf, length, sending, 0);
+}
+
+/* Send @len bytes from @buf over @sock; see socket_xmit() for the result. */
+static int socket_write(struct socket *sock, void *buf, int len)
+{
+	const bool sending = true;
+
+	return socket_xmit(sock, buf, len, sending, 0);
+}
+
+/*
+ * Send the request header @hdr and, when @wlen is non-zero, @wlen bytes of
+ * @data after it.
+ *
+ * Returns a negative errno if the header cannot be sent, 0 if there is no
+ * payload, otherwise the result of sending the payload.
+ */
+static int sheep_submit_sdreq(struct socket *sock, struct sd_req *hdr,
+			      void *data, unsigned int wlen)
+{
+	int err;
+
+	err = socket_write(sock, hdr, sizeof(*hdr));
+	if (err < 0)
+		return err;
+
+	if (!wlen)
+		return 0;
+
+	return socket_write(sock, data, wlen);
+}
+
+/*
+ * Run the request synchronously.
+ *
+ * @hdr is sent (followed by hdr->data_length bytes of @data for requests
+ * flagged SD_FLAG_CMD_WRITE) and the response header is then read back into
+ * the same memory, overwriting @hdr. For non-write requests the response
+ * payload is read into @data, limited to the smaller of the requested
+ * length and the length announced in the response.
+ *
+ * Returns 0 once the exchange has completed or a negative errno on a
+ * transport failure. The protocol-level result must be checked by the
+ * caller in the response header.
+ */
+static int sheep_run_sdreq(struct socket *sock, struct sd_req *hdr,
+			   void *data)
+{
+	struct sd_rsp *rsp = (struct sd_rsp *)hdr;
+	unsigned int wlen, rlen;
+	int ret;
+
+	if (hdr->flags & SD_FLAG_CMD_WRITE) {
+		wlen = hdr->data_length;
+		rlen = 0;
+	} else {
+		wlen = 0;
+		rlen = hdr->data_length;
+	}
+
+	ret = sheep_submit_sdreq(sock, hdr, data, wlen);
+	if (ret < 0) {
+		pr_err("failed to submit the request\n");
+		return ret;
+	}
+
+	ret = socket_read(sock, (char *)rsp, sizeof(*rsp));
+	if (ret < 0) {
+		pr_err("failed to read a response hdr\n");
+		return ret;
+	}
+
+	/* never read more than the daemon says it is sending */
+	if (rlen > rsp->data_length)
+		rlen = rsp->data_length;
+
+	if (rlen) {
+		ret = socket_read(sock, data, rlen);
+		if (ret < 0) {
+			pr_err("failed to read the response data\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Issue SD_OP_LOCK_VDI for dev->vdi.name and store the VDI id from the
+ * reply in dev->vdi.vid.
+ *
+ * Returns 0 on success, the error of sheep_run_sdreq() on a transport
+ * failure, or -EIO when the reply's result is not SD_RES_SUCCESS.
+ */
+static int lookup_sheep_vdi(struct sbd_device *dev)
+{
+	struct sd_req req = {};
+	/* the reply header is read back over the request header */
+	struct sd_rsp *reply = (struct sd_rsp *)&req;
+	int err;
+
+	req.flags = SD_FLAG_CMD_WRITE;
+	req.data_length = SD_MAX_VDI_LEN;
+	req.opcode = SD_OP_LOCK_VDI;
+
+	err = sheep_run_sdreq(dev->sock, &req, dev->vdi.name);
+	if (err < 0)
+		return err;
+
+	/* XXX switch case */
+	if (reply->result == SD_RES_SUCCESS) {
+		dev->vdi.vid = reply->vdi.vdi_id;
+		return 0;
+	}
+
+	sbd_debug("Cannot get VDI info for %s\n", dev->vdi.name);
+	return -EIO;
+}
+
+/*
+ * Connect @dev to its sheep daemon and load the VDI's inode object.
+ *
+ * Opens the socket to dev->vdi.ip:dev->vdi.port, looks up (locks) the VDI
+ * named dev->vdi.name, then reads the inode object into a freshly allocated
+ * buffer that is stored in dev->vdi.inode on success.
+ *
+ * Returns 0 on success. On failure a negative errno is returned, the socket
+ * is shut down and released, dev->sock is NULL and the inode buffer is freed
+ * (dev->vdi.inode is left untouched).
+ */
+int sheep_setup_vdi(struct sbd_device *dev)
+{
+	struct sd_req hdr = {};
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	struct sd_inode *inode;
+	int ret;
+
+	inode = vzalloc(sizeof(*inode));
+	if (!inode)
+		return -ENOMEM;
+
+	ret = socket_create(&dev->sock, dev->vdi.ip, dev->vdi.port);
+	if (ret < 0)
+		goto out;
+
+	ret = lookup_sheep_vdi(dev);
+	if (ret < 0)
+		goto out_release;
+
+	hdr.opcode = SD_OP_READ_OBJ;
+	hdr.data_length = SD_INODE_SIZE;
+	hdr.obj.oid = vid_to_vdi_oid(dev->vdi.vid);
+	hdr.obj.offset = 0;
+	ret = sheep_run_sdreq(dev->sock, &hdr, inode);
+	if (ret < 0)
+		goto out_release;
+
+	/* XXX switch case */
+	if (rsp->result != SD_RES_SUCCESS) {
+		ret = -EIO;
+		goto out_release;
+	}
+
+	dev->vdi.inode = inode;
+	pr_info("%s: Associated to %s\n", DRV_NAME, inode->name);
+	return 0;
+out_release:
+	/* a shutdown alone would leak the struct socket */
+	socket_shutdown(dev->sock);
+	sock_release(dev->sock);
+	dev->sock = NULL;
+out:
+	vfree(inode);
+	return ret;
+}
+
+/*
+ * Placeholder: hand @req over to the sheep daemon. The body is empty in
+ * this patch, so a request passed here is neither sent nor completed.
+ */
+static void submit_sheep_request(struct sheep_request *req)
+{
+}
+
+/* Release @aiocb together with the bounce buffer it owns. */
+static inline void free_sheep_aiocb(struct sheep_aiocb *aiocb)
+{
+	char *payload = aiocb->buf;
+
+	kfree(payload);
+	kfree(aiocb);
+}
+
+/*
+ * Completion callback for write aiocbs: end the block request with
+ * aiocb->ret and free the aiocb. @locked tells whether the caller already
+ * holds the queue lock and therefore selects the __blk_end_request_all()
+ * variant.
+ */
+static void aio_write_done(struct sheep_aiocb *aiocb, bool locked)
+{
+	struct request *rq = aiocb->request;
+	int status = aiocb->ret;
+
+	sbd_debug("off %llu, len %llu\n", aiocb->offset, aiocb->length);
+
+	if (!locked)
+		blk_end_request_all(rq, status);
+	else
+		__blk_end_request_all(rq, status);
+
+	free_sheep_aiocb(aiocb);
+}
+
+/*
+ * Completion callback for read aiocbs: end the block request with
+ * aiocb->ret and free the aiocb. @locked tells whether the caller already
+ * holds the queue lock and therefore selects the __blk_end_request_all()
+ * variant.
+ */
+static void aio_read_done(struct sheep_aiocb *aiocb, bool locked)
+{
+	struct request *rq = aiocb->request;
+	int status = aiocb->ret;
+
+	sbd_debug("off %llu, len %llu\n", aiocb->offset, aiocb->length);
+
+	if (!locked)
+		blk_end_request_all(rq, status);
+	else
+		__blk_end_request_all(rq, status);
+
+	free_sheep_aiocb(aiocb);
+}
+
+/*
+ * Build the aiocb describing block request @req.
+ *
+ * A zeroed bounce buffer of blk_rq_bytes(@req) bytes is allocated; for
+ * writes the payload of every segment of @req is copied into it. The
+ * completion callback is chosen from the request's data direction.
+ *
+ * Returns the new aiocb, ERR_PTR(-ENOMEM) if either allocation fails, or
+ * ERR_PTR(-EINVAL) for an unexpected data direction. Nothing stays allocated
+ * on failure.
+ *
+ * NOTE(review): the allocations use GFP_KERNEL although the visible caller
+ * is the queue's request function - confirm that sleeping is allowed there.
+ */
+struct sheep_aiocb *sheep_aiocb_setup(struct request *req)
+{
+	struct sheep_aiocb *aiocb = kmalloc(sizeof(*aiocb), GFP_KERNEL);
+	struct req_iterator iter;
+	struct bio_vec *bvec;
+	int len = 0;
+
+	if (!aiocb)
+		return ERR_PTR(-ENOMEM);
+
+	aiocb->offset = blk_rq_pos(req) * SECTOR_SIZE;
+	aiocb->length = blk_rq_bytes(req);
+	aiocb->nr_requests = 0;
+	aiocb->ret = 0;
+	aiocb->buf_iter = 0;
+	aiocb->request = req;
+	aiocb->buf = kzalloc(aiocb->length, GFP_KERNEL);
+	if (!aiocb->buf) {
+		/* the write path below would otherwise memcpy() into NULL */
+		kfree(aiocb);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	switch (rq_data_dir(req)) {
+	case WRITE:
+		/* gather the scattered segments into the bounce buffer */
+		rq_for_each_segment(bvec, req, iter) {
+			unsigned long flags;
+			void *addr = bvec_kmap_irq(bvec, &flags);
+
+			memcpy(aiocb->buf + len, addr, bvec->bv_len);
+			flush_dcache_page(bvec->bv_page);
+			bvec_kunmap_irq(addr, &flags);
+
+			len += bvec->bv_len;
+		}
+		aiocb->aio_done_func = aio_write_done;
+		break;
+	case READ:
+		aiocb->aio_done_func = aio_read_done;
+		break;
+	default:
+		/* impossible case */
+		WARN_ON(1);
+		free_sheep_aiocb(aiocb);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return aiocb;
+}
+
+/*
+ * Allocate the sheep_request covering @len bytes at @offset inside object
+ * @oid on behalf of @aiocb.
+ *
+ * The request's buffer is the next unused part of aiocb->buf. On success
+ * aiocb->buf_iter advances by @len and aiocb->nr_requests is incremented.
+ *
+ * Returns the request, ERR_PTR(-ENOMEM) if the allocation fails, or
+ * ERR_PTR(-EINVAL) for an unexpected data direction.
+ */
+static struct sheep_request *alloc_sheep_request(struct sheep_aiocb *aiocb,
+						 u64 oid, int len,
+						 int offset)
+{
+	struct sheep_request *sreq;
+	struct sbd_device *sbd;
+
+	sreq = kmalloc(sizeof(*sreq), GFP_KERNEL);
+	if (!sreq)
+		return ERR_PTR(-ENOMEM);
+
+	sbd = sheep_aiocb_to_device(aiocb);
+
+	sreq->aiocb = aiocb;
+	sreq->oid = oid;
+	sreq->offset = offset;
+	sreq->length = len;
+	sreq->buf = aiocb->buf + aiocb->buf_iter;
+	sreq->seq_num = atomic_inc_return(&sbd->seq_num);
+
+	switch (rq_data_dir(aiocb->request)) {
+	case READ:
+		sreq->type = SHEEP_READ;
+		break;
+	case WRITE:
+		sreq->type = SHEEP_WRITE;
+		break;
+	default:
+		/* impossible case */
+		WARN_ON(1);
+		kfree(sreq);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* account for the piece only once it is known to be valid */
+	aiocb->nr_requests++;
+	aiocb->buf_iter += len;
+
+	return sreq;
+}
+
+/*
+ * Finish @req: drop its reference on the parent aiocb, run the aiocb's
+ * completion callback if that was the last one, and free @req.
+ * @queue_locked is forwarded to the completion callback.
+ */
+static void end_sheep_request(struct sheep_request *req, bool queue_locked)
+{
+	struct sheep_aiocb *parent = req->aiocb;
+
+	parent->nr_requests--;
+	if (!parent->nr_requests)
+		parent->aio_done_func(parent, queue_locked);
+
+	/* only @req is touched from here on; the parent may be gone already */
+	sbd_debug("end oid %llx off %d, len %d, seq %u\n", req->oid,
+		  req->offset, req->length, req->seq_num);
+	kfree(req);
+}
+
+/*
+ * Split @aiocb into per-object sheep requests and submit them.
+ *
+ * The byte range [offset, offset + length) is cut at SD_DATA_OBJ_SIZE
+ * boundaries and one sheep_request is created per data object touched.
+ * Pieces whose object has no entry in the inode's data_vdi_id table (object
+ * not created yet) are completed immediately; all others are passed to
+ * submit_sheep_request().
+ *
+ * Completion paths are invoked with "queue locked" set to true, i.e. the
+ * caller is expected to hold the queue lock.
+ *
+ * Returns 0 on success. If a sheep_request cannot be allocated, the error is
+ * recorded in aiocb->ret, no further pieces are issued, and the negative
+ * errno is returned; the aiocb is then completed with that error as soon as
+ * its last outstanding piece (if any) has ended.
+ */
+int sheep_aiocb_submit(struct sheep_aiocb *aiocb)
+{
+	struct sbd_device *dev = sheep_aiocb_to_device(aiocb);
+	u64 offset = aiocb->offset;
+	u64 total = aiocb->length;
+	u64 start = offset % SD_DATA_OBJ_SIZE;
+	u32 vid = dev->vdi.vid;
+	u64 oid = vid_to_data_oid(vid, offset / SD_DATA_OBJ_SIZE);
+	int len = SD_DATA_OBJ_SIZE - start;
+	int ret = 0;
+
+	if (total < len)
+		len = total;
+
+	sbd_debug("submit oid %llx off %llu, len %llu\n", oid, offset, total);
+	/*
+	 * Make sure we don't free the aiocb before we are done with all
+	 * requests. This additional reference is dropped at the end of this
+	 * function.
+	 */
+	aiocb->nr_requests++;
+
+	do {
+		struct sheep_request *req;
+		/* must follow oid, which advances on every iteration */
+		u32 idx = data_oid_to_idx(oid);
+
+		req = alloc_sheep_request(aiocb, oid, len, start);
+		if (IS_ERR(req)) {
+			/*
+			 * Fail the whole aiocb but fall through to the
+			 * reference drop below, otherwise the aiocb leaks and
+			 * the block request is never completed.
+			 */
+			ret = PTR_ERR(req);
+			aiocb->ret = ret;
+			break;
+		}
+
+		if (likely(dev->vdi.inode->data_vdi_id[idx])) {
+			submit_sheep_request(req);
+		} else {
+			/*
+			 * Object is not created yet... Requests are only ever
+			 * SHEEP_READ or SHEEP_WRITE here and both are simply
+			 * ended for now.
+			 */
+			end_sheep_request(req, true);
+		}
+
+		oid++;
+		total -= len;
+		start = (start + len) % SD_DATA_OBJ_SIZE;
+		len = total > SD_DATA_OBJ_SIZE ? SD_DATA_OBJ_SIZE : total;
+	} while (total > 0);
+
+	if (--aiocb->nr_requests == 0)
+		aiocb->aio_done_func(aiocb, true);
+
+	return ret;
+}
+
+/*
+ * Placeholder for processing one reply from the sheep daemon; called by the
+ * reaper thread. It does nothing and always returns 0 in this patch.
+ */
+int sheep_handle_reply(struct sbd_device *dev)
+{
+	return 0;
+}
diff --git a/sbd/sheep_block_device.c b/sbd/sheep_block_device.c
new file mode 100644
index 0000000..c2c9dce
--- /dev/null
+++ b/sbd/sheep_block_device.c
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) 2014 Liu Yuan <namei.unix at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * SBD - Sheepdog Block Device
+ *
+ * This file implements the glue functions to export sheep vdi as Linux block
+ * device.
+ *
+ */
+
+#include "sbd.h"
+
+static LIST_HEAD(sbd_dev_list);
+
+/* Block device operations of sbd disks; only the owning module is set. */
+static const struct block_device_operations sbd_bd_ops = {
+	.owner		= THIS_MODULE,
+};
+
+/*
+ * Turn block request @req into an aiocb and submit it.
+ *
+ * Called from the queue's request function, i.e. with the queue lock held.
+ *
+ * Returns the result of sheep_aiocb_submit(), or the negative errno from
+ * sheep_aiocb_setup(); in the latter case @req is completed with that error.
+ */
+static int sbd_submit_request(struct request *req)
+{
+	struct sheep_aiocb *aiocb = sheep_aiocb_setup(req);
+
+	if (IS_ERR(aiocb)) {
+		int err = PTR_ERR(aiocb);
+
+		/*
+		 * The request has already been fetched from the queue, so
+		 * nobody else will ever complete it; fail it here.
+		 */
+		__blk_end_request_all(req, err);
+		return err;
+	}
+
+	return sheep_aiocb_submit(aiocb);
+}
+
+/*
+ * Request function of the sbd queue: fetch requests from @q one by one and
+ * submit them. Requests that are not of type REQ_TYPE_FS are ended at once
+ * with status 0. Processing stops when the queue is empty or a submission
+ * fails.
+ */
+static void sbd_request_submiter(struct request_queue *q)
+{
+	for (;;) {
+		struct request *rq = blk_fetch_request(q);
+
+		if (rq == NULL)
+			return;
+
+		/* filter out block requests we don't understand */
+		if (rq->cmd_type != REQ_TYPE_FS) {
+			__blk_end_request_all(rq, 0);
+			continue;
+		}
+
+		if (sbd_submit_request(rq) < 0)
+			return;
+	}
+}
+
+/*
+ * Allocate the gendisk and the request queue of @dev, limit requests to one
+ * data object, size the disk from the VDI inode and publish it through
+ * add_disk().
+ *
+ * Returns 0 on success or -ENOMEM if the disk or the queue cannot be
+ * allocated.
+ */
+static int sbd_add_disk(struct sbd_device *dev)
+{
+	struct request_queue *queue;
+	struct gendisk *gd;
+
+	gd = alloc_disk(SBD_MINORS_PER_MAJOR);
+	if (!gd)
+		return -ENOMEM;
+
+	queue = blk_init_queue(sbd_request_submiter, &dev->queue_lock);
+	if (!queue) {
+		put_disk(gd);
+		return -ENOMEM;
+	}
+
+	/* describe the disk */
+	snprintf(gd->disk_name, DEV_NAME_LEN, DRV_NAME "%d", dev->id);
+	gd->major = dev->major;
+	gd->first_minor = 0;
+	gd->fops = &sbd_bd_ops;
+	gd->private_data = dev;
+
+	/* a single request never spans more than one data object's worth */
+	blk_queue_max_hw_sectors(queue, SD_DATA_OBJ_SIZE / SECTOR_SIZE);
+	blk_queue_max_segments(queue, SD_DATA_OBJ_SIZE / SECTOR_SIZE);
+	blk_queue_max_segment_size(queue, SD_DATA_OBJ_SIZE);
+	blk_queue_io_opt(queue, SD_DATA_OBJ_SIZE);
+
+	/* cross-link device, disk and queue */
+	queue->queuedata = dev;
+	gd->queue = queue;
+	dev->rq = queue;
+	dev->disk = gd;
+
+	set_capacity(gd, dev->vdi.inode->vdi_size / SECTOR_SIZE);
+	add_disk(gd);
+
+	return 0;
+}
+
+/*
+ * Body of the per-device reaper kthread (@data is the sbd_device).
+ *
+ * Sleeps until the inflight list is non-empty or the thread is asked to
+ * stop, and calls sheep_handle_reply() whenever the list has entries. The
+ * loop only ends once a stop was requested AND the inflight list is empty.
+ * Always returns 0.
+ */
+static int sbd_request_reaper(void *data)
+{
+	struct sbd_device *dev = data;
+
+	while (!kthread_should_stop() || !list_empty(&dev->inflight_head)) {
+		wait_event_interruptible(dev->inflight_wq,
+					 kthread_should_stop() ||
+					 !list_empty(&dev->inflight_head));
+
+		/* woken without work (stop request): re-evaluate the loop condition */
+		if (list_empty(&dev->inflight_head))
+			continue;
+
+		sheep_handle_reply(dev);
+	}
+	return 0;
+}
+
+/*
+ * Release everything owned by @dev: the connection to the sheep daemon (if
+ * one is open), the cached inode and the device structure itself.
+ */
+static inline void free_sbd_device(struct sbd_device *dev)
+{
+	if (dev->sock) {
+		socket_shutdown(dev->sock);
+		/* shutting down does not free the socket; release it as well */
+		sock_release(dev->sock);
+		dev->sock = NULL;
+	}
+	vfree(dev->vdi.inode);
+	kfree(dev);
+}
+
+/*
+ * Store handler of /sys/bus/sbd/add.
+ *
+ * @buf is expected to hold "<ip> <port> <vdi name>". The function connects
+ * to the sheep daemon, loads the VDI, registers a block major, starts the
+ * reaper thread and adds the disk. A module reference is held for as long as
+ * the device exists.
+ *
+ * Returns @count on success or a negative errno; on failure everything set
+ * up so far is torn down again.
+ */
+static ssize_t sbd_add(struct bus_type *bus, const char *buf,
+		       size_t count)
+{
+	struct sbd_device *dev, *tmp;
+	ssize_t ret;
+	size_t name_len;
+	int new_id = 0;
+	int name_pos = 0;
+	char name[DEV_NAME_LEN];
+
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev) {
+		ret = -ENOMEM;
+		goto err_put;
+	}
+
+	/*
+	 * The input comes straight from user space, so every field must be
+	 * bounded. "%15s" matches the 16-byte vdi.ip array; %n records where
+	 * the VDI name starts so that it can be length-checked by hand.
+	 */
+	if (sscanf(buf, "%15s %u %n", dev->vdi.ip, &dev->vdi.port,
+		   &name_pos) != 2 || name_pos <= 0) {
+		ret = -EINVAL;
+		goto err_free_dev;
+	}
+
+	name_len = strcspn(buf + name_pos, " \t\n");
+	if (name_len == 0 || name_len >= sizeof(dev->vdi.name)) {
+		ret = -EINVAL;
+		goto err_free_dev;
+	}
+	/* dev was kzalloc()ed, so the copy stays NUL terminated */
+	memcpy(dev->vdi.name, buf + name_pos, name_len);
+
+	spin_lock_init(&dev->queue_lock);
+	INIT_LIST_HEAD(&dev->inflight_head);
+	INIT_LIST_HEAD(&dev->blocking_head);
+	init_waitqueue_head(&dev->inflight_wq);
+
+	/* pick an id one above the highest id in use ('>=' so id 0 counts) */
+	list_for_each_entry(tmp, &sbd_dev_list, list) {
+		if (tmp->id >= new_id)
+			new_id = tmp->id + 1;
+	}
+
+	ret = sheep_setup_vdi(dev);
+	if (ret < 0)
+		goto err_free_dev;
+
+	dev->id = new_id;
+	snprintf(name, DEV_NAME_LEN, DRV_NAME "%d", dev->id);
+	ret = register_blkdev(0, name);
+	if (ret < 0)
+		goto err_free_dev;
+	dev->major = ret;
+	dev->minor = 0;
+
+	dev->reaper = kthread_run(sbd_request_reaper, dev, "sbd_reaper");
+	if (IS_ERR(dev->reaper)) {
+		ret = PTR_ERR(dev->reaper);
+		goto err_unreg_blkdev;
+	}
+
+	ret = sbd_add_disk(dev);
+	if (ret < 0)
+		goto err_stop_reaper;
+
+	list_add_tail(&dev->list, &sbd_dev_list);
+
+	return count;
+err_stop_reaper:
+	/* the thread references dev, so it must be gone before dev is freed */
+	kthread_stop(dev->reaper);
+err_unreg_blkdev:
+	unregister_blkdev(dev->major, name);
+err_free_dev:
+	free_sbd_device(dev);
+err_put:
+	module_put(THIS_MODULE);
+	pr_err("%s: error adding device %s", DRV_NAME, buf);
+	return ret;
+}
+
+/*
+ * Tear down the gendisk of @dev, if it has one: unregister the disk when it
+ * is up, clean up its request queue when one is attached, and drop the disk
+ * reference.
+ */
+static void sbd_del_disk(struct sbd_device *dev)
+{
+	struct gendisk *disk = dev->disk;
+
+	if (!disk)
+		return;
+
+	if (disk->flags & GENHD_FL_UP)
+		del_gendisk(disk);
+	if (disk->queue)
+		blk_cleanup_queue(disk->queue);
+	put_disk(disk);
+}
+
+/*
+ * Store handler of /sys/bus/sbd/remove.
+ *
+ * @buf holds the decimal id of the device to remove. The device is unlinked
+ * from the device list, its reaper thread is stopped, the disk is deleted
+ * and the device is freed; the module reference taken by sbd_add() is
+ * dropped.
+ *
+ * Returns @count on success, the kstrtoul() error or -EINVAL for a malformed
+ * or out-of-range id, and -ENOENT if no device has that id.
+ */
+static ssize_t sbd_remove(struct bus_type *bus, const char *buf,
+			  size_t count)
+{
+	/* must start out NULL: the loop body never runs for an empty list */
+	struct sbd_device *dev = NULL;
+	struct sbd_device *tmp;
+	unsigned long ul;
+	int target_id, ret;
+
+	ret = kstrtoul(buf, 10, &ul);
+	if (ret)
+		return ret;
+
+	/* convert to int; abort if we lost anything in the conversion */
+	target_id = (int)ul;
+	if (target_id != ul)
+		return -EINVAL;
+
+	list_for_each_entry(tmp, &sbd_dev_list, list) {
+		if (tmp->id == target_id) {
+			dev = tmp;
+			break;
+		}
+	}
+
+	if (!dev)
+		return -ENOENT;
+
+	list_del(&dev->list);
+
+	kthread_stop(dev->reaper);
+	wake_up_interruptible(&dev->inflight_wq);
+
+	sbd_del_disk(dev);
+	free_sbd_device(dev);
+	module_put(THIS_MODULE);
+
+	return count;
+}
+
+/*
+ * Bus attributes "add" and "remove": both are writable by the owner only
+ * (S_IWUSR) and have no show method.
+ */
+static struct bus_attribute sbd_bus_attrs[] = {
+	__ATTR(add, S_IWUSR, NULL, sbd_add),
+	__ATTR(remove, S_IWUSR, NULL, sbd_remove),
+	__ATTR_NULL
+};
+
+/* The "sbd" bus, which carries the add/remove control attributes. */
+static struct bus_type sbd_bus_type = {
+	.name		= "sbd",
+	.bus_attrs	= sbd_bus_attrs,
+};
+
+/* Release callback of sbd_root_dev; intentionally empty. */
+static void sbd_root_dev_release(struct device *dev)
+{
+}
+
+/* Statically allocated root device registered under the name "sbd". */
+static struct device sbd_root_dev = {
+	.init_name	= "sbd",
+	.release	= sbd_root_dev_release,
+};
+
+/*
+ * Create control files in /sys/bus/sbd/...
+ *
+ * Registers the sbd root device and then the sbd bus. Returns 0 on success
+ * or the negative errno of the failing registration; nothing stays
+ * registered on failure.
+ */
+static int sbd_sysfs_init(void)
+{
+	int ret;
+
+	ret = device_register(&sbd_root_dev);
+	if (ret < 0) {
+		/*
+		 * device_register() takes a reference even when it fails; it
+		 * has to be dropped with put_device().
+		 */
+		put_device(&sbd_root_dev);
+		return ret;
+	}
+
+	ret = bus_register(&sbd_bus_type);
+	if (ret < 0)
+		device_unregister(&sbd_root_dev);
+
+	return ret;
+}
+
+/* Undo sbd_sysfs_init(): unregister the bus first, then the root device. */
+static void sbd_sysfs_cleanup(void)
+{
+	bus_unregister(&sbd_bus_type);
+	device_unregister(&sbd_root_dev);
+}
+
+/*
+ * Module entry point: set up the sysfs control interface. Returns 0 on
+ * success or the error of sbd_sysfs_init().
+ */
+int __init sbd_init(void)
+{
+	int err = sbd_sysfs_init();
+
+	if (err >= 0) {
+		pr_info("%s: Sheepdog block device loaded\n", DRV_NAME);
+		return 0;
+	}
+
+	return err;
+}
+
+/* Module exit point: remove the sysfs control interface and log the unload. */
+void __exit sbd_exit(void)
+{
+	sbd_sysfs_cleanup();
+	pr_info("%s: Sheepdog block device unloaded\n", DRV_NAME);
+}
+
+module_init(sbd_init);
+module_exit(sbd_exit);
+
+MODULE_AUTHOR("Liu Yuan <namei.unix at gmail.com>");
+MODULE_DESCRIPTION("Sheepdog Block Device");
+MODULE_LICENSE("GPL");
-- 
1.8.1.2




More information about the sheepdog mailing list