<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">2014-05-26 13:18 GMT+08:00 Liu Yuan <span dir="ltr"><<a href="mailto:namei.unix@gmail.com" target="_blank">namei.unix@gmail.com</a>></span>:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">From: Liu Yuan <<a href="mailto:tailai.ly@taobao.com">tailai.ly@taobao.com</a>><br>
<br>
This is similar to Ceph's RBD. The main motivation is to replace complex<br>
and inefficient middleware (such as iSCSI software) with simple software stacks<br>
to expose sheepdog storage as Linux block device interface.<br>
<br>
Usage:<br>
<br>
We control the device the same way as RBD.<br>
<br>
# associate vdi 'test' to /dev/sbd0<br>
$ echo 127.0.0.1 7000 test > /sys/bus/sbd/add<br>
<br>
# remove the device sbd0<br>
$ echo 0 > /sys/bus/sbd/remove<br>
<br>
For now we don't do actual read/write yet; just an aio framework is sketched out.<br>
<br>
Signed-off-by: Liu Yuan <<a href="mailto:namei.unix@gmail.com">namei.unix@gmail.com</a>><br>
---<br>
sbd/Kbuild | 5 +<br>
sbd/Makefile | 8 +<br>
sbd/sbd.h | 119 +++++++++++++<br>
sbd/sheep.c | 436 +++++++++++++++++++++++++++++++++++++++++++++++<br>
sbd/sheep_block_device.c | 299 ++++++++++++++++++++++++++++++++<br>
5 files changed, 867 insertions(+)<br>
create mode 100644 sbd/Kbuild<br>
create mode 100644 sbd/Makefile<br>
create mode 100644 sbd/sbd.h<br>
create mode 100644 sbd/sheep.c<br>
create mode 100644 sbd/sheep_block_device.c<br>
<br>
diff --git a/sbd/Kbuild b/sbd/Kbuild<br>
new file mode 100644<br>
index 0000000..c1f7c07<br>
--- /dev/null<br>
+++ b/sbd/Kbuild<br>
@@ -0,0 +1,5 @@<br>
+MODULE_NAME=sbd<br>
+<br>
+ccflags-y := -I$(PWD)/../include -DDEBUG<br>
+obj-m := $(MODULE_NAME).o<br>
+$(MODULE_NAME)-y := sheep_block_device.o sheep.o<br>
diff --git a/sbd/Makefile b/sbd/Makefile<br>
new file mode 100644<br>
index 0000000..940c0c3<br>
--- /dev/null<br>
+++ b/sbd/Makefile<br>
@@ -0,0 +1,8 @@<br>
+KDIR ?= /lib/modules/`uname -r`/build<br>
+<br>
+default:<br>
+ $(MAKE) -C $(KDIR) M=$$PWD modules<br>
+clean:<br>
+ $(MAKE) -C $(KDIR) M=$$PWD clean<br>
+install:<br>
+ $(MAKE) -C $(KDIR) M=$$PWD modules_install<br>
diff --git a/sbd/sbd.h b/sbd/sbd.h<br>
new file mode 100644<br>
index 0000000..e938561<br>
--- /dev/null<br>
+++ b/sbd/sbd.h<br>
@@ -0,0 +1,119 @@<br>
+#ifndef _SBD_H_<br>
+#define _SBD_H_<br>
+<br>
+#include <linux/socket.h><br>
+#include <linux/in.h><br>
+#include <linux/inet.h><br>
+#include <linux/socket.h><br>
+#include <linux/net.h><br>
+#include <linux/tcp.h><br>
+#include <linux/slab.h><br>
+#include <linux/kernel.h><br>
+#include <linux/device.h><br>
+#include <linux/module.h><br>
+#include <linux/fs.h><br>
+#include <linux/blkdev.h><br>
+#include <linux/kthread.h><br>
+#include <linux/gfp.h><br>
+<br>
+#include "sheepdog_proto.h"<br>
+<br>
+#define DRV_NAME "sbd"<br>
+#define DEV_NAME_LEN 32<br>
+#define SBD_MINORS_PER_MAJOR 32<br>
+#define SECTOR_SIZE 512<br>
+<br>
+struct sheep_vdi {<br>
+ struct sd_inode *inode;<br>
+ u32 vid;<br>
+ char ip[16];<br>
+ unsigned int port;<br>
+ char name[SD_MAX_VDI_LEN];<br>
+};<br>
+<br>
+struct sbd_device {<br>
+ struct socket *sock;<br>
+ int id; /* blkdev unique id */<br>
+ atomic_t seq_num;<br>
+<br>
+ int major;<br>
+ int minor;<br>
+ struct gendisk *disk;<br>
+ struct request_queue *rq;<br>
+ spinlock_t queue_lock; /* request queue lock */<br>
+<br>
+ struct sheep_vdi vdi; /* Associated sheep image */<br>
+<br>
+ struct list_head inflight_head;<br>
+ wait_queue_head_t inflight_wq;<br>
+ struct list_head blocking_head;<br>
+<br>
+ struct list_head list;<br>
+ struct task_struct *reaper;<br>
+};<br>
+<br>
+struct sheep_aiocb {<br>
+ struct request *request;<br>
+ u64 offset;<br>
+ u64 length;<br>
+ int ret;<br>
+ u32 nr_requests;<br>
+ char *buf;<br>
+ int buf_iter;<br>
+ void (*aio_done_func)(struct sheep_aiocb *, bool);<br>
+};<br>
+<br>
+enum sheep_request_type {<br>
+ SHEEP_READ,<br>
+ SHEEP_WRITE,<br>
+ SHEEP_CREATE,<br>
+};<br>
+<br>
+struct sheep_request {<br>
+ struct list_head list;<br>
+ struct sheep_aiocb *aiocb;<br>
+ u64 oid;<br>
+ u32 seq_num;<br>
+ int type;<br>
+ int offset;<br>
+ int length;<br>
+ char *buf;<br>
+};<br>
+<br>
+void socket_shutdown(struct socket *sock);<br>
+int sheep_setup_vdi(struct sbd_device *dev);<br>
+struct sheep_aiocb *sheep_aiocb_setup(struct request *req);<br>
+int sheep_aiocb_submit(struct sheep_aiocb *aiocb);<br>
+int sheep_handle_reply(struct sbd_device *dev);<br>
+<br>
+#if defined(CONFIG_DYNAMIC_DEBUG) && defined _DPRINTK_FLAGS_INCL_MODNAME<br>
+<br>
+# define _SBD_FLAGS (_DPRINTK_FLAGS_PRINT | _DPRINTK_FLAGS_INCL_MODNAME \<br>
+ | _DPRINTK_FLAGS_INCL_FUNCNAME | _DPRINTK_FLAGS_INCL_LINENO)<br>
+<br>
+# define SBD_DYNAMIC_DEBUG_METADATA(name, fmt) \<br>
+ static struct _ddebug __aligned(8) \<br>
+ __attribute__((section("__verbose"))) name = { \<br>
+ .modname = KBUILD_MODNAME, \<br>
+ .function = __func__, \<br>
+ .filename = __FILE__, \<br>
+ .format = (fmt), \<br>
+ .lineno = __LINE__, \<br>
+ .flags = _SBD_FLAGS, \<br>
+ }<br>
+<br>
+# define sbd_debug(fmt, ...) \<br>
+({ \<br>
+ SBD_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \<br>
+ __dynamic_pr_debug(&descriptor, pr_fmt(fmt), \<br>
+ ##__VA_ARGS__); \<br>
+})<br>
+<br>
+#else<br>
+<br>
+/* If -DDEBUG is not set, pr_debug = no_printk */<br>
+# define sbd_debug pr_debug<br>
+<br>
+#endif /* CONFIG_DYNAMIC_DEBUG */<br>
+<br>
+#endif /* _SBD_H_ */<br>
diff --git a/sbd/sheep.c b/sbd/sheep.c<br>
new file mode 100644<br>
index 0000000..33269b4<br>
--- /dev/null<br>
+++ b/sbd/sheep.c<br>
@@ -0,0 +1,436 @@<br>
+/*<br>
+ * Copyright (C) 2014 Liu Yuan <<a href="mailto:namei.unix@gmail.com">namei.unix@gmail.com</a>><br>
+ *<br>
+ * This program is free software; you can redistribute it and/or<br>
+ * modify it under the terms of the GNU General Public License version<br>
+ * 2 as published by the Free Software Foundation.<br>
+ *<br>
+ * You should have received a copy of the GNU General Public License<br>
+ * along with this program. If not, see <<a href="http://www.gnu.org/licenses/" target="_blank">http://www.gnu.org/licenses/</a>>.<br>
+ */<br>
+<br>
+#include "sbd.h"<br>
+<br>
+void socket_shutdown(struct socket *sock)<br>
+{<br>
+ if (sock)<br>
+ kernel_sock_shutdown(sock, SHUT_RDWR);<br>
+}<br>
+<br>
+static struct sbd_device *sheep_aiocb_to_device(struct sheep_aiocb *aiocb)<br>
+{<br>
+ return aiocb->request->q->queuedata;<br>
+}<br>
+<br>
+static int socket_create(struct socket **sock, const char *ip_addr, int port)<br>
+{<br>
+ struct sockaddr_in addr;<br>
+ mm_segment_t oldmm = get_fs();<br>
+ struct linger linger_opt = {1, 0};<br>
+ int ret, nodelay = 1;<br>
+<br>
+ ret = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, sock);<br>
+ if (ret < 0) {<br>
+ pr_err("fail to create socket\n");<br>
+ return ret;<br>
+ }<br>
+<br>
+ set_fs(KERNEL_DS);<br>
+ ret = sock_setsockopt(*sock, SOL_SOCKET, SO_LINGER,<br>
+ (char *)&linger_opt, sizeof(linger_opt));<br>
+ set_fs(oldmm);<br>
+ if (ret != 0) {<br>
+ pr_err("Can't set SO_LINGER: %d\n", ret);<br>
+ goto shutdown;<br>
+ }<br>
+<br>
+ set_fs(KERNEL_DS);<br>
+ ret = sock_setsockopt(*sock, SOL_TCP, TCP_NODELAY,<br>
+ (char *)&nodelay, sizeof(nodelay));<br>
+ set_fs(oldmm);<br>
+ if (ret != 0) {<br>
+ pr_err("Can't set SO_LINGER: %d\n", ret);<br>
+ goto shutdown;<br>
+ }<br>
+<br>
+ memset(&addr, 0, sizeof(addr));<br>
+ addr.sin_family = AF_INET;<br>
+ addr.sin_port = htons(port);<br>
+ addr.sin_addr.s_addr = in_aton(ip_addr);<br>
+ ret = (*sock)->ops->connect(*sock, (struct sockaddr *)&addr,<br>
+ sizeof(addr), 0);<br>
+ if (ret < 0) {<br>
+ pr_err("failed connect to %s:%d\n", ip_addr, port);<br>
+ goto shutdown;<br>
+ }<br>
+<br>
+ return ret;<br>
+shutdown:<br>
+ socket_shutdown(*sock);<br>
+ *sock = NULL;<br>
+ return ret;<br>
+}<br>
+<br>
+static int socket_xmit(struct socket *sock, void *buf, int size, bool send,<br>
+ int msg_flags)<br>
+{<br>
+ int result;<br>
+ struct msghdr msg;<br>
+ struct kvec iov;<br>
+ sigset_t blocked, oldset;<br>
+<br>
+ if (unlikely(!sock))<br>
+ return -EINVAL;<br>
+<br>
+ /* Don't allow signals to interrupt the transmission */<br>
+ siginitsetinv(&blocked, 0);<br>
+ sigprocmask(SIG_SETMASK, &blocked, &oldset);<br>
+<br>
+ do {<br>
+ sock->sk->sk_allocation = GFP_NOIO;<br>
+ iov.iov_base = buf;<br>
+ iov.iov_len = size;<br>
+ msg.msg_name = NULL;<br>
+ msg.msg_namelen = 0;<br>
+ msg.msg_control = NULL;<br>
+ msg.msg_controllen = 0;<br>
+ msg.msg_flags = msg_flags | MSG_NOSIGNAL;<br>
+<br>
+ if (send)<br>
+ result = kernel_sendmsg(sock, &msg, &iov, 1, size);<br>
+ else<br>
+ result = kernel_recvmsg(sock, &msg, &iov, 1, size,<br>
+ msg.msg_flags);<br>
+<br>
+ if (result <= 0) {<br>
+ if (result == 0)<br>
+ result = -EPIPE; /* short read */<br>
+ break;<br>
+ }<br>
+ size -= result;<br>
+ buf += result;<br>
+ } while (size > 0);<br>
+<br>
+ sigprocmask(SIG_SETMASK, &oldset, NULL);<br>
+<br>
+ return result;<br>
+}<br>
+<br>
+static int socket_read(struct socket *sock, char *buf, int length)<br>
+{<br>
+ return socket_xmit(sock, buf, length, false, 0);<br>
+}<br></blockquote><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">+<br>
+static int socket_write(struct socket *sock, void *buf, int len)<br>
+{<br>
+ return socket_xmit(sock, buf, len, true, 0);<br>
+}<br>
+<br>
+static int sheep_submit_sdreq(struct socket *sock, struct sd_req *hdr,<br>
+ void *data, unsigned int wlen)<br>
+{<br>
+ int ret = socket_write(sock, hdr, sizeof(*hdr));<br>
+<br>
+ if (ret < 0)<br>
+ return ret;<br>
+<br>
+ if (wlen)<br>
+ return socket_write(sock, data, wlen);<br>
+ return 0;<br>
+}<br>
+<br>
+/* Run the request synchronously */<br>
+static int sheep_run_sdreq(struct socket *sock, struct sd_req *hdr,<br>
+ void *data)<br>
+{<br>
+ struct sd_rsp *rsp = (struct sd_rsp *)hdr;<br>
+ unsigned int wlen, rlen;<br>
+ int ret;<br>
+<br>
+ if (hdr->flags & SD_FLAG_CMD_WRITE) {<br>
+ wlen = hdr->data_length;<br>
+ rlen = 0;<br>
+ } else {<br>
+ wlen = 0;<br>
+ rlen = hdr->data_length;<br>
+ }<br>
+<br>
+ ret = sheep_submit_sdreq(sock, hdr, data, wlen);<br>
+ if (ret < 0) {<br>
+ pr_err("failed to sbumit the request\n");<br>
+ return ret;<br>
+ }<br>
+<br>
+ ret = socket_read(sock, (char *)rsp, sizeof(*rsp));<br>
+ if (ret < 0) {<br>
+ pr_err("failed to read a response hdr\n");<br>
+ return ret;<br>
+ }<br>
+<br>
+ if (rlen > rsp->data_length)<br>
+ rlen = rsp->data_length;<br>
+<br>
+ if (rlen) {<br>
+ ret = socket_read(sock, data, rlen);<br>
+ if (ret < 0) {<br>
+ pr_err("failed to read the response data\n");<br>
+ return ret;<br>
+ }<br>
+ }<br>
+<br>
+ return 0;<br>
+}<br>
+<br>
+static int lookup_sheep_vdi(struct sbd_device *dev)<br>
+{<br>
+ struct sd_req hdr = {};<br>
+ struct sd_rsp *rsp = (struct sd_rsp *)&hdr;<br>
+ int ret;<br>
+<br>
+ hdr.opcode = SD_OP_LOCK_VDI;<br>
+ hdr.data_length = SD_MAX_VDI_LEN;<br>
+ hdr.flags = SD_FLAG_CMD_WRITE;<br>
+ ret = sheep_run_sdreq(dev->sock, &hdr, dev-><a href="http://vdi.name" target="_blank">vdi.name</a>);<br>
+ if (ret < 0)<br>
+ return ret;<br>
+<br>
+ /* XXX switch case */<br>
+ if (rsp->result != SD_RES_SUCCESS) {<br>
+ sbd_debug("Cannot get VDI info for %s\n", dev-><a href="http://vdi.name" target="_blank">vdi.name</a>);<br>
+ return -EIO;<br>
+ }<br>
+<br>
+ dev->vdi.vid = rsp->vdi.vdi_id;<br>
+<br>
+ return 0;<br>
+}<br>
+<br>
+int sheep_setup_vdi(struct sbd_device *dev)<br>
+{<br>
+ struct sd_req hdr = {};<br>
+ struct sd_rsp *rsp = (struct sd_rsp *)&hdr;<br>
+ struct sd_inode *inode;<br>
+ int ret;<br>
+<br>
+ inode = vmalloc(sizeof(*inode));<br>
+ if (!inode)<br>
+ return -ENOMEM;<br>
+ memset(inode, 0 , sizeof(*inode));<br>
+<br>
+ ret = socket_create(&dev->sock, dev->vdi.ip, dev->vdi.port);<br>
+ if (ret < 0)<br>
+ goto out;<br>
+<br>
+ ret = lookup_sheep_vdi(dev);<br>
+ if (ret < 0)<br>
+ goto out_release;<br>
+<br>
+ hdr.opcode = SD_OP_READ_OBJ;<br>
+ hdr.data_length = SD_INODE_SIZE;<br>
+ hdr.obj.oid = vid_to_vdi_oid(dev->vdi.vid);<br>
+ hdr.obj.offset = 0;<br>
+ ret = sheep_run_sdreq(dev->sock, &hdr, inode);<br>
+ if (ret < 0)<br>
+ goto out_release;<br>
+<br>
+ /* XXX switch case */<br>
+ if (rsp->result != SD_RES_SUCCESS) {<br>
+ ret = -EIO;<br>
+ goto out_release;<br>
+ }<br>
+<br>
+ dev->vdi.inode = inode;<br>
+ pr_info("%s: Associated to %s\n", DRV_NAME, inode->name);<br>
+ return 0;<br>
+out_release:<br>
+ socket_shutdown(dev->sock);<br>
+ dev->sock = NULL;<br>
+out:<br>
+ vfree(inode);<br>
+ return ret;<br>
+}<br>
+<br>
+static void submit_sheep_request(struct sheep_request *req)<br>
+{<br>
+}<br></blockquote><div>Maybe we could remove this unused function ?</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+static inline void free_sheep_aiocb(struct sheep_aiocb *aiocb)<br>
+{<br>
+ kfree(aiocb->buf);<br>
+ kfree(aiocb);<br>
+}<br>
+<br>
+static void aio_write_done(struct sheep_aiocb *aiocb, bool locked)<br>
+{<br>
+ sbd_debug("off %llu, len %llu\n", aiocb->offset, aiocb->length);<br>
+<br>
+ if (locked)<br>
+ __blk_end_request_all(aiocb->request, aiocb->ret);<br>
+ else<br>
+ blk_end_request_all(aiocb->request, aiocb->ret);<br>
+ free_sheep_aiocb(aiocb);<br>
+}<br>
+<br>
+static void aio_read_done(struct sheep_aiocb *aiocb, bool locked)<br>
+{<br>
+ sbd_debug("off %llu, len %llu\n", aiocb->offset, aiocb->length);<br>
+<br>
+ if (locked)<br>
+ __blk_end_request_all(aiocb->request, aiocb->ret);<br>
+ else<br>
+ blk_end_request_all(aiocb->request, aiocb->ret);<br>
+ free_sheep_aiocb(aiocb);<br>
+}<br>
+<br>
+struct sheep_aiocb *sheep_aiocb_setup(struct request *req)<br>
+{<br>
+ struct sheep_aiocb *aiocb = kmalloc(sizeof(*aiocb), GFP_KERNEL);<br>
+ struct req_iterator iter;<br>
+ struct bio_vec *bvec;<br>
+ int len = 0;<br>
+<br>
+ if (!aiocb)<br>
+ return ERR_PTR(-ENOMEM);<br>
+<br>
+ aiocb->offset = blk_rq_pos(req) * SECTOR_SIZE;<br>
+ aiocb->length = blk_rq_bytes(req);<br>
+ aiocb->nr_requests = 0;<br>
+ aiocb->ret = 0;<br>
+ aiocb->buf_iter = 0;<br>
+ aiocb->request = req;<br>
+ aiocb->buf = kzalloc(aiocb->length, GFP_KERNEL);<br>
+<br>
+ switch (rq_data_dir(req)) {<br>
+ case WRITE:<br>
+ rq_for_each_segment(bvec, req, iter) {<br>
+ unsigned long flags;<br>
+ void *addr = bvec_kmap_irq(bvec, &flags);<br>
+<br>
+ memcpy(aiocb->buf + len, addr, bvec->bv_len);<br>
+ flush_dcache_page(bvec->bv_page);<br>
+ bvec_kunmap_irq(addr, &flags);<br>
+<br>
+ len += bvec->bv_len;<br>
+ }<br>
+ aiocb->aio_done_func = aio_write_done;<br>
+ break;<br>
+ case READ:<br>
+ aiocb->aio_done_func = aio_read_done;<br>
+ break;<br>
+ default:<br>
+ /* impossible case */<br>
+ WARN_ON(1);<br>
+ free_sheep_aiocb(aiocb);<br>
+ return ERR_PTR(-EINVAL);<br>
+ }<br>
+<br>
+ return aiocb;<br>
+}<br>
+<br>
+static struct sheep_request *alloc_sheep_request(struct sheep_aiocb *aiocb,<br>
+ u64 oid, int len,<br>
+ int offset)<br>
+{<br>
+ struct sheep_request *req = kmalloc(sizeof(*req), GFP_KERNEL);<br>
+ struct sbd_device *dev = sheep_aiocb_to_device(aiocb);<br>
+<br>
+ if (!req)<br>
+ return ERR_PTR(-ENOMEM);<br>
+<br>
+ req->offset = offset;<br>
+ req->length = len;<br>
+ req->oid = oid;<br>
+ req->aiocb = aiocb;<br>
+ req->buf = aiocb->buf + aiocb->buf_iter;<br>
+ req->seq_num = atomic_inc_return(&dev->seq_num);<br>
+<br>
+ switch (rq_data_dir(aiocb->request)) {<br>
+ case WRITE:<br>
+ req->type = SHEEP_WRITE;<br>
+ break;<br>
+ case READ:<br>
+ req->type = SHEEP_READ;<br>
+ break;<br>
+ default:<br>
+ /* impossible case */<br>
+ WARN_ON(1);<br>
+ kfree(req);<br>
+ return ERR_PTR(-EINVAL);<br>
+ }<br>
+<br>
+ aiocb->buf_iter += len;<br>
+ aiocb->nr_requests++;<br>
+<br>
+ return req;<br>
+}<br>
+<br>
+static void end_sheep_request(struct sheep_request *req, bool queue_locked)<br>
+{<br>
+ struct sheep_aiocb *aiocb = req->aiocb;<br>
+<br>
+ if (--aiocb->nr_requests == 0)<br>
+ aiocb->aio_done_func(aiocb, queue_locked);<br>
+<br>
+ sbd_debug("end oid %llx off %d, len %d, seq %u\n", req->oid,<br>
+ req->offset, req->length, req->seq_num);<br>
+ kfree(req);<br>
+}<br>
+<br>
+int sheep_aiocb_submit(struct sheep_aiocb *aiocb)<br>
+{<br>
+ struct sbd_device *dev = sheep_aiocb_to_device(aiocb);<br>
+ u64 offset = aiocb->offset;<br>
+ u64 total = aiocb->length;<br>
+ u64 start = offset % SD_DATA_OBJ_SIZE;<br>
+ u32 vid = dev->vdi.vid;<br>
+ u64 oid = vid_to_data_oid(vid, offset / SD_DATA_OBJ_SIZE);<br>
+ u32 idx = data_oid_to_idx(oid);<br>
+ int len = SD_DATA_OBJ_SIZE - start;<br>
+<br>
+ if (total < len)<br>
+ len = total;<br>
+<br>
+ sbd_debug("submit oid %llx off %llu, len %llu\n", oid, offset, total);<br>
+ /*<br>
+ * Make sure we don't free the aiocb before we are done with all<br>
+ * requests.This additional reference is dropped at the end of this<br>
+ * function.<br>
+ */<br>
+ aiocb->nr_requests++;<br>
+<br>
+ do {<br>
+ struct sheep_request *req;<br>
+<br>
+ req = alloc_sheep_request(aiocb, oid, len, start);<br>
+ if (IS_ERR(req))<br>
+ return PTR_ERR(req);<br>
+<br>
+ if (likely(dev->vdi.inode->data_vdi_id[idx]))<br>
+ goto submit;<br>
+<br>
+ /* Object is not created yet... */<br>
+ switch (req->type) {<br>
+ case SHEEP_WRITE:<br>
+ case SHEEP_READ:<br>
+ end_sheep_request(req, true);<br>
+ goto done;<br>
+ }<br>
+submit:<br>
+ submit_sheep_request(req);<br>
+done:<br>
+ oid++;<br>
+ total -= len;<br>
+ start = (start + len) % SD_DATA_OBJ_SIZE;<br>
+ len = total > SD_DATA_OBJ_SIZE ? SD_DATA_OBJ_SIZE : total;<br>
+ } while (total > 0);<br>
+<br>
+ if (--aiocb->nr_requests == 0)<br>
+ aiocb->aio_done_func(aiocb, true);<br>
+<br>
+ return 0;<br>
+}<br>
+<br>
+int sheep_handle_reply(struct sbd_device *dev)<br>
+{<br>
+ return 0;<br>
+}<br>
diff --git a/sbd/sheep_block_device.c b/sbd/sheep_block_device.c<br>
new file mode 100644<br>
index 0000000..e7331dc<br>
--- /dev/null<br>
+++ b/sbd/sheep_block_device.c<br>
@@ -0,0 +1,299 @@<br>
+/*<br>
+ * Copyright (C) 2014 Liu Yuan <<a href="mailto:namei.unix@gmail.com">namei.unix@gmail.com</a>><br>
+ *<br>
+ * This program is free software; you can redistribute it and/or<br>
+ * modify it under the terms of the GNU General Public License version<br>
+ * 2 as published by the Free Software Foundation.<br>
+ *<br>
+ * You should have received a copy of the GNU General Public License<br>
+ * along with this program. If not, see <<a href="http://www.gnu.org/licenses/" target="_blank">http://www.gnu.org/licenses/</a>>.<br>
+ */<br>
+<br>
+/*<br>
+ * SBD - Sheepdog Block Device<br>
+ *<br>
+ * This file implements the glue functions to export sheep vdi as Linux block<br>
+ * device.<br>
+ *<br>
+ */<br>
+<br>
+#include "sbd.h"<br>
+<br>
+static LIST_HEAD(sbd_dev_list);<br>
+<br>
+static const struct block_device_operations sbd_bd_ops = {<br>
+ .owner = THIS_MODULE,<br>
+};<br>
+<br>
+static int sbd_submit_request(struct request *req)<br>
+{<br>
+ struct sheep_aiocb *aiocb = sheep_aiocb_setup(req);<br>
+<br>
+ if (IS_ERR(aiocb))<br>
+ return PTR_ERR(aiocb);<br>
+<br>
+ return sheep_aiocb_submit(aiocb);<br>
+}<br>
+<br>
+static void sbd_request_submiter(struct request_queue *q)<br>
+{<br>
+ struct request *req;<br>
+<br>
+ while ((req = blk_fetch_request(q)) != NULL) {<br>
+ int ret;<br>
+<br>
+ /* filter out block requests we don't understand */<br>
+ if (req->cmd_type != REQ_TYPE_FS) {<br>
+ __blk_end_request_all(req, 0);<br>
+ continue;<br>
+ }<br>
+ ret = sbd_submit_request(req);<br>
+ if (ret < 0)<br>
+ break;<br>
+ }<br>
+}<br>
+<br>
+static int sbd_add_disk(struct sbd_device *dev)<br>
+{<br>
+ struct gendisk *disk;<br>
+ struct request_queue *rq;<br>
+<br>
+ disk = alloc_disk(SBD_MINORS_PER_MAJOR);<br>
+ if (!disk)<br>
+ return -ENOMEM;<br>
+<br>
+ snprintf(disk->disk_name, DEV_NAME_LEN, DRV_NAME "%d", dev->id);<br>
+ disk->major = dev->major;<br>
+ disk->first_minor = 0;<br>
+ disk->fops = &sbd_bd_ops;<br>
+ disk->private_data = dev;<br>
+<br>
+ rq = blk_init_queue(sbd_request_submiter, &dev->queue_lock);<br>
+ if (!rq) {<br>
+ put_disk(disk);<br>
+ return -ENOMEM;<br>
+ }<br>
+<br>
+ blk_queue_max_hw_sectors(rq, SD_DATA_OBJ_SIZE / SECTOR_SIZE);<br>
+ blk_queue_max_segments(rq, SD_DATA_OBJ_SIZE / SECTOR_SIZE);<br>
+ blk_queue_max_segment_size(rq, SD_DATA_OBJ_SIZE);<br>
+ blk_queue_io_opt(rq, SD_DATA_OBJ_SIZE);<br>
+<br>
+ disk->queue = rq;<br>
+ rq->queuedata = dev;<br>
+ dev->disk = disk;<br>
+ dev->rq = rq;<br>
+<br>
+ set_capacity(disk, dev->vdi.inode->vdi_size / SECTOR_SIZE);<br>
+ add_disk(disk);<br>
+<br>
+ return 0;<br>
+}<br>
+<br>
+static int sbd_request_reaper(void *data)<br>
+{<br>
+ struct sbd_device *dev = data;<br>
+<br>
+ while (!kthread_should_stop() || !list_empty(&dev->inflight_head)) {<br>
+ wait_event_interruptible(dev->inflight_wq,<br>
+ kthread_should_stop() ||<br>
+ !list_empty(&dev->inflight_head));<br>
+<br>
+ if (list_empty(&dev->inflight_head))<br>
+ continue;<br>
+<br>
+ sheep_handle_reply(dev);<br>
+ }<br>
+ return 0;<br>
+}<br>
+<br>
+static inline void free_sbd_device(struct sbd_device *dev)<br>
+{<br>
+ socket_shutdown(dev->sock);<br>
+ vfree(dev->vdi.inode);<br>
+ kfree(dev);<br>
+}<br>
+<br>
+static ssize_t sbd_add(struct bus_type *bus, const char *buf,<br>
+ size_t count)<br>
+{<br>
+ struct sbd_device *dev, *tmp;<br>
+ ssize_t ret;<br>
+ int new_id = 0;<br>
+ char name[DEV_NAME_LEN];<br>
+<br>
+ if (!try_module_get(THIS_MODULE))<br>
+ return -ENODEV;<br>
+<br>
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);<br>
+ if (!dev) {<br>
+ ret = -ENOMEM;<br>
+ goto err_put;<br>
+ }<br>
+<br>
+ if (sscanf(buf, "%s %d %s", dev->vdi.ip, &dev->vdi.port,<br>
+ dev-><a href="http://vdi.name" target="_blank">vdi.name</a>) != 3) {<br>
+ ret = -EINVAL;<br>
+ goto err_put;<br>
+ }<br>
+<br>
+ spin_lock_init(&dev->queue_lock);<br>
+ INIT_LIST_HEAD(&dev->inflight_head);<br>
+ INIT_LIST_HEAD(&dev->blocking_head);<br>
+ init_waitqueue_head(&dev->inflight_wq);<br>
+<br>
+ list_for_each_entry(tmp, &sbd_dev_list, list) {<br>
+ if (tmp->id > new_id)<br>
+ new_id = tmp->id + 1;<br>
+ }<br>
+<br>
+ ret = sheep_setup_vdi(dev);<br>
+ if (ret < 0)<br>
+ goto err_free_dev;<br>
+<br>
+ dev->id = new_id;<br>
+ snprintf(name, DEV_NAME_LEN, DRV_NAME "%d", dev->id);<br>
+ ret = register_blkdev(0, name);<br>
+ if (ret < 0)<br>
+ goto err_free_dev;<br>
+ dev->major = ret;<br>
+ dev->minor = 0;<br>
+ dev->reaper = kthread_run(sbd_request_reaper, dev, "sbd_reaper");<br>
+<br>
+ ret = sbd_add_disk(dev);<br>
+ if (ret < 0)<br>
+ goto err_unreg_blkdev;<br>
+<br>
+ list_add_tail(&dev->list, &sbd_dev_list);<br>
+<br>
+ return count;<br>
+err_unreg_blkdev:<br>
+ unregister_blkdev(dev->major, name);<br>
+err_free_dev:<br>
+ free_sbd_device(dev);<br>
+err_put:<br>
+ module_put(THIS_MODULE);<br>
+ pr_err("%s: error adding device %s", DRV_NAME, buf);<br>
+ return ret;<br>
+}<br>
+<br>
+static void sbd_del_disk(struct sbd_device *dev)<br>
+{<br>
+ struct gendisk *disk = dev->disk;<br>
+<br>
+ if (!disk)<br>
+ return;<br>
+<br>
+ if (disk->flags & GENHD_FL_UP)<br>
+ del_gendisk(disk);<br>
+ if (disk->queue)<br>
+ blk_cleanup_queue(disk->queue);<br>
+ put_disk(disk);<br>
+}<br>
+<br>
+static ssize_t sbd_remove(struct bus_type *bus, const char *buf,<br>
+ size_t count)<br>
+{<br>
+<br>
+ struct list_head *tmp, *n;<br>
+ struct sbd_device *dev;<br>
+ unsigned long ul;<br>
+ int target_id, ret;<br>
+<br>
+ ret = strict_strtoul(buf, 10, &ul);<br>
+ if (ret)<br>
+ return ret;<br>
+<br>
+ /* convert to int; abort if we lost anything in the conversion */<br>
+ target_id = (int)ul;<br>
+ if (target_id != ul)<br>
+ return -EINVAL;<br>
+<br>
+ list_for_each_safe(tmp, n, &sbd_dev_list) {<br>
+ dev = list_entry(tmp, struct sbd_device, list);<br>
+ if (dev->id == target_id) {<br>
+ list_del(&dev->list);<br>
+ break;<br>
+ }<br>
+ dev = NULL;<br>
+ }<br>
+<br>
+ if (!dev)<br>
+ return -ENOENT;<br>
+<br>
+ kthread_stop(dev->reaper);<br>
+ wake_up_interruptible(&dev->inflight_wq);<br>
+<br>
+ sbd_del_disk(dev);<br>
+ free_sbd_device(dev);<br>
+ module_put(THIS_MODULE);<br>
+<br>
+ return count;<br>
+}<br>
+<br>
+static struct bus_attribute sbd_bus_attrs[] = {<br>
+ __ATTR(add, S_IWUSR, NULL, sbd_add),<br>
+ __ATTR(remove, S_IWUSR, NULL, sbd_remove),<br>
+ __ATTR_NULL<br>
+};<br>
+<br>
+static struct bus_type sbd_bus_type = {<br>
+ .name = "sbd",<br>
+ .bus_attrs = sbd_bus_attrs,<br>
+};<br>
+<br>
+static void sbd_root_dev_release(struct device *dev)<br>
+{<br>
+}<br>
+<br>
+static struct device sbd_root_dev = {<br>
+ .init_name = "sbd",<br>
+ .release = sbd_root_dev_release,<br>
+};<br>
+<br>
+/* Create control files in /sys/bus/sbd/... */<br>
+static int sbd_sysfs_init(void)<br>
+{<br>
+ int ret;<br>
+<br>
+ ret = device_register(&sbd_root_dev);<br>
+ if (ret < 0)<br>
+ return ret;<br>
+<br>
+ ret = bus_register(&sbd_bus_type);<br>
+ if (ret < 0)<br>
+ device_unregister(&sbd_root_dev);<br>
+<br>
+ return ret;<br>
+}<br>
+<br>
+static void sbd_sysfs_cleanup(void)<br>
+{<br>
+ bus_unregister(&sbd_bus_type);<br>
+ device_unregister(&sbd_root_dev);<br>
+}<br>
+<br>
+int __init sbd_init(void)<br>
+{<br>
+ int ret;<br>
+<br>
+ ret = sbd_sysfs_init();<br>
+ if (ret < 0)<br>
+ return ret;<br>
+<br>
+ pr_info("%s: Sheepdog block device loaded\n", DRV_NAME);<br>
+ return 0;<br>
+}<br>
+<br>
+void __exit sbd_exit(void)<br>
+{<br>
+ sbd_sysfs_cleanup();<br>
+ pr_info("%s: Sheepdog block device unloaded\n", DRV_NAME);<br>
+}<br>
+<br>
+module_init(sbd_init);<br>
+module_exit(sbd_exit);<br>
+<br>
+MODULE_AUTHOR("Liu Yuan <<a href="mailto:namei.unix@gmail.com">namei.unix@gmail.com</a>>");<br>
+MODULE_DESCRIPTION("Sheepdog Block Device");<br>
+MODULE_LICENSE("GPL");<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.8.1.2<br>
<br>
--<br>
sheepdog mailing list<br>
<a href="mailto:sheepdog@lists.wpkg.org">sheepdog@lists.wpkg.org</a><br>
<a href="http://lists.wpkg.org/mailman/listinfo/sheepdog" target="_blank">http://lists.wpkg.org/mailman/listinfo/sheepdog</a><br>
</font></span></blockquote></div><br><br clear="all"><div><br></div>-- <br>--<br>Best Regard<br>Robin Dong
</div></div>