<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">2014-05-26 13:18 GMT+08:00 Liu Yuan <span dir="ltr"><<a href="mailto:namei.unix@gmail.com" target="_blank">namei.unix@gmail.com</a>></span>:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">From: Liu Yuan <<a href="mailto:tailai.ly@taobao.com">tailai.ly@taobao.com</a>><br>
<br>
This is similar to Ceph's RBD. The main motivation is to replace complex<br>
and inefficient middleware (such as iSCSI software) with a simple software stack<br>
that exposes sheepdog storage through the Linux block device interface.<br>
<br>
Usage:<br>
<br>
We control the device the same way as RBD.<br>
<br>
# associate vdi 'test' to /dev/sbd0<br>
$ echo 127.0.0.1 7000 test > /sys/bus/sbd/add<br>
<br>
# remove the device sbd0<br>
$ echo 0 > /sys/bus/sbd/remove<br>
<br>
For now we don't do actual reads/writes yet; only the AIO framework is sketched out.<br>
<br>
Signed-off-by: Liu Yuan <<a href="mailto:namei.unix@gmail.com">namei.unix@gmail.com</a>><br>
---<br>
 sbd/Kbuild               |   5 +<br>
 sbd/Makefile             |   8 +<br>
 sbd/sbd.h                | 119 +++++++++++++<br>
 sbd/sheep.c              | 436 +++++++++++++++++++++++++++++++++++++++++++++++<br>
 sbd/sheep_block_device.c | 299 ++++++++++++++++++++++++++++++++<br>
 5 files changed, 867 insertions(+)<br>
 create mode 100644 sbd/Kbuild<br>
 create mode 100644 sbd/Makefile<br>
 create mode 100644 sbd/sbd.h<br>
 create mode 100644 sbd/sheep.c<br>
 create mode 100644 sbd/sheep_block_device.c<br>
<br>
diff --git a/sbd/Kbuild b/sbd/Kbuild<br>
new file mode 100644<br>
index 0000000..c1f7c07<br>
--- /dev/null<br>
+++ b/sbd/Kbuild<br>
@@ -0,0 +1,5 @@<br>
+MODULE_NAME=sbd<br>
+<br>
+ccflags-y      := -I$(PWD)/../include -DDEBUG<br>
+obj-m          := $(MODULE_NAME).o<br>
+$(MODULE_NAME)-y := sheep_block_device.o sheep.o<br>
diff --git a/sbd/Makefile b/sbd/Makefile<br>
new file mode 100644<br>
index 0000000..940c0c3<br>
--- /dev/null<br>
+++ b/sbd/Makefile<br>
@@ -0,0 +1,8 @@<br>
+KDIR   ?= /lib/modules/`uname -r`/build<br>
+<br>
+default:<br>
+       $(MAKE) -C $(KDIR) M=$$PWD modules<br>
+clean:<br>
+       $(MAKE) -C $(KDIR) M=$$PWD clean<br>
+install:<br>
+       $(MAKE) -C $(KDIR) M=$$PWD modules_install<br>
diff --git a/sbd/sbd.h b/sbd/sbd.h<br>
new file mode 100644<br>
index 0000000..e938561<br>
--- /dev/null<br>
+++ b/sbd/sbd.h<br>
@@ -0,0 +1,119 @@<br>
+#ifndef _SBD_H_<br>
+#define _SBD_H_<br>
+<br>
+#include <linux/socket.h><br>
+#include <linux/in.h><br>
+#include <linux/inet.h><br>
+#include <linux/socket.h><br>
+#include <linux/net.h><br>
+#include <linux/tcp.h><br>
+#include <linux/slab.h><br>
+#include <linux/kernel.h><br>
+#include <linux/device.h><br>
+#include <linux/module.h><br>
+#include <linux/fs.h><br>
+#include <linux/blkdev.h><br>
+#include <linux/kthread.h><br>
+#include <linux/gfp.h><br>
+<br>
+#include "sheepdog_proto.h"<br>
+<br>
+#define DRV_NAME "sbd"<br>
+#define DEV_NAME_LEN 32<br>
+#define SBD_MINORS_PER_MAJOR 32<br>
+#define SECTOR_SIZE 512<br>
+<br>
+struct sheep_vdi {<br>
+       struct sd_inode *inode;<br>
+       u32 vid;<br>
+       char ip[16];<br>
+       unsigned int port;<br>
+       char name[SD_MAX_VDI_LEN];<br>
+};<br>
+<br>
+struct sbd_device {<br>
+       struct socket *sock;<br>
+       int id;         /* blkdev unique id */<br>
+       atomic_t seq_num;<br>
+<br>
+       int major;<br>
+       int minor;<br>
+       struct gendisk *disk;<br>
+       struct request_queue *rq;<br>
+       spinlock_t queue_lock;   /* request queue lock */<br>
+<br>
+       struct sheep_vdi vdi;           /* Associated sheep image */<br>
+<br>
+       struct list_head inflight_head;<br>
+       wait_queue_head_t inflight_wq;<br>
+       struct list_head blocking_head;<br>
+<br>
+       struct list_head list;<br>
+       struct task_struct *reaper;<br>
+};<br>
+<br>
+struct sheep_aiocb {<br>
+       struct request *request;<br>
+       u64 offset;<br>
+       u64 length;<br>
+       int ret;<br>
+       u32 nr_requests;<br>
+       char *buf;<br>
+       int buf_iter;<br>
+       void (*aio_done_func)(struct sheep_aiocb *, bool);<br>
+};<br>
+<br>
+enum sheep_request_type {<br>
+       SHEEP_READ,<br>
+       SHEEP_WRITE,<br>
+       SHEEP_CREATE,<br>
+};<br>
+<br>
+struct sheep_request {<br>
+       struct list_head list;<br>
+       struct sheep_aiocb *aiocb;<br>
+       u64 oid;<br>
+       u32 seq_num;<br>
+       int type;<br>
+       int offset;<br>
+       int length;<br>
+       char *buf;<br>
+};<br>
+<br>
+void socket_shutdown(struct socket *sock);<br>
+int sheep_setup_vdi(struct sbd_device *dev);<br>
+struct sheep_aiocb *sheep_aiocb_setup(struct request *req);<br>
+int sheep_aiocb_submit(struct sheep_aiocb *aiocb);<br>
+int sheep_handle_reply(struct sbd_device *dev);<br>
+<br>
+#if defined(CONFIG_DYNAMIC_DEBUG) && defined _DPRINTK_FLAGS_INCL_MODNAME<br>
+<br>
+# define _SBD_FLAGS (_DPRINTK_FLAGS_PRINT | _DPRINTK_FLAGS_INCL_MODNAME \<br>
+       | _DPRINTK_FLAGS_INCL_FUNCNAME | _DPRINTK_FLAGS_INCL_LINENO)<br>
+<br>
+# define SBD_DYNAMIC_DEBUG_METADATA(name, fmt)                  \<br>
+       static struct _ddebug  __aligned(8)                     \<br>
+        __attribute__((section("__verbose"))) name = {          \<br>
+               .modname = KBUILD_MODNAME,                      \<br>
+               .function = __func__,                           \<br>
+               .filename = __FILE__,                           \<br>
+               .format = (fmt),                                \<br>
+               .lineno = __LINE__,                             \<br>
+               .flags =  _SBD_FLAGS,                           \<br>
+       }<br>
+<br>
+# define sbd_debug(fmt, ...)                            \<br>
+({                                                      \<br>
+       SBD_DYNAMIC_DEBUG_METADATA(descriptor, fmt);    \<br>
+       __dynamic_pr_debug(&descriptor, pr_fmt(fmt),    \<br>
+                          ##__VA_ARGS__);              \<br>
+})<br>
+<br>
+#else<br>
+<br>
+/* If -DDEBUG is not set, pr_debug = no_printk */<br>
+# define sbd_debug pr_debug<br>
+<br>
+#endif /* CONFIG_DYNAMIC_DEBUG */<br>
+<br>
+#endif /* _SBD_H_ */<br>
diff --git a/sbd/sheep.c b/sbd/sheep.c<br>
new file mode 100644<br>
index 0000000..33269b4<br>
--- /dev/null<br>
+++ b/sbd/sheep.c<br>
@@ -0,0 +1,436 @@<br>
+/*<br>
+ * Copyright (C) 2014 Liu Yuan <<a href="mailto:namei.unix@gmail.com">namei.unix@gmail.com</a>><br>
+ *<br>
+ * This program is free software; you can redistribute it and/or<br>
+ * modify it under the terms of the GNU General Public License version<br>
+ * 2 as published by the Free Software Foundation.<br>
+ *<br>
+ * You should have received a copy of the GNU General Public License<br>
+ * along with this program. If not, see <<a href="http://www.gnu.org/licenses/" target="_blank">http://www.gnu.org/licenses/</a>>.<br>
+ */<br>
+<br>
+#include "sbd.h"<br>
+<br>
+void socket_shutdown(struct socket *sock)<br>
+{<br>
+       if (sock)<br>
+               kernel_sock_shutdown(sock, SHUT_RDWR);<br>
+}<br>
+<br>
+static struct sbd_device *sheep_aiocb_to_device(struct sheep_aiocb *aiocb)<br>
+{<br>
+       return aiocb->request->q->queuedata;<br>
+}<br>
+<br>
+static int socket_create(struct socket **sock, const char *ip_addr, int port)<br>
+{<br>
+       struct sockaddr_in addr;<br>
+       mm_segment_t oldmm = get_fs();<br>
+       struct linger linger_opt = {1, 0};<br>
+       int ret, nodelay = 1;<br>
+<br>
+       ret = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, sock);<br>
+       if (ret < 0) {<br>
+               pr_err("fail to create socket\n");<br>
+               return ret;<br>
+       }<br>
+<br>
+       set_fs(KERNEL_DS);<br>
+       ret = sock_setsockopt(*sock, SOL_SOCKET, SO_LINGER,<br>
+                             (char *)&linger_opt, sizeof(linger_opt));<br>
+       set_fs(oldmm);<br>
+       if (ret != 0) {<br>
+               pr_err("Can't set SO_LINGER: %d\n", ret);<br>
+               goto shutdown;<br>
+       }<br>
+<br>
+       set_fs(KERNEL_DS);<br>
+       ret = sock_setsockopt(*sock, SOL_TCP, TCP_NODELAY,<br>
+                             (char *)&nodelay, sizeof(nodelay));<br>
+       set_fs(oldmm);<br>
+       if (ret != 0) {<br>
+               pr_err("Can't set SO_LINGER: %d\n", ret);<br>
+               goto shutdown;<br>
+       }<br>
+<br>
+       memset(&addr, 0, sizeof(addr));<br>
+       addr.sin_family = AF_INET;<br>
+       addr.sin_port = htons(port);<br>
+       addr.sin_addr.s_addr = in_aton(ip_addr);<br>
+       ret = (*sock)->ops->connect(*sock, (struct sockaddr *)&addr,<br>
+                                   sizeof(addr), 0);<br>
+       if (ret < 0) {<br>
+               pr_err("failed connect to %s:%d\n", ip_addr, port);<br>
+               goto shutdown;<br>
+       }<br>
+<br>
+       return ret;<br>
+shutdown:<br>
+       socket_shutdown(*sock);<br>
+       *sock = NULL;<br>
+       return ret;<br>
+}<br>
+<br>
+static int socket_xmit(struct socket *sock, void *buf, int size, bool send,<br>
+                      int msg_flags)<br>
+{<br>
+       int result;<br>
+       struct msghdr msg;<br>
+       struct kvec iov;<br>
+       sigset_t blocked, oldset;<br>
+<br>
+       if (unlikely(!sock))<br>
+               return -EINVAL;<br>
+<br>
+       /* Don't allow signals to interrupt the transmission */<br>
+       siginitsetinv(&blocked, 0);<br>
+       sigprocmask(SIG_SETMASK, &blocked, &oldset);<br>
+<br>
+       do {<br>
+               sock->sk->sk_allocation = GFP_NOIO;<br>
+               iov.iov_base = buf;<br>
+               iov.iov_len = size;<br>
+               msg.msg_name = NULL;<br>
+               msg.msg_namelen = 0;<br>
+               msg.msg_control = NULL;<br>
+               msg.msg_controllen = 0;<br>
+               msg.msg_flags = msg_flags | MSG_NOSIGNAL;<br>
+<br>
+               if (send)<br>
+                       result = kernel_sendmsg(sock, &msg, &iov, 1, size);<br>
+               else<br>
+                       result = kernel_recvmsg(sock, &msg, &iov, 1, size,<br>
+                                               msg.msg_flags);<br>
+<br>
+               if (result <= 0) {<br>
+                       if (result == 0)<br>
+                               result = -EPIPE; /* short read */<br>
+                       break;<br>
+               }<br>
+               size -= result;<br>
+               buf += result;<br>
+       } while (size > 0);<br>
+<br>
+       sigprocmask(SIG_SETMASK, &oldset, NULL);<br>
+<br>
+       return result;<br>
+}<br>
+<br>
+static int socket_read(struct socket *sock, char *buf, int length)<br>
+{<br>
+       return socket_xmit(sock, buf, length, false, 0);<br>
+}<br></blockquote><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">+<br>
+static int socket_write(struct socket *sock, void *buf, int len)<br>
+{<br>
+       return socket_xmit(sock, buf, len, true, 0);<br>
+}<br>
+<br>
+static int sheep_submit_sdreq(struct socket *sock, struct sd_req *hdr,<br>
+                             void *data, unsigned int wlen)<br>
+{<br>
+       int ret = socket_write(sock, hdr, sizeof(*hdr));<br>
+<br>
+       if (ret < 0)<br>
+               return ret;<br>
+<br>
+       if (wlen)<br>
+               return socket_write(sock, data, wlen);<br>
+       return 0;<br>
+}<br>
+<br>
+/* Run the request synchronously */<br>
+static int sheep_run_sdreq(struct socket *sock, struct sd_req *hdr,<br>
+                          void *data)<br>
+{<br>
+       struct sd_rsp *rsp = (struct sd_rsp *)hdr;<br>
+       unsigned int wlen, rlen;<br>
+       int ret;<br>
+<br>
+       if (hdr->flags & SD_FLAG_CMD_WRITE) {<br>
+               wlen = hdr->data_length;<br>
+               rlen = 0;<br>
+       } else {<br>
+               wlen = 0;<br>
+               rlen = hdr->data_length;<br>
+       }<br>
+<br>
+       ret = sheep_submit_sdreq(sock, hdr, data, wlen);<br>
+       if (ret < 0) {<br>
+               pr_err("failed to sbumit the request\n");<br>
+               return ret;<br>
+       }<br>
+<br>
+       ret = socket_read(sock, (char *)rsp, sizeof(*rsp));<br>
+       if (ret < 0) {<br>
+               pr_err("failed to read a response hdr\n");<br>
+               return ret;<br>
+       }<br>
+<br>
+       if (rlen > rsp->data_length)<br>
+               rlen = rsp->data_length;<br>
+<br>
+       if (rlen) {<br>
+               ret = socket_read(sock, data, rlen);<br>
+               if (ret < 0) {<br>
+                       pr_err("failed to read the response data\n");<br>
+                       return ret;<br>
+               }<br>
+       }<br>
+<br>
+       return 0;<br>
+}<br>
+<br>
+static int lookup_sheep_vdi(struct sbd_device *dev)<br>
+{<br>
+       struct sd_req hdr = {};<br>
+       struct sd_rsp *rsp = (struct sd_rsp *)&hdr;<br>
+       int ret;<br>
+<br>
+       hdr.opcode = SD_OP_LOCK_VDI;<br>
+       hdr.data_length = SD_MAX_VDI_LEN;<br>
+       hdr.flags = SD_FLAG_CMD_WRITE;<br>
+       ret = sheep_run_sdreq(dev->sock, &hdr, dev-><a href="http://vdi.name" target="_blank">vdi.name</a>);<br>
+       if (ret < 0)<br>
+               return ret;<br>
+<br>
+       /* XXX switch case */<br>
+       if (rsp->result != SD_RES_SUCCESS) {<br>
+               sbd_debug("Cannot get VDI info for %s\n", dev-><a href="http://vdi.name" target="_blank">vdi.name</a>);<br>
+               return -EIO;<br>
+       }<br>
+<br>
+       dev->vdi.vid = rsp->vdi.vdi_id;<br>
+<br>
+       return 0;<br>
+}<br>
+<br>
+int sheep_setup_vdi(struct sbd_device *dev)<br>
+{<br>
+       struct sd_req hdr = {};<br>
+       struct sd_rsp *rsp = (struct sd_rsp *)&hdr;<br>
+       struct sd_inode *inode;<br>
+       int ret;<br>
+<br>
+       inode = vmalloc(sizeof(*inode));<br>
+       if (!inode)<br>
+               return -ENOMEM;<br>
+       memset(inode, 0 , sizeof(*inode));<br>
+<br>
+       ret = socket_create(&dev->sock, dev->vdi.ip, dev->vdi.port);<br>
+       if (ret < 0)<br>
+               goto out;<br>
+<br>
+       ret = lookup_sheep_vdi(dev);<br>
+       if (ret < 0)<br>
+               goto out_release;<br>
+<br>
+       hdr.opcode = SD_OP_READ_OBJ;<br>
+       hdr.data_length = SD_INODE_SIZE;<br>
+       hdr.obj.oid = vid_to_vdi_oid(dev->vdi.vid);<br>
+       hdr.obj.offset = 0;<br>
+       ret = sheep_run_sdreq(dev->sock, &hdr, inode);<br>
+       if (ret < 0)<br>
+               goto out_release;<br>
+<br>
+       /* XXX switch case */<br>
+       if (rsp->result != SD_RES_SUCCESS) {<br>
+               ret = -EIO;<br>
+               goto out_release;<br>
+       }<br>
+<br>
+       dev->vdi.inode = inode;<br>
+       pr_info("%s: Associated to %s\n", DRV_NAME, inode->name);<br>
+       return 0;<br>
+out_release:<br>
+       socket_shutdown(dev->sock);<br>
+       dev->sock = NULL;<br>
+out:<br>
+       vfree(inode);<br>
+       return ret;<br>
+}<br>
+<br>
+static void submit_sheep_request(struct sheep_request *req)<br>
+{<br>
+}<br></blockquote><div>Maybe we could remove this unused function ?</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+static inline void free_sheep_aiocb(struct sheep_aiocb *aiocb)<br>
+{<br>
+       kfree(aiocb->buf);<br>
+       kfree(aiocb);<br>
+}<br>
+<br>
+static void aio_write_done(struct sheep_aiocb *aiocb, bool locked)<br>
+{<br>
+       sbd_debug("off %llu, len %llu\n", aiocb->offset, aiocb->length);<br>
+<br>
+       if (locked)<br>
+               __blk_end_request_all(aiocb->request, aiocb->ret);<br>
+       else<br>
+               blk_end_request_all(aiocb->request, aiocb->ret);<br>
+       free_sheep_aiocb(aiocb);<br>
+}<br>
+<br>
+static void aio_read_done(struct sheep_aiocb *aiocb, bool locked)<br>
+{<br>
+       sbd_debug("off %llu, len %llu\n", aiocb->offset, aiocb->length);<br>
+<br>
+       if (locked)<br>
+               __blk_end_request_all(aiocb->request, aiocb->ret);<br>
+       else<br>
+               blk_end_request_all(aiocb->request, aiocb->ret);<br>
+       free_sheep_aiocb(aiocb);<br>
+}<br>
+<br>
+struct sheep_aiocb *sheep_aiocb_setup(struct request *req)<br>
+{<br>
+       struct sheep_aiocb *aiocb = kmalloc(sizeof(*aiocb), GFP_KERNEL);<br>
+       struct req_iterator iter;<br>
+       struct bio_vec *bvec;<br>
+       int len = 0;<br>
+<br>
+       if (!aiocb)<br>
+               return ERR_PTR(-ENOMEM);<br>
+<br>
+       aiocb->offset = blk_rq_pos(req) * SECTOR_SIZE;<br>
+       aiocb->length = blk_rq_bytes(req);<br>
+       aiocb->nr_requests = 0;<br>
+       aiocb->ret = 0;<br>
+       aiocb->buf_iter = 0;<br>
+       aiocb->request = req;<br>
+       aiocb->buf = kzalloc(aiocb->length, GFP_KERNEL);<br>
+<br>
+       switch (rq_data_dir(req)) {<br>
+       case WRITE:<br>
+               rq_for_each_segment(bvec, req, iter) {<br>
+                       unsigned long flags;<br>
+                       void *addr = bvec_kmap_irq(bvec, &flags);<br>
+<br>
+                       memcpy(aiocb->buf + len, addr, bvec->bv_len);<br>
+                       flush_dcache_page(bvec->bv_page);<br>
+                       bvec_kunmap_irq(addr, &flags);<br>
+<br>
+                       len += bvec->bv_len;<br>
+               }<br>
+               aiocb->aio_done_func = aio_write_done;<br>
+               break;<br>
+       case READ:<br>
+               aiocb->aio_done_func = aio_read_done;<br>
+               break;<br>
+       default:<br>
+               /* impossible case */<br>
+               WARN_ON(1);<br>
+               free_sheep_aiocb(aiocb);<br>
+               return ERR_PTR(-EINVAL);<br>
+       }<br>
+<br>
+       return aiocb;<br>
+}<br>
+<br>
+static struct sheep_request *alloc_sheep_request(struct sheep_aiocb *aiocb,<br>
+                                                u64 oid, int len,<br>
+                                                int offset)<br>
+{<br>
+       struct sheep_request *req = kmalloc(sizeof(*req), GFP_KERNEL);<br>
+       struct sbd_device *dev = sheep_aiocb_to_device(aiocb);<br>
+<br>
+       if (!req)<br>
+               return ERR_PTR(-ENOMEM);<br>
+<br>
+       req->offset = offset;<br>
+       req->length = len;<br>
+       req->oid = oid;<br>
+       req->aiocb = aiocb;<br>
+       req->buf = aiocb->buf + aiocb->buf_iter;<br>
+       req->seq_num = atomic_inc_return(&dev->seq_num);<br>
+<br>
+       switch (rq_data_dir(aiocb->request)) {<br>
+       case WRITE:<br>
+               req->type = SHEEP_WRITE;<br>
+               break;<br>
+       case READ:<br>
+               req->type = SHEEP_READ;<br>
+               break;<br>
+       default:<br>
+               /* impossible case */<br>
+               WARN_ON(1);<br>
+               kfree(req);<br>
+               return ERR_PTR(-EINVAL);<br>
+       }<br>
+<br>
+       aiocb->buf_iter += len;<br>
+       aiocb->nr_requests++;<br>
+<br>
+       return req;<br>
+}<br>
+<br>
+static void end_sheep_request(struct sheep_request *req, bool queue_locked)<br>
+{<br>
+       struct sheep_aiocb *aiocb = req->aiocb;<br>
+<br>
+       if (--aiocb->nr_requests == 0)<br>
+               aiocb->aio_done_func(aiocb, queue_locked);<br>
+<br>
+       sbd_debug("end oid %llx off %d, len %d, seq %u\n", req->oid,<br>
+                 req->offset, req->length, req->seq_num);<br>
+       kfree(req);<br>
+}<br>
+<br>
+int sheep_aiocb_submit(struct sheep_aiocb *aiocb)<br>
+{<br>
+       struct sbd_device *dev = sheep_aiocb_to_device(aiocb);<br>
+       u64 offset = aiocb->offset;<br>
+       u64 total = aiocb->length;<br>
+       u64 start = offset % SD_DATA_OBJ_SIZE;<br>
+       u32 vid = dev->vdi.vid;<br>
+       u64 oid = vid_to_data_oid(vid, offset / SD_DATA_OBJ_SIZE);<br>
+       u32 idx = data_oid_to_idx(oid);<br>
+       int len = SD_DATA_OBJ_SIZE - start;<br>
+<br>
+       if (total < len)<br>
+               len = total;<br>
+<br>
+       sbd_debug("submit oid %llx off %llu, len %llu\n", oid, offset, total);<br>
+       /*<br>
+        * Make sure we don't free the aiocb before we are done with all<br>
+        * requests.This additional reference is dropped at the end of this<br>
+        * function.<br>
+        */<br>
+       aiocb->nr_requests++;<br>
+<br>
+       do {<br>
+               struct sheep_request *req;<br>
+<br>
+               req = alloc_sheep_request(aiocb, oid, len, start);<br>
+               if (IS_ERR(req))<br>
+                       return PTR_ERR(req);<br>
+<br>
+               if (likely(dev->vdi.inode->data_vdi_id[idx]))<br>
+                       goto submit;<br>
+<br>
+               /* Object is not created yet... */<br>
+               switch (req->type) {<br>
+               case SHEEP_WRITE:<br>
+               case SHEEP_READ:<br>
+                       end_sheep_request(req, true);<br>
+                       goto done;<br>
+               }<br>
+submit:<br>
+               submit_sheep_request(req);<br>
+done:<br>
+               oid++;<br>
+               total -= len;<br>
+               start = (start + len) % SD_DATA_OBJ_SIZE;<br>
+               len = total > SD_DATA_OBJ_SIZE ? SD_DATA_OBJ_SIZE : total;<br>
+       } while (total > 0);<br>
+<br>
+       if (--aiocb->nr_requests == 0)<br>
+               aiocb->aio_done_func(aiocb, true);<br>
+<br>
+       return 0;<br>
+}<br>
+<br>
+int sheep_handle_reply(struct sbd_device *dev)<br>
+{<br>
+       return 0;<br>
+}<br>
diff --git a/sbd/sheep_block_device.c b/sbd/sheep_block_device.c<br>
new file mode 100644<br>
index 0000000..e7331dc<br>
--- /dev/null<br>
+++ b/sbd/sheep_block_device.c<br>
@@ -0,0 +1,299 @@<br>
+/*<br>
+ * Copyright (C) 2014 Liu Yuan <<a href="mailto:namei.unix@gmail.com">namei.unix@gmail.com</a>><br>
+ *<br>
+ * This program is free software; you can redistribute it and/or<br>
+ * modify it under the terms of the GNU General Public License version<br>
+ * 2 as published by the Free Software Foundation.<br>
+ *<br>
+ * You should have received a copy of the GNU General Public License<br>
+ * along with this program. If not, see <<a href="http://www.gnu.org/licenses/" target="_blank">http://www.gnu.org/licenses/</a>>.<br>
+ */<br>
+<br>
+/*<br>
+ * SBD - Sheepdog Block Device<br>
+ *<br>
+ * This file implements the glue functions to export sheep vdi as Linux block<br>
+ * device.<br>
+ *<br>
+ */<br>
+<br>
+#include "sbd.h"<br>
+<br>
+static LIST_HEAD(sbd_dev_list);<br>
+<br>
+static const struct block_device_operations sbd_bd_ops = {<br>
+       .owner          = THIS_MODULE,<br>
+};<br>
+<br>
+static int sbd_submit_request(struct request *req)<br>
+{<br>
+       struct sheep_aiocb *aiocb = sheep_aiocb_setup(req);<br>
+<br>
+       if (IS_ERR(aiocb))<br>
+               return PTR_ERR(aiocb);<br>
+<br>
+       return sheep_aiocb_submit(aiocb);<br>
+}<br>
+<br>
+static void sbd_request_submiter(struct request_queue *q)<br>
+{<br>
+       struct request *req;<br>
+<br>
+       while ((req = blk_fetch_request(q)) != NULL) {<br>
+               int ret;<br>
+<br>
+               /* filter out block requests we don't understand */<br>
+               if (req->cmd_type != REQ_TYPE_FS) {<br>
+                       __blk_end_request_all(req, 0);<br>
+                       continue;<br>
+               }<br>
+               ret = sbd_submit_request(req);<br>
+               if (ret < 0)<br>
+                       break;<br>
+       }<br>
+}<br>
+<br>
+static int sbd_add_disk(struct sbd_device *dev)<br>
+{<br>
+       struct gendisk *disk;<br>
+       struct request_queue *rq;<br>
+<br>
+       disk = alloc_disk(SBD_MINORS_PER_MAJOR);<br>
+       if (!disk)<br>
+               return -ENOMEM;<br>
+<br>
+       snprintf(disk->disk_name, DEV_NAME_LEN, DRV_NAME "%d", dev->id);<br>
+       disk->major = dev->major;<br>
+       disk->first_minor = 0;<br>
+       disk->fops = &sbd_bd_ops;<br>
+       disk->private_data = dev;<br>
+<br>
+       rq = blk_init_queue(sbd_request_submiter, &dev->queue_lock);<br>
+       if (!rq) {<br>
+               put_disk(disk);<br>
+               return -ENOMEM;<br>
+       }<br>
+<br>
+       blk_queue_max_hw_sectors(rq, SD_DATA_OBJ_SIZE / SECTOR_SIZE);<br>
+       blk_queue_max_segments(rq, SD_DATA_OBJ_SIZE / SECTOR_SIZE);<br>
+       blk_queue_max_segment_size(rq, SD_DATA_OBJ_SIZE);<br>
+       blk_queue_io_opt(rq, SD_DATA_OBJ_SIZE);<br>
+<br>
+       disk->queue = rq;<br>
+       rq->queuedata = dev;<br>
+       dev->disk = disk;<br>
+       dev->rq = rq;<br>
+<br>
+       set_capacity(disk, dev->vdi.inode->vdi_size / SECTOR_SIZE);<br>
+       add_disk(disk);<br>
+<br>
+       return 0;<br>
+}<br>
+<br>
+static int sbd_request_reaper(void *data)<br>
+{<br>
+       struct sbd_device *dev = data;<br>
+<br>
+       while (!kthread_should_stop() || !list_empty(&dev->inflight_head)) {<br>
+               wait_event_interruptible(dev->inflight_wq,<br>
+                                        kthread_should_stop() ||<br>
+                                        !list_empty(&dev->inflight_head));<br>
+<br>
+               if (list_empty(&dev->inflight_head))<br>
+                       continue;<br>
+<br>
+               sheep_handle_reply(dev);<br>
+       }<br>
+       return 0;<br>
+}<br>
+<br>
+static inline void free_sbd_device(struct sbd_device *dev)<br>
+{<br>
+       socket_shutdown(dev->sock);<br>
+       vfree(dev->vdi.inode);<br>
+       kfree(dev);<br>
+}<br>
+<br>
+static ssize_t sbd_add(struct bus_type *bus, const char *buf,<br>
+                      size_t count)<br>
+{<br>
+       struct sbd_device *dev, *tmp;<br>
+       ssize_t ret;<br>
+       int new_id = 0;<br>
+       char name[DEV_NAME_LEN];<br>
+<br>
+       if (!try_module_get(THIS_MODULE))<br>
+               return -ENODEV;<br>
+<br>
+       dev = kzalloc(sizeof(*dev), GFP_KERNEL);<br>
+       if (!dev) {<br>
+               ret = -ENOMEM;<br>
+               goto err_put;<br>
+       }<br>
+<br>
+       if (sscanf(buf, "%s %d %s", dev->vdi.ip, &dev->vdi.port,<br>
+                  dev-><a href="http://vdi.name" target="_blank">vdi.name</a>) != 3) {<br>
+               ret = -EINVAL;<br>
+               goto err_put;<br>
+       }<br>
+<br>
+       spin_lock_init(&dev->queue_lock);<br>
+       INIT_LIST_HEAD(&dev->inflight_head);<br>
+       INIT_LIST_HEAD(&dev->blocking_head);<br>
+       init_waitqueue_head(&dev->inflight_wq);<br>
+<br>
+       list_for_each_entry(tmp, &sbd_dev_list, list) {<br>
+               if (tmp->id > new_id)<br>
+                       new_id = tmp->id + 1;<br>
+       }<br>
+<br>
+       ret = sheep_setup_vdi(dev);<br>
+       if (ret < 0)<br>
+               goto err_free_dev;<br>
+<br>
+       dev->id = new_id;<br>
+       snprintf(name, DEV_NAME_LEN, DRV_NAME "%d", dev->id);<br>
+       ret = register_blkdev(0, name);<br>
+       if (ret < 0)<br>
+               goto err_free_dev;<br>
+       dev->major = ret;<br>
+       dev->minor = 0;<br>
+       dev->reaper = kthread_run(sbd_request_reaper, dev, "sbd_reaper");<br>
+<br>
+       ret = sbd_add_disk(dev);<br>
+       if (ret < 0)<br>
+               goto err_unreg_blkdev;<br>
+<br>
+       list_add_tail(&dev->list, &sbd_dev_list);<br>
+<br>
+       return count;<br>
+err_unreg_blkdev:<br>
+       unregister_blkdev(dev->major, name);<br>
+err_free_dev:<br>
+       free_sbd_device(dev);<br>
+err_put:<br>
+       module_put(THIS_MODULE);<br>
+       pr_err("%s: error adding device %s", DRV_NAME, buf);<br>
+       return ret;<br>
+}<br>
+<br>
+static void sbd_del_disk(struct sbd_device *dev)<br>
+{<br>
+       struct gendisk *disk = dev->disk;<br>
+<br>
+       if (!disk)<br>
+               return;<br>
+<br>
+       if (disk->flags & GENHD_FL_UP)<br>
+               del_gendisk(disk);<br>
+       if (disk->queue)<br>
+               blk_cleanup_queue(disk->queue);<br>
+       put_disk(disk);<br>
+}<br>
+<br>
+static ssize_t sbd_remove(struct bus_type *bus, const char *buf,<br>
+                         size_t count)<br>
+{<br>
+<br>
+       struct list_head *tmp, *n;<br>
+       struct sbd_device *dev;<br>
+       unsigned long ul;<br>
+       int target_id, ret;<br>
+<br>
+       ret = strict_strtoul(buf, 10, &ul);<br>
+       if (ret)<br>
+               return ret;<br>
+<br>
+       /* convert to int; abort if we lost anything in the conversion */<br>
+       target_id = (int)ul;<br>
+       if (target_id != ul)<br>
+               return -EINVAL;<br>
+<br>
+       list_for_each_safe(tmp, n, &sbd_dev_list) {<br>
+               dev = list_entry(tmp, struct sbd_device, list);<br>
+               if (dev->id == target_id) {<br>
+                       list_del(&dev->list);<br>
+                       break;<br>
+               }<br>
+               dev = NULL;<br>
+       }<br>
+<br>
+       if (!dev)<br>
+               return -ENOENT;<br>
+<br>
+       kthread_stop(dev->reaper);<br>
+       wake_up_interruptible(&dev->inflight_wq);<br>
+<br>
+       sbd_del_disk(dev);<br>
+       free_sbd_device(dev);<br>
+       module_put(THIS_MODULE);<br>
+<br>
+       return count;<br>
+}<br>
+<br>
+static struct bus_attribute sbd_bus_attrs[] = {<br>
+       __ATTR(add, S_IWUSR, NULL, sbd_add),<br>
+       __ATTR(remove, S_IWUSR, NULL, sbd_remove),<br>
+       __ATTR_NULL<br>
+};<br>
+<br>
+static struct bus_type sbd_bus_type = {<br>
+       .name           = "sbd",<br>
+       .bus_attrs      = sbd_bus_attrs,<br>
+};<br>
+<br>
+static void sbd_root_dev_release(struct device *dev)<br>
+{<br>
+}<br>
+<br>
+static struct device sbd_root_dev = {<br>
+       .init_name      = "sbd",<br>
+       .release        = sbd_root_dev_release,<br>
+};<br>
+<br>
+/* Create control files in /sys/bus/sbd/... */<br>
+static int sbd_sysfs_init(void)<br>
+{<br>
+       int ret;<br>
+<br>
+       ret = device_register(&sbd_root_dev);<br>
+       if (ret < 0)<br>
+               return ret;<br>
+<br>
+       ret = bus_register(&sbd_bus_type);<br>
+       if (ret < 0)<br>
+               device_unregister(&sbd_root_dev);<br>
+<br>
+       return ret;<br>
+}<br>
+<br>
+static void sbd_sysfs_cleanup(void)<br>
+{<br>
+       bus_unregister(&sbd_bus_type);<br>
+       device_unregister(&sbd_root_dev);<br>
+}<br>
+<br>
+int __init sbd_init(void)<br>
+{<br>
+       int ret;<br>
+<br>
+       ret = sbd_sysfs_init();<br>
+       if (ret < 0)<br>
+               return ret;<br>
+<br>
+       pr_info("%s: Sheepdog block device loaded\n", DRV_NAME);<br>
+       return 0;<br>
+}<br>
+<br>
+void __exit sbd_exit(void)<br>
+{<br>
+       sbd_sysfs_cleanup();<br>
+       pr_info("%s: Sheepdog block device unloaded\n", DRV_NAME);<br>
+}<br>
+<br>
+module_init(sbd_init);<br>
+module_exit(sbd_exit);<br>
+<br>
+MODULE_AUTHOR("Liu Yuan <<a href="mailto:namei.unix@gmail.com">namei.unix@gmail.com</a>>");<br>
+MODULE_DESCRIPTION("Sheepdog Block Device");<br>
+MODULE_LICENSE("GPL");<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.8.1.2<br>
<br>
--<br>
sheepdog mailing list<br>
<a href="mailto:sheepdog@lists.wpkg.org">sheepdog@lists.wpkg.org</a><br>
<a href="http://lists.wpkg.org/mailman/listinfo/sheepdog" target="_blank">http://lists.wpkg.org/mailman/listinfo/sheepdog</a><br>
</font></span></blockquote></div><br><br clear="all"><div><br></div>-- <br>--<br>Best Regard<br>Robin Dong
</div></div>