[stgt] [PATCH] sg-based backing store

Alexander Nezhinsky nezhinsky at gmail.com
Sun Oct 5 20:41:53 CEST 2008


Backing store based on SCSI Generic driver (sg), v.3.
allows to route scsi commands to the native scsi devices
unchanged. It uses Direct I/O (dio) to avoid memcopy
to the buffer cache.

Using sg allows asynchronous execution, commands are 
written to a sg-backed char device, completions are
read from the same device, which can be polled 
asynchronously.

This bs provides significant performance improvement when 
working with native scsi devices. In a setup, where 
the scsi devices are exported by a tgt with bs_null,
and both links (from initiator to target and from the 
target to the "backing-store" target) are iSER/IB
sustained bandwidth of 1450 MB/s for READ and
1350 MB/s for WRITE is achieved. This to be compared to
700-800 MB/s when running with bs_rdwr in the same setup.
Some improvements are seen with IOPS as well:
60 kIOPS for READ, 38 kIOPS for WRITE
(compared to 31/35KIOPS with bs_rdwr).


Signed-off-by: Alexander Nezhinsky <nezhinsky at gmail.com>
---

diff --git a/usr/Makefile b/usr/Makefile
index a59364b..9b3376c 100644
--- a/usr/Makefile
+++ b/usr/Makefile
@@ -58,7 +58,7 @@ PROGRAMS += tgtd tgtadm
 SCRIPTS += ../scripts/tgt-setup-lun ../scripts/tgt-admin
 TGTD_OBJS += tgtd.o mgmt.o target.o scsi.o log.o driver.o util.o work.o \
 		parser.o spc.o sbc.o mmc.o osd.o scc.o smc.o ssc.o bs_ssc.o \
-		bs_null.o bs.o
+		bs_null.o bs_sg.o bs.o
 MANPAGES = ../doc/manpages/tgtadm.8 ../doc/manpages/tgt-admin.8 \
 		../doc/manpages/tgt-setup-lun.8
 
diff --git a/usr/bs_sg.c b/usr/bs_sg.c
new file mode 100644
index 0000000..8baa480
--- /dev/null
+++ b/usr/bs_sg.c
@@ -0,0 +1,250 @@
+/*
+ * SCSI Generic I/O backing store
+ *
+ * Copyright (C) 2008 Alexander Nezhinsky <nezhinsky at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/fs.h>
+#include <linux/major.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/epoll.h>
+#include <scsi/sg.h>
+
+#include "list.h"
+#include "util.h"
+#include "tgtd.h"
+#include "scsi.h"
+
+#define BS_SG_RESVD_SZ  (512 * 1024)
+#define BS_SG_TIMEOUT	2000
+
+static int graceful_read(int fd, void *p_read, int to_read)
+{
+	int err;
+
+	while (to_read > 0) {
+		err = read(fd, p_read, to_read);
+		if (err >= 0) {
+			to_read -= err;
+			p_read += err;
+		} else if (errno == EINTR)
+			continue;
+		else {
+			eprintf("sg device %d read failed, errno: %d\n",
+				fd, errno);
+			return errno;
+		}
+	}
+	return 0;
+}
+
+static int graceful_write(int fd, void *p_write, int to_write)
+{
+	int err;
+
+	while (to_write > 0) {
+		err = write(fd, p_write, to_write);
+		if (err >= 0) {
+			to_write -= err;
+			p_write += err;
+		} else if (errno == EINTR)
+			continue;
+		else {
+			eprintf("sg device %d write failed, errno: %d\n",
+				fd, errno);
+			return errno;
+		}
+	}
+	return 0;
+}
+
+static void set_cmd_failed(struct scsi_cmd *cmd)
+{
+	int result = SAM_STAT_CHECK_CONDITION;
+	uint16_t asc = ASC_READ_ERROR;
+	uint8_t key = MEDIUM_ERROR;
+
+	scsi_set_result(cmd, result);
+	sense_data_build(cmd, key, asc);
+}
+
+static int bs_sg_cmd_submit(struct scsi_cmd *cmd)
+{
+	struct scsi_lu *dev = cmd->dev;
+	int fd = dev->fd;
+	struct sg_io_hdr io_hdr;
+	int err = 0;
+
+	memset(&io_hdr, 0, sizeof(io_hdr));
+	io_hdr.interface_id = 'S';
+	io_hdr.cmd_len = cmd->scb_len;
+	io_hdr.cmdp = cmd->scb;
+
+	if (scsi_get_data_dir(cmd) == DATA_WRITE) {
+		io_hdr.dxfer_direction = SG_DXFER_TO_DEV;
+		io_hdr.dxfer_len = scsi_get_out_length(cmd);
+		io_hdr.dxferp = (void *)scsi_get_out_buffer(cmd);
+	} else {
+		io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+		io_hdr.dxfer_len = scsi_get_in_length(cmd);
+		io_hdr.dxferp = (void *)scsi_get_in_buffer(cmd);
+	}
+	io_hdr.mx_sb_len = sizeof(cmd->sense_buffer);
+	io_hdr.sbp = cmd->sense_buffer;
+	io_hdr.timeout = BS_SG_TIMEOUT;
+	io_hdr.pack_id = -1;
+	io_hdr.usr_ptr = cmd;
+	io_hdr.flags |= SG_FLAG_DIRECT_IO;
+
+	err = graceful_write(fd, &io_hdr, sizeof(io_hdr));
+	if (!err)
+		set_cmd_async(cmd);
+	else {
+		eprintf("failed to start cmd 0x%p\n", cmd);
+		set_cmd_failed(cmd);
+	}
+	return 0;
+}
+
+static void bs_sg_cmd_complete(int fd, int events, void *data)
+{
+	struct sg_io_hdr io_hdr;
+	struct scsi_cmd *cmd;
+	int err;
+
+	memset(&io_hdr, 0, sizeof(io_hdr));
+	io_hdr.interface_id = 'S';
+	io_hdr.pack_id = -1;
+
+	err = graceful_read(fd, &io_hdr, sizeof(io_hdr));
+	if (err)
+		return;
+
+	cmd = (struct scsi_cmd *)io_hdr.usr_ptr;
+	if (!io_hdr.status) {
+		scsi_set_out_resid(cmd, io_hdr.resid);
+		scsi_set_in_resid(cmd, io_hdr.resid);
+	} else {
+		cmd->sense_len = io_hdr.sb_len_wr;
+		scsi_set_out_resid_by_actual(cmd, 0);
+		scsi_set_in_resid_by_actual(cmd, 0);
+	}
+
+	cmd->scsi_cmd_done(cmd, io_hdr.status);
+}
+
+static int chk_sg_device(char *path)
+{
+	struct stat st;
+
+	if (stat(path, &st) < 0) {
+		eprintf("stat() failed errno: %d\n", errno);
+		return -1;
+	}
+
+	if (S_ISCHR(st.st_mode) && major(st.st_rdev) == SCSI_GENERIC_MAJOR)
+		return 0;
+	else
+		return -1;
+}
+
+static int init_sg_device(int fd)
+{
+	int t, err;
+
+	err = ioctl(fd, SG_GET_VERSION_NUM, &t);
+	if ((err < 0) || (t < 30000)) {
+		eprintf("sg driver prior to 3.x\n");
+		return -1;
+	}
+
+	t = BS_SG_RESVD_SZ;
+	err = ioctl(fd, SG_SET_RESERVED_SIZE, &t);
+	if (err < 0) {
+		eprintf("SG_SET_RESERVED_SIZE errno: %d\n", errno);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int bs_sg_open(struct scsi_lu *lu, char *path, int *fd, uint64_t *size)
+{
+	int sg_fd, err;
+
+	err = chk_sg_device(path);
+	if (err) {
+		eprintf("Not recognized %s as an SG device\n", path);
+		return -EINVAL;
+	}
+
+	sg_fd = open(path, O_RDWR);
+	if (sg_fd < 0) {
+		eprintf("Could not open %s, %m\n", path);
+		return sg_fd;
+	}
+
+	err = init_sg_device(sg_fd);
+	if (err) {
+		eprintf("Failed to initialize sg device %s\n", path);
+		return err;
+	}
+
+	err = tgt_event_add(sg_fd, EPOLLIN, bs_sg_cmd_complete, NULL);
+	if (err) {
+		eprintf("Failed to add sg device event %s\n", path);
+		return err;
+	}
+
+	*fd = sg_fd;
+	*size = 0;
+	return 0;
+}
+
+static void bs_sg_close(struct scsi_lu *lu)
+{
+	close(lu->fd);
+}
+
+static int bs_sg_cmd_done(struct scsi_cmd *cmd)
+{
+	return 0;
+}
+
+static struct backingstore_template sg_bst = {
+	.bs_name		= "sg",
+	.bs_datasize		= 0,
+	.bs_open		= bs_sg_open,
+	.bs_close		= bs_sg_close,
+	.bs_cmd_submit		= bs_sg_cmd_submit,
+	.bs_cmd_done		= bs_sg_cmd_done,
+};
+
+__attribute__((constructor)) static void bs_sg_constructor(void)
+{
+	register_backingstore_template(&sg_bst);
+}
--
To unsubscribe from this list: send the line "unsubscribe stgt" in
the body of a message to majordomo at vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



More information about the stgt mailing list