[stgt] [PATCH] iscsi: add sendfile support

FUJITA Tomonori fujita.tomonori at lab.ntt.co.jp
Tue Sep 15 11:51:56 CEST 2009


Here's a patch to add sendfile support; tgtd sends data in a zero-copy
manner for READ_* commands, as IET does (and some other iSCSI target
implementations, I guess).

I'm not very eager for this (it doesn't give any notable performance
boost in my 10GbE environment) and I have only lightly tested it.

But I'll merge this if someone is interested in this (and tests this
heavily).


=
From: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp>
Subject: [PATCH] iscsi: add sendfile support

Signed-off-by: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp>
---
 usr/bs_rdwr.c            |   14 ++++++++++----
 usr/fcoe/openfc_target.c |    2 +-
 usr/iscsi/conn.c         |   12 ++++++++++++
 usr/iscsi/iscsi_rdma.c   |    2 +-
 usr/iscsi/iscsi_tcp.c    |   23 +++++++++++++++++++----
 usr/iscsi/iscsid.c       |    3 ++-
 usr/iscsi/iscsid.h       |    1 +
 usr/iscsi/session.c      |    8 ++++++--
 usr/iscsi/transport.h    |    2 +-
 usr/scsi_cmnd.h          |    3 +++
 usr/target.c             |    4 +++-
 usr/target.h             |    4 ++++
 usr/tgtd.h               |    3 ++-
 usr/tgtif.c              |    2 +-
 14 files changed, 66 insertions(+), 17 deletions(-)

diff --git a/usr/bs_rdwr.c b/usr/bs_rdwr.c
index 6068479..80d6b1b 100644
--- a/usr/bs_rdwr.c
+++ b/usr/bs_rdwr.c
@@ -36,6 +36,7 @@
 #include "util.h"
 #include "tgtd.h"
 #include "scsi.h"
+#include "target.h"
 #include "bs_thread.h"
 
 static void set_medium_error(int *result, uint8_t *key, uint16_t *asc)
@@ -107,11 +108,16 @@ static void bs_rdwr_request(struct scsi_cmd *cmd)
 	case READ_12:
 	case READ_16:
 		length = scsi_get_in_length(cmd);
-		ret = pread64(fd, scsi_get_in_buffer(cmd), length,
-			      cmd->offset);
 
-		if (ret != length)
-			set_medium_error(&result, &key, &asc);
+		if (cmd->it_nexus->features & NEXUS_F_SENDFILE) {
+			cmd->send_fd = fd;
+			posix_fadvise64(fd, cmd->offset, length, POSIX_FADV_WILLNEED);
+		} else {
+			ret = pread64(fd, scsi_get_in_buffer(cmd), length,
+				      cmd->offset);
+			if (ret != length)
+				set_medium_error(&result, &key, &asc);
+		}
 		break;
 	default:
 		break;
diff --git a/usr/fcoe/openfc_target.c b/usr/fcoe/openfc_target.c
index 60d3e27..bae23ec 100644
--- a/usr/fcoe/openfc_target.c
+++ b/usr/fcoe/openfc_target.c
@@ -240,7 +240,7 @@ static int openfct_session_create(void *arg, struct fc_remote_port *port)
 		sess->fcid = port->rp_fid;
 		INIT_LIST_HEAD(&sess->list);
 
-		it_nexus_create(1, port->rp_fid, 0, (void *)sess);
+		it_nexus_create(1, port->rp_fid, 0, (void *)sess, 0);
 		list_add(&sess->list, &tgt->sess_list);
 		tgt->sess_count++;
 	}
diff --git a/usr/iscsi/conn.c b/usr/iscsi/conn.c
index 61b570e..2ebb7a7 100644
--- a/usr/iscsi/conn.c
+++ b/usr/iscsi/conn.c
@@ -239,3 +239,15 @@ int conn_close_force(uint32_t tid, uint64_t sid, uint32_t cid)
 
 	return sess_found ? TGTADM_NO_CONNECTION : TGTADM_NO_SESSION;
 }
+
+int is_sendfile_capable(struct iscsi_connection *conn)
+{
+	struct param *param = conn->session_param;
+
+	if (!conn->tp->rdma &&
+	    !(param[ISCSI_PARAM_HDRDGST_EN].val & DIGEST_CRC32C) &&
+	    !(param[ISCSI_PARAM_DATADGST_EN].val & DIGEST_CRC32C))
+		return 1;
+
+	return 0;
+}
diff --git a/usr/iscsi/iscsi_rdma.c b/usr/iscsi/iscsi_rdma.c
index b25abf7..5a3c376 100644
--- a/usr/iscsi/iscsi_rdma.c
+++ b/usr/iscsi/iscsi_rdma.c
@@ -1348,7 +1348,7 @@ static size_t iscsi_iser_read(struct iscsi_connection *conn, void *buf,
 }
 
 static size_t iscsi_iser_write_begin(struct iscsi_connection *conn,
-				     void *buf, size_t nbytes)
+				     void *buf, size_t nbytes, int tx_data)
 {
 	struct conn_info *ci = RDMA_CONN(conn);
 	struct sendlist *sendl;
diff --git a/usr/iscsi/iscsi_tcp.c b/usr/iscsi/iscsi_tcp.c
index edc4e86..13fb12a 100644
--- a/usr/iscsi/iscsi_tcp.c
+++ b/usr/iscsi/iscsi_tcp.c
@@ -30,6 +30,7 @@
 #include <netinet/in.h>
 #include <netinet/tcp.h>
 #include <sys/epoll.h>
+#include <sys/sendfile.h>
 #include <sys/socket.h>
 
 #include "iscsid.h"
@@ -271,13 +272,27 @@ static size_t iscsi_tcp_read(struct iscsi_connection *conn, void *buf,
 }
 
 static size_t iscsi_tcp_write_begin(struct iscsi_connection *conn, void *buf,
-				    size_t nbytes)
+				    size_t nbytes, int tx_data)
 {
 	struct iscsi_tcp_connection *tcp_conn = TCP_CONN(conn);
-	int opt = 1;
+	struct iscsi_task *task = conn->tx_task;
+	struct scsi_cmd *scsi_cmd = NULL;
+	int ret, opt = 1;
 
-	setsockopt(tcp_conn->fd, SOL_TCP, TCP_CORK, &opt, sizeof(opt));
-	return write(tcp_conn->fd, buf, nbytes);
+	if (task)
+		scsi_cmd = &task->scmd;
+
+	if (tx_data && scsi_cmd && scsi_cmd->send_fd) {
+		off_t off = scsi_cmd->offset + scsi_cmd->send_offset;
+		ret = sendfile(tcp_conn->fd, scsi_cmd->send_fd, &off, nbytes);
+		if (ret > 0)
+			scsi_cmd->send_offset += ret;
+	} else {
+		setsockopt(tcp_conn->fd, SOL_TCP, TCP_CORK, &opt, sizeof(opt));
+		ret = write(tcp_conn->fd, buf, nbytes);
+	}
+
+	return ret;
 }
 
 static void iscsi_tcp_write_end(struct iscsi_connection *conn)
diff --git a/usr/iscsi/iscsid.c b/usr/iscsi/iscsid.c
index 79c6e2d..10650ba 100644
--- a/usr/iscsi/iscsid.c
+++ b/usr/iscsi/iscsid.c
@@ -2001,7 +2001,8 @@ static int do_send(struct iscsi_connection *conn, int next_state)
 {
 	int ret;
 again:
-	ret = conn->tp->ep_write_begin(conn, conn->tx_buffer, conn->tx_size);
+	ret = conn->tp->ep_write_begin(conn, conn->tx_buffer, conn->tx_size,
+				       conn->tx_iostate == IOSTATE_TX_DATA);
 	if (ret < 0) {
 		if (errno != EINTR && errno != EAGAIN)
 			conn->state = STATE_CLOSE;
diff --git a/usr/iscsi/iscsid.h b/usr/iscsi/iscsid.h
index b301c78..7295c4b 100644
--- a/usr/iscsi/iscsid.h
+++ b/usr/iscsi/iscsid.h
@@ -270,6 +270,7 @@ extern struct iscsi_connection * conn_find(struct iscsi_session *session, uint32
 extern int conn_take_fd(struct iscsi_connection *conn);
 extern void conn_add_to_session(struct iscsi_connection *conn, struct iscsi_session *session);
 extern int conn_close_force(uint32_t tid, uint64_t sid, uint32_t cid);
+extern int is_sendfile_capable(struct iscsi_connection *conn);
 
 /* iscsid.c */
 extern char *text_key_find(struct iscsi_connection *conn, char *searchKey);
diff --git a/usr/iscsi/session.c b/usr/iscsi/session.c
index 028d538..b7d06df 100644
--- a/usr/iscsi/session.c
+++ b/usr/iscsi/session.c
@@ -28,6 +28,7 @@
 
 #include "iscsid.h"
 #include "tgtd.h"
+#include "target.h"
 #include "util.h"
 
 static LIST_HEAD(sessions_list);
@@ -69,7 +70,7 @@ int session_create(struct iscsi_connection *conn)
 	static uint16_t tsih, last_tsih = 0;
 	struct iscsi_target *target;
 	char addr[128];
-
+	unsigned int features = 0;
 
 	target = target_find_by_id(conn->tid);
 	if (!target)
@@ -109,7 +110,10 @@ int session_create(struct iscsi_connection *conn)
 		 _TAB3 "Connection: %u\n"
 		 _TAB4 "%s\n", session->initiator, conn->cid, addr);
 
-	err = it_nexus_create(target->tid, tsih, 0, session->info);
+	if (is_sendfile_capable(conn))
+		features |= NEXUS_F_SENDFILE;
+
+	err = it_nexus_create(target->tid, tsih, 0, session->info, features);
 	if (err) {
 		free(session->initiator);
 		free(session->info);
diff --git a/usr/iscsi/transport.h b/usr/iscsi/transport.h
index 92a6f0a..8b88965 100644
--- a/usr/iscsi/transport.h
+++ b/usr/iscsi/transport.h
@@ -23,7 +23,7 @@ struct iscsi_transport {
 	size_t (*ep_read)(struct iscsi_connection *conn, void *buf,
 			  size_t nbytes);
 	size_t (*ep_write_begin)(struct iscsi_connection *conn, void *buf,
-				 size_t nbytes);
+				 size_t nbytes, int tx_data);
 	void (*ep_write_end)(struct iscsi_connection *conn);
 	int (*ep_rdma_read)(struct iscsi_connection *conn);
 	int (*ep_rdma_write)(struct iscsi_connection *conn);
diff --git a/usr/scsi_cmnd.h b/usr/scsi_cmnd.h
index 011f3e6..6cbbd4c 100644
--- a/usr/scsi_cmnd.h
+++ b/usr/scsi_cmnd.h
@@ -51,6 +51,9 @@ struct scsi_cmd {
 
 	struct it_nexus *it_nexus;
 	struct it_nexus_lu_info *itn_lu_info;
+
+	int send_fd;
+	off_t send_offset;
 };
 
 #define scsi_cmnd_accessor(field, type)						\
diff --git a/usr/target.c b/usr/target.c
index 14ab298..a9d1fb7 100644
--- a/usr/target.c
+++ b/usr/target.c
@@ -229,7 +229,8 @@ void ua_sense_add_other_it_nexus(uint64_t itn_id, struct scsi_lu *lu,
 	}
 }
 
-int it_nexus_create(int tid, uint64_t itn_id, int host_no, char *info)
+int it_nexus_create(int tid, uint64_t itn_id, int host_no, char *info,
+		    unsigned int features)
 {
 	int i, ret;
 	struct target *target;
@@ -256,6 +257,7 @@ int it_nexus_create(int tid, uint64_t itn_id, int host_no, char *info)
 	itn->host_no = host_no;
 	itn->nexus_target = target;
 	itn->info = info;
+	itn->features = features;
 	INIT_LIST_HEAD(&itn->it_nexus_lu_info_list);
 
 	list_for_each_entry(lu, &target->device_list, device_siblings) {
diff --git a/usr/target.h b/usr/target.h
index 8fe30aa..62cdeca 100644
--- a/usr/target.h
+++ b/usr/target.h
@@ -56,6 +56,10 @@ struct it_nexus {
 
 	struct list_head it_nexus_lu_info_list;
 
+	unsigned int features;
+
+#define NEXUS_F_SENDFILE		1
+
 	/* only used for show operation */
 	char *info;
 };
diff --git a/usr/tgtd.h b/usr/tgtd.h
index 303627e..75e57f2 100644
--- a/usr/tgtd.h
+++ b/usr/tgtd.h
@@ -266,7 +266,8 @@ extern int account_ctl(int tid, int type, char *user, int bind);
 extern int account_show(char *buf, int rest);
 extern int account_available(int tid, int dir);
 
-extern int it_nexus_create(int tid, uint64_t itn_id, int host_no, char *info);
+extern int it_nexus_create(int tid, uint64_t itn_id, int host_no, char *info,
+			   unsigned int features);
 extern int it_nexus_destroy(int tid, uint64_t itn_id);
 
 extern int device_type_register(struct device_type_template *);
diff --git a/usr/tgtif.c b/usr/tgtif.c
index 1419ad2..6e465ef 100644
--- a/usr/tgtif.c
+++ b/usr/tgtif.c
@@ -282,7 +282,7 @@ static void kern_it_nexus_request(struct tgt_event *ev)
 	if (function)
 		ret = it_nexus_destroy(tid, itn_id);
 	else
-		ret = it_nexus_create(tid, itn_id, host_no, NULL);
+		ret = it_nexus_create(tid, itn_id, host_no, NULL, 0);
 
 	kspace_send_it_nexus_res(host_no, itn_id, function, ret);
 }
-- 
1.5.6.5

--
To unsubscribe from this list: send the line "unsubscribe stgt" in
the body of a message to majordomo at vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



More information about the stgt mailing list