[stgt] [PATCH] use pthread per target

FUJITA Tomonori fujita.tomonori at lab.ntt.co.jp
Fri May 14 05:53:48 CEST 2010


There was some discussion on the scalability problem of tgt.

We have the feature to run multiple tgt processes on the single host
to for it. However, it has some disadvantages such as you can't share
portals, you need to use tgtadm against each tgtd daemon, etc.

We discussed converting tgt to use {process|pthread} per
{target|nexus|etc} to improve the performance.

This patch converts tgt to use pthread per target. It's a hacky patch
and there are tons of more things to fix. And I've not tested it much.

But I want to see how it could improve the performance.

Anyone can test this patch, please?

Note that with this patch, tgt needs the signalfd feature so you need
2.6.22 or later.

Thanks,

=
From: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp>
Subject: [PATCH] use pthread per target

Signed-off-by: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp>
---
 usr/bs.c              |   88 ++++++++++++++++++++++++++++---------------------
 usr/iscsi/iscsi_tcp.c |   21 +++++++++++-
 usr/iscsi/iscsid.c    |   14 +++++++-
 usr/iscsi/iscsid.h    |    2 +
 usr/iscsi/target.c    |   48 ++++++++++++++++++++++++++-
 usr/target.c          |    8 ++++-
 usr/target.h          |    6 +++
 usr/tgtd.c            |   55 ++++++++++++++++++++++++------
 usr/tgtd.h            |    6 +++-
 9 files changed, 193 insertions(+), 55 deletions(-)

diff --git a/usr/bs.c b/usr/bs.c
index d0fcce4..a585677 100644
--- a/usr/bs.c
+++ b/usr/bs.c
@@ -37,13 +37,10 @@
 #include "tgtadm_error.h"
 #include "util.h"
 #include "bs_thread.h"
+#include "target.h"
 
 static LIST_HEAD(bst_list);
 
-static int sig_fd = -1;
-static LIST_HEAD(sig_finished_list);
-static pthread_mutex_t sig_finished_lock;
-
 int register_backingstore_template(struct backingstore_template *bst)
 {
 	list_add(&bst->backingstore_siblings, &bst_list);
@@ -157,15 +154,18 @@ void bs_sig_request_done(int fd, int events, void *data)
 	struct scsi_cmd *cmd;
 	struct signalfd_siginfo siginfo[16];
 	LIST_HEAD(list);
+	struct scsi_lu *lu = data;
 
 	ret = read(fd, (char *)siginfo, sizeof(siginfo));
 	if (ret <= 0) {
 		return;
 	}
 
-	pthread_mutex_lock(&sig_finished_lock);
-	list_splice_init(&sig_finished_list, &list);
-	pthread_mutex_unlock(&sig_finished_lock);
+	dprintf("%d\n", (int)pthread_self());
+
+	pthread_mutex_lock(&lu->tgt->t_sig_finished_lock);
+	list_splice_init(&lu->tgt->t_sig_finished_list, &list);
+	pthread_mutex_unlock(&lu->tgt->t_sig_finished_lock);
 
 	while (!list_empty(&list)) {
 		cmd = list_first_entry(&list, struct scsi_cmd, bs_list);
@@ -181,6 +181,7 @@ static void *bs_thread_worker_fn(void *arg)
 	struct bs_thread_info *info = arg;
 	struct scsi_cmd *cmd;
 	sigset_t set;
+	struct scsi_lu *lu = (struct scsi_lu *)((char *)info - sizeof(*lu));
 
 	sigfillset(&set);
 	sigprocmask(SIG_BLOCK, &set, NULL);
@@ -211,57 +212,66 @@ static void *bs_thread_worker_fn(void *arg)
 
 		info->request_fn(cmd);
 
-		if (sig_fd < 0) {
-			pthread_mutex_lock(&info->finished_lock);
-			list_add_tail(&cmd->bs_list, &info->finished_list);
-			pthread_mutex_unlock(&info->finished_lock);
-
-			pthread_cond_signal(&info->finished_cond);
-		} else {
-			pthread_mutex_lock(&sig_finished_lock);
-			list_add_tail(&cmd->bs_list, &sig_finished_list);
-			pthread_mutex_unlock(&sig_finished_lock);
-
-			kill(getpid(), SIGUSR2);
+/* 		if (sig_fd < 0) { */
+/* 			pthread_mutex_lock(&info->finished_lock); */
+/* 			list_add_tail(&cmd->bs_list, &info->finished_list); */
+/* 			pthread_mutex_unlock(&info->finished_lock); */
+
+/* 			pthread_cond_signal(&info->finished_cond); */
+/* 		} else { */
+		{
+			pthread_mutex_lock(&lu->tgt->t_sig_finished_lock);
+			list_add_tail(&cmd->bs_list, &lu->tgt->t_sig_finished_list);
+			pthread_mutex_unlock(&lu->tgt->t_sig_finished_lock);
+
+			if (lu->tgt->efd == -1)
+				kill(getpid(), SIGUSR2);
+			else
+				pthread_kill(lu->tgt->thread, SIGUSR2);
 		}
 	}
 
 	pthread_exit(NULL);
 }
 
-static void bs_init(void)
+static int bs_init(struct scsi_lu *lu)
 {
-	static int done = 0;
 	sigset_t mask;
 	int ret;
-
-	if (done)
-		return;
-	done++;
+	int sfd;
 
 	sigemptyset(&mask);
 	sigaddset(&mask, SIGUSR2);
 	sigprocmask(SIG_BLOCK, &mask, NULL);
 
-	sig_fd = __signalfd(-1, &mask, 0);
-	if (sig_fd < 0)
-		return;
+	sfd = __signalfd(-1, &mask, 0);
+	if (sfd < 0)
+		return 1;
+
+	ret = tgt_target_event_add(lu->tgt, sfd, EPOLLIN, bs_sig_request_done, lu);
 
-	ret = tgt_event_add(sig_fd, EPOLLIN, bs_sig_request_done, NULL);
 	if (ret < 0) {
-		close (sig_fd);
-		sig_fd = -1;
+		close (sfd);
+/* 		sig_fd = -1; */
+		return 1;
 	}
 
-	pthread_mutex_init(&sig_finished_lock, NULL);
+
+	return 0;
 }
 
 int bs_thread_open(struct bs_thread_info *info, request_func_t *rfn,
 		   int nr_threads)
 {
 	int i, ret;
+	struct scsi_lu *lu = (struct scsi_lu *)((char *)info - sizeof(*lu));
+
+	ret = bs_init(lu);
+	if (ret)
+		return TGTADM_INVALID_REQUEST;
 
-	bs_init();
+	if (lu->tgt->efd == -1)
+		return TGTADM_INVALID_REQUEST;
 
 	info->request_fn = rfn;
 
@@ -288,7 +298,7 @@ int bs_thread_open(struct bs_thread_info *info, request_func_t *rfn,
 		goto close_command_fd;
 	}
 
-	if (sig_fd < 0) {
+	if (lu->tgt->efd < 0) {
 		ret = tgt_event_add(info->done_fd[0], EPOLLIN, bs_thread_request_done,
 				    info);
 		if (ret) {
@@ -343,8 +353,8 @@ destroy_threads:
 		eprintf("stopped the worker thread %d\n", i - 1);
 	}
 event_del:
-	if (sig_fd < 0)
-		tgt_event_del(info->done_fd[0]);
+/* 	if (sig_fd < 0) */
+/* 		tgt_event_del(info->done_fd[0]); */
 
 close_done_fd:
 	close(info->done_fd[0]);
@@ -382,8 +392,8 @@ void bs_thread_close(struct bs_thread_info *info)
 	pthread_mutex_destroy(&info->pending_lock);
 	pthread_mutex_destroy(&info->startup_lock);
 
-	if (sig_fd < 0)
-		tgt_event_del(info->done_fd[0]);
+/* 	if (sig_fd < 0) */
+/* 		tgt_event_del(info->done_fd[0]); */
 
 	close(info->done_fd[0]);
 	close(info->done_fd[1]);
@@ -399,6 +409,8 @@ int bs_thread_cmd_submit(struct scsi_cmd *cmd)
 	struct scsi_lu *lu = cmd->dev;
 	struct bs_thread_info *info = BS_THREAD_I(lu);
 
+	dprintf("%d\n", pthread_self());
+
 	pthread_mutex_lock(&info->pending_lock);
 
 	list_add_tail(&cmd->bs_list, &info->pending_list);
diff --git a/usr/iscsi/iscsi_tcp.c b/usr/iscsi/iscsi_tcp.c
index 8fc145f..35770e3 100644
--- a/usr/iscsi/iscsi_tcp.c
+++ b/usr/iscsi/iscsi_tcp.c
@@ -52,6 +52,18 @@ static inline struct iscsi_tcp_connection *TCP_CONN(struct iscsi_connection *con
 	return container_of(conn, struct iscsi_tcp_connection, iscsi_conn);
 }
 
+void iscsi_pass_tcp_conn(struct iscsi_connection *conn)
+{
+	struct iscsi_tcp_connection *tcp_conn = TCP_CONN(conn);
+	struct target *t = conn->session->target->t;
+	int ret;
+
+	ret = tgt_target_event_add(t, tcp_conn->fd, EPOLLIN, iscsi_tcp_event_handler, conn);
+	conn->tp->ep_event_modify(conn, EPOLLIN);
+
+	eprintf("register the event\n");
+}
+
 static int set_keepalive(int fd)
 {
 	int ret, opt;
@@ -336,7 +348,14 @@ static void iscsi_event_modify(struct iscsi_connection *conn, int events)
 	struct iscsi_tcp_connection *tcp_conn = TCP_CONN(conn);
 	int ret;
 
-	ret = tgt_event_modify(tcp_conn->fd, events);
+	if (conn->state == STATE_SCSI && conn->session_type == SESSION_NORMAL) {
+		struct iscsi_session *session = conn->session;
+		struct target *target = session->target->t;
+
+		ret = tgt_target_event_modify(target, tcp_conn->fd, events);
+	} else
+		ret = tgt_event_modify(tcp_conn->fd, events);
+
 	if (ret)
 		eprintf("tgt_event_modify failed\n");
 }
diff --git a/usr/iscsi/iscsid.c b/usr/iscsi/iscsid.c
index dcca384..a04613d 100644
--- a/usr/iscsi/iscsid.c
+++ b/usr/iscsi/iscsid.c
@@ -2079,6 +2079,9 @@ again:
 	return 0;
 }
 
+#include "target.h"
+extern void iscsi_pass_tcp_conn(struct iscsi_connection *conn);
+
 int iscsi_tx_handler(struct iscsi_connection *conn)
 {
 	int ret = 0, hdigest, ddigest;
@@ -2222,9 +2225,16 @@ finish:
 		if (ret)
 			conn->state = STATE_CLOSE;
 		else {
-			conn->state = STATE_SCSI;
 			conn_read_pdu(conn);
-			conn->tp->ep_event_modify(conn, EPOLLIN);
+
+			if (conn->session->target->t->efd == -1) {
+				conn->state = STATE_SCSI;
+				conn->tp->ep_event_modify(conn, EPOLLIN);
+			} else {
+				conn->tp->ep_event_modify(conn, 0);
+				conn->state = STATE_SCSI;
+				iscsi_pass_tcp_conn(conn);
+			}
 		}
 		break;
 	case STATE_EXIT:
diff --git a/usr/iscsi/iscsid.h b/usr/iscsi/iscsid.h
index 6b982cb..b2f5d16 100644
--- a/usr/iscsi/iscsid.h
+++ b/usr/iscsi/iscsid.h
@@ -244,6 +244,8 @@ struct iscsi_target {
 	int nr_sessions;
 
 	struct list_head isns_list;
+
+	struct target *t;
 };
 
 enum task_flags {
diff --git a/usr/iscsi/target.c b/usr/iscsi/target.c
index c6ac031..5db3373 100644
--- a/usr/iscsi/target.c
+++ b/usr/iscsi/target.c
@@ -22,10 +22,12 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <pthread.h>
 #include <unistd.h>
 #include <netdb.h>
 #include <sys/stat.h>
 #include <sys/un.h>
+#include <sys/epoll.h>
 #include <netinet/in.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
@@ -36,6 +38,7 @@
 #include "tgtadm.h"
 #include "tgtd.h"
 #include "target.h"
+#include "util.h"
 
 LIST_HEAD(iscsi_targets_list);
 
@@ -258,9 +261,45 @@ void iscsi_target_destroy(int tid)
 	return;
 }
 
+static void *iscsi_thread_fn(void *arg)
+{
+	struct target *t = arg;
+	struct epoll_event events[1024];
+	struct event_data *tev;
+	sigset_t mask;
+	int nevent, i;
+	int count = 0;
+
+	sigemptyset(&mask);
+	sigaddset(&mask, SIGUSR2);
+	pthread_sigmask(SIG_BLOCK, &mask, NULL);
+
+	t->efd = epoll_create(128);
+	if (t->efd < 0)
+		eprintf("fail %m\n");
+
+retry:
+	nevent = epoll_wait(t->efd, events, ARRAY_SIZE(events), 10000);
+	if (nevent < 0) {
+		if (errno != EINTR) {
+			eprintf("%m\n");
+			exit(1);
+		}
+	} else if (nevent) {
+		for (i = 0; i < nevent; i++) {
+			tev = (struct event_data *) events[i].data.ptr;
+			tev->handler(tev->fd, events[i].events, tev->data);
+		}
+	}
+
+	dprintf("retry %d, %d\n", (int)pthread_self(), count++);
+
+	goto retry;
+}
+
 int iscsi_target_create(struct target *t)
 {
-	int tid = t->tid;
+	int tid = t->tid, ret;
 	struct iscsi_target *target;
 	struct param default_tgt_session_param[] = {
 		[ISCSI_PARAM_MAX_RECV_DLENGTH] = {0, 8192},
@@ -301,9 +340,16 @@ int iscsi_target_create(struct target *t)
 	INIT_LIST_HEAD(&target->sessions_list);
 	INIT_LIST_HEAD(&target->isns_list);
 	target->tid = tid;
+	target->t = t;
 	list_add_tail(&target->tlist, &iscsi_targets_list);
 
 	isns_target_register(tgt_targetname(tid));
+
+	ret = pthread_create(&t->thread, NULL,
+			     iscsi_thread_fn, t);
+
+	eprintf("create a new pthread %d\n", (int)t->thread);
+
 	return 0;
 }
 
diff --git a/usr/target.c b/usr/target.c
index c848757..0f690dd 100644
--- a/usr/target.c
+++ b/usr/target.c
@@ -1750,11 +1750,17 @@ int tgt_target_create(int lld, int tid, char *args)
 	INIT_LIST_HEAD(&target->acl_list);
 	INIT_LIST_HEAD(&target->it_nexus_list);
 
-	tgt_device_create(tid, TYPE_RAID, 0, NULL, 0);
+	INIT_LIST_HEAD(&target->events_list);
+	target->efd = -1;
+
+	pthread_mutex_init(&target->t_sig_finished_lock, NULL);
+	INIT_LIST_HEAD(&target->t_sig_finished_list);
 
 	if (tgt_drivers[lld]->target_create)
 		tgt_drivers[lld]->target_create(target);
 
+	tgt_device_create(tid, TYPE_RAID, 0, NULL, 0);
+
 	dprintf("Succeed to create a new target %d\n", tid);
 
 	return 0;
diff --git a/usr/target.h b/usr/target.h
index 9283431..cc104ae 100644
--- a/usr/target.h
+++ b/usr/target.h
@@ -38,6 +38,12 @@ struct target {
 
 	struct list_head acl_list;
 
+	struct list_head events_list;
+	int efd;
+	pthread_t thread;
+	struct list_head t_sig_finished_list;
+	pthread_mutex_t t_sig_finished_lock;
+
 	struct tgt_account account;
 };
 
diff --git a/usr/tgtd.c b/usr/tgtd.c
index c386281..2196de4 100644
--- a/usr/tgtd.c
+++ b/usr/tgtd.c
@@ -38,6 +38,7 @@
 #include "driver.h"
 #include "work.h"
 #include "util.h"
+#include "target.h"
 
 unsigned long pagesize, pageshift;
 
@@ -135,7 +136,8 @@ set_rlimit:
 	return 0;
 }
 
-int tgt_event_add(int fd, int events, event_handler_t handler, void *data)
+static int __tgt_event_add(int fd, int events, event_handler_t handler,
+			   void *data, int efd, struct list_head *list)
 {
 	struct epoll_event ev;
 	struct event_data *tev;
@@ -152,39 +154,50 @@ int tgt_event_add(int fd, int events, event_handler_t handler, void *data)
 	memset(&ev, 0, sizeof(ev));
 	ev.events = events;
 	ev.data.ptr = tev;
-	err = epoll_ctl(ep_fd, EPOLL_CTL_ADD, fd, &ev);
+	err = epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev);
 	if (err) {
 		eprintf("Cannot add fd, %m\n");
 		free(tev);
 	} else
-		list_add(&tev->e_list, &tgt_events_list);
+		list_add(&tev->e_list, list);
 
 	return err;
 }
 
-static struct event_data *tgt_event_lookup(int fd)
+int tgt_event_add(int fd, int events, event_handler_t handler, void *data)
+{
+	return __tgt_event_add(fd, events, handler, data, ep_fd, &tgt_events_list);
+}
+
+int tgt_target_event_add(struct target *t, int fd, int events,
+			 event_handler_t handler, void *data)
+{
+	return __tgt_event_add(fd, events, handler, data, t->efd, &t->events_list);
+}
+
+static struct event_data *tgt_event_lookup(struct list_head *list, int fd)
 {
 	struct event_data *tev;
 
-	list_for_each_entry(tev, &tgt_events_list, e_list) {
+	list_for_each_entry(tev, list, e_list) {
 		if (tev->fd == fd)
 			return tev;
 	}
 	return NULL;
 }
 
-void tgt_event_del(int fd)
+static void __tgt_event_del(int efd, struct list_head *list, int fd)
 {
 	struct event_data *tev;
 	int ret;
 
-	tev = tgt_event_lookup(fd);
+	tev = tgt_event_lookup(list, fd);
 	if (!tev) {
 		eprintf("Cannot find event %d\n", fd);
 		return;
 	}
 
-	ret = epoll_ctl(ep_fd, EPOLL_CTL_DEL, fd, NULL);
+	ret = epoll_ctl(efd, EPOLL_CTL_DEL, fd, NULL);
 	if (ret < 0)
 		eprintf("fail to remove epoll event, %s\n", strerror(errno));
 
@@ -192,12 +205,22 @@ void tgt_event_del(int fd)
 	free(tev);
 }
 
-int tgt_event_modify(int fd, int events)
+void tgt_event_del(int fd)
+{
+	__tgt_event_del(ep_fd, &tgt_events_list, fd);
+}
+
+void tgt_target_event_del(struct target *t, int fd)
+{
+	__tgt_event_del(t->efd, &t->events_list, fd);
+}
+
+static int __tgt_event_modify(int efd, struct list_head *list, int fd, int events)
 {
 	struct epoll_event ev;
 	struct event_data *tev;
 
-	tev = tgt_event_lookup(fd);
+	tev = tgt_event_lookup(list, fd);
 	if (!tev) {
 		eprintf("Cannot find event %d\n", fd);
 		return -EINVAL;
@@ -207,7 +230,17 @@ int tgt_event_modify(int fd, int events)
 	ev.events = events;
 	ev.data.ptr = tev;
 
-	return epoll_ctl(ep_fd, EPOLL_CTL_MOD, fd, &ev);
+	return epoll_ctl(efd, EPOLL_CTL_MOD, fd, &ev);
+}
+
+int tgt_event_modify(int fd, int events)
+{
+	return __tgt_event_modify(ep_fd, &tgt_events_list, fd, events);
+}
+
+int tgt_target_event_modify(struct target *t, int fd, int events)
+{
+	return __tgt_event_modify(t->efd, &t->events_list, fd, events);
 }
 
 void tgt_init_sched_event(struct event_data *evt,
diff --git a/usr/tgtd.h b/usr/tgtd.h
index 3323a9b..f628c3c 100644
--- a/usr/tgtd.h
+++ b/usr/tgtd.h
@@ -244,11 +244,15 @@ typedef void (*event_handler_t)(int fd, int events, void *data);
 
 extern int tgt_event_add(int fd, int events, event_handler_t handler, void *data);
 extern void tgt_event_del(int fd);
+extern int tgt_event_modify(int fd, int events);
+
+extern int tgt_target_event_add(struct target *t,int fd, int events, event_handler_t handler, void *data);
+extern void tgt_target_event_del(struct target *t, int fd);
+extern int tgt_target_event_modify(struct target *t, int fd, int events);
 
 extern void tgt_add_sched_event(struct event_data *evt);
 extern void tgt_remove_sched_event(struct event_data *evt);
 
-extern int tgt_event_modify(int fd, int events);
 extern int target_cmd_queue(int tid, struct scsi_cmd *cmd);
 extern void target_cmd_done(struct scsi_cmd *cmd);
 struct scsi_cmd *target_cmd_lookup(int tid, uint64_t itn_id, uint64_t tag);
-- 
1.6.5

--
To unsubscribe from this list: send the line "unsubscribe stgt" in
the body of a message to majordomo at vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



More information about the stgt mailing list