[stgt] [PATCH 3/3] iscsi: use pthread per target for tcp
FUJITA Tomonori
fujita.tomonori at lab.ntt.co.jp
Thu Jun 10 03:19:29 CEST 2010
Currently, the main process handles SCSI protocol processing (and
network I/O for iSCSI), and four I/O threads run per LUN to handle
disk I/Os.
The current model doesn't scale with the number of targets if you have
a fast network (10GbE) and fast disk drives (SSDs).
With this patch, we use one pthread per target for iSCSI/TCP (not iSER).
Each target's pthread handles SCSI protocol processing and network I/O for
that target, and four I/O threads run per LUN to handle disk I/Os.
Note that the pthread-per-target model is enabled only if tgt uses
signalfd. Even with the main process model, tgt is much faster with
signalfd-capable kernels. Linux 2.6.22 or newer is strongly
recommended.
Signed-off-by: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp>
---
usr/bs.c | 55 +++++++++++++++++++++---------------
usr/iscsi/conn.c | 5 ++-
usr/iscsi/iscsi_rdma.c | 8 +++++
usr/iscsi/iscsi_tcp.c | 56 ++++++++++++++++++++++++++++++++++--
usr/iscsi/iscsid.c | 9 +++++-
usr/iscsi/iscsid.h | 9 ++++++
usr/iscsi/target.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++-
usr/iscsi/transport.h | 3 ++
usr/target.h | 2 +
usr/tgtd.h | 8 +++++
10 files changed, 197 insertions(+), 31 deletions(-)
diff --git a/usr/bs.c b/usr/bs.c
index e74cc13..a29a5f4 100644
--- a/usr/bs.c
+++ b/usr/bs.c
@@ -34,14 +34,14 @@
#include "list.h"
#include "tgtd.h"
+#include "target.h"
#include "tgtadm_error.h"
#include "util.h"
#include "bs_thread.h"
static LIST_HEAD(bst_list);
-static LIST_HEAD(finished_list);
-static pthread_mutex_t finished_lock;
+struct bs_finish bs_finish, *bsf = &bs_finish;
int sig_fd = -1;
@@ -87,15 +87,15 @@ retry:
goto out;
}
- pthread_mutex_lock(&finished_lock);
+ pthread_mutex_lock(&bsf->finished_lock);
retest:
- if (list_empty(&finished_list)) {
- pthread_cond_wait(&finished_cond, &finished_lock);
+ if (list_empty(&bsf->finished_list)) {
+ pthread_cond_wait(&finished_cond, &bsf->finished_lock);
goto retest;
}
- while (!list_empty(&finished_list)) {
- cmd = list_first_entry(&finished_list,
+ while (!list_empty(&bsf->finished_list)) {
+ cmd = list_first_entry(&bsf->finished_list,
struct scsi_cmd, bs_list);
dprintf("found %p\n", cmd);
@@ -104,7 +104,7 @@ retest:
list_add_tail(&cmd->bs_list, &ack_list);
}
- pthread_mutex_unlock(&finished_lock);
+ pthread_mutex_unlock(&bsf->finished_lock);
nr = 1;
rewrite:
@@ -154,9 +154,10 @@ rewrite:
}
}
-static void bs_sig_request_done(int fd, int events, void *data)
+void bs_sig_request_done(int fd, int events, void *data)
{
int ret;
+ struct bs_finish *b = data;
struct scsi_cmd *cmd;
struct signalfd_siginfo siginfo[16];
LIST_HEAD(list);
@@ -166,9 +167,9 @@ static void bs_sig_request_done(int fd, int events, void *data)
return;
}
- pthread_mutex_lock(&finished_lock);
- list_splice_init(&finished_list, &list);
- pthread_mutex_unlock(&finished_lock);
+ pthread_mutex_lock(&b->finished_lock);
+ list_splice_init(&b->finished_list, &list);
+ pthread_mutex_unlock(&b->finished_lock);
while (!list_empty(&list)) {
cmd = list_first_entry(&list, struct scsi_cmd, bs_list);
@@ -184,6 +185,7 @@ static void *bs_thread_worker_fn(void *arg)
struct bs_thread_info *info = arg;
struct scsi_cmd *cmd;
sigset_t set;
+ struct bs_finish *tbsf;
sigfillset(&set);
sigprocmask(SIG_BLOCK, &set, NULL);
@@ -207,16 +209,24 @@ static void *bs_thread_worker_fn(void *arg)
cmd = list_first_entry(&info->pending_list,
struct scsi_cmd, bs_list);
+
+ if (cmd->c_target->bsf)
+ tbsf = cmd->c_target->bsf;
+ else
+ tbsf = bsf;
+
list_del(&cmd->bs_list);
pthread_mutex_unlock(&info->pending_lock);
info->request_fn(cmd);
- pthread_mutex_lock(&finished_lock);
- list_add_tail(&cmd->bs_list, &finished_list);
- pthread_mutex_unlock(&finished_lock);
+ pthread_mutex_lock(&tbsf->finished_lock);
+ list_add_tail(&cmd->bs_list, &tbsf->finished_list);
+ pthread_mutex_unlock(&tbsf->finished_lock);
- if (sig_fd < 0)
+ if (cmd->c_target->bsf)
+ pthread_kill(cmd->c_target->bsf->thread, SIGUSR2);
+ else if (sig_fd < 0)
pthread_cond_signal(&finished_cond);
else
kill(getpid(), SIGUSR2);
@@ -225,13 +235,11 @@ static void *bs_thread_worker_fn(void *arg)
pthread_exit(NULL);
}
-static int bs_init_signalfd(void)
+static int bs_init_signalfd(struct bs_finish *b)
{
sigset_t mask;
int ret;
- pthread_mutex_init(&finished_lock, NULL);
-
sigemptyset(&mask);
sigaddset(&mask, SIGUSR2);
sigprocmask(SIG_BLOCK, &mask, NULL);
@@ -240,7 +248,7 @@ static int bs_init_signalfd(void)
if (sig_fd < 0)
return 1;
- ret = tgt_event_add(sig_fd, EPOLLIN, bs_sig_request_done, NULL);
+ ret = tgt_event_add(sig_fd, EPOLLIN, bs_sig_request_done, b);
if (ret < 0) {
close (sig_fd);
sig_fd = -1;
@@ -256,7 +264,6 @@ static int bs_init_notify_thread(void)
int ret;
pthread_cond_init(&finished_cond, NULL);
- pthread_mutex_init(&finished_lock, NULL);
ret = pipe(command_fd);
if (ret) {
@@ -298,7 +305,6 @@ close_command_fd:
close(command_fd[1]);
destroy_cond_mutex:
pthread_cond_destroy(&finished_cond);
- pthread_mutex_destroy(&finished_lock);
return 1;
}
@@ -307,7 +313,10 @@ int bs_init(void)
{
int ret;
- ret = bs_init_signalfd();
+ pthread_mutex_init(&bsf->finished_lock, NULL);
+ INIT_LIST_HEAD(&bsf->finished_list);
+
+ ret = bs_init_signalfd(bsf);
if (!ret) {
eprintf("use signalfd notification\n");
return 0;
diff --git a/usr/iscsi/conn.c b/usr/iscsi/conn.c
index ba7a58f..d8601e1 100644
--- a/usr/iscsi/conn.c
+++ b/usr/iscsi/conn.c
@@ -23,6 +23,7 @@
#include <string.h>
#include <errno.h>
#include <sys/stat.h>
+#include <sys/epoll.h>
#include "iscsid.h"
#include "tgtd.h"
@@ -231,7 +232,9 @@ int conn_close_force(uint32_t tid, uint64_t sid, uint32_t cid)
list_for_each_entry(conn, &session->conn_list, clist) {
if (conn->cid == cid) {
eprintf("close %" PRIx64 " %u\n", sid, cid);
- conn_close(conn);
+ conn->state = STATE_CLOSE;
+ conn->tp->ep_event_modify(conn,
+ EPOLLIN|EPOLLOUT|EPOLLERR);
return TGTADM_SUCCESS;
}
}
diff --git a/usr/iscsi/iscsi_rdma.c b/usr/iscsi/iscsi_rdma.c
index 63edebf..115d774 100644
--- a/usr/iscsi/iscsi_rdma.c
+++ b/usr/iscsi/iscsi_rdma.c
@@ -1194,6 +1194,8 @@ static int iscsi_rdma_init(void)
INIT_LIST_HEAD(&iser_conn_list);
INIT_LIST_HEAD(&temp_conn);
+ iscsi_rdma_enabled = 1;
+
return ret;
}
@@ -1246,6 +1248,11 @@ static int iscsi_rdma_login_complete(struct iscsi_connection *conn)
return ret;
}
+static void iscsi_rdma_nexus_init(struct iscsi_connection *conn)
+{
+ conn->tp->ep_event_modify(conn, EPOLLIN);
+}
+
/*
* Copy the remote va and stag that were temporarily saved in conn_info.
*/
@@ -1725,6 +1732,7 @@ static struct iscsi_transport iscsi_iser = {
.data_padding = 1,
.ep_init = iscsi_rdma_init,
.ep_login_complete = iscsi_rdma_login_complete,
+ .ep_nexus_init = iscsi_rdma_nexus_init,
.alloc_task = iscsi_iser_alloc_task,
.free_task = iscsi_iser_free_task,
.ep_read = iscsi_iser_read,
diff --git a/usr/iscsi/iscsi_tcp.c b/usr/iscsi/iscsi_tcp.c
index 8fc145f..d1edd84 100644
--- a/usr/iscsi/iscsi_tcp.c
+++ b/usr/iscsi/iscsi_tcp.c
@@ -31,6 +31,7 @@
#include <netinet/tcp.h>
#include <sys/epoll.h>
#include <sys/socket.h>
+#include <pthread.h>
#include "iscsid.h"
#include "tgtd.h"
@@ -43,6 +44,7 @@ static struct iscsi_transport iscsi_tcp;
struct iscsi_tcp_connection {
int fd;
+ int pthread;
struct iscsi_connection iscsi_conn;
};
@@ -153,6 +155,7 @@ out:
static void iscsi_tcp_event_handler(int fd, int events, void *data)
{
struct iscsi_connection *conn = (struct iscsi_connection *) data;
+ struct iscsi_tcp_connection *tcp_conn = TCP_CONN(conn);
if (events & EPOLLIN)
iscsi_rx_handler(conn);
@@ -165,7 +168,19 @@ static void iscsi_tcp_event_handler(int fd, int events, void *data)
if (conn->state == STATE_CLOSE) {
dprintf("connection closed %p\n", conn);
- conn_close(conn);
+ if (tcp_conn->pthread) {
+ struct iscsi_target *target = conn->session->target;
+
+ pthread_mutex_lock(&target->event_lock);
+ do_tgt_event_del(target->efd, &target->events_list,
+ tcp_conn->fd);
+ pthread_mutex_unlock(&target->event_lock);
+ /* let the main thread handle this */
+ tcp_conn->pthread = 0;
+ tgt_event_modify(tcp_conn->fd, EPOLLIN|EPOLLOUT|EPOLLERR);
+ } else {
+ conn_close(conn);
+ }
}
}
@@ -263,6 +278,29 @@ static int iscsi_tcp_conn_login_complete(struct iscsi_connection *conn)
return 0;
}
+static void iscsi_tcp_conn_nexus_init(struct iscsi_connection *conn)
+{
+ struct iscsi_tcp_connection *tcp_conn = TCP_CONN(conn);
+ struct iscsi_target *target = conn->session->target;
+
+ if (iscsi_pthread_per_target()) {
+ /* remove the conn from the main thread. */
+ conn->tp->ep_event_modify(conn, 0);
+
+ pthread_mutex_lock(&target->event_lock);
+
+ do_tgt_event_add(target->efd, &target->events_list,
+ tcp_conn->fd, EPOLLIN,
+ iscsi_tcp_event_handler, conn);
+
+ pthread_mutex_unlock(&target->event_lock);
+
+ tcp_conn->pthread = 1;
+ }
+
+ conn->tp->ep_event_modify(conn, EPOLLIN);
+}
+
static size_t iscsi_tcp_read(struct iscsi_connection *conn, void *buf,
size_t nbytes)
{
@@ -336,9 +374,18 @@ static void iscsi_event_modify(struct iscsi_connection *conn, int events)
struct iscsi_tcp_connection *tcp_conn = TCP_CONN(conn);
int ret;
- ret = tgt_event_modify(tcp_conn->fd, events);
- if (ret)
- eprintf("tgt_event_modify failed\n");
+ if (tcp_conn->pthread) {
+ struct iscsi_target *target = conn->session->target;
+
+ pthread_mutex_lock(&target->event_lock);
+ do_tgt_event_modify(target->efd, &target->events_list,
+ tcp_conn->fd, events);
+ pthread_mutex_unlock(&target->event_lock);
+ } else {
+ ret = tgt_event_modify(tcp_conn->fd, events);
+ if (ret)
+ eprintf("tgt_event_modify failed\n");
+ }
}
static struct iscsi_task *iscsi_tcp_alloc_task(struct iscsi_connection *conn,
@@ -391,6 +438,7 @@ static struct iscsi_transport iscsi_tcp = {
.ep_init = iscsi_tcp_init,
.ep_exit = iscsi_tcp_exit,
.ep_login_complete = iscsi_tcp_conn_login_complete,
+ .ep_nexus_init = iscsi_tcp_conn_nexus_init,
.alloc_task = iscsi_tcp_alloc_task,
.free_task = iscsi_tcp_free_task,
.ep_read = iscsi_tcp_read,
diff --git a/usr/iscsi/iscsid.c b/usr/iscsi/iscsid.c
index dcca384..b4e0969 100644
--- a/usr/iscsi/iscsid.c
+++ b/usr/iscsi/iscsid.c
@@ -73,6 +73,13 @@ enum {
IOSTATE_TX_END,
};
+int iscsi_rdma_enabled;
+
+int iscsi_pthread_per_target(void)
+{
+ return sig_fd >= 0 && !iscsi_rdma_enabled;
+}
+
void conn_read_pdu(struct iscsi_connection *conn)
{
conn->rx_iostate = IOSTATE_RX_BHS;
@@ -2224,7 +2231,7 @@ finish:
else {
conn->state = STATE_SCSI;
conn_read_pdu(conn);
- conn->tp->ep_event_modify(conn, EPOLLIN);
+ conn->tp->ep_nexus_init(conn);
}
break;
case STATE_EXIT:
diff --git a/usr/iscsi/iscsid.h b/usr/iscsi/iscsid.h
index 6b982cb..1e70d81 100644
--- a/usr/iscsi/iscsid.h
+++ b/usr/iscsi/iscsid.h
@@ -244,6 +244,13 @@ struct iscsi_target {
int nr_sessions;
struct list_head isns_list;
+
+ int efd;
+ pthread_mutex_t event_lock;
+ struct list_head events_list;
+
+ struct bs_finish bsfin;
+ int stop_pthread;
};
enum task_flags {
@@ -310,6 +317,8 @@ extern int iscsi_target_show(int mode, int tid, uint64_t sid, uint32_t cid,
int iscsi_target_update(int mode, int op, int tid, uint64_t sid, uint64_t lun,
uint32_t cid, char *name);
+int iscsi_pthread_per_target(void);
+
/* param.c */
int param_index_by_name(char *name, struct iscsi_key *keys);
diff --git a/usr/iscsi/target.c b/usr/iscsi/target.c
index c6ac031..b547626 100644
--- a/usr/iscsi/target.c
+++ b/usr/iscsi/target.c
@@ -25,6 +25,7 @@
#include <unistd.h>
#include <netdb.h>
#include <sys/stat.h>
+#include <sys/epoll.h>
#include <sys/un.h>
#include <netinet/in.h>
#include <sys/socket.h>
@@ -32,10 +33,12 @@
#include <netinet/tcp.h>
#include <netinet/ip.h>
#include <arpa/inet.h>
+#include <pthread.h>
#include "iscsid.h"
#include "tgtadm.h"
#include "tgtd.h"
#include "target.h"
+#include "util.h"
LIST_HEAD(iscsi_targets_list);
@@ -252,12 +255,63 @@ void iscsi_target_destroy(int tid)
}
list_del(&target->tlist);
+
+ pthread_mutex_init(&target->event_lock, NULL);
+
+ if (target->bsfin.thread) {
+ target->stop_pthread = 1;
+ pthread_kill(target->bsfin.thread, SIGUSR2);
+
+ pthread_join(target->bsfin.thread, NULL);
+ pthread_mutex_destroy(&target->bsfin.finished_lock);
+ }
+
+ close(target->efd);
free(target);
isns_target_deregister(tgt_targetname(tid));
return;
}
+static void *iscsi_thread_fn(void *arg)
+{
+ struct iscsi_target *t = arg;
+ struct epoll_event events[1024];
+ struct event_data *tev;
+ sigset_t mask;
+ int nevent, i;
+
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGUSR2);
+ pthread_sigmask(SIG_BLOCK, &mask, NULL);
+
+ pthread_mutex_lock(&t->event_lock);
+
+ do_tgt_event_add(t->efd, &t->events_list, sig_fd, EPOLLIN,
+ bs_sig_request_done, &t->bsfin);
+
+ pthread_mutex_unlock(&t->event_lock);
+
+retry:
+ nevent = epoll_wait(t->efd, events, ARRAY_SIZE(events), 1000);
+ if (nevent < 0) {
+ if (errno != EINTR) {
+ eprintf("%m\n");
+ exit(1);
+ }
+ } else if (nevent) {
+ for (i = 0; i < nevent; i++) {
+ tev = (struct event_data *) events[i].data.ptr;
+ tev->handler(tev->fd, events[i].events, tev->data);
+ }
+ }
+
+ if (!t->stop_pthread)
+ goto retry;
+
+ pthread_exit(NULL);
+}
+
int iscsi_target_create(struct target *t)
{
int tid = t->tid;
@@ -288,11 +342,15 @@ int iscsi_target_create(struct target *t)
[ISCSI_PARAM_MAX_OUTST_PDU] = {0, 0}, /* not in open-iscsi */
};
- target = malloc(sizeof(*target));
+ target = zalloc(sizeof(*target));
if (!target)
return -ENOMEM;
- memset(target, 0, sizeof(*target));
+ target->efd = epoll_create(128);
+ if (target->efd < 0) {
+ free(target);
+ return -EINVAL;
+ }
memcpy(target->session_param, default_tgt_session_param,
sizeof(target->session_param));
@@ -300,10 +358,21 @@ int iscsi_target_create(struct target *t)
INIT_LIST_HEAD(&target->tlist);
INIT_LIST_HEAD(&target->sessions_list);
INIT_LIST_HEAD(&target->isns_list);
+ INIT_LIST_HEAD(&target->events_list);
target->tid = tid;
list_add_tail(&target->tlist, &iscsi_targets_list);
isns_target_register(tgt_targetname(tid));
+
+ if (iscsi_pthread_per_target()) {
+ pthread_create(&target->bsfin.thread, NULL, iscsi_thread_fn, target);
+
+ pthread_mutex_init(&target->bsfin.finished_lock, NULL);
+ INIT_LIST_HEAD(&target->bsfin.finished_list);
+ t->bsf = &target->bsfin;
+ eprintf("create thread %u\n", (unsigned)target->bsfin.thread);
+ }
+
return 0;
}
diff --git a/usr/iscsi/transport.h b/usr/iscsi/transport.h
index 92a6f0a..c94b86b 100644
--- a/usr/iscsi/transport.h
+++ b/usr/iscsi/transport.h
@@ -4,6 +4,8 @@
#include <sys/socket.h>
#include "list.h"
+extern int iscsi_rdma_enabled;
+
struct iscsi_connection;
struct iscsi_task;
@@ -17,6 +19,7 @@ struct iscsi_transport {
int (*ep_init) (void);
void (*ep_exit) (void);
int (*ep_login_complete)(struct iscsi_connection *conn);
+ void (*ep_nexus_init)(struct iscsi_connection *conn);
struct iscsi_task *(*alloc_task)(struct iscsi_connection *conn,
size_t ext_len);
void (*free_task)(struct iscsi_task *task);
diff --git a/usr/target.h b/usr/target.h
index 9283431..4607fc4 100644
--- a/usr/target.h
+++ b/usr/target.h
@@ -39,6 +39,8 @@ struct target {
struct list_head acl_list;
struct tgt_account account;
+
+ struct bs_finish *bsf;
};
struct it_nexus {
diff --git a/usr/tgtd.h b/usr/tgtd.h
index 79d9c88..b8541c8 100644
--- a/usr/tgtd.h
+++ b/usr/tgtd.h
@@ -334,6 +334,14 @@ struct event_data {
extern int sig_fd;
+struct bs_finish {
+ struct list_head finished_list;
+ pthread_mutex_t finished_lock;
+ pthread_t thread;
+};
+
+void bs_sig_request_done(int fd, int events, void *data);
+
int do_tgt_event_add(int efd, struct list_head *list, int fd, int events,
event_handler_t handler, void *data);
void do_tgt_event_del(int efd, struct list_head *list, int fd);
--
1.6.5
--
To unsubscribe from this list: send the line "unsubscribe stgt" in
the body of a message to majordomo at vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
More information about the stgt
mailing list