[stgt] help tgt segfault
FUJITA Tomonori
fujita.tomonori at lab.ntt.co.jp
Wed Dec 17 18:24:57 CET 2008
On Wed, 17 Dec 2008 16:54:11 +0100
Tomasz Chmielewski <mangoo at wpkg.org> wrote:
> FUJITA Tomonori schrieb:
> > On Wed, 17 Dec 2008 16:09:58 +0100
> > Tomasz Chmielewski <mangoo at wpkg.org> wrote:
> >
> >> Tomasz Chmielewski schrieb:
> >>> FUJITA Tomonori schrieb:
> >>>
> >>>>> Which will result in:
> >>>>>
> >>>>> Dec 16 13:55:32 megathecus kernel: tgtd[4872]: segfault at 00000220
> >>>>> eip 0804f0b5 esp 77c43ff0 error 4
> >>>> Hmm, unfortunately, I can't reproduce this.
> >>> Strange, it is 100% reproducible here, on an x86 system. I didn't try
> >>> 64-bit.
> >>> I just do dmsetup suspend / wait a few seconds / dmsetup resume, while
> >>> the initiator is reading data, and tgtd segfaults.
> >> It crashes if I wait about 120 seconds or more.
> >> If I wait less, it does not crash.
> >
> > I made sure that I wait about 120 seconds but I can't reproduce this
> > (it takes about 120 seconds to see the abort and close messages in the
> > log).
> >
> > We should not see a series of 'conn_close' messages. Something
> > unexpected happens at your box.
> >
> > Can you try this and send the log? It doesn't fix the problem but
> > gives more information.
>
> Does it help?
>
> Dec 17 16:50:25 megathecus tgtd: abort_task_set(988) found e0000a02 0
> Dec 17 16:50:25 megathecus tgtd: iscsi_tcp_event_handler(167) connection closed 0x807028c
> Dec 17 16:50:25 megathecus tgtd: conn_close(90) connection closed 0x807028c 2 0 11
> Dec 17 16:50:28 megathecus tgtd: conn_close(90) connection closed 0x807028c 1 0 11
> Dec 17 16:51:27 megathecus tgtd: abort_task_set(988) found f0000a06 0
> Dec 17 16:51:27 megathecus tgtd: iscsi_tcp_event_handler(167) connection closed 0x807028c
> Dec 17 16:51:27 megathecus tgtd: conn_close(90) connection closed 0x807028c 2 0 11
> Dec 17 16:51:30 megathecus tgtd: conn_close(90) connection closed 0x807028c 1 0 11
> Dec 17 16:51:54 megathecus kernel: tgtd[19616]: segfault at 000001e4 eip 0804c2da esp 77c8b430 error 4
Can you try one more time with this patch? It includes the previous
patch, so please do git-reset --hard first.
Thanks,
diff --git a/usr/iscsi/conn.c b/usr/iscsi/conn.c
index c205397..7a626c9 100644
--- a/usr/iscsi/conn.c
+++ b/usr/iscsi/conn.c
@@ -82,10 +82,12 @@ void conn_exit(struct iscsi_connection *conn)
void conn_close(struct iscsi_connection *conn)
{
struct iscsi_task *task, *tmp;
+ int ret;
- conn->tp->ep_close(conn);
+ ret = conn->tp->ep_close(conn);
- eprintf("connection closed %p %u\n", conn, conn->refcount);
+ eprintf("connection closed %p %u %d %d\n",
+ conn, conn->refcount, ret, errno);
/* may not have been in FFP yet */
if (!conn->session)
diff --git a/usr/iscsi/iscsi_tcp.c b/usr/iscsi/iscsi_tcp.c
index 2320b3e..0007e66 100644
--- a/usr/iscsi/iscsi_tcp.c
+++ b/usr/iscsi/iscsi_tcp.c
@@ -164,8 +164,8 @@ static void iscsi_tcp_event_handler(int fd, int events, void *data)
iscsi_tx_handler(conn);
if (conn->state == STATE_CLOSE) {
+ eprintf("connection closed %p\n", conn);
conn_close(conn);
- dprintf("connection closed\n");
}
}
diff --git a/usr/iscsi/iscsid.c b/usr/iscsi/iscsid.c
index c22a6f6..2bab387 100644
--- a/usr/iscsi/iscsid.c
+++ b/usr/iscsi/iscsid.c
@@ -240,9 +240,11 @@ static void login_security_done(struct iscsi_connection *conn)
struct iscsi_connection *ent, *next;
/* do session reinstatement */
+ eprintf("session reinstatement, %p\n", session);
list_for_each_entry_safe(ent, next, &session->conn_list,
clist) {
+ eprintf("reinstatement close, %p %u\n", ent, ent->state);
conn_close(ent);
}
@@ -1085,6 +1087,8 @@ static int iscsi_scsi_cmd_done(uint64_t nid, int result, struct scsi_cmd *scmd)
* task got reassinged to another connection.
*/
if (task->conn->state == STATE_CLOSE) {
+ eprintf("connection already closed %p %u\n",
+ task->conn, task->conn->refcount);
iscsi_free_cmd_task(task);
return 0;
}
diff --git a/usr/tgtd.c b/usr/tgtd.c
index 758e7d5..f29dab1 100644
--- a/usr/tgtd.c
+++ b/usr/tgtd.c
@@ -137,6 +137,7 @@ static struct event_data *tgt_event_lookup(int fd)
void tgt_event_del(int fd)
{
struct event_data *tev;
+ int ret;
tev = tgt_event_lookup(fd);
if (!tev) {
@@ -144,7 +145,10 @@ void tgt_event_del(int fd)
return;
}
- epoll_ctl(ep_fd, EPOLL_CTL_DEL, fd, NULL);
+ ret = epoll_ctl(ep_fd, EPOLL_CTL_DEL, fd, NULL);
+ if (ret < 0)
+ eprintf("fail to remove epoll event %d %d\n", ret, errno);
+
list_del(&tev->e_list);
free(tev);
}
--
To unsubscribe from this list: send the line "unsubscribe stgt" in
the body of a message to majordomo at vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
More information about the stgt
mailing list