[stgt] tgtd segfault during heavy I/O

FUJITA Tomonori fujita.tomonori at lab.ntt.co.jp
Tue Jul 19 20:40:02 CEST 2011


On Tue, 19 Jul 2011 14:57:45 +0800
Kiefer Chang <zapchang at gmail.com> wrote:

> I forgot to mention that signalfd doesn't work on my platform.
> pthread notification is used by default.

It should be fine. However, after reading your log, I found another
bug.

Can you try the following patch (against the latest git)?

Thanks a lot for testing.

diff --git a/usr/bs.c b/usr/bs.c
index d72d090..8f56aee 100644
--- a/usr/bs.c
+++ b/usr/bs.c
@@ -122,7 +122,7 @@ out:
 	pthread_exit(NULL);
 }
 
-static void bs_thread_request_done(int fd, int events, void *data)
+void bs_thread_request_done(int fd, int events, void *data)
 {
 	struct scsi_cmd *cmd;
 	int nr_events, ret;
@@ -230,6 +230,8 @@ static int bs_init_signalfd(void)
 	sigset_t mask;
 	int ret;
 
+	return 1;
+
 	pthread_mutex_init(&finished_lock, NULL);
 
 	sigemptyset(&mask);
@@ -270,6 +272,7 @@ static int bs_init_notify_thread(void)
 		goto close_command_fd;
 	}
 
+	set_non_blocking(done_fd[0]);
 	ret = tgt_event_add(done_fd[0], EPOLLIN, bs_thread_request_done, NULL);
 	if (ret) {
 		eprintf("failed to add epoll event\n");
diff --git a/usr/bs_thread.h b/usr/bs_thread.h
index d460032..14456a7 100644
--- a/usr/bs_thread.h
+++ b/usr/bs_thread.h
@@ -29,3 +29,4 @@ extern int bs_thread_open(struct bs_thread_info *info, request_func_t *rfn,
 extern void bs_thread_close(struct bs_thread_info *info);
 extern int bs_thread_cmd_submit(struct scsi_cmd *cmd);
 
+extern void bs_wait_one_completion(void);
diff --git a/usr/iscsi/conn.c b/usr/iscsi/conn.c
index 53e719e..3836655 100644
--- a/usr/iscsi/conn.c
+++ b/usr/iscsi/conn.c
@@ -115,14 +115,14 @@ void conn_close(struct iscsi_connection *conn)
 		if (task->conn != conn)
 			continue;
 
-		eprintf("Forcing release of pending task %p %" PRIx64 "\n",
-			task, task->tag);
+		eprintf("Forcing release of pending task %p %" PRIx64 " %u\n",
+			task, task->tag, conn->refcount);
 		list_del(&task->c_list);
 		iscsi_free_task(task);
 	}
 
 	if (conn->tx_task) {
-		dprintf("Add current tx task to the tx list for removal "
+		eprintf("Add current tx task to the tx list for removal "
 			"%p %" PRIx64 "\n",
 			conn->tx_task, conn->tx_task->tag);
 		list_add(&conn->tx_task->c_list, &conn->tx_clist);
@@ -134,8 +134,8 @@ void conn_close(struct iscsi_connection *conn)
 
 		op = task->req.opcode & ISCSI_OPCODE_MASK;
 
-		eprintf("Forcing release of tx task %p %" PRIx64 " %x\n",
-			task, task->tag, op);
+		eprintf("Forcing release of tx task %p %" PRIx64 " %x %u\n",
+			task, task->tag, op, conn->refcount);
 		switch (op) {
 		case ISCSI_OP_SCSI_CMD:
 			/*
@@ -155,14 +155,14 @@ void conn_close(struct iscsi_connection *conn)
 			iscsi_free_task(task);
 			break;
 		default:
-			eprintf("%x\n", op);
+			eprintf("unknow op %x\n", op);
 			break;
 		}
 	}
 
 	if (conn->rx_task) {
-		eprintf("Forcing release of rx task %p %" PRIx64 "\n",
-			conn->rx_task, conn->rx_task->tag);
+		eprintf("Forcing release of rx task %p %" PRIx64 " %u\n",
+			conn->rx_task, conn->rx_task->tag, conn->refcount);
 		iscsi_free_task(conn->rx_task);
 	}
 	conn->rx_task = NULL;
@@ -173,10 +173,25 @@ void conn_close(struct iscsi_connection *conn)
 		 * This task is in SCSI. We need to wait for I/O
 		 * completion.
 		 */
+		eprintf("release task %p %x %" PRIx64 " flag %lx, %u\n",
+			task, task->req.opcode & ISCSI_OPCODE_MASK,
+			task->tag, task->flags, conn->refcount);
+
 		if (task_in_scsi(task))
 			continue;
 		iscsi_free_task(task);
 	}
+
+	eprintf("%p %u\n", conn, conn->refcount);
+
+	while (conn->refcount != 1) {
+		struct timeval t;
+		t.tv_sec = 1;
+		t.tv_usec = 0;
+		eprintf("%p %u\n", conn, conn->refcount);
+		bs_thread_request_done(0, 0, NULL);
+		select(0, NULL, NULL, NULL, &t);
+	}
 done:
 	conn_put(conn);
 }
diff --git a/usr/iscsi/iscsi_tcp.c b/usr/iscsi/iscsi_tcp.c
index 1988b2f..c61faa1 100644
--- a/usr/iscsi/iscsi_tcp.c
+++ b/usr/iscsi/iscsi_tcp.c
@@ -164,7 +164,7 @@ static void iscsi_tcp_event_handler(int fd, int events, void *data)
 		iscsi_tx_handler(conn);
 
 	if (conn->state == STATE_CLOSE) {
-		dprintf("connection closed %p\n", conn);
+		eprintf("connection closed %p\n", conn);
 		conn_close(conn);
 	}
 }
diff --git a/usr/iscsi/iscsid.c b/usr/iscsi/iscsid.c
index 22a21cc..4cde178 100644
--- a/usr/iscsi/iscsid.c
+++ b/usr/iscsi/iscsid.c
@@ -1209,6 +1209,12 @@ static int iscsi_scsi_cmd_done(uint64_t nid, int result, struct scsi_cmd *scmd)
 	struct iscsi_task *task = ITASK(scmd);
 	uint32_t read_len = scsi_get_in_length(scmd);
 
+	if (result == TASK_ABORTED) {
+		list_del(&task->c_hlist);
+		iscsi_free_task(task);
+		return 0;
+	}
+
 	/*
 	 * Since the connection is closed we just free the task.
 	 * We could delay the closing of the conn in some cases and send
@@ -1216,6 +1222,8 @@ static int iscsi_scsi_cmd_done(uint64_t nid, int result, struct scsi_cmd *scmd)
 	 * task got reassinged to another connection.
 	 */
 	if (task->conn->state == STATE_CLOSE) {
+		eprintf("finish a task on a closed conn, %p %u\n",
+			task, task->conn->refcount);
 		iscsi_free_cmd_task(task);
 		return 0;
 	}
@@ -1376,6 +1384,13 @@ static int iscsi_tm_done(struct mgmt_req *mreq)
 
 	task = (struct iscsi_task *) (unsigned long) mreq->mid;
 
+	if (task->conn->state == STATE_CLOSE) {
+		eprintf("finish a tm task on a closed conn, %p %u\n",
+			task, task->conn->refcount);
+		iscsi_free_task(task);
+		return 0;
+	}
+
 	switch (mreq->result) {
 	case 0:
 		task->result = ISCSI_TMF_RSP_COMPLETE;
@@ -1405,6 +1420,8 @@ static int iscsi_tm_execute(struct iscsi_task *task)
 	struct iscsi_tm *req = (struct iscsi_tm *) &task->req;
 	int fn = 0, err = 0;
 
+	eprintf("%x\n", req->flags & ISCSI_FLAG_TM_FUNC_MASK);
+
 	switch (req->flags & ISCSI_FLAG_TM_FUNC_MASK) {
 	case ISCSI_TM_FUNC_ABORT_TASK:
 		fn = ABORT_TASK;
@@ -1412,15 +1429,11 @@ static int iscsi_tm_execute(struct iscsi_task *task)
 	case ISCSI_TM_FUNC_ABORT_TASK_SET:
 		fn = ABORT_TASK_SET;
 		break;
-	case ISCSI_TM_FUNC_CLEAR_ACA:
-		fn = CLEAR_TASK_SET;
-		break;
-	case ISCSI_TM_FUNC_CLEAR_TASK_SET:
-		fn = CLEAR_ACA;
-		break;
 	case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET:
 		fn = LOGICAL_UNIT_RESET;
 		break;
+	case ISCSI_TM_FUNC_CLEAR_ACA:
+	case ISCSI_TM_FUNC_CLEAR_TASK_SET:
 	case ISCSI_TM_FUNC_TARGET_WARM_RESET:
 	case ISCSI_TM_FUNC_TARGET_COLD_RESET:
 	case ISCSI_TM_FUNC_TASK_REASSIGN:
diff --git a/usr/target.c b/usr/target.c
index 5f04cf9..5afce53 100644
--- a/usr/target.c
+++ b/usr/target.c
@@ -1101,6 +1101,8 @@ void target_cmd_done(struct scsi_cmd *cmd)
 	struct mgmt_req *mreq;
 
 	mreq = cmd->mreq;
+	if (mreq)
+		eprintf("%p %d\n", mreq, mreq->busy);
 	if (mreq && !--mreq->busy) {
 		mreq->result = mreq->function == ABORT_TASK ? -EEXIST : 0;
 		mreq->itn_id = cmd->cmd_itn_id;
@@ -1127,7 +1129,10 @@ static int abort_cmd(struct target* target, struct mgmt_req *mreq,
 		cmd->mreq = mreq;
 		err = -EBUSY;
 	} else {
-		cmd->dev->cmd_done(target, cmd);
+		eprintf("found %" PRIx64 " %lx\n", cmd->tag, cmd->state);
+		cmd_hlist_remove(cmd);
+		list_del(&cmd->qlist);
+
 		target_cmd_io_done(cmd, TASK_ABORTED);
 	}
 	return err;
@@ -1140,7 +1145,7 @@ static int abort_task_set(struct mgmt_req *mreq, struct target* target,
 	struct it_nexus *itn;
 	int i, err, count = 0;
 
-	eprintf("found %" PRIx64 " %d\n", tag, all);
+	eprintf("aborting %" PRIx64 " %d, %p\n", tag, all, mreq);
 
 	list_for_each_entry(itn, &target->it_nexus_list, nexus_siblings) {
 		for (i = 0; i < ARRAY_SIZE(itn->cmd_hash_list); i++) {

--
To unsubscribe from this list: send the line "unsubscribe stgt" in
the body of a message to majordomo at vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



More information about the stgt mailing list