[stgt] [PATCH] protect against tgtd process hang caused by a hanging redirect script

Or Gerlitz ogerlitz at mellanox.com
Thu Feb 17 13:47:17 CET 2011


If the child process spawned to run the redirect callback script hangs, e.g.
because of load or a bug in the application this script deals with, tgt
can hang forever. Protect against that by calling select() on the fd from
which tgt is expected to read, for up to 100ms; if the timeout expires, tgt
terminates the child process and fails the initiator login attempt.

Signed-off-by: Or Gerlitz <ogerlitz at mellanox.com>
--------

I suspended the process which this callback script communicates with
and attempted to log in; tgt hangs forever in waitpid:

(gdb) bt
#0  0x0000003e8420e305 in waitpid () from /lib64/libpthread.so.0
#1  0x00000000004184bc in call_program (cmd=0x7fffa2595cc0 "/tmp/redirect_callback.bash iqn.n2p0 192.168.20.15",
    callback=0, data=0x0, output=0x7fffa2595890 "", op_len=1061, flags=<value optimized out>) at tgtd.c:324
#2  0x00000000004099f1 in target_redirected (target=<value optimized out>, conn=<value optimized out>,
    buf=0x7fffa25961f0 "192.168.", reason=0x7fffa259661c) at iscsi/target.c:198
#3  0x0000000000406055 in login_start (conn=0x146bdf38) at iscsi/iscsid.c:512
#4  0x0000000000407711 in iscsi_rx_handler (conn=0x146bdf38) at iscsi/iscsid.c:732
#5  0x000000000040d828 in iscsi_tcp_event_handler (fd=<value optimized out>, events=1, data=0x0) at iscsi/iscsi_tcp.c:165
#6  0x000000000041907e in main (argc=5, argv=0x7fffa2599f48) at tgtd.c:388

 tgtd.c |   30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/usr/tgtd.c b/usr/tgtd.c
index 066f46e..f913c17 100644
--- a/usr/tgtd.c
+++ b/usr/tgtd.c
@@ -309,7 +309,25 @@ int call_program(const char *cmd, void (*callback)(void *data, int result),
 		eprintf("execv failed for: %s, %m\n", cmd);
 		exit(-1);
 	} else {
+		struct timeval tv;
+		fd_set rfds;
+		int ret_sel;
+
 		close(fds[1]);
+
+		do {
+			FD_ZERO(&rfds);
+			FD_SET(fds[0],&rfds);
+			tv.tv_sec = 0;
+			tv.tv_usec = 100000;
+			ret_sel = select(fds[0]+1, &rfds, NULL, NULL, &tv);
+		} while (ret_sel < 0 && errno == EINTR);
+		if (ret_sel <= 0) { /* error or timeout */
+			eprintf("timeout on redirect callback, terminating child pid %d\n", pid);
+			kill(pid, SIGTERM);
+
+		}
+
 		do {
 			ret = waitpid(pid, &i, 0);
 		} while (ret < 0 && errno == EINTR);
@@ -318,11 +336,13 @@ int call_program(const char *cmd, void (*callback)(void *data, int result),
 			close(fds[0]);
 			return ret;
 		}
-		ret = read(fds[0], output, op_len);
-		if (ret < 0) {
-			eprintf("failed to get the output from: %s\n", cmd);
-			close(fds[0]);
-			return ret;
+		if (ret_sel > 0) {
+			ret = read(fds[0], output, op_len);
+			if (ret < 0) {
+				eprintf("failed to get the output from: %s\n", cmd);
+				close(fds[0]);
+				return ret;
+			}
 		}

 		if (callback)
--
To unsubscribe from this list: send the line "unsubscribe stgt" in
the body of a message to majordomo at vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



More information about the stgt mailing list