[stgt] segfault when stopping the target

FUJITA Tomonori fujita.tomonori at lab.ntt.co.jp
Tue Oct 21 16:08:36 CEST 2008


On Tue, 21 Oct 2008 15:50:36 +0200
Tomasz Chmielewski <mangoo at wpkg.org> wrote:

> FUJITA Tomonori schrieb:
> > On Tue, 21 Oct 2008 14:15:57 +0200
> > Tomasz Chmielewski <mangoo at wpkg.org> wrote:
> > 
> >> FUJITA Tomonori schrieb:
> >>
> >>>>>> # tgtadm --op delete --mode conn --tid 2 --sid 2 --cid 0
> >>>>>> Segmentation fault
> >>>>>>
> >>>>>> If a segfault does not happen immediately, start all these commands 
> >>>>>> again (or, generally, "tgtadm --op delete --mode conn --tid 2 --sid 1 
> >>>>>> --cid 0" is enough).
> >>>>>>
> >>>>>> For me, on x86, segfault happens in 90% of cases. Sometimes, the 
> >>>>>> connection is eventually deleted.
> >>> I tried the above commands three times on x86 but I can't reproduce
> >>> this problem.
> >> It is easier when there is some traffic to the target.
> >>
> >>
> >>> Can you use gdb to find where tgtadm crashes?
> >> Sure.
> >> Here is strace output, but it doesn't say much, does it? I'll try to get more data with gdb.
> >>
> >> execve("/usr/sbin/tgtadm", ["tgtadm", "--op", "delete", "--mode", "conn", "--tid", "1", "--sid", "1", "--cid", "0"], [/* 20 vars */]) = 0
> >> uname({sys="Linux", node="megathecus", ...}) = 0
> >> brk(0)                                  = 0x804d000
> >> access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
> >> mmap2(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x6feee000
> >> access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
> >> open("/etc/ld.so.cache", O_RDONLY)      = 3
> >> fstat64(3, {st_mode=S_IFREG|0644, st_size=16095, ...}) = 0
> >> mmap2(NULL, 16095, PROT_READ, MAP_PRIVATE, 3, 0) = 0x6feea000
> >> close(3)                                = 0
> >> access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
> >> open("/lib/tls/libc.so.6", O_RDONLY)    = 3
> >> read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\240O\1"..., 512) = 512
> >> fstat64(3, {st_mode=S_IFREG|0644, st_size=1245488, ...}) = 0
> >> mmap2(NULL, 1251484, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x6fdb8000
> >> mmap2(0x6fee0000, 28672, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x128) = 0x6fee0000
> >> mmap2(0x6fee7000, 10396, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x6fee7000
> >> close(3)                                = 0
> >> mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x6fdb7000
> >> mprotect(0x6fee0000, 20480, PROT_READ)  = 0
> >> set_thread_area({entry_number:-1 -> 6, base_addr:0x6fdb78e0, limit:1048575, seg_32bit:1, contents:0, read_exec_only:0, limit_in_pages:1, seg_not_present:0,useable:1}) = 0
> >> munmap(0x6feea000, 16095)               = 0
> >> brk(0)                                  = 0x804d000
> >> brk(0x8070000)                          = 0x8070000
> >> socket(PF_FILE, SOCK_STREAM, 0)         = 3
> >> connect(3, {sa_family=AF_FILE, path=@TGT_IPC_ABSTRACT_NAMESPACE}, 110) = 0
> >> write(3, "\4\0\0\0\1\0\0\0iscsi\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 116) = 116
> >> read(3, "", 8)                          = 0
> >> --- SIGSEGV (Segmentation fault) @ 0 (0) ---
> >> +++ killed by SIGSEGV +++
> >> Process 5131 detached
> > 
> > This helps?
> 
> At least it does not crash any more ;)
> But it loops endlessly.

Ah, thanks.

With this patch, tgtadm should fail properly instead of going into an
endless loop.

But we need to know why tgtd closed tgtadm's socket. Can you try this
patch and let me know if you get error messages in the log?


diff --git a/usr/mgmt.c b/usr/mgmt.c
index f6141cb..a40bf69 100644
--- a/usr/mgmt.c
+++ b/usr/mgmt.c
@@ -493,16 +493,22 @@ static void mgmt_event_handler(int accept_fd, int events, void *data)
 	struct mgmt_task *mtask;
 
 	fd = ipc_accept(accept_fd);
-	if (fd < 0)
+	if (fd < 0) {
+		eprintf("failed to accept a socket\n");
 		return;
+	}
 
 	err = ipc_perm(fd);
-	if (err < 0)
+	if (err < 0) {
+		eprintf("permission error\n");
 		goto out;
+	}
 
 	err = set_non_blocking(fd);
-	if (err)
+	if (err) {
+		eprintf("failed to set a socket non-blocking\n");
 		goto out;
+	}
 
 	mtask = zalloc(sizeof(*mtask));
 	if (!mtask) {
@@ -512,6 +518,7 @@ static void mgmt_event_handler(int accept_fd, int events, void *data)
 
 	mtask->buf = zalloc(BUFSIZE);
 	if (!mtask->buf) {
+		eprintf("can't allocate mtask buffer\n");
 		free(mtask);
 		goto out;
 	}
@@ -522,6 +529,7 @@ static void mgmt_event_handler(int accept_fd, int events, void *data)
 	if (err) {
 		free(mtask->buf);
 		free(mtask);
+		eprintf("failed to add a socket to epoll\n");
 		goto out;
 	}
 
diff --git a/usr/tgtadm.c b/usr/tgtadm.c
index 23dbc53..9db832f 100644
--- a/usr/tgtadm.c
+++ b/usr/tgtadm.c
@@ -198,13 +198,32 @@ static int ipc_mgmt_rsp(int fd)
 {
 	struct tgtadm_rsp rsp;
 	int err, rest, len;
+	char *p;
 
-	err = read(fd, &rsp, sizeof(rsp));
+	rest = sizeof(rsp);
+	p = (char *)&rsp;
+retry:
+	err = recv(fd, p, rest, MSG_WAITALL);
 	if (err < 0) {
-		eprintf("can't get the response, %m\n");
+		if (errno == EAGAIN)
+			goto retry;
+		else if (errno == EINTR)
+			eprintf("interrupted by a signal\n");
+		else
+			eprintf("can't get the response, %m\n");
+
 		return errno;
+	} else if (err == 0) {
+		eprintf("tgtd closed the socket\n");
+		return 0;
+	} else {
+		p += err;
+		rest -= err;
 	}
 
+	if (rest)
+		goto retry;
+
 	if (rsp.err != TGTADM_SUCCESS) {
 		eprintf("%s\n", tgtadm_emsg[rsp.err]);
 		return EINVAL;
--
To unsubscribe from this list: send the line "unsubscribe stgt" in
the body of a message to majordomo at vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



More information about the stgt mailing list