[stgt] tgtd crash when more than 40 LUNs per target (and a way to reproduce)
FUJITA Tomonori
fujita.tomonori at lab.ntt.co.jp
Tue Dec 23 11:43:00 CET 2008
On Fri, 07 Nov 2008 11:57:18 +0100
Tomasz Chmielewski <mangoo at wpkg.org> wrote:
> FUJITA Tomonori schrieb:
>
> (...)
>
> >>> This patch enables you to create more luns (but eventually, tgtd
> >>> crashes) ?
> >> Yes, with this patch it crashes after ~110 luns or ~60 targets.
> >
> > I see, thanks.
> >
> >
> > The first problem is that tgtd can't handle the failure to create
> > pthreads. I'll try to fix this soon.
> >
> > The second problem is seems that glibc has the limit of the maximum
> > number of pthreads per process.
> >
> > I guess that we create too many pthreads per lun. Probably, it would
> > be better to have some pthreads per target (regardless of the number
> > of luns). But it takes some time to fix the second problem.
>
> Any news on this?
> I'll be hitting this limit (~60 targets and more) on my 32 bit targets
> by the end of this year and this issue makes me feel uncomfortable :(
I don't loosen the limit yet but this patch should fix the segfault
and deadlocks caused by the target or lun creation. Can you try this?
diff --git a/usr/bs.c b/usr/bs.c
index 542ef55..cd19b86 100644
--- a/usr/bs.c
+++ b/usr/bs.c
@@ -27,6 +27,7 @@
#include "list.h"
#include "tgtd.h"
+#include "tgtadm_error.h"
#include "util.h"
#include "bs_thread.h"
@@ -68,6 +69,9 @@ retry:
goto out;
}
+ if (info->stop)
+ goto out;
+
pthread_mutex_lock(&info->finished_lock);
retest:
if (list_empty(&info->finished_list)) {
@@ -100,7 +104,7 @@ rewrite:
goto retry;
out:
- return NULL;
+ pthread_exit(NULL);
}
static void bs_thread_request_done(int fd, int events, void *data)
@@ -141,7 +145,11 @@ static void *bs_thread_worker_fn(void *arg)
struct bs_thread_info *info = arg;
struct scsi_cmd *cmd;
- while (1) {
+ pthread_mutex_lock(&info->startup_lock);
+ dprintf("started this thread\n");
+ pthread_mutex_unlock(&info->startup_lock);
+
+ while (!info->stop) {
pthread_mutex_lock(&info->pending_lock);
retest:
if (list_empty(&info->pending_list)) {
@@ -170,7 +178,7 @@ static void *bs_thread_worker_fn(void *arg)
pthread_cond_signal(&info->finished_cond);
}
- return NULL;
+ pthread_exit(NULL);
}
int bs_thread_open(struct bs_thread_info *info, request_func_t *rfn,
@@ -189,35 +197,50 @@ int bs_thread_open(struct bs_thread_info *info, request_func_t *rfn,
pthread_mutex_init(&info->finished_lock, NULL);
pthread_mutex_init(&info->pending_lock, NULL);
+ pthread_mutex_init(&info->startup_lock, NULL);
ret = pipe(info->command_fd);
- if (ret)
+ if (ret) {
+ eprintf("failed to create command pipe, %m\n");
goto destroy_cond_mutex;
+ }
ret = pipe(info->done_fd);
- if (ret)
+ if (ret) {
+ eprintf("failed to done command pipe, %m\n");
goto close_command_fd;
+ }
ret = tgt_event_add(info->done_fd[0], EPOLLIN, bs_thread_request_done, info);
- if (ret)
+ if (ret) {
+ eprintf("failed to add epoll event\n");
goto close_done_fd;
+ }
ret = pthread_create(&info->ack_thread, NULL, bs_thread_ack_fn, info);
- if (ret)
+ if (ret) {
+ eprintf("failed to create an ack thread, %s\n", strerror(ret));
goto event_del;
+ }
if (nr_threads > ARRAY_SIZE(info->worker_thread)) {
eprintf("too many threads %d\n", nr_threads);
nr_threads = ARRAY_SIZE(info->worker_thread);
}
+ pthread_mutex_lock(&info->startup_lock);
for (i = 0; i < nr_threads; i++) {
ret = pthread_create(&info->worker_thread[i], NULL,
bs_thread_worker_fn, info);
- if (ret)
- goto destroy_threads;
- }
+ if (ret) {
+ eprintf("failed to create a worker thread, %d %s\n",
+ i, strerror(ret));
+ if (ret)
+ goto destroy_threads;
+ }
+ }
+ pthread_mutex_unlock(&info->startup_lock);
rewrite:
ret = write(info->command_fd[1], &ret, sizeof(ret));
if (ret < 0) {
@@ -228,19 +251,17 @@ rewrite:
return 0;
destroy_threads:
+ info->stop = 1;
write(info->command_fd[1], &ret, sizeof(ret));
- pthread_cancel(info->ack_thread);
- pthread_cond_signal(&info->finished_cond);
pthread_join(info->ack_thread, NULL);
- info->stop = 1;
- for (i = 0; info->worker_thread[i]; i++) {
- pthread_cancel(info->worker_thread[i]);
- pthread_cond_signal(&info->pending_cond);
- }
+ eprintf("stopped the ack thread\n");
- for (i = 0; info->worker_thread[i]; i++)
- pthread_join(info->worker_thread[i], NULL);
+ pthread_mutex_unlock(&info->startup_lock);
+ for (; i > 0; i--) {
+ pthread_join(info->worker_thread[i - 1], NULL);
+ eprintf("stopped the worker thread %d\n", i - 1);
+ }
event_del:
tgt_event_del(info->done_fd[0]);
close_done_fd:
@@ -254,8 +275,9 @@ destroy_cond_mutex:
pthread_cond_destroy(&info->pending_cond);
pthread_mutex_destroy(&info->finished_lock);
pthread_mutex_destroy(&info->pending_lock);
+ pthread_mutex_destroy(&info->startup_lock);
- return -1;
+ return TGTADM_NOMEM;
}
void bs_thread_close(struct bs_thread_info *info)
@@ -276,6 +298,7 @@ void bs_thread_close(struct bs_thread_info *info)
pthread_cond_destroy(&info->pending_cond);
pthread_mutex_destroy(&info->finished_lock);
pthread_mutex_destroy(&info->pending_lock);
+ pthread_mutex_destroy(&info->startup_lock);
tgt_event_del(info->done_fd[0]);
diff --git a/usr/bs_thread.h b/usr/bs_thread.h
index b2975a5..9dfbbd5 100644
--- a/usr/bs_thread.h
+++ b/usr/bs_thread.h
@@ -20,6 +20,8 @@ struct bs_thread_info {
/* protected by pending_lock */
struct list_head pending_list;
+ pthread_mutex_t startup_lock;
+
int command_fd[2];
int done_fd[2];
diff --git a/usr/tgtd.c b/usr/tgtd.c
index 1a3cc02..fa0fb7b 100644
--- a/usr/tgtd.c
+++ b/usr/tgtd.c
@@ -30,6 +30,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <sys/resource.h>
#include <sys/epoll.h>
#include "list.h"
@@ -96,6 +97,37 @@ static int oom_adjust(void)
return 0;
}
+static int nr_file_adjust(void)
+{
+ int ret, fd;
+ char path[] = "/proc/sys/fs/nr_open";
+ char buf[64];
+ struct rlimit rlim;
+
+ /* Avoid oom-killer */
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "can't open %s, %m\n", path);
+ return errno;
+ }
+ ret = read(fd, buf, sizeof(buf));
+ if (ret < 0) {
+ fprintf(stderr, "can't read %s, %m\n", path);
+ return errno;
+ }
+ close(fd);
+
+ rlim.rlim_cur = rlim.rlim_max = atoi(buf);
+
+ ret = setrlimit(RLIMIT_NOFILE, &rlim);
+ if (ret < 0) {
+ fprintf(stderr, "can't adjust nr_open %m\n");
+ return errno;
+ }
+
+ return 0;
+}
+
int tgt_event_add(int fd, int events, event_handler_t handler, void *data)
{
struct epoll_event ev;
@@ -340,6 +372,10 @@ int main(int argc, char **argv)
if (err)
exit(1);
+ err = nr_file_adjust();
+ if (err)
+ exit(1);
+
err = log_init(program_name, LOG_SPACE_SIZE, is_daemon, is_debug);
if (err)
exit(1);
--
To unsubscribe from this list: send the line "unsubscribe stgt" in
the body of a message to majordomo at vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
More information about the stgt
mailing list