[sheepdog] [PATCH v2 08/11] work: don't call get_node_info in the worker thread
Liu Yuan
namei.unix at gmail.com
Fri Apr 19 10:28:23 CEST 2013
On 04/19/2013 03:14 PM, MORITA Kazutaka wrote:
> Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
> ---
> sheep/work.c | 22 ++++++++++++----------
> sheep/work.h | 1 +
> 2 files changed, 13 insertions(+), 10 deletions(-)
>
> diff --git a/sheep/work.c b/sheep/work.c
> index 29e1f86..3dce178 100644
> --- a/sheep/work.c
> +++ b/sheep/work.c
> @@ -55,27 +55,22 @@ static uint64_t get_msec_time(void)
> return tv.tv_sec * 1000 + tv.tv_usec / 1000;
> }
>
> -static inline uint64_t wq_get_roof(enum wq_thread_control tc)
> +static inline uint64_t wq_get_roof(struct worker_info *wi)
> {
> - struct vnode_info *vinfo;
> - int nr_nodes;
> uint64_t nr = 1;
>
> - switch (tc) {
> + switch (wi->tc) {
> case WQ_ORDERED:
> break;
> case WQ_DYNAMIC:
> - vinfo = get_vnode_info();
> - nr_nodes = vinfo->nr_nodes;
> - put_vnode_info(vinfo);
> /* FIXME: 2 * nr_nodes threads. No rationale yet. */
> - nr = nr_nodes * 2;
> + nr = wi->nr_nodes * 2;
> break;
> case WQ_UNLIMITED:
> nr = SIZE_MAX;
> break;
> default:
> - panic("Invalid threads control %d", tc);
> + panic("Invalid threads control %d", wi->tc);
> }
> return nr;
> }
> @@ -83,7 +78,7 @@ static inline uint64_t wq_get_roof(enum wq_thread_control tc)
> static bool wq_need_grow(struct worker_info *wi)
> {
> if (wi->nr_threads < wi->nr_pending + wi->nr_running &&
> - wi->nr_threads * 2 <= wq_get_roof(wi->tc)) {
> + wi->nr_threads * 2 <= wq_get_roof(wi)) {
> wi->tm_end_of_protection = get_msec_time() +
> WQ_PROTECTION_PERIOD;
> return true;
> @@ -153,12 +148,17 @@ static void bs_thread_request_done(int fd, int events, void *data)
> struct work *work;
> eventfd_t value;
> LIST_HEAD(list);
> + struct vnode_info *vinfo;
>
> ret = eventfd_read(fd, &value);
> if (ret < 0)
> return;
>
> + vinfo = get_vnode_info();
> +
> list_for_each_entry(wi, &worker_info_list, worker_info_siblings) {
> + wi->nr_nodes = vinfo->nr_nodes;
> +
> pthread_mutex_lock(&wi->finished_lock);
> list_splice_init(&wi->finished_list, &list);
> pthread_mutex_unlock(&wi->finished_lock);
> @@ -170,6 +170,8 @@ static void bs_thread_request_done(int fd, int events, void *data)
> work->done(work);
> }
> }
> +
> + put_vnode_info(vinfo);
> }
>
> static void *worker_routine(void *arg)
> diff --git a/sheep/work.h b/sheep/work.h
> index 5706a84..eee41be 100644
> --- a/sheep/work.h
> +++ b/sheep/work.h
> @@ -46,6 +46,7 @@ struct worker_info {
> /* we cannot shrink work queue till this time */
> uint64_t tm_end_of_protection;
> enum wq_thread_control tc;
> + size_t nr_nodes;
> };
>
> extern struct list_head worker_info_list;
>
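It seems that this patch causes a problem: I can't pass tests/036 with the corosync driver. sheep segfaults in bs_thread_request_done() at `wi->nr_nodes = vinfo->nr_nodes;` with vinfo = 0x0 (see the dump below).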
Backtrace from the core dump:
(gdb) bt
#0 0x00007ffada064b7b in raise (sig=<optimized out>) at ../nptl/sysdeps/unix/sysv/linux/pt-raise.c:42
#1 0x000000000042530f in reraise_crash_signal (signo=11, status=1) at util.c:414
#2 <signal handler called>
#3 bs_thread_request_done (fd=<optimized out>, events=<optimized out>, data=<optimized out>) at work.c:160
#4 0x0000000000421899 in do_event_loop (sort_with_prio=false, timeout=<optimized out>) at event.c:211
#5 0x0000000000404ebf in main (argc=<optimized out>, argv=<optimized out>) at sheep.c:691
The sheep.log:
Apr 19 16:26:42 [main] queue_request(353) READ_VDIS, 2
Apr 19 16:26:42 [io 21652] do_process_work(1344) 15, 0, 0
Apr 19 16:26:42 [main] crash_handler(181) sheep exits unexpectedly (Segmentation fault).
Apr 19 16:26:42 [main] sd_backtrace(833) sheep.c:183: crash_handler
Apr 19 16:26:42 [main] sd_backtrace(847) /lib/x86_64-linux-gnu/libpthread.so.0(+0xfcaf) [0x7ffada064caf]
Apr 19 16:26:42 [main] sd_backtrace(833) work.c:159: bs_thread_request_done
Apr 19 16:26:43 [main] sd_backtrace(833) event.c:211: do_event_loop
Apr 19 16:26:43 [main] sd_backtrace(833) sheep.c:691: main
Apr 19 16:26:43 [main] sd_backtrace(847) /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xec) [0x7ffad939676c]
Apr 19 16:26:43 [main] sd_backtrace(847) ../sheep/sheep() [0x405618]
Apr 19 16:26:43 [main] __dump_stack_frames(780) #7 0x00000000004235a8 in sd_backtrace () at logger.c:852
Apr 19 16:26:43 [main] __dump_stack_frames(794) 852 dump_stack_frames();
Apr 19 16:26:43 [main] __dump_stack_frames(794) current_sp = 0x7fff128ce8e0
Apr 19 16:26:43 [main] __dump_stack_frames(794) addrs = {0x42344d, 0x405798, 0x7ffada064cb0, 0x40e9a0, 0x421899, 0x404ebf, 0x7ffad939676d, 0x405619, 0x0 <repeats 1016 times>}
Apr 19 16:26:43 [main] __dump_stack_frames(794) i = <optimized out>
Apr 19 16:26:43 [main] __dump_stack_frames(794) n = <optimized out>
Apr 19 16:26:43 [main] __dump_stack_frames(794) __func__ = "sd_backtrace"
Apr 19 16:26:43 [main] __dump_stack_frames(780) #8 0x0000000000405798 in crash_handler (signo=11) at sheep.c:183
Apr 19 16:26:43 [main] __dump_stack_frames(794) 183 sd_backtrace();
Apr 19 16:26:43 [main] __dump_stack_frames(794) __func__ = "crash_handler"
Apr 19 16:26:44 [main] __dump_stack_frames(780) #9 <signal handler called>
Apr 19 16:26:44 [main] __dump_stack_frames(794) No symbol table info available.
Apr 19 16:26:44 [main] __dump_stack_frames(780) #10 bs_thread_request_done (fd=<optimized out>, events=<optimized out>, data=<optimized out>) at work.c:160
Apr 19 16:26:44 [main] __dump_stack_frames(794) 160 wi->nr_nodes = vinfo->nr_nodes;
Apr 19 16:26:44 [main] __dump_stack_frames(794) ret = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) wi = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) work = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) value = 1
Apr 19 16:26:44 [main] __dump_stack_frames(794) list = {next = 0x7fff128f1fe0, prev = 0x7fff128f1fe0}
Apr 19 16:26:44 [main] __dump_stack_frames(794) vinfo = 0x0
Apr 19 16:26:44 [main] __dump_stack_frames(780) #11 0x0000000000421899 in do_event_loop (sort_with_prio=false, timeout=<optimized out>) at event.c:211
Apr 19 16:26:44 [main] __dump_stack_frames(794) 211 ei->handler(ei->fd, events[i].events, ei->data);
Apr 19 16:26:44 [main] __dump_stack_frames(794) ei = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) i = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) nr = 1
Apr 19 16:26:44 [main] __dump_stack_frames(780) #12 0x0000000000404ebf in main (argc=<optimized out>, argv=<optimized out>) at sheep.c:691
Apr 19 16:26:44 [main] __dump_stack_frames(794) 691 event_loop(-1);
Apr 19 16:26:44 [main] __dump_stack_frames(794) ch = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) longindex = 0
Apr 19 16:26:44 [main] __dump_stack_frames(794) ret = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) port = 7000
Apr 19 16:26:44 [main] __dump_stack_frames(794) io_port = 7000
Apr 19 16:26:44 [main] __dump_stack_frames(794) log_level = 7
Apr 19 16:26:44 [main] __dump_stack_frames(794) nr_vnodes = 64
Apr 19 16:26:44 [main] __dump_stack_frames(794) dirp = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) short_options = 0x8d9ee0 "b:c:dDfF:ghi:j:l:nop:P:s:uvw:y:z:"
Apr 19 16:26:44 [main] __dump_stack_frames(794) dir = 0xb67030 "\230\335r\331\372\177"
Apr 19 16:26:44 [main] __dump_stack_frames(794) p = 0x7fff128f4803 ""
Apr 19 16:26:44 [main] __dump_stack_frames(794) pid_file = 0x0
Apr 19 16:26:44 [main] __dump_stack_frames(794) bindaddr = 0x0
Apr 19 16:26:44 [main] __dump_stack_frames(794) path = "/tmp/sheepdog/0/sheep.log", '\000' <repeats 879 times>, "=\214'\332\372\177", '\000' <repeats 75 times>, "P\000\000\000\000\000\000\fI\000\000\000\000\000\000\fI", '\000' <repeats 14 times>, "\005\000\000\000\000\000\000\000\000@ \000\000\000\000\
Apr 19 16:26:44 [main] __dump_stack_frames(794) 000\000` \000\000\000\000\000\220Q \000\000\000\000\000\240Q \000\000\000\000\000\000@\000\000\000\000\000\000\003", '\000' <repeats 63 times>, "J\221'\332\372\177\000\000\000\000\000\000\000\000\000\000\030UG\332\372\177\000\000/\000\000\000\000\000\000\
Apr 19 16:26:44 [main] __dump_stack_frames(794) 000\r\330'\332\372\177\000\000\000\000\000\000\000\000\000\000\020\000\000\000\000\000\000\000\032\000\000\000\000\000\000\000\001", '\000' <repeats 23 times>"\300, '\217"...
Apr 19 16:26:44 [main] __dump_stack_frames(794) argp = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) is_daemon = true
Apr 19 16:26:44 [main] __dump_stack_frames(794) to_stdout = false
Apr 19 16:26:44 [main] __dump_stack_frames(794) explicit_addr = true
Apr 19 16:26:44 [main] __dump_stack_frames(794) zone = 0
Apr 19 16:26:44 [main] __dump_stack_frames(794) free_space = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) cdrv = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) long_options = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) log_format = 0x7fff128f4823 "default"
Apr 19 16:26:44 [main] __dump_stack_frames(794) sheep_info = {port = 7000}
Apr 19 16:26:44 [main] __dump_stack_frames(794) __func__ = "main"
Apr 19 16:26:45 [main] __sd_dump_variable(717) dump __sys
Apr 19 16:26:45 [main] __sd_dump_variable(720) $1 = {cdrv = 0x638200, cdrv_option = 0x0, join_finished = false, this_node = {nid = {addr = '\000' <repeats 12 times>, "\177\000\000\001", port = 7000, io_addr = '\000' <repeats 15 times>, io_port = 0, pad = "\000\000\000"}, nr_vnodes = 64, zone = 0, spac
Apr 19 16:26:45 [main] __sd_dump_variable(725) e = 155314237440}, epoch = 0, status = 2, flags = 0, disk_space = 155314237440, failed_nodes = {next = 0x63a608, prev = 0x63a608}, delayed_nodes = {next = 0x63a618, prev = 0x63a618}, vdi_inuse = {0 <repeats 262144 times>}, nr_copies = 0 '\000', local_req_
Apr 19 16:26:45 [main] __sd_dump_variable(725) efd = 8, local_req_lock = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = '\000' <repeats 39 times>, __align = 0}, local_req_queue = {next = 0x83a658, prev = 0x83a658
Apr 19 16:26:45 [main] __sd_dump_variable(725) }, req_wait_queue = {next = 0x83a668, prev = 0x83a668}, nr_outstanding_reqs = 1, gateway_only = false, disable_recovery = false, nosync = false, gateway_wqueue = 0xb68480, io_wqueue = 0xb686d0, deletion_wqueue = 0xb68b70, recovery_wqueue = 0xb68920, recov
Apr 19 16:26:45 [main] __sd_dump_variable(725) ery_notify_wqueue = 0x0, block_wqueue = 0xb68dc0, sockfd_wqueue = 0xb69010, oc_reclaim_wqueue = 0x0, oc_push_wqueue = 0x0, md_wqueue = 0xb69260, enable_object_cache = false, object_cache_size = 0, object_cache_directio = false, use_journal = {val = 0}, ba
Apr 19 16:26:45 [main] __sd_dump_variable(725) ckend_dio = false, upgrade = false}
Thanks,
Yuan
More information about the sheepdog mailing list