[sheepdog] [PATCH v2 08/11] work: don't call get_node_info in the worker thread

Liu Yuan namei.unix at gmail.com
Fri Apr 19 10:28:23 CEST 2013


On 04/19/2013 03:14 PM, MORITA Kazutaka wrote:
> Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
> ---
>  sheep/work.c | 22 ++++++++++++----------
>  sheep/work.h |  1 +
>  2 files changed, 13 insertions(+), 10 deletions(-)
> 
> diff --git a/sheep/work.c b/sheep/work.c
> index 29e1f86..3dce178 100644
> --- a/sheep/work.c
> +++ b/sheep/work.c
> @@ -55,27 +55,22 @@ static uint64_t get_msec_time(void)
>  	return tv.tv_sec * 1000 + tv.tv_usec / 1000;
>  }
>  
> -static inline uint64_t wq_get_roof(enum wq_thread_control tc)
> +static inline uint64_t wq_get_roof(struct worker_info *wi)
>  {
> -	struct vnode_info *vinfo;
> -	int nr_nodes;
>  	uint64_t nr = 1;
>  
> -	switch (tc) {
> +	switch (wi->tc) {
>  	case WQ_ORDERED:
>  		break;
>  	case WQ_DYNAMIC:
> -		vinfo = get_vnode_info();
> -		nr_nodes = vinfo->nr_nodes;
> -		put_vnode_info(vinfo);
>  		/* FIXME: 2 * nr_nodes threads. No rationale yet. */
> -		nr = nr_nodes * 2;
> +		nr = wi->nr_nodes * 2;
>  		break;
>  	case WQ_UNLIMITED:
>  		nr = SIZE_MAX;
>  		break;
>  	default:
> -		panic("Invalid threads control %d", tc);
> +		panic("Invalid threads control %d", wi->tc);
>  	}
>  	return nr;
>  }
> @@ -83,7 +78,7 @@ static inline uint64_t wq_get_roof(enum wq_thread_control tc)
>  static bool wq_need_grow(struct worker_info *wi)
>  {
>  	if (wi->nr_threads < wi->nr_pending + wi->nr_running &&
> -	    wi->nr_threads * 2 <= wq_get_roof(wi->tc)) {
> +	    wi->nr_threads * 2 <= wq_get_roof(wi)) {
>  		wi->tm_end_of_protection = get_msec_time() +
>  			WQ_PROTECTION_PERIOD;
>  		return true;
> @@ -153,12 +148,17 @@ static void bs_thread_request_done(int fd, int events, void *data)
>  	struct work *work;
>  	eventfd_t value;
>  	LIST_HEAD(list);
> +	struct vnode_info *vinfo;
>  
>  	ret = eventfd_read(fd, &value);
>  	if (ret < 0)
>  		return;
>  
> +	vinfo = get_vnode_info();
> +
>  	list_for_each_entry(wi, &worker_info_list, worker_info_siblings) {
> +		wi->nr_nodes = vinfo->nr_nodes;
> +
>  		pthread_mutex_lock(&wi->finished_lock);
>  		list_splice_init(&wi->finished_list, &list);
>  		pthread_mutex_unlock(&wi->finished_lock);
> @@ -170,6 +170,8 @@ static void bs_thread_request_done(int fd, int events, void *data)
>  			work->done(work);
>  		}
>  	}
> +
> +	put_vnode_info(vinfo);
>  }
>  
>  static void *worker_routine(void *arg)
> diff --git a/sheep/work.h b/sheep/work.h
> index 5706a84..eee41be 100644
> --- a/sheep/work.h
> +++ b/sheep/work.h
> @@ -46,6 +46,7 @@ struct worker_info {
>  	/* we cannot shrink work queue till this time */
>  	uint64_t tm_end_of_protection;
>  	enum wq_thread_control tc;
> +	size_t nr_nodes;
>  };
>  
>  extern struct list_head worker_info_list;
> 

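It seems that this patch causes a problem: I can't pass tests/036 with the corosync driver.
The dump below shows vinfo = 0x0 at work.c:160 (`wi->nr_nodes = vinfo->nr_nodes`), so get_vnode_info() apparently returned NULL and bs_thread_request_done() dereferenced it; note that join_finished is still false at that point.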

Backtrace from the core dump:
(gdb) bt
#0  0x00007ffada064b7b in raise (sig=<optimized out>) at ../nptl/sysdeps/unix/sysv/linux/pt-raise.c:42
#1  0x000000000042530f in reraise_crash_signal (signo=11, status=1) at util.c:414
#2  <signal handler called>
#3  bs_thread_request_done (fd=<optimized out>, events=<optimized out>, data=<optimized out>) at work.c:160
#4  0x0000000000421899 in do_event_loop (sort_with_prio=false, timeout=<optimized out>) at event.c:211
#5  0x0000000000404ebf in main (argc=<optimized out>, argv=<optimized out>) at sheep.c:691


The sheep.log:
Apr 19 16:26:42 [main] queue_request(353) READ_VDIS, 2
Apr 19 16:26:42 [io 21652] do_process_work(1344) 15, 0, 0
Apr 19 16:26:42 [main] crash_handler(181) sheep exits unexpectedly (Segmentation fault).
Apr 19 16:26:42 [main] sd_backtrace(833) sheep.c:183: crash_handler
Apr 19 16:26:42 [main] sd_backtrace(847) /lib/x86_64-linux-gnu/libpthread.so.0(+0xfcaf) [0x7ffada064caf]
Apr 19 16:26:42 [main] sd_backtrace(833) work.c:159: bs_thread_request_done
Apr 19 16:26:43 [main] sd_backtrace(833) event.c:211: do_event_loop
Apr 19 16:26:43 [main] sd_backtrace(833) sheep.c:691: main
Apr 19 16:26:43 [main] sd_backtrace(847) /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xec) [0x7ffad939676c]
Apr 19 16:26:43 [main] sd_backtrace(847) ../sheep/sheep() [0x405618]
Apr 19 16:26:43 [main] __dump_stack_frames(780) #7  0x00000000004235a8 in sd_backtrace () at logger.c:852
Apr 19 16:26:43 [main] __dump_stack_frames(794) 852		dump_stack_frames();
Apr 19 16:26:43 [main] __dump_stack_frames(794) current_sp = 0x7fff128ce8e0
Apr 19 16:26:43 [main] __dump_stack_frames(794) addrs = {0x42344d, 0x405798, 0x7ffada064cb0, 0x40e9a0, 0x421899, 0x404ebf, 0x7ffad939676d, 0x405619, 0x0 <repeats 1016 times>}
Apr 19 16:26:43 [main] __dump_stack_frames(794) i = <optimized out>
Apr 19 16:26:43 [main] __dump_stack_frames(794) n = <optimized out>
Apr 19 16:26:43 [main] __dump_stack_frames(794) __func__ = "sd_backtrace"
Apr 19 16:26:43 [main] __dump_stack_frames(780) #8  0x0000000000405798 in crash_handler (signo=11) at sheep.c:183
Apr 19 16:26:43 [main] __dump_stack_frames(794) 183		sd_backtrace();
Apr 19 16:26:43 [main] __dump_stack_frames(794) __func__ = "crash_handler"
Apr 19 16:26:44 [main] __dump_stack_frames(780) #9  <signal handler called>
Apr 19 16:26:44 [main] __dump_stack_frames(794) No symbol table info available.
Apr 19 16:26:44 [main] __dump_stack_frames(780) #10 bs_thread_request_done (fd=<optimized out>, events=<optimized out>, data=<optimized out>) at work.c:160
Apr 19 16:26:44 [main] __dump_stack_frames(794) 160			wi->nr_nodes = vinfo->nr_nodes;
Apr 19 16:26:44 [main] __dump_stack_frames(794) ret = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) wi = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) work = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) value = 1
Apr 19 16:26:44 [main] __dump_stack_frames(794) list = {next = 0x7fff128f1fe0, prev = 0x7fff128f1fe0}
Apr 19 16:26:44 [main] __dump_stack_frames(794) vinfo = 0x0
Apr 19 16:26:44 [main] __dump_stack_frames(780) #11 0x0000000000421899 in do_event_loop (sort_with_prio=false, timeout=<optimized out>) at event.c:211
Apr 19 16:26:44 [main] __dump_stack_frames(794) 211				ei->handler(ei->fd, events[i].events, ei->data);
Apr 19 16:26:44 [main] __dump_stack_frames(794) ei = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) i = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) nr = 1
Apr 19 16:26:44 [main] __dump_stack_frames(780) #12 0x0000000000404ebf in main (argc=<optimized out>, argv=<optimized out>) at sheep.c:691
Apr 19 16:26:44 [main] __dump_stack_frames(794) 691			event_loop(-1);
Apr 19 16:26:44 [main] __dump_stack_frames(794) ch = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) longindex = 0
Apr 19 16:26:44 [main] __dump_stack_frames(794) ret = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) port = 7000
Apr 19 16:26:44 [main] __dump_stack_frames(794) io_port = 7000
Apr 19 16:26:44 [main] __dump_stack_frames(794) log_level = 7
Apr 19 16:26:44 [main] __dump_stack_frames(794) nr_vnodes = 64
Apr 19 16:26:44 [main] __dump_stack_frames(794) dirp = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) short_options = 0x8d9ee0 "b:c:dDfF:ghi:j:l:nop:P:s:uvw:y:z:"
Apr 19 16:26:44 [main] __dump_stack_frames(794) dir = 0xb67030 "\230\335r\331\372\177"
Apr 19 16:26:44 [main] __dump_stack_frames(794) p = 0x7fff128f4803 ""
Apr 19 16:26:44 [main] __dump_stack_frames(794) pid_file = 0x0
Apr 19 16:26:44 [main] __dump_stack_frames(794) bindaddr = 0x0
Apr 19 16:26:44 [main] __dump_stack_frames(794) path = "/tmp/sheepdog/0/sheep.log", '\000' <repeats 879 times>, "=\214'\332\372\177", '\000' <repeats 75 times>, "P\000\000\000\000\000\000\fI\000\000\000\000\000\000\fI", '\000' <repeats 14 times>, "\005\000\000\000\000\000\000\000\000@ \000\000\000\000\
Apr 19 16:26:44 [main] __dump_stack_frames(794) 000\000` \000\000\000\000\000\220Q \000\000\000\000\000\240Q \000\000\000\000\000\000@\000\000\000\000\000\000\003", '\000' <repeats 63 times>, "J\221'\332\372\177\000\000\000\000\000\000\000\000\000\000\030UG\332\372\177\000\000/\000\000\000\000\000\000\
Apr 19 16:26:44 [main] __dump_stack_frames(794) 000\r\330'\332\372\177\000\000\000\000\000\000\000\000\000\000\020\000\000\000\000\000\000\000\032\000\000\000\000\000\000\000\001", '\000' <repeats 23 times>"\300, '\217"...
Apr 19 16:26:44 [main] __dump_stack_frames(794) argp = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) is_daemon = true
Apr 19 16:26:44 [main] __dump_stack_frames(794) to_stdout = false
Apr 19 16:26:44 [main] __dump_stack_frames(794) explicit_addr = true
Apr 19 16:26:44 [main] __dump_stack_frames(794) zone = 0
Apr 19 16:26:44 [main] __dump_stack_frames(794) free_space = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) cdrv = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) long_options = <optimized out>
Apr 19 16:26:44 [main] __dump_stack_frames(794) log_format = 0x7fff128f4823 "default"
Apr 19 16:26:44 [main] __dump_stack_frames(794) sheep_info = {port = 7000}
Apr 19 16:26:44 [main] __dump_stack_frames(794) __func__ = "main"
Apr 19 16:26:45 [main] __sd_dump_variable(717) dump __sys
Apr 19 16:26:45 [main] __sd_dump_variable(720) $1 = {cdrv = 0x638200, cdrv_option = 0x0, join_finished = false, this_node = {nid = {addr = '\000' <repeats 12 times>, "\177\000\000\001", port = 7000, io_addr = '\000' <repeats 15 times>, io_port = 0, pad = "\000\000\000"}, nr_vnodes = 64, zone = 0, spac
Apr 19 16:26:45 [main] __sd_dump_variable(725) e = 155314237440}, epoch = 0, status = 2, flags = 0, disk_space = 155314237440, failed_nodes = {next = 0x63a608, prev = 0x63a608}, delayed_nodes = {next = 0x63a618, prev = 0x63a618}, vdi_inuse = {0 <repeats 262144 times>}, nr_copies = 0 '\000', local_req_
Apr 19 16:26:45 [main] __sd_dump_variable(725) efd = 8, local_req_lock = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = '\000' <repeats 39 times>, __align = 0}, local_req_queue = {next = 0x83a658, prev = 0x83a658
Apr 19 16:26:45 [main] __sd_dump_variable(725) }, req_wait_queue = {next = 0x83a668, prev = 0x83a668}, nr_outstanding_reqs = 1, gateway_only = false, disable_recovery = false, nosync = false, gateway_wqueue = 0xb68480, io_wqueue = 0xb686d0, deletion_wqueue = 0xb68b70, recovery_wqueue = 0xb68920, recov
Apr 19 16:26:45 [main] __sd_dump_variable(725) ery_notify_wqueue = 0x0, block_wqueue = 0xb68dc0, sockfd_wqueue = 0xb69010, oc_reclaim_wqueue = 0x0, oc_push_wqueue = 0x0, md_wqueue = 0xb69260, enable_object_cache = false, object_cache_size = 0, object_cache_directio = false, use_journal = {val = 0}, ba
Apr 19 16:26:45 [main] __sd_dump_variable(725) ckend_dio = false, upgrade = false}

Thanks,
Yuan



More information about the sheepdog mailing list