[sheepdog] [PATCH 2/2] trace: fix stack before dumping a core file

Liu Yuan namei.unix at gmail.com
Sat Aug 31 14:00:38 CEST 2013


On Fri, Aug 30, 2013 at 11:14:53PM +0900, MORITA Kazutaka wrote:
> From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
> 
> Our tracer modifies a stack to inject trace_function_exit(), and it
> breaks a backtrace result of GDB.  This fixes the stack data in
> crash_handler(), and makes it easy to debug with a dumped core file.
> 
> Even when you attach GDB to a running sheep, you can also fix a stack
> information by calling __trace_cleanup() from the GDB console like as
> follows.
> 
>  (gdb) call __trace_cleanup()
> 

This information is hard to newbies to find it after it is buried deep in the
commit log. How about adding a new newbie oriented doc/trace.txt?

Thanks
Yuan

>
> Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
> ---
>  include/work.h      |    1 +
>  lib/work.c          |   14 ++++++++++++++
>  sheep/sheep.c       |    2 ++
>  sheep/trace/trace.c |   28 ++++++++++++++++++++++++++++
>  sheep/trace/trace.h |    2 ++
>  5 files changed, 47 insertions(+)
> 
> diff --git a/include/work.h b/include/work.h
> index a5808b5..322b930 100644
> --- a/include/work.h
> +++ b/include/work.h
> @@ -63,6 +63,7 @@ struct work_queue *create_work_queue(const char *name, enum wq_thread_control);
>  struct work_queue *create_ordered_work_queue(const char *name);
>  void suspend_worker_threads(void);
>  void resume_worker_threads(void);
> +void call_on_worker_threads(void (*func)(void));
>  void queue_work(struct work_queue *q, struct work *work);
>  bool work_queue_empty(struct work_queue *q);
>  
> diff --git a/lib/work.c b/lib/work.c
> index f19e315..ec626e8 100644
> --- a/lib/work.c
> +++ b/lib/work.c
> @@ -39,6 +39,7 @@ static size_t tid_max;
>  static unsigned long *tid_map;
>  static int resume_efd;
>  static int ack_efd;
> +static void (*suspend_callback)(void);
>  
>  /*
>   * The protection period from shrinking work queue.  This is necessary
> @@ -203,6 +204,16 @@ void resume_worker_threads(void)
>  	}
>  }
>  
> +/* suspend all the threads, call 'func', and resume the threads. */
> +void call_on_worker_threads(void (*func)(void))
> +{
> +	suspend_callback = func;
> +	suspend_worker_threads();
> +	func();
> +	suspend_callback = NULL;
> +	resume_worker_threads();
> +}
> +
>  void queue_work(struct work_queue *q, struct work *work)
>  {
>  	struct worker_info *wi = container_of(q, struct worker_info, q);
> @@ -312,6 +323,9 @@ static void suspend(int num)
>  {
>  	int uninitialized_var(value);
>  
> +	if (suspend_callback)
> +		suspend_callback();
> +
>  	eventfd_xwrite(ack_efd, 1); /* ack of suspend */
>  	value = eventfd_xread(resume_efd);
>  	assert(value == 1);
> diff --git a/sheep/sheep.c b/sheep/sheep.c
> index 5856ba2..db131c6 100644
> --- a/sheep/sheep.c
> +++ b/sheep/sheep.c
> @@ -249,6 +249,8 @@ static void crash_handler(int signo)
>  {
>  	sd_emerg("sheep exits unexpectedly (%s).", strsignal(signo));
>  
> +	trace_cleanup();
> +
>  	sd_backtrace();
>  	sd_dump_variable(__sys);
>  
> diff --git a/sheep/trace/trace.c b/sheep/trace/trace.c
> index 1d3007b..746e7c6 100644
> --- a/sheep/trace/trace.c
> +++ b/sheep/trace/trace.c
> @@ -23,6 +23,7 @@ static __thread int ret_stack_index;
>  static __thread struct {
>  	const struct caller *caller;
>  	unsigned long ret;
> +	unsigned long *retp;

This struct become complex time by time, I think it is good time to add comment
for each field along with this patch.

>  } trace_ret_stack[SD_MAX_STACK_DEPTH];
>  
>  static struct caller *callers;
> @@ -121,6 +122,7 @@ void trace_function_enter(unsigned long ip, unsigned long *ret_addr)
>  
>  	trace_ret_stack[ret_stack_index].caller = caller;
>  	trace_ret_stack[ret_stack_index].ret = *ret_addr;
> +	trace_ret_stack[ret_stack_index].retp = ret_addr;
>  	ret_stack_index++;
>  	*ret_addr = (unsigned long)trace_return_caller;
>  
> @@ -420,3 +422,29 @@ int trace_init(void)
>  	sd_info("trace support enabled. cpu count %d.", nr_cpu);
>  	return 0;
>  }
> +
> +static void __trace_cleanup(void)

If this function is supposed to be called by users, I think we need a more
meaningful name without any '_'.

How about trace_repair_stack?

> +{
> +	if (in_trace) {
> +		/*
> +		 * We cannot update trace_ret_stack if trace_function_enter() or
> +		 * trace_function_exit() is using it.
> +		 */
> +		sd_info("failed to clean up trace stack");
> +		return;
> +	}
> +
> +	for (int i = 0; i < ret_stack_index; i++)
> +		*trace_ret_stack[i].retp = trace_ret_stack[i].ret;
> +	ret_stack_index = 0;
> +}
> +
> +void trace_cleanup(void)

trace_repair looks better.

Thanks
Yuan



More information about the sheepdog mailing list