[sheepdog] [PATCH 2/2] trace: fix stack before dumping a core file
Liu Yuan
namei.unix at gmail.com
Sat Aug 31 14:00:38 CEST 2013
On Fri, Aug 30, 2013 at 11:14:53PM +0900, MORITA Kazutaka wrote:
> From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
>
> Our tracer modifies a stack to inject trace_function_exit(), and it
> breaks a backtrace result of GDB. This fixes the stack data in
> crash_handler(), and makes it easy to debug with a dumped core file.
>
> Even when you attach GDB to a running sheep, you can also fix a stack
> information by calling __trace_cleanup() from the GDB console like as
> follows.
>
> (gdb) call __trace_cleanup()
>
This information is hard to newbies to find it after it is buried deep in the
commit log. How about adding a new newbie oriented doc/trace.txt?
Thanks
Yuan
>
> Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
> ---
> include/work.h | 1 +
> lib/work.c | 14 ++++++++++++++
> sheep/sheep.c | 2 ++
> sheep/trace/trace.c | 28 ++++++++++++++++++++++++++++
> sheep/trace/trace.h | 2 ++
> 5 files changed, 47 insertions(+)
>
> diff --git a/include/work.h b/include/work.h
> index a5808b5..322b930 100644
> --- a/include/work.h
> +++ b/include/work.h
> @@ -63,6 +63,7 @@ struct work_queue *create_work_queue(const char *name, enum wq_thread_control);
> struct work_queue *create_ordered_work_queue(const char *name);
> void suspend_worker_threads(void);
> void resume_worker_threads(void);
> +void call_on_worker_threads(void (*func)(void));
> void queue_work(struct work_queue *q, struct work *work);
> bool work_queue_empty(struct work_queue *q);
>
> diff --git a/lib/work.c b/lib/work.c
> index f19e315..ec626e8 100644
> --- a/lib/work.c
> +++ b/lib/work.c
> @@ -39,6 +39,7 @@ static size_t tid_max;
> static unsigned long *tid_map;
> static int resume_efd;
> static int ack_efd;
> +static void (*suspend_callback)(void);
>
> /*
> * The protection period from shrinking work queue. This is necessary
> @@ -203,6 +204,16 @@ void resume_worker_threads(void)
> }
> }
>
> +/* suspend all the threads, call 'func', and resume the threads. */
> +void call_on_worker_threads(void (*func)(void))
> +{
> + suspend_callback = func;
> + suspend_worker_threads();
> + func();
> + suspend_callback = NULL;
> + resume_worker_threads();
> +}
> +
> void queue_work(struct work_queue *q, struct work *work)
> {
> struct worker_info *wi = container_of(q, struct worker_info, q);
> @@ -312,6 +323,9 @@ static void suspend(int num)
> {
> int uninitialized_var(value);
>
> + if (suspend_callback)
> + suspend_callback();
> +
> eventfd_xwrite(ack_efd, 1); /* ack of suspend */
> value = eventfd_xread(resume_efd);
> assert(value == 1);
> diff --git a/sheep/sheep.c b/sheep/sheep.c
> index 5856ba2..db131c6 100644
> --- a/sheep/sheep.c
> +++ b/sheep/sheep.c
> @@ -249,6 +249,8 @@ static void crash_handler(int signo)
> {
> sd_emerg("sheep exits unexpectedly (%s).", strsignal(signo));
>
> + trace_cleanup();
> +
> sd_backtrace();
> sd_dump_variable(__sys);
>
> diff --git a/sheep/trace/trace.c b/sheep/trace/trace.c
> index 1d3007b..746e7c6 100644
> --- a/sheep/trace/trace.c
> +++ b/sheep/trace/trace.c
> @@ -23,6 +23,7 @@ static __thread int ret_stack_index;
> static __thread struct {
> const struct caller *caller;
> unsigned long ret;
> + unsigned long *retp;
This struct become complex time by time, I think it is good time to add comment
for each field along with this patch.
> } trace_ret_stack[SD_MAX_STACK_DEPTH];
>
> static struct caller *callers;
> @@ -121,6 +122,7 @@ void trace_function_enter(unsigned long ip, unsigned long *ret_addr)
>
> trace_ret_stack[ret_stack_index].caller = caller;
> trace_ret_stack[ret_stack_index].ret = *ret_addr;
> + trace_ret_stack[ret_stack_index].retp = ret_addr;
> ret_stack_index++;
> *ret_addr = (unsigned long)trace_return_caller;
>
> @@ -420,3 +422,29 @@ int trace_init(void)
> sd_info("trace support enabled. cpu count %d.", nr_cpu);
> return 0;
> }
> +
> +static void __trace_cleanup(void)
If this function is supposed to be called by users, I think we need a more
meaningful name without any '_'.
How about trace_repair_stack?
> +{
> + if (in_trace) {
> + /*
> + * We cannot update trace_ret_stack if trace_function_enter() or
> + * trace_function_exit() is using it.
> + */
> + sd_info("failed to clean up trace stack");
> + return;
> + }
> +
> + for (int i = 0; i < ret_stack_index; i++)
> + *trace_ret_stack[i].retp = trace_ret_stack[i].ret;
> + ret_stack_index = 0;
> +}
> +
> +void trace_cleanup(void)
trace_repair looks better.
Thanks
Yuan
More information about the sheepdog
mailing list