[sheepdog] [PATCH 0/4] show more debug info when program terminates unexpectedly

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Wed Feb 27 18:15:34 CET 2013


This patch dumps function arguments and local variabls for each stack,
and cluster info with GDB when the program terminates unexpectedly.

Example:

If the following diff is added,
==
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -719,6 +719,7 @@ int main(int argc, char **argv)
 	check_host_env();
 	sd_printf(SDOG_NOTICE, "sheepdog daemon (version %s) started",
 		  PACKAGE_VERSION);
+	abort();
 
 	while (sys->nr_outstanding_reqs != 0 ||
 	       (sys->status != SD_STATUS_KILLED &&

==

we can get the following log output.

Feb 28 00:58:39 [main] send_join_request(1040) IPv4 ip:127.0.0.1 port:7000
Feb 28 00:58:39 [main] check_host_env(422) WARN: Allowed open files 1024 too small, suggested 1024000
Feb 28 00:58:39 [main] main(721) sheepdog daemon (version 0.5.5_134_gb73c4ad) started
Feb 28 00:58:39 [main] crash_handler(181) sheep exits unexpectedly (Aborted).
Feb 28 00:58:39 [main] sd_backtrace(776) sheep.c:183: crash_handler
Feb 28 00:58:39 [main] sd_backtrace(788) /lib/libpthread.so.0(+0xefef) [0x7fbe6b6d4fef]
Feb 28 00:58:39 [main] sd_backtrace(788) /lib/libc.so.6(gsignal+0x34) [0x7fbe6ad0b1b4]
Feb 28 00:58:39 [main] sd_backtrace(788) /lib/libc.so.6(abort+0x17f) [0x7fbe6ad0dfbf]
Feb 28 00:58:39 [main] sd_backtrace(776) sheep.c:722: main
Feb 28 00:58:39 [main] sd_backtrace(788) /lib/libc.so.6(__libc_start_main+0xfc) [0x7fbe6acf7c8c]
Feb 28 00:58:39 [main] sd_backtrace(788) ./sheep/sheep() [0x404028]
Feb 28 00:58:39 [main] __dump_stack_frames(734) #6  0x000000000042601c in sd_backtrace () at logger.c:793
Feb 28 00:58:39 [main] __dump_stack_frames(739) 793		dump_stack_frames();
Feb 28 00:58:39 [main] __dump_stack_frames(739) current_sp = 0x7fff74fe97a0
Feb 28 00:58:39 [main] __dump_stack_frames(739) addrs = {0x425e97, 0x4046ca, 0x7fbe6b6d4ff0, 0x7fbe6ad0b1b5, 0x7fbe6ad0dfc0, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x405d06, 0x7fbe6acf7c8d, 0x404029, 0x0 <repeats 439 times>, 0x7fbe6ad49566, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x7fff7500bd90, 0x7fff7500bd80, 0x21, 0x7fff7500c7f0, 0x7fff7500bd90, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x7fbe6ad1ae19, 0x0, 0x2, 0x42c7fe, 0x42c7fe, 0x0, 0x0, 0x0, 0x0, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x7fff7500bc70, 0x0, 0x42c7e0, 0x0, 0x0, 0x7fff7500bcdc, 0x7fff7500bd18, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x7fff7500bcf8, 0x0, 0x0, 0x7fff7500b860, 0x0, 0x0, 0x0, 0x7fff7500bcd0, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0xffffffffffffffff, 0x0, 0x7fff7500bc48, 0x0, 0x0, 0xffffe93800000000, 0x20, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xffffffff, 0x0, 0x73, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x7fff7500bc48, 0x0, 0x42c7fa, 0x0, 0x100000000, 0x0 <repeats 130 times>, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x3000000020, 0x7fff7500c8e0, 0x7fff7500c810, 0x0 <repeats 16 times>, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x7fbe6ad49566, 0x7fff7500c3f0, 0x7fff7500c3e0, 0x1b, 0x7fff7500ce50, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x7fff7500c3f0, 0x7fbe6ad1ae19, 0x0, 0x7fff7500bd90, 0x7fff7500c390, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x7fff7500be80, 0x42c7e0, 0x7fff7500c7f0, 0x3ff, 0x0, 0x7fff7500c390, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x7fbe6ad40732, 0xfbad8001, 0x7fff7500c390, 0x7fff7500c390, 0x7fff7500c390, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x7fff7500c390, 0x7fff7500c3b3, 0x7fff7500c78f, 0x7fff7500c390, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x7fff7500c78f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x42ca08, 0x0, 0x7fff75000000, 0x0, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x42cca0, 0x0, 0x0, 0x0, 0x7fff7500c408, 0x7fff7500c3e8, 0xffffffff, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x7500c31100000000, 0x7fff7500bf50, 0x7fbe6b033fc0, 0x0, 0x42cf14, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x207500c3c0, 0x7fbe68467193, 0x0, 0x7fbe684671a0, 0x7fff7500bee0, 0x424ca3, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x0, 0xb5750af8fd, 0x42cf06, 0x7500c7c0, 0x7fff7500c330, 0x7fbe6846717f, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x7fbe684671cf, 0x424f03, 0x7fff7500c311, 0x7fff7500c7f0, 0x42c7e0, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0xb500000000, 0x42cf06, 0x42ca3b, 0x73, 0x7fff7500c338, 0x0, 0x42ccb9, 0x0, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x100000000, 0x0 <repeats 99 times>, 0x7fbe6b8eb00a, 0x0, 0x7fff7500c300, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x0, 0x7fff7500c300, 0x7fbe6bade768, 0xb, 0x105942c9, 0x7fbe6b8eb82c, 0x0, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x0, 0x9, 0x41650b, 0x7fbe6acdca60, 0x7fff7500c450, 0x7fbe6acdcc28, 
Feb 28 00:58:39 [main] __dump_stack_frames(739)   0x7fbe6ace8bc8, 0x0, 0x0, 0x0, 0x7fbe6bade7c8...}
Feb 28 00:58:39 [main] __dump_stack_frames(739) i = 8
Feb 28 00:58:39 [main] __dump_stack_frames(739) n = 8
Feb 28 00:58:39 [main] __dump_stack_frames(739) __func__ = "sd_backtrace"
Feb 28 00:58:39 [main] __dump_stack_frames(734) #7  0x00000000004046ca in crash_handler (signo=6) at sheep.c:183
Feb 28 00:58:39 [main] __dump_stack_frames(739) 183		sd_backtrace();
Feb 28 00:58:39 [main] __dump_stack_frames(739) __func__ = "crash_handler"
Feb 28 00:58:40 [main] __dump_stack_frames(734) #8  <signal handler called>
Feb 28 00:58:40 [main] __dump_stack_frames(739) No symbol table info available.
Feb 28 00:58:40 [main] __dump_stack_frames(734) #9  0x00007fbe6ad0b1b5 in *__GI_raise (sig=<value optimized out>)
Feb 28 00:58:40 [main] __dump_stack_frames(739)     at ../nptl/sysdeps/unix/sysv/linux/raise.c:64
Feb 28 00:58:40 [main] __dump_stack_frames(739) 	in ../nptl/sysdeps/unix/sysv/linux/raise.c
Feb 28 00:58:40 [main] __dump_stack_frames(739) pid = <value optimized out>
Feb 28 00:58:40 [main] __dump_stack_frames(739) selftid = <value optimized out>
Feb 28 00:58:40 [main] __dump_stack_frames(734) #10 0x00007fbe6ad0dfc0 in *__GI_abort () at abort.c:92
Feb 28 00:58:40 [main] __dump_stack_frames(739) 	in abort.c
Feb 28 00:58:40 [main] __dump_stack_frames(739) act = {__sigaction_handler = {sa_handler = 0x42ca08, sa_sigaction = 0x42ca08}, 
Feb 28 00:58:40 [main] __dump_stack_frames(739)   sa_mask = {__val = {4378240, 140735156375488, 4346376, 4377760, 
Feb 28 00:58:40 [main] __dump_stack_frames(739)       3098634407856, 140455812397832, 140455827081608, 206158430248, 
Feb 28 00:58:40 [main] __dump_stack_frames(739)       4294967295, 140735156375296, 5, 6529768, 0, 0, 0, 0}}, 
Feb 28 00:58:40 [main] __dump_stack_frames(739)   sa_flags = 1804531474, sa_restorer = 0x7fff00000005}
Feb 28 00:58:40 [main] __dump_stack_frames(739) sigs = {__val = {32, 0 <repeats 15 times>}}
Feb 28 00:58:40 [main] __dump_stack_frames(734) #11 0x0000000000405d06 in main (argc=2, argv=0x7fff7500e178) at sheep.c:722
Feb 28 00:58:40 [main] __dump_stack_frames(739) 722		abort();
Feb 28 00:58:40 [main] __dump_stack_frames(739) ch = -1
Feb 28 00:58:40 [main] __dump_stack_frames(739) longindex = 6531104
Feb 28 00:58:40 [main] __dump_stack_frames(739) ret = 0
Feb 28 00:58:40 [main] __dump_stack_frames(739) port = 7000
Feb 28 00:58:40 [main] __dump_stack_frames(739) io_port = 7000
Feb 28 00:58:40 [main] __dump_stack_frames(739) log_level = 6
Feb 28 00:58:40 [main] __dump_stack_frames(739) nr_vnodes = 64
Feb 28 00:58:40 [main] __dump_stack_frames(739) dirp = 0x7fff7500f1c0 "/store/0/"
Feb 28 00:58:40 [main] __dump_stack_frames(739) short_options = 0x864a40 "b:c:dDfF:ghi:j:l:nop:P:s:uvw:y:z:"
Feb 28 00:58:40 [main] __dump_stack_frames(739) dir = 0x2404010 ""
Feb 28 00:58:40 [main] __dump_stack_frames(739) p = 0x7fff7500e010 "\006"
Feb 28 00:58:40 [main] __dump_stack_frames(739) pid_file = 0x0
Feb 28 00:58:40 [main] __dump_stack_frames(739) bindaddr = 0x0
Feb 28 00:58:40 [main] __dump_stack_frames(739) path = "/data2/store/0/sheep.log", '\000' <repeats 32 times>, "\035\006\217k\276\177\000\000\000\000\000\000\000\000\000\000=d\255j\276\177\000\000\000\000\000\000\000\000\000\000\"\227\216k\276\177", '\000' <repeats 42 times>"\217, \204\216k\276\177\000\
Feb 28 00:58:40 [main] __dump_stack_frames(739) 000\000\000\000\000\000\000\000\000U\272\003k\276\177\000\000\000\000\000\000\001", '\000' <repeats 19 times>"\217, \204\216k\276\177", '\000' <repeats 27 times>, "p\000\000\000\000\000\000\260i\000\000\000\000\000\000\260i", '\000' <repeats 14 times>, "\
Feb 28 00:58:40 [main] __dump_stack_frames(739) 005\000\000\000\000\000\000\000\000` \000\000\000\000\000\000\200 \000\000\000\000\000lr \000\000\000\000\000\320{ \000\000\000\000\000\000`\000\000\000\000\000\000\003\000\000\000\000\000\000\000\000"...
Feb 28 00:58:40 [main] __dump_stack_frames(739) is_daemon = true
Feb 28 00:58:40 [main] __dump_stack_frames(739) to_stdout = false
Feb 28 00:58:40 [main] __dump_stack_frames(739) explicit_addr = false
Feb 28 00:58:40 [main] __dump_stack_frames(739) zone = -1
Feb 28 00:58:40 [main] __dump_stack_frames(739) free_space = 0
Feb 28 00:58:40 [main] __dump_stack_frames(739) cdrv = 0x42c035
Feb 28 00:58:40 [main] __dump_stack_frames(739) long_options = 0x862a20
Feb 28 00:58:40 [main] __dump_stack_frames(739) log_format = 0x42cab5 "default"
Feb 28 00:58:40 [main] __dump_stack_frames(739) sheep_info = {port = 7000}
Feb 28 00:58:40 [main] __dump_stack_frames(739) __func__ = "main"
Feb 28 00:58:40 [main] __sd_dump_variable(675) dump __sys
Feb 28 00:58:40 [main] __sd_dump_variable(678) $1 = {cdrv = 0x63c6e0, cdrv_option = 0x0, join_finished = false, this_node = {
Feb 28 00:58:40 [main] __sd_dump_variable(683)     nid = {addr = '\000' <repeats 12 times>, "\177\000\000\001", port = 7000, 
Feb 28 00:58:40 [main] __sd_dump_variable(683)       io_addr = '\000' <repeats 15 times>, io_port = 0, pad = "\000\000\000"}, 
Feb 28 00:58:40 [main] __sd_dump_variable(683)     nr_vnodes = 64, zone = 16777343, space = 981442125824}, epoch = 0, 
Feb 28 00:58:40 [main] __sd_dump_variable(683)   status = 2, flags = 0, disk_space = 981442125824, failed_nodes = {
Feb 28 00:58:40 [main] __sd_dump_variable(683)     next = 0x63c948, prev = 0x63c948}, delayed_nodes = {next = 0x63c958, 
Feb 28 00:58:40 [main] __sd_dump_variable(683)     prev = 0x63c958}, pending_block_list = {next = 0x63c968, prev = 0x63c968}, 
Feb 28 00:58:40 [main] __sd_dump_variable(683)   pending_notify_list = {next = 0x63c978, prev = 0x63c978}, vdi_inuse = {
Feb 28 00:58:40 [main] __sd_dump_variable(683)     0 <repeats 262144 times>}, nr_copies = 0 '\000', req_efd = 9, 
Feb 28 00:58:40 [main] __sd_dump_variable(683)   wait_req_lock = {__data = {__lock = 0, __count = 0, __owner = 0, 
Feb 28 00:58:40 [main] __sd_dump_variable(683)       __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, 
Feb 28 00:58:40 [main] __sd_dump_variable(683)         __next = 0x0}}, __size = '\000' <repeats 39 times>, __align = 0}, 
Feb 28 00:58:40 [main] __sd_dump_variable(683)   wait_req_queue = {next = 0x83c9b8, prev = 0x83c9b8}, wait_rw_queue = {
Feb 28 00:58:40 [main] __sd_dump_variable(683)     next = 0x83c9c8, prev = 0x83c9c8}, wait_obj_queue = {next = 0x83c9d8, 
Feb 28 00:58:40 [main] __sd_dump_variable(683)     prev = 0x83c9d8}, nr_outstanding_reqs = 0, recovered_epoch = 0, 
Feb 28 00:58:40 [main] __sd_dump_variable(683)   gateway_only = false, disable_recovery = false, nosync = false, 
Feb 28 00:58:40 [main] __sd_dump_variable(683)   gateway_wqueue = 0x2406520, io_wqueue = 0x2406620, 
Feb 28 00:58:40 [main] __sd_dump_variable(683)   deletion_wqueue = 0x2406820, recovery_wqueue = 0x2406720, 
Feb 28 00:58:40 [main] __sd_dump_variable(683)   recovery_notify_wqueue = 0x0, block_wqueue = 0x2406a50, 
Feb 28 00:58:40 [main] __sd_dump_variable(683)   sockfd_wqueue = 0x2406c80, oc_reclaim_wqueue = 0x0, oc_push_wqueue = 0x0, 
Feb 28 00:58:40 [main] __sd_dump_variable(683)   enabled_cache_type = 0, object_cache_size = 0, 
Feb 28 00:58:40 [main] __sd_dump_variable(683)   object_cache_directio = false, use_journal = {val = 0}, backend_dio = false, 
Feb 28 00:58:40 [main] __sd_dump_variable(683)   upgrade = false}
Feb 28 00:58:40 [main] crash_handler(457) sheep pid 9285 exited unexpectedly.


MORITA Kazutaka (4):
  logger: use ARG_MAX for limit of command line length
  logger: prohibit inlining sd_backtrace
  logger: dump stack frames when showing backtrace
  sheep: dump cluster info when exiting unexpectedly

 include/logger.h |   6 +++
 lib/logger.c     | 139 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
 sheep/sheep.c    |   1 +
 3 files changed, 143 insertions(+), 3 deletions(-)

-- 
1.8.1.3.566.gaa39828




More information about the sheepdog mailing list