[sheepdog] [PATCH 02/10] trace: create a caller list in tracer_init()

Liu Yuan namei.unix at gmail.com
Wed Aug 7 19:25:28 CEST 2013


On Thu, Aug 08, 2013 at 12:49:00AM +0900, MORITA Kazutaka wrote:
> From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
> 
> In tracer_enable(), we replace all the mcount() calls in the caller
> list with trace_caller().  However, we create the caller list
> dynamically; we update the list when sheep calls a new function which
> is not in the list for the first time.  This means that we cannot
> trace the functions which were not called before we call
> tracer_enable().
> 
> To include all the function into the tracer targets, this patch
> creates a complete caller list just after sheep starts up.  This
> achieves it with BFD (a part of binutils-dev) and makes the current
> tracer code much simpler.
> 
> This can also remove trace.ld, and allows us to see a line number when
> debugging with GDB.
> 
> Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
> ---
>  configure.ac         |   14 ++--
>  sheep/Makefile.am    |    2 +-
>  sheep/trace/graph.c  |    6 +-
>  sheep/trace/mcount.S |   28 -------
>  sheep/trace/stabs.c  |  210 -----------------------------------------------
>  sheep/trace/trace.c  |  222 +++++++++++++++++++++++++++++---------------------
>  sheep/trace/trace.h  |   19 +----
>  sheep/trace/trace.ld |   18 ----
>  8 files changed, 142 insertions(+), 377 deletions(-)
>  delete mode 100644 sheep/trace/stabs.c
>  delete mode 100644 sheep/trace/trace.ld
> 
> diff --git a/configure.ac b/configure.ac
> index 84ce448..b69df2c 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -289,6 +289,10 @@ if test "x${enable_trace}" = xyes; then
>  	if [[[ $host != *x86_64* ]]]; then
>  		AC_MSG_ERROR(tracer can be used on x86_64 architectures)
>  	fi
> +	AC_CHECK_LIB([bfd], [bfd_openr],,
> +		AC_MSG_ERROR(requires binutils-dev))
> +	AC_CHECK_HEADERS([bfd.h],,
> +		AC_MSG_ERROR(requires binutils-dev))
>  	AC_CHECK_LIB([rt], [clock_gettime],,
>  		AC_MSG_ERROR(librt not found))
>  	AC_DEFINE_UNQUOTED([HAVE_TRACE], 1, [have trace])
> @@ -371,11 +375,9 @@ else
>  fi
>  
>  if test "x${enable_trace}" = xyes && \
> -		cc_supports_flag -pg && \
> -		cc_supports_flag -gstabs ; then
> -	AC_MSG_NOTICE([Enabling trace (-pg -gstabs)])
> -	TRACE_CFLAGS="-pg -gstabs"
> -	TRACE_LDFLAGS="-T$(pwd)/sheep/trace/trace.ld"
> +		cc_supports_flag -pg ; then
> +	AC_MSG_NOTICE([Enabling trace (-pg)])
> +	TRACE_CFLAGS="-pg"
>  	PACKAGE_FEATURES="$PACKAGE_FEATURES trace"
>  else
>  	TRACE_CFLAGS=""
> @@ -386,7 +388,7 @@ CFLAGS="$ENV_CFLAGS $OPT_CFLAGS $GDB_FLAGS $OS_CFLAGS \
>  	$TRACE_CFLAGS $COVERAGE_CFLAGS $EXTRA_WARNINGS $WERROR_CFLAGS \
>  	-D_GNU_SOURCE -D_LGPL_SOURCE -std=gnu99"
>  CPPFLAGS="$ENV_CPPFLAGS $ANSI_CPPFLAGS $OS_CPPFLAGS"
> -LDFLAGS="$ENV_LDFLAGS $COVERAGE_LDFLAGS $OS_LDFLAGS $TRACE_LDFLAGS"
> +LDFLAGS="$ENV_LDFLAGS $COVERAGE_LDFLAGS $OS_LDFLAGS"
>  
>  # substitute what we need:
>  AC_SUBST([OS_DYFLAGS])
> diff --git a/sheep/Makefile.am b/sheep/Makefile.am
> index fe4a938..8b9cc10 100644
> --- a/sheep/Makefile.am
> +++ b/sheep/Makefile.am
> @@ -41,7 +41,7 @@ sheep_SOURCES		+= cluster/zookeeper.c
>  endif
>  
>  if BUILD_TRACE
> -sheep_SOURCES		+= trace/trace.c trace/mcount.S trace/stabs.c trace/graph.c
> +sheep_SOURCES		+= trace/trace.c trace/mcount.S trace/graph.c
>  endif
>  
>  sheep_LDADD	  	= ../lib/libsheepdog.a -lpthread -lm\
> diff --git a/sheep/trace/graph.c b/sheep/trace/graph.c
> index ce2ff7a..deaf6a4 100644
> --- a/sheep/trace/graph.c
> +++ b/sheep/trace/graph.c
> @@ -89,10 +89,8 @@ static notrace void graph_tracer(unsigned long ip, unsigned long *ret_addr)
>  
>  	memset(&trace, 0, sizeof(trace));
>  
> -	cr = trace_lookup_ip(ip, false);
> -	assert(cr->namelen + 1 < TRACE_FNAME_LEN);
> -	memcpy(trace.fname, cr->name, cr->namelen);
> -	memset(trace.fname + cr->namelen, '\0', 1);
> +	cr = trace_lookup_ip(ip);
> +	pstrcpy(trace.fname, sizeof(trace.fname), cr->name);
>  	get_thread_name(trace.tname);
>  
>  	*ret_addr = (unsigned long)trace_return_caller;
> diff --git a/sheep/trace/mcount.S b/sheep/trace/mcount.S
> index 69a9cbd..a621332 100644
> --- a/sheep/trace/mcount.S
> +++ b/sheep/trace/mcount.S
> @@ -19,34 +19,6 @@
>  #define ENTRY(x) \
>          .text; _ALIGN_TEXT; .globl x; .type x, at function; x:
>  
> -ENTRY(mcount)
> -	subq $0x38, %rsp
> -	movq %rax, (%rsp)
> -	movq %rcx, 8(%rsp)
> -	movq %rdx, 16(%rsp)
> -	movq %rsi, 24(%rsp)
> -	movq %rdi, 32(%rsp)
> -	movq %r8, 40(%rsp)
> -	movq %r9, 48(%rsp)
> -
> -	movq 0x38(%rsp), %rdi
> -	subq $INSN_SIZE, %rdi
> -
> -.globl mcount_call
> -mcount_call:
> -	call trace_stub
> -
> -	movq 48(%rsp), %r9
> -	movq 40(%rsp), %r8
> -	movq 32(%rsp), %rdi
> -	movq 24(%rsp), %rsi
> -	movq 16(%rsp), %rdx
> -	movq 8(%rsp), %rcx
> -	movq (%rsp), %rax
> -	addq $0x38, %rsp
> -
> -	retq
> -
>  ENTRY(trace_caller)
>  	subq $0x38, %rsp
>  	movq %rax, (%rsp)
> diff --git a/sheep/trace/stabs.c b/sheep/trace/stabs.c
> deleted file mode 100644
> index eeae153..0000000
> --- a/sheep/trace/stabs.c
> +++ /dev/null
> @@ -1,210 +0,0 @@
> -/*
> - * Copyright (C) 2012 Taobao Inc.
> - *
> - * Liu Yuan <namei.unix at gmail.com>
> - *
> - * This program is free software; you can redistribute it and/or
> - * modify it under the terms of the GNU General Public License version
> - * 2 as published by the Free Software Foundation.
> - *
> - * You should have received a copy of the GNU General Public License
> - * along with this program. If not, see <http://www.gnu.org/licenses/>.
> - */
> -
> -#include <stab.h>
> -
> -#include "trace.h"
> -
> -/* referrence to the MIT JOS code */
> -
> -/* Entries in the STABS table are formatted as follows */
> -struct stab {
> -	uint32_t index;		/* index into string table of name */
> -	uint8_t type;		/* type of symbol */
> -	uint8_t misc;		/* misc info (usually empty) */
> -	uint16_t desc;		/* description field */
> -	uint32_t value;		/* value of symbol */
> -};
> -
> -extern const struct stab __STAB_BEGIN__[];
> -extern const struct stab __STAB_END__[];
> -extern const char __STABSTR_BEGIN__[];
> -extern const char __STABSTR_END__[];
> -
> -/*
> -   stab_bsearch(stabs, region_left, region_right, type, addr)
> -
> -   Some stab types are arranged in increasing order by instruction
> -   address.  For example, N_FUN stabs (stab entries with type ==
> -   N_FUN), which mark functions, and N_SO stabs, which mark source files.
> -
> -   Given an instruction address, this function finds the single stab
> -   entry of type 'type' that contains that address.
> -
> -   The search modifies *region_left and *region_right to bracket the
> -   'addr'.  *region_left points to the matching stab that contains
> -   'addr', and *region_right points just before the next stab.  If
> -   *region_left > *region_right, then 'addr' is not contained in any
> -   matching stab.
> -
> -   For example, given these N_SO stabs:
> -   Index  Type   Address
> -   0      SO     f0100000
> -   13     SO     f0100040
> -   117    SO     f0100176
> -   118    SO     f0100178
> -   555    SO     f0100652
> -   556    SO     f0100654
> -   657    SO     f0100849
> -   this code:
> -	left = 0, right = 657;
> -	stab_bsearch(stabs, &left, &right, N_SO, 0xf0100184);
> -   will exit setting left = 118, right = 554.
> - */
> -static notrace void stab_bsearch(const struct stab *stabs, int *region_left, int *region_right,
> -		int type, uintptr_t addr)
> -{
> -	int l = *region_left, r = *region_right;
> -	bool any_matches = false;
> -
> -	while (l <= r) {
> -		int true_m = (l + r) / 2, m = true_m;
> -
> -		/* search for earliest stab with right type */
> -		while (m >= l && stabs[m].type != type)
> -			m--;
> -		if (m < l) {	/* no match in [l, m] */
> -			l = true_m + 1;
> -			continue;
> -		}
> -
> -		/* actual binary search */
> -		any_matches = true;
> -		if (stabs[m].value < addr) {
> -			*region_left = m;
> -			l = true_m + 1;
> -		} else if (stabs[m].value > addr) {
> -			*region_right = m - 1;
> -			r = m - 1;
> -		} else {
> -			/* exact match for 'addr', but continue loop to find
> -			 * *region_right
> -			 */
> -			*region_left = m;
> -			l = m;
> -			addr++;
> -		}
> -	}
> -
> -	if (!any_matches)
> -		*region_right = *region_left - 1;
> -	else {
> -		/* find rightmost region containing 'addr' */
> -		for (l = *region_right;
> -				l > *region_left && stabs[l].type != type;
> -				l--)
> -			/* do nothing */;
> -		*region_left = l;
> -	}
> -}
> -
> -/*
> - * Fill in the 'info' structure with information about the specified
> - * instruction address, 'addr'.
> - *
> - * Returns
> - *  0 if information was found
> - * -1 if not.
> - *
> - * NB: But even if it returns negative it has stored some
> - * information into '*info'.
> - */
> -
> -notrace int get_ipinfo(uintptr_t addr, struct ipinfo *info)
> -{
> -	const struct stab *stabs, *stab_end;
> -	const char *stabstr, *stabstr_end;
> -	int lfile, rfile, lfun, rfun, lline, rline;
> -
> -	info->file = "<unknown>";
> -	info->line = 0;
> -	info->fn_name = "<unknown>";
> -	info->fn_namelen = 9;
> -	info->fn_addr = addr;
> -	info->fn_narg = 0;
> -
> -	stabs = __STAB_BEGIN__;
> -	stab_end = __STAB_END__;
> -	stabstr = __STABSTR_BEGIN__;
> -	stabstr_end = __STABSTR_END__;
> -
> -	if (stabstr_end <= stabstr || stabstr_end[-1] != 0)
> -		return -1;
> -
> -	/* Now we find the right stabs that define the function containing
> -	 * 'eip'.  First, we find the basic source file containing 'eip'.
> -	 * Then, we look in that source file for the function.  Then we look
> -	 * for the line number.
> -	 */
> -
> -	lfile = 0;
> -	rfile = (stab_end - stabs) - 1;
> -	stab_bsearch(stabs, &lfile, &rfile, N_SO, addr);
> -	if (lfile == 0)
> -		return -1;
> -
> -	lfun = lfile;
> -	rfun = rfile;
> -	stab_bsearch(stabs, &lfun, &rfun, N_FUN, addr);
> -
> -	if (lfun <= rfun) {
> -		/* stabs[lfun] points to the function name
> -		 * in the string table, but check bounds just in case.
> -		 */
> -		if (stabs[lfun].index < stabstr_end - stabstr)
> -			info->fn_name = stabstr + stabs[lfun].index;
> -		info->fn_addr = stabs[lfun].value;
> -		addr -= info->fn_addr;
> -		/* Search within the function definition for the line number. */
> -		lline = lfun;
> -		rline = rfun;
> -	} else {
> -		/* Couldn't find function stab!  Maybe we're in an assembly
> -		 * file.  Search the whole file for the line number.
> -		 */
> -		info->fn_addr = addr;
> -		lline = lfile;
> -		rline = rfile;
> -	}
> -	/* Ignore stuff after the colon. */
> -	info->fn_namelen = strchr(info->fn_name, ':') - info->fn_name;
> -
> -
> -	/* Search within [lline, rline] for the line number stab. */
> -	stab_bsearch(stabs, &lline, &rline, N_SLINE, addr);
> -	if (lline <= rline)
> -		info->line = stabs[lline].desc;
> -	else
> -		return -1;
> -	/* Search backwards from the line number for the relevant filename
> -	 * stab.
> -	 * We can't just use the "lfile" stab because inlined functions
> -	 * can interpolate code from a different file!
> -	 * Such included source files use the N_SOL stab type.
> -	 */
> -	while (lline >= lfile &&
> -		stabs[lline].type != N_SOL &&
> -		(stabs[lline].type != N_SO || !stabs[lline].value))
> -		lline--;
> -	if (lline >= lfile && stabs[lline].index < stabstr_end - stabstr)
> -		info->file = stabstr + stabs[lline].index;
> -	/* Set fn_narg to the number of arguments taken by the function,
> -	 * or 0 if there was no containing function.
> -	 */
> -	if (lfun < rfun)
> -		for (lline = lfun + 1; lline < rfun && stabs[lline].type == N_PSYM;
> -		     lline++)
> -			info->fn_narg++;
> -
> -	return 0;
> -}
> diff --git a/sheep/trace/trace.c b/sheep/trace/trace.c
> index e0a91c8..be4cdb6 100644
> --- a/sheep/trace/trace.c
> +++ b/sheep/trace/trace.c
> @@ -11,14 +11,12 @@
>   * along with this program. If not, see <http://www.gnu.org/licenses/>.
>   */
>  
> -#include "trace.h"
> +#include <bfd.h>
>  
> -#define TRACE_HASH_BITS       7
> -#define TRACE_HASH_SIZE       (1 << TRACE_HASH_BITS)
> +#include "trace.h"
>  
> -static struct hlist_head trace_hashtable[TRACE_HASH_SIZE];
> -static LIST_HEAD(caller_list);
> -static pthread_mutex_t trace_lock = PTHREAD_MUTEX_INITIALIZER;
> +static struct caller *callers;
> +static size_t nr_callers;
>  
>  static trace_func_t trace_func = trace_call;
>  
> @@ -33,9 +31,9 @@ union instruction {
>  	} __attribute__((packed));
>  };
>  
> -static inline int trace_hash(unsigned long ip)
> +static notrace int caller_cmp(const struct caller *a, const struct caller *b)
>  {
> -	return hash_64(ip, TRACE_HASH_BITS);
> +	return intcmp(a->mcount, b->mcount);
>  }
>  
>  static notrace unsigned char *get_new_call(unsigned long ip, unsigned long addr)
> @@ -56,13 +54,6 @@ static notrace void replace_call(unsigned long ip, unsigned long func)
>  	memcpy((void *)ip, new, INSN_SIZE);
>  }
>  
> -static inline void replace_mcount_call(unsigned long func)
> -{
> -	unsigned long ip = (unsigned long)mcount_call;
> -
> -	replace_call(ip, func);
> -}
> -
>  static inline void replace_trace_call(unsigned long func)
>  {
>  	unsigned long ip = (unsigned long)trace_call;
> @@ -77,66 +68,13 @@ static notrace int make_text_writable(unsigned long ip)
>  	return mprotect((void *)start, getpagesize() + INSN_SIZE, PROT_READ | PROT_EXEC | PROT_WRITE);
>  }
>  
> -notrace struct caller *trace_lookup_ip(unsigned long ip, bool create)
> -{
> -	int h = trace_hash(ip);
> -	struct hlist_head *head = trace_hashtable + h;
> -	struct hlist_node *node;
> -	struct ipinfo info;
> -	struct caller *new = NULL;
> -
> -	pthread_mutex_lock(&trace_lock);
> -	if (hlist_empty(head))
> -		goto not_found;
> -
> -	hlist_for_each_entry(new, node, head, hash) {
> -		if (new->mcount == ip)
> -			goto out;
> -	}
> -not_found:
> -	if (get_ipinfo(ip, &info) < 0) {
> -		sd_dprintf("ip: %lx not found", ip);
> -		new = NULL;
> -		goto out;
> -	}
> -	if (create) {
> -		new = malloc(sizeof(*new));
> -		if (!new) {
> -			sd_eprintf("out of memory");
> -			goto out;
> -		}
> -		new->mcount = ip;
> -		new->namelen = info.fn_namelen;
> -		new->name = info.fn_name;
> -		hlist_add_head(&new->hash, head);
> -		list_add(&new->list, &caller_list);
> -		sd_dprintf("add %.*s", info.fn_namelen, info.fn_name);
> -	} else {
> -		sd_dprintf("%.*s\n not found", info.fn_namelen, info.fn_name);
> -		new = NULL;
> -	}
> -out:
> -	pthread_mutex_unlock(&trace_lock);
> -	return new;
> -}
> -
> -/*
> - * Try to NOP all the mcount call sites that are supposed to be traced.
> - * Later we can enable it by asking these sites to point to trace_caller,
> - * where we can override trace_call() with our own trace function. We can
> - * do this, because below function record the IP of 'call mcount' inside the
> - * callers.
> - *
> - * IP points to the return address.
> - */
> -static notrace void do_trace_init(unsigned long ip)
> +notrace struct caller *trace_lookup_ip(unsigned long ip)
>  {
> +	const struct caller key = {
> +		.mcount = ip,
> +	};
>  
> -	if (make_text_writable(ip) < 0)
> -		return;
> -
> -	memcpy((void *)ip, NOP5, INSN_SIZE);
> -	trace_lookup_ip(ip, true);
> +	return xbsearch(&key, callers, nr_callers, caller_cmp);
>  }
>  
>  notrace int register_trace_function(trace_func_t func)
> @@ -151,26 +89,14 @@ notrace int register_trace_function(trace_func_t func)
>  
>  static notrace void patch_all_sites(unsigned long addr)
>  {
> -	struct caller *ca;
> -	unsigned char *new;
> -
> -	pthread_mutex_lock(&trace_lock);
> -	list_for_each_entry(ca, &caller_list, list) {
> -		new = get_new_call(ca->mcount, addr);
> -		memcpy((void *)ca->mcount, new, INSN_SIZE);
> -	}
> -	pthread_mutex_unlock(&trace_lock);
> +	for (int i = 0; i < nr_callers; i++)
> +		replace_call(callers[i].mcount, addr);
>  }
>  
>  static notrace void nop_all_sites(void)
>  {
> -	struct caller *ca;
> -
> -	pthread_mutex_lock(&trace_lock);
> -	list_for_each_entry(ca, &caller_list, list) {
> -		memcpy((void *)ca->mcount, NOP5, INSN_SIZE);
> -	}
> -	pthread_mutex_unlock(&trace_lock);
> +	for (int i = 0; i < nr_callers; i++)
> +		memcpy((void *)callers[i].mcount, NOP5, INSN_SIZE);
>  }
>  
>  notrace int trace_enable(void)
> @@ -224,16 +150,126 @@ notrace void trace_buffer_push(int cpuid, struct trace_graph_item *item)
>  	strbuf_add(&buffer[cpuid], item, sizeof(*item));
>  }
>  
> +/* assume that mcount call exists in the first FIND_MCOUNT_RANGE bytes */
> +#define FIND_MCOUNT_RANGE 32
> +
> +static unsigned long find_mcount_call(unsigned long entry_addr)
> +{
> +	unsigned long start = entry_addr;
> +	unsigned long end = entry_addr + FIND_MCOUNT_RANGE;
> +
> +	while (start < end) {
> +		union instruction *code;
> +		unsigned long addr;
> +
> +		/* 0xe8 means a opcode of call */
> +		code = memchr((void *)start, 0xe8, end - start);
> +		addr = (unsigned long)code;
> +
> +		if (code == NULL)
> +			break;
> +
> +		if ((int)((unsigned long)mcount - addr - INSN_SIZE) ==
> +		    code->offset)
> +			return addr;
> +
> +		start = addr + 1;
> +	}
> +
> +	return 0;
> +}
> +
> +static bfd *get_bfd(void)
> +{
> +	char fname[PATH_MAX] = {0};
> +	bfd *abfd;
> +
> +	if (readlink("/proc/self/exe", fname, sizeof(fname)) < 0)
> +		panic("failed to get a path of the program.");
> +
> +	abfd = bfd_openr(fname, NULL);
> +	if (abfd == 0) {
> +		sd_eprintf("cannot open %s", fname);
> +		return NULL;
> +	}
> +
> +	if (!bfd_check_format(abfd, bfd_object)) {
> +		sd_eprintf("invalid format");
> +		return NULL;
> +	}
> +
> +	if (!(bfd_get_file_flags(abfd) & HAS_SYMS)) {
> +		sd_eprintf("no symbols found");
> +		return NULL;
> +	}
> +
> +	return abfd;
> +}
> +
> +/* Create a caller list which has a mcount call. */
> +static int init_callers(void)
> +{
> +	int max_symtab_size;
> +	asymbol **symtab;
> +	int symcount;
> +	bfd *abfd;
> +
> +	abfd = get_bfd();
> +	if (abfd == NULL)
> +		return -1;
> +
> +	max_symtab_size = bfd_get_symtab_upper_bound(abfd);
> +	if (max_symtab_size < 0) {
> +		sd_eprintf("failed to get symtab size");
> +		return -1;
> +	}
> +
> +	symtab = xmalloc(max_symtab_size);
> +	symcount = bfd_canonicalize_symtab(abfd, symtab);
> +
> +	callers = xzalloc(sizeof(*callers) * symcount);
> +	for (int i = 0; i < symcount; i++) {
> +		asymbol *sym = symtab[i];
> +		unsigned long ip, addr = bfd_asymbol_value(sym);
> +		const char *name = bfd_asymbol_name(sym);
> +
> +		if (addr == 0 || !(sym->flags & BSF_FUNCTION))
> +			/* sym is not a function */
> +			continue;
> +
> +		ip = find_mcount_call(addr);
> +		if (ip == 0) {
> +			sd_dprintf("%s doesn't have mcount call", name);
> +			continue;
> +		}
> +		if (make_text_writable(ip) < 0)
> +			abort();

any reason use abort() instead of panic()?

Thanks
Yuan



More information about the sheepdog mailing list