[sheepdog] [PATCH 02/10] trace: create a caller list in tracer_init()
MORITA Kazutaka
morita.kazutaka at gmail.com
Wed Aug 7 17:49:00 CEST 2013
From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
In tracer_enable(), we replace all the mcount() calls in the caller
list with trace_caller(). However, we create the caller list
dynamically; we update the list when sheep calls a new function which
is not in the list for the first time. This means that we cannot
trace the functions which were not called before we call
tracer_enable().
To include all the function into the tracer targets, this patch
creates a complete caller list just after sheep starts up. This
achieves it with BFD (a part of binutils-dev) and makes the current
tracer code much simpler.
This can also remove trace.ld, and allows us to see a line number when
debugging with GDB.
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
configure.ac | 14 ++--
sheep/Makefile.am | 2 +-
sheep/trace/graph.c | 6 +-
sheep/trace/mcount.S | 28 -------
sheep/trace/stabs.c | 210 -----------------------------------------------
sheep/trace/trace.c | 222 +++++++++++++++++++++++++++++---------------------
sheep/trace/trace.h | 19 +----
sheep/trace/trace.ld | 18 ----
8 files changed, 142 insertions(+), 377 deletions(-)
delete mode 100644 sheep/trace/stabs.c
delete mode 100644 sheep/trace/trace.ld
diff --git a/configure.ac b/configure.ac
index 84ce448..b69df2c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -289,6 +289,10 @@ if test "x${enable_trace}" = xyes; then
if [[[ $host != *x86_64* ]]]; then
AC_MSG_ERROR(tracer can be used on x86_64 architectures)
fi
+ AC_CHECK_LIB([bfd], [bfd_openr],,
+ AC_MSG_ERROR(requires binutils-dev))
+ AC_CHECK_HEADERS([bfd.h],,
+ AC_MSG_ERROR(requires binutils-dev))
AC_CHECK_LIB([rt], [clock_gettime],,
AC_MSG_ERROR(librt not found))
AC_DEFINE_UNQUOTED([HAVE_TRACE], 1, [have trace])
@@ -371,11 +375,9 @@ else
fi
if test "x${enable_trace}" = xyes && \
- cc_supports_flag -pg && \
- cc_supports_flag -gstabs ; then
- AC_MSG_NOTICE([Enabling trace (-pg -gstabs)])
- TRACE_CFLAGS="-pg -gstabs"
- TRACE_LDFLAGS="-T$(pwd)/sheep/trace/trace.ld"
+ cc_supports_flag -pg ; then
+ AC_MSG_NOTICE([Enabling trace (-pg)])
+ TRACE_CFLAGS="-pg"
PACKAGE_FEATURES="$PACKAGE_FEATURES trace"
else
TRACE_CFLAGS=""
@@ -386,7 +388,7 @@ CFLAGS="$ENV_CFLAGS $OPT_CFLAGS $GDB_FLAGS $OS_CFLAGS \
$TRACE_CFLAGS $COVERAGE_CFLAGS $EXTRA_WARNINGS $WERROR_CFLAGS \
-D_GNU_SOURCE -D_LGPL_SOURCE -std=gnu99"
CPPFLAGS="$ENV_CPPFLAGS $ANSI_CPPFLAGS $OS_CPPFLAGS"
-LDFLAGS="$ENV_LDFLAGS $COVERAGE_LDFLAGS $OS_LDFLAGS $TRACE_LDFLAGS"
+LDFLAGS="$ENV_LDFLAGS $COVERAGE_LDFLAGS $OS_LDFLAGS"
# substitute what we need:
AC_SUBST([OS_DYFLAGS])
diff --git a/sheep/Makefile.am b/sheep/Makefile.am
index fe4a938..8b9cc10 100644
--- a/sheep/Makefile.am
+++ b/sheep/Makefile.am
@@ -41,7 +41,7 @@ sheep_SOURCES += cluster/zookeeper.c
endif
if BUILD_TRACE
-sheep_SOURCES += trace/trace.c trace/mcount.S trace/stabs.c trace/graph.c
+sheep_SOURCES += trace/trace.c trace/mcount.S trace/graph.c
endif
sheep_LDADD = ../lib/libsheepdog.a -lpthread -lm\
diff --git a/sheep/trace/graph.c b/sheep/trace/graph.c
index ce2ff7a..deaf6a4 100644
--- a/sheep/trace/graph.c
+++ b/sheep/trace/graph.c
@@ -89,10 +89,8 @@ static notrace void graph_tracer(unsigned long ip, unsigned long *ret_addr)
memset(&trace, 0, sizeof(trace));
- cr = trace_lookup_ip(ip, false);
- assert(cr->namelen + 1 < TRACE_FNAME_LEN);
- memcpy(trace.fname, cr->name, cr->namelen);
- memset(trace.fname + cr->namelen, '\0', 1);
+ cr = trace_lookup_ip(ip);
+ pstrcpy(trace.fname, sizeof(trace.fname), cr->name);
get_thread_name(trace.tname);
*ret_addr = (unsigned long)trace_return_caller;
diff --git a/sheep/trace/mcount.S b/sheep/trace/mcount.S
index 69a9cbd..a621332 100644
--- a/sheep/trace/mcount.S
+++ b/sheep/trace/mcount.S
@@ -19,34 +19,6 @@
#define ENTRY(x) \
.text; _ALIGN_TEXT; .globl x; .type x, at function; x:
-ENTRY(mcount)
- subq $0x38, %rsp
- movq %rax, (%rsp)
- movq %rcx, 8(%rsp)
- movq %rdx, 16(%rsp)
- movq %rsi, 24(%rsp)
- movq %rdi, 32(%rsp)
- movq %r8, 40(%rsp)
- movq %r9, 48(%rsp)
-
- movq 0x38(%rsp), %rdi
- subq $INSN_SIZE, %rdi
-
-.globl mcount_call
-mcount_call:
- call trace_stub
-
- movq 48(%rsp), %r9
- movq 40(%rsp), %r8
- movq 32(%rsp), %rdi
- movq 24(%rsp), %rsi
- movq 16(%rsp), %rdx
- movq 8(%rsp), %rcx
- movq (%rsp), %rax
- addq $0x38, %rsp
-
- retq
-
ENTRY(trace_caller)
subq $0x38, %rsp
movq %rax, (%rsp)
diff --git a/sheep/trace/stabs.c b/sheep/trace/stabs.c
deleted file mode 100644
index eeae153..0000000
--- a/sheep/trace/stabs.c
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Copyright (C) 2012 Taobao Inc.
- *
- * Liu Yuan <namei.unix at gmail.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <stab.h>
-
-#include "trace.h"
-
-/* referrence to the MIT JOS code */
-
-/* Entries in the STABS table are formatted as follows */
-struct stab {
- uint32_t index; /* index into string table of name */
- uint8_t type; /* type of symbol */
- uint8_t misc; /* misc info (usually empty) */
- uint16_t desc; /* description field */
- uint32_t value; /* value of symbol */
-};
-
-extern const struct stab __STAB_BEGIN__[];
-extern const struct stab __STAB_END__[];
-extern const char __STABSTR_BEGIN__[];
-extern const char __STABSTR_END__[];
-
-/*
- stab_bsearch(stabs, region_left, region_right, type, addr)
-
- Some stab types are arranged in increasing order by instruction
- address. For example, N_FUN stabs (stab entries with type ==
- N_FUN), which mark functions, and N_SO stabs, which mark source files.
-
- Given an instruction address, this function finds the single stab
- entry of type 'type' that contains that address.
-
- The search modifies *region_left and *region_right to bracket the
- 'addr'. *region_left points to the matching stab that contains
- 'addr', and *region_right points just before the next stab. If
- *region_left > *region_right, then 'addr' is not contained in any
- matching stab.
-
- For example, given these N_SO stabs:
- Index Type Address
- 0 SO f0100000
- 13 SO f0100040
- 117 SO f0100176
- 118 SO f0100178
- 555 SO f0100652
- 556 SO f0100654
- 657 SO f0100849
- this code:
- left = 0, right = 657;
- stab_bsearch(stabs, &left, &right, N_SO, 0xf0100184);
- will exit setting left = 118, right = 554.
- */
-static notrace void stab_bsearch(const struct stab *stabs, int *region_left, int *region_right,
- int type, uintptr_t addr)
-{
- int l = *region_left, r = *region_right;
- bool any_matches = false;
-
- while (l <= r) {
- int true_m = (l + r) / 2, m = true_m;
-
- /* search for earliest stab with right type */
- while (m >= l && stabs[m].type != type)
- m--;
- if (m < l) { /* no match in [l, m] */
- l = true_m + 1;
- continue;
- }
-
- /* actual binary search */
- any_matches = true;
- if (stabs[m].value < addr) {
- *region_left = m;
- l = true_m + 1;
- } else if (stabs[m].value > addr) {
- *region_right = m - 1;
- r = m - 1;
- } else {
- /* exact match for 'addr', but continue loop to find
- * *region_right
- */
- *region_left = m;
- l = m;
- addr++;
- }
- }
-
- if (!any_matches)
- *region_right = *region_left - 1;
- else {
- /* find rightmost region containing 'addr' */
- for (l = *region_right;
- l > *region_left && stabs[l].type != type;
- l--)
- /* do nothing */;
- *region_left = l;
- }
-}
-
-/*
- * Fill in the 'info' structure with information about the specified
- * instruction address, 'addr'.
- *
- * Returns
- * 0 if information was found
- * -1 if not.
- *
- * NB: But even if it returns negative it has stored some
- * information into '*info'.
- */
-
-notrace int get_ipinfo(uintptr_t addr, struct ipinfo *info)
-{
- const struct stab *stabs, *stab_end;
- const char *stabstr, *stabstr_end;
- int lfile, rfile, lfun, rfun, lline, rline;
-
- info->file = "<unknown>";
- info->line = 0;
- info->fn_name = "<unknown>";
- info->fn_namelen = 9;
- info->fn_addr = addr;
- info->fn_narg = 0;
-
- stabs = __STAB_BEGIN__;
- stab_end = __STAB_END__;
- stabstr = __STABSTR_BEGIN__;
- stabstr_end = __STABSTR_END__;
-
- if (stabstr_end <= stabstr || stabstr_end[-1] != 0)
- return -1;
-
- /* Now we find the right stabs that define the function containing
- * 'eip'. First, we find the basic source file containing 'eip'.
- * Then, we look in that source file for the function. Then we look
- * for the line number.
- */
-
- lfile = 0;
- rfile = (stab_end - stabs) - 1;
- stab_bsearch(stabs, &lfile, &rfile, N_SO, addr);
- if (lfile == 0)
- return -1;
-
- lfun = lfile;
- rfun = rfile;
- stab_bsearch(stabs, &lfun, &rfun, N_FUN, addr);
-
- if (lfun <= rfun) {
- /* stabs[lfun] points to the function name
- * in the string table, but check bounds just in case.
- */
- if (stabs[lfun].index < stabstr_end - stabstr)
- info->fn_name = stabstr + stabs[lfun].index;
- info->fn_addr = stabs[lfun].value;
- addr -= info->fn_addr;
- /* Search within the function definition for the line number. */
- lline = lfun;
- rline = rfun;
- } else {
- /* Couldn't find function stab! Maybe we're in an assembly
- * file. Search the whole file for the line number.
- */
- info->fn_addr = addr;
- lline = lfile;
- rline = rfile;
- }
- /* Ignore stuff after the colon. */
- info->fn_namelen = strchr(info->fn_name, ':') - info->fn_name;
-
-
- /* Search within [lline, rline] for the line number stab. */
- stab_bsearch(stabs, &lline, &rline, N_SLINE, addr);
- if (lline <= rline)
- info->line = stabs[lline].desc;
- else
- return -1;
- /* Search backwards from the line number for the relevant filename
- * stab.
- * We can't just use the "lfile" stab because inlined functions
- * can interpolate code from a different file!
- * Such included source files use the N_SOL stab type.
- */
- while (lline >= lfile &&
- stabs[lline].type != N_SOL &&
- (stabs[lline].type != N_SO || !stabs[lline].value))
- lline--;
- if (lline >= lfile && stabs[lline].index < stabstr_end - stabstr)
- info->file = stabstr + stabs[lline].index;
- /* Set fn_narg to the number of arguments taken by the function,
- * or 0 if there was no containing function.
- */
- if (lfun < rfun)
- for (lline = lfun + 1; lline < rfun && stabs[lline].type == N_PSYM;
- lline++)
- info->fn_narg++;
-
- return 0;
-}
diff --git a/sheep/trace/trace.c b/sheep/trace/trace.c
index e0a91c8..be4cdb6 100644
--- a/sheep/trace/trace.c
+++ b/sheep/trace/trace.c
@@ -11,14 +11,12 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include "trace.h"
+#include <bfd.h>
-#define TRACE_HASH_BITS 7
-#define TRACE_HASH_SIZE (1 << TRACE_HASH_BITS)
+#include "trace.h"
-static struct hlist_head trace_hashtable[TRACE_HASH_SIZE];
-static LIST_HEAD(caller_list);
-static pthread_mutex_t trace_lock = PTHREAD_MUTEX_INITIALIZER;
+static struct caller *callers;
+static size_t nr_callers;
static trace_func_t trace_func = trace_call;
@@ -33,9 +31,9 @@ union instruction {
} __attribute__((packed));
};
-static inline int trace_hash(unsigned long ip)
+static notrace int caller_cmp(const struct caller *a, const struct caller *b)
{
- return hash_64(ip, TRACE_HASH_BITS);
+ return intcmp(a->mcount, b->mcount);
}
static notrace unsigned char *get_new_call(unsigned long ip, unsigned long addr)
@@ -56,13 +54,6 @@ static notrace void replace_call(unsigned long ip, unsigned long func)
memcpy((void *)ip, new, INSN_SIZE);
}
-static inline void replace_mcount_call(unsigned long func)
-{
- unsigned long ip = (unsigned long)mcount_call;
-
- replace_call(ip, func);
-}
-
static inline void replace_trace_call(unsigned long func)
{
unsigned long ip = (unsigned long)trace_call;
@@ -77,66 +68,13 @@ static notrace int make_text_writable(unsigned long ip)
return mprotect((void *)start, getpagesize() + INSN_SIZE, PROT_READ | PROT_EXEC | PROT_WRITE);
}
-notrace struct caller *trace_lookup_ip(unsigned long ip, bool create)
-{
- int h = trace_hash(ip);
- struct hlist_head *head = trace_hashtable + h;
- struct hlist_node *node;
- struct ipinfo info;
- struct caller *new = NULL;
-
- pthread_mutex_lock(&trace_lock);
- if (hlist_empty(head))
- goto not_found;
-
- hlist_for_each_entry(new, node, head, hash) {
- if (new->mcount == ip)
- goto out;
- }
-not_found:
- if (get_ipinfo(ip, &info) < 0) {
- sd_dprintf("ip: %lx not found", ip);
- new = NULL;
- goto out;
- }
- if (create) {
- new = malloc(sizeof(*new));
- if (!new) {
- sd_eprintf("out of memory");
- goto out;
- }
- new->mcount = ip;
- new->namelen = info.fn_namelen;
- new->name = info.fn_name;
- hlist_add_head(&new->hash, head);
- list_add(&new->list, &caller_list);
- sd_dprintf("add %.*s", info.fn_namelen, info.fn_name);
- } else {
- sd_dprintf("%.*s\n not found", info.fn_namelen, info.fn_name);
- new = NULL;
- }
-out:
- pthread_mutex_unlock(&trace_lock);
- return new;
-}
-
-/*
- * Try to NOP all the mcount call sites that are supposed to be traced.
- * Later we can enable it by asking these sites to point to trace_caller,
- * where we can override trace_call() with our own trace function. We can
- * do this, because below function record the IP of 'call mcount' inside the
- * callers.
- *
- * IP points to the return address.
- */
-static notrace void do_trace_init(unsigned long ip)
+notrace struct caller *trace_lookup_ip(unsigned long ip)
{
+ const struct caller key = {
+ .mcount = ip,
+ };
- if (make_text_writable(ip) < 0)
- return;
-
- memcpy((void *)ip, NOP5, INSN_SIZE);
- trace_lookup_ip(ip, true);
+ return xbsearch(&key, callers, nr_callers, caller_cmp);
}
notrace int register_trace_function(trace_func_t func)
@@ -151,26 +89,14 @@ notrace int register_trace_function(trace_func_t func)
static notrace void patch_all_sites(unsigned long addr)
{
- struct caller *ca;
- unsigned char *new;
-
- pthread_mutex_lock(&trace_lock);
- list_for_each_entry(ca, &caller_list, list) {
- new = get_new_call(ca->mcount, addr);
- memcpy((void *)ca->mcount, new, INSN_SIZE);
- }
- pthread_mutex_unlock(&trace_lock);
+ for (int i = 0; i < nr_callers; i++)
+ replace_call(callers[i].mcount, addr);
}
static notrace void nop_all_sites(void)
{
- struct caller *ca;
-
- pthread_mutex_lock(&trace_lock);
- list_for_each_entry(ca, &caller_list, list) {
- memcpy((void *)ca->mcount, NOP5, INSN_SIZE);
- }
- pthread_mutex_unlock(&trace_lock);
+ for (int i = 0; i < nr_callers; i++)
+ memcpy((void *)callers[i].mcount, NOP5, INSN_SIZE);
}
notrace int trace_enable(void)
@@ -224,16 +150,126 @@ notrace void trace_buffer_push(int cpuid, struct trace_graph_item *item)
strbuf_add(&buffer[cpuid], item, sizeof(*item));
}
+/* assume that mcount call exists in the first FIND_MCOUNT_RANGE bytes */
+#define FIND_MCOUNT_RANGE 32
+
+static unsigned long find_mcount_call(unsigned long entry_addr)
+{
+ unsigned long start = entry_addr;
+ unsigned long end = entry_addr + FIND_MCOUNT_RANGE;
+
+ while (start < end) {
+ union instruction *code;
+ unsigned long addr;
+
+ /* 0xe8 means a opcode of call */
+ code = memchr((void *)start, 0xe8, end - start);
+ addr = (unsigned long)code;
+
+ if (code == NULL)
+ break;
+
+ if ((int)((unsigned long)mcount - addr - INSN_SIZE) ==
+ code->offset)
+ return addr;
+
+ start = addr + 1;
+ }
+
+ return 0;
+}
+
+static bfd *get_bfd(void)
+{
+ char fname[PATH_MAX] = {0};
+ bfd *abfd;
+
+ if (readlink("/proc/self/exe", fname, sizeof(fname)) < 0)
+ panic("failed to get a path of the program.");
+
+ abfd = bfd_openr(fname, NULL);
+ if (abfd == 0) {
+ sd_eprintf("cannot open %s", fname);
+ return NULL;
+ }
+
+ if (!bfd_check_format(abfd, bfd_object)) {
+ sd_eprintf("invalid format");
+ return NULL;
+ }
+
+ if (!(bfd_get_file_flags(abfd) & HAS_SYMS)) {
+ sd_eprintf("no symbols found");
+ return NULL;
+ }
+
+ return abfd;
+}
+
+/* Create a caller list which has a mcount call. */
+static int init_callers(void)
+{
+ int max_symtab_size;
+ asymbol **symtab;
+ int symcount;
+ bfd *abfd;
+
+ abfd = get_bfd();
+ if (abfd == NULL)
+ return -1;
+
+ max_symtab_size = bfd_get_symtab_upper_bound(abfd);
+ if (max_symtab_size < 0) {
+ sd_eprintf("failed to get symtab size");
+ return -1;
+ }
+
+ symtab = xmalloc(max_symtab_size);
+ symcount = bfd_canonicalize_symtab(abfd, symtab);
+
+ callers = xzalloc(sizeof(*callers) * symcount);
+ for (int i = 0; i < symcount; i++) {
+ asymbol *sym = symtab[i];
+ unsigned long ip, addr = bfd_asymbol_value(sym);
+ const char *name = bfd_asymbol_name(sym);
+
+ if (addr == 0 || !(sym->flags & BSF_FUNCTION))
+ /* sym is not a function */
+ continue;
+
+ ip = find_mcount_call(addr);
+ if (ip == 0) {
+ sd_dprintf("%s doesn't have mcount call", name);
+ continue;
+ }
+ if (make_text_writable(ip) < 0)
+ abort();
+
+ callers[nr_callers].addr = addr;
+ callers[nr_callers].mcount = ip;
+ callers[nr_callers].name = strdup(name);
+ nr_callers++;
+ }
+ xqsort(callers, nr_callers, caller_cmp);
+
+ free(symtab);
+ bfd_close(abfd);
+
+ return 0;
+}
+
+/*
+ * Try to NOP all the mcount call sites that are supposed to be traced. Later
+ * we can enable it by asking these sites to point to trace_caller.
+ */
notrace int trace_init(void)
{
int i;
- if (make_text_writable((unsigned long)mcount_call) < 0) {
- sd_dprintf("%m");
+ if (init_callers() < 0)
return -1;
- }
- replace_mcount_call((unsigned long)do_trace_init);
+ nop_all_sites();
nr_cpu = sysconf(_SC_NPROCESSORS_ONLN);
buffer = xzalloc(sizeof(*buffer) * nr_cpu);
diff --git a/sheep/trace/trace.h b/sheep/trace/trace.h
index 977b48f..f65aba8 100644
--- a/sheep/trace/trace.h
+++ b/sheep/trace/trace.h
@@ -7,20 +7,9 @@
#include "sheep_priv.h"
-struct ipinfo {
- const char *file; /* Source code filename for EIP */
- int line; /* Source code linenumber for EIP */
- const char *fn_name; /* Name of function containing EIP */
- int fn_namelen; /* Length of function name */
- unsigned long fn_addr; /* Address of start of function */
- int fn_narg; /* Number of function arguments */
-};
-
struct caller {
- struct list_head list;
- struct hlist_node hash;
+ unsigned long addr;
unsigned long mcount;
- int namelen;
const char *name;
};
@@ -31,12 +20,8 @@ typedef void (*trace_func_graph_ent_t)(struct trace_graph_item *);
/* graph.c */
-/* stabs.c */
-int get_ipinfo(unsigned long ip, struct ipinfo *info);
-
/* mcount.S */
void mcount(void);
-void mcount_call(void);
void trace_caller(void);
void trace_call(unsigned long, unsigned long *);
extern const unsigned char NOP5[];
@@ -49,7 +34,7 @@ unsigned long trace_return_call(void);
int register_trace_function(trace_func_t func);
int trace_enable(void);
int trace_disable(void);
- struct caller *trace_lookup_ip(unsigned long ip, bool create);
+ struct caller *trace_lookup_ip(unsigned long ip);
int trace_buffer_pop(void *buf, uint32_t len);
void trace_buffer_push(int cpuid, struct trace_graph_item *item);
diff --git a/sheep/trace/trace.ld b/sheep/trace/trace.ld
deleted file mode 100644
index 031ada7..0000000
--- a/sheep/trace/trace.ld
+++ /dev/null
@@ -1,18 +0,0 @@
-SECTIONS
-{
- stab : {
- PROVIDE(__STAB_BEGIN__ = .);
- *(.stab);
- PROVIDE(__STAB_END__ = .);
- BYTE(0) /* Force linker allocate memeory for this section */
- }
-
- stabstr : {
- PROVIDE(__STABSTR_BEGIN__ = .);
- *(.stabstr);
- PROVIDE(__STABSTR_END__ = .);
- BYTE(0)
- }
-}
-
-INSERT AFTER .rodata
--
1.7.9.5
More information about the sheepdog
mailing list