From: Liu Yuan <tailai.ly at taobao.com>

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 sheep/sheep.c        |    5 +
 sheep/trace/mcount.S |    4 +
 sheep/trace/trace.c  |  363 ++++++++++++++++++++++++++++++++++++++++++++++++++
 sheep/trace/trace.h  |   36 +++++++
 4 files changed, 408 insertions(+), 0 deletions(-)
 create mode 100644 sheep/trace/trace.c

diff --git a/sheep/sheep.c b/sheep/sheep.c
index b86f8e5..392b286 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -21,6 +21,7 @@
 #include <sys/syslog.h>
 
 #include "sheep_priv.h"
+#include "trace/trace.h"
 
 #define EPOLL_SIZE 4096
 #define DEFAULT_OBJECT_DIR "/tmp"
@@ -210,6 +211,10 @@ int main(int argc, char **argv)
 	ret = init_signal();
 	if (ret)
 		exit(1);
+
+	ret = trace_init();
+	if (ret)
+		exit(1);
 	vprintf(SDOG_NOTICE, "sheepdog daemon (version %s) started\n", PACKAGE_VERSION);
 
 	while (!sys_stat_shutdown() || sys->nr_outstanding_reqs != 0)
diff --git a/sheep/trace/mcount.S b/sheep/trace/mcount.S
index c16e5ae..5f1e6b5 100644
--- a/sheep/trace/mcount.S
+++ b/sheep/trace/mcount.S
@@ -64,3 +64,7 @@ trace_call:
 .globl trace_stub
 trace_stub:
 	retq
+
+.globl NOP5
+NOP5:
+	.byte 0x0f,0x1f,0x44,0x00,0x00 # Intel recommended one for 5 bytes nops
diff --git a/sheep/trace/trace.c b/sheep/trace/trace.c
new file mode 100644
index 0000000..611f6f5
--- /dev/null
+++ b/sheep/trace/trace.c
@@ -0,0 +1,363 @@
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <signal.h>
+
+#include "trace.h"
+#include "logger.h"
+#include "list.h"
+#include "work.h"
+#include "sheepdog_proto.h"
+
+#define TRACE_HASH_BITS       7
+#define TRACE_HASH_SIZE       (1 << TRACE_HASH_BITS)
+
+/* Known mcount call sites: hashed by site address and chained in one list. */
+static struct hlist_head trace_hashtable[TRACE_HASH_SIZE];
+static LIST_HEAD(caller_list);
+static pthread_mutex_t trace_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static trace_func_t trace_func = trace_call;
+
+/* Suspend/resume handshake state, protected by trace_mux. */
+static int trace_count;		/* workers that still have to park */
+static int trace_suspended;	/* non-zero while workers must stay parked */
+
+pthread_cond_t trace_cond = PTHREAD_COND_INITIALIZER;
+pthread_mutex_t trace_mux = PTHREAD_MUTEX_INITIALIZER;
+
+/* A 5-byte x86 near call: the 0xe8 opcode followed by a 32-bit offset. */
+union instruction {
+	unsigned char start[INSN_SIZE];
+	struct {
+		unsigned char opcode;
+		int offset;
+	} __attribute__((packed));
+};
+
+/*
+ * Park the calling worker thread until resume_worker_threads() releases it.
+ *
+ * NOTE(review): presumably the workers' SIGUSR1 handler, which is installed
+ * outside this file - confirm. 'num' is not used.
+ */
+notrace void suspend(int num)
+{
+	dprintf("worker thread %u going to suspend\n", (unsigned int)pthread_self());
+
+	pthread_mutex_lock(&trace_mux);
+	trace_count--;
+	/* pthread_cond_wait() may wake up spuriously, so re-check the flag */
+	while (trace_suspended)
+		pthread_cond_wait(&trace_cond, &trace_mux);
+	pthread_mutex_unlock(&trace_mux);
+	dprintf("worker thread going to resume\n");
+}
+
+static inline int trace_hash(unsigned long ip)
+{
+	return hash_64(ip, TRACE_HASH_BITS);
+}
+
+/*
+ * cpuid is a serializing instruction: run it after patching code so that
+ * this CPU drops whatever it already fetched from the old bytes.
+ */
+static inline void sync_core(void)
+{
+	int tmp;
+
+	asm volatile("cpuid" : "=a" (tmp) : "0" (1)
+			: "ebx", "ecx", "edx", "memory");
+}
+
+/*
+ * Encode the 'call addr' instruction as it has to look when placed at ip.
+ *
+ * The result lives in a static buffer: it is overwritten by the next call
+ * and must not be requested from two threads at once.
+ */
+static notrace unsigned char *get_new_call(unsigned long ip, unsigned long addr)
+{
+	static union instruction code;
+
+	code.opcode = 0xe8; /* opcode of call */
+	code.offset = (int)(addr - ip - INSN_SIZE);
+
+	return code.start;
+}
+
+/* Overwrite the instruction at ip with 'call func'; ip must be writable. */
+static notrace void replace_call(unsigned long ip, unsigned long func)
+{
+	unsigned char *new;
+
+	new = get_new_call(ip, func);
+	memcpy((void *)ip, new, INSN_SIZE);
+	sync_core();
+}
+
+static inline void replace_mcount_call(unsigned long func)
+{
+	unsigned long ip = (unsigned long)mcount_call;
+
+	replace_call(ip, func);
+}
+
+static inline void replace_trace_call(unsigned long func)
+{
+	unsigned long ip = (unsigned long)trace_call;
+
+	replace_call(ip, func);
+}
+
+/*
+ * Add write permission to the INSN_SIZE bytes of text starting at ip.
+ * Returns what mprotect() returns: 0 on success, -1 with errno set.
+ */
+static notrace int make_text_writable(unsigned long ip)
+{
+	unsigned long start = ip & ~((unsigned long)getpagesize() - 1);
+
+	/*
+	 * The length runs from the page start to the end of the instruction,
+	 * so an instruction that straddles a page boundary gets both of its
+	 * pages unprotected instead of only the first one.
+	 */
+	return mprotect((void *)start, ip - start + INSN_SIZE,
+			PROT_READ | PROT_EXEC | PROT_WRITE);
+}
+
+/* Find the caller registered for ip in one bucket; trace_lock must be held. */
+static notrace struct caller *find_caller(struct hlist_head *head,
+					  unsigned long ip)
+{
+	struct hlist_node *node;
+	struct caller *ca;
+
+	if (hlist_empty(head))
+		return NULL;
+
+	hlist_for_each_entry(ca, node, head, hash) {
+		if (ca->mcount == ip)
+			return ca;
+	}
+	return NULL;
+}
+
+/*
+ * Look up the call site at ip and, if 'create' is non-zero and it is not
+ * known yet, register it.
+ *
+ * Returns the caller, or NULL when ip is unknown and either 'create' is zero
+ * or get_ipinfo() cannot resolve ip.
+ */
+notrace struct caller *trace_lookup_ip(unsigned long ip, int create)
+{
+	struct hlist_head *head = trace_hashtable + trace_hash(ip);
+	struct ipinfo info;
+	struct caller *ca, *new;
+
+	pthread_mutex_lock(&trace_lock);
+	ca = find_caller(head, ip);
+	if (ca || !create)
+		goto out;
+
+	if (get_ipinfo(ip, &info) < 0) {
+		dprintf("ip: %lx not found\n", ip);
+		goto out;
+	}
+
+	/*
+	 * unlock to avoid deadlock
+	 * NOTE(review): presumably xzalloc() can re-enter this function through
+	 * its own mcount hook - confirm.
+	 */
+	pthread_mutex_unlock(&trace_lock);
+	new = xzalloc(sizeof(*new));
+	pthread_mutex_lock(&trace_lock);
+
+	/* the site may have been registered while the lock was dropped */
+	ca = find_caller(head, ip);
+	if (ca) {
+		free(new);
+		goto out;
+	}
+
+	new->mcount = ip;
+	new->namelen = info.fn_namelen;
+	new->name = info.fn_name;
+	hlist_add_head(&new->hash, head);
+	list_add(&new->list, &caller_list);
+	dprintf("add %.*s\n", info.fn_namelen, info.fn_name);
+	ca = new;
+out:
+	pthread_mutex_unlock(&trace_lock);
+	return ca;
+}
+
+/*
+ * Try to NOP all the mcount call sites that are supposed to be traced.
+ * Later we can enable it by asking these sites to point to trace_caller,
+ * where we can override trace_call() with our own trace function. We can
+ * do this, because the function below records the IP of 'call mcount' inside
+ * the callers.
+ *
+ * IP points to the return address.
+ */
+static notrace void do_trace_init(unsigned long ip)
+{
+	if (make_text_writable(ip) < 0)
+		return;
+
+	memcpy((void *)ip, NOP5, INSN_SIZE);
+	sync_core();
+
+	trace_lookup_ip(ip, 1);
+}
+
+/*
+ * Install func as the tracer by patching the call at trace_call.
+ * Returns 0 on success, -1 if the text could not be made writable.
+ */
+notrace int register_trace_function(trace_func_t func)
+{
+	if (make_text_writable((unsigned long)trace_call) < 0)
+		return -1;
+
+	replace_trace_call((unsigned long)func);
+	trace_func = func;
+	return 0;
+}
+
+/*
+ * Signal every worker thread with SIGUSR1 and wait until all of them have
+ * checked in through suspend().
+ */
+static notrace void suspend_worker_threads(void)
+{
+	struct worker_info *wi;
+	int i, err;
+
+	pthread_mutex_lock(&trace_mux);
+	trace_count = total_nr_workers;
+	trace_suspended = 1;
+	pthread_mutex_unlock(&trace_mux);
+
+	list_for_each_entry(wi, &worker_info_list, worker_info_siblings) {
+		for (i = 0; i < wi->nr_threads; i++) {
+			err = pthread_kill(wi->worker_thread[i], SIGUSR1);
+			if (err == 0)
+				continue;
+
+			/* pthread_kill() returns the error, errno is not set */
+			dprintf("pthread_kill failed: %d\n", err);
+			/* this thread will never park: stop waiting for it */
+			pthread_mutex_lock(&trace_mux);
+			trace_count--;
+			pthread_mutex_unlock(&trace_mux);
+		}
+	}
+
+	pthread_mutex_lock(&trace_mux);
+	while (trace_count > 0) {
+		pthread_mutex_unlock(&trace_mux);
+		pthread_yield();
+		pthread_mutex_lock(&trace_mux);
+	}
+	pthread_mutex_unlock(&trace_mux);
+}
+
+/* Release every thread parked in suspend(). */
+static notrace void resume_worker_threads(void)
+{
+	pthread_mutex_lock(&trace_mux);
+	trace_suspended = 0;
+	pthread_cond_broadcast(&trace_cond);
+	pthread_mutex_unlock(&trace_mux);
+}
+
+/* Rewrite every registered call site into 'call addr'. */
+static notrace void patch_all_sites(unsigned long addr)
+{
+	struct caller *ca;
+	unsigned char *new;
+
+	pthread_mutex_lock(&trace_lock);
+	list_for_each_entry(ca, &caller_list, list) {
+		new = get_new_call(ca->mcount, addr);
+		memcpy((void *)ca->mcount, new, INSN_SIZE);
+	}
+	pthread_mutex_unlock(&trace_lock);
+}
+
+/* Rewrite every registered call site back into a 5-byte NOP. */
+static notrace void nop_all_sites(void)
+{
+	struct caller *ca;
+
+	pthread_mutex_lock(&trace_lock);
+	list_for_each_entry(ca, &caller_list, list) {
+		memcpy((void *)ca->mcount, NOP5, INSN_SIZE);
+	}
+	pthread_mutex_unlock(&trace_lock);
+}
+
+/*
+ * Point all registered call sites at trace_caller while the workers are
+ * suspended. Returns SD_RES_NO_TAG if no tracer has been registered,
+ * SD_RES_SUCCESS otherwise.
+ */
+notrace int trace_enable(void)
+{
+	if (trace_func == trace_call) {
+		dprintf("no tracer available\n");
+		return SD_RES_NO_TAG;
+	}
+
+	suspend_worker_threads();
+	patch_all_sites((unsigned long)trace_caller);
+	resume_worker_threads();
+	dprintf("patch tracer done\n");
+	return SD_RES_SUCCESS;
+}
+
+/* NOP out all registered call sites again; always returns SD_RES_SUCCESS. */
+notrace int trace_disable(void)
+{
+	suspend_worker_threads();
+	nop_all_sites();
+	resume_worker_threads();
+	dprintf("patch nop done\n");
+	return SD_RES_SUCCESS;
+}
+
+/*
+ * Block SIGUSR1 in the calling thread and redirect mcount_call to
+ * do_trace_init(). Returns 0 on success, -1 on failure.
+ */
+notrace int trace_init(void)
+{
+	sigset_t block;
+	int err;
+
+	sigemptyset(&block);
+	sigaddset(&block, SIGUSR1);
+	err = pthread_sigmask(SIG_BLOCK, &block, NULL);
+	if (err != 0) {
+		/* pthread_sigmask() returns the error, errno is not set */
+		dprintf("pthread_sigmask failed: %d\n", err);
+		return -1;
+	}
+
+	if (make_text_writable((unsigned long)mcount_call) < 0) {
+		dprintf("%m\n");
+		return -1;
+	}
+
+	replace_mcount_call((unsigned long)do_trace_init);
+	dprintf("main thread %u\n", (unsigned int)pthread_self());
+	dprintf("trace support enabled.\n");
+	return 0;
+}
diff --git a/sheep/trace/trace.h b/sheep/trace/trace.h
index 92154c6..5dcbca5 100644
--- a/sheep/trace/trace.h
+++ b/sheep/trace/trace.h
@@ -1,6 +1,13 @@
 #ifndef TRACE_H
 #define TRACE_H
 
+#define INSN_SIZE       5       /* call(1b) + offset(4b) = 5b */
+
+#ifndef __ASSEMBLY__
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "list.h"
 #include "util.h"
 
 struct ipinfo {
@@ -12,6 +19,16 @@ struct ipinfo {
 	int fn_narg;		/* Number of function arguments */
 };
 
+struct caller {
+	struct list_head list;
+	struct hlist_node hash;
+	unsigned long mcount;
+	int namelen;
+	const char *name;
+};
+
+typedef void (*trace_func_t)(unsigned long ip, unsigned long *parent_ip);
+
 /* stabs.c */
 extern int get_ipinfo(unsigned long ip, struct ipinfo *info);
 
@@ -20,5 +37,24 @@ extern void mcount(void);
 extern void mcount_call(void);
 extern void trace_caller(void);
 extern void trace_call(unsigned long, unsigned long *);
+extern const unsigned char NOP5[];
+
+/* trace.c */
+extern pthread_cond_t trace_cond;
+extern pthread_mutex_t trace_mux;
+
+extern int trace_init(void);
+extern int register_trace_function(trace_func_t func);
+extern int trace_enable(void);
+extern int trace_disable(void);
+extern struct caller *trace_lookup_ip(unsigned long ip, int create);
+
+#define register_tracer(new)			\
+static void __attribute__((constructor))	\
+register_ ## new ## _tracer(void)		\
+{						\
+	register_trace_function(new);		\
+}
 
+#endif /* __ASSEMBLY__ */
 #endif
-- 
1.7.8.2