[Sheepdog] [RFC PATCH 06/11] trace: low-level trace infrastructure proper
Liu Yuan
namei.unix at gmail.com
Tue Jan 17 12:59:57 CET 2012
From: Liu Yuan <tailai.ly at taobao.com>
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
sheep/sheep.c | 5 +
sheep/trace/mcount.S | 4 +
sheep/trace/trace.c | 263 ++++++++++++++++++++++++++++++++++++++++++++++++++
sheep/trace/trace.h | 35 +++++++
4 files changed, 307 insertions(+), 0 deletions(-)
create mode 100644 sheep/trace/trace.c
diff --git a/sheep/sheep.c b/sheep/sheep.c
index b86f8e5..392b286 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -21,6 +21,7 @@
#include <sys/syslog.h>
#include "sheep_priv.h"
+#include "trace/trace.h"
#define EPOLL_SIZE 4096
#define DEFAULT_OBJECT_DIR "/tmp"
@@ -210,6 +211,10 @@ int main(int argc, char **argv)
ret = init_signal();
if (ret)
exit(1);
+
+ ret = trace_init();
+ if (ret)
+ exit(1);
vprintf(SDOG_NOTICE, "sheepdog daemon (version %s) started\n", PACKAGE_VERSION);
while (!sys_stat_shutdown() || sys->nr_outstanding_reqs != 0)
diff --git a/sheep/trace/mcount.S b/sheep/trace/mcount.S
index c16e5ae..5f1e6b5 100644
--- a/sheep/trace/mcount.S
+++ b/sheep/trace/mcount.S
@@ -64,3 +64,7 @@ trace_call:
.globl trace_stub
trace_stub:
retq
+
+.globl NOP5
+NOP5:
+ .byte 0x0f,0x1f,0x44,0x00,0x00 # Intel recommended one for 5 bytes nops
diff --git a/sheep/trace/trace.c b/sheep/trace/trace.c
new file mode 100644
index 0000000..611f6f5
--- /dev/null
+++ b/sheep/trace/trace.c
@@ -0,0 +1,263 @@
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <signal.h>
+
+#include "trace.h"
+#include "logger.h"
+#include "list.h"
+#include "work.h"
+#include "sheepdog_proto.h"
+
+#define TRACE_HASH_BITS 7
+#define TRACE_HASH_SIZE (1 << TRACE_HASH_BITS)
+
+static struct hlist_head trace_hashtable[TRACE_HASH_SIZE];
+static LIST_HEAD(caller_list);
+static pthread_mutex_t trace_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static trace_func_t trace_func = trace_call;
+static int trace_count;
+
+pthread_cond_t trace_cond = PTHREAD_COND_INITIALIZER;
+pthread_mutex_t trace_mux = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * One x86-64 near call instruction: a 1-byte opcode (0xe8) followed by a
+ * 4-byte signed displacement.  The union lets us assemble the instruction
+ * field by field and then copy it as a raw INSN_SIZE-byte string.
+ */
+union instruction {
+	unsigned char start[INSN_SIZE];
+	struct {
+		char opcode;
+		int offset;
+	} __attribute__((packed));
+};
+
+/*
+ * SIGUSR1 handler run by worker threads: account for this thread in
+ * trace_count, then sleep on trace_cond until resume_worker_threads()
+ * broadcasts.
+ *
+ * NOTE(review): pthread_mutex_lock()/pthread_cond_wait() are not
+ * async-signal-safe, so this relies on workers being in a quiescent,
+ * signal-tolerant state when signalled -- confirm against the worker
+ * thread implementation.
+ */
+notrace void suspend(int num)
+{
+	dprintf("worker thread %u going to suspend\n", (int)pthread_self());
+
+	pthread_mutex_lock(&trace_mux);
+	trace_count--;
+	pthread_cond_wait(&trace_cond, &trace_mux);
+	pthread_mutex_unlock(&trace_mux);
+	dprintf("worker thread going to resume\n");
+}
+
+/* Map an instruction pointer to a trace_hashtable bucket index. */
+static inline int trace_hash(unsigned long ip)
+{
+	return hash_64(ip, TRACE_HASH_BITS);
+}
+
+/*
+ * Stop speculative execution: CPUID is a serializing instruction, so this
+ * forces the CPU to discard any instructions prefetched across the code
+ * bytes we just rewrote.
+ */
+static inline void sync_core(void)
+{
+	int tmp;
+
+	asm volatile("cpuid" : "=a" (tmp) : "0" (1)
+		     : "ebx", "ecx", "edx", "memory");
+}
+
+/*
+ * Build a 'call <addr>' instruction to be installed at 'ip'.  The
+ * displacement is relative to the instruction that follows the call
+ * (ip + INSN_SIZE), per the x86 E8 encoding.
+ *
+ * Returns a pointer to a static buffer, so this is not reentrant;
+ * callers must serialize (e.g. under trace_lock, or while all worker
+ * threads are suspended).
+ */
+static notrace unsigned char *get_new_call(unsigned long ip, unsigned long addr)
+{
+	static union instruction code;
+
+	code.opcode = 0xe8; /* opcode of call */
+	code.offset = (int)(addr - ip - INSN_SIZE);
+
+	return code.start;
+}
+
+/*
+ * Overwrite the INSN_SIZE code bytes at 'ip' with 'call func', then
+ * serialize the instruction stream.  The text at 'ip' must already have
+ * been made writable via make_text_writable().
+ */
+static notrace void replace_call(unsigned long ip, unsigned long func)
+{
+	unsigned char *new;
+
+	new = get_new_call(ip, func);
+	memcpy((void *)ip, new, INSN_SIZE);
+	sync_core();
+}
+
+/* Redirect the call instruction inside mcount_call (mcount.S) to 'func'. */
+static inline void replace_mcount_call(unsigned long func)
+{
+	unsigned long ip = (unsigned long)mcount_call;
+
+	replace_call(ip, func);
+}
+
+/* Redirect the call instruction inside trace_call (mcount.S) to 'func'. */
+static inline void replace_trace_call(unsigned long func)
+{
+	unsigned long ip = (unsigned long)trace_call;
+
+	replace_call(ip, func);
+}
+
+/*
+ * Make the text page(s) backing the INSN_SIZE-byte patch site at 'ip'
+ * writable (as well as readable/executable).
+ *
+ * The protected length must cover from the start of ip's page through
+ * ip + INSN_SIZE: passing just INSN_SIZE would leave the second page
+ * read-only when the instruction straddles a page boundary, and the
+ * later memcpy() into it would fault.
+ *
+ * Returns 0 on success, -1 with errno set on mprotect() failure.
+ */
+static notrace int make_text_writable(unsigned long ip)
+{
+	unsigned long start = ip & ~(getpagesize() - 1);
+
+	return mprotect((void *)start, ip + INSN_SIZE - start,
+			PROT_READ | PROT_EXEC | PROT_WRITE);
+}
+
+/*
+ * Look up the caller entry whose 'call mcount' site is at 'ip'.
+ *
+ * If no entry exists and 'create' is non-zero, resolve the address via
+ * get_ipinfo() and insert a new entry into both the hash table and the
+ * global caller_list.  Returns NULL when 'ip' is unknown and nothing
+ * was created.
+ */
+notrace struct caller *trace_lookup_ip(unsigned long ip, int create)
+{
+	int h = trace_hash(ip);
+	struct hlist_head *head = trace_hashtable + h;
+	struct hlist_node *node;
+	struct ipinfo info;
+	struct caller *new = NULL;
+
+	pthread_mutex_lock(&trace_lock);
+	if (hlist_empty(head))
+		goto not_found;
+
+	hlist_for_each_entry(new, node, head, hash) {
+		if (new->mcount == ip)
+			goto out;
+	}
+not_found:
+	/*
+	 * On a miss the walk above leaves 'new' pointing at the last entry
+	 * it examined; reset it so a pure lookup (create == 0) returns NULL
+	 * instead of an unrelated caller.
+	 */
+	new = NULL;
+	if (create) {
+		if (get_ipinfo(ip, &info) < 0) {
+			dprintf("ip: %lx not found\n", ip);
+			goto out;
+		}
+		/*
+		 * Drop the lock around the allocation to avoid deadlock:
+		 * xzalloc() may itself be traced and re-enter this function.
+		 * NOTE(review): another thread could insert the same ip in
+		 * this unlocked window, leaving a duplicate entry -- confirm
+		 * callers serialize creation.
+		 */
+		pthread_mutex_unlock(&trace_lock);
+		new = xzalloc(sizeof(*new));
+		pthread_mutex_lock(&trace_lock);
+		new->mcount = ip;
+		new->namelen = info.fn_namelen;
+		new->name = info.fn_name;
+		hlist_add_head(&new->hash, head);
+		list_add(&new->list, &caller_list);
+		dprintf("add %.*s\n", info.fn_namelen, info.fn_name);
+	}
+out:
+	pthread_mutex_unlock(&trace_lock);
+	return new;
+}
+
+/*
+ * Try to NOP all the mcount call sites that are supposed to be traced.
+ * Later we can enable it by asking these sites to point to trace_caller,
+ * where we can override trace_call() with our own trace function.  We can
+ * do this because this function records the IP of 'call mcount' inside
+ * the callers.
+ *
+ * 'ip' points to the return address of the 'call mcount' site, i.e. the
+ * site itself once patched.  If the page cannot be made writable the site
+ * is silently skipped (it simply keeps calling mcount).
+ */
+static notrace void do_trace_init(unsigned long ip)
+{
+
+	if (make_text_writable(ip) < 0)
+		return;
+
+	memcpy((void *)ip, NOP5, INSN_SIZE);
+	sync_core();
+
+	trace_lookup_ip(ip, 1);
+}
+
+/*
+ * Install 'func' as the active tracer by rewriting the call inside
+ * trace_call (mcount.S) to target it.  Intended to be invoked from a
+ * constructor via the register_tracer() macro.
+ *
+ * Returns 0 on success, -1 if the trace_call text could not be made
+ * writable.
+ */
+notrace int register_trace_function(trace_func_t func)
+{
+	if (make_text_writable((unsigned long)trace_call) < 0)
+		return -1;
+
+	replace_trace_call((unsigned long)func);
+	trace_func = func;
+	return 0;
+}
+
+/*
+ * Send SIGUSR1 to every worker thread and busy-wait (yielding) until all
+ * of them have entered suspend() and decremented trace_count to zero.
+ * Workers must be parked before any call site is rewritten, so none of
+ * them executes a half-patched instruction.
+ *
+ * NOTE(review): presumably must not be called from a worker thread
+ * itself, or it would wait for its own suspension -- confirm callers.
+ */
+static notrace void suspend_worker_threads(void)
+{
+	struct worker_info *wi;
+	int i;
+	trace_count = total_nr_workers;
+	list_for_each_entry(wi, &worker_info_list, worker_info_siblings) {
+		for (i = 0; i < wi->nr_threads; i++)
+			if (pthread_kill(wi->worker_thread[i], SIGUSR1) != 0)
+				dprintf("%m\n");
+	}
+wait_for_worker_suspend:
+	pthread_mutex_lock(&trace_mux);
+	if (trace_count > 0) {
+		pthread_mutex_unlock(&trace_mux);
+		pthread_yield();
+		goto wait_for_worker_suspend;
+	}
+	pthread_mutex_unlock(&trace_mux);
+}
+
+/* Wake every worker thread sleeping in suspend() on trace_cond. */
+static notrace void resume_worker_threads(void)
+{
+	pthread_mutex_lock(&trace_mux);
+	pthread_cond_broadcast(&trace_cond);
+	pthread_mutex_unlock(&trace_mux);
+}
+
+/*
+ * Rewrite every recorded mcount call site to 'call addr'.  Must run while
+ * all worker threads are suspended, since the sites are patched in place.
+ * trace_lock also serializes use of get_new_call()'s static buffer.
+ */
+static notrace void patch_all_sites(unsigned long addr)
+{
+	struct caller *ca;
+	unsigned char *new;
+
+	pthread_mutex_lock(&trace_lock);
+	list_for_each_entry(ca, &caller_list, list) {
+		new = get_new_call(ca->mcount, addr);
+		memcpy((void *)ca->mcount, new, INSN_SIZE);
+	}
+	pthread_mutex_unlock(&trace_lock);
+}
+
+/*
+ * Restore every recorded call site to a 5-byte NOP, disabling tracing.
+ * Like patch_all_sites(), must run while worker threads are suspended.
+ */
+static notrace void nop_all_sites(void)
+{
+	struct caller *ca;
+
+	pthread_mutex_lock(&trace_lock);
+	list_for_each_entry(ca, &caller_list, list) {
+		memcpy((void *)ca->mcount, NOP5, INSN_SIZE);
+	}
+	pthread_mutex_unlock(&trace_lock);
+}
+
+/*
+ * Enable tracing: patch every recorded call site to enter trace_caller,
+ * with all worker threads suspended for the duration of the rewrite.
+ *
+ * Returns SD_RES_NO_TAG if no tracer has been registered (trace_func
+ * still points at the trace_call stub), SD_RES_SUCCESS otherwise.
+ */
+notrace int trace_enable(void)
+{
+	if (trace_func == trace_call) {
+		dprintf("no tracer available\n");
+		return SD_RES_NO_TAG;
+	}
+
+	suspend_worker_threads();
+	patch_all_sites((unsigned long)trace_caller);
+	resume_worker_threads();
+	dprintf("patch tracer done\n");
+	return SD_RES_SUCCESS;
+}
+
+/*
+ * Disable tracing: NOP out every recorded call site, with worker threads
+ * suspended for the duration of the rewrite.  Always returns
+ * SD_RES_SUCCESS.
+ */
+notrace int trace_disable(void)
+{
+	suspend_worker_threads();
+	nop_all_sites();
+	resume_worker_threads();
+	dprintf("patch nop done\n");
+	return SD_RES_SUCCESS;
+}
+
+/*
+ * One-time tracer initialization, called from main() at startup.
+ *
+ * Blocks SIGUSR1 in the calling thread (NOTE(review): presumably so that
+ * only worker threads take the suspend signal -- confirm where workers
+ * unblock it), then redirects mcount_call to do_trace_init so each traced
+ * function registers its call site and gets NOPed on first invocation.
+ *
+ * Declared as trace_init(void) to match the prototype in trace.h; the
+ * empty parameter list form is an old-style unprototyped declaration.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+notrace int trace_init(void)
+{
+	sigset_t block;
+
+	sigemptyset(&block);
+	sigaddset(&block, SIGUSR1);
+	if (pthread_sigmask(SIG_BLOCK, &block, NULL) != 0) {
+		dprintf("%m\n");
+		return -1;
+	}
+
+	if (make_text_writable((unsigned long)mcount_call) < 0) {
+		dprintf("%m\n");
+		return -1;
+	}
+
+	replace_mcount_call((unsigned long)do_trace_init);
+	dprintf("main thread %u\n", (int)pthread_self());
+	dprintf("trace support enabled.\n");
+	return 0;
+}
diff --git a/sheep/trace/trace.h b/sheep/trace/trace.h
index 92154c6..5dcbca5 100644
--- a/sheep/trace/trace.h
+++ b/sheep/trace/trace.h
@@ -1,6 +1,12 @@
#ifndef TRACE_H
#define TRACE_H
+#define INSN_SIZE 5 /* call(1b) + offset(4b) = 5b */
+
+#ifndef __ASSEMBLY__
+#include <stdlib.h>
+
+#include "list.h"
#include "util.h"
struct ipinfo {
@@ -12,6 +18,16 @@ struct ipinfo {
int fn_narg; /* Number of function arguments */
};
+/* One recorded 'call mcount' site, indexed by hash and listed globally. */
+struct caller {
+	struct list_head list;	/* entry in the global caller_list */
+	struct hlist_node hash;	/* entry in its trace_hashtable bucket */
+	unsigned long mcount;	/* address of the patched call site */
+	int namelen;		/* length of 'name' */
+	const char *name;	/* function name, filled from get_ipinfo() */
+};
+
+/* Tracer callback: ip of the traced site, pointer to its return address. */
+typedef void (*trace_func_t)(unsigned long ip, unsigned long *parent_ip);
+
/* stabs.c */
extern int get_ipinfo(unsigned long ip, struct ipinfo *info);
@@ -20,5 +36,24 @@ extern void mcount(void);
extern void mcount_call(void);
extern void trace_caller(void);
extern void trace_call(unsigned long, unsigned long *);
+extern const unsigned char NOP5[];
+
+/* trace.c */
+extern pthread_cond_t trace_cond;
+extern pthread_mutex_t trace_mux;
+
+extern int trace_init(void);
+extern int register_trace_function(trace_func_t func);
+extern int trace_enable(void);
+extern int trace_disable(void);
+extern struct caller *trace_lookup_ip(unsigned long ip, int create);
+
+/*
+ * Define a constructor that installs 'new' as the tracer at load time.
+ * The tracer name is pasted into the constructor symbol: the previous
+ * 'register_ ## _tracer' form always produced the same identifier
+ * (register__tracer), so two tracers in one binary would collide.
+ */
+#define register_tracer(new)			\
+static void __attribute__((constructor))	\
+register_ ## new(void)				\
+{						\
+	register_trace_function(new);		\
+}
+#endif /* __ASSEMBLY__ */
#endif
--
1.7.8.2
More information about the sheepdog
mailing list