[Sheepdog] [PATCH v2 06/13] trace: low-level trace infrastructure proper

Liu Yuan namei.unix at gmail.com
Thu Feb 16 12:21:25 CET 2012


From: Liu Yuan <tailai.ly at taobao.com>


Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 sheep/sheep.c        |    5 +
 sheep/trace/mcount.S |    4 +
 sheep/trace/trace.c  |  263 ++++++++++++++++++++++++++++++++++++++++++++++++++
 sheep/trace/trace.h  |   35 +++++++
 4 files changed, 307 insertions(+), 0 deletions(-)
 create mode 100644 sheep/trace/trace.c

diff --git a/sheep/sheep.c b/sheep/sheep.c
index b86f8e5..392b286 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -21,6 +21,7 @@
 #include <sys/syslog.h>
 
 #include "sheep_priv.h"
+#include "trace/trace.h"
 
 #define EPOLL_SIZE 4096
 #define DEFAULT_OBJECT_DIR "/tmp"
@@ -210,6 +211,10 @@ int main(int argc, char **argv)
 	ret = init_signal();
 	if (ret)
 		exit(1);
+
+	ret = trace_init();
+	if (ret)
+		exit(1);
 	vprintf(SDOG_NOTICE, "sheepdog daemon (version %s) started\n", PACKAGE_VERSION);
 
 	while (!sys_stat_shutdown() || sys->nr_outstanding_reqs != 0)
diff --git a/sheep/trace/mcount.S b/sheep/trace/mcount.S
index c16e5ae..5f1e6b5 100644
--- a/sheep/trace/mcount.S
+++ b/sheep/trace/mcount.S
@@ -64,3 +64,7 @@ trace_call:
 .globl trace_stub
 trace_stub:
 	retq
+
+.globl NOP5
+NOP5:
+	.byte 0x0f,0x1f,0x44,0x00,0x00 # Intel's recommended 5-byte NOP
diff --git a/sheep/trace/trace.c b/sheep/trace/trace.c
new file mode 100644
index 0000000..611f6f5
--- /dev/null
+++ b/sheep/trace/trace.c
@@ -0,0 +1,263 @@
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <signal.h>
+
+#include "trace.h"
+#include "logger.h"
+#include "list.h"
+#include "work.h"
+#include "sheepdog_proto.h"
+
+#define TRACE_HASH_BITS       7
+#define TRACE_HASH_SIZE       (1 << TRACE_HASH_BITS)
+
+static struct hlist_head trace_hashtable[TRACE_HASH_SIZE];
+static LIST_HEAD(caller_list);
+static pthread_mutex_t trace_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static trace_func_t trace_func = trace_call;
+static int trace_count;
+
+pthread_cond_t trace_cond = PTHREAD_COND_INITIALIZER;
+pthread_mutex_t trace_mux = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * A 5-byte x86-64 near-call instruction, viewed either as raw bytes
+ * (for memcpy() into a call site) or as opcode + rel32 displacement.
+ */
+union instruction {
+	unsigned char start[INSN_SIZE];	/* raw bytes, INSN_SIZE == 5 */
+	struct {
+		char opcode;		/* 0xe8 for "call rel32" */
+		int offset;		/* displacement, relative to the next insn */
+	} __attribute__((packed));
+};
+
+/*
+ * SIGUSR1 handler run by worker threads: park the thread while the
+ * tracer patches code.  Decrements trace_count (so the patcher can tell
+ * when every worker has stopped) and sleeps on trace_cond until
+ * resume_worker_threads() broadcasts.
+ *
+ * NOTE(review): pthread_self() returns an opaque pthread_t, which is
+ * not guaranteed to fit an int — the cast is for logging only.
+ */
+notrace void suspend(int num)
+{
+	dprintf("worker thread %u going to suspend\n", (int)pthread_self());
+
+	pthread_mutex_lock(&trace_mux);
+	trace_count--;
+	/* pthread_cond_wait() releases trace_mux atomically, so the
+	 * patcher cannot observe trace_count == 0 before this thread is
+	 * actually waiting. */
+	pthread_cond_wait(&trace_cond, &trace_mux);
+	pthread_mutex_unlock(&trace_mux);
+	dprintf("worker thread going to resume\n");
+}
+
+/* Map an instruction pointer to a bucket index in trace_hashtable. */
+static inline int trace_hash(unsigned long ip)
+{
+	return hash_64(ip, TRACE_HASH_BITS);
+}
+
+/* Stop speculative execution */
+static inline void sync_core(void)
+{
+	int tmp;
+
+	/* CPUID is a serializing instruction: it drains the pipeline so
+	 * that subsequently fetched code sees the bytes just patched. */
+	asm volatile("cpuid" : "=a" (tmp) : "0" (1)
+			: "ebx", "ecx", "edx", "memory");
+}
+
+/*
+ * Build the 5-byte "call rel32" instruction that, when placed at @ip,
+ * calls @addr.  The displacement is encoded relative to the end of the
+ * instruction (ip + INSN_SIZE).
+ *
+ * NOTE(review): returns a pointer into a single static buffer, so this
+ * is not thread-safe.  Callers appear to serialize via trace_lock or
+ * run during init — confirm no concurrent callers exist.
+ */
+static notrace unsigned char *get_new_call(unsigned long ip, unsigned long addr)
+{
+	static union instruction code;
+
+	code.opcode = 0xe8; /* opcode of call */
+	code.offset = (int)(addr - ip - INSN_SIZE);
+
+	return code.start;
+}
+
+/*
+ * Overwrite the 5 bytes at @ip with "call func", then serialize the
+ * core so the new instruction is fetched.  The text at @ip must already
+ * have been made writable (see make_text_writable()).
+ */
+static notrace void replace_call(unsigned long ip, unsigned long func)
+{
+	unsigned char *new;
+
+	new = get_new_call(ip, func);
+	memcpy((void *)ip, new, INSN_SIZE);
+	sync_core();
+}
+
+/* Redirect the call inside mcount_call (mcount.S) to @func. */
+static inline void replace_mcount_call(unsigned long func)
+{
+	unsigned long ip = (unsigned long)mcount_call;
+	
+	replace_call(ip, func);
+}
+
+/* Redirect the call inside trace_call (mcount.S) to @func. */
+static inline void replace_trace_call(unsigned long func)
+{
+	unsigned long ip = (unsigned long)trace_call;
+
+	replace_call(ip, func);
+}
+
+/*
+ * Make the text page(s) holding the INSN_SIZE bytes at @ip writable so
+ * the call site can be patched.  Returns mprotect()'s result: 0 on
+ * success, -1 with errno set on failure.
+ */
+static notrace int make_text_writable(unsigned long ip)
+{
+	unsigned long start = ip & ~(getpagesize() - 1);
+
+	/* The length must run from the page base to the end of the
+	 * instruction: a call site within INSN_SIZE - 1 bytes of a page
+	 * boundary straddles two pages, and a bare INSN_SIZE measured
+	 * from the page base would leave the second page read-only,
+	 * crashing the later memcpy(). */
+	return mprotect((void *)start, ip + INSN_SIZE - start,
+			PROT_READ | PROT_EXEC | PROT_WRITE);
+}
+
+/*
+ * Look up the caller record for the mcount call site at @ip.  With
+ * @create non-zero, allocate and register a new record (resolving the
+ * function name via get_ipinfo()) when none exists.  Returns NULL when
+ * the ip is unknown and not created.
+ */
+notrace struct caller *trace_lookup_ip(unsigned long ip, int create)
+{
+	int h = trace_hash(ip);
+	struct hlist_head *head = trace_hashtable + h;
+	struct hlist_node *node;
+	struct ipinfo info;
+	struct caller *new = NULL;
+
+	pthread_mutex_lock(&trace_lock);
+	if (hlist_empty(head))
+		goto not_found;
+
+	hlist_for_each_entry(new, node, head, hash) {
+		if (new->mcount == ip)
+			goto out;
+	}
+	/* The loop cursor is left pointing at the last examined entry
+	 * when the bucket has no match; reset it so a !create lookup
+	 * returns NULL instead of an unrelated caller. */
+	new = NULL;
+not_found:
+	if (create) {
+		if (get_ipinfo(ip, &info) < 0) {
+			dprintf("ip: %lx not found\n", ip);
+			new = NULL;
+			goto out;
+		}
+		/* unlock to avoid deadlock */
+		pthread_mutex_unlock(&trace_lock);
+		new = xzalloc(sizeof(*new));
+		pthread_mutex_lock(&trace_lock);
+		/* NOTE(review): another thread may insert the same ip in
+		 * the unlocked window above, leaving a duplicate entry —
+		 * re-check the bucket here if that matters. */
+		new->mcount = ip;
+		new->namelen = info.fn_namelen;
+		new->name = info.fn_name;
+		hlist_add_head(&new->hash, head);
+		list_add(&new->list, &caller_list);
+		dprintf("add %.*s\n", info.fn_namelen, info.fn_name);
+	}
+out:
+	pthread_mutex_unlock(&trace_lock);
+	return new;
+}
+
+/*
+ * NOP out all the mcount call sites that are supposed to be traced.
+ * Later we can enable tracing by pointing these sites at trace_caller,
+ * where we can override trace_call() with our own trace function.  We
+ * can do this because the function below records the IP of the
+ * 'call mcount' instruction inside each caller.
+ *
+ * IP points to the return address.
+ */
+/*
+ * Called (via the patched mcount_call) once per traceable call site:
+ * make the site writable, NOP it out, and record it in the caller
+ * table so trace_enable()/trace_disable() can patch it later.
+ *
+ * A site whose page cannot be made writable is silently skipped —
+ * it will simply never be traced.
+ */
+static notrace void do_trace_init(unsigned long ip)
+{
+
+	if (make_text_writable(ip) < 0)
+		return;
+
+	memcpy((void *)ip, NOP5, INSN_SIZE);
+	sync_core();
+
+	trace_lookup_ip(ip, 1);
+}
+
+/*
+ * Install @func as the active tracer by patching the call inside
+ * trace_call (mcount.S) to point at it.  Returns 0 on success, -1 if
+ * the text could not be made writable.
+ */
+notrace int register_trace_function(trace_func_t func)
+{
+	if (make_text_writable((unsigned long)trace_call) < 0)
+		return -1;
+
+	replace_trace_call((unsigned long)func);
+	trace_func = func;
+	return 0;
+}
+
+/*
+ * Signal every worker thread with SIGUSR1 (handled by suspend()) and
+ * busy-wait, yielding, until all of them have decremented trace_count —
+ * i.e. all are parked on trace_cond.  Code patching is only safe once
+ * no worker can be executing a call site being rewritten.
+ *
+ * NOTE(review): pthread_yield() is a GNU extension; sched_yield() is
+ * the portable equivalent.
+ */
+static notrace void suspend_worker_threads(void)
+{
+	struct worker_info *wi;
+	int i;
+	trace_count = total_nr_workers;
+	list_for_each_entry(wi, &worker_info_list, worker_info_siblings) {
+		for (i = 0; i < wi->nr_threads; i++)
+			if (pthread_kill(wi->worker_thread[i], SIGUSR1) != 0)
+				dprintf("%m\n");
+	}
+wait_for_worker_suspend:
+	pthread_mutex_lock(&trace_mux);
+	if (trace_count > 0) {
+		pthread_mutex_unlock(&trace_mux);
+		pthread_yield();
+		goto wait_for_worker_suspend;
+	}
+	pthread_mutex_unlock(&trace_mux);
+}
+
+/* Wake every worker parked in suspend() after patching is done. */
+static notrace void resume_worker_threads(void)
+{
+	pthread_mutex_lock(&trace_mux);
+	pthread_cond_broadcast(&trace_cond);
+	pthread_mutex_unlock(&trace_mux);
+}
+
+/*
+ * Rewrite every recorded call site to "call addr".  Must be called with
+ * all worker threads suspended; the sites were made writable earlier by
+ * do_trace_init().
+ */
+static notrace void patch_all_sites(unsigned long addr)
+{
+	struct caller *ca;
+	unsigned char *new;
+
+	pthread_mutex_lock(&trace_lock);
+	list_for_each_entry(ca, &caller_list, list) {
+		new = get_new_call(ca->mcount, addr);
+		memcpy((void *)ca->mcount, new, INSN_SIZE);
+	}
+	pthread_mutex_unlock(&trace_lock);
+}
+
+/*
+ * Restore every recorded call site to the 5-byte NOP, disabling
+ * tracing.  Must be called with all worker threads suspended.
+ */
+static notrace void nop_all_sites(void)
+{
+	struct caller *ca;
+
+	pthread_mutex_lock(&trace_lock);
+	list_for_each_entry(ca, &caller_list, list) {
+		memcpy((void *)ca->mcount, NOP5, INSN_SIZE);
+	}
+	pthread_mutex_unlock(&trace_lock);
+}
+
+/*
+ * Turn tracing on: with workers suspended, point every recorded call
+ * site at trace_caller.  Fails with SD_RES_NO_TAG when no tracer has
+ * been registered (trace_func still equals the trace_call stub).
+ */
+notrace int trace_enable(void)
+{
+	if (trace_func == trace_call) {
+		dprintf("no tracer available\n");
+		return SD_RES_NO_TAG;
+	}
+
+	suspend_worker_threads();
+	patch_all_sites((unsigned long)trace_caller);
+	resume_worker_threads();
+	dprintf("patch tracer done\n");
+	return SD_RES_SUCCESS;
+}
+
+/*
+ * Turn tracing off: with workers suspended, NOP every recorded call
+ * site again.  Always returns SD_RES_SUCCESS.
+ */
+notrace int trace_disable(void)
+{
+	suspend_worker_threads();
+	nop_all_sites();
+	resume_worker_threads();
+	dprintf("patch nop done\n");
+	return SD_RES_SUCCESS;
+}
+
+/*
+ * One-time setup, called from main(): block SIGUSR1 in the calling
+ * (main) thread so only workers take the suspend signal, then patch
+ * mcount_call to invoke do_trace_init(), which will NOP and record
+ * each call site the first time it is hit.  Returns 0 on success,
+ * -1 on failure.
+ *
+ * NOTE(review): the sigset_t lines use space indentation while the
+ * rest of the file uses tabs — worth normalizing in a respin.
+ */
+notrace int trace_init()
+{
+        sigset_t block;
+
+        sigemptyset(&block);
+        sigaddset(&block, SIGUSR1);
+	if (pthread_sigmask(SIG_BLOCK, &block, NULL) != 0) {
+		dprintf("%m\n");
+		return -1;
+	}
+
+	if (make_text_writable((unsigned long)mcount_call) < 0) {
+		dprintf("%m\n");
+		return -1;
+	}
+
+	replace_mcount_call((unsigned long)do_trace_init);
+	dprintf("main thread %u\n", (int)pthread_self());
+	dprintf("trace support enabled.\n");
+	return 0;
+}
diff --git a/sheep/trace/trace.h b/sheep/trace/trace.h
index 92154c6..5dcbca5 100644
--- a/sheep/trace/trace.h
+++ b/sheep/trace/trace.h
@@ -1,6 +1,12 @@
 #ifndef TRACE_H
 #define TRACE_H
 
+#define INSN_SIZE       5       /* call(1b) + offset(4b) = 5b */
+
+#ifndef __ASSEMBLY__
+#include <stdlib.h>
+
+#include "list.h"
 #include "util.h"
 
 struct ipinfo {
@@ -12,6 +18,16 @@ struct ipinfo {
         int fn_narg;                /* Number of function arguments */
 };
 
+/* One recorded mcount call site, keyed by its patch address. */
+struct caller {
+	struct list_head list;		/* link on caller_list (trace.c) */
+	struct hlist_node hash;		/* link in trace_hashtable bucket */
+	unsigned long mcount;		/* address of the patched call site */
+	int namelen;			/* length of name — presumably not
+					 * NUL-terminated; confirm in stabs.c */
+	const char *name;		/* function name from get_ipinfo() */
+};
+
+/* Tracer signature: (return address, pointer to parent return address). */
+typedef void (*trace_func_t)(unsigned long ip, unsigned long *parent_ip);
+
 /* stabs.c */
 extern int get_ipinfo(unsigned long ip, struct ipinfo *info);
 
@@ -20,5 +36,24 @@ extern void mcount(void);
 extern void mcount_call(void);
 extern void trace_caller(void);
 extern void trace_call(unsigned long, unsigned long *);
+extern const unsigned char NOP5[];
+
+/* trace.c */
+extern pthread_cond_t trace_cond;
+extern pthread_mutex_t trace_mux;
+
+extern int trace_init(void);
+extern int register_trace_function(trace_func_t func);
+extern int trace_enable(void);
+extern int trace_disable(void);
+extern struct caller *trace_lookup_ip(unsigned long ip, int create);
+
+/*
+ * Define a constructor that registers @new as the tracer at load time.
+ * The macro argument is pasted into the constructor's name so that each
+ * tracer gets a unique symbol; the original "register_ ## _tracer"
+ * expanded to the same "register__tracer" for every use, which would
+ * collide as soon as two tracers were registered in one binary.
+ */
+#define register_tracer(new)			\
+static void __attribute__((constructor))	\
+register_ ## new(void)				\
+{  						\
+	register_trace_function(new);		\
+}
 
+#endif /* __ASSEMBLY__ */
 #endif
-- 
1.7.8.2




More information about the sheepdog mailing list