[sheepdog] [PATCH 3/4] trace: fix random crash for enable command

Liu Yuan namei.unix at gmail.com
Sat Sep 1 17:26:17 CEST 2012


From: Liu Yuan <tailai.ly at taobao.com>

We should wait until every worker's signal handler is running, as the old code
did; otherwise we get random crashes when a signal handler is called after
patching.

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 sheep/request.c     |  2 +-
 sheep/trace/trace.c | 24 ++++++++++++++++++------
 sheep/work.c        |  4 ++--
 sheep/work.h        |  2 +-
 4 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/sheep/request.c b/sheep/request.c
index 7c2dec8..bcf705a 100644
--- a/sheep/request.c
+++ b/sheep/request.c
@@ -446,7 +446,7 @@ int exec_local_req(struct sd_req *rq, void *data)
 
 	ret = eventfd_read(req->wait_efd, &value);
 	if (ret < 0)
-		eprintf("event fd read error %m");
+		eprintf("%m\n");
 
 	close(req->wait_efd);
 	ret = req->rp.result;
diff --git a/sheep/trace/trace.c b/sheep/trace/trace.c
index 79868b7..4b59b09 100644
--- a/sheep/trace/trace.c
+++ b/sheep/trace/trace.c
@@ -39,7 +39,9 @@ static int trace_efd;
 static int nr_short_thread;
 static int trace_in_patch;
 
-pthread_mutex_t suspend_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_mutex_t suspend_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t suspend_cond = PTHREAD_COND_INITIALIZER;
+static int suspend_count;
 
 static struct strbuf *buffer;
 static int nr_cpu;
@@ -54,11 +56,10 @@ union instruction {
 
 static notrace void suspend(int num)
 {
-	dprintf("going to suspend\n");
 	pthread_mutex_lock(&suspend_lock);
-	/* Now I am suspended and sleep on suspend_lock */
+	suspend_count--;
+	pthread_cond_wait(&suspend_cond, &suspend_lock);
 	pthread_mutex_unlock(&suspend_lock);
-	dprintf("going to resume\n");
 }
 
 static inline int trace_hash(unsigned long ip)
@@ -180,18 +181,29 @@ notrace int register_trace_function(trace_func_t func)
 static notrace void suspend_worker_threads(void)
 {
 	struct worker_info *wi;
+	suspend_count = total_ordered_workers;
 
 	/* Hold the lock, then all other worker can sleep on it */
-	pthread_mutex_lock(&suspend_lock);
 	list_for_each_entry(wi, &worker_info_list, worker_info_siblings) {
-		if (wi->worker_thread &&
+		if (wi->ordered &&
 		    pthread_kill(wi->worker_thread, SIGUSR2) != 0)
 			dprintf("%m\n");
 	}
+
+wait_for_worker_suspend:
+	pthread_mutex_lock(&suspend_lock);
+	if (suspend_count > 0) {
+		pthread_mutex_unlock(&suspend_lock);
+		pthread_yield();
+		goto wait_for_worker_suspend;
+	}
+	pthread_mutex_unlock(&suspend_lock);
 }
 
 static notrace void resume_worker_threads(void)
 {
+	pthread_mutex_lock(&suspend_lock);
+	pthread_cond_broadcast(&suspend_cond);
 	pthread_mutex_unlock(&suspend_lock);
 }
 
diff --git a/sheep/work.c b/sheep/work.c
index 80096e3..d96026d 100644
--- a/sheep/work.c
+++ b/sheep/work.c
@@ -35,7 +35,7 @@
 #include "trace/trace.h"
 
 static int efd;
-int total_nr_workers;
+int total_ordered_workers;
 LIST_HEAD(worker_info_list);
 
 enum wq_state {
@@ -246,11 +246,11 @@ struct work_queue *init_work_queue(const char *name, bool ordered)
 		}
 
 		pthread_mutex_unlock(&wi->startup_lock);
+		total_ordered_workers++;
 	}
 
 	list_add(&wi->worker_info_siblings, &worker_info_list);
 
-	total_nr_workers++;
 	return &wi->q;
 destroy_threads:
 
diff --git a/sheep/work.h b/sheep/work.h
index bcb8383..4d45dd6 100644
--- a/sheep/work.h
+++ b/sheep/work.h
@@ -42,7 +42,7 @@ struct worker_info {
 };
 
 extern struct list_head worker_info_list;
-extern int total_nr_workers;
+extern int total_ordered_workers;
 
 /* if 'ordered' is true, the work queue are processes in order. */
 struct work_queue *init_work_queue(const char *name, bool ordered);
-- 
1.7.12.84.gefa6462




More information about the sheepdog mailing list