[sheepdog] [PATCH v2] call a default signal handler in crash_handler()

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Mon Apr 15 02:31:34 CEST 2013


This re-raises the signal in the crash handler so that the default signal
handler can dump a core file.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 collie/collie.c | 10 +++++++++-
 include/util.h  |  1 +
 lib/logger.c    | 14 ++++++++++++--
 lib/util.c      | 28 +++++++++++++++++++++++++++-
 sheep/sheep.c   |  2 ++
 5 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/collie/collie.c b/collie/collie.c
index 2394350..045d175 100644
--- a/collie/collie.c
+++ b/collie/collie.c
@@ -320,7 +320,15 @@ static void crash_handler(int signo)
 
 	sd_backtrace();
 
-	exit(EXIT_SYSFAIL);
+	/*
+	 * OOM raises SIGABRT in xmalloc but the administrator expects
+	 * that collie exits with EXIT_SYSFAIL.  We have to give up
+	 * dumping a core file in this case.
+	 */
+	if (signo == SIGABRT)
+		exit(EXIT_SYSFAIL);
+
+	reraise_crash_signal(signo, EXIT_SYSFAIL);
 }
 
 int main(int argc, char **argv)
diff --git a/include/util.h b/include/util.h
index 8ba6710..38efb8b 100644
--- a/include/util.h
+++ b/include/util.h
@@ -89,6 +89,7 @@ int purge_directory(char *dir_path);
 bool is_numeric(const char *p);
 int install_sighandler(int signum, void (*handler)(int), bool once);
 int install_crash_handler(void (*handler)(int));
+void reraise_crash_signal(int signo, int status);
 pid_t gettid(void);
 bool is_xattr_enabled(const char *path);
 
diff --git a/lib/logger.c b/lib/logger.c
index a31cb1f..d359b4d 100644
--- a/lib/logger.c
+++ b/lib/logger.c
@@ -475,9 +475,14 @@ static notrace void log_flush(void)
 	}
 }
 
+static bool is_sheep_dead(int signo)
+{
+	return signo == SIGHUP;
+}
+
 static notrace void crash_handler(int signo)
 {
-	if (signo == SIGHUP)
+	if (is_sheep_dead(signo))
 		sd_printf(SDOG_ERR, "sheep pid %d exited unexpectedly.",
 			  sheep_pid);
 	else {
@@ -489,7 +494,12 @@ static notrace void crash_handler(int signo)
 	log_flush();
 	closelog();
 	free_logarea();
-	exit(1);
+
+	/* If the signal isn't caused by the logger crash, we simply exit. */
+	if (is_sheep_dead(signo))
+		exit(1);
+
+	reraise_crash_signal(signo, 1);
 }
 
 static notrace void logger(char *log_dir, char *outfile)
diff --git a/lib/util.c b/lib/util.c
index 39779a5..aac0fa9 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -374,13 +374,17 @@ bool is_numeric(const char *s)
 	return false;
 }
 
+/*
+ * If 'once' is true, the signal will be restored to the default state
+ * after 'handler' is called.
+ */
 int install_sighandler(int signum, void (*handler)(int), bool once)
 {
 	struct sigaction sa = {};
 
 	sa.sa_handler = handler;
 	if (once)
-		sa.sa_flags = SA_RESETHAND;
+		sa.sa_flags = SA_RESETHAND | SA_NODEFER;
 	sigemptyset(&sa.sa_mask);
 
 	return sigaction(signum, &sa, NULL);
@@ -395,6 +399,28 @@ int install_crash_handler(void (*handler)(int))
 		install_sighandler(SIGFPE, handler, true);
 }
 
+/*
+ * Re-raise the signal 'signo' for the default signal handler to dump
+ * a core file, and exit with 'status' if the default handler cannot
+ * terminate the process.  This function is expected to be called in
+ * the installed signal handlers with install_crash_handler().
+ */
+void reraise_crash_signal(int signo, int status)
+{
+	int ret = raise(signo);
+
+	/* We won't get here normally. */
+	if (ret != 0)
+		sd_printf(SDOG_EMERG, "failed to re-raise signal %d (%s).",
+			  signo, strsignal(signo));
+	else
+		sd_printf(SDOG_EMERG, "default handler for the re-raised "
+			  "signal %d (%s) didn't work expectedly", signo,
+			  strsignal(signo));
+
+	exit(status);
+}
+
 pid_t gettid(void)
 {
 	return syscall(SYS_gettid);
diff --git a/sheep/sheep.c b/sheep/sheep.c
index cda8493..b1e04a1 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -182,6 +182,8 @@ static void crash_handler(int signo)
 
 	sd_backtrace();
 	sd_dump_variable(__sys);
+
+	reraise_crash_signal(signo, 1);
 }
 
 static struct cluster_info __sys;
-- 
1.8.1.3.566.gaa39828




More information about the sheepdog mailing list