[sheepdog] [PATCH experimental 2/2] tests: add a DynamoRIO client for testing the journaling mechanism

Hitoshi Mitake mitake.hitoshi at lab.ntt.co.jp
Mon Apr 15 07:07:44 CEST 2013


This patch adds a DynamoRIO (often called DR) client for testing the
journaling mechanism. Because of its nature, the recovery path is the
most important and the hardest-to-test part of the journaling
mechanism, so it needs to be tested well.

But testing targeted recovery paths with the traditional tests/ stuff
is hard because:
1. killing sheeps with kill commands doesn't take into account the
   internal state
2. inserting exit()s into sheep manually is painful work

So this patch implements a fault injection mechanism with DR. DR
provides rich functionalities of transparent dynamic
instrumentation. One of the functionalities makes inserting function
calls before and after system calls possible. With this mechanism, the
fault injection mechanism lets sheep exit at suitable timings for
testing recovery paths of the journaling.

How to use:
0. preparation
   $ cd
   $ svn checkout http://dynamorio.googlecode.com/svn/trunk/ dynamorio
   $ cd dynamorio
   $ mkdir build
   $ cd build
   $ cmake ..
   $ make

(This patch assumes the source code of DR is stored in $HOME/dynamorio,
and the build is done in $HOME/dynamorio/build)

1. build the DR client
   $ cd tests/dr_clients/journal_fi/
   $ cmake .
   $ make

2. run tests with preset scenarios
   $ ./01.sh 	  # for testing recovery of object store
   $ ./02.sh 	  # for testing recovery of epoch
   $ ./03.sh 	  # for testing recovery of config

The fault injection implemented with this patch is rather crude and
not capable of exhaustive testing. It is clear that we need a unified,
programmable fault injection mechanism for testing distributed
systems. This is only a stopgap measure, but I believe it is useful.

With this patch, I tested these recovery paths and checked they work
well.

Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
 tests/dr_clients/.gitignore                |    6 +
 tests/dr_clients/journal_fi/01.sh          |   21 ++
 tests/dr_clients/journal_fi/02.sh          |   20 ++
 tests/dr_clients/journal_fi/03.sh          |   20 ++
 tests/dr_clients/journal_fi/CMakeLists.txt |   10 +
 tests/dr_clients/journal_fi/journal_fi.c   |  370 ++++++++++++++++++++++++++++
 6 files changed, 447 insertions(+), 0 deletions(-)
 create mode 100644 tests/dr_clients/.gitignore
 create mode 100755 tests/dr_clients/journal_fi/01.sh
 create mode 100755 tests/dr_clients/journal_fi/02.sh
 create mode 100755 tests/dr_clients/journal_fi/03.sh
 create mode 100644 tests/dr_clients/journal_fi/CMakeLists.txt
 create mode 100644 tests/dr_clients/journal_fi/journal_fi.c

diff --git a/tests/dr_clients/.gitignore b/tests/dr_clients/.gitignore
new file mode 100644
index 0000000..09c1215
--- /dev/null
+++ b/tests/dr_clients/.gitignore
@@ -0,0 +1,6 @@
+*/CMakeCache.txt
+*/CMakeFiles
+*/Makefile
+*/cmake_install.cmake
+*/ldscript
+*/*.so
diff --git a/tests/dr_clients/journal_fi/01.sh b/tests/dr_clients/journal_fi/01.sh
new file mode 100755
index 0000000..ef2274d
--- /dev/null
+++ b/tests/dr_clients/journal_fi/01.sh
@@ -0,0 +1,21 @@
+#! /bin/bash
+
+# fault injection for testing journaling with object store
+
+sudo killall -KILL sheep
+sudo rm -rf /tmp/sheepdog/*
+
+# -p: also create /tmp/sheepdog itself when it does not exist yet
+sudo mkdir -p /tmp/sheepdog/0/journal
+# client option 1 is parsed by journal_fi.c as SID_DEATH_AFTER_STORE
+sudo ~/dynamorio/build/bin64/drrun -c libjournal_fi.so 1 --\
+ sheep -d -c local -p 7000 -j dir=/tmp/sheepdog/0/journal,size=64\
+ /tmp/sheepdog/0
+
+sleep 1
+
+collie cluster format -c 1
+collie vdi create test 100M
+# restart without DynamoRIO; presumably this run exercises journal recovery
+sudo sheep -d -c local -p 7000 -j dir=/tmp/sheepdog/0/journal,size=64\
+ /tmp/sheepdog/0
diff --git a/tests/dr_clients/journal_fi/02.sh b/tests/dr_clients/journal_fi/02.sh
new file mode 100755
index 0000000..d9152ef
--- /dev/null
+++ b/tests/dr_clients/journal_fi/02.sh
@@ -0,0 +1,20 @@
+#! /bin/bash
+
+# fault injection for testing journaling for epoch
+
+sudo killall -KILL sheep
+sudo rm -rf /tmp/sheepdog/*
+
+# -p: also create /tmp/sheepdog itself when it does not exist yet
+sudo mkdir -p /tmp/sheepdog/0/journal
+# client option 2 is parsed by journal_fi.c as SID_DEATH_AFTER_EPOCH
+sudo ~/dynamorio/build/bin64/drrun -c libjournal_fi.so 2 --\
+ sheep -d -c local -p 7000 -j dir=/tmp/sheepdog/0/journal,size=64\
+ /tmp/sheepdog/0
+
+sleep 1
+
+collie cluster format -c 1	# this operation causes fault injection
+# restart without DynamoRIO; presumably this run exercises journal recovery
+sudo sheep -d -c local -p 7000 -j dir=/tmp/sheepdog/0/journal,size=64\
+ /tmp/sheepdog/0
diff --git a/tests/dr_clients/journal_fi/03.sh b/tests/dr_clients/journal_fi/03.sh
new file mode 100755
index 0000000..6da668f
--- /dev/null
+++ b/tests/dr_clients/journal_fi/03.sh
@@ -0,0 +1,20 @@
+#! /bin/bash
+
+# fault injection for testing journaling for config
+
+sudo killall -KILL sheep
+sudo rm -rf /tmp/sheepdog/*
+
+# -p: also create /tmp/sheepdog itself when it does not exist yet
+sudo mkdir -p /tmp/sheepdog/0/journal
+# client option 3 is parsed by journal_fi.c as SID_DEATH_AFTER_CONFIG
+sudo ~/dynamorio/build/bin64/drrun -c libjournal_fi.so 3 --\
+ sheep -d -c local -p 7000 -j dir=/tmp/sheepdog/0/journal,size=64\
+ /tmp/sheepdog/0
+
+# launching sheep causes fault injection
+
+sleep 1
+# restart without DynamoRIO; presumably this run exercises journal recovery
+sudo sheep -d -c local -p 7000 -j dir=/tmp/sheepdog/0/journal,size=64\
+ /tmp/sheepdog/0
diff --git a/tests/dr_clients/journal_fi/CMakeLists.txt b/tests/dr_clients/journal_fi/CMakeLists.txt
new file mode 100644
index 0000000..1000cac
--- /dev/null
+++ b/tests/dr_clients/journal_fi/CMakeLists.txt
@@ -0,0 +1,10 @@
+cmake_minimum_required(VERSION 2.8)
+
+# take the home directory from the environment instead of relying on "~"
+SET(DynamoRIO_DIR "$ENV{HOME}/dynamorio/exports/cmake")
+find_package(DynamoRIO REQUIRED)	# stop here when DynamoRIO is not found
+add_library(journal_fi SHARED journal_fi.c)
+configure_DynamoRIO_client(journal_fi)
+use_DynamoRIO_extension(journal_fi drmgr)	# journal_fi.c calls drmgr_*()
+use_DynamoRIO_extension(journal_fi drwrap)
+use_DynamoRIO_extension(journal_fi drcontainers)
diff --git a/tests/dr_clients/journal_fi/journal_fi.c b/tests/dr_clients/journal_fi/journal_fi.c
new file mode 100644
index 0000000..5c223a9
--- /dev/null
+++ b/tests/dr_clients/journal_fi/journal_fi.c
@@ -0,0 +1,370 @@
+/*
+ * DynamoRIO client for fault injection which can be used for testing
+ * journaling mechanism
+ */
+
+#include "dr_api.h"
+#include "drwrap.h"
+#include "drmgr.h"
+#include "hashtable.h"
+#include "dr_tools.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+
+struct journal_descriptor {	/* pre_pwrite() reads a journal pwrite buffer as this */
+	uint32_t magic;		/* asserted to equal JOURNAL_DESC_MAGIC */
+	uint16_t flag;		/* JF_STORE, JF_EPOCH or JF_CONFIG */
+	uint16_t reserved;
+	union {
+		uint32_t epoch;
+		uint64_t oid;
+	};
+	uint64_t offset;
+	uint64_t size;
+	uint8_t create;
+	uint8_t pad[475];
+} __attribute__((packed));	/* __packed is presumably undefined here: spell it out */
+
+#define JOURNAL_DESC_MAGIC 0xfee1900d	/* expected journal_descriptor.magic */
+
+#define JF_STORE 0	/* values of journal_descriptor.flag; pre_pwrite() */
+#define JF_EPOCH 1	/* maps each one to the matching PWRITE_WRITING_* */
+#define JF_CONFIG 2
+
+enum scenario_id {	/* fault scenario; dr_init() takes it from the client option */
+	SID_UNDEF = -1,	/* initial value of sid */
+
+	SID_DO_NOTHING = 0,
+	SID_DEATH_AFTER_STORE,	/* exit after a JF_STORE journal pwrite */
+	SID_DEATH_AFTER_EPOCH,	/* exit after a JF_EPOCH journal pwrite */
+	SID_DEATH_AFTER_CONFIG,	/* exit after a JF_CONFIG journal pwrite */
+};
+
+enum scenario_id sid = SID_UNDEF;	/* selected scenario; assigned in dr_init() */
+
+static int tls_idx;	/* drmgr TLS slot holding the per_thread_journal_state */
+static file_t log_file = INVALID_FILE;	/* fi_printf() target once opened */
+
+static int jfile_fds[2];	/* fds recorded by post_open() for journal_file0/1 */
+
+#define fi_printf(fmt, args...) do {	/* log with a "func(line), " prefix */ \
+		if (log_file != INVALID_FILE)				\
+			dr_fprintf(log_file, "%s(%d), " fmt,		\
+				__func__, __LINE__, ## args);		\
+		else	/* log file not opened: use dr_printf() */	\
+			dr_printf("%s(%d), " fmt,			\
+				__func__, __LINE__, ## args);		\
+	} while (0)
+
+#define die(fmt, args...) do {	/* logs only; does NOT terminate */	\
+		/* fi_printf() already prepends "func(line), " */	\
+		fi_printf("FATAL " fmt, ## args);			\
+	} while (0)
+
+static void *xmalloc(size_t size)	/* __wrap_malloc() that never returns NULL */
+{
+	void *ret = __wrap_malloc(size);
+
+	if (!ret) {	/* die() only logs, so stop the process explicitly */
+		die("allocating memory with __wrap_malloc() failed\n");
+		dr_abort();
+	}
+	return ret;
+}
+
+static void *xzalloc(size_t size)	/* xmalloc() plus zero fill */
+{
+	void *ret = xmalloc(size);
+
+	if (ret)	/* never memset() a failed allocation */
+		memset(ret, 0, size);
+
+	return ret;
+}
+
+static void *xcalloc(size_t size, size_t nmnb)	/* zeroed size * nmnb bytes */
+{
+	size_t len = size * nmnb;
+	/* a wrapped size * nmnb product is handled like a failed allocation */
+	void *ret = (nmnb && len / nmnb != size) ? NULL : __wrap_malloc(len);
+
+	if (!ret) {	/* die() only logs, so stop the process explicitly */
+		die("allocating memory with __wrap_malloc() failed\n");
+		dr_abort();
+	}
+	return memset(ret, 0, len);
+}
+
+static void xfree(void *ptr)	/* thin wrapper: hands ptr to __wrap_free() */
+{
+	__wrap_free(ptr);
+}
+
+enum thread_state {	/* per-thread marker set by pre-hooks, read by post-hooks */
+	THREAD_STATE_DEFAULT,
+
+	THREAD_STATE_OPENING_JFILE_0,	/* pre_open() saw "journal_file0" */
+	THREAD_STATE_OPENING_JFILE_1,	/* pre_open() saw "journal_file1" */
+
+	THREAD_STATE_WRITING_JFILE,	/* pre_pwrite() saw a journal fd */
+};
+
+enum pwrite_state {	/* descriptor kind pre_pwrite() derived from jd->flag */
+	PWRITE_WRITING_STORE,	/* JF_STORE */
+	PWRITE_WRITING_EPOCH,	/* JF_EPOCH */
+	PWRITE_WRITING_CONFIG,	/* JF_CONFIG */
+};
+
+struct per_thread_journal_state {	/* one per thread, kept in the drmgr TLS slot */
+	enum thread_state state;
+	int using_fd;	/* fd of the journal pwrite seen by pre_pwrite() */
+
+	enum pwrite_state pwrite_state;
+};
+
+static void thread_init_event(void *drcontext)
+{
+	/* per-thread state starts zero-filled, i.e. THREAD_STATE_DEFAULT */
+	struct per_thread_journal_state *fresh = xzalloc(sizeof(*fresh));
+
+	/* park it in this thread's drmgr TLS slot for the syscall hooks */
+	drmgr_set_tls_field(drcontext, tls_idx, fresh);
+}
+
+static void thread_exit_event(void *drcontext)
+{
+	void *per_thread;
+
+	/* release the state thread_init_event() parked in the TLS slot */
+	per_thread = drmgr_get_tls_field(drcontext, tls_idx);
+	xfree(per_thread);
+}
+
+static void pre_open(void *drcontext)	/* pre-syscall hook for SYS_open */
+{
+	struct per_thread_journal_state *jstate =
+		drmgr_get_tls_field(drcontext, tls_idx);
+	const char *path = (const char *)dr_syscall_get_param(drcontext, 0);
+	enum thread_state opening;
+
+	if (strstr(path, "journal_file0")) {
+		fi_printf("journal_file0 is opened\n");
+		opening = THREAD_STATE_OPENING_JFILE_0;
+	} else if (strstr(path, "journal_file1")) {
+		fi_printf("journal_file1 is opened\n");
+		opening = THREAD_STATE_OPENING_JFILE_1;
+	} else {
+		return;	/* some other file: nothing to track */
+	}
+	/* tell post_open() which journal file the resulting fd belongs to */
+	DR_ASSERT(jstate->state == THREAD_STATE_DEFAULT);
+	jstate->state = opening;
+}
+
+static void pre_close(void *drcontext)	/* empty placeholder called for SYS_close */
+{
+}
+
+static void pre_read(void *drcontext)	/* empty placeholder called for SYS_read */
+{
+}
+
+static void pre_write(void *drcontext)	/* empty placeholder called for SYS_write */
+{
+}
+
+static void pre_pwrite(void *drcontext)	/* pre-syscall hook for SYS_pwrite64 */
+{
+	int fd = (int)dr_syscall_get_param(drcontext, 0);
+	struct per_thread_journal_state *jstate;
+	struct journal_descriptor *jd;
+	enum pwrite_state kind;
+
+	if (fd != jfile_fds[0] && fd != jfile_fds[1])
+		return;	/* not one of the recorded journal fds */
+	fi_printf("writing journal\n");
+	/* parameter 1 (the buffer) is read as a journal descriptor */
+	jd = (struct journal_descriptor *)dr_syscall_get_param(drcontext, 1);
+	DR_ASSERT(jd->magic == JOURNAL_DESC_MAGIC);
+	if (jd->flag == JF_STORE) {
+		kind = PWRITE_WRITING_STORE;
+	} else if (jd->flag == JF_EPOCH) {
+		kind = PWRITE_WRITING_EPOCH;
+	} else if (jd->flag == JF_CONFIG) {
+		kind = PWRITE_WRITING_CONFIG;
+	} else {
+		die("unknown journal flag: %d\n", jd->flag);
+		return;	/* don't arm post_pwrite() with a stale pwrite_state */
+	}
+	jstate = drmgr_get_tls_field(drcontext, tls_idx);
+	jstate->using_fd = fd;
+	jstate->pwrite_state = kind;
+	jstate->state = THREAD_STATE_WRITING_JFILE;
+}
+
+static bool pre_syscall(void *drcontext, int sysnum)
+{
+	static const struct {
+		int nr;
+		void (*hook)(void *drcontext);
+	} hooks[] = {
+		{ SYS_open, pre_open },
+		{ SYS_close, pre_close },
+		{ SYS_read, pre_read },
+		{ SYS_write, pre_write },
+		{ SYS_pwrite64, pre_pwrite },
+	};
+	size_t i;
+
+	/* call the pre-hook registered for this syscall number, if any */
+	for (i = 0; i < sizeof(hooks) / sizeof(hooks[0]); i++) {
+		if (hooks[i].nr == sysnum)
+			hooks[i].hook(drcontext);
+	}
+
+	/* every syscall gets the same answer; NOTE(review): in DynamoRIO a
+	 * true return presumably means "execute the syscall" - confirm */
+	return true;
+}
+
+static void post_open(void *drcontext)	/* post-syscall hook for SYS_open */
+{
+	struct per_thread_journal_state *jstate =
+		drmgr_get_tls_field(drcontext, tls_idx);
+	int fd;
+
+	if (jstate->state == THREAD_STATE_DEFAULT)
+		return;	/* pre_open() did not flag this open */
+	fd = (int)dr_syscall_get_result(drcontext);
+	switch (jstate->state) {
+	case THREAD_STATE_OPENING_JFILE_0:
+		fi_printf("fd of jfile0: %d\n", fd);
+		jfile_fds[0] = fd;
+		break;
+	case THREAD_STATE_OPENING_JFILE_1:
+		fi_printf("fd of jfile1: %d\n", fd);
+		jfile_fds[1] = fd;
+		break;
+	default:
+		break;
+	}
+	jstate->state = THREAD_STATE_DEFAULT;
+}
+
+static void post_close(void *drcontext)	/* empty placeholder called for SYS_close */
+{
+}
+
+static void post_read(void *drcontext)	/* empty placeholder called for SYS_read */
+{
+}
+
+static void post_write(void *drcontext)	/* empty placeholder called for SYS_write */
+{
+}
+
+static void post_pwrite(void *drcontext)	/* post-syscall hook for SYS_pwrite64 */
+{
+	struct per_thread_journal_state *jstate;
+	enum pwrite_state fatal_kind;
+	const char *what;
+
+	jstate = drmgr_get_tls_field(drcontext, tls_idx);
+	if (jstate->state != THREAD_STATE_WRITING_JFILE)
+		return;
+
+	/*
+	 * The journal write is over.  Leaving WRITING_JFILE set would make a
+	 * later non-journal pwrite on this thread look like a journal write
+	 * and would trip the DR_ASSERT() in pre_open().
+	 */
+	jstate->state = THREAD_STATE_DEFAULT;
+	DR_ASSERT(jstate->using_fd == jfile_fds[0] ||
+		  jstate->using_fd == jfile_fds[1]);
+
+	switch (sid) {
+	case SID_DO_NOTHING:
+		return;	/* valid scenario: observe only, never exit */
+	case SID_DEATH_AFTER_STORE:
+		fatal_kind = PWRITE_WRITING_STORE;
+		what = "normal store";
+		break;
+	case SID_DEATH_AFTER_EPOCH:
+		fatal_kind = PWRITE_WRITING_EPOCH;
+		what = "epoch";
+		break;
+	case SID_DEATH_AFTER_CONFIG:
+		fatal_kind = PWRITE_WRITING_CONFIG;
+		what = "config";
+		break;
+	default:
+		die("invalid SID: %d\n", sid);
+		return;
+	}
+
+	if (jstate->pwrite_state != fatal_kind)
+		return;	/* not the descriptor kind this scenario waits for */
+	fi_printf("scenario is death after writing %s, exiting\n", what);
+	exit(1);
+}
+
+static void post_syscall(void *drcontext, int sysnum)
+{
+	static const struct {
+		int nr;
+		void (*hook)(void *drcontext);
+	} hooks[] = {
+		{ SYS_open, post_open },
+		{ SYS_close, post_close },
+		{ SYS_read, post_read },
+		{ SYS_write, post_write },
+		{ SYS_pwrite64, post_pwrite },
+	};
+	size_t i;
+
+	/* call the post-hook registered for this syscall number, if any */
+	for (i = 0; i < sizeof(hooks) / sizeof(hooks[0]); i++) {
+		if (hooks[i].nr == sysnum)
+			hooks[i].hook(drcontext);
+	}
+}
+
+static bool pre_syscall_filter(void *drcontext, int sysnum)	/* registered in dr_init() */
+{
+	return true;	/* same answer for every syscall number */
+}
+
+static bool post_syscall_filter(void *drcontext, int sysnum)	/* NOTE(review): not registered in the visible code */
+{
+	return true;
+}
+
+DR_EXPORT void dr_init(client_id_t id)	/* DynamoRIO client entry point */
+{
+	const char *option = dr_get_options(id);
+	char *end;
+	long val = strtol(option, &end, 10);	/* option: decimal scenario id */
+
+	fi_printf("the passed option to this client: %s\n", option);
+	if (end == option || val < SID_DO_NOTHING ||
+	    val > SID_DEATH_AFTER_CONFIG) {	/* atoi() hid this case */
+		fi_printf("invalid scenario id, using %d\n", SID_DO_NOTHING);
+		val = SID_DO_NOTHING;
+	}
+	sid = (enum scenario_id)val;
+	fi_printf("sid: %d\n", sid);
+	log_file = dr_open_file("journal_fi.log", DR_FILE_WRITE_APPEND);
+	if (log_file == INVALID_FILE)	/* fi_printf() keeps using dr_printf() */
+		die("opening journal_fi.log failed\n");
+	jfile_fds[0] = jfile_fds[1] = -1;	/* no journal file opened yet */
+	dr_register_filter_syscall_event(pre_syscall_filter);
+	drmgr_init();
+	tls_idx = drmgr_register_tls_field();
+	drmgr_register_pre_syscall_event(pre_syscall);
+	drmgr_register_post_syscall_event(post_syscall);
+	drmgr_register_thread_init_event(thread_init_event);
+	drmgr_register_thread_exit_event(thread_exit_event);
+}
-- 
1.7.2.5




More information about the sheepdog mailing list