[Sheepdog] [PATCH 2/7] support direct IO

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Thu Apr 7 02:56:18 CEST 2011


Using O_SYNC on ext3/4 with barrier=1, or on btrfs, causes severe
performance problems.  This patch adds a -D (--directio) option to the
sheep command line; when it is given, data objects are opened with
O_DIRECT instead of O_SYNC.

TODO:
  Enable O_DIRECT for other kinds of objects (e.g. vdi objects)

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 include/sheepdog_proto.h |    1 +
 sheep/sdnet.c            |   12 +++++++++---
 sheep/sheep.c            |    8 +++++++-
 sheep/sheep_priv.h       |    2 ++
 sheep/store.c            |   29 +++++++++++++++++++++++------
 5 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 1614bb7..5c50aa7 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -81,6 +81,7 @@
 #define SD_MAX_VDI_ATTR_VALUE_LEN (UINT64_C(1) << 22)
 #define SD_NR_VDIS   (1U << 24)
 #define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22)
+#define SECTOR_SIZE (1U << 9)
 
 #define SD_INODE_SIZE (sizeof(struct sheepdog_inode))
 #define SD_INODE_HEADER_SIZE (sizeof(struct sheepdog_inode) - \
diff --git a/sheep/sdnet.c b/sheep/sdnet.c
index 3c8668a..f1ef27d 100644
--- a/sheep/sdnet.c
+++ b/sheep/sdnet.c
@@ -267,14 +267,19 @@ static struct request *alloc_request(struct client_info *ci, int data_length)
 {
 	struct request *req;
 
-	req = zalloc(sizeof(struct request) + data_length);
+	req = zalloc(sizeof(struct request));
 	if (!req)
 		return NULL;
 
 	req->ci = ci;
 	client_incref(ci);
-	if (data_length)
-		req->data = (char *)req + sizeof(*req);
+	if (data_length) {
+		req->data = valloc(data_length);
+		if (!req->data) {
+			free(req);
+			return NULL;
+		}
+	}
 
 	list_add(&req->r_siblings, &ci->reqs);
 	INIT_LIST_HEAD(&req->r_wlist);
@@ -287,6 +292,7 @@ static struct request *alloc_request(struct client_info *ci, int data_length)
 static void free_request(struct request *req)
 {
 	list_del(&req->r_siblings);
+	free(req->data);
 	free(req);
 
 	sys->nr_outstanding_reqs--;
diff --git a/sheep/sheep.c b/sheep/sheep.c
index 3ea6d25..5a40888 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -32,11 +32,12 @@ static struct option const long_options[] = {
 	{"foreground", no_argument, NULL, 'f'},
 	{"loglevel", required_argument, NULL, 'l'},
 	{"debug", no_argument, NULL, 'd'},
+	{"directio", no_argument, NULL, 'D'},
 	{"help", no_argument, NULL, 'h'},
 	{NULL, 0, NULL, 0},
 };
 
-static const char *short_options = "p:fl:dh";
+static const char *short_options = "p:fl:dDh";
 
 static void usage(int status)
 {
@@ -51,6 +52,7 @@ Sheepdog Daemon, version %s\n\
   -f, --foreground        make the program run in the foreground\n\
   -l, --loglevel          specify the message level printed by default\n\
   -d, --debug             print debug messages\n\
+  -D, --directio          use direct IO\n\
   -h, --help              display this help and exit\n\
 ", PACKAGE_VERSION);
 	}
@@ -85,6 +87,10 @@ int main(int argc, char **argv)
 			/* removed soon. use loglevel instead */
 			log_level = LOG_DEBUG;
 			break;
+		case 'D':
+			dprintf("direct IO mode\n");
+			sys->use_directio = 1;
+			break;
 		case 'h':
 			usage(0);
 			break;
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 8c2199d..13a5ce2 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -135,6 +135,8 @@ struct cluster_info {
 	int nr_outstanding_reqs;
 
 	uint32_t recovered_epoch;
+
+	int use_directio;
 };
 
 extern struct cluster_info *sys;
diff --git a/sheep/store.c b/sheep/store.c
index e5a04a0..9beb476 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -479,9 +479,14 @@ out:
 static int ob_open(uint32_t epoch, uint64_t oid, int aflags, int *ret)
 {
 	char path[1024];
-	int flags = O_SYNC | O_RDWR | aflags;
+	int flags;
 	int fd;
 
+	if (sys->use_directio && is_data_obj(oid))
+		flags = O_DIRECT | O_RDWR | aflags;
+	else
+		flags = O_SYNC | O_RDWR | aflags;
+
 	snprintf(path, sizeof(path), "%s%08u/%016" PRIx64, obj_path, epoch, oid);
 
 	fd = open(path, flags, def_fmode);
@@ -608,7 +613,7 @@ static int store_queue_request_local(struct request *req, uint32_t epoch)
 		if (hdr->flags & SD_FLAG_CMD_COW) {
 			dprintf("%" PRIu64 ", %" PRIx64 "\n", oid, hdr->cow_oid);
 
-			buf = zalloc(SD_DATA_OBJ_SIZE);
+			buf = valloc(SD_DATA_OBJ_SIZE);
 			if (!buf) {
 				eprintf("failed to allocate memory\n");
 				ret = SD_RES_NO_MEM;
@@ -632,10 +637,19 @@ static int store_queue_request_local(struct request *req, uint32_t epoch)
 			free(buf);
 			buf = NULL;
 		} else {
-			int zero = 0;
+			int size = SECTOR_SIZE;
+			buf = valloc(size);
+			if (!buf) {
+				eprintf("failed to allocate memory\n");
+				ret = SD_RES_NO_MEM;
+				goto out;
+			}
+			memset(buf, 0, size);
+			ret = pwrite64(fd, buf, size, SD_DATA_OBJ_SIZE - size);
+			free(buf);
+			buf = NULL;
 
-			ret = pwrite64(fd, &zero, sizeof(zero), SD_DATA_OBJ_SIZE - sizeof(zero));
-			if (ret != sizeof(zero)) {
+			if (ret != size) {
 				if (errno == ENOSPC)
 					ret = SD_RES_NO_SPACE;
 				else
@@ -753,11 +767,12 @@ static int fix_object_consistency(struct request *req, int idx)
 	else
 		data_length = SD_DATA_OBJ_SIZE;
 
-	buf = zalloc(data_length);
+	buf = valloc(data_length);
 	if (buf == NULL) {
 		eprintf("out of memory\n");
 		goto out;
 	}
+	memset(buf, 0, data_length);
 
 	req->data = buf;
 	hdr->offset = 0;
@@ -1324,6 +1339,8 @@ static void recover_one(struct work *work, int idx)
 		buf = malloc(sizeof(struct sheepdog_inode));
 	else if (is_vdi_attr_obj(oid))
 		buf = malloc(SD_MAX_VDI_ATTR_VALUE_LEN);
+	else if (is_data_obj(oid))
+		buf = valloc(SD_DATA_OBJ_SIZE);
 	else
 		buf = malloc(SD_DATA_OBJ_SIZE);
 
-- 
1.5.6.5




More information about the sheepdog mailing list