[Sheepdog] [PATCH v2 14/15] sheepfs: teach volume to read/write COW objects

Liu Yuan namei.unix at gmail.com
Mon May 14 11:47:39 CEST 2012


From: Liu Yuan <tailai.ly at taobao.com>

Now we can operate on a volume backed by a cloned vdi. For example, we can boot
up the vdi named 'clone' with the command below:

$ qemu-system-x86_64 --enable-kvm -m 1024 -drive \
  file=sheepfs_dir/volume/test1,cache=writeback

tailai.ly at taobao:~/sheepdog$ cat sheepfs_dir/vdi/list
  Name        Id    Size    Used  Shared    Creation time   VDI id  Tag
c clone        1   20 MB  0.0 MB   20 MB 2012-05-14 12:01   72a1e2
s test1        1   20 MB   20 MB  0.0 MB 2012-05-14 11:57   fd32fc  snap
  test1        2   20 MB  0.0 MB   20 MB 2012-05-14 11:58   fd32fd

- add an option (-n, --no-object-cache) to disable the object cache for volumes

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 sheepfs/core.c    |   57 ++++++++++++++++++++++++++++------------------------
 sheepfs/sheepfs.h |    1 +
 sheepfs/volume.c  |   23 ++++++++++++++++++--
 3 files changed, 52 insertions(+), 29 deletions(-)

diff --git a/sheepfs/core.c b/sheepfs/core.c
index 9a6d1a0..819256d 100644
--- a/sheepfs/core.c
+++ b/sheepfs/core.c
@@ -34,6 +34,7 @@ char sheepfs_shadow[PATH_MAX];
 static int sheepfs_debug;
 static int sheepfs_fg;
 int sheepfs_page_cache = 0;
+int sheepfs_object_cache = 1;
 const char *sdhost = "localhost";
 int sdport = SD_LISTEN_PORT;
 
@@ -43,11 +44,12 @@ static struct option const long_options[] = {
 	{"help", no_argument, NULL, 'h'},
 	{"foreground", no_argument, NULL, 'f'},
 	{"pagecache", no_argument, NULL, 'k'},
+	{"no-object-cache", no_argument, NULL, 'n'},
 	{"port", required_argument, NULL, 'p'},
 	{NULL, 0, NULL, 0},
 };
 
-static const char *short_options = "a:dfhkp:";
+static const char *short_options = "a:dfhknp:";
 
 static struct sheepfs_file_operation {
 	int (*read)(const char *path, char *buf, size_t size, off_t);
@@ -264,6 +266,7 @@ Options:\n\
   -d, --debug             enable debug output (implies -f)\n\
   -f, --foreground        sheepfs run in the foreground\n\
   -k, --pagecache         use local kernel's page cache to access volume\n\
+  -n  --no-object-cache   disable object cache of the attached volumes\n\
   -p  --port              specify the sheep port (default: 7000)\n\
   -h, --help              display this help and exit\n\
 ");
@@ -276,35 +279,37 @@ int main(int argc, char **argv)
 	int ch, longindex;
 	char *dir = NULL, *cwd;
 
-
 	while ((ch = getopt_long(argc, argv, short_options, long_options,
 				 &longindex)) >= 0) {
 		switch (ch) {
-			case 'a':
-				sdhost = optarg;
-				break;
-			case 'd':
-				sheepfs_debug = 1;
-				break;
-			case 'h':
-				usage(0);
-				break;
-			case 'f':
-				sheepfs_fg = 1;
-				break;
-			case 'k':
-				sheepfs_page_cache = 1;
-				break;
-			case 'p':
-				sdport = strtol(optarg, NULL, 10);
-				if (sdport < 1 || sdport > UINT16_MAX) {
-					fprintf(stderr,
+		case 'a':
+			sdhost = optarg;
+			break;
+		case 'd':
+			sheepfs_debug = 1;
+			break;
+		case 'h':
+			usage(0);
+			break;
+		case 'f':
+			sheepfs_fg = 1;
+			break;
+		case 'k':
+			sheepfs_page_cache = 1;
+			break;
+		case 'n':
+			sheepfs_object_cache = 0;
+			break;
+		case 'p':
+			sdport = strtol(optarg, NULL, 10);
+			if (sdport < 1 || sdport > UINT16_MAX) {
+				fprintf(stderr,
 					"Invalid port number '%s'\n", optarg);
-					exit(1);
-				}
-				break;
-			default:
-				usage(1);
+				exit(1);
+			}
+			break;
+		default:
+			usage(1);
 		}
 	}
 
diff --git a/sheepfs/sheepfs.h b/sheepfs/sheepfs.h
index c17ec55..e1ed8ae 100644
--- a/sheepfs/sheepfs.h
+++ b/sheepfs/sheepfs.h
@@ -18,6 +18,7 @@ enum sheepfs_opcode {
 
 extern char sheepfs_shadow[];
 extern int sheepfs_page_cache;
+extern int sheepfs_object_cache;
 extern const char *sdhost;
 extern int sdport;
 
diff --git a/sheepfs/volume.c b/sheepfs/volume.c
index f162a61..549d169 100644
--- a/sheepfs/volume.c
+++ b/sheepfs/volume.c
@@ -153,6 +153,7 @@ static int volume_rw_object(char *buf, uint64_t oid, size_t size,
 	uint32_t vid = oid_to_vid(oid);
 	struct vdi_inode *vdi = vdi_inode_tree_search(vid);
 	unsigned long idx = 0;
+	uint64_t cow_oid = 0;
 
 	if (is_data_obj(oid)) {
 		idx = data_oid_to_idx(oid);
@@ -164,6 +165,19 @@ static int volume_rw_object(char *buf, uint64_t oid, size_t size,
 				goto done;
 			}
 			create = 1;
+		} else {
+			if (rw == VOLUME_READ) {
+				oid = vid_to_data_oid(
+					vdi->inode->data_vdi_id[idx],
+					idx);
+			/* in case we are writing a COW object */
+			} else if (!is_data_obj_writeable(vdi->inode, idx)) {
+				cow_oid = vid_to_data_oid(
+						vdi->inode->data_vdi_id[idx],
+						idx);
+				hdr.flags |= SD_FLAG_CMD_COW;
+				create = 1;
+			}
 		}
 	}
 
@@ -178,9 +192,11 @@ static int volume_rw_object(char *buf, uint64_t oid, size_t size,
 	}
 
 	hdr.oid = oid;
+	hdr.cow_oid = cow_oid;
 	hdr.data_length = size;
 	hdr.offset = off;
-	hdr.flags |= SD_FLAG_CMD_CACHE;
+	if (sheepfs_object_cache)
+		hdr.flags |= SD_FLAG_CMD_CACHE;
 
 	fd = get_socket_fd(vdi, &sock_idx);
 	ret = exec_req(fd, (struct sd_req *)&hdr, buf, &wlen, &rlen);
@@ -306,7 +322,7 @@ int volume_sync(const char *path)
 	if (shadow_file_getxattr(path, SH_VID_NAME, &vid, SH_VID_SIZE) < 0)
 		return -EIO;
 
-	if (volume_do_sync(vid) < 0)
+	if (sheepfs_object_cache && volume_do_sync(vid) < 0)
 		return -EIO;
 
 	return 0;
@@ -481,7 +497,8 @@ int volume_remove_entry(const char *entry)
 	/* No need to check error code, for case of connected sheep crashed,
 	 * we continue to do cleanup.
 	 */
-	volume_sync_and_delete(vid);
+	if (sheepfs_object_cache)
+		volume_sync_and_delete(vid);
 
 	vdi = vdi_inode_tree_search(vid);
 	destroy_socket_pool(vdi->socket_pool, SOCKET_POOL_SIZE);
-- 
1.7.8.2




More information about the sheepdog mailing list