[Sheepdog] [PATCH 3/4] store vdi as directories

FUJITA Tomonori fujita.tomonori at lab.ntt.co.jp
Mon Dec 21 09:04:16 CET 2009


Here's an example:

./linux
./linux/0000000000080000-4b2f0541
./linux/0000000000040000-00000000
./linux2
./linux2/00000000000c0000-00000000

'linux' and 'linux2' are vdi names. 'linux' vdi has one snapshot.

'0000000000080000-4b2f0541' is the oid of the vdi object followed by
its tag, separated by '-'.

Signed-off-by: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp>
---
 collie/net.c             |    2 +
 collie/store.c           |  151 ++++++++++++++++++++++++++++++++++++++-
 collie/vdi.c             |  180 ++++++++++++----------------------------------
 include/net.h            |    3 +-
 include/sheepdog_proto.h |    9 ++-
 lib/net.c                |   10 ++-
 6 files changed, 211 insertions(+), 144 deletions(-)

diff --git a/collie/net.c b/collie/net.c
index 0e95c08..be2d084 100644
--- a/collie/net.c
+++ b/collie/net.c
@@ -62,6 +62,8 @@ static void queue_request(struct request *req)
 		req->work.fn = cluster_queue_request;
 		break;
 	case SD_OP_SO:
+	case SD_OP_SO_NEW_VDI:
+	case SD_OP_SO_LOOKUP_VDI:
 		req->work.fn = so_queue_request;
 		break;
 	default:
diff --git a/collie/store.c b/collie/store.c
index 00dcf41..41a8dc5 100644
--- a/collie/store.c
+++ b/collie/store.c
@@ -13,6 +13,7 @@
 #include <fcntl.h>
 #include <mntent.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <unistd.h>
 #include <sys/xattr.h>
 #include <sys/statvfs.h>
@@ -22,6 +23,7 @@
 
 #define ANAME_LAST_OID "user.sheepdog.last_oid"
 #define ANAME_COPIES "user.sheepdog.copes"
+#define ANAME_CURRENT "user.sheepdog.current"
 
 static char *obj_dir;
 static char *mnt_dir;
@@ -321,13 +323,85 @@ out:
 		close(fd);
 }
 
+static int so_lookup_vdi(struct request *req)
+{
+	struct sd_so_req *hdr = (struct sd_so_req *)&req->rq;
+	struct sd_so_rsp *rsp = (struct sd_so_rsp *)&req->rp;
+	DIR *dir;
+	struct dirent *dent;
+	char *p;
+	int fd, ret;
+	uint64_t coid, oid;
+	char path[1024];
+
+	memset(path, 0, sizeof(path));
+	snprintf(path, sizeof(path), "%s/vdi/", obj_dir);
+	strncpy(path + strlen(path), (char *)req->data,	hdr->data_length);
+
+	dprintf("%s, %x\n", path, hdr->tag);
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		eprintf("%m\n");
+		return SD_RES_EIO;
+	}
+
+	ret = fgetxattr(fd, ANAME_CURRENT, &coid,
+			sizeof(coid));
+	if (ret != sizeof(coid)) {
+		close(fd);
+		eprintf("%m\n");
+		return SD_RES_EIO;
+	}
+
+	dprintf("%lx, %x\n", coid, hdr->tag);
+
+	close(fd);
+
+	if (hdr->tag == 0xffffffff) {
+		close(fd);
+		rsp->oid = coid;
+		rsp->flags = SD_VDI_RSP_FLAG_CURRENT;
+		return SD_RES_SUCCESS;
+	}
+
+	dir = opendir(path);
+
+	while ((dent = readdir(dir))) {
+		if (!strcmp(dent->d_name, ".") ||
+		    !strcmp(dent->d_name, ".."))
+			continue;
+
+		p = strchr(dent->d_name, '-');
+		if (!p) {
+			eprintf("bug %s\n", dent->d_name);
+			continue;
+		}
+
+		if (strtoull(p + 1, NULL, 16) == hdr->tag) {
+			*p = '\0';
+			oid = strtoull(dent->d_name, NULL, 16);
+			rsp->oid = oid;
+			dprintf("%lx, %x\n", oid, hdr->tag);
+			if (oid == coid)
+				rsp->flags = SD_VDI_RSP_FLAG_CURRENT;
+
+			ret = SD_RES_SUCCESS;
+			break;
+		}
+	}
+	closedir(dir);
+
+	return SD_RES_SUCCESS;
+}
+
 void so_queue_request(struct work *work, int idx)
 {
 	struct request *req = container_of(work, struct request, work);
 	struct sd_so_req *hdr = (struct sd_so_req *)&req->rq;
 	struct sd_so_rsp *rsp = (struct sd_so_rsp *)&req->rp;
 	struct cluster_info *cluster = req->ci->cluster;
-	int fd = -1, ret, result = SD_RES_SUCCESS;
+	int nfd, fd = -1, ret, result = SD_RES_SUCCESS;
 	uint32_t opcode = hdr->opcode;
 	uint64_t last_oid = 0;
 	char path[1024];
@@ -343,10 +417,10 @@ void so_queue_request(struct work *work, int idx)
 		goto out;
 
 	memset(path, 0, sizeof(path));
+	snprintf(path, sizeof(path), "%s/vdi", obj_dir);
 
 	switch (opcode) {
 	case SD_OP_SO:
-		snprintf(path, sizeof(path), "%s/vdi", obj_dir);
 		ret = mkdir(path, S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP |
 			    S_IWGRP | S_IXGRP);
 		if (ret && errno != EEXIST) {
@@ -372,6 +446,79 @@ void so_queue_request(struct work *work, int idx)
 				sizeof(hdr->copies), 0);
 		if (ret)
 			result = SD_RES_EIO;
+		break;
+	case SD_OP_SO_NEW_VDI:
+		fd = open(path, O_RDONLY);
+		if (fd < 0) {
+			result = SD_RES_EIO;
+			goto out;
+		}
+
+		ret = fgetxattr(fd, ANAME_LAST_OID, &last_oid,
+				sizeof(last_oid));
+		if (ret != sizeof(last_oid)) {
+			close(fd);
+			result = SD_RES_EIO;
+			goto out;
+		}
+
+		strncpy(path + strlen(path), "/", 1);
+		strncpy(path + strlen(path), (char *)req->data,	hdr->data_length);
+
+		if (hdr->tag)
+			;
+		else {
+			ret = mkdir(path, S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP |
+				    S_IWGRP | S_IXGRP);
+			if (ret) {
+				eprintf("%m\n");
+				result = SD_RES_EIO;
+				goto out;
+			}
+		}
+
+		nfd = open(path, O_RDONLY);
+		if (nfd < 0) {
+			eprintf("%m\n");
+			result = SD_RES_EIO;
+			goto out;
+		}
+
+		last_oid += MAX_DATA_OBJS;
+
+		snprintf(path+ strlen(path), sizeof(path) - strlen(path),
+			 "/%016lx-%08x", last_oid, hdr->tag);
+		ret = creat(path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
+		if (ret < 0) {
+			eprintf("%m\n");
+			result = SD_RES_EIO;
+			goto out;
+		}
+		close(ret);
+
+		ret = fsetxattr(fd, ANAME_LAST_OID, &last_oid,
+				sizeof(last_oid), 0);
+		if (ret) {
+			eprintf("%m\n");
+			close(fd);
+			result = SD_RES_EIO;
+			goto out;
+		}
+
+		close(fd);
+
+		ret = fsetxattr(nfd, ANAME_CURRENT, &last_oid,
+				sizeof(last_oid), 0);
+
+		close(nfd);
+
+		eprintf("%lx\n", last_oid);
+		rsp->oid = last_oid;
+		break;
+
+	case SD_OP_SO_LOOKUP_VDI:
+		ret = so_lookup_vdi(req);
+		break;
 	}
 
 out:
diff --git a/collie/vdi.c b/collie/vdi.c
index d774d71..cf8d9ac 100644
--- a/collie/vdi.c
+++ b/collie/vdi.c
@@ -17,15 +17,6 @@
 #include "meta.h"
 #include "collie.h"
 
-static int sheepdog_match(struct sheepdog_dir_entry *ent, char *name, int len)
-{
-	if (!ent->name_len)
-		return 0;
-	if (ent->name_len != len)
-		return 0;
-	return !memcmp(ent->name, name, len);
-}
-
 /* TODO: should be performed atomically */
 static int create_inode_obj(struct sheepdog_node_list_entry *entries,
 			    int nr_nodes, uint64_t epoch, int copies,
@@ -87,103 +78,49 @@ static int create_inode_obj(struct sheepdog_node_list_entry *entries,
 	return ret;
 }
 
-#define DIR_BUF_LEN (UINT64_C(1) << 20)
-
 /*
  * TODO: handle larger buffer
  */
-int add_vdi(struct cluster_info *cluster, char *name, int len, uint64_t size,
+int add_vdi(struct cluster_info *ci, char *name, int len, uint64_t size,
 	    uint64_t *added_oid, uint64_t base_oid, uint32_t tag)
 {
 	struct sheepdog_node_list_entry entries[SD_MAX_NODES];
 	int nr_nodes;
-	struct sheepdog_dir_entry *prv, *ent;
 	uint64_t oid = 0;
-	char *buf;
-	int ret, rest;
-	struct sheepdog_super_block *sb;
+	int ret;
 	int copies;
+	struct sd_so_req req;
 
-	nr_nodes = build_node_list(&cluster->node_list, entries);
+	memset(&req, 0, sizeof(req));
 
-	eprintf("%s (%d) %" PRIu64 ", base: %" PRIu64 "\n", name, len, size,
+	nr_nodes = build_node_list(&ci->node_list, entries);
+
+	dprintf("%s (%d) %" PRIu64 ", base: %" PRIu64 "\n", name, len, size,
 		base_oid);
 
-	buf = zalloc(DIR_BUF_LEN);
-	if (!buf)
-		return 1;
+	/* todo */
+/* 	copies = sb->default_nr_copies; */
+	copies = 3;
+	if (copies > nr_nodes)
+		copies = nr_nodes;
 
-	ret = read_object(entries, nr_nodes, cluster->epoch,
-			  SD_DIR_OID, buf, DIR_BUF_LEN, 0, nr_nodes);
-	if (ret < 0) {
-		ret = SD_RES_DIR_READ;
-		goto out;
-	}
+	req.opcode = SD_OP_SO_NEW_VDI;
+	req.copies = copies;
+	req.tag = tag;
 
-	sb = (struct sheepdog_super_block *)buf;
-	copies = sb->default_nr_copies;
-
-	ret = read_object(entries, nr_nodes, cluster->epoch,
-			  SD_DIR_OID, buf, DIR_BUF_LEN, sizeof(*sb), nr_nodes);
-	if (ret < 0) {
-		ret = SD_RES_DIR_READ;
-		goto out;
-	}
+	ret = exec_reqs(entries, nr_nodes, ci->epoch,
+			SD_DIR_OID, (struct sd_req *)&req, name, len, copies);
 
-	ent = (struct sheepdog_dir_entry *)buf;
-	rest = ret;
-	while (rest > 0) {
-		if (!ent->name_len)
-			break;
+	/* todo: error handling */
 
-		if (sheepdog_match(ent, name, len) && !tag) {
-			ret = SD_RES_VDI_EXIST;
-			goto out;
-		}
-		oid = ent->oid;
-		prv = ent;
-		ent = next_entry(prv);
-		rest -= ((char *)ent - (char *)prv);
-	}
+	oid = ((struct sd_so_rsp *)&req)->oid;
+	*added_oid = oid;
 
-	/* need to check if the buffer is large enough here. */
-	oid += (1 << 18);
+	dprintf("%s (%d) %" PRIu64 ", base: %" PRIu64 "\n", name, len, size,
+		oid);
 
-	ret = create_inode_obj(entries, nr_nodes, cluster->epoch, copies,
+	ret = create_inode_obj(entries, nr_nodes, ci->epoch, copies,
 			       oid, size, base_oid);
-	if (ret)
-		goto out;
-
-	ent->oid = oid;
-	ent->tag = tag;
-
-	ent->flags = FLAG_CURRENT;
-	ent->name_len = len;
-	memcpy(ent->name, name, len);
-
-	if (tag) {
-		struct sheepdog_dir_entry *e = (struct sheepdog_dir_entry *)buf;
-
-		while (e < ent) {
-			if (sheepdog_match(e, name, len))
-				e->flags &= ~FLAG_CURRENT;
-			e = next_entry(e);
-		}
-	}
-
-	ent = next_entry(ent);
-
-	ret = write_object(entries, nr_nodes, cluster->epoch,
-			   SD_DIR_OID, buf, (char *)ent - buf, sizeof(*sb),
-			   copies, 0);
-	if (ret) {
-		ret = SD_RES_DIR_WRITE;
-		goto out;
-	}
-
-	*added_oid = oid;
-out:
-	free(buf);
 
 	return ret;
 }
@@ -193,68 +130,41 @@ int del_vdi(struct cluster_info *cluster, char *name, int len)
 	return 0;
 }
 
-int lookup_vdi(struct cluster_info *cluster,
+int lookup_vdi(struct cluster_info *ci,
 	       char *filename, uint64_t * oid, uint32_t tag, int do_lock,
 	       int *current)
 {
 	struct sheepdog_node_list_entry entries[SD_MAX_NODES];
 	int nr_nodes;
-	int rest, ret;
-	char *buf;
-	struct sheepdog_dir_entry *prv, *ent;
+	int ret, copies;
+	struct sd_so_req req;
+	struct sd_so_rsp *rsp = (struct sd_so_rsp *)&req;
 
-	nr_nodes = build_node_list(&cluster->node_list, entries);
+	memset(&req, 0, sizeof(req));
+
+	nr_nodes = build_node_list(&ci->node_list, entries);
 
 	*current = 0;
-	buf = zalloc(DIR_BUF_LEN);
-	if (!buf)
-		return 1;
-
-	ret = read_object(entries, nr_nodes, cluster->epoch,
-			  SD_DIR_OID, buf, DIR_BUF_LEN,
-			  sizeof(struct sheepdog_super_block), nr_nodes);
-	if (ret < 0) {
-		ret = SD_RES_DIR_READ;
-		goto out;
-	}
 
-	eprintf("looking for %s %zd, %d\n", filename, strlen(filename), ret);
+	dprintf("looking for %s %zd\n", filename, strlen(filename));
 
-	ent = (struct sheepdog_dir_entry *)buf;
-	rest = ret;
-	ret = SD_RES_NO_VDI;
-	while (rest > 0) {
-		if (!ent->name_len)
-			break;
+	/* todo */
+	copies = 3;
+	if (copies > nr_nodes)
+		copies = nr_nodes;
 
-		eprintf("%s %d %" PRIu64 "\n", ent->name, ent->name_len,
-			ent->oid);
+	req.opcode = SD_OP_SO_LOOKUP_VDI;
+	req.tag = tag;
 
-		if (sheepdog_match(ent, filename, strlen(filename))) {
-			if (ent->tag != tag && tag != -1) {
-				ret = SD_RES_NO_TAG;
-				goto next;
-			}
-			if (ent->tag != tag && !(ent->flags & FLAG_CURRENT)) {
-				/* current vdi must exsit */
-				ret = SD_RES_SYSTEM_ERROR;
-				goto next;
-			}
+	ret = exec_reqs(entries, nr_nodes, ci->epoch,
+			SD_DIR_OID, (struct sd_req *)&req, filename, strlen(filename), copies);
 
-			*oid = ent->oid;
-			ret = 0;
+	*oid = rsp->oid;
+	if (rsp->flags & SD_VDI_RSP_FLAG_CURRENT)
+		*current = 1;
+
+	dprintf("looking for %s %lx\n", filename, *oid);
 
-			if (ent->flags & FLAG_CURRENT)
-				*current = 1;
-			break;
-		}
-next:
-		prv = ent;
-		ent = next_entry(prv);
-		rest -= ((char *)ent - (char *)prv);
-	}
-out:
-	free(buf);
 	return ret;
 }
 
@@ -277,7 +187,7 @@ int make_super_object(struct cluster_info *ci, struct sd_vdi_req *hdr)
 	nr_nodes = build_node_list(&ci->node_list, entries);
 
 	ret = exec_reqs(entries, nr_nodes, ci->epoch,
-			SD_DIR_OID, (struct sd_req *)&req, req.copies);
+			SD_DIR_OID, (struct sd_req *)&req, NULL, 0, req.copies);
 
 	return ret;
 }
diff --git a/include/net.h b/include/net.h
index 68f18ac..7205f6a 100644
--- a/include/net.h
+++ b/include/net.h
@@ -45,7 +45,8 @@ int read_object(struct sheepdog_node_list_entry *e,
 		uint64_t offset, int nr);
 
 int exec_reqs(struct sheepdog_node_list_entry *e,
-	      int nodes, uint32_t node_version, uint64_t oid, struct sd_req *hdr, int nr);
+	      int nodes, uint32_t node_version, uint64_t oid, struct sd_req *hdr,
+	      char *wdata, unsigned int wdatalen, int nr);
 
 int create_listen_ports(int port, int (*callback)(int fd, void *), void *data);
 
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index d18e543..2f3bad4 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -45,6 +45,9 @@
 
 /* collie <-> collie */
 #define SD_OP_SO             0x60
+#define SD_OP_SO_NEW_VDI     0x61
+#define SD_OP_SO_DEL_VDI     0x62
+#define SD_OP_SO_LOOKUP_VDI  0x63
 
 #define SD_OP_STAT_SHEEP     0xB0
 
@@ -77,6 +80,7 @@
 #define SD_RES_VDI_NOT_LOCKED   0x17 /* Vdi is not locked */
 #define SD_RES_SHUTDOWN      0x18 /* Sheepdog is shutting down */
 
+#define SD_VDI_RSP_FLAG_CURRENT 0x01
 
 struct sd_req {
 	uint8_t		proto_ver;
@@ -109,7 +113,8 @@ struct sd_so_req {
 	uint64_t	oid;
 	uint64_t	ctime;
 	uint32_t	copies;
-	uint32_t	opcode_specific[3];
+	uint32_t	tag;
+	uint32_t	opcode_specific[2];
 };
 
 struct sd_so_rsp {
@@ -164,8 +169,6 @@ struct sd_vdi_req {
 	uint32_t        pad[2];
 };
 
-#define SD_VDI_RSP_FLAG_CURRENT 0x01;
-
 struct sd_vdi_rsp {
 	uint8_t		proto_ver;
 	uint8_t		opcode;
diff --git a/lib/net.c b/lib/net.c
index d8b45d4..4f05cb9 100644
--- a/lib/net.c
+++ b/lib/net.c
@@ -432,14 +432,15 @@ int read_object(struct sheepdog_node_list_entry *e,
 
 /* TODO: clean up with the above functions */
 int exec_reqs(struct sheepdog_node_list_entry *e,
-	      int nodes, uint32_t node_version, uint64_t oid, struct sd_req *hdr, int nr)
+	      int nodes, uint32_t node_version, uint64_t oid, struct sd_req *hdr,
+	      char *wdata, unsigned int wdatalen, int nr)
 {
 	char name[128];
 	int i = 0, n, fd, ret;
 	int success = 0;
 
 	for (i = 0; i < nr; i++) {
-		unsigned wlen = 0, rlen = 0;
+		unsigned wlen = wdatalen, rlen = 0;
 
 		n = obj_to_sheep(e, nodes, oid, i);
 
@@ -454,8 +455,11 @@ int exec_reqs(struct sheepdog_node_list_entry *e,
 			return -1;
 
 		hdr->epoch = node_version;
+		if (wdatalen)
+			hdr->flags = SD_FLAG_CMD_WRITE;
+		hdr->data_length = wlen;
 
-		ret = exec_req(fd, hdr, NULL, &wlen, &rlen);
+		ret = exec_req(fd, hdr, wdata, &wlen, &rlen);
 		close(fd);
 
 		if (!ret)
-- 
1.5.6.5




More information about the sheepdog mailing list