[Sheepdog] [PATCH 3/4] store vdi as directories
FUJITA Tomonori
fujita.tomonori at lab.ntt.co.jp
Mon Dec 21 09:04:16 CET 2009
Here's an example:
./linux
./linux/0000000000080000-4b2f0541
./linux/0000000000040000-00000000
./linux2
./linux2/00000000000c0000-00000000
'linux' and 'linux2' are vdi names. 'linux' vdi has one snapshot.
'0000000000080000-4b2f0541' is the oid of the vdi object followed by
its tag.
Signed-off-by: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp>
---
collie/net.c | 2 +
collie/store.c | 151 ++++++++++++++++++++++++++++++++++++++-
collie/vdi.c | 180 ++++++++++++----------------------------------
include/net.h | 3 +-
include/sheepdog_proto.h | 9 ++-
lib/net.c | 10 ++-
6 files changed, 211 insertions(+), 144 deletions(-)
diff --git a/collie/net.c b/collie/net.c
index 0e95c08..be2d084 100644
--- a/collie/net.c
+++ b/collie/net.c
@@ -62,6 +62,8 @@ static void queue_request(struct request *req)
req->work.fn = cluster_queue_request;
break;
case SD_OP_SO:
+ case SD_OP_SO_NEW_VDI:
+ case SD_OP_SO_LOOKUP_VDI:
req->work.fn = so_queue_request;
break;
default:
diff --git a/collie/store.c b/collie/store.c
index 00dcf41..41a8dc5 100644
--- a/collie/store.c
+++ b/collie/store.c
@@ -13,6 +13,7 @@
#include <fcntl.h>
#include <mntent.h>
#include <stdio.h>
+#include <stdlib.h>
#include <unistd.h>
#include <sys/xattr.h>
#include <sys/statvfs.h>
@@ -22,6 +23,7 @@
#define ANAME_LAST_OID "user.sheepdog.last_oid"
#define ANAME_COPIES "user.sheepdog.copes"
+#define ANAME_CURRENT "user.sheepdog.current"
static char *obj_dir;
static char *mnt_dir;
@@ -321,13 +323,85 @@ out:
close(fd);
}
+static int so_lookup_vdi(struct request *req)
+{
+ struct sd_so_req *hdr = (struct sd_so_req *)&req->rq;
+ struct sd_so_rsp *rsp = (struct sd_so_rsp *)&req->rp;
+ DIR *dir;
+ struct dirent *dent;
+ char *p;
+ int fd, ret;
+ uint64_t coid, oid;
+ char path[1024];
+
+ memset(path, 0, sizeof(path));
+ snprintf(path, sizeof(path), "%s/vdi/", obj_dir);
+ strncpy(path + strlen(path), (char *)req->data, hdr->data_length);
+
+ dprintf("%s, %x\n", path, hdr->tag);
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ eprintf("%m\n");
+ return SD_RES_EIO;
+ }
+
+ ret = fgetxattr(fd, ANAME_CURRENT, &coid,
+ sizeof(coid));
+ if (ret != sizeof(coid)) {
+ close(fd);
+ eprintf("%m\n");
+ return SD_RES_EIO;
+ }
+
+ dprintf("%lx, %x\n", coid, hdr->tag);
+
+ close(fd);
+
+ if (hdr->tag == 0xffffffff) {
+ close(fd);
+ rsp->oid = coid;
+ rsp->flags = SD_VDI_RSP_FLAG_CURRENT;
+ return SD_RES_SUCCESS;
+ }
+
+ dir = opendir(path);
+
+ while ((dent = readdir(dir))) {
+ if (!strcmp(dent->d_name, ".") ||
+ !strcmp(dent->d_name, ".."))
+ continue;
+
+ p = strchr(dent->d_name, '-');
+ if (!p) {
+ eprintf("bug %s\n", dent->d_name);
+ continue;
+ }
+
+ if (strtoull(p + 1, NULL, 16) == hdr->tag) {
+ *p = '\0';
+ oid = strtoull(dent->d_name, NULL, 16);
+ rsp->oid = oid;
+ dprintf("%lx, %x\n", oid, hdr->tag);
+ if (oid == coid)
+ rsp->flags = SD_VDI_RSP_FLAG_CURRENT;
+
+ ret = SD_RES_SUCCESS;
+ break;
+ }
+ }
+ closedir(dir);
+
+ return SD_RES_SUCCESS;
+}
+
void so_queue_request(struct work *work, int idx)
{
struct request *req = container_of(work, struct request, work);
struct sd_so_req *hdr = (struct sd_so_req *)&req->rq;
struct sd_so_rsp *rsp = (struct sd_so_rsp *)&req->rp;
struct cluster_info *cluster = req->ci->cluster;
- int fd = -1, ret, result = SD_RES_SUCCESS;
+ int nfd, fd = -1, ret, result = SD_RES_SUCCESS;
uint32_t opcode = hdr->opcode;
uint64_t last_oid = 0;
char path[1024];
@@ -343,10 +417,10 @@ void so_queue_request(struct work *work, int idx)
goto out;
memset(path, 0, sizeof(path));
+ snprintf(path, sizeof(path), "%s/vdi", obj_dir);
switch (opcode) {
case SD_OP_SO:
- snprintf(path, sizeof(path), "%s/vdi", obj_dir);
ret = mkdir(path, S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP |
S_IWGRP | S_IXGRP);
if (ret && errno != EEXIST) {
@@ -372,6 +446,79 @@ void so_queue_request(struct work *work, int idx)
sizeof(hdr->copies), 0);
if (ret)
result = SD_RES_EIO;
+ break;
+ case SD_OP_SO_NEW_VDI:
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ result = SD_RES_EIO;
+ goto out;
+ }
+
+ ret = fgetxattr(fd, ANAME_LAST_OID, &last_oid,
+ sizeof(last_oid));
+ if (ret != sizeof(last_oid)) {
+ close(fd);
+ result = SD_RES_EIO;
+ goto out;
+ }
+
+ strncpy(path + strlen(path), "/", 1);
+ strncpy(path + strlen(path), (char *)req->data, hdr->data_length);
+
+ if (hdr->tag)
+ ;
+ else {
+ ret = mkdir(path, S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP |
+ S_IWGRP | S_IXGRP);
+ if (ret) {
+ eprintf("%m\n");
+ result = SD_RES_EIO;
+ goto out;
+ }
+ }
+
+ nfd = open(path, O_RDONLY);
+ if (nfd < 0) {
+ eprintf("%m\n");
+ result = SD_RES_EIO;
+ goto out;
+ }
+
+ last_oid += MAX_DATA_OBJS;
+
+ snprintf(path+ strlen(path), sizeof(path) - strlen(path),
+ "/%016lx-%08x", last_oid, hdr->tag);
+ ret = creat(path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
+ if (ret < 0) {
+ eprintf("%m\n");
+ result = SD_RES_EIO;
+ goto out;
+ }
+ close(ret);
+
+ ret = fsetxattr(fd, ANAME_LAST_OID, &last_oid,
+ sizeof(last_oid), 0);
+ if (ret) {
+ eprintf("%m\n");
+ close(fd);
+ result = SD_RES_EIO;
+ goto out;
+ }
+
+ close(fd);
+
+ ret = fsetxattr(nfd, ANAME_CURRENT, &last_oid,
+ sizeof(last_oid), 0);
+
+ close(nfd);
+
+ eprintf("%lx\n", last_oid);
+ rsp->oid = last_oid;
+ break;
+
+ case SD_OP_SO_LOOKUP_VDI:
+ ret = so_lookup_vdi(req);
+ break;
}
out:
diff --git a/collie/vdi.c b/collie/vdi.c
index d774d71..cf8d9ac 100644
--- a/collie/vdi.c
+++ b/collie/vdi.c
@@ -17,15 +17,6 @@
#include "meta.h"
#include "collie.h"
-static int sheepdog_match(struct sheepdog_dir_entry *ent, char *name, int len)
-{
- if (!ent->name_len)
- return 0;
- if (ent->name_len != len)
- return 0;
- return !memcmp(ent->name, name, len);
-}
-
/* TODO: should be performed atomically */
static int create_inode_obj(struct sheepdog_node_list_entry *entries,
int nr_nodes, uint64_t epoch, int copies,
@@ -87,103 +78,49 @@ static int create_inode_obj(struct sheepdog_node_list_entry *entries,
return ret;
}
-#define DIR_BUF_LEN (UINT64_C(1) << 20)
-
/*
* TODO: handle larger buffer
*/
-int add_vdi(struct cluster_info *cluster, char *name, int len, uint64_t size,
+int add_vdi(struct cluster_info *ci, char *name, int len, uint64_t size,
uint64_t *added_oid, uint64_t base_oid, uint32_t tag)
{
struct sheepdog_node_list_entry entries[SD_MAX_NODES];
int nr_nodes;
- struct sheepdog_dir_entry *prv, *ent;
uint64_t oid = 0;
- char *buf;
- int ret, rest;
- struct sheepdog_super_block *sb;
+ int ret;
int copies;
+ struct sd_so_req req;
- nr_nodes = build_node_list(&cluster->node_list, entries);
+ memset(&req, 0, sizeof(req));
- eprintf("%s (%d) %" PRIu64 ", base: %" PRIu64 "\n", name, len, size,
+ nr_nodes = build_node_list(&ci->node_list, entries);
+
+ dprintf("%s (%d) %" PRIu64 ", base: %" PRIu64 "\n", name, len, size,
base_oid);
- buf = zalloc(DIR_BUF_LEN);
- if (!buf)
- return 1;
+ /* todo */
+/* copies = sb->default_nr_copies; */
+ copies = 3;
+ if (copies > nr_nodes)
+ copies = nr_nodes;
- ret = read_object(entries, nr_nodes, cluster->epoch,
- SD_DIR_OID, buf, DIR_BUF_LEN, 0, nr_nodes);
- if (ret < 0) {
- ret = SD_RES_DIR_READ;
- goto out;
- }
+ req.opcode = SD_OP_SO_NEW_VDI;
+ req.copies = copies;
+ req.tag = tag;
- sb = (struct sheepdog_super_block *)buf;
- copies = sb->default_nr_copies;
-
- ret = read_object(entries, nr_nodes, cluster->epoch,
- SD_DIR_OID, buf, DIR_BUF_LEN, sizeof(*sb), nr_nodes);
- if (ret < 0) {
- ret = SD_RES_DIR_READ;
- goto out;
- }
+ ret = exec_reqs(entries, nr_nodes, ci->epoch,
+ SD_DIR_OID, (struct sd_req *)&req, name, len, copies);
- ent = (struct sheepdog_dir_entry *)buf;
- rest = ret;
- while (rest > 0) {
- if (!ent->name_len)
- break;
+ /* todo: error handling */
- if (sheepdog_match(ent, name, len) && !tag) {
- ret = SD_RES_VDI_EXIST;
- goto out;
- }
- oid = ent->oid;
- prv = ent;
- ent = next_entry(prv);
- rest -= ((char *)ent - (char *)prv);
- }
+ oid = ((struct sd_so_rsp *)&req)->oid;
+ *added_oid = oid;
- /* need to check if the buffer is large enough here. */
- oid += (1 << 18);
+ dprintf("%s (%d) %" PRIu64 ", base: %" PRIu64 "\n", name, len, size,
+ oid);
- ret = create_inode_obj(entries, nr_nodes, cluster->epoch, copies,
+ ret = create_inode_obj(entries, nr_nodes, ci->epoch, copies,
oid, size, base_oid);
- if (ret)
- goto out;
-
- ent->oid = oid;
- ent->tag = tag;
-
- ent->flags = FLAG_CURRENT;
- ent->name_len = len;
- memcpy(ent->name, name, len);
-
- if (tag) {
- struct sheepdog_dir_entry *e = (struct sheepdog_dir_entry *)buf;
-
- while (e < ent) {
- if (sheepdog_match(e, name, len))
- e->flags &= ~FLAG_CURRENT;
- e = next_entry(e);
- }
- }
-
- ent = next_entry(ent);
-
- ret = write_object(entries, nr_nodes, cluster->epoch,
- SD_DIR_OID, buf, (char *)ent - buf, sizeof(*sb),
- copies, 0);
- if (ret) {
- ret = SD_RES_DIR_WRITE;
- goto out;
- }
-
- *added_oid = oid;
-out:
- free(buf);
return ret;
}
@@ -193,68 +130,41 @@ int del_vdi(struct cluster_info *cluster, char *name, int len)
return 0;
}
-int lookup_vdi(struct cluster_info *cluster,
+int lookup_vdi(struct cluster_info *ci,
char *filename, uint64_t * oid, uint32_t tag, int do_lock,
int *current)
{
struct sheepdog_node_list_entry entries[SD_MAX_NODES];
int nr_nodes;
- int rest, ret;
- char *buf;
- struct sheepdog_dir_entry *prv, *ent;
+ int ret, copies;
+ struct sd_so_req req;
+ struct sd_so_rsp *rsp = (struct sd_so_rsp *)&req;
- nr_nodes = build_node_list(&cluster->node_list, entries);
+ memset(&req, 0, sizeof(req));
+
+ nr_nodes = build_node_list(&ci->node_list, entries);
*current = 0;
- buf = zalloc(DIR_BUF_LEN);
- if (!buf)
- return 1;
-
- ret = read_object(entries, nr_nodes, cluster->epoch,
- SD_DIR_OID, buf, DIR_BUF_LEN,
- sizeof(struct sheepdog_super_block), nr_nodes);
- if (ret < 0) {
- ret = SD_RES_DIR_READ;
- goto out;
- }
- eprintf("looking for %s %zd, %d\n", filename, strlen(filename), ret);
+ dprintf("looking for %s %zd\n", filename, strlen(filename));
- ent = (struct sheepdog_dir_entry *)buf;
- rest = ret;
- ret = SD_RES_NO_VDI;
- while (rest > 0) {
- if (!ent->name_len)
- break;
+ /* todo */
+ copies = 3;
+ if (copies > nr_nodes)
+ copies = nr_nodes;
- eprintf("%s %d %" PRIu64 "\n", ent->name, ent->name_len,
- ent->oid);
+ req.opcode = SD_OP_SO_LOOKUP_VDI;
+ req.tag = tag;
- if (sheepdog_match(ent, filename, strlen(filename))) {
- if (ent->tag != tag && tag != -1) {
- ret = SD_RES_NO_TAG;
- goto next;
- }
- if (ent->tag != tag && !(ent->flags & FLAG_CURRENT)) {
- /* current vdi must exsit */
- ret = SD_RES_SYSTEM_ERROR;
- goto next;
- }
+ ret = exec_reqs(entries, nr_nodes, ci->epoch,
+ SD_DIR_OID, (struct sd_req *)&req, filename, strlen(filename), copies);
- *oid = ent->oid;
- ret = 0;
+ *oid = rsp->oid;
+ if (rsp->flags & SD_VDI_RSP_FLAG_CURRENT)
+ *current = 1;
+
+ dprintf("looking for %s %lx\n", filename, *oid);
- if (ent->flags & FLAG_CURRENT)
- *current = 1;
- break;
- }
-next:
- prv = ent;
- ent = next_entry(prv);
- rest -= ((char *)ent - (char *)prv);
- }
-out:
- free(buf);
return ret;
}
@@ -277,7 +187,7 @@ int make_super_object(struct cluster_info *ci, struct sd_vdi_req *hdr)
nr_nodes = build_node_list(&ci->node_list, entries);
ret = exec_reqs(entries, nr_nodes, ci->epoch,
- SD_DIR_OID, (struct sd_req *)&req, req.copies);
+ SD_DIR_OID, (struct sd_req *)&req, NULL, 0, req.copies);
return ret;
}
diff --git a/include/net.h b/include/net.h
index 68f18ac..7205f6a 100644
--- a/include/net.h
+++ b/include/net.h
@@ -45,7 +45,8 @@ int read_object(struct sheepdog_node_list_entry *e,
uint64_t offset, int nr);
int exec_reqs(struct sheepdog_node_list_entry *e,
- int nodes, uint32_t node_version, uint64_t oid, struct sd_req *hdr, int nr);
+ int nodes, uint32_t node_version, uint64_t oid, struct sd_req *hdr,
+ char *wdata, unsigned int wdatalen, int nr);
int create_listen_ports(int port, int (*callback)(int fd, void *), void *data);
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index d18e543..2f3bad4 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -45,6 +45,9 @@
/* collie <-> collie */
#define SD_OP_SO 0x60
+#define SD_OP_SO_NEW_VDI 0x61
+#define SD_OP_SO_DEL_VDI 0x62
+#define SD_OP_SO_LOOKUP_VDI 0x63
#define SD_OP_STAT_SHEEP 0xB0
@@ -77,6 +80,7 @@
#define SD_RES_VDI_NOT_LOCKED 0x17 /* Vdi is not locked */
#define SD_RES_SHUTDOWN 0x18 /* Sheepdog is shutting down */
+#define SD_VDI_RSP_FLAG_CURRENT 0x01
struct sd_req {
uint8_t proto_ver;
@@ -109,7 +113,8 @@ struct sd_so_req {
uint64_t oid;
uint64_t ctime;
uint32_t copies;
- uint32_t opcode_specific[3];
+ uint32_t tag;
+ uint32_t opcode_specific[2];
};
struct sd_so_rsp {
@@ -164,8 +169,6 @@ struct sd_vdi_req {
uint32_t pad[2];
};
-#define SD_VDI_RSP_FLAG_CURRENT 0x01;
-
struct sd_vdi_rsp {
uint8_t proto_ver;
uint8_t opcode;
diff --git a/lib/net.c b/lib/net.c
index d8b45d4..4f05cb9 100644
--- a/lib/net.c
+++ b/lib/net.c
@@ -432,14 +432,15 @@ int read_object(struct sheepdog_node_list_entry *e,
/* TODO: clean up with the above functions */
int exec_reqs(struct sheepdog_node_list_entry *e,
- int nodes, uint32_t node_version, uint64_t oid, struct sd_req *hdr, int nr)
+ int nodes, uint32_t node_version, uint64_t oid, struct sd_req *hdr,
+ char *wdata, unsigned int wdatalen, int nr)
{
char name[128];
int i = 0, n, fd, ret;
int success = 0;
for (i = 0; i < nr; i++) {
- unsigned wlen = 0, rlen = 0;
+ unsigned wlen = wdatalen, rlen = 0;
n = obj_to_sheep(e, nodes, oid, i);
@@ -454,8 +455,11 @@ int exec_reqs(struct sheepdog_node_list_entry *e,
return -1;
hdr->epoch = node_version;
+ if (wdatalen)
+ hdr->flags = SD_FLAG_CMD_WRITE;
+ hdr->data_length = wlen;
- ret = exec_req(fd, hdr, NULL, &wlen, &rlen);
+ ret = exec_req(fd, hdr, wdata, &wlen, &rlen);
close(fd);
if (!ret)
--
1.5.6.5
More information about the sheepdog
mailing list