Here's an example: ./linux ./linux/0000000000080000-4b2f0541 ./linux/0000000000040000-00000000 ./linux2 ./linux2/00000000000c0000-00000000 'linux' and 'linux2' are vdi names. 'linux' vdi has one snapshot. '0000000000080000-4b2f0541' is the oid of the vdi object followed by its tag. Signed-off-by: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp> --- collie/net.c | 2 + collie/store.c | 151 ++++++++++++++++++++++++++++++++++++++- collie/vdi.c | 180 ++++++++++++---------------------------------- include/net.h | 3 +- include/sheepdog_proto.h | 9 ++- lib/net.c | 10 ++- 6 files changed, 211 insertions(+), 144 deletions(-) diff --git a/collie/net.c b/collie/net.c index 0e95c08..be2d084 100644 --- a/collie/net.c +++ b/collie/net.c @@ -62,6 +62,8 @@ static void queue_request(struct request *req) req->work.fn = cluster_queue_request; break; case SD_OP_SO: + case SD_OP_SO_NEW_VDI: + case SD_OP_SO_LOOKUP_VDI: req->work.fn = so_queue_request; break; default: diff --git a/collie/store.c b/collie/store.c index 00dcf41..41a8dc5 100644 --- a/collie/store.c +++ b/collie/store.c @@ -13,6 +13,7 @@ #include <fcntl.h> #include <mntent.h> #include <stdio.h> +#include <stdlib.h> #include <unistd.h> #include <sys/xattr.h> #include <sys/statvfs.h> @@ -22,6 +23,7 @@ #define ANAME_LAST_OID "user.sheepdog.last_oid" #define ANAME_COPIES "user.sheepdog.copes" +#define ANAME_CURRENT "user.sheepdog.current" static char *obj_dir; static char *mnt_dir; @@ -321,13 +323,85 @@ out: close(fd); } +static int so_lookup_vdi(struct request *req) +{ + struct sd_so_req *hdr = (struct sd_so_req *)&req->rq; + struct sd_so_rsp *rsp = (struct sd_so_rsp *)&req->rp; + DIR *dir; + struct dirent *dent; + char *p; + int fd, ret; + uint64_t coid, oid; + char path[1024]; + + memset(path, 0, sizeof(path)); + snprintf(path, sizeof(path), "%s/vdi/", obj_dir); + strncpy(path + strlen(path), (char *)req->data, hdr->data_length); + + dprintf("%s, %x\n", path, hdr->tag); + + fd = open(path, O_RDONLY); + if (fd < 0) { + 
eprintf("%m\n"); + return SD_RES_EIO; + } + + ret = fgetxattr(fd, ANAME_CURRENT, &coid, + sizeof(coid)); + if (ret != sizeof(coid)) { + close(fd); + eprintf("%m\n"); + return SD_RES_EIO; + } + + dprintf("%lx, %x\n", coid, hdr->tag); + + close(fd); + + if (hdr->tag == 0xffffffff) { + close(fd); + rsp->oid = coid; + rsp->flags = SD_VDI_RSP_FLAG_CURRENT; + return SD_RES_SUCCESS; + } + + dir = opendir(path); + + while ((dent = readdir(dir))) { + if (!strcmp(dent->d_name, ".") || + !strcmp(dent->d_name, "..")) + continue; + + p = strchr(dent->d_name, '-'); + if (!p) { + eprintf("bug %s\n", dent->d_name); + continue; + } + + if (strtoull(p + 1, NULL, 16) == hdr->tag) { + *p = '\0'; + oid = strtoull(dent->d_name, NULL, 16); + rsp->oid = oid; + dprintf("%lx, %x\n", oid, hdr->tag); + if (oid == coid) + rsp->flags = SD_VDI_RSP_FLAG_CURRENT; + + ret = SD_RES_SUCCESS; + break; + } + } + closedir(dir); + + return SD_RES_SUCCESS; +} + void so_queue_request(struct work *work, int idx) { struct request *req = container_of(work, struct request, work); struct sd_so_req *hdr = (struct sd_so_req *)&req->rq; struct sd_so_rsp *rsp = (struct sd_so_rsp *)&req->rp; struct cluster_info *cluster = req->ci->cluster; - int fd = -1, ret, result = SD_RES_SUCCESS; + int nfd, fd = -1, ret, result = SD_RES_SUCCESS; uint32_t opcode = hdr->opcode; uint64_t last_oid = 0; char path[1024]; @@ -343,10 +417,10 @@ void so_queue_request(struct work *work, int idx) goto out; memset(path, 0, sizeof(path)); + snprintf(path, sizeof(path), "%s/vdi", obj_dir); switch (opcode) { case SD_OP_SO: - snprintf(path, sizeof(path), "%s/vdi", obj_dir); ret = mkdir(path, S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP); if (ret && errno != EEXIST) { @@ -372,6 +446,79 @@ void so_queue_request(struct work *work, int idx) sizeof(hdr->copies), 0); if (ret) result = SD_RES_EIO; + break; + case SD_OP_SO_NEW_VDI: + fd = open(path, O_RDONLY); + if (fd < 0) { + result = SD_RES_EIO; + goto out; + } + + ret = fgetxattr(fd, 
ANAME_LAST_OID, &last_oid, + sizeof(last_oid)); + if (ret != sizeof(last_oid)) { + close(fd); + result = SD_RES_EIO; + goto out; + } + + strncpy(path + strlen(path), "/", 1); + strncpy(path + strlen(path), (char *)req->data, hdr->data_length); + + if (hdr->tag) + ; + else { + ret = mkdir(path, S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | + S_IWGRP | S_IXGRP); + if (ret) { + eprintf("%m\n"); + result = SD_RES_EIO; + goto out; + } + } + + nfd = open(path, O_RDONLY); + if (nfd < 0) { + eprintf("%m\n"); + result = SD_RES_EIO; + goto out; + } + + last_oid += MAX_DATA_OBJS; + + snprintf(path+ strlen(path), sizeof(path) - strlen(path), + "/%016lx-%08x", last_oid, hdr->tag); + ret = creat(path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); + if (ret < 0) { + eprintf("%m\n"); + result = SD_RES_EIO; + goto out; + } + close(ret); + + ret = fsetxattr(fd, ANAME_LAST_OID, &last_oid, + sizeof(last_oid), 0); + if (ret) { + eprintf("%m\n"); + close(fd); + result = SD_RES_EIO; + goto out; + } + + close(fd); + + ret = fsetxattr(nfd, ANAME_CURRENT, &last_oid, + sizeof(last_oid), 0); + + close(nfd); + + eprintf("%lx\n", last_oid); + rsp->oid = last_oid; + break; + + case SD_OP_SO_LOOKUP_VDI: + ret = so_lookup_vdi(req); + break; } out: diff --git a/collie/vdi.c b/collie/vdi.c index d774d71..cf8d9ac 100644 --- a/collie/vdi.c +++ b/collie/vdi.c @@ -17,15 +17,6 @@ #include "meta.h" #include "collie.h" -static int sheepdog_match(struct sheepdog_dir_entry *ent, char *name, int len) -{ - if (!ent->name_len) - return 0; - if (ent->name_len != len) - return 0; - return !memcmp(ent->name, name, len); -} - /* TODO: should be performed atomically */ static int create_inode_obj(struct sheepdog_node_list_entry *entries, int nr_nodes, uint64_t epoch, int copies, @@ -87,103 +78,49 @@ static int create_inode_obj(struct sheepdog_node_list_entry *entries, return ret; } -#define DIR_BUF_LEN (UINT64_C(1) << 20) - /* * TODO: handle larger buffer */ -int add_vdi(struct cluster_info *cluster, char *name, int len, 
uint64_t size, +int add_vdi(struct cluster_info *ci, char *name, int len, uint64_t size, uint64_t *added_oid, uint64_t base_oid, uint32_t tag) { struct sheepdog_node_list_entry entries[SD_MAX_NODES]; int nr_nodes; - struct sheepdog_dir_entry *prv, *ent; uint64_t oid = 0; - char *buf; - int ret, rest; - struct sheepdog_super_block *sb; + int ret; int copies; + struct sd_so_req req; - nr_nodes = build_node_list(&cluster->node_list, entries); + memset(&req, 0, sizeof(req)); - eprintf("%s (%d) %" PRIu64 ", base: %" PRIu64 "\n", name, len, size, + nr_nodes = build_node_list(&ci->node_list, entries); + + dprintf("%s (%d) %" PRIu64 ", base: %" PRIu64 "\n", name, len, size, base_oid); - buf = zalloc(DIR_BUF_LEN); - if (!buf) - return 1; + /* todo */ +/* copies = sb->default_nr_copies; */ + copies = 3; + if (copies > nr_nodes) + copies = nr_nodes; - ret = read_object(entries, nr_nodes, cluster->epoch, - SD_DIR_OID, buf, DIR_BUF_LEN, 0, nr_nodes); - if (ret < 0) { - ret = SD_RES_DIR_READ; - goto out; - } + req.opcode = SD_OP_SO_NEW_VDI; + req.copies = copies; + req.tag = tag; - sb = (struct sheepdog_super_block *)buf; - copies = sb->default_nr_copies; - - ret = read_object(entries, nr_nodes, cluster->epoch, - SD_DIR_OID, buf, DIR_BUF_LEN, sizeof(*sb), nr_nodes); - if (ret < 0) { - ret = SD_RES_DIR_READ; - goto out; - } + ret = exec_reqs(entries, nr_nodes, ci->epoch, + SD_DIR_OID, (struct sd_req *)&req, name, len, copies); - ent = (struct sheepdog_dir_entry *)buf; - rest = ret; - while (rest > 0) { - if (!ent->name_len) - break; + /* todo: error handling */ - if (sheepdog_match(ent, name, len) && !tag) { - ret = SD_RES_VDI_EXIST; - goto out; - } - oid = ent->oid; - prv = ent; - ent = next_entry(prv); - rest -= ((char *)ent - (char *)prv); - } + oid = ((struct sd_so_rsp *)&req)->oid; + *added_oid = oid; - /* need to check if the buffer is large enough here. 
*/ - oid += (1 << 18); + dprintf("%s (%d) %" PRIu64 ", base: %" PRIu64 "\n", name, len, size, + oid); - ret = create_inode_obj(entries, nr_nodes, cluster->epoch, copies, + ret = create_inode_obj(entries, nr_nodes, ci->epoch, copies, oid, size, base_oid); - if (ret) - goto out; - - ent->oid = oid; - ent->tag = tag; - - ent->flags = FLAG_CURRENT; - ent->name_len = len; - memcpy(ent->name, name, len); - - if (tag) { - struct sheepdog_dir_entry *e = (struct sheepdog_dir_entry *)buf; - - while (e < ent) { - if (sheepdog_match(e, name, len)) - e->flags &= ~FLAG_CURRENT; - e = next_entry(e); - } - } - - ent = next_entry(ent); - - ret = write_object(entries, nr_nodes, cluster->epoch, - SD_DIR_OID, buf, (char *)ent - buf, sizeof(*sb), - copies, 0); - if (ret) { - ret = SD_RES_DIR_WRITE; - goto out; - } - - *added_oid = oid; -out: - free(buf); return ret; } @@ -193,68 +130,41 @@ int del_vdi(struct cluster_info *cluster, char *name, int len) return 0; } -int lookup_vdi(struct cluster_info *cluster, +int lookup_vdi(struct cluster_info *ci, char *filename, uint64_t * oid, uint32_t tag, int do_lock, int *current) { struct sheepdog_node_list_entry entries[SD_MAX_NODES]; int nr_nodes; - int rest, ret; - char *buf; - struct sheepdog_dir_entry *prv, *ent; + int ret, copies; + struct sd_so_req req; + struct sd_so_rsp *rsp = (struct sd_so_rsp *)&req; - nr_nodes = build_node_list(&cluster->node_list, entries); + memset(&req, 0, sizeof(req)); + + nr_nodes = build_node_list(&ci->node_list, entries); *current = 0; - buf = zalloc(DIR_BUF_LEN); - if (!buf) - return 1; - - ret = read_object(entries, nr_nodes, cluster->epoch, - SD_DIR_OID, buf, DIR_BUF_LEN, - sizeof(struct sheepdog_super_block), nr_nodes); - if (ret < 0) { - ret = SD_RES_DIR_READ; - goto out; - } - eprintf("looking for %s %zd, %d\n", filename, strlen(filename), ret); + dprintf("looking for %s %zd\n", filename, strlen(filename)); - ent = (struct sheepdog_dir_entry *)buf; - rest = ret; - ret = SD_RES_NO_VDI; - while (rest > 0) 
{ - if (!ent->name_len) - break; + /* todo */ + copies = 3; + if (copies > nr_nodes) + copies = nr_nodes; - eprintf("%s %d %" PRIu64 "\n", ent->name, ent->name_len, - ent->oid); + req.opcode = SD_OP_SO_LOOKUP_VDI; + req.tag = tag; - if (sheepdog_match(ent, filename, strlen(filename))) { - if (ent->tag != tag && tag != -1) { - ret = SD_RES_NO_TAG; - goto next; - } - if (ent->tag != tag && !(ent->flags & FLAG_CURRENT)) { - /* current vdi must exsit */ - ret = SD_RES_SYSTEM_ERROR; - goto next; - } + ret = exec_reqs(entries, nr_nodes, ci->epoch, + SD_DIR_OID, (struct sd_req *)&req, filename, strlen(filename), copies); - *oid = ent->oid; - ret = 0; + *oid = rsp->oid; + if (rsp->flags & SD_VDI_RSP_FLAG_CURRENT) + *current = 1; + + dprintf("looking for %s %lx\n", filename, *oid); - if (ent->flags & FLAG_CURRENT) - *current = 1; - break; - } -next: - prv = ent; - ent = next_entry(prv); - rest -= ((char *)ent - (char *)prv); - } -out: - free(buf); return ret; } @@ -277,7 +187,7 @@ int make_super_object(struct cluster_info *ci, struct sd_vdi_req *hdr) nr_nodes = build_node_list(&ci->node_list, entries); ret = exec_reqs(entries, nr_nodes, ci->epoch, - SD_DIR_OID, (struct sd_req *)&req, req.copies); + SD_DIR_OID, (struct sd_req *)&req, NULL, 0, req.copies); return ret; } diff --git a/include/net.h b/include/net.h index 68f18ac..7205f6a 100644 --- a/include/net.h +++ b/include/net.h @@ -45,7 +45,8 @@ int read_object(struct sheepdog_node_list_entry *e, uint64_t offset, int nr); int exec_reqs(struct sheepdog_node_list_entry *e, - int nodes, uint32_t node_version, uint64_t oid, struct sd_req *hdr, int nr); + int nodes, uint32_t node_version, uint64_t oid, struct sd_req *hdr, + char *wdata, unsigned int wdatalen, int nr); int create_listen_ports(int port, int (*callback)(int fd, void *), void *data); diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h index d18e543..2f3bad4 100644 --- a/include/sheepdog_proto.h +++ b/include/sheepdog_proto.h @@ -45,6 +45,9 @@ /* collie 
<-> collie */ #define SD_OP_SO 0x60 +#define SD_OP_SO_NEW_VDI 0x61 +#define SD_OP_SO_DEL_VDI 0x62 +#define SD_OP_SO_LOOKUP_VDI 0x63 #define SD_OP_STAT_SHEEP 0xB0 @@ -77,6 +80,7 @@ #define SD_RES_VDI_NOT_LOCKED 0x17 /* Vdi is not locked */ #define SD_RES_SHUTDOWN 0x18 /* Sheepdog is shutting down */ +#define SD_VDI_RSP_FLAG_CURRENT 0x01 struct sd_req { uint8_t proto_ver; @@ -109,7 +113,8 @@ struct sd_so_req { uint64_t oid; uint64_t ctime; uint32_t copies; - uint32_t opcode_specific[3]; + uint32_t tag; + uint32_t opcode_specific[2]; }; struct sd_so_rsp { @@ -164,8 +169,6 @@ struct sd_vdi_req { uint32_t pad[2]; }; -#define SD_VDI_RSP_FLAG_CURRENT 0x01; - struct sd_vdi_rsp { uint8_t proto_ver; uint8_t opcode; diff --git a/lib/net.c b/lib/net.c index d8b45d4..4f05cb9 100644 --- a/lib/net.c +++ b/lib/net.c @@ -432,14 +432,15 @@ int read_object(struct sheepdog_node_list_entry *e, /* TODO: clean up with the above functions */ int exec_reqs(struct sheepdog_node_list_entry *e, - int nodes, uint32_t node_version, uint64_t oid, struct sd_req *hdr, int nr) + int nodes, uint32_t node_version, uint64_t oid, struct sd_req *hdr, + char *wdata, unsigned int wdatalen, int nr) { char name[128]; int i = 0, n, fd, ret; int success = 0; for (i = 0; i < nr; i++) { - unsigned wlen = 0, rlen = 0; + unsigned wlen = wdatalen, rlen = 0; n = obj_to_sheep(e, nodes, oid, i); @@ -454,8 +455,11 @@ int exec_reqs(struct sheepdog_node_list_entry *e, return -1; hdr->epoch = node_version; + if (wdatalen) + hdr->flags = SD_FLAG_CMD_WRITE; + hdr->data_length = wlen; - ret = exec_req(fd, hdr, NULL, &wlen, &rlen); + ret = exec_req(fd, hdr, wdata, &wlen, &rlen); close(fd); if (!ret) -- 1.5.6.5 |