This converts the super object to a directory (from a file). Signed-off-by: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp> --- collie/collie.h | 4 ++ collie/group.c | 18 +----------- collie/net.c | 3 ++ collie/store.c | 69 ++++++++++++++++++++++++++++++++++++++++++++-- collie/vdi.c | 24 ++++++++++++++++ include/net.h | 3 ++ include/sheepdog_proto.h | 28 ++++++++++++++++++ lib/net.c | 35 +++++++++++++++++++++++ 8 files changed, 164 insertions(+), 20 deletions(-) diff --git a/collie/collie.h b/collie/collie.h index 0980ebc..3b2d871 100644 --- a/collie/collie.h +++ b/collie/collie.h @@ -81,11 +81,15 @@ int add_vdi(struct cluster_info *cluster, int lookup_vdi(struct cluster_info *cluster, char *filename, uint64_t * oid, uint32_t tag, int do_lock, int *current); +int make_super_object(struct cluster_info *ci, struct sd_vdi_req *hdr); + int build_node_list(struct list_head *node_list, struct sheepdog_node_list_entry *entries); struct cluster_info *create_cluster(int port); +void so_queue_request(struct work *work, int idx); + void store_queue_request(struct work *work, int idx); void cluster_queue_request(struct work *work, int idx); diff --git a/collie/group.c b/collie/group.c index a718afd..ccbd7ae 100644 --- a/collie/group.c +++ b/collie/group.c @@ -326,10 +326,6 @@ static void vdi_op(struct cluster_info *ci, struct vdi_op_message *msg) void *data = msg->data; int ret = SD_RES_SUCCESS, is_current; uint64_t oid = 0; - struct sheepdog_super_block *sb; - struct timeval tv; - struct sheepdog_node_list_entry entries[SD_MAX_NODES]; - int nr_nodes; switch (hdr->opcode) { case SD_OP_NEW_VDI: @@ -347,19 +343,7 @@ static void vdi_op(struct cluster_info *ci, struct vdi_op_message *msg) case SD_OP_RELEASE_VDI: break; case SD_OP_MAKE_FS: - sb = zalloc(sizeof(*sb)); - if (!sb) { - ret = -1; - break; - } - gettimeofday(&tv, NULL); - sb->ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000; - sb->default_nr_copies = 3; - - nr_nodes = build_node_list(&ci->node_list, entries); - 
ret = write_object(entries, nr_nodes, ci->epoch, - SD_DIR_OID, (char *)sb, sizeof(*sb), 0, - sb->default_nr_copies, 1); + ret = make_super_object(ci, &msg->req); break; case SD_OP_UPDATE_EPOCH: break; diff --git a/collie/net.c b/collie/net.c index cf2e4bb..0e95c08 100644 --- a/collie/net.c +++ b/collie/net.c @@ -61,6 +61,9 @@ static void queue_request(struct request *req) case SD_OP_SHUTDOWN: req->work.fn = cluster_queue_request; break; + case SD_OP_SO: + req->work.fn = so_queue_request; + break; default: eprintf("unknown operation %d\n", hdr->opcode); return; diff --git a/collie/store.c b/collie/store.c index 30bb115..00dcf41 100644 --- a/collie/store.c +++ b/collie/store.c @@ -20,6 +20,9 @@ #include "collie.h" #include "meta.h" +#define ANAME_LAST_OID "user.sheepdog.last_oid" +#define ANAME_COPIES "user.sheepdog.copes" + static char *obj_dir; static char *mnt_dir; static char *zero_block; @@ -160,7 +163,6 @@ void store_queue_request(struct work *work, int idx) int fd = -1, ret = SD_RES_SUCCESS; int flags = O_RDWR; char *buf = zero_block + idx * SD_DATA_OBJ_SIZE; - char aname[] = "user.sheepdog.copies"; struct sd_obj_req *hdr = (struct sd_obj_req *)&req->rq; struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&req->rp; uint64_t oid = hdr->oid; @@ -221,7 +223,7 @@ void store_queue_request(struct work *work, int idx) goto out; } - ret = fsetxattr(fd, aname, &hdr->copies, + ret = fsetxattr(fd, ANAME_COPIES, &hdr->copies, sizeof(hdr->copies), 0); if (ret) { eprintf("use 'user_xattr' option?\n"); @@ -270,7 +272,7 @@ void store_queue_request(struct work *work, int idx) * performance; qemu doesn't always need the copies. 
*/ copies = 0; - ret = fgetxattr(fd, aname, &copies, sizeof(copies)); + ret = fgetxattr(fd, ANAME_COPIES, &copies, sizeof(copies)); if (ret != sizeof(copies)) { ret = SD_RES_SYSTEM_ERROR; goto out; @@ -319,6 +321,67 @@ out: close(fd); } +void so_queue_request(struct work *work, int idx) +{ + struct request *req = container_of(work, struct request, work); + struct sd_so_req *hdr = (struct sd_so_req *)&req->rq; + struct sd_so_rsp *rsp = (struct sd_so_rsp *)&req->rp; + struct cluster_info *cluster = req->ci->cluster; + int fd = -1, ret, result = SD_RES_SUCCESS; + uint32_t opcode = hdr->opcode; + uint64_t last_oid = 0; + char path[1024]; + + if (list_empty(&cluster->node_list)) { + /* we haven't got SD_OP_GET_NODE_LIST response yet. */ + result = SD_RES_SYSTEM_ERROR; + goto out; + } + + result = check_epoch(cluster, req); + if (result != SD_RES_SUCCESS) + goto out; + + memset(path, 0, sizeof(path)); + + switch (opcode) { + case SD_OP_SO: + snprintf(path, sizeof(path), "%s/vdi", obj_dir); + ret = mkdir(path, S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | + S_IWGRP | S_IXGRP); + if (ret && errno != EEXIST) { + result = SD_RES_EIO; + goto out; + } + + fd = open(path, O_RDONLY); + if (fd < 0) { + result = SD_RES_EIO; + goto out; + } + + ret = fsetxattr(fd, ANAME_LAST_OID, &last_oid, + sizeof(last_oid), 0); + if (ret) { + /* fd is closed once, at the out label */ + result = SD_RES_EIO; + goto out; + } + + ret = fsetxattr(fd, ANAME_COPIES, &hdr->copies, + sizeof(hdr->copies), 0); + if (ret) + result = SD_RES_EIO; + } + +out: + if (result != SD_RES_SUCCESS) + rsp->result = result; + + if (fd != -1) + close(fd); +} + int init_store(char *dir) { int ret; diff --git a/collie/vdi.c b/collie/vdi.c index 184a22e..d774d71 100644 --- a/collie/vdi.c +++ b/collie/vdi.c @@ -257,3 +257,27 @@ out: free(buf); return ret; } + +/* todo: cleanup with the above */ +int make_super_object(struct cluster_info *ci, struct sd_vdi_req *hdr) +{ + struct timeval tv; + int nr_nodes, ret; + struct sheepdog_node_list_entry 
entries[SD_MAX_NODES]; + struct sd_so_req req; + + gettimeofday(&tv, NULL); + memset(&req, 0, sizeof(req)); + + req.oid = SD_DIR_OID; + req.opcode = SD_OP_SO; + req.ctime = (uint64_t)tv.tv_sec << 32 | tv.tv_usec * 1000; + req.copies = ((struct sd_obj_req *)hdr)->copies; + + nr_nodes = build_node_list(&ci->node_list, entries); + + ret = exec_reqs(entries, nr_nodes, ci->epoch, + SD_DIR_OID, (struct sd_req *)&req, req.copies); + + return ret; +} diff --git a/include/net.h b/include/net.h index 00d7843..68f18ac 100644 --- a/include/net.h +++ b/include/net.h @@ -44,6 +44,9 @@ int read_object(struct sheepdog_node_list_entry *e, uint64_t oid, char *data, unsigned int datalen, uint64_t offset, int nr); +int exec_reqs(struct sheepdog_node_list_entry *e, + int nodes, uint32_t node_version, uint64_t oid, struct sd_req *hdr, int nr); + int create_listen_ports(int port, int (*callback)(int fd, void *), void *data); #endif diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h index 40370b6..d18e543 100644 --- a/include/sheepdog_proto.h +++ b/include/sheepdog_proto.h @@ -43,6 +43,9 @@ #define SD_OP_WRITE_OBJ 0x04 #define SD_OP_SYNC_OBJ 0x05 +/* collie <-> collie */ +#define SD_OP_SO 0x60 + #define SD_OP_STAT_SHEEP 0xB0 #define SD_FLAG_CMD_WRITE 0x01 @@ -96,6 +99,31 @@ struct sd_rsp { uint32_t opcode_specific[7]; }; +struct sd_so_req { + uint8_t proto_ver; + uint8_t opcode; + uint16_t flags; + uint32_t epoch; + uint32_t id; + uint32_t data_length; + uint64_t oid; + uint64_t ctime; + uint32_t copies; + uint32_t opcode_specific[3]; +}; + +struct sd_so_rsp { + uint8_t proto_ver; + uint8_t opcode; + uint16_t flags; + uint32_t epoch; + uint32_t id; + uint32_t data_length; + uint32_t result; + uint64_t oid; + uint32_t opcode_specific[5]; +}; + struct sd_obj_req { uint8_t proto_ver; uint8_t opcode; diff --git a/lib/net.c b/lib/net.c index 5131fbf..d8b45d4 100644 --- a/lib/net.c +++ b/lib/net.c @@ -429,3 +429,38 @@ int read_object(struct sheepdog_node_list_entry *e, return -1; 
} + +/* TODO: clean up with the above functions */ +int exec_reqs(struct sheepdog_node_list_entry *e, + int nodes, uint32_t node_version, uint64_t oid, struct sd_req *hdr, int nr) +{ + char name[128]; + int i = 0, n, fd, ret; + int success = 0; + + for (i = 0; i < nr; i++) { + unsigned wlen = 0, rlen = 0; + + n = obj_to_sheep(e, nodes, oid, i); + + snprintf(name, sizeof(name), "%d.%d.%d.%d", + e[n].addr[12], + e[n].addr[13], + e[n].addr[14], + e[n].addr[15]); + + fd = connect_to(name, e[n].port); + if (fd < 0) + return -1; + + hdr->epoch = node_version; + + ret = exec_req(fd, hdr, NULL, &wlen, &rlen); + close(fd); + + if (!ret) + success++; + } + + return !success; +} -- 1.5.6.5 |