'collie vdi setattr' runs the following two: - allocates a vdi attr object id - writes an attribute data to the object So a race can happen between these two operations. With this patch, 'collie vdi setattr' sends the attribute data when allocating the object id to write the attribute atomically. Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp> --- collie/vdi.c | 47 ++++++++++++++++++++--------------------------- include/sheep.h | 11 +++++++---- sheep/group.c | 3 ++- sheep/sdnet.c | 9 +++++---- sheep/store.c | 5 +++-- sheep/vdi.c | 43 ++++++++++++++++++++++++++++++------------- 6 files changed, 67 insertions(+), 51 deletions(-) diff --git a/collie/vdi.c b/collie/vdi.c index 245a6e2..6ac73cf 100644 --- a/collie/vdi.c +++ b/collie/vdi.c @@ -784,8 +784,9 @@ static int vdi_object(int argc, char **argv) } static int find_vdi_attr_oid(char *vdiname, char *tag, uint32_t snapid, - char *key, uint32_t *vid, uint64_t *oid, - unsigned int *nr_copies, int creat, int excl) + char *key, void *value, unsigned int value_len, + uint32_t *vid, uint64_t *oid, unsigned int *nr_copies, + int creat, int excl, int delete) { struct sd_vdi_req hdr; struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr; @@ -793,7 +794,7 @@ static int find_vdi_attr_oid(char *vdiname, char *tag, uint32_t snapid, unsigned int wlen, rlen; struct sheepdog_vdi_attr *vattr; - vattr = zalloc(SD_ATTR_HEADER_SIZE); + vattr = zalloc(SD_ATTR_HEADER_SIZE + value_len); if (!vattr) return SD_RES_NO_MEM; @@ -801,6 +802,8 @@ static int find_vdi_attr_oid(char *vdiname, char *tag, uint32_t snapid, strncpy(vattr->tag, vdi_cmd_data.snapshot_tag, SD_MAX_VDI_TAG_LEN); vattr->snap_id = vdi_cmd_data.snapshot_id; strncpy(vattr->key, key, SD_MAX_VDI_ATTR_KEY_LEN); + if (value && value_len) + memcpy(vattr->value, value, value_len); fd = connect_to(sdhost, sdport); if (fd < 0) { @@ -810,7 +813,7 @@ static int find_vdi_attr_oid(char *vdiname, char *tag, uint32_t snapid, memset(&hdr, 0, sizeof(hdr)); hdr.opcode = SD_OP_GET_VDI_ATTR; - wlen = SD_ATTR_HEADER_SIZE; + wlen = SD_ATTR_HEADER_SIZE + value_len; rlen = 0; hdr.proto_ver = SD_PROTO_VER; hdr.data_length = wlen; @@ -820,6 +823,8 @@ static int find_vdi_attr_oid(char *vdiname, char *tag, uint32_t snapid, hdr.flags |= SD_FLAG_CMD_CREAT; if (excl) hdr.flags |= SD_FLAG_CMD_EXCL; + if (delete) + hdr.flags |= SD_FLAG_CMD_DEL; ret = exec_req(fd, (struct sd_req *)&hdr, vattr, &wlen, &rlen); if (ret) { @@ -844,8 +849,8 @@ out: static int vdi_setattr(int argc, char **argv) { - int ret; - uint64_t oid, attr_oid = 0; + int ret, value_len = 0; + uint64_t attr_oid = 0; uint32_t vid = 0, nr_copies = 0; char *vdiname = argv[optind++], *key, *value; uint64_t offset; @@ -878,10 +883,14 @@ reread: } } + if (value) + value_len = strlen(value); + ret = find_vdi_attr_oid(vdiname, vdi_cmd_data.snapshot_tag, - vdi_cmd_data.snapshot_id, key, &vid, &attr_oid, + vdi_cmd_data.snapshot_id, key, value, + value_len, &vid, &attr_oid, &nr_copies, !vdi_cmd_data.delete, - vdi_cmd_data.exclusive); + vdi_cmd_data.exclusive, vdi_cmd_data.delete); if (ret) { if (ret == SD_RES_VDI_EXIST) { fprintf(stderr, "the attribute already exists, %s\n", key); @@ -893,27 +902,11 @@ reread: fprintf(stderr, "vdi not found\n"); return EXIT_MISSING; } else - fprintf(stderr, "failed to find attr oid, %s\n", + fprintf(stderr, "failed to set attr, %s\n", sd_strerror(ret)); return EXIT_FAILURE; } - oid = attr_oid; - - if (vdi_cmd_data.delete) - ret = sd_write_object(oid, 0, (char *)"", 1, - offsetof(struct sheepdog_inode, name), 0, - nr_copies, 0); - else - ret = sd_write_object(oid, 0, value, strlen(value), - SD_ATTR_HEADER_SIZE, SD_FLAG_CMD_TRUNCATE, - nr_copies, 0); - - if (ret != SD_RES_SUCCESS) { - fprintf(stderr, "failed to set attribute\n"); - return EXIT_FAILURE; - } - return EXIT_SUCCESS; } @@ -931,8 +924,8 @@ static int vdi_getattr(int argc, char **argv) } ret = find_vdi_attr_oid(vdiname, vdi_cmd_data.snapshot_tag, - vdi_cmd_data.snapshot_id, key, &vid, &attr_oid, - &nr_copies, 0, 0); + vdi_cmd_data.snapshot_id, key, NULL, 0, + &vid, &attr_oid, &nr_copies, 0, 0, 0); if (ret == SD_RES_NO_OBJ) { fprintf(stderr, "no such attribute, %s\n", key); return EXIT_MISSING; diff --git a/include/sheep.h b/include/sheep.h index 31516d9..3c2d799 100644 --- a/include/sheep.h +++ b/include/sheep.h @@ -37,10 +37,13 @@ #define SD_OP_KILL_NODE 0x88 #define SD_OP_GET_VDI_ATTR 0x89 -#define SD_FLAG_CMD_DIRECT 0x10 -#define SD_FLAG_CMD_RECOVERY 0x20 -#define SD_FLAG_CMD_CREAT 0x40 -#define SD_FLAG_CMD_EXCL 0x80 +#define SD_FLAG_CMD_DIRECT 0x0010 +#define SD_FLAG_CMD_RECOVERY 0x0020 + +/* flags for vdi attribute operations */ +#define SD_FLAG_CMD_CREAT 0x0100 +#define SD_FLAG_CMD_EXCL 0x0200 +#define SD_FLAG_CMD_DEL 0x0400 #define SD_RES_OLD_NODE_VER 0x41 /* Remote node has an old epoch */ #define SD_RES_NEW_NODE_VER 0x42 /* Remote node has a new epoch */ diff --git a/sheep/group.c b/sheep/group.c index de3333e..8e9266f 100644 --- a/sheep/group.c +++ b/sheep/group.c @@ -898,7 +898,8 @@ static void vdi_op(struct vdi_op_message *msg) ret = get_vdi_attr(hdr->epoch, data, hdr->data_length, vid, &attrid, nr_copies, ctime, hdr->flags & SD_FLAG_CMD_CREAT, - hdr->flags & SD_FLAG_CMD_EXCL); + hdr->flags & SD_FLAG_CMD_EXCL, + hdr->flags & SD_FLAG_CMD_DEL); break; case SD_OP_RELEASE_VDI: break; diff --git a/sheep/sdnet.c b/sheep/sdnet.c index db2e691..eb375cb 100644 --- a/sheep/sdnet.c +++ b/sheep/sdnet.c @@ -629,7 +629,7 @@ int create_listen_port(int port, void *data) int write_object(struct sheepdog_vnode_list_entry *e, int vnodes, int zones, uint32_t node_version, uint64_t oid, char *data, unsigned int datalen, - uint64_t offset, int nr, int create) + uint64_t offset, uint16_t flags, int nr, int create) { struct sd_obj_req hdr; int i, n, fd, ret, success = 0; @@ -644,8 +644,8 @@ int write_object(struct sheepdog_vnode_list_entry *e, n = obj_to_sheep(e, vnodes, oid, i); if (is_myself(e[n].addr, e[n].port)) { - ret = write_object_local(oid, data, datalen, offset, nr, - node_version, create); + ret = write_object_local(oid, data, datalen, offset, + flags, nr, node_version, create); if (ret != 0) eprintf("fail %"PRIx64" %"PRIx32"\n", oid, ret); @@ -673,7 +673,8 @@ int write_object(struct sheepdog_vnode_list_entry *e, hdr.oid = oid; hdr.copies = nr; - hdr.flags = SD_FLAG_CMD_WRITE | SD_FLAG_CMD_DIRECT; + hdr.flags = flags; + hdr.flags |= SD_FLAG_CMD_WRITE | SD_FLAG_CMD_DIRECT; hdr.data_length = wlen; hdr.offset = offset; diff --git a/sheep/store.c b/sheep/store.c index 12e54d9..57654ff 100644 --- a/sheep/store.c +++ b/sheep/store.c @@ -490,7 +490,8 @@ int update_epoch_store(uint32_t epoch) } int write_object_local(uint64_t oid, char *data, unsigned int datalen, - uint64_t offset, int copies, uint32_t epoch, int create) + uint64_t offset, uint16_t flags, int copies, + uint32_t epoch, int create) { int ret; struct request *req; @@ -507,7 +508,7 @@ int write_object_local(uint64_t oid, char *data, unsigned int datalen, else hdr->opcode = SD_OP_WRITE_OBJ; hdr->copies = copies; - hdr->flags = SD_FLAG_CMD_WRITE; + hdr->flags = flags | SD_FLAG_CMD_WRITE; hdr->offset = offset; hdr->data_length = datalen; req->data = data; diff --git a/sheep/vdi.c b/sheep/vdi.c index 87a63c9..b192dcb 100644 --- a/sheep/vdi.c +++ b/sheep/vdi.c @@ -124,7 +124,7 @@ static int create_vdi_obj(uint32_t epoch, char *name, uint32_t new_vid, uint64_t if (is_snapshot && cur_vid != base_vid) { ret = write_object(entries, nr_vnodes, nr_zones, epoch, vid_to_vdi_oid(cur_vid), (char *)cur, - SD_INODE_HEADER_SIZE, 0, copies, 0); + SD_INODE_HEADER_SIZE, 0, 0, copies, 0); if (ret != 0) { vprintf(SDOG_ERR "failed\n"); ret = SD_RES_BASE_VDI_READ; @@ -135,7 +135,7 @@ static int create_vdi_obj(uint32_t epoch, char *name, uint32_t new_vid, uint64_t if (base_vid) { ret = write_object(entries, nr_vnodes, nr_zones, epoch, vid_to_vdi_oid(base_vid), (char *)base, - SD_INODE_HEADER_SIZE, 0, copies, 0); + SD_INODE_HEADER_SIZE, 0, 0, copies, 0); if (ret != 0) { vprintf(SDOG_ERR "failed\n"); ret = SD_RES_BASE_VDI_WRITE; @@ -145,7 +145,7 @@ static int create_vdi_obj(uint32_t epoch, char *name, uint32_t new_vid, uint64_t ret = write_object(entries, nr_vnodes, nr_zones, epoch, vid_to_vdi_oid(new_vid), (char *)new, sizeof(*new), - 0, copies, 1); + 0, 0, copies, 1); if (ret != 0) ret = SD_RES_VDI_WRITE; out: @@ -390,7 +390,7 @@ int del_vdi(uint32_t epoch, char *data, int data_len, uint32_t *vid, ret = write_object(entries, nr_vnodes, nr_zones, epoch, vid_to_vdi_oid(*vid), (char *)inode, - SD_INODE_HEADER_SIZE, 0, nr_reqs, 0); + SD_INODE_HEADER_SIZE, 0, 0, nr_reqs, 0); if (ret != 0) { ret = SD_RES_EIO; goto out; @@ -654,13 +654,14 @@ err: int get_vdi_attr(uint32_t epoch, struct sheepdog_vdi_attr *vattr, int data_len, uint32_t vid, uint32_t *attrid, int copies, uint64_t ctime, - int creat, int excl) + int write, int excl, int delete) { struct sheepdog_vnode_list_entry *entries; struct sheepdog_vdi_attr tmp_attr; uint64_t oid; uint32_t end; int ret, nr_zones, nr_vnodes; + int value_len; entries = malloc(sizeof(*entries) * SD_MAX_VNODES); if (!entries) { @@ -669,16 +670,13 @@ int get_vdi_attr(uint32_t epoch, struct sheepdog_vdi_attr *vattr, int data_len, goto out; } - if (data_len != SD_ATTR_HEADER_SIZE) { - ret = SD_RES_INVALID_PARMS; - goto out; - } + value_len = data_len - SD_ATTR_HEADER_SIZE; vattr->ctime = ctime; get_ordered_sd_vnode_list(entries, &nr_vnodes, &nr_zones); - *attrid = fnv_64a_buf(vattr, data_len, FNV1A_64_INIT); + *attrid = fnv_64a_buf(vattr, SD_ATTR_HEADER_SIZE, FNV1A_64_INIT); *attrid &= (UINT64_C(1) << VDI_SPACE_SHIFT) - 1; end = *attrid - 1; @@ -687,9 +685,9 @@ int get_vdi_attr(uint32_t epoch, struct sheepdog_vdi_attr *vattr, int data_len, ret = read_object(entries, nr_vnodes, nr_zones, epoch, oid, (char *)&tmp_attr, sizeof(tmp_attr), 0, copies); - if (ret == -SD_RES_NO_OBJ && creat) { + if (ret == -SD_RES_NO_OBJ && write) { ret = write_object(entries, nr_vnodes, nr_zones, epoch, oid, - (char *)vattr, data_len, 0, copies, 1); + (char *)vattr, data_len, 0, 0, copies, 1); if (ret) ret = SD_RES_EIO; else @@ -703,7 +701,26 @@ int get_vdi_attr(uint32_t epoch, struct sheepdog_vdi_attr *vattr, int data_len, if (memcmp(&tmp_attr, vattr, sizeof(tmp_attr)) == 0) { if (excl) ret = SD_RES_VDI_EXIST; - else + else if (delete) { + ret = write_object(entries, nr_vnodes, nr_zones, + epoch, oid, (char *)"", 1, + offsetof(struct sheepdog_vdi_attr, name), + 0, copies, 0); + if (ret) + ret = SD_RES_EIO; + else + ret = SD_RES_SUCCESS; + } else if (write) { + ret = write_object(entries, nr_vnodes, nr_zones, + epoch, oid, vattr->value, + value_len, SD_ATTR_HEADER_SIZE, + SD_FLAG_CMD_TRUNCATE, copies, 0); + + if (ret) + ret = SD_RES_EIO; + else + ret = SD_RES_SUCCESS; + } else ret = SD_RES_SUCCESS; goto out; } -- 1.7.2.5 |