verify_object() was used for /[stored dir]/obj/[epoch]/list, which contains the list of object IDs. This patch replaces the function with the following simpler procedure: 1. create "list.tmp" 2. write data to "list.tmp" 3. rename "list.tmp" to "list". rename(2) is an atomic operation, so we can create the file in an all-or-nothing way; we don't need to verify it. Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp> --- sheep/store.c | 114 ++++++--------------------------------------------------- 1 files changed, 12 insertions(+), 102 deletions(-) diff --git a/sheep/store.c b/sheep/store.c index 6043054..e424938 100644 --- a/sheep/store.c +++ b/sheep/store.c @@ -27,7 +27,6 @@ #define ANAME_CTIME "user.sheepdog.ctime" #define ANAME_COPIES "user.sheepdog.copies" #define ANAME_NODEID "user.sheepdog.nodeid" -#define ANAME_CHECKSUM "user.sheepdog.checksum" static char *obj_path; static char *epoch_path; @@ -98,63 +97,6 @@ static int is_obj_in_range(uint64_t oid, uint64_t start, uint64_t end) return (start < hval || hval <= end); } -static int verify_object(int fd, char *buf, size_t len, int set_chksum) -{ - int ret; - uint64_t checksum; - struct stat s; - char *p = NULL; - - if (!buf) { - ret = fstat(fd, &s); - if (ret < 0) { - eprintf("failed to get file size, %m\n"); - goto err; - } - len = s.st_size; - - p = malloc(len); - if (!p) { - eprintf("out of memory\n"); - goto err; - } - buf = p; - - ret = pread64(fd, buf, len, 0); - if (ret < 0) { - eprintf("failed to read file, %m\n"); - goto err; - } - } - - if (set_chksum) { - checksum = fnv_64a_buf(buf, len, FNV1A_64_INIT); - ret = fsetxattr(fd, ANAME_CHECKSUM, &checksum, sizeof(checksum), 0); - if (ret < 0) { - eprintf("failed to set xattr, %m\n"); - goto err; - } - } else { - ret = fgetxattr(fd, ANAME_CHECKSUM, &checksum, sizeof(checksum)); - if (ret != sizeof(checksum)) { - eprintf("failed to read checksum, %m\n"); - goto err; - } - - if (checksum != fnv_64a_buf(buf, len, FNV1A_64_INIT)) { - 
eprintf("invalid checksum, %"PRIx64", %"PRIx64"\n", checksum, - fnv_64a_buf(buf, len, FNV1A_64_INIT)); - goto err; - } - } - - free(p); - return 0; -err: - free(p); - return -1; -} - static int get_obj_list(struct request *req) { DIR *dir; @@ -198,14 +140,6 @@ static int get_obj_list(struct request *req) goto out; } obj_nr = read(fd, buf, buf_len); - - ret = verify_object(fd, buf, obj_nr, 0); - if (ret < 0) { - eprintf("verification failed, %s, %m\n", path); - close(fd); - res = SD_RES_EIO; - goto out; - } dprintf("read objct list from %s, %"PRIu32"\n", path, obj_nr); obj_nr /= sizeof(uint64_t); @@ -732,30 +666,13 @@ static int store_queue_request_local(struct request *req, uint32_t epoch) rsp->data_length = ret; rsp->copies = copies; - if (!is_data_obj(oid)) { - /* FIXME: need to check whether the object is valid or not */ -/* ret = verify_object(fd, NULL, 0, 0); */ -/* if (ret < 0) { */ -/* eprintf("verification failed, %"PRIx64"\n", oid); */ -/* ret = SD_RES_EIO; */ -/* goto out; */ -/* } */ - } - ret = SD_RES_SUCCESS; break; case SD_OP_WRITE_OBJ: + case SD_OP_CREATE_AND_WRITE_OBJ: if (!is_data_obj(oid)) { - /* FIXME: need to check whether the object is valid or not */ -/* ret = verify_object(fd, NULL, 0, 0); */ -/* if (ret < 0) { */ -/* eprintf("verification failed, %"PRIx64"\n", oid); */ -/* ret = SD_RES_EIO; */ -/* goto out; */ -/* } */ + /* FIXME: write data to journal */ } - /* fall through */ - case SD_OP_CREATE_AND_WRITE_OBJ: ret = pwrite64(fd, req->data, hdr->data_length, hdr->offset); if (ret != hdr->data_length) { if (errno == ENOSPC) @@ -766,13 +683,7 @@ static int store_queue_request_local(struct request *req, uint32_t epoch) } if (!is_data_obj(oid)) { - /* FIXME: need to update atomically */ -/* ret = verify_object(fd, NULL, 0, 1); */ -/* if (ret < 0) { */ -/* eprintf("failed to set checksum, %"PRIx64"\n", oid); */ -/* ret = SD_RES_EIO; */ -/* goto out; */ -/* } */ + /* FIXME: remove journal data */ } ret = SD_RES_SUCCESS; @@ -1488,7 +1399,7 @@ 
static void __start_recovery(struct work *work, int idx) int my_idx = -1; int i, fd; uint64_t start_hash, end_hash; - char path[PATH_MAX]; + char path[PATH_MAX], tmp_path[PATH_MAX]; int ret; dprintf("%u\n", epoch); @@ -1535,25 +1446,24 @@ static void __start_recovery(struct work *work, int idx) qsort(rw->buf, rw->count, sizeof(uint64_t), obj_cmp); snprintf(path, sizeof(path), "%s%08u/list", obj_path, epoch); - dprintf("write object list file to %s\n", path); + snprintf(tmp_path, sizeof(tmp_path), "%s%08u/list.tmp", obj_path, epoch); - fd = open(path, O_RDWR | O_CREAT, def_fmode); + dprintf("write object list to %s\n", tmp_path); + fd = open(tmp_path, O_RDWR | O_CREAT | O_SYNC, def_fmode); if (fd < 0) { - eprintf("failed to open %s, %s\n", path, strerror(errno)); + eprintf("failed to open %s, %s, %m\n", tmp_path, strerror(errno)); goto fail; } write(fd, rw->buf, sizeof(uint64_t) * rw->count); - fsync(fd); + close(fd); - ret = verify_object(fd, rw->buf, sizeof(uint64_t) * rw->count, 1); + dprintf("rename %s to %s\n", tmp_path, path); + ret = rename(tmp_path, path); if (ret < 0) { - eprintf("failed to set check sum, %s, %m\n", path); - close(fd); + eprintf("failed to rename %s to %s, %m\n", tmp_path, path); goto fail; } - close(fd); - return; fail: rw->count = 0; -- 1.5.6.5 |