[Sheepdog] [PATCH] sheep: remove verify_object()
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Sat Dec 25 09:52:52 CET 2010
verify_object() was used for /[stored dir]/obj/[epoch]/list, which
contains the list of object IDs. This patch replaces that function with
the following simpler procedure:
1. create "list.tmp"
2. write data to list.tmp
3. rename "list.tmp" to "list"
rename(2) is an atomic operation, so the file is created in an
all-or-nothing way; we no longer need to verify it.
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
sheep/store.c | 114 ++++++---------------------------------------------------
1 files changed, 12 insertions(+), 102 deletions(-)
diff --git a/sheep/store.c b/sheep/store.c
index 6043054..e424938 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -27,7 +27,6 @@
#define ANAME_CTIME "user.sheepdog.ctime"
#define ANAME_COPIES "user.sheepdog.copies"
#define ANAME_NODEID "user.sheepdog.nodeid"
-#define ANAME_CHECKSUM "user.sheepdog.checksum"
static char *obj_path;
static char *epoch_path;
@@ -98,63 +97,6 @@ static int is_obj_in_range(uint64_t oid, uint64_t start, uint64_t end)
return (start < hval || hval <= end);
}
-static int verify_object(int fd, char *buf, size_t len, int set_chksum)
-{
- int ret;
- uint64_t checksum;
- struct stat s;
- char *p = NULL;
-
- if (!buf) {
- ret = fstat(fd, &s);
- if (ret < 0) {
- eprintf("failed to get file size, %m\n");
- goto err;
- }
- len = s.st_size;
-
- p = malloc(len);
- if (!p) {
- eprintf("out of memory\n");
- goto err;
- }
- buf = p;
-
- ret = pread64(fd, buf, len, 0);
- if (ret < 0) {
- eprintf("failed to read file, %m\n");
- goto err;
- }
- }
-
- if (set_chksum) {
- checksum = fnv_64a_buf(buf, len, FNV1A_64_INIT);
- ret = fsetxattr(fd, ANAME_CHECKSUM, &checksum, sizeof(checksum), 0);
- if (ret < 0) {
- eprintf("failed to set xattr, %m\n");
- goto err;
- }
- } else {
- ret = fgetxattr(fd, ANAME_CHECKSUM, &checksum, sizeof(checksum));
- if (ret != sizeof(checksum)) {
- eprintf("failed to read checksum, %m\n");
- goto err;
- }
-
- if (checksum != fnv_64a_buf(buf, len, FNV1A_64_INIT)) {
- eprintf("invalid checksum, %"PRIx64", %"PRIx64"\n", checksum,
- fnv_64a_buf(buf, len, FNV1A_64_INIT));
- goto err;
- }
- }
-
- free(p);
- return 0;
-err:
- free(p);
- return -1;
-}
-
static int get_obj_list(struct request *req)
{
DIR *dir;
@@ -198,14 +140,6 @@ static int get_obj_list(struct request *req)
goto out;
}
obj_nr = read(fd, buf, buf_len);
-
- ret = verify_object(fd, buf, obj_nr, 0);
- if (ret < 0) {
- eprintf("verification failed, %s, %m\n", path);
- close(fd);
- res = SD_RES_EIO;
- goto out;
- }
dprintf("read objct list from %s, %"PRIu32"\n", path, obj_nr);
obj_nr /= sizeof(uint64_t);
@@ -732,30 +666,13 @@ static int store_queue_request_local(struct request *req, uint32_t epoch)
rsp->data_length = ret;
rsp->copies = copies;
- if (!is_data_obj(oid)) {
- /* FIXME: need to check whether the object is valid or not */
-/* ret = verify_object(fd, NULL, 0, 0); */
-/* if (ret < 0) { */
-/* eprintf("verification failed, %"PRIx64"\n", oid); */
-/* ret = SD_RES_EIO; */
-/* goto out; */
-/* } */
- }
-
ret = SD_RES_SUCCESS;
break;
case SD_OP_WRITE_OBJ:
+ case SD_OP_CREATE_AND_WRITE_OBJ:
if (!is_data_obj(oid)) {
- /* FIXME: need to check whether the object is valid or not */
-/* ret = verify_object(fd, NULL, 0, 0); */
-/* if (ret < 0) { */
-/* eprintf("verification failed, %"PRIx64"\n", oid); */
-/* ret = SD_RES_EIO; */
-/* goto out; */
-/* } */
+ /* FIXME: write data to journal */
}
- /* fall through */
- case SD_OP_CREATE_AND_WRITE_OBJ:
ret = pwrite64(fd, req->data, hdr->data_length, hdr->offset);
if (ret != hdr->data_length) {
if (errno == ENOSPC)
@@ -766,13 +683,7 @@ static int store_queue_request_local(struct request *req, uint32_t epoch)
}
if (!is_data_obj(oid)) {
- /* FIXME: need to update atomically */
-/* ret = verify_object(fd, NULL, 0, 1); */
-/* if (ret < 0) { */
-/* eprintf("failed to set checksum, %"PRIx64"\n", oid); */
-/* ret = SD_RES_EIO; */
-/* goto out; */
-/* } */
+ /* FIXME: remove journal data */
}
ret = SD_RES_SUCCESS;
@@ -1488,7 +1399,7 @@ static void __start_recovery(struct work *work, int idx)
int my_idx = -1;
int i, fd;
uint64_t start_hash, end_hash;
- char path[PATH_MAX];
+ char path[PATH_MAX], tmp_path[PATH_MAX];
int ret;
dprintf("%u\n", epoch);
@@ -1535,25 +1446,24 @@ static void __start_recovery(struct work *work, int idx)
qsort(rw->buf, rw->count, sizeof(uint64_t), obj_cmp);
snprintf(path, sizeof(path), "%s%08u/list", obj_path, epoch);
- dprintf("write object list file to %s\n", path);
+ snprintf(tmp_path, sizeof(tmp_path), "%s%08u/list.tmp", obj_path, epoch);
- fd = open(path, O_RDWR | O_CREAT, def_fmode);
+ dprintf("write object list to %s\n", tmp_path);
+ fd = open(tmp_path, O_RDWR | O_CREAT | O_SYNC, def_fmode);
if (fd < 0) {
- eprintf("failed to open %s, %s\n", path, strerror(errno));
+ eprintf("failed to open %s, %s, %m\n", tmp_path, strerror(errno));
goto fail;
}
write(fd, rw->buf, sizeof(uint64_t) * rw->count);
- fsync(fd);
+ close(fd);
- ret = verify_object(fd, rw->buf, sizeof(uint64_t) * rw->count, 1);
+ dprintf("rename %s to %s\n", tmp_path, path);
+ ret = rename(tmp_path, path);
if (ret < 0) {
- eprintf("failed to set check sum, %s, %m\n", path);
- close(fd);
+ eprintf("failed to rename %s to %s, %m\n", tmp_path, path);
goto fail;
}
- close(fd);
-
return;
fail:
rw->count = 0;
--
1.5.6.5
More information about the sheepdog
mailing list