[Sheepdog] [PATCH] sheep: remove verify_object()

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Sat Dec 25 09:52:52 CET 2010


verify_object() was used for /[stored dir]/obj/[epoch]/list, which
contains the list of object IDs.  This patch replaces the function with
the following simpler procedure:

 1. create "list.tmp"
 2. write data to list.tmp
 3. rename "list.tmp" to "list"

rename(2) is an atomic operation, so we can create the file in an
all-or-nothing way; we don't need to verify it.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 sheep/store.c |  114 ++++++---------------------------------------------------
 1 files changed, 12 insertions(+), 102 deletions(-)

diff --git a/sheep/store.c b/sheep/store.c
index 6043054..e424938 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -27,7 +27,6 @@
 #define ANAME_CTIME "user.sheepdog.ctime"
 #define ANAME_COPIES "user.sheepdog.copies"
 #define ANAME_NODEID "user.sheepdog.nodeid"
-#define ANAME_CHECKSUM "user.sheepdog.checksum"
 
 static char *obj_path;
 static char *epoch_path;
@@ -98,63 +97,6 @@ static int is_obj_in_range(uint64_t oid, uint64_t start, uint64_t end)
 		return (start < hval || hval <= end);
 }
 
-static int verify_object(int fd, char *buf, size_t len, int set_chksum)
-{
-	int ret;
-	uint64_t checksum;
-	struct stat s;
-	char *p = NULL;
-
-	if (!buf) {
-		ret = fstat(fd, &s);
-		if (ret < 0) {
-			eprintf("failed to get file size, %m\n");
-			goto err;
-		}
-		len = s.st_size;
-
-		p = malloc(len);
-		if (!p) {
-			eprintf("out of memory\n");
-			goto err;
-		}
-		buf = p;
-
-		ret = pread64(fd, buf, len, 0);
-		if (ret < 0) {
-			eprintf("failed to read file, %m\n");
-			goto err;
-		}
-	}
-
-	if (set_chksum) {
-		checksum = fnv_64a_buf(buf, len, FNV1A_64_INIT);
-		ret = fsetxattr(fd, ANAME_CHECKSUM, &checksum, sizeof(checksum), 0);
-		if (ret < 0) {
-			eprintf("failed to set xattr, %m\n");
-			goto err;
-		}
-	} else {
-		ret = fgetxattr(fd, ANAME_CHECKSUM, &checksum, sizeof(checksum));
-		if (ret != sizeof(checksum)) {
-			eprintf("failed to read checksum, %m\n");
-			goto err;
-		}
-
-		if (checksum != fnv_64a_buf(buf, len, FNV1A_64_INIT)) {
-			eprintf("invalid checksum, %"PRIx64", %"PRIx64"\n", checksum,
-				fnv_64a_buf(buf, len, FNV1A_64_INIT));
-			goto err;
-		}
-	}
-
-	free(p);
-	return 0;
-err:
-	free(p);
-	return -1;
-}
-
 static int get_obj_list(struct request *req)
 {
 	DIR *dir;
@@ -198,14 +140,6 @@ static int get_obj_list(struct request *req)
 		goto out;
 	}
 	obj_nr = read(fd, buf, buf_len);
-
-	ret = verify_object(fd, buf, obj_nr, 0);
-	if (ret < 0) {
-		eprintf("verification failed, %s, %m\n", path);
-		close(fd);
-		res = SD_RES_EIO;
-		goto out;
-	}
 	dprintf("read objct list from %s, %"PRIu32"\n", path, obj_nr);
 
 	obj_nr /= sizeof(uint64_t);
@@ -732,30 +666,13 @@ static int store_queue_request_local(struct request *req, uint32_t epoch)
 		rsp->data_length = ret;
 		rsp->copies = copies;
 
-		if (!is_data_obj(oid)) {
-			/* FIXME: need to check whether the object is valid or not */
-/* 			ret = verify_object(fd, NULL, 0, 0); */
-/* 			if (ret < 0) { */
-/* 				eprintf("verification failed, %"PRIx64"\n", oid); */
-/* 				ret = SD_RES_EIO; */
-/* 				goto out; */
-/* 			} */
-		}
-
 		ret = SD_RES_SUCCESS;
 		break;
 	case SD_OP_WRITE_OBJ:
+	case SD_OP_CREATE_AND_WRITE_OBJ:
 		if (!is_data_obj(oid)) {
-			/* FIXME: need to check whether the object is valid or not */
-/* 			ret = verify_object(fd, NULL, 0, 0); */
-/* 			if (ret < 0) { */
-/* 				eprintf("verification failed, %"PRIx64"\n", oid); */
-/* 				ret = SD_RES_EIO; */
-/* 				goto out; */
-/* 			} */
+			/* FIXME: write data to journal */
 		}
-		/* fall through */
-	case SD_OP_CREATE_AND_WRITE_OBJ:
 		ret = pwrite64(fd, req->data, hdr->data_length, hdr->offset);
 		if (ret != hdr->data_length) {
 			if (errno == ENOSPC)
@@ -766,13 +683,7 @@ static int store_queue_request_local(struct request *req, uint32_t epoch)
 		}
 
 		if (!is_data_obj(oid)) {
-			/* FIXME: need to update atomically */
-/* 			ret = verify_object(fd, NULL, 0, 1); */
-/* 			if (ret < 0) { */
-/* 				eprintf("failed to set checksum, %"PRIx64"\n", oid); */
-/* 				ret = SD_RES_EIO; */
-/* 				goto out; */
-/* 			} */
+			/* FIXME: remove journal data */
 		}
 
 		ret = SD_RES_SUCCESS;
@@ -1488,7 +1399,7 @@ static void __start_recovery(struct work *work, int idx)
 	int my_idx = -1;
 	int i, fd;
 	uint64_t start_hash, end_hash;
-	char path[PATH_MAX];
+	char path[PATH_MAX], tmp_path[PATH_MAX];
 	int ret;
 
 	dprintf("%u\n", epoch);
@@ -1535,25 +1446,24 @@ static void __start_recovery(struct work *work, int idx)
 	qsort(rw->buf, rw->count, sizeof(uint64_t), obj_cmp);
 
 	snprintf(path, sizeof(path), "%s%08u/list", obj_path, epoch);
-	dprintf("write object list file to %s\n", path);
+	snprintf(tmp_path, sizeof(tmp_path), "%s%08u/list.tmp", obj_path, epoch);
 
-	fd = open(path, O_RDWR | O_CREAT, def_fmode);
+	dprintf("write object list to %s\n", tmp_path);
+	fd = open(tmp_path, O_RDWR | O_CREAT | O_SYNC, def_fmode);
 	if (fd < 0) {
-		eprintf("failed to open %s, %s\n", path, strerror(errno));
+		eprintf("failed to open %s, %s, %m\n", tmp_path, strerror(errno));
 		goto fail;
 	}
 	write(fd, rw->buf, sizeof(uint64_t) * rw->count);
-	fsync(fd);
+	close(fd);
 
-	ret = verify_object(fd, rw->buf, sizeof(uint64_t) * rw->count, 1);
+	dprintf("rename %s to %s\n", tmp_path, path);
+	ret = rename(tmp_path, path);
 	if (ret < 0) {
-		eprintf("failed to set check sum, %s, %m\n", path);
-		close(fd);
+		eprintf("failed to rename %s to %s, %m\n", tmp_path, path);
 		goto fail;
 	}
 
-	close(fd);
-
 	return;
 fail:
 	rw->count = 0;
-- 
1.5.6.5




More information about the sheepdog mailing list