[sheepdog] [PATCH v2] sheep/plain_store: move displaced object from read/write operation

Tue Feb 4 04:53:43 CET 2014

This is a defensive patch, but it solves a problem we found on the test cluster.
The problem is tricky:

init_vdi_state() errors out with "failed to read inode header xxx, 0".

After detailed log analysis, we found that the cause of this problem is a
mismatch between the path returned by get_obj_path() and the object's real path, e.g.,

- disk1/oidA (real location)
- disk3/oidA (get_obj_path() location)

So default_read() fails.

To solve this problem, and any future problems caused by misplaced objects (for
whatever unexpected reason), we call default_exist() before default_{read, write}.
This way, every misplaced object is moved to its proper place before any
read/write operation.

Signed-off-by: Liu Yuan <namei.unix at gmail.com>
---
 sheep/plain_store.c | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index 9a4871c..90ef0a6 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -77,6 +77,10 @@ static int get_stale_obj_path(uint64_t oid, uint32_t epoch, char *path,
 	return md_get_stale_path(oid, epoch, path, size);
 }
 
+/*
+ * Check if oid is on this node (if oid is in the wrong place, it will be moved
+ * to the correct one after this call in an MD setup).
+ */
 bool default_exist(uint64_t oid)
 {
 	return md_exist(oid);
@@ -142,6 +146,14 @@ int default_write(uint64_t oid, const struct siocb *iocb)
 
 	get_obj_path(oid, path, sizeof(path));
 
+	/*
+	 * Make sure oid is in the right place, because it might be misplaced
+	 * due to 'shutdown/restart with less/more disks' or any bugs. We need
+	 * to call err_to_sderr() to return EIO if the disk is broken.
+	 */
+	if (!default_exist(oid))
+		return err_to_sderr(path, oid, ENOENT);
+
 	fd = open(path, flags, sd_def_fmode);
 	if (unlikely(fd < 0))
 		return err_to_sderr(path, oid, errno);
@@ -210,8 +222,8 @@ static int init_vdi_state(uint64_t oid, const char *wd, uint32_t epoch)
 
 	ret = default_read(oid, &iocb);
 	if (ret != SD_RES_SUCCESS) {
-		sd_err("failed to read inode header %" PRIx64 " %" PRId32, oid,
-		       epoch);
+		sd_err("failed to read inode header %" PRIx64 " %" PRId32
+		       "wat %s", oid, epoch, wd);
 		goto out;
 	}
 
@@ -232,7 +244,8 @@ static int init_objlist_and_vdi_bitmap(uint64_t oid, const char *wd,
 	objlist_cache_insert(oid);
 
 	if (is_vdi_obj(oid)) {
-		sd_debug("found the VDI object %" PRIx64, oid);
+		sd_debug("found the VDI object %" PRIx64" epoch %"PRIu32
+			 " at %s", oid, epoch, wd);
 		ret = init_vdi_state(oid, wd, epoch);
 		if (ret != SD_RES_SUCCESS)
 			return ret;
@@ -254,6 +267,11 @@ int default_init(void)
 	return for_each_object_in_wd(init_objlist_and_vdi_bitmap, true, NULL);
 }
 
+static inline bool is_stale_path(const char *path)
+{
+	return !!strstr(path, "stale");
+}
+
 static int default_read_from_path(uint64_t oid, const char *path,
 				  const struct siocb *iocb)
 {
@@ -261,6 +279,16 @@ static int default_read_from_path(uint64_t oid, const char *path,
 	    ret = SD_RES_SUCCESS;
 	ssize_t size;
 
+	/*
+	 * Make sure oid is in the right place, because it might be misplaced
+	 * due to 'shutdown/restart with less disks' or any bugs. We need to
+	 * call err_to_sderr() to return EIO if the disk is broken.
+	 *
+	 * For stale path, get_stale_obj_path() already does default_exist job.
+	 */
+	if (!is_stale_path(path) && !default_exist(oid))
+		return err_to_sderr(path, oid, ENOENT);
+
 	fd = open(path, flags);
 
 	if (fd < 0)
-- 
1.8.1.2