This is a defensive patch, but it also solves a problem we found on the test cluster. The problem is tricky: init_vdi_state() errors out with "failed to read inode header xxx, 0". After a detailed log analysis, we found that the cause of this problem is a mismatch between the path returned by get_obj_path() and the object's real path, e.g., - disk1/oidA (real location) - disk3/oidA (get_obj_path() location) So default_read() fails. To solve this problem and any future problems caused by misplaced objects (for whatever unexpected reason), we call default_exist() before default_{read, write}. This way, all misplaced objects will be moved to their proper place before any read/write operation. Signed-off-by: Liu Yuan <namei.unix at gmail.com> --- sheep/plain_store.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/sheep/plain_store.c b/sheep/plain_store.c index 9a4871c..90ef0a6 100644 --- a/sheep/plain_store.c +++ b/sheep/plain_store.c @@ -77,6 +77,10 @@ static int get_stale_obj_path(uint64_t oid, uint32_t epoch, char *path, return md_get_stale_path(oid, epoch, path, size); } +/* + * Check if oid is in this nodes (if oid is in the wrong place, it will be moved + * to the correct one after this call in a MD setup. + */ bool default_exist(uint64_t oid) { return md_exist(oid); @@ -142,6 +146,14 @@ int default_write(uint64_t oid, const struct siocb *iocb) get_obj_path(oid, path, sizeof(path)); + /* + * Make sure oid is in the right place because oid might be misplaced + * in a wrong place, due to 'shutdown/restart with less/more disks' or + * any bugs. 
We need call err_to_sderr() to return EIO if disk is broken + */ + if (!default_exist(oid)) + return err_to_sderr(path, oid, ENOENT); + fd = open(path, flags, sd_def_fmode); if (unlikely(fd < 0)) return err_to_sderr(path, oid, errno); @@ -210,8 +222,8 @@ static int init_vdi_state(uint64_t oid, const char *wd, uint32_t epoch) ret = default_read(oid, &iocb); if (ret != SD_RES_SUCCESS) { - sd_err("failed to read inode header %" PRIx64 " %" PRId32, oid, - epoch); + sd_err("failed to read inode header %" PRIx64 " %" PRId32 + "wat %s", oid, epoch, wd); goto out; } @@ -232,7 +244,8 @@ static int init_objlist_and_vdi_bitmap(uint64_t oid, const char *wd, objlist_cache_insert(oid); if (is_vdi_obj(oid)) { - sd_debug("found the VDI object %" PRIx64, oid); + sd_debug("found the VDI object %" PRIx64" epoch %"PRIu32 + " at %s", oid, epoch, wd); ret = init_vdi_state(oid, wd, epoch); if (ret != SD_RES_SUCCESS) return ret; @@ -254,6 +267,11 @@ int default_init(void) return for_each_object_in_wd(init_objlist_and_vdi_bitmap, true, NULL); } +static inline bool is_stale_path(const char *path) +{ + return !!strstr(path, "stale"); +} + static int default_read_from_path(uint64_t oid, const char *path, const struct siocb *iocb) { @@ -261,6 +279,16 @@ static int default_read_from_path(uint64_t oid, const char *path, ret = SD_RES_SUCCESS; ssize_t size; + /* + * Make sure oid is in the right place because oid might be misplaced + * in a wrong place, due to 'shutdown/restart with less disks' or any + * bugs. We need call err_to_sderr() to return EIO if disk is broken. + * + * For stale path, get_stale_obj_path() already does default_exist job. + */ + if (!is_stale_path(path) && !default_exist(oid)) + return err_to_sderr(path, oid, ENOENT); + fd = open(path, flags); if (fd < 0) -- 1.8.1.2 |