[sheepdog] [PATCH v2] sheep: fix missing .stale problem
Hitoshi Mitake
mitake.hitoshi at lab.ntt.co.jp
Mon Mar 2 05:25:18 CET 2015
At Mon, 2 Mar 2015 09:56:59 +0800,
Liu Yuan wrote:
>
> From: Liu Yuan <liuyuan at cmss.chinamobile.com>
>
> If .stale is missing, the recovery algorithm is almost broken. We have this
> problem because following logic for .stale:
>
> store->format() purge all the directories including .stale, rmdir(.stale)
> |
> V
> store->init() then mkdir(.stale)
>
> This order was previously strickly honored but dbf0e8782 that pushed the purge
> work into the worker thread, broke this ordering. On many systems, the actual
> execution order for .stale becomes:
>
> mkdir(.stale) -> rmdir(.stale)
>
> because rmdir(.stale) is deffered after store->init(), so this poor node will
> never have .stale exist.
>
> Since the store->cleanup() is the actual user of async rmdir, the fix is just
> add a new async interface and call it and have others call sync one.
>
> Cc: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
> Signed-off-by: Liu Yuan <liuyuan at cmss.chinamobile.com>
> ---
> include/util.h | 1 +
> lib/util.c | 18 ++++++++++++++----
> sheep/plain_store.c | 6 +++++-
> 3 files changed, 20 insertions(+), 5 deletions(-)
>
> diff --git a/include/util.h b/include/util.h
> index aba7b32..c5b5ac9 100644
> --- a/include/util.h
> +++ b/include/util.h
> @@ -107,6 +107,7 @@ void pstrcpy(char *buf, int buf_size, const char *str);
> char *chomp(char *str);
> int rmdir_r(const char *dir_path);
> int purge_directory(const char *dir_path);
> +int purge_directory_async(const char *dir_path);
> bool is_numeric(const char *p);
> const char *data_to_str(void *data, size_t data_length);
> int install_sighandler(int signum, void (*handler)(int, siginfo_t *, void *),
> diff --git a/lib/util.c b/lib/util.c
> index 73cf1af..f563c1e 100644
> --- a/lib/util.c
> +++ b/lib/util.c
> @@ -417,7 +417,7 @@ static void purge_work_done(struct work *work)
> }
>
> /* Purge directory recursively */
> -int purge_directory(const char *dir_path)
> +static int raw_purge_directory(const char *dir_path, bool async)
> {
> int ret = 0;
> struct stat s;
> @@ -433,7 +433,7 @@ int purge_directory(const char *dir_path)
> return -errno;
> }
>
> - if (util_wqueue) {
> + if (async) {
We shouldn't skip checking of util_wqueue because libsheepdog users
other than sheep might not initialize it.
Other part looks good to me.
Thanks,
Hitoshi
> /* we have workqueue for it, don't unlink in this thread */
> w = xzalloc(sizeof(*w));
> w->nr_units = 0;
> @@ -452,7 +452,7 @@ int purge_directory(const char *dir_path)
> goto out;
> }
>
> - if (util_wqueue) {
> + if (async) {
> struct purge_work_unit *unit;
>
> unit = &w->units[w->nr_units++];
> @@ -482,7 +482,7 @@ int purge_directory(const char *dir_path)
> }
> }
>
> - if (util_wqueue) {
> + if (async) {
> w->work.fn = purge_work_fn;
> w->work.done = purge_work_done;
> queue_work(util_wqueue, &w->work);
> @@ -493,6 +493,16 @@ out:
> return ret;
> }
>
> +int purge_directory(const char *dir_path)
> +{
> + return raw_purge_directory(dir_path, false);
> +}
> +
> +int purge_directory_async(const char *dir_path)
> +{
> + return raw_purge_directory(dir_path, true);
> +}
> +
> /* remove directory recursively */
> int rmdir_r(const char *dir_path)
> {
> diff --git a/sheep/plain_store.c b/sheep/plain_store.c
> index 9614afa..5ea2946 100644
> --- a/sheep/plain_store.c
> +++ b/sheep/plain_store.c
> @@ -255,7 +255,11 @@ static int purge_stale_dir(const char *path)
> char p[PATH_MAX];
>
> snprintf(p, PATH_MAX, "%s/.stale", path);
> - return purge_dir(p);
> +
> + if (purge_directory_async(p) < 0)
> + return SD_RES_EIO;
> +
> + return SD_RES_SUCCESS;
> }
>
> int default_cleanup(void)
> --
> 1.9.1
>
> --
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> https://lists.wpkg.org/mailman/listinfo/sheepdog
More information about the sheepdog
mailing list