[sheepdog] [PATCH v2] sheep: fix missing .stale problem

Hitoshi Mitake mitake.hitoshi at lab.ntt.co.jp
Mon Mar 2 05:25:18 CET 2015


At Mon,  2 Mar 2015 09:56:59 +0800,
Liu Yuan wrote:
> 
> From: Liu Yuan <liuyuan at cmss.chinamobile.com>
> 
> If .stale is missing, the recovery algorithm is almost broken. We have this
> problem because of the following logic for .stale:
> 
> store->format() purges all the directories including .stale, rmdir(.stale)
>    |
>    V
> store->init() then mkdir(.stale)
> 
> This order was previously strictly honored, but dbf0e8782, which pushed the
> purge work into the worker thread, broke this ordering. On many systems, the
> actual execution order for .stale becomes:
> 
> mkdir(.stale) -> rmdir(.stale)
> 
> because rmdir(.stale) is deferred until after store->init(), so this poor node
> will never have a .stale directory.
> 
> Since store->cleanup() is the actual user of async rmdir, the fix is just to
> add a new async interface for it to call, and have the others call the sync one.
> 
> Cc: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
> Signed-off-by: Liu Yuan <liuyuan at cmss.chinamobile.com>
> ---
>  include/util.h      |  1 +
>  lib/util.c          | 18 ++++++++++++++----
>  sheep/plain_store.c |  6 +++++-
>  3 files changed, 20 insertions(+), 5 deletions(-)
> 
> diff --git a/include/util.h b/include/util.h
> index aba7b32..c5b5ac9 100644
> --- a/include/util.h
> +++ b/include/util.h
> @@ -107,6 +107,7 @@ void pstrcpy(char *buf, int buf_size, const char *str);
>  char *chomp(char *str);
>  int rmdir_r(const char *dir_path);
>  int purge_directory(const char *dir_path);
> +int purge_directory_async(const char *dir_path);
>  bool is_numeric(const char *p);
>  const char *data_to_str(void *data, size_t data_length);
>  int install_sighandler(int signum, void (*handler)(int, siginfo_t *, void *),
> diff --git a/lib/util.c b/lib/util.c
> index 73cf1af..f563c1e 100644
> --- a/lib/util.c
> +++ b/lib/util.c
> @@ -417,7 +417,7 @@ static void purge_work_done(struct work *work)
>  }
>  
>  /* Purge directory recursively */
> -int purge_directory(const char *dir_path)
> +static int raw_purge_directory(const char *dir_path, bool async)
>  {
>  	int ret = 0;
>  	struct stat s;
> @@ -433,7 +433,7 @@ int purge_directory(const char *dir_path)
>  		return -errno;
>  	}
>  
> -	if (util_wqueue) {
> +	if (async) {

We shouldn't skip the util_wqueue check, because libsheepdog users
other than sheep might not initialize it.

The other parts look good to me.

Thanks,
Hitoshi

>  		/* we have workqueue for it, don't unlink in this thread */
>  		w = xzalloc(sizeof(*w));
>  		w->nr_units = 0;
> @@ -452,7 +452,7 @@ int purge_directory(const char *dir_path)
>  			goto out;
>  		}
>  
> -		if (util_wqueue) {
> +		if (async) {
>  			struct purge_work_unit *unit;
>  
>  			unit = &w->units[w->nr_units++];
> @@ -482,7 +482,7 @@ int purge_directory(const char *dir_path)
>  		}
>  	}
>  
> -	if (util_wqueue) {
> +	if (async) {
>  		w->work.fn = purge_work_fn;
>  		w->work.done = purge_work_done;
>  		queue_work(util_wqueue, &w->work);
> @@ -493,6 +493,16 @@ out:
>  	return ret;
>  }
>  
> +int purge_directory(const char *dir_path)
> +{
> +	return raw_purge_directory(dir_path, false);
> +}
> +
> +int purge_directory_async(const char *dir_path)
> +{
> +	return raw_purge_directory(dir_path, true);
> +}
> +
>  /* remove directory recursively */
>  int rmdir_r(const char *dir_path)
>  {
> diff --git a/sheep/plain_store.c b/sheep/plain_store.c
> index 9614afa..5ea2946 100644
> --- a/sheep/plain_store.c
> +++ b/sheep/plain_store.c
> @@ -255,7 +255,11 @@ static int purge_stale_dir(const char *path)
>  	char p[PATH_MAX];
>  
>  	snprintf(p, PATH_MAX, "%s/.stale", path);
> -	return purge_dir(p);
> +
> +	if (purge_directory_async(p) < 0)
> +		return SD_RES_EIO;
> +
> +	return SD_RES_SUCCESS;
>  }
>  
>  int default_cleanup(void)
> -- 
> 1.9.1
> 
> -- 
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> https://lists.wpkg.org/mailman/listinfo/sheepdog



More information about the sheepdog mailing list