[sheepdog] [PATCH] sheep: remove the busy lock in the journal commiting process
Robin Dong
robin.k.dong at gmail.com
Fri Dec 27 02:58:07 CET 2013
2013/12/26 Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
> This patch removes the busy wait which can potentially be large in the
> journaling mechanism and sleep inside it.
>
> Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
> ---
> sheep/journal.c | 64 ++++++++++++++++++++++++++++---------------------------
> 1 file changed, 33 insertions(+), 31 deletions(-)
>
> diff --git a/sheep/journal.c b/sheep/journal.c
> index b5f0ff2..17b0b03 100644
> --- a/sheep/journal.c
> +++ b/sheep/journal.c
> @@ -54,7 +54,9 @@ static int jfile_fds[2];
> static size_t jfile_size;
>
> static struct journal_file jfile;
> -static pthread_spinlock_t jfile_lock;
> +static struct sd_mutex jfile_lock = SD_MUTEX_INITIALIZER;
> +
> +static struct work_queue *commit_wq;
>
> static int create_journal_file(const char *root, const char *name)
> {
> @@ -277,7 +279,12 @@ int journal_file_init(const char *path, size_t size,
> bool skip)
> fd = create_journal_file(path, jfile_name[1]);
> jfile_fds[1] = fd;
>
> - pthread_spin_init(&jfile_lock, PTHREAD_PROCESS_PRIVATE);
> + commit_wq = create_ordered_work_queue("journal commit");
> + if (!commit_wq) {
> + sd_err("error at creating a workqueue for journal data commit");
> + return -1;
> + }
> +
> return 0;
> }
>
> @@ -301,49 +308,43 @@ void clean_journal_file(const char *p)
>
> static inline bool jfile_enough_space(size_t size)
> {
> - if (jfile.pos + size > jfile_size)
> - return false;
> - return true;
> + return jfile.pos + size < jfile_size;
>
IMHO, "return (jfile.pos + size) < jfile_size" may be much easier to
read.
> }
>
> +static struct sd_mutex journal_commit_mutex = SD_MUTEX_INITIALIZER;
> +
> /*
> * We rely on the kernel's page cache to cache data objects to 1) boost
> read
> * perfmance 2) simplify read path so that data commiting is simply a
> * sync() operation and We do it in a dedicated thread to avoid blocking
> * the writer by switch back and forth between two journal files.
> */
> -static void *commit_data(void *ignored)
> +static void journal_commit_data_work(struct work *work)
> {
> - int err;
> -
> - /* Tell runtime to release resources after termination */
> - err = pthread_detach(pthread_self());
> - if (unlikely(err))
> - panic("%s", strerror(err));
> -
> sync();
> +
> if (unlikely(xftruncate(jfile.commit_fd, 0) < 0))
> panic("truncate %m");
> if (unlikely(prealloc(jfile.commit_fd, jfile_size) < 0))
> - panic("prealloc");
> + panic("prealloc %m");
>
> - uatomic_set_false(&jfile.in_commit);
> + sd_mutex_unlock(&journal_commit_mutex);
> +}
>
> - pthread_exit(NULL);
> +static void journal_commit_data_done(struct work *work)
> +{
> + free(work);
> }
>
> -/* FIXME: Try not sleep inside lock */
> static void switch_journal_file(void)
> {
> - int old = jfile.fd, err;
> - pthread_t thread;
> -
> -retry:
> - if (unlikely(!uatomic_set_true(&jfile.in_commit))) {
> - sd_err("journal file in committing, "
> - "you might need enlarge jfile size");
> - usleep(100000); /* Wait until committing is finished */
> - goto retry;
> + int old = jfile.fd;
> + struct work *w;
> +
> + if (sd_mutex_trylock(&journal_commit_mutex) == EBUSY) {
> + sd_err("journal file in commiting, you might need"
> + " enlarge jfile size");
> + sd_mutex_lock(&journal_commit_mutex);
> }
>
> if (old == jfile_fds[0])
> @@ -353,9 +354,10 @@ retry:
> jfile.commit_fd = old;
> jfile.pos = 0;
>
> - err = pthread_create(&thread, NULL, commit_data, NULL);
> - if (unlikely(err))
> - panic("%s", strerror(err));
> + w = xzalloc(sizeof(*w));
> + w->fn = journal_commit_data_work;
> + w->done = journal_commit_data_done;
> + queue_work(commit_wq, w);
> }
>
> static int journal_file_write(struct journal_descriptor *jd, const char
> *buf)
> @@ -368,12 +370,12 @@ static int journal_file_write(struct
> journal_descriptor *jd, const char *buf)
> off_t woff;
> char *wbuffer, *p;
>
> - pthread_spin_lock(&jfile_lock);
> + sd_mutex_lock(&jfile_lock);
> if (!jfile_enough_space(wsize))
> switch_journal_file();
> woff = jfile.pos;
> jfile.pos += wsize;
> - pthread_spin_unlock(&jfile_lock);
> + sd_mutex_unlock(&jfile_lock);
>
> p = wbuffer = xvalloc(wsize);
> memcpy(p, jd, JOURNAL_DESC_SIZE);
> --
> 1.7.10.4
>
> --
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog
>
--
--
Best Regards
Robin Dong
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.wpkg.org/pipermail/sheepdog/attachments/20131227/47bc1cb4/attachment-0004.html>
More information about the sheepdog mailing list