<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">2013/12/26 Hitoshi Mitake <span dir="ltr"><<a href="mailto:mitake.hitoshi@lab.ntt.co.jp" target="_blank">mitake.hitoshi@lab.ntt.co.jp</a>></span><br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">This patch removes the busy wait which can potentially be large in the<br>
journaling mechanism and sleep inside it.<br>
<br>
Signed-off-by: Hitoshi Mitake <<a href="mailto:mitake.hitoshi@lab.ntt.co.jp">mitake.hitoshi@lab.ntt.co.jp</a>><br>
---<br>
sheep/journal.c | 64 ++++++++++++++++++++++++++++---------------------------<br>
1 file changed, 33 insertions(+), 31 deletions(-)<br>
<br>
diff --git a/sheep/journal.c b/sheep/journal.c<br>
index b5f0ff2..17b0b03 100644<br>
--- a/sheep/journal.c<br>
+++ b/sheep/journal.c<br>
@@ -54,7 +54,9 @@ static int jfile_fds[2];<br>
static size_t jfile_size;<br>
<br>
static struct journal_file jfile;<br>
-static pthread_spinlock_t jfile_lock;<br>
+static struct sd_mutex jfile_lock = SD_MUTEX_INITIALIZER;<br>
+<br>
+static struct work_queue *commit_wq;<br>
<br>
static int create_journal_file(const char *root, const char *name)<br>
{<br>
@@ -277,7 +279,12 @@ int journal_file_init(const char *path, size_t size, bool skip)<br>
fd = create_journal_file(path, jfile_name[1]);<br>
jfile_fds[1] = fd;<br>
<br>
- pthread_spin_init(&jfile_lock, PTHREAD_PROCESS_PRIVATE);<br>
+ commit_wq = create_ordered_work_queue("journal commit");<br>
+ if (!commit_wq) {<br>
+ sd_err("error at creating a workqueue for journal data commit");<br>
+ return -1;<br>
+ }<br>
+<br>
return 0;<br>
}<br>
<br>
@@ -301,49 +308,43 @@ void clean_journal_file(const char *p)<br>
<br>
static inline bool jfile_enough_space(size_t size)<br>
{<br>
- if (jfile.pos + size > jfile_size)<br>
- return false;<br>
- return true;<br>
+ return jfile.pos + size < jfile_size;<br></blockquote><div><br></div><div>IMHO, maybe "return (jfile.pos + size) < jfile_size" would be easier to read.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
}<br>
<br>
+static struct sd_mutex journal_commit_mutex = SD_MUTEX_INITIALIZER;<br>
+<br>
/*<br>
* We rely on the kernel's page cache to cache data objects to 1) boost read<br>
* perfmance 2) simplify read path so that data commiting is simply a<br>
* sync() operation and We do it in a dedicated thread to avoid blocking<br>
* the writer by switch back and forth between two journal files.<br>
*/<br>
-static void *commit_data(void *ignored)<br>
+static void journal_commit_data_work(struct work *work)<br>
{<br>
- int err;<br>
-<br>
- /* Tell runtime to release resources after termination */<br>
- err = pthread_detach(pthread_self());<br>
- if (unlikely(err))<br>
- panic("%s", strerror(err));<br>
-<br>
sync();<br>
+<br>
if (unlikely(xftruncate(jfile.commit_fd, 0) < 0))<br>
panic("truncate %m");<br>
if (unlikely(prealloc(jfile.commit_fd, jfile_size) < 0))<br>
- panic("prealloc");<br>
+ panic("prealloc %m");<br>
<br>
- uatomic_set_false(&jfile.in_commit);<br>
+ sd_mutex_unlock(&journal_commit_mutex);<br>
+}<br>
<br>
- pthread_exit(NULL);<br>
+static void journal_commit_data_done(struct work *work)<br>
+{<br>
+ free(work);<br>
}<br>
<br>
-/* FIXME: Try not sleep inside lock */<br>
static void switch_journal_file(void)<br>
{<br>
- int old = jfile.fd, err;<br>
- pthread_t thread;<br>
-<br>
-retry:<br>
- if (unlikely(!uatomic_set_true(&jfile.in_commit))) {<br>
- sd_err("journal file in committing, "<br>
- "you might need enlarge jfile size");<br>
- usleep(100000); /* Wait until committing is finished */<br>
- goto retry;<br>
+ int old = jfile.fd;<br>
+ struct work *w;<br>
+<br>
+ if (sd_mutex_trylock(&journal_commit_mutex) == EBUSY) {<br>
+ sd_err("journal file in commiting, you might need"<br>
+ " enlarge jfile size");<br>
+ sd_mutex_lock(&journal_commit_mutex);<br>
}<br>
<br>
if (old == jfile_fds[0])<br>
@@ -353,9 +354,10 @@ retry:<br>
jfile.commit_fd = old;<br>
jfile.pos = 0;<br>
<br>
- err = pthread_create(&thread, NULL, commit_data, NULL);<br>
- if (unlikely(err))<br>
- panic("%s", strerror(err));<br>
+ w = xzalloc(sizeof(*w));<br>
+ w->fn = journal_commit_data_work;<br>
+ w->done = journal_commit_data_done;<br>
+ queue_work(commit_wq, w);<br>
}<br>
<br>
static int journal_file_write(struct journal_descriptor *jd, const char *buf)<br>
@@ -368,12 +370,12 @@ static int journal_file_write(struct journal_descriptor *jd, const char *buf)<br>
off_t woff;<br>
char *wbuffer, *p;<br>
<br>
- pthread_spin_lock(&jfile_lock);<br>
+ sd_mutex_lock(&jfile_lock);<br>
if (!jfile_enough_space(wsize))<br>
switch_journal_file();<br>
woff = jfile.pos;<br>
jfile.pos += wsize;<br>
- pthread_spin_unlock(&jfile_lock);<br>
+ sd_mutex_unlock(&jfile_lock);<br>
<br>
p = wbuffer = xvalloc(wsize);<br>
memcpy(p, jd, JOURNAL_DESC_SIZE);<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.7.10.4<br>
<br>
--<br>
sheepdog mailing list<br>
<a href="mailto:sheepdog@lists.wpkg.org">sheepdog@lists.wpkg.org</a><br>
<a href="http://lists.wpkg.org/mailman/listinfo/sheepdog" target="_blank">http://lists.wpkg.org/mailman/listinfo/sheepdog</a><br>
</font></span></blockquote></div><br><br clear="all"><div><br></div>-- <br>--<br>Best Regards<br>Robin Dong
</div></div>