<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">2014/1/17 Liu Yuan <span dir="ltr"><<a href="mailto:namei.unix@gmail.com" target="_blank">namei.unix@gmail.com</a>></span><br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<div class="HOEnZb"><div class="h5">On Fri, Jan 17, 2014 at 02:34:12PM +0800, Robin Dong wrote:<br>
> From: Robin Dong <<a href="mailto:sanbai@taobao.com">sanbai@taobao.com</a>><br>
><br>
> If we use pthread like this:<br>
><br>
>       pthread_mutex_t mt;<br>
>       pthread_mutex_init(&mt, NULL);<br>
>       pthread_mutex_unlock(&mt);<br>
>       pthread_mutex_destroy(&mt);<br>
><br>
> The last 'pthread_mutex_destroy()' will return EBUSY because the mutex has<br>
> been 'unlocked' even no one locked it.<br>
> (ref: <a href="https://bugzilla.redhat.com/show_bug.cgi?id=113588" target="_blank">https://bugzilla.redhat.com/show_bug.cgi?id=113588</a>)<br>
><br>
> This will cause problem in cluster/zookeeper.c because any event from zk_watcher()<br>
> would unlocked the 'wait_wakeup' mutex. Therefore we need to use pthread-condition<br>
> to replace pthread-mutex.<br>
><br>
> Signed-off-by: Robin Dong <<a href="mailto:sanbai@taobao.com">sanbai@taobao.com</a>><br>
> ---<br>
>  include/util.h            |  9 +++++++++<br>
>  sheep/cluster/zookeeper.c | 18 +++++++++++-------<br>
>  2 files changed, 20 insertions(+), 7 deletions(-)<br>
><br>
> diff --git a/include/util.h b/include/util.h<br>
> index b7e0e8b..83f3c08 100644<br>
> --- a/include/util.h<br>
> +++ b/include/util.h<br>
> @@ -371,6 +371,15 @@ static inline int sd_cond_wait(struct sd_cond *cond, struct sd_mutex *mutex)<br>
>       return pthread_cond_wait(&cond->cond, &mutex->mutex);<br>
>  }<br>
><br>
> +static inline int sd_cond_wait_timeout(struct sd_cond *cond,<br>
> +                                    struct sd_mutex *mutex, int second)<br>
> +{<br>
> +     struct timespec wait_time;<br>
> +     wait_time.tv_sec = second;<br>
> +     wait_time.tv_nsec = 0;<br>
> +     return pthread_cond_timedwait(&cond->cond, &mutex->mutex, &wait_time);<br>
> +}<br>
> +<br>
>  static inline int sd_cond_broadcast(struct sd_cond *cond)<br>
>  {<br>
>       return pthread_cond_broadcast(&cond->cond);<br>
> diff --git a/sheep/cluster/zookeeper.c b/sheep/cluster/zookeeper.c<br>
> index 3f6c146..4ef3a9a 100644<br>
> --- a/sheep/cluster/zookeeper.c<br>
> +++ b/sheep/cluster/zookeeper.c<br>
> @@ -42,7 +42,8 @@ struct cluster_lock {<br>
>       /* referenced by different threads in one sheepdog daemon */<br>
>       uint64_t ref;<br>
>       /* wait for the release of id by other lock owner */<br>
> -     struct sd_mutex wait_wakeup;<br>
> +     struct sd_cond wait_wakeup;<br>
> +     struct sd_mutex wait_mutex;<br>
>       /* lock for different threads of the same node on the same id */<br>
>       struct sd_mutex id_lock;<br>
>       char lock_path[MAX_NODE_STR_LEN];<br>
> @@ -314,7 +315,7 @@ static int lock_table_lookup_wakeup(uint64_t lock_id)<br>
>       sd_mutex_lock(table_locks + hval);<br>
>       hlist_for_each_entry(lock, iter, cluster_locks_table + hval, hnode) {<br>
>               if (lock->id == lock_id) {<br>
> -                     sd_mutex_unlock(&lock->wait_wakeup);<br>
> +                     sd_cond_broadcast(&lock->wait_wakeup);<br>
>                       res = 0;<br>
>                       break;<br>
>               }<br>
> @@ -351,7 +352,7 @@ static struct cluster_lock *lock_table_lookup_acquire(uint64_t lock_id)<br>
>               if (rc)<br>
>                       panic("Failed to init node %s", path);<br>
><br>
> -             sd_init_mutex(&ret_lock->wait_wakeup);<br>
> +             sd_cond_init(&ret_lock->wait_wakeup);<br>
>               sd_init_mutex(&ret_lock->id_lock);<br>
><br>
>               hlist_add_head(&(ret_lock->hnode), cluster_locks_table + hval);<br>
> @@ -394,7 +395,8 @@ static void lock_table_lookup_release(uint64_t lock_id)<br>
>                       hlist_del(iter);<br>
>                       /* free all resource used by this lock */<br>
>                       sd_destroy_mutex(&lock->id_lock);<br>
> -                     sd_destroy_mutex(&lock->wait_wakeup);<br>
> +                     sd_destroy_mutex(&lock->wait_mutex);<br>
> +                     sd_destroy_cond(&lock->wait_wakeup);<br>
>                       snprintf(path, MAX_NODE_STR_LEN, LOCK_ZNODE "/%"PRIu64,<br>
>                                lock->id);<br>
>                       /*<br>
> @@ -1229,7 +1231,7 @@ kick_block_event:<br>
>   * This operation will create a seq-ephemeral znode in lock directory<br>
>   * of zookeeper (use lock-id as dir name). The smallest file path in<br>
>   * this directory wil be the owner of the lock; the other threads will<br>
> - * wait on a struct sd_mutex (cluster_lock->wait_wakeup)<br>
> + * wait on a struct sd_cond (cluster_lock->wait_wakeup)<br>
>   */<br>
>  static void zk_lock(uint64_t lock_id)<br>
>  {<br>
> @@ -1259,7 +1261,7 @@ static void zk_lock(uint64_t lock_id)<br>
>       sd_debug("create path %s success", my_path);<br>
><br>
>       /* create node ok now */<br>
> -     snprintf(parent, MAX_NODE_STR_LEN, LOCK_ZNODE "/%"PRIu64"",<br>
> +     snprintf(parent, MAX_NODE_STR_LEN, LOCK_ZNODE "/%"PRIu64,<br>
>                cluster_lock->id);<br>
>       while (true) {<br>
>               zk_get_least_seq(parent, lowest_seq_path, MAX_NODE_STR_LEN,<br>
> @@ -1275,7 +1277,9 @@ static void zk_lock(uint64_t lock_id)<br>
>               rc = zoo_exists(zhandle, lowest_seq_path, 1, NULL);<br>
>               if (rc == ZOK) {<br>
>                       sd_debug("call zoo_exits success %s", lowest_seq_path);<br>
> -                     sd_mutex_lock(&cluster_lock->wait_wakeup);<br>
> +                     sd_cond_wait_timeout(&cluster_lock->wait_wakeup,<br>
> +                                          &cluster_lock->wait_mutex,<br>
> +                                          WAIT_TIME);<br>
<br>
</div></div>This looks overly complicated. So this mutex or condition is used to wakeup<br>
other lock contender(s) in the same node to save zookeeper traffic. But with<br>
current sd_cond, we can't save any traffic now because we timeout and retry<br>
zk_get_least_seq. So why not simply remove this kind of mutex completely like<br>
<br>
while (true) {<br>
        if get_lock == success<br>
                return;<br>
        zk_wait();<br>
}<br>
}<br></blockquote><div>We can't just "return" here, because only the "master" could go out of this function zk_lock().</div><div>If we still use a mutex (or other lock) here, how could we solve the "unlock and destroy" problem above?</div>
<div>And, pthread_cond_wait may miss the wakeup signal if it is called before "pthread_cond_signal", so</div><div>using pthread_cond_wait_timeout is almost the only way to be safe. </div><div>Maybe we could change WAIT_TIME to 2 or 3 seconds, so it can save traffic between zookeeper and sheepdog.</div>
<div><br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Thanks<br>
<span class="HOEnZb"><font color="#888888">Yuan<br>
</font></span></blockquote></div><br><br clear="all"><div><br></div>-- <br>--<br>Best Regard<br>Robin Dong
</div></div>