[sheepdog] [PATCH v2 1/2] sheep/cluster: use pthread_cond instead of pthread_mutex to avoid panic

Robin Dong robin.k.dong at gmail.com
Fri Jan 17 14:00:36 CET 2014


2014/1/17 Liu Yuan <namei.unix at gmail.com>

> On Fri, Jan 17, 2014 at 02:34:12PM +0800, Robin Dong wrote:
> > From: Robin Dong <sanbai at taobao.com>
> >
> > If we use pthread like this:
> >
> >       pthread_mutex_t mt;
> >       pthread_mutex_init(&mt, NULL);
> >       pthread_mutex_unlock(&mt);
> >       pthread_mutex_destroy(&mt);
> >
> > The last 'pthread_mutex_destroy()' will return EBUSY because the mutex
> > has been 'unlocked' even though no one locked it.
> > (ref: https://bugzilla.redhat.com/show_bug.cgi?id=113588)
> >
> > This will cause a problem in cluster/zookeeper.c because any event from
> > zk_watcher() would unlock the 'wait_wakeup' mutex. Therefore we need to
> > use a pthread condition variable to replace the pthread mutex.
> >
> > Signed-off-by: Robin Dong <sanbai at taobao.com>
> > ---
> >  include/util.h            |  9 +++++++++
> >  sheep/cluster/zookeeper.c | 18 +++++++++++-------
> >  2 files changed, 20 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/util.h b/include/util.h
> > index b7e0e8b..83f3c08 100644
> > --- a/include/util.h
> > +++ b/include/util.h
> > @@ -371,6 +371,15 @@ static inline int sd_cond_wait(struct sd_cond
> *cond, struct sd_mutex *mutex)
> >       return pthread_cond_wait(&cond->cond, &mutex->mutex);
> >  }
> >
> > +static inline int sd_cond_wait_timeout(struct sd_cond *cond,
> > +                                    struct sd_mutex *mutex, int second)
> > +{
> > +     struct timespec wait_time;
> > +     wait_time.tv_sec = second;
> > +     wait_time.tv_nsec = 0;
> > +     return pthread_cond_timedwait(&cond->cond, &mutex->mutex,
> &wait_time);
> > +}
> > +
> >  static inline int sd_cond_broadcast(struct sd_cond *cond)
> >  {
> >       return pthread_cond_broadcast(&cond->cond);
> > diff --git a/sheep/cluster/zookeeper.c b/sheep/cluster/zookeeper.c
> > index 3f6c146..4ef3a9a 100644
> > --- a/sheep/cluster/zookeeper.c
> > +++ b/sheep/cluster/zookeeper.c
> > @@ -42,7 +42,8 @@ struct cluster_lock {
> >       /* referenced by different threads in one sheepdog daemon */
> >       uint64_t ref;
> >       /* wait for the release of id by other lock owner */
> > -     struct sd_mutex wait_wakeup;
> > +     struct sd_cond wait_wakeup;
> > +     struct sd_mutex wait_mutex;
> >       /* lock for different threads of the same node on the same id */
> >       struct sd_mutex id_lock;
> >       char lock_path[MAX_NODE_STR_LEN];
> > @@ -314,7 +315,7 @@ static int lock_table_lookup_wakeup(uint64_t lock_id)
> >       sd_mutex_lock(table_locks + hval);
> >       hlist_for_each_entry(lock, iter, cluster_locks_table + hval,
> hnode) {
> >               if (lock->id == lock_id) {
> > -                     sd_mutex_unlock(&lock->wait_wakeup);
> > +                     sd_cond_broadcast(&lock->wait_wakeup);
> >                       res = 0;
> >                       break;
> >               }
> > @@ -351,7 +352,7 @@ static struct cluster_lock
> *lock_table_lookup_acquire(uint64_t lock_id)
> >               if (rc)
> >                       panic("Failed to init node %s", path);
> >
> > -             sd_init_mutex(&ret_lock->wait_wakeup);
> > +             sd_cond_init(&ret_lock->wait_wakeup);
> >               sd_init_mutex(&ret_lock->id_lock);
> >
> >               hlist_add_head(&(ret_lock->hnode), cluster_locks_table +
> hval);
> > @@ -394,7 +395,8 @@ static void lock_table_lookup_release(uint64_t
> lock_id)
> >                       hlist_del(iter);
> >                       /* free all resource used by this lock */
> >                       sd_destroy_mutex(&lock->id_lock);
> > -                     sd_destroy_mutex(&lock->wait_wakeup);
> > +                     sd_destroy_mutex(&lock->wait_mutex);
> > +                     sd_destroy_cond(&lock->wait_wakeup);
> >                       snprintf(path, MAX_NODE_STR_LEN, LOCK_ZNODE
> "/%"PRIu64,
> >                                lock->id);
> >                       /*
> > @@ -1229,7 +1231,7 @@ kick_block_event:
> >   * This operation will create a seq-ephemeral znode in lock directory
> >   * of zookeeper (use lock-id as dir name). The smallest file path in
> > - * this directory will be the owner of the lock; the other threads will
> > - * wait on a struct sd_mutex (cluster_lock->wait_wakeup)
> > + * wait on a struct sd_cond (cluster_lock->wait_wakeup)
> >   */
> >  static void zk_lock(uint64_t lock_id)
> >  {
> > @@ -1259,7 +1261,7 @@ static void zk_lock(uint64_t lock_id)
> >       sd_debug("create path %s success", my_path);
> >
> >       /* create node ok now */
> > -     snprintf(parent, MAX_NODE_STR_LEN, LOCK_ZNODE "/%"PRIu64"",
> > +     snprintf(parent, MAX_NODE_STR_LEN, LOCK_ZNODE "/%"PRIu64,
> >                cluster_lock->id);
> >       while (true) {
> >               zk_get_least_seq(parent, lowest_seq_path, MAX_NODE_STR_LEN,
> > @@ -1275,7 +1277,9 @@ static void zk_lock(uint64_t lock_id)
> >               rc = zoo_exists(zhandle, lowest_seq_path, 1, NULL);
> >               if (rc == ZOK) {
> >                       sd_debug("call zoo_exits success %s",
> lowest_seq_path);
> > -                     sd_mutex_lock(&cluster_lock->wait_wakeup);
> > +                     sd_cond_wait_timeout(&cluster_lock->wait_wakeup,
> > +                                          &cluster_lock->wait_mutex,
> > +                                          WAIT_TIME);
>
> This looks overly complicated. So this mutex or condition is used to wakeup
> other lock contender(s) in the same node to save zookeeper traffic. But
> with
> current sd_cond, we can't save any traffic now because we timeout and retry
> zk_get_least_seq. So why not simply remove this kind of mutex completely
> like
>
> while (true) {
>         if get_lock == success
>                 return;
>         zk_wait();
> }
>
We can't just "return" here, because only the "master" could go out of
this function zk_lock().
If we still use a mutex (or another lock) here, how could we solve the
"unlock and destroy" problem above?
And, pthread_cond_wait may miss the wakeup signal if "pthread_cond_signal"
is called before it, so
using pthread_cond_wait_timeout is almost the only way to be safe.
Maybe we could change WAIT_TIME to 2 or 3 seconds, so it can reduce the
traffic between zookeeper and sheepdog.

Thanks
> Yuan
>



-- 
--
Best Regard
Robin Dong
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.wpkg.org/pipermail/sheepdog/attachments/20140117/6d3feb99/attachment-0004.html>


More information about the sheepdog mailing list