From: Robin Dong <sanbai at taobao.com>

If we use pthreads like this:

	pthread_mutex_t mt;
	pthread_mutex_init(&mt, NULL);
	pthread_mutex_unlock(&mt);
	pthread_mutex_destroy(&mt);

the final 'pthread_mutex_destroy()' will return EBUSY, because the mutex
was 'unlocked' even though no one had locked it.
(ref: https://bugzilla.redhat.com/show_bug.cgi?id=113588)

This causes a problem in cluster/zookeeper.c, because any event from
zk_watcher() may unlock the 'wait_wakeup' mutex even when nobody holds
it. Therefore we replace the pthread mutex with a pthread condition
variable.

Signed-off-by: Robin Dong <sanbai at taobao.com>
---
 include/util.h            |  9 +++++++++
 sheep/cluster/zookeeper.c | 21 ++++++++++++++-------
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/include/util.h b/include/util.h
index b7e0e8b..83f3c08 100644
--- a/include/util.h
+++ b/include/util.h
@@ -371,6 +371,15 @@ static inline int sd_cond_wait(struct sd_cond *cond, struct sd_mutex *mutex)
 	return pthread_cond_wait(&cond->cond, &mutex->mutex);
 }
 
+static inline int sd_cond_wait_timeout(struct sd_cond *cond,
+				       struct sd_mutex *mutex, int second)
+{
+	struct timespec wait_time;
+	wait_time.tv_sec = time(NULL) + second;
+	wait_time.tv_nsec = 0;
+	return pthread_cond_timedwait(&cond->cond, &mutex->mutex, &wait_time);
+}
+
 static inline int sd_cond_broadcast(struct sd_cond *cond)
 {
 	return pthread_cond_broadcast(&cond->cond);
diff --git a/sheep/cluster/zookeeper.c b/sheep/cluster/zookeeper.c
index 3f6c146..4ef3a9a 100644
--- a/sheep/cluster/zookeeper.c
+++ b/sheep/cluster/zookeeper.c
@@ -42,7 +42,8 @@ struct cluster_lock {
 	/* referenced by different threads in one sheepdog daemon */
 	uint64_t ref;
 	/* wait for the release of id by other lock owner */
-	struct sd_mutex wait_wakeup;
+	struct sd_cond wait_wakeup;
+	struct sd_mutex wait_mutex;
 	/* lock for different threads of the same node on the same id */
 	struct sd_mutex id_lock;
 	char lock_path[MAX_NODE_STR_LEN];
@@ -314,7 +315,7 @@ static int lock_table_lookup_wakeup(uint64_t lock_id)
 	sd_mutex_lock(table_locks + hval);
 	hlist_for_each_entry(lock, iter, cluster_locks_table + hval, hnode) {
 		if (lock->id == lock_id) {
-			sd_mutex_unlock(&lock->wait_wakeup);
+			sd_cond_broadcast(&lock->wait_wakeup);
 			res = 0;
 			break;
 		}
@@ -351,7 +352,8 @@ static struct cluster_lock *lock_table_lookup_acquire(uint64_t lock_id)
 	if (rc)
 		panic("Failed to init node %s", path);
 
-	sd_init_mutex(&ret_lock->wait_wakeup);
+	sd_cond_init(&ret_lock->wait_wakeup);
+	sd_init_mutex(&ret_lock->wait_mutex);
 	sd_init_mutex(&ret_lock->id_lock);
 
 	hlist_add_head(&(ret_lock->hnode), cluster_locks_table + hval);
@@ -394,7 +396,8 @@ static void lock_table_lookup_release(uint64_t lock_id)
 		hlist_del(iter);
 		/* free all resource used by this lock */
 		sd_destroy_mutex(&lock->id_lock);
-		sd_destroy_mutex(&lock->wait_wakeup);
+		sd_destroy_mutex(&lock->wait_mutex);
+		sd_destroy_cond(&lock->wait_wakeup);
 		snprintf(path, MAX_NODE_STR_LEN, LOCK_ZNODE "/%"PRIu64,
 			 lock->id);
 		/*
@@ -1229,7 +1232,7 @@ kick_block_event:
  * This operation will create a seq-ephemeral znode in lock directory
  * of zookeeper (use lock-id as dir name). The smallest file path in
  * this directory wil be the owner of the lock; the other threads will
- * wait on a struct sd_mutex (cluster_lock->wait_wakeup)
+ * wait on a struct sd_cond (cluster_lock->wait_wakeup)
  */
 static void zk_lock(uint64_t lock_id)
 {
@@ -1259,7 +1262,7 @@ static void zk_lock(uint64_t lock_id)
 	sd_debug("create path %s success", my_path);
 
 	/* create node ok now */
-	snprintf(parent, MAX_NODE_STR_LEN, LOCK_ZNODE "/%"PRIu64"",
+	snprintf(parent, MAX_NODE_STR_LEN, LOCK_ZNODE "/%"PRIu64,
 		 cluster_lock->id);
 	while (true) {
 		zk_get_least_seq(parent, lowest_seq_path, MAX_NODE_STR_LEN,
@@ -1275,7 +1278,11 @@ static void zk_lock(uint64_t lock_id)
 		rc = zoo_exists(zhandle, lowest_seq_path, 1, NULL);
 		if (rc == ZOK) {
 			sd_debug("call zoo_exits success %s", lowest_seq_path);
-			sd_mutex_lock(&cluster_lock->wait_wakeup);
+			sd_mutex_lock(&cluster_lock->wait_mutex);
+			sd_cond_wait_timeout(&cluster_lock->wait_wakeup,
+					     &cluster_lock->wait_mutex,
+					     WAIT_TIME);
+			sd_mutex_unlock(&cluster_lock->wait_mutex);
 		} else {
 			sd_debug("failed to call zoo_exists %s", zerror(rc));
 			if (rc != ZNONODE)
-- 
1.7.12.4
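For reference, the standard POSIX pattern behind sd_cond_wait_timeout()
is sketched below as a standalone program; names such as waker, woken
and wakeup_mutex are illustrative and not from the sheepdog tree. Two
details matter here: pthread_cond_timedwait() takes an absolute
deadline rather than a relative interval, and it must be called with
its mutex held; a predicate loop guards against spurious wakeups.

	#include <errno.h>
	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>
	#include <time.h>

	static pthread_mutex_t wakeup_mutex = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t wakeup_cond = PTHREAD_COND_INITIALIZER;
	static bool woken;	/* predicate guarding against spurious wakeups */

	static void *waker(void *arg)
	{
		(void)arg;
		pthread_mutex_lock(&wakeup_mutex);
		woken = true;
		/* wake every waiter, as lock_table_lookup_wakeup() does */
		pthread_cond_broadcast(&wakeup_cond);
		pthread_mutex_unlock(&wakeup_mutex);
		return NULL;
	}

	int main(void)
	{
		struct timespec deadline;
		pthread_t tid;

		pthread_create(&tid, NULL, waker, NULL);

		pthread_mutex_lock(&wakeup_mutex);
		/* absolute deadline: "now + 5 seconds" on the realtime clock */
		clock_gettime(CLOCK_REALTIME, &deadline);
		deadline.tv_sec += 5;
		while (!woken) {
			if (pthread_cond_timedwait(&wakeup_cond, &wakeup_mutex,
						   &deadline) == ETIMEDOUT)
				break;	/* give up waiting and re-check state */
		}
		pthread_mutex_unlock(&wakeup_mutex);

		pthread_join(tid, NULL);
		printf("woken = %d\n", (int)woken);
		return 0;
	}

The timeout plays the same role as WAIT_TIME in zk_lock(): if a
broadcast from zk_watcher() is lost because no thread was waiting at
that moment, the waiter wakes up at the deadline and re-checks the
lowest sequence znode instead of blocking forever.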