[sheepdog] [PATCH v2] cluster: change the interface of distributed lock
Liu Yuan
namei.unix at gmail.com
Mon Dec 9 07:30:38 CET 2013
On Mon, Dec 09, 2013 at 12:42:38PM +0800, Robin Dong wrote:
> From: Robin Dong <sanbai at taobao.com>
>
> Use only lock_id as the parameter of the lock interface and reduce the functions to two:
>
> lock(uint64_t lock_id)
> unlock(uint64_t lock_id)
>
> ->lock will create and acquire the distributed lock and ->unlock will release it,
> which is more convenient for the end user.
> We use the existing hash table to store "<lock_id, cluster_lock*>" pairs and release
> all the resources of a distributed lock after all threads have called "->unlock".
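For reference, after this change a caller would look roughly like the sketch below
(just a sketch, assuming sheep/cluster.h is in scope; 'cdrv' stands for whichever
struct cluster_driver pointer the caller already holds, and the lock id is arbitrary):

static void update_shared_resource(struct cluster_driver *cdrv)
{
	uint64_t lock_id = 0x1234;	/* arbitrary id chosen by the caller */

	/* blocks until this thread owns the cluster-wide lock */
	cdrv->lock(lock_id);
	/* ... touch the shared resource ... */
	cdrv->unlock(lock_id);	/* last unlocker also frees the in-memory state */
}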
>
> Signed-off-by: Robin Dong <sanbai at taobao.com>
> ---
> include/sheep.h | 12 ----
> sheep/cluster.h | 31 +++++----
> sheep/cluster/corosync.c | 10 +--
> sheep/cluster/local.c | 10 +--
> sheep/cluster/zookeeper.c | 167 ++++++++++++++++++++++++++++------------------
> 5 files changed, 121 insertions(+), 109 deletions(-)
>
> diff --git a/include/sheep.h b/include/sheep.h
> index e5726e8..293e057 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -255,18 +255,6 @@ static inline void nodes_to_buffer(struct rb_root *nroot, void *buffer)
>
> #define MAX_NODE_STR_LEN 256
>
> -/* structure for distributed lock */
> -struct cluster_lock {
> - struct hlist_node hnode;
> - /* id is passed by users to represent a lock handle */
> - uint64_t id;
> - /* wait for the release of id by other lock owner */
> - pthread_mutex_t wait_release;
> - /* lock for different threads of the same node on the same id */
> - pthread_mutex_t id_lock;
> - char lock_path[MAX_NODE_STR_LEN];
> -};
> -
> static inline const char *node_to_str(const struct sd_node *id)
> {
> static __thread char str[MAX_NODE_STR_LEN];
> diff --git a/sheep/cluster.h b/sheep/cluster.h
> index 08df91c..0693633 100644
> --- a/sheep/cluster.h
> +++ b/sheep/cluster.h
> @@ -109,33 +109,32 @@ struct cluster_driver {
> int (*unblock)(void *msg, size_t msg_len);
>
> /*
> - * Init a distributed mutually exclusive lock to avoid race condition
> - * when the whole sheepdog cluster process one exclusive resource.
> + * Acquire the distributed lock.
> *
> - * This function use 'lock_id' as the id of this distributed lock.
> - * A thread can create many locks in one sheep daemon.
> + * Create a distributed mutually exclusive lock to avoid race condition
> + * and try to acquire the lock.
> *
> - * Returns SD_RES_XXX
> - */
> - int (*init_lock)(struct cluster_lock *lock, uint64_t lock_id);
> -
> - /*
> - * Acquire the distributed lock.
> + * This function uses 'lock_id' as the id of this distributed lock.
> + * A thread can acquire many locks with different lock_id in one
> + * sheep daemon.
> *
> - * The cluster_lock referenced by 'lock' shall be locked by calling
> - * cluster->lock(). If the cluster_lock is already locked, the calling
> - * thread shall block until the cluster_lock becomes available.
> + * The cluster lock referenced by 'lock' shall be locked by calling
> + * cluster->lock(). If the cluster lock is already locked, the calling
> + * thread shall block until the cluster lock becomes available.
> */
> - void (*lock)(struct cluster_lock *lock);
> + void (*lock)(uint64_t lock_id);
>
> /*
> * Release the distributed lock.
> *
> - * If the owner of the cluster_lock release it (or the owner is
> + * If the owner of the cluster lock releases it (or the owner is
> * killed by accident), zookeeper will trigger zk_watch() which will
> * wake up all waiting threads to compete new owner of the lock
> + *
> + * After all threads unlock, all the resources of this distributed lock
> + * will be released.
> */
> - void (*unlock)(struct cluster_lock *lock);
> + void (*unlock)(uint64_t lock_id);
I think there are cases where lock/unlock can fail, so it is better to return 'int'.
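Something like the following sketch, reusing the SD_RES_XXX convention from the
old ->init_lock comment:

	/*
	 * Return SD_RES_SUCCESS on success and another SD_RES_XXX code on
	 * failure, so callers can tell e.g. a zookeeper error apart from a
	 * clean acquisition/release.
	 */
	int (*lock)(uint64_t lock_id);
	int (*unlock)(uint64_t lock_id);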
>
> /*
> * Update the specific node in the driver's private copy of nodes
> diff --git a/sheep/cluster/corosync.c b/sheep/cluster/corosync.c
> index 19fc73c..6974dd9 100644
> --- a/sheep/cluster/corosync.c
> +++ b/sheep/cluster/corosync.c
> @@ -774,16 +774,11 @@ again:
> return 0;
> }
>
> -static int corosync_init_lock(struct cluster_lock *cluster_lock, uint64_t id)
> -{
> - return -1;
> -}
> -
> -static void corosync_lock(struct cluster_lock *cluster_lock)
> +static void corosync_lock(uint64_t lock_id)
> {
> }
>
> -static void corosync_unlock(struct cluster_lock *cluster_lock)
> +static void corosync_unlock(uint64_t lock_id)
> {
> }
>
> @@ -807,7 +802,6 @@ static struct cluster_driver cdrv_corosync = {
> .notify = corosync_notify,
> .block = corosync_block,
> .unblock = corosync_unblock,
> - .init_lock = corosync_init_lock,
> .lock = corosync_lock,
> .unlock = corosync_unlock,
> .update_node = corosync_update_node,
> diff --git a/sheep/cluster/local.c b/sheep/cluster/local.c
> index 4c4d83b..6d0af68 100644
> --- a/sheep/cluster/local.c
> +++ b/sheep/cluster/local.c
> @@ -547,16 +547,11 @@ static int local_init(const char *option)
> return 0;
> }
>
> -static int local_init_lock(struct cluster_lock *cluster_lock, uint64_t id)
> +static void local_lock(uint64_t lock_id)
> {
> - return -1;
> }
>
> -static void local_lock(struct cluster_lock *cluster_lock)
> -{
> -}
> -
> -static void local_unlock(struct cluster_lock *cluster_lock)
> +static void local_unlock(uint64_t lock_id)
> {
> }
>
> @@ -579,7 +574,6 @@ static struct cluster_driver cdrv_local = {
> .notify = local_notify,
> .block = local_block,
> .unblock = local_unblock,
> - .init_lock = local_init_lock,
> .lock = local_lock,
> .unlock = local_unlock,
> .update_node = local_update_node,
> diff --git a/sheep/cluster/zookeeper.c b/sheep/cluster/zookeeper.c
> index 9e4ffa5..3af20fd 100644
> --- a/sheep/cluster/zookeeper.c
> +++ b/sheep/cluster/zookeeper.c
> @@ -32,6 +32,23 @@
> #define MASTER_ZNONE BASE_ZNODE "/master"
> #define LOCK_ZNODE BASE_ZNODE "/lock"
>
> +static inline ZOOAPI int zk_init_node(const char *path);
> +static inline ZOOAPI int zk_delete_node(const char *path, int version);
> +
> +/* structure for distributed lock */
> +struct cluster_lock {
> + struct hlist_node hnode;
> + /* id is passed by users to represent a lock handle */
> + uint64_t id;
> + /* referenced by different threads in one sheepdog daemon */
> + uint64_t ref;
> + /* wait for the release of id by other lock owner */
> + pthread_mutex_t wait_wakeup;
> + /* lock for different threads of the same node on the same id */
> + pthread_mutex_t id_lock;
> + char lock_path[MAX_NODE_STR_LEN];
> +};
> +
> #define WAIT_TIME 1 /* second */
>
> #define HASH_BUCKET_NR 1021
> @@ -43,49 +60,109 @@ static pthread_mutex_t table_locks[HASH_BUCKET_NR];
> * cluster_lock->id_lock so we don't need to add lock here
> */
>
> -static void lock_table_del(uint64_t lock_id)
> +static int lock_table_lookup_wakeup(uint64_t lock_id)
> {
> uint64_t hval = sd_hash_64(lock_id) % HASH_BUCKET_NR;
> + int res = -1;
> struct hlist_node *iter;
> struct cluster_lock *lock;
>
> pthread_mutex_lock(table_locks + hval);
> hlist_for_each_entry(lock, iter, cluster_locks_table + hval, hnode) {
> if (lock->id == lock_id) {
> - hlist_del(iter);
> + pthread_mutex_unlock(&lock->wait_wakeup);
> + res = 0;
> break;
> }
> }
> pthread_mutex_unlock(table_locks + hval);
> + return res;
> }
>
> -static void lock_table_add(uint64_t lock_id,
> - struct cluster_lock *cluster_lock)
> +static struct cluster_lock *lock_table_lookup_acquire(uint64_t lock_id)
> {
> uint64_t hval = sd_hash_64(lock_id) % HASH_BUCKET_NR;
> + int rc;
> + struct hlist_node *iter;
> + struct cluster_lock *lock, *ret_lock = NULL;
> + char path[MAX_NODE_STR_LEN];
>
> pthread_mutex_lock(table_locks + hval);
> - hlist_add_head(&(cluster_lock->hnode), cluster_locks_table + hval);
> + hlist_for_each_entry(lock, iter, cluster_locks_table + hval, hnode) {
> + if (lock->id == lock_id) {
> + ret_lock = lock;
> + ret_lock->ref++;
> + break;
> + }
> + }
> +
> + if (!ret_lock) {
> + /* create lock and add it to hash table */
> + ret_lock = xzalloc(sizeof(*ret_lock));
> + ret_lock->id = lock_id;
> + ret_lock->ref = 1;
> + snprintf(path, MAX_NODE_STR_LEN, LOCK_ZNODE "/%lu",
> + ret_lock->id);
> + rc = zk_init_node(path);
> + if (rc)
> + panic("Failed to init node %s", path);
> +
> + rc = pthread_mutex_init(&ret_lock->wait_wakeup, NULL);
> + if (rc)
> + panic("failed to init cluster_lock->wait_wakeup");
> +
> + rc = pthread_mutex_init(&ret_lock->id_lock, NULL);
> + if (rc)
> + panic("failed to init cluster_lock->id_lock");
> +
> + hlist_add_head(&(ret_lock->hnode), cluster_locks_table + hval);
> + }
> pthread_mutex_unlock(table_locks + hval);
> +
> + /*
> + * If many threads use locks with the same id, we take
> + * ->id_lock so that the single zookeeper handler does not
> + * create many seq-ephemeral files.
> + */
> + pthread_mutex_lock(&ret_lock->id_lock);
> + return ret_lock;
> }
>
> -static int lock_table_lookup_release(uint64_t lock_id)
> +static void lock_table_lookup_release(uint64_t lock_id)
> {
> uint64_t hval = sd_hash_64(lock_id) % HASH_BUCKET_NR;
> - int res = -1;
> + int rc;
> struct hlist_node *iter;
> struct cluster_lock *lock;
> + char path[MAX_NODE_STR_LEN];
>
> pthread_mutex_lock(table_locks + hval);
> hlist_for_each_entry(lock, iter, cluster_locks_table + hval, hnode) {
> - if (lock->id == lock_id) {
> - pthread_mutex_unlock(&lock->wait_release);
> - res = 0;
> - break;
> + if (lock->id != lock_id)
> + continue;
> + rc = zk_delete_node(lock->lock_path, -1);
> + if (rc != ZOK)
> + sd_err("Failed to delete path: %s %s", lock->lock_path,
> + zerror(rc));
I think we need to return instead of continuing when an error happens.
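That is, something along these lines (untested sketch; it also fits with making
->unlock return int as noted above):

		rc = zk_delete_node(lock->lock_path, -1);
		if (rc != ZOK) {
			sd_err("Failed to delete path: %s %s",
			       lock->lock_path, zerror(rc));
			pthread_mutex_unlock(table_locks + hval);
			return;	/* don't keep tearing down the lock state */
		}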
Thanks
Yuan