[sheepdog] [PATCH v3 3/3] sheep/http: add lock to protect container and object

Liu Yuan namei.unix at gmail.com
Thu Dec 12 09:33:08 CET 2013


On Wed, Dec 11, 2013 at 06:14:43PM +0800, Robin Dong wrote:
> From: Robin Dong <sanbai at taobao.com>
> 
> Add locks to protect containers and objects when users create/delete
> containers or create/delete objects at the same time.
> 
> Signed-off-by: Robin Dong <sanbai at taobao.com>
> ---
>  sheep/http/kv.c    | 252 ++++++++++++++++++++++++++++++++---------------------
>  sheep/http/swift.c |  10 ++-
>  2 files changed, 162 insertions(+), 100 deletions(-)
> 
> diff --git a/sheep/http/kv.c b/sheep/http/kv.c
> index fb66dfa..4d690cb 100644
> --- a/sheep/http/kv.c
> +++ b/sheep/http/kv.c
> @@ -218,22 +218,22 @@ static int kv_get_account(const char *account, uint32_t *nr_buckets)
>  
>  	ret = lookup_vdi(account, &account_vid);
>  	if (ret != SD_RES_SUCCESS)
> -		return ret;
> +		goto out;
>  
>  	/* read account vdi out */
>  	oid = vid_to_vdi_oid(account_vid);
>  	ret = sd_read_object(oid, (char *)&inode, sizeof(struct sd_inode), 0);
>  	if (ret != SD_RES_SUCCESS) {
>  		sd_err("Failed to read inode header %lx", oid);
> -		return ret;
> +		goto out;
>  	}
>  
>  	struct list_buckets_arg arg = {NULL, NULL, NULL, 0};
>  	traverse_btree(sheep_bnode_reader, &inode, list_buckets_cb, &arg);
>  	if (nr_buckets)
>  		*nr_buckets = arg.bucket_counter;
> -
> -	return SD_RES_SUCCESS;
> +out:
> +	return ret;
>  }
>  
>  int kv_read_account(const char *account, uint32_t *nr_buckets)
> @@ -528,21 +528,22 @@ out:
>  }
>  
>  static int kv_get_bucket(struct sd_inode *account_inode, const char *account,
> -			 const char *bucket)
> +			 const char *bucket, uint32_t *account_vid)
>  {
>  	char vdi_name[SD_MAX_VDI_LEN];
>  	uint64_t oid;
> -	uint32_t account_vid, bucket_vid;
> +	uint32_t bucket_vid;
>  	int ret;
>  
> -	ret = lookup_vdi(account, &account_vid);
> +	ret = lookup_vdi(account, account_vid);
>  	if (ret != SD_RES_SUCCESS) {
>  		sd_err("Failed to find account %s", account);
>  		return -1;
>  	}
>  
> +	sys->cdrv->lock(*account_vid);
>  	/* read account vdi out */
> -	oid = vid_to_vdi_oid(account_vid);
> +	oid = vid_to_vdi_oid(*account_vid);
>  	ret = sd_read_object(oid, (char *)account_inode,
>  			  sizeof(struct sd_inode), 0);
>  	if (ret != SD_RES_SUCCESS)
> @@ -559,15 +560,19 @@ int kv_create_bucket(const char *account, const char *bucket)
>  {
>  	struct sd_inode inode;
>  	uint64_t hval, i;
> +	uint32_t account_vid;
>  	int ret;
>  
> -	ret = kv_get_bucket(&inode, account, bucket);
> +	ret = kv_get_bucket(&inode, account, bucket, &account_vid);
>  	/* if bucket is exists, return SD_RES_VDI_EXIST */
>  	if (!ret) {
>  		sd_err("bucket %s is exists.", bucket);
> -		return SD_RES_VDI_EXIST;
> -	} else if (ret < 0)
> -		return -1;
> +		ret = SD_RES_VDI_EXIST;
> +		goto out;
> +	} else if (ret < 0) {
> +		ret = -1;
> +		goto out;
> +	}
>  
>  	sd_debug("read account inode success");
>  
> @@ -581,7 +586,7 @@ int kv_create_bucket(const char *account, const char *bucket)
>  			continue;
>  		} else if (ret < 0) {
>  			sd_err("Failed to add bucket");
> -			return ret;
> +			goto out;
>  		}
>  		/* add bucket success */
>  		sd_debug("add bucket success");
> @@ -590,9 +595,12 @@ int kv_create_bucket(const char *account, const char *bucket)
>  
>  	if (i >= MAX_BUCKETS) {
>  		sd_err("Containers in vdi %s is full!", account);
> -		return -1;
> +		ret = -1;
> +		goto out;
>  	}
> -	return 0;
> +out:
> +	sys->cdrv->unlock(account_vid);
> +	return ret;
>  }
>  
>  int kv_read_bucket(const char *account, const char *bucket)
> @@ -612,12 +620,13 @@ int kv_delete_bucket(const char *account, const char *bucket)
>  {
>  	struct sd_inode inode;
>  	uint64_t hval, i;
> +	uint32_t account_vid;
>  	int ret;
>  
> -	ret = kv_get_bucket(&inode, account, bucket);
> +	ret = kv_get_bucket(&inode, account, bucket, &account_vid);
>  	if (ret) {
>  		sd_err("Failed to get bucket");
> -		return ret;
> +		goto out;
>  	}
>  
>  	hval = sd_hash(bucket, strlen(bucket));
> @@ -629,7 +638,7 @@ int kv_delete_bucket(const char *account, const char *bucket)
>  			continue;
>  		} else if (ret < 0) {
>  			sd_err("Failed to delete bucket %d", ret);
> -			return ret;
> +			goto out;
>  		}
>  		/* delete bucket success */
>  		sd_debug("delete bucket success");
> @@ -638,9 +647,12 @@ int kv_delete_bucket(const char *account, const char *bucket)
>  
>  	if (i >= MAX_BUCKETS) {
>  		sd_err("Can't find bucket %s", bucket);
> -		return SD_RES_NO_VDI;
> +		ret = SD_RES_NO_VDI;
> +		goto out;
>  	}
> -	return SD_RES_SUCCESS;
> +out:
> +	sys->cdrv->unlock(account_vid);
> +	return ret;
>  }
>  
>  int kv_list_buckets(struct http_request *req, const char *account,
> @@ -659,17 +671,20 @@ int kv_list_buckets(struct http_request *req, const char *account,
>  
>  	/* read account vdi out */
>  	oid = vid_to_vdi_oid(account_vid);
> +	sys->cdrv->lock(account_vid);
>  	ret = sd_read_object(oid, (char *)&account_inode,
>  			  sizeof(struct sd_inode), 0);
>  	if (ret != SD_RES_SUCCESS) {
>  		sd_err("Failed to read account inode header %lx", oid);
> -		return ret;
> +		goto out;
>  	}
>  
>  	struct list_buckets_arg arg = {req, opaque, cb, 0};
>  	traverse_btree(sheep_bnode_reader, &account_inode,
>  		       list_buckets_cb, &arg);
> -	return SD_RES_SUCCESS;
> +out:
> +	sys->cdrv->unlock(account_vid);
> +	return ret;
>  }
>  
>  /*
> @@ -867,13 +882,72 @@ out:
>  	return ret;
>  }
>  
> +static int kv_create_extent_onode(struct http_request *req, uint32_t data_vid,
> +				  struct kv_onode *onode, ssize_t *total_size)
> +{
> +	ssize_t size;
> +	uint64_t start = 0, count, limit, block;
> +	int ret;
> +	char *data_buf = NULL;
> +
> +	count = (req->data_length + SD_DATA_OBJ_SIZE + 1) / SD_DATA_OBJ_SIZE;
> +	sys->cdrv->lock(data_vid);
> +	ret = oalloc_new_prepare(data_vid, &start, count);
> +	sys->cdrv->unlock(data_vid);
> +	if (ret != SD_RES_SUCCESS) {
> +		sd_err("Failed to prepare allocation of %lu bytes!",
> +		       req->data_length);
> +		ret = -1;
> +		goto out;
> +	}
> +
> +	/* receive and write data at first, then write onode */
> +	data_buf = xmalloc(SD_DATA_OBJ_SIZE);
> +
> +	sd_debug("start: %lu, count: %lu", start, count);
> +	for (block = start, limit = start + count; block < limit; block++) {
> +		sd_debug("block: %lu, limit: %lu", block, limit);
> +		size = http_request_read(req, data_buf, SD_DATA_OBJ_SIZE);
> +		*total_size += size;
> +		ret = sd_write_object(vid_to_data_oid(data_vid, block),
> +				      data_buf, size, 0, true);
> +		if (ret != SD_RES_SUCCESS) {
> +			sd_err("Failed to write data object for %" PRIx32" %s",
> +			       data_vid, sd_strerror(ret));
> +			goto out;
> +		}
> +		if (size < SD_DATA_OBJ_SIZE)
> +			break;
> +	}
> +
> +	sd_debug("DATA_LENGTH: %lu, total size: %lu, last blocks: %lu",
> +		 req->data_length, *total_size, start);
> +
> +	sd_debug("finish start: %lu, count: %lu", start, count);
> +	sys->cdrv->lock(data_vid);
> +	ret = oalloc_new_finish(data_vid, start, count);
> +	sys->cdrv->unlock(data_vid);
> +	if (ret != SD_RES_SUCCESS) {
> +		sd_err("Failed to finish allocation of %lu bytes!",
> +		       req->data_length);
> +		ret = -1;
> +		goto out;
> +	}
> +
> +	onode->o_extent[0].start = start;
> +	onode->o_extent[0].count = count;
> +	onode->hdr.nr_extent = 1;
> +out:
> +	return ret;
> +}
> +
>  int kv_create_object(struct http_request *req, const char *account,
>  		     const char *bucket, const char *name)
>  {
>  	struct kv_onode *onode;
>  	ssize_t size, total_size = 0;
>  	int ret;
> -	uint64_t hval, start = 0, count, block, limit;
> +	uint64_t hval;
>  	uint32_t vid, data_vid;
>  	struct timeval tv;
>  	char vdi_name[SD_MAX_VDI_LEN];
> @@ -904,54 +978,10 @@ int kv_create_object(struct http_request *req, const char *account,
>  		}
>  		total_size = size;
>  	} else {
> -		sd_debug("data_length: %lu, %lu", req->data_length,
> -			 SD_DATA_OBJ_SIZE);
> -		count = (req->data_length + SD_DATA_OBJ_SIZE + 1) /
> -			SD_DATA_OBJ_SIZE;
> -		ret = oalloc_new_prepare(data_vid, &start, count);
> -		if (ret != SD_RES_SUCCESS) {
> -			sd_err("Failed to prepare allocation of %lu bytes!",
> -			       req->data_length);
> -			ret = -1;
> -			goto out;
> -		}
> -
> -		/* receive and write data at first, then write onode */
> -		data_buf = xmalloc(SD_DATA_OBJ_SIZE);
> -
> -		sd_debug("start: %lu, count: %lu", start, count);
> -		for (block = start, limit = start + count;
> -		     block < limit; block++) {
> -			sd_debug("block: %lu, limit: %lu", block, limit);
> -			size = http_request_read(req, data_buf,
> -						 SD_DATA_OBJ_SIZE);
> -			total_size += size;
> -			ret = sd_write_object(vid_to_data_oid(data_vid, block),
> -					      data_buf, size, 0, true);
> -			if (ret != SD_RES_SUCCESS) {
> -				sd_err("Failed to write data object for %"
> -				       PRIx32" %s", data_vid, sd_strerror(ret));
> -				goto out;
> -			}
> -			if (size < SD_DATA_OBJ_SIZE)
> -				break;
> -		}
> -
> -		sd_debug("DATA_LENGTH: %lu, total size: %lu, last blocks: %lu",
> -			 req->data_length, total_size, start);
> -
> -		sd_debug("finish start: %lu, count: %lu", start, count);
> -		ret = oalloc_new_finish(data_vid, start, count);
> -		if (ret != SD_RES_SUCCESS) {
> -			sd_err("Failed to finish allocation of %lu bytes!",
> -			       req->data_length);
> -			ret = -1;
> +		/* for extented onode */
> +		ret = kv_create_extent_onode(req, data_vid, onode, &total_size);
> +		if (ret != SD_RES_SUCCESS)
>  			goto out;
> -		}
> -
> -		onode->o_extent[0].start = start;
> -		onode->o_extent[0].count = count;
> -		onode->hdr.nr_extent = 1;
>  	}
>  
>  	/* after write data, we write onode now */
> @@ -963,6 +993,8 @@ int kv_create_object(struct http_request *req, const char *account,
>  	onode->hdr.size = total_size;
>  	onode->hdr.data_vid = data_vid;
>  
> +	/* lock vid which stores onode */
> +	sys->cdrv->lock(vid);
>  	hval = sd_hash(name, strlen(name));
>  	for (int i = 0; i < MAX_DATA_OBJS; i++) {
>  		uint32_t idx = (hval + i) % MAX_DATA_OBJS;
> @@ -982,6 +1014,7 @@ int kv_create_object(struct http_request *req, const char *account,
>  	/* no free space to create a object */
>  	http_response_header(req, SERVICE_UNAVAILABLE);
>  out:
> +	sys->cdrv->unlock(vid);
>  	free(onode);
>  	free(data_buf);

It seems that data_buf is no longer used after this refactor.

Thanks
Yuan



More information about the sheepdog mailing list