[sheepdog] [PATCH v1 1/2] sheep/http: store accounts and containers into hyper volume for object-storage

Liu Yuan namei.unix at gmail.com
Sat Dec 7 07:52:44 CET 2013


On Fri, Dec 06, 2013 at 05:04:18PM +0800, Robin Dong wrote:
> From: Robin Dong <sanbai at taobao.com>
> 
> Using hyper volume (size up to 16PB) to store large number of accounts
> and containers.
> 
> Signed-off-by: Robin Dong <sanbai at taobao.com>
> ---
>  sheep/http/http.c  |   2 +
>  sheep/http/http.h  |   1 +
>  sheep/http/kv.c    | 630 +++++++++++++++++++++++++++++++++++++++++++++++------
>  sheep/http/kv.h    |  20 +-
>  sheep/http/s3.c    |   4 +-
>  sheep/http/swift.c |  98 ++++++---
>  6 files changed, 654 insertions(+), 101 deletions(-)
> 
> diff --git a/sheep/http/http.c b/sheep/http/http.c
> index 04ef364..cae3f44 100644
> --- a/sheep/http/http.c
> +++ b/sheep/http/http.c
> @@ -52,6 +52,7 @@ static inline const char *strstatus(enum http_status status)
>  		[NO_CONTENT] = "204 No Content",
>  		[PARTIAL_CONTENT] = "206 Partial Content",
>  		[BAD_REQUEST] = "400 Bad Request",
> +		[UNAUTHORIZED] = "401 Unauthorized",
>  		[NOT_FOUND] = "404 Not Found",
>  		[METHOD_NOT_ALLOWED] = "405 Method Not Allowed",
>  		[CONFLICT] = "409 Conflict",
> @@ -233,6 +234,7 @@ static void http_run_request(struct work *work)
>  
>  		if (method != NULL) {
>  			method(req);
> +			sd_debug("req->status %d", req->status);
>  			if (req->status != UNKNOWN)
>  				goto out;
>  		}
> diff --git a/sheep/http/http.h b/sheep/http/http.h
> index 046d412..a8527d1 100644
> --- a/sheep/http/http.h
> +++ b/sheep/http/http.h
> @@ -32,6 +32,7 @@ enum http_status {
>  	NO_CONTENT,                     /* 204 */
>  	PARTIAL_CONTENT,                /* 206 */
>  	BAD_REQUEST,                    /* 400 */
> +	UNAUTHORIZED,			/* 401 */
>  	NOT_FOUND,                      /* 404 */
>  	METHOD_NOT_ALLOWED,             /* 405 */
>  	CONFLICT,                       /* 409 */
> diff --git a/sheep/http/kv.c b/sheep/http/kv.c
> index 8113389..55a7e24 100644
> --- a/sheep/http/kv.c
> +++ b/sheep/http/kv.c
> @@ -16,14 +16,25 @@
>  
>  #define FOR_EACH_VDI(nr, vdis) FOR_EACH_BIT(nr, vdis, SD_NR_VDIS)
>  
> -static int lookup_bucket(struct http_request *req, const char *bucket,
> -			 uint32_t *vid)
> +struct bucket_inode {
> +	char bucket_name[SD_MAX_BUCKET_NAME];
> +	uint64_t obj_count;
> +	uint64_t bytes_used;
> +	uint32_t vdi_id;		/* kv_onode stores in this vdi */

Simply name it onode_vid; then there is no need for the comment.

> +	uint32_t pad;
> +	uint64_t reserved[SD_MAX_BUCKET_NAME/sizeof(uint64_t) - 3];
> +};

Use a union, as in kv_onode, to explicitly set how much space it actually takes.

> +
> +#define MAX_BUCKETS (SD_MAX_VDI_SIZE / sizeof(struct bucket_inode))
> +#define BUCKETS_PER_SD_OBJ (SD_DATA_OBJ_SIZE / sizeof(struct bucket_inode))
> +
> +static int lookup_vdi(const char *name, uint32_t *vid)
>  {
>  	int ret;
>  	struct vdi_info info = {};
>  	struct vdi_iocb iocb = {
> -		.name = bucket,
> -		.data_len = strlen(bucket),
> +		.name = name,
> +		.data_len = strlen(name),
>  	};
>  
>  	ret = vdi_lookup(&iocb, &info);
> @@ -32,27 +43,23 @@ static int lookup_bucket(struct http_request *req, const char *bucket,
>  		*vid = info.vid;
>  		break;
>  	case SD_RES_NO_VDI:
> -		sd_info("no such bucket %s", bucket);
> -		http_response_header(req, NOT_FOUND);
> -		return -1;
> +		sd_info("no such vdi %s", name);
> +		break;
>  	default:
> -		sd_err("%s: bucket %s", sd_strerror(ret), bucket);
> -		http_response_header(req, INTERNAL_SERVER_ERROR);
> -		return -1;
> +		sd_err("Failed to find vdi %s %s", name, sd_strerror(ret));
>  	}
>  
> -	return 0;
> +	return ret;
>  }
>  
> -/* Bucket operations */
> -
> -int kv_create_bucket(struct http_request *req, const char *bucket)
> +static int kv_create_hyper_volume(const char *name, uint32_t *vdi_id)
>  {
>  	struct sd_req hdr;
> +	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
>  	int ret;
>  	char buf[SD_MAX_VDI_LEN] = {0};
>  
> -	pstrcpy(buf, SD_MAX_VDI_LEN, bucket);
> +	pstrcpy(buf, SD_MAX_VDI_LEN, name);
>  
>  	sd_init_req(&hdr, SD_OP_NEW_VDI);
>  	hdr.flags = SD_FLAG_CMD_WRITE;
> @@ -64,44 +71,50 @@ int kv_create_bucket(struct http_request *req, const char *bucket)
>  	hdr.vdi.store_policy = 1;
>  
>  	ret = exec_local_req(&hdr, buf);
> -	switch (ret) {
> -	case SD_RES_SUCCESS:
> -		http_response_header(req, CREATED);
> -		break;
> -	case SD_RES_VDI_EXIST:
> -		http_response_header(req, ACCEPTED);
> -		break;
> -	default:
> -		sd_err("%s: bucket %s", sd_strerror(ret), bucket);
> -		http_response_header(req, INTERNAL_SERVER_ERROR);
> -		return -1;
> -	}
> +	if (rsp->result != SD_RES_SUCCESS)
> +		sd_err("Failed to create VDI %s: %s", name,
> +		       sd_strerror(rsp->result));
>  
> -	return 0;
> -}
> +	if (vdi_id)
> +		*vdi_id = rsp->vdi.vdi_id;
>  
> -int kv_read_bucket(struct http_request *req, const char *bucket)
> -{
> -	/* TODO: read metadata of the bucket */
> -	return -1;
> +	return ret;
>  }
>  
> -int kv_update_bucket(struct http_request *req, const char *bucket)
> +static int discard_data_obj(uint64_t oid)
>  {
> -	/* TODO: update metadata of the bucket */
> -	return -1;
> +	int ret;
> +	struct sd_req hdr;
> +
> +	sd_init_req(&hdr, SD_OP_DELETE_CACHE);
> +	hdr.obj.oid = oid;
> +
> +	ret = exec_local_req(&hdr, NULL);
> +	if (ret != SD_RES_SUCCESS) {
> +		sd_err("Failed to execute request");
> +		return ret;
> +	}
> +

Since our container can be accessed by multiple clients, I think we will never
allow object cache for it, because object cache is designed for a single client.
So there is no need to run the SD_OP_DELETE_CACHE request.

For account HEAD operation: 

yliu at ubuntu-precise:~/sheepdog$ curl -i -X HEAD http://localhost/v1/yliu
HTTP/1.1 204 No Content
Server: nginx/1.1.19
Date: Sat, 07 Dec 2013 06:41:46 GMT
Content-Type: text/plain;
Connection: keep-alive
X-Account-Container-Count: 2

Seems that we need to pass X-Account-Bytes-Used too?

For account GET operation:
yliu at ubuntu-precise:~/sheepdog$ curl  -X GET http://localhost/v1/yliu

girls
wives

It seems that we write an extra blank line and lack the Content-Length field?

Thanks
Yuan



More information about the sheepdog mailing list