[sheepdog] [PATCH v1 1/3] sheep/http: add APPEND operation for PUT request

Liu Yuan namei.unix at gmail.com
Fri Mar 14 07:16:54 CET 2014


On Thu, Mar 13, 2014 at 06:45:34PM +0800, Robin Dong wrote:
> From: Robin Dong <sanbai at taobao.com>
> 
> Users could add http headers 'FLAG: append' for PUT http request to upload
> file or stdout to the tail of an existed object (a object which has been
> appended is just ONODE_INIT), and after all append-operations, it cound use
> http headers 'FLAG: eof' to set the object to ONODE_COMPLETE.
> 
> Signed-off-by: Robin Dong <sanbai at taobao.com>
> ---
>  sheep/http/http.c  |   7 ++
>  sheep/http/http.h  |  10 ++-
>  sheep/http/kv.c    | 203 ++++++++++++++++++++++++++++++++++++++++-------------
>  sheep/http/s3.c    |   2 +-
>  sheep/http/swift.c |  12 +++-
>  5 files changed, 180 insertions(+), 54 deletions(-)
> 
> diff --git a/sheep/http/http.c b/sheep/http/http.c
> index 3027153..382937f 100644
> --- a/sheep/http/http.c
> +++ b/sheep/http/http.c
> @@ -202,6 +202,13 @@ static int request_init_operation(struct http_request *req)
>  		if (!strcmp("true", p))
>  			req->force = true;
>  	}
> +	p = FCGX_GetParam("HTTP_FLAG", env);
> +	if (p && p[0] != '\0') {
> +		if (!strcmp("append", p))
> +			req->append = true;
> +		else if (!strcmp("eof", p))
> +			req->eof = true;
> +	}

For "FORCE" header, we check 'FORCE' but for FLAG or check "HTTP_FLAG". I am
wondering why they are treated differently.

It seems that http proxy server can rename 'HTTP_XXX' as 'XXX' with proper
configuration.

For consistency I'd suggest check both 'HTTP_XXX' and 'XXX' internally and for
our test file, make nginx rename every 'HTTP_XXX" as 'XXX'.

>  
>  	req->status = UNKNOWN;
>  
> diff --git a/sheep/http/http.h b/sheep/http/http.h
> index 06dfd5d..149f49d 100644
> --- a/sheep/http/http.h
> +++ b/sheep/http/http.h
> @@ -50,6 +50,8 @@ struct http_request {
>  	uint64_t data_length;
>  	uint64_t offset;
>  	bool force;
> +	bool append;
> +	bool eof;
>  };
>  
>  struct http_driver {
> @@ -140,9 +142,15 @@ int kv_iterate_bucket(const char *account,
>  		      void (*cb)(const char *bucket, void *opaque),
>  		      void *opaque);
>  
> +/* creat_flags for kv_create_object */
> +#define ONODE_APPEND	1	/* append data to the tail of onode */

should be 'append data to the tail of object'

> +#define ONODE_EOF	2	/* all data has been uploaded */

should be 'finish the append operation'

>
> +
>  /* Object operations */
> +int kv_complete_object(struct http_request *req, const char *account,
> +		       const char *bucket, const char *object);
>  int kv_create_object(struct http_request *req, const char *account,
> -		     const char *bucket, const char *object);
> +		     const char *bucket, const char *object, int creat_flags);
>  int kv_read_object(struct http_request *req, const char *account,
>  		   const char *bucket, const char *object);
>  int kv_read_object_meta(struct http_request *req, const char *account,
> diff --git a/sheep/http/kv.c b/sheep/http/kv.c
> index ff3bfd8..07196ab 100644
> --- a/sheep/http/kv.c
> +++ b/sheep/http/kv.c
> @@ -54,6 +54,19 @@ struct onode_extent {
>  #define ONODE_INIT	1	/* created and allocated space, but no data */
>  #define ONODE_COMPLETE	2	/* data upload complete */
>  
> +
> +/*
> + * Add 'append' operation for creating object.
> + * Users could add http headers 'FLAG: append' to PUT file or stdout to
> + * the tail of an existed object (a object which has been appended is
> + * just ONODE_INIT), and after all append-operations, it cound use http headers
> + * 'FLAG: eof' to set the object to ONODE_COMPLETE.
> + */

Rephrase the comment as following:

We allow append write for PUT operation. When 'FLAG: append' is specified in the
http PUT request header, we append the new data at the tail of the existing object
instead of a 'delete-then-create' semantic. When we append objects, we mark them
as ONODE_INIT. When all the append operations are done, we specify 'FLAG: eof'
in the PUT request hearder to finalize the whole transaction, which mark the
objects as ONODE_COMPLETE.


>
> +static inline bool is_append(int creat_flags)
> +{
> +	return (creat_flags & ONODE_APPEND) > 0;
> +}
> +
>  #define ONODE_HDR_SIZE  BLOCK_SIZE
>  
>  struct kv_onode {
> @@ -672,7 +685,7 @@ static int onode_allocate_extents(struct kv_onode *onode,
>  {
>  	uint64_t start = 0, count;
>  	int ret;
> -	uint32_t data_vid = onode->data_vid;
> +	uint32_t data_vid = onode->data_vid, idx;
>  
>  	count = DIV_ROUND_UP(req->data_length, SD_DATA_OBJ_SIZE);
>  	sys->cdrv->lock(data_vid);
> @@ -693,9 +706,10 @@ static int onode_allocate_extents(struct kv_onode *onode,
>  		goto out;
>  	}
>  
> -	onode->o_extent[0].start = start;
> -	onode->o_extent[0].count = count;
> -	onode->nr_extent = 1;
> +	idx = onode->nr_extent;
> +	onode->o_extent[idx].start = start;
> +	onode->o_extent[idx].count = count;
> +	onode->nr_extent++;
>  out:
>  	return ret;
>  }
> @@ -704,7 +718,7 @@ static int onode_populate_extents(struct kv_onode *onode,
>  				  struct http_request *req)
>  {
>  	ssize_t size;
> -	uint64_t start = onode->o_extent[0].start;
> +	uint64_t start = onode->o_extent[onode->nr_extent - 1].start;
>  	uint64_t done = 0, total, offset;
>  	uint64_t write_buffer_size = MIN(kv_rw_buffer, req->data_length);
>  	int ret = SD_RES_SUCCESS;
> @@ -745,33 +759,39 @@ static uint64_t get_seconds(void)
>  	return seconds;
>  }
>  
> -static int onode_allocate_data(struct kv_onode *onode, struct http_request *req)
> +static int onode_allocate_data(struct kv_onode *onode, struct http_request *req,
> +			       int creat_flags)
>  {
>  	int ret = SD_RES_SUCCESS;
>  
> -	if (req->data_length <= KV_ONODE_INLINE_SIZE)
> -		onode->inlined = 1;
> +	/* only 'created' object could be inlined */

This line is meaningless. Or you should add why append object can't be inlined
briefly.

let the code itself explain how and what, use comment to explain why.

> +	if (req->data_length <= KV_ONODE_INLINE_SIZE && !is_append(creat_flags))
> +			onode->inlined = 1;
>  	else {
>  		ret = onode_allocate_extents(onode, req);
>  		if (ret != SD_RES_SUCCESS)
>  			goto out;
>  	}
>  
> -	onode->ctime = get_seconds();
> -	onode->size = req->data_length;
> +	if (!is_append(creat_flags))
> +		onode->ctime = get_seconds();
> +	onode->size += req->data_length;
>  out:
>  	return ret;
>  }
>  
> -static int onode_populate_data(struct kv_onode *onode, struct http_request *req)
> +static int onode_populate_data(struct kv_onode *onode, struct http_request *req,
> +			       int creat_flags)
>  {
>  	ssize_t size;
>  	int ret = SD_RES_SUCCESS;
>  
>  	onode->mtime = get_seconds();
> -	onode->flags = ONODE_COMPLETE;
> +	if (!is_append(creat_flags))
> +		onode->flags = ONODE_COMPLETE;
>  
> -	if (req->data_length <= KV_ONODE_INLINE_SIZE) {
> +	if (req->data_length <= KV_ONODE_INLINE_SIZE &&
> +	    !is_append(creat_flags)) {
>  		size = http_request_read(req, onode->data, sizeof(onode->data));
>  		if (size < 0 || req->data_length != size) {
>  			sd_err("Failed to read from web server for %s",
> @@ -881,16 +901,22 @@ out:
>  static int onode_free_data(struct kv_onode *onode)
>  {
>  	uint32_t data_vid = onode->data_vid;
> -	int ret = SD_RES_SUCCESS;
> +	int ret = SD_RES_SUCCESS, i;
>  
>  	/* it don't need to free data for inlined onode */
>  	if (!onode->inlined) {
>  		sys->cdrv->lock(data_vid);
> -		ret = oalloc_free(data_vid, onode->o_extent[0].start,
> -				  onode->o_extent[0].count);
> +		for (i = 0; i < onode->nr_extent; i++) {
> +			ret = oalloc_free(data_vid, onode->o_extent[i].start,
> +					  onode->o_extent[i].count);
> +			if (ret != SD_RES_SUCCESS)
> +				sd_err("failed to free start: %"PRIu64
> +				       ", count: %"PRIu64", for %s",
> +				       onode->o_extent[i].start,
> +				       onode->o_extent[i].count,
> +				       onode->name);
> +		}
>  		sys->cdrv->unlock(data_vid);
> -		if (ret != SD_RES_SUCCESS)
> -			sd_err("failed to free %s", onode->name);
>  	}
>  	return ret;
>  }
> @@ -1073,64 +1099,141 @@ static int onode_delete(struct kv_onode *onode)
>  /* Create onode and allocate space for it */
>  static int onode_allocate_space(struct http_request *req, const char *account,
>  				uint32_t bucket_vid, const char *bucket,
> -				const char *name, struct kv_onode *onode)
> +				const char *name, struct kv_onode *onode,
> +				int creat_flags)
>  {
>  	char vdi_name[SD_MAX_VDI_LEN];
>  	uint32_t data_vid;
> +	uint64_t len;
>  	int ret = SD_RES_SUCCESS;
> +	bool object_exists;
>  
>  	sys->cdrv->lock(bucket_vid);
>  	ret = onode_lookup_nolock(onode, bucket_vid, name);
> -	if (ret == SD_RES_SUCCESS) {
> -		/* if the exists onode has not been uploaded complete */
> -		if (onode->flags != ONODE_COMPLETE) {
> -			ret = SD_RES_INCOMPLETE;
> -			sd_err("The exists onode %s is incomplete", name);
> -			goto out;
> -		}
> -		/* For overwrite, we delete old object and then create */
> -		ret = onode_delete(onode);
> -		if (ret != SD_RES_SUCCESS) {
> -			sd_err("Failed to delete exists object %s", name);
> +
> +	if (is_append(creat_flags)) {
> +		if (ret == SD_RES_SUCCESS && onode->flags == ONODE_COMPLETE) {

check ret in the first place would simplify the procedures.

I think onode_allocate_space is fairly bloated with create_flats, so would
better to add some helper functions to modulaize it.

Thanks
Yuan



More information about the sheepdog mailing list