[sheepdog] [PATCH v2 1/3] sheep/http: add APPEND operation for PUT request
Robin Dong
robin.k.dong at gmail.com
Wed Mar 19 05:44:23 CET 2014
From: Robin Dong <sanbai at taobao.com>
We allow append writes for the PUT operation. When 'FLAG: append' is specified in the
http PUT request header, we append the new data at the tail of the existing object
instead of using 'delete-then-create' semantics. While we append to objects, we mark
them as ONODE_INIT. When all the append operations are done, we specify 'FLAG: eof'
in the PUT request header to finalize the whole transaction, which marks the
objects as ONODE_COMPLETE.
Signed-off-by: Robin Dong <sanbai at taobao.com>
---
v1-->v2:
1. change the code comments
2. add separate functions instead of adding 'if' conditions
3. use the extra space in the previous o_extent when appending
4. add "aligned block" data to the test case
sheep/http/http.c | 7 ++
sheep/http/http.h | 6 ++
sheep/http/kv.c | 226 +++++++++++++++++++++++++++++++++++++++++++++++++----
sheep/http/swift.c | 8 +-
4 files changed, 229 insertions(+), 18 deletions(-)
diff --git a/sheep/http/http.c b/sheep/http/http.c
index 3027153..382937f 100644
--- a/sheep/http/http.c
+++ b/sheep/http/http.c
@@ -202,6 +202,13 @@ static int request_init_operation(struct http_request *req)
if (!strcmp("true", p))
req->force = true;
}
+ p = FCGX_GetParam("HTTP_FLAG", env);
+ if (p && p[0] != '\0') {
+ if (!strcmp("append", p))
+ req->append = true;
+ else if (!strcmp("eof", p))
+ req->eof = true;
+ }
req->status = UNKNOWN;
diff --git a/sheep/http/http.h b/sheep/http/http.h
index 06dfd5d..a900402 100644
--- a/sheep/http/http.h
+++ b/sheep/http/http.h
@@ -50,6 +50,8 @@ struct http_request {
uint64_t data_length;
uint64_t offset;
bool force;
+ bool append;
+ bool eof;
};
struct http_driver {
@@ -143,6 +145,10 @@ int kv_iterate_bucket(const char *account,
/* Object operations */
int kv_create_object(struct http_request *req, const char *account,
const char *bucket, const char *object);
+int kv_append_object(struct http_request *req, const char *account,
+ const char *bucket, const char *object);
+int kv_complete_object(struct http_request *req, const char *account,
+ const char *bucket, const char *object);
int kv_read_object(struct http_request *req, const char *account,
const char *bucket, const char *object);
int kv_read_object_meta(struct http_request *req, const char *account,
diff --git a/sheep/http/kv.c b/sheep/http/kv.c
index ff3bfd8..9a653b4 100644
--- a/sheep/http/kv.c
+++ b/sheep/http/kv.c
@@ -672,7 +672,7 @@ static int onode_allocate_extents(struct kv_onode *onode,
{
uint64_t start = 0, count;
int ret;
- uint32_t data_vid = onode->data_vid;
+ uint32_t data_vid = onode->data_vid, idx;
count = DIV_ROUND_UP(req->data_length, SD_DATA_OBJ_SIZE);
sys->cdrv->lock(data_vid);
@@ -693,9 +693,10 @@ static int onode_allocate_extents(struct kv_onode *onode,
goto out;
}
- onode->o_extent[0].start = start;
- onode->o_extent[0].count = count;
- onode->nr_extent = 1;
+ idx = onode->nr_extent;
+ onode->o_extent[idx].start = start;
+ onode->o_extent[idx].count = count;
+ onode->nr_extent++;
out:
return ret;
}
@@ -704,7 +705,7 @@ static int onode_populate_extents(struct kv_onode *onode,
struct http_request *req)
{
ssize_t size;
- uint64_t start = onode->o_extent[0].start;
+ uint64_t start = onode->o_extent[onode->nr_extent - 1].start;
uint64_t done = 0, total, offset;
uint64_t write_buffer_size = MIN(kv_rw_buffer, req->data_length);
int ret = SD_RES_SUCCESS;
@@ -758,7 +759,20 @@ static int onode_allocate_data(struct kv_onode *onode, struct http_request *req)
}
onode->ctime = get_seconds();
- onode->size = req->data_length;
+ onode->size += req->data_length;
+out:
+ return ret;
+}
+
+static int onode_append_data(struct kv_onode *onode, struct http_request *req)
+{
+ int ret = SD_RES_SUCCESS;
+
+ ret = onode_allocate_extents(onode, req);
+ if (ret != SD_RES_SUCCESS)
+ goto out;
+
+ onode->size += req->data_length;
out:
return ret;
}
@@ -800,6 +814,29 @@ out:
return ret;
}
+static int onode_populate_append_data(struct kv_onode *onode,
+ struct http_request *req)
+{
+ uint64_t len;
+ int ret = SD_RES_SUCCESS;
+
+ onode->mtime = get_seconds();
+
+ ret = onode_populate_extents(onode, req);
+ if (ret != SD_RES_SUCCESS)
+ goto out;
+ len = sizeof(struct onode_extent) * onode->nr_extent;
+ ret = sd_write_object(onode->oid, (char *)onode,
+ ONODE_HDR_SIZE + len, 0, false);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to write mtime and flags of onode %s",
+ onode->name);
+ goto out;
+ }
+out:
+ return ret;
+}
+
static int onode_do_create(struct kv_onode *onode, struct sd_inode *inode,
uint32_t idx, bool create)
{
@@ -881,16 +918,22 @@ out:
static int onode_free_data(struct kv_onode *onode)
{
uint32_t data_vid = onode->data_vid;
- int ret = SD_RES_SUCCESS;
+ int ret = SD_RES_SUCCESS, i;
/* it don't need to free data for inlined onode */
if (!onode->inlined) {
sys->cdrv->lock(data_vid);
- ret = oalloc_free(data_vid, onode->o_extent[0].start,
- onode->o_extent[0].count);
+ for (i = 0; i < onode->nr_extent; i++) {
+ ret = oalloc_free(data_vid, onode->o_extent[i].start,
+ onode->o_extent[i].count);
+ if (ret != SD_RES_SUCCESS)
+ sd_err("failed to free start: %"PRIu64
+ ", count: %"PRIu64", for %s",
+ onode->o_extent[i].start,
+ onode->o_extent[i].count,
+ onode->name);
+ }
sys->cdrv->unlock(data_vid);
- if (ret != SD_RES_SUCCESS)
- sd_err("failed to free %s", onode->name);
}
return ret;
}
@@ -1070,6 +1113,30 @@ static int onode_delete(struct kv_onode *onode)
return SD_RES_SUCCESS;
}
+static int
+onode_create_and_update_bnode(struct http_request *req, const char *account,
+ uint32_t bucket_vid, const char *bucket,
+ uint32_t data_vid, struct kv_onode *onode)
+{
+ int ret;
+
+ ret = onode_create(onode, bucket_vid);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("failed to create onode for %s", onode->name);
+ onode_delete(onode);
+ goto out;
+ }
+
+ ret = bnode_update(account, bucket, req->data_length, true);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("failed to update bucket for %s", onode->name);
+ onode_delete(onode);
+ goto out;
+ }
+out:
+ return ret;
+}
+
/* Create onode and allocate space for it */
static int onode_allocate_space(struct http_request *req, const char *account,
uint32_t bucket_vid, const char *bucket,
@@ -1120,17 +1187,103 @@ static int onode_allocate_space(struct http_request *req, const char *account,
goto out;
}
- ret = onode_create(onode, bucket_vid);
+ ret = onode_create_and_update_bnode(req, account, bucket_vid, bucket,
+ data_vid, onode);
+out:
+ sys->cdrv->unlock(bucket_vid);
+ return ret;
+}
+
+static int onode_append_space(struct http_request *req, const char *account,
+ uint32_t bucket_vid, const char *bucket,
+ const char *name, struct kv_onode *onode)
+{
+ char vdi_name[SD_MAX_VDI_LEN];
+ uint32_t data_vid;
+ uint64_t len;
+ int ret = SD_RES_SUCCESS;
+ bool object_exists = false;
+
+ sys->cdrv->lock(bucket_vid);
+ ret = onode_lookup_nolock(onode, bucket_vid, name);
+
+ if (ret == SD_RES_SUCCESS) {
+ object_exists = true;
+ if (onode->flags == ONODE_COMPLETE) {
+ /* Not allowed "append" to a COMPLETED onode */
+ sd_err("Failed to append data to the object %s, which"
+ " is marked COMPLETE", onode->name);
+ goto out;
+ }
+ }
+
+ snprintf(vdi_name, SD_MAX_VDI_LEN, "%s/%s/allocator", account, bucket);
+ ret = sd_lookup_vdi(vdi_name, &data_vid);
+ if (ret != SD_RES_SUCCESS)
+ goto out;
+
+ if (!object_exists) {
+ memset(onode, 0, sizeof(*onode));
+ pstrcpy(onode->name, sizeof(onode->name), name);
+ onode->data_vid = data_vid;
+ onode->flags = ONODE_INIT;
+ }
+
+ ret = onode_append_data(onode, req);
if (ret != SD_RES_SUCCESS) {
- sd_err("failed to create onode for %s", name);
- onode_free_data(onode);
+ sd_err("failed to write data for %s", name);
goto out;
}
- ret = bnode_update(account, bucket, req->data_length, true);
+ if (!object_exists)
+ ret = onode_create_and_update_bnode(req, account, bucket_vid,
+ bucket, data_vid, onode);
+ else {
+ /* update new appended o_extent[] */
+ len = sizeof(struct onode_extent) * onode->nr_extent;
+ ret = sd_write_object(onode->oid, (char *)onode,
+ ONODE_HDR_SIZE + len, 0, 0);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to write o_extent[] for %s %s",
+ onode->name, sd_strerror(ret));
+ goto out;
+ }
+ }
+out:
+ sys->cdrv->unlock(bucket_vid);
+ return ret;
+}
+
+int kv_complete_object(struct http_request *req, const char *account,
+ const char *bucket, const char *object)
+{
+ char vdi_name[SD_MAX_VDI_LEN];
+ struct kv_onode *onode = NULL;
+ uint32_t bucket_vid;
+ int ret;
+
+ snprintf(vdi_name, SD_MAX_VDI_LEN, "%s/%s", account, bucket);
+ ret = sd_lookup_vdi(vdi_name, &bucket_vid);
+ if (ret != SD_RES_SUCCESS)
+ goto out;
+
+ onode = xzalloc(sizeof(*onode));
+
+ sys->cdrv->lock(bucket_vid);
+ ret = onode_lookup_nolock(onode, bucket_vid, object);
if (ret != SD_RES_SUCCESS) {
- sd_err("failed to update bucket for %s", name);
- onode_delete(onode);
+ sd_err("Failed to lookup onode %s (%s)", object,
+ sd_strerror(ret));
+ goto out;
+ }
+
+ /* update flag of onode */
+ onode->flags = ONODE_COMPLETE;
+ ret = sd_write_object(onode->oid, (char *)onode, ONODE_HDR_SIZE, 0,
+ false);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to update onode %s to COMPLETE (%s)",
+ onode->name, sd_strerror(ret));
goto out;
}
out:
@@ -1204,6 +1357,45 @@ out:
return ret;
}
+/*
+ * We allow append write for PUT operation. When 'FLAG: append' is specified
+ * in the http PUT request header, we append the new data at the tail of the
+ * existing object instead of a 'delete-then-create' semantic.
+ * When we append objects, we mark them as ONODE_INIT. When all the append
+ * operations are done, we specify 'FLAG: eof' in the PUT request header to
+ * finalize the whole transaction, which marks the
+ * objects as ONODE_COMPLETE.
+ */
+int kv_append_object(struct http_request *req, const char *account,
+ const char *bucket, const char *name)
+{
+ char vdi_name[SD_MAX_VDI_LEN];
+ struct kv_onode *onode = NULL;
+ uint32_t bucket_vid;
+ int ret;
+
+ snprintf(vdi_name, SD_MAX_VDI_LEN, "%s/%s", account, bucket);
+ ret = sd_lookup_vdi(vdi_name, &bucket_vid);
+ if (ret != SD_RES_SUCCESS)
+ goto out;
+
+ onode = xzalloc(sizeof(*onode));
+ ret = onode_append_space(req, account, bucket_vid, bucket, name, onode);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to create onode and allocate space %s", name);
+ goto out;
+ }
+
+ ret = onode_populate_append_data(onode, req);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to write data to onode %s", name);
+ goto out;
+ }
+out:
+ free(onode);
+ return ret;
+}
+
int kv_read_object(struct http_request *req, const char *account,
const char *bucket, const char *name)
{
diff --git a/sheep/http/swift.c b/sheep/http/swift.c
index e5707ee..2e77d15 100644
--- a/sheep/http/swift.c
+++ b/sheep/http/swift.c
@@ -249,7 +249,13 @@ static void swift_put_object(struct http_request *req, const char *account,
{
int ret;
- ret = kv_create_object(req, account, container, object);
+ if (req->eof)
+ ret = kv_complete_object(req, account, container, object);
+ else if (req->append)
+ ret = kv_append_object(req, account, container, object);
+ else
+ ret = kv_create_object(req, account, container, object);
+
switch (ret) {
case SD_RES_SUCCESS:
http_response_header(req, CREATED);
--
1.7.12.4
More information about the sheepdog
mailing list