[sheepdog] [PATCH v1 1/2] sheep/http: store accounts and containers into hyper volume for object-storage
Robin Dong
robin.k.dong at gmail.com
Fri Dec 6 10:04:18 CET 2013
From: Robin Dong <sanbai at taobao.com>
Use hyper volumes (up to 16PB each) to store a large number of accounts
and containers.
Signed-off-by: Robin Dong <sanbai at taobao.com>
---
sheep/http/http.c | 2 +
sheep/http/http.h | 1 +
sheep/http/kv.c | 630 +++++++++++++++++++++++++++++++++++++++++++++++------
sheep/http/kv.h | 20 +-
sheep/http/s3.c | 4 +-
sheep/http/swift.c | 98 ++++++---
6 files changed, 654 insertions(+), 101 deletions(-)
diff --git a/sheep/http/http.c b/sheep/http/http.c
index 04ef364..cae3f44 100644
--- a/sheep/http/http.c
+++ b/sheep/http/http.c
@@ -52,6 +52,7 @@ static inline const char *strstatus(enum http_status status)
[NO_CONTENT] = "204 No Content",
[PARTIAL_CONTENT] = "206 Partial Content",
[BAD_REQUEST] = "400 Bad Request",
+ [UNAUTHORIZED] = "401 Unauthorized",
[NOT_FOUND] = "404 Not Found",
[METHOD_NOT_ALLOWED] = "405 Method Not Allowed",
[CONFLICT] = "409 Conflict",
@@ -233,6 +234,7 @@ static void http_run_request(struct work *work)
if (method != NULL) {
method(req);
+ sd_debug("req->status %d", req->status);
if (req->status != UNKNOWN)
goto out;
}
diff --git a/sheep/http/http.h b/sheep/http/http.h
index 046d412..a8527d1 100644
--- a/sheep/http/http.h
+++ b/sheep/http/http.h
@@ -32,6 +32,7 @@ enum http_status {
NO_CONTENT, /* 204 */
PARTIAL_CONTENT, /* 206 */
BAD_REQUEST, /* 400 */
+ UNAUTHORIZED, /* 401 */
NOT_FOUND, /* 404 */
METHOD_NOT_ALLOWED, /* 405 */
CONFLICT, /* 409 */
diff --git a/sheep/http/kv.c b/sheep/http/kv.c
index 8113389..55a7e24 100644
--- a/sheep/http/kv.c
+++ b/sheep/http/kv.c
@@ -16,14 +16,25 @@
#define FOR_EACH_VDI(nr, vdis) FOR_EACH_BIT(nr, vdis, SD_NR_VDIS)
-static int lookup_bucket(struct http_request *req, const char *bucket,
- uint32_t *vid)
+/*
+ * Per-bucket (container) record kept inside the account's hyper volume.
+ * The code in this file treats a slot whose vdi_id is 0 as unused.
+ */
+struct bucket_inode {
+ char bucket_name[SD_MAX_BUCKET_NAME];
+ uint64_t obj_count;
+ uint64_t bytes_used;
+ uint32_t vdi_id; /* vdi that stores this bucket's kv_onodes */
+ uint32_t pad;
+ /* with SD_MAX_BUCKET_NAME == 64 this rounds the record up to 128 bytes */
+ uint64_t reserved[SD_MAX_BUCKET_NAME/sizeof(uint64_t) - 3];
+};
+
+#define MAX_BUCKETS (SD_MAX_VDI_SIZE / sizeof(struct bucket_inode))
+#define BUCKETS_PER_SD_OBJ (SD_DATA_OBJ_SIZE / sizeof(struct bucket_inode))
+
+static int lookup_vdi(const char *name, uint32_t *vid)
{
int ret;
struct vdi_info info = {};
struct vdi_iocb iocb = {
- .name = bucket,
- .data_len = strlen(bucket),
+ .name = name,
+ .data_len = strlen(name),
};
ret = vdi_lookup(&iocb, &info);
@@ -32,27 +43,23 @@ static int lookup_bucket(struct http_request *req, const char *bucket,
*vid = info.vid;
break;
case SD_RES_NO_VDI:
- sd_info("no such bucket %s", bucket);
- http_response_header(req, NOT_FOUND);
- return -1;
+ sd_info("no such vdi %s", name);
+ break;
default:
- sd_err("%s: bucket %s", sd_strerror(ret), bucket);
- http_response_header(req, INTERNAL_SERVER_ERROR);
- return -1;
+ sd_err("Failed to find vdi %s %s", name, sd_strerror(ret));
}
- return 0;
+ return ret;
}
-/* Bucket operations */
-
-int kv_create_bucket(struct http_request *req, const char *bucket)
+static int kv_create_hyper_volume(const char *name, uint32_t *vdi_id)
{
struct sd_req hdr;
+ struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
int ret;
char buf[SD_MAX_VDI_LEN] = {0};
- pstrcpy(buf, SD_MAX_VDI_LEN, bucket);
+ pstrcpy(buf, SD_MAX_VDI_LEN, name);
sd_init_req(&hdr, SD_OP_NEW_VDI);
hdr.flags = SD_FLAG_CMD_WRITE;
@@ -64,44 +71,50 @@ int kv_create_bucket(struct http_request *req, const char *bucket)
hdr.vdi.store_policy = 1;
ret = exec_local_req(&hdr, buf);
- switch (ret) {
- case SD_RES_SUCCESS:
- http_response_header(req, CREATED);
- break;
- case SD_RES_VDI_EXIST:
- http_response_header(req, ACCEPTED);
- break;
- default:
- sd_err("%s: bucket %s", sd_strerror(ret), bucket);
- http_response_header(req, INTERNAL_SERVER_ERROR);
- return -1;
- }
+ if (rsp->result != SD_RES_SUCCESS)
+ sd_err("Failed to create VDI %s: %s", name,
+ sd_strerror(rsp->result));
- return 0;
-}
+ if (vdi_id)
+ *vdi_id = rsp->vdi.vdi_id;
-int kv_read_bucket(struct http_request *req, const char *bucket)
-{
- /* TODO: read metadata of the bucket */
- return -1;
+ return ret;
}
-int kv_update_bucket(struct http_request *req, const char *bucket)
+/*
+ * Drop one data object: first send SD_OP_DELETE_CACHE for the oid, then
+ * SD_OP_DISCARD_OBJ. Returns SD_RES_SUCCESS, or the result of the first
+ * request that failed (the discard is not attempted if the first one fails).
+ */
+static int discard_data_obj(uint64_t oid)
{
- /* TODO: update metadata of the bucket */
- return -1;
+ int ret;
+ struct sd_req hdr;
+
+ sd_init_req(&hdr, SD_OP_DELETE_CACHE);
+ hdr.obj.oid = oid;
+
+ ret = exec_local_req(&hdr, NULL);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to execute request");
+ return ret;
+ }
+
+ sd_init_req(&hdr, SD_OP_DISCARD_OBJ);
+ hdr.obj.oid = oid;
+
+ ret = exec_local_req(&hdr, NULL);
+ if (ret != SD_RES_SUCCESS)
+ sd_err("Failed to discard data obj %lu %s", oid,
+ sd_strerror(ret));
+
+ return ret;
}
-/* TODO: return HTTP_CONFLICT when the bucket is not empty */
-int kv_delete_bucket(struct http_request *req, const char *bucket)
+static int kv_delete_vdi(const char *name)
{
int ret;
struct sd_req hdr;
char data[SD_MAX_VDI_LEN] = {0};
uint32_t vid;
- ret = lookup_bucket(req, bucket, &vid);
- if (ret < 0)
+ ret = lookup_vdi(name, &vid);
+ if (ret != SD_RES_SUCCESS)
return ret;
sd_init_req(&hdr, SD_OP_DELETE_CACHE);
@@ -110,60 +123,541 @@ int kv_delete_bucket(struct http_request *req, const char *bucket)
ret = exec_local_req(&hdr, NULL);
if (ret != SD_RES_SUCCESS) {
sd_err("failed to execute request");
- http_response_header(req, INTERNAL_SERVER_ERROR);
- return -1;
+ return ret;
}
sd_init_req(&hdr, SD_OP_DEL_VDI);
hdr.flags = SD_FLAG_CMD_WRITE;
hdr.data_length = sizeof(data);
- pstrcpy(data, SD_MAX_VDI_LEN, bucket);
+ pstrcpy(data, SD_MAX_VDI_LEN, name);
ret = exec_local_req(&hdr, data);
- if (ret == SD_RES_SUCCESS) {
- http_response_header(req, NO_CONTENT);
- return 0;
- } else {
- sd_err("%s: bucket %s", sd_strerror(ret), bucket);
+ if (ret != SD_RES_SUCCESS)
+ sd_err("Failed to delete vdi %s %s", name, sd_strerror(ret));
+
+ return ret;
+}
+
+/*
+ * An account is actually a hyper volume vdi (up to 16PB).
+ * All of its buckets (or containers, each described by a
+ * 'struct bucket_inode') are stored in this hyper vdi using a hashing
+ * algorithm. Each bucket also has its own hyper vdi named "account/bucket",
+ * which stores its 'struct kv_onode' entries.
+ *
+ * For example: account "coly" has two buckets "jetta" and "volvo"
+ *
+ *
+ * account vdi
+ * +-----------+---+--------------------------+---+--------------------------+--
+ * |name: coly |...|bucket_inode (name: jetta)|...|bucket_inode (name: volvo)|..
+ * +-----------+---+--------------------------+---+--------------------------+--
+ * | |
+ * / |
+ * bucket vdi / |
+ * +-----------------+-------+ <-- |
+ * |name: coly/jetta |.......| |
+ * +-----------------+-------+ /
+ * bucket vdi /
+ * +-----------------+------+ <----
+ * | name: coly/volvo|......|
+ * +-----------------+------+
+ */
+
+/* Account operations */
+
+/* Create the hyper volume that backs @account; returns the helper's result. */
+int kv_create_account(const char *account)
+{
+ /* the id of the new vdi is not used here, so no out-parameter is passed */
+ return kv_create_hyper_volume(account, NULL);
+}
+
+/* Per-bucket callback used when listing: gets the request, name and opaque. */
+typedef void (*list_cb)(struct http_request *req, const char *bucket,
+ void *opaque);
+
+/* State handed to list_buckets_cb() through traverse_btree(). */
+struct list_buckets_arg {
+ struct http_request *req; /* forwarded to cb */
+ const char *account; /* forwarded to cb as its opaque argument */
+ list_cb cb; /* may be NULL when only counting */
+ uint32_t bucket_counter; /* incremented once per used bucket slot */
+};
+
+/*
+ * traverse_btree() callback. For an extent that references a data object,
+ * read that object and visit every used bucket_inode slot in it: call the
+ * optional user callback and bump the bucket counter.
+ */
+static void list_buckets_cb(void *data, enum btree_node_type type, void *arg)
+{
+ struct list_buckets_arg *lbarg = arg;
+ struct sd_extent *ext = data;
+ struct bucket_inode *slots;
+ uint64_t oid;
+ int ret;
+
+ /* only extents with an allocated data object are of interest */
+ if (type != BTREE_EXT || !ext->vdi_id)
+ return;
+
+ slots = xzalloc(SD_DATA_OBJ_SIZE);
+
+ oid = vid_to_data_oid(ext->vdi_id, ext->idx);
+ ret = sd_read_object(oid, (char *)slots, SD_DATA_OBJ_SIZE, 0);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to read data object %lx", oid);
+ free(slots);
+ return;
+ }
+
+ /* walk all bucket_inode slots; vdi_id == 0 marks an unused one */
+ for (int n = 0; n < BUCKETS_PER_SD_OBJ; n++) {
+ struct bucket_inode *bnode = &slots[n];
+
+ if (bnode->vdi_id == 0)
+ continue;
+ if (lbarg->cb)
+ lbarg->cb(lbarg->req, bnode->bucket_name,
+ (void *)lbarg->account);
+ lbarg->bucket_counter++;
+ }
+
+ free(slots);
+}
+
+/*
+ * Get the number of buckets in this account.
+ * Looks up the account vdi, reads its inode and walks the btree with
+ * list_buckets_cb() (no user callback) to count used bucket slots.
+ * nr_buckets may be NULL. Returns SD_RES_SUCCESS or the failing step's
+ * result code.
+ */
+static int kv_get_account(const char *account, uint32_t *nr_buckets)
+{
+ struct list_buckets_arg arg = {NULL, account, NULL, 0};
+ struct sd_inode *inode;
+ uint64_t oid;
+ uint32_t account_vid;
+ int ret;
+
+ ret = lookup_vdi(account, &account_vid);
+ if (ret != SD_RES_SUCCESS)
+ return ret;
+
+ /*
+ * A whole struct sd_inode is read here; it presumably embeds the full
+ * data-object index and is several MB (confirm in sheepdog_proto.h),
+ * so keep it on the heap rather than on the worker thread's stack.
+ */
+ inode = xzalloc(sizeof(*inode));
+
+ /* read account vdi out */
+ oid = vid_to_vdi_oid(account_vid);
+ ret = sd_read_object(oid, (char *)inode, sizeof(*inode), 0);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to read inode header %lx", oid);
+ goto out;
+ }
+
+ traverse_btree(sheep_bnode_reader, inode, list_buckets_cb, &arg);
+ if (nr_buckets)
+ *nr_buckets = arg.bucket_counter;
+out:
+ free(inode);
+ return ret;
+}
+
+/* Public wrapper around kv_get_account() that logs when the count fails. */
+int kv_read_account(const char *account, uint32_t *nr_buckets)
+{
+ int ret = kv_get_account(account, nr_buckets);
+
+ if (ret == SD_RES_SUCCESS)
+ return ret;
+
+ sd_err("Failed to get number of buckets in %s", account);
+ return ret;
+}
+
+/* Not implemented yet: always returns -1. */
+int kv_update_account(const char *account)
+{
+ /* TODO: update metadata of the account */
+ return -1;
+}
+
+/* Delete the vdi that backs @account; logs and returns the error on failure. */
+int kv_delete_account(const char *account)
+{
+ int ret = kv_delete_vdi(account);
+
+ if (ret == SD_RES_SUCCESS)
+ return ret;
+
+ sd_err("Failed to delete vdi %s", account);
+ return ret;
+}
+
+/* Bucket operations */
+
+/*
+ * Look up the vdi named @bucket and store its id in *vid on success.
+ * Unlike lookup_vdi(), failures are also reported to the client: 404 for
+ * SD_RES_NO_VDI, 500 for any other error. Returns the vdi_lookup() result.
+ */
+static int lookup_bucket(struct http_request *req, const char *bucket,
+ uint32_t *vid)
+{
+ int ret;
+ struct vdi_info info = {};
+ struct vdi_iocb iocb = {
+ .name = bucket,
+ .data_len = strlen(bucket),
+ };
+
+ ret = vdi_lookup(&iocb, &info);
+ switch (ret) {
+ case SD_RES_SUCCESS:
+ *vid = info.vid;
+ break;
+ case SD_RES_NO_VDI:
+ sd_info("no such bucket %s", bucket);
+ http_response_header(req, NOT_FOUND);
+ break;
+ default:
+ sd_err("Failed to find bucket %s %s", bucket, sd_strerror(ret));
http_response_header(req, INTERNAL_SERVER_ERROR);
- return -1;
}
+
+ return ret;
}
-int kv_list_buckets(struct http_request *req,
- void (*cb)(struct http_request *req, const char *bucket,
- void *opaque),
- void *opaque)
+/*
+ * Delete bucket(container) inode in account vdi.
+ * idx: the target hash position of bucket
+ * The whole data object that contains idx is scanned for a slot whose name
+ * matches; the bucket's own "account/bucket" vdi is deleted, and the slot is
+ * cleared (or the data object is discarded if it held only this bucket).
+ * Return the position of bucket_inode in sd-data-object if success
+ * Return BUCKETS_PER_SD_OBJ if bucket_inode is not found
+ * Return -1 if some error happened (including a missing data object)
+ */
+static int delete_bucket(struct sd_inode *account_inode, uint64_t idx,
+ const char *bucket)
{
- char buf[SD_INODE_HEADER_SIZE];
- struct sd_inode *inode = (struct sd_inode *)buf;
- unsigned long nr;
+ struct bucket_inode *bnode;
+ /* NULL so that the early "goto out" below can safely reach free() */
+ char *buf = NULL;
+ uint32_t vdi_id;
+ uint64_t oid;
+ uint64_t data_index = idx / BUCKETS_PER_SD_OBJ;
+ int offset = idx % BUCKETS_PER_SD_OBJ;
+ /* found stays -1 until the bucket is seen: slot 0 is a valid position */
+ int ret, i, empty_buckets = 0, found = -1;
+
+ vdi_id = INODE_GET_VID(account_inode, data_index);
+ if (!vdi_id) {
+ sd_err("the %lu in vdi %s is not exists", data_index,
+ account_inode->name);
+ ret = -1;
+ goto out;
+ }
- http_response_header(req, OK);
+ oid = vid_to_data_oid(account_inode->vdi_id, data_index);
+ buf = xzalloc(SD_DATA_OBJ_SIZE);
+ ret = sd_read_object(oid, buf, SD_DATA_OBJ_SIZE, 0);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to read inode header %lx", oid);
+ ret = -1;
+ goto out;
+ }
- FOR_EACH_VDI(nr, sys->vdi_inuse) {
- uint64_t oid;
- int ret;
+ /*
+ * No break after a match: the scan must cover the whole object so that
+ * empty_buckets can tell whether this was the last bucket in it.
+ */
+ for (i = 0; i < BUCKETS_PER_SD_OBJ; i++) {
+ char vdi_name[SD_MAX_VDI_LEN];
+ bnode = (struct bucket_inode *)
+ (buf + i * sizeof(struct bucket_inode));
+ /* count all empty buckets in this sd-data-obj */
+ if (bnode->vdi_id == 0) {
+ empty_buckets++;
+ continue;
+ }
+ if (strncmp(bnode->bucket_name, bucket, SD_MAX_BUCKET_NAME))
+ continue;
- oid = vid_to_vdi_oid(nr);
+ if (i < offset)
+ panic("postion of bucket inode %d is smaller than %d",
+ i, offset);
- ret = sd_read_object(oid, (char *)inode, SD_INODE_HEADER_SIZE,
- 0);
+ found = i;
+ /* found the bnode: clear the slot in the in-memory copy */
+ bnode->vdi_id = 0;
+ snprintf(vdi_name, SD_MAX_VDI_LEN, "%s/%s",
+ account_inode->name, bucket);
+
+ ret = kv_delete_vdi(vdi_name);
if (ret != SD_RES_SUCCESS) {
- sd_err("Failed to read inode header");
- continue;
+ sd_err("Failed to delete vdi %s", vdi_name);
+ ret = -1;
+ goto out;
+ }
+ sd_debug("delete vdi %s success", vdi_name);
+ }
+
+ if (found < 0) {
+ ret = BUCKETS_PER_SD_OBJ;
+ goto out;
+ }
+
+ /*
+ * if only this bucket_inode is in the sd-data-obj,
+ * then delete this sd-data-obj
+ */
+ if (empty_buckets == BUCKETS_PER_SD_OBJ - 1) {
+ ret = discard_data_obj(oid);
+ if (ret != SD_RES_SUCCESS) {
+ ret = -1;
+ goto out;
+ }
+ INODE_SET_VID(account_inode, data_index, 0);
+ ret = sd_inode_write_vid(sheep_bnode_writer, account_inode,
+ data_index, vdi_id, vdi_id, 0, false,
+ false);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to write inode %x", vdi_id);
+ ret = -1;
+ goto out;
}
+ sd_debug("discard obj %lx and update vdi %x success",
+ oid, vdi_id);
+ } else {
+ /*
+ * Persist only the cleared slot, at that slot's own offset.
+ * The loop index is past the end here, so use 'found'.
+ */
+ ret = sd_write_object(oid,
+ buf + found * sizeof(struct bucket_inode),
+ sizeof(struct bucket_inode),
+ found * sizeof(struct bucket_inode), false);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to write object %lx", oid);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ sd_debug("write object oid %lx success", oid);
+ ret = found;
+out:
+ free(buf);
+ return ret;
+}
- if (inode->name[0] == '\0') /* this VDI has been deleted */
+/*
+ * Add bucket(container) inode into account vdi.
+ * idx: the target hash position of bucket
+ * Starting at idx's slot, the first unused slot in the same data object is
+ * taken, a hyper volume named "account/bucket" is created for it, and the
+ * slot (or the whole object, if it is new) is written back.
+ * Return the position of bucket_inode in sd-data-object if success
+ * Return BUCKETS_PER_SD_OBJ if the data-object is full of bucket_inode
+ * Return -1 if some error happened (including a bucket name that is too long)
+ */
+static int add_bucket(struct sd_inode *account_inode, uint64_t idx,
+ const char *bucket)
+{
+ struct bucket_inode *bnode;
+ char *buf;
+ uint32_t vdi_id;
+ uint64_t oid;
+ uint64_t data_index = idx / BUCKETS_PER_SD_OBJ;
+ int offset = idx % BUCKETS_PER_SD_OBJ;
+ int ret, i;
+ bool create = false;
+
+ /*
+ * bucket_name is handed to the list callback as a C string, so the
+ * stored name must keep its terminating NUL: refuse names that would
+ * fill the whole field instead of storing them unterminated.
+ */
+ if (strlen(bucket) >= SD_MAX_BUCKET_NAME) {
+ sd_err("bucket name %s is too long", bucket);
+ return -1;
+ }
+
+ buf = xzalloc(SD_DATA_OBJ_SIZE);
+
+ vdi_id = INODE_GET_VID(account_inode, data_index);
+ oid = vid_to_data_oid(account_inode->vdi_id, data_index);
+ sd_debug("oid %x %lx %lx", account_inode->vdi_id, data_index, oid);
+ /* the data object exists */
+ if (vdi_id) {
+ ret = sd_read_object(oid, buf, SD_DATA_OBJ_SIZE, 0);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to read inode header %lx", oid);
+ ret = -1;
+ goto out;
+ }
+ } else
+ create = true;
+
+ sd_debug("bucket_inode offset %d %lu", offset, BUCKETS_PER_SD_OBJ);
+ for (i = offset; i < BUCKETS_PER_SD_OBJ; i++) {
+ char vdi_name[SD_MAX_VDI_LEN];
+ bnode = (struct bucket_inode *)
+ (buf + i * sizeof(struct bucket_inode));
+ if (bnode->vdi_id != 0)
continue;
- if (!vdi_is_snapshot(inode))
- cb(req, inode->name, opaque);
+ /* the bnode is not used */
+ pstrcpy(bnode->bucket_name, SD_MAX_BUCKET_NAME, bucket);
+ bnode->obj_count = 0;
+ bnode->bytes_used = 0;
+ snprintf(vdi_name, SD_MAX_VDI_LEN, "%s/%s",
+ account_inode->name, bucket);
+ ret = kv_create_hyper_volume(vdi_name, &(bnode->vdi_id));
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to create hyper volume %d", ret);
+ ret = -1;
+ goto out;
+ }
+ sd_debug("create hyper volume %s success", vdi_name);
+ break;
+ }
+
+ if (i >= BUCKETS_PER_SD_OBJ) {
+ ret = BUCKETS_PER_SD_OBJ;
+ goto out;
+ }
+
+ /* write bnode back to account-vdi */
+ if (create)
+ ret = sd_write_object(oid, buf, SD_DATA_OBJ_SIZE, 0, create);
+ else
+ ret = sd_write_object(oid, buf, sizeof(struct bucket_inode),
+ i * sizeof(struct bucket_inode), create);
+
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to write object %lx", oid);
+ ret = -1;
+ goto out;
+ }
+
+ sd_debug("write object oid %lx success", oid);
+
+ /* update index of vdi */
+ if (create) {
+ vdi_id = account_inode->vdi_id;
+ INODE_SET_VID(account_inode, data_index, vdi_id);
+ ret = sd_inode_write_vid(sheep_bnode_writer, account_inode,
+ data_index, vdi_id, vdi_id, 0, false,
+ false);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to write inode %x", vdi_id);
+ ret = -1;
+ goto out;
+ }
+ sd_debug("write account inode success");
+ }
+
+ ret = i;
+out:
+ free(buf);
+ return ret;
+}
+
+/*
+ * Read the account inode into @account_inode and look up the bucket's
+ * "account/bucket" vdi.
+ * Returns -1 if the account vdi cannot be found, the sd_read_object() result
+ * if reading the account inode fails, and otherwise the lookup_vdi() result
+ * for the bucket vdi (SD_RES_SUCCESS when the bucket exists).
+ * NOTE(review): callers tell "no account" (-1) apart from "no bucket"
+ * by this mixed contract, so keep the -1 as it is.
+ */
+static int kv_get_bucket(struct sd_inode *account_inode, const char *account,
+ const char *bucket)
+{
+ char vdi_name[SD_MAX_VDI_LEN];
+ uint64_t oid;
+ uint32_t account_vid, bucket_vid;
+ int ret;
+
+ ret = lookup_vdi(account, &account_vid);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to find account %s", account);
+ return -1;
+ }
+
+ /* read account vdi out */
+ oid = vid_to_vdi_oid(account_vid);
+ ret = sd_read_object(oid, (char *)account_inode,
+ sizeof(struct sd_inode), 0);
+ if (ret != SD_RES_SUCCESS)
+ return ret;
+
+ /* find bucket vdi */
+ snprintf(vdi_name, SD_MAX_VDI_LEN, "%s/%s",
+ account_inode->name, bucket);
+
+ return lookup_vdi(vdi_name, &bucket_vid);
+}
+
+/*
+ * Create bucket @bucket inside account @account.
+ * Returns SD_RES_SUCCESS on success, SD_RES_VDI_EXIST if the bucket already
+ * exists, and another non-zero value (-1 or an SD_RES_* code) on failure.
+ */
+int kv_create_bucket(const char *account, const char *bucket)
+{
+ struct sd_inode *inode;
+ uint64_t hval, i;
+ int ret;
+
+ /*
+ * A whole struct sd_inode is read here; it presumably embeds the full
+ * data-object index and is several MB (confirm in sheepdog_proto.h),
+ * so keep it on the heap rather than on the worker thread's stack.
+ */
+ inode = xzalloc(sizeof(*inode));
+
+ ret = kv_get_bucket(inode, account, bucket);
+ /* if the bucket exists, return SD_RES_VDI_EXIST */
+ if (!ret) {
+ sd_err("bucket %s is exists.", bucket);
+ ret = SD_RES_VDI_EXIST;
+ goto out;
+ } else if (ret != SD_RES_NO_VDI) {
+ /*
+ * Only "bucket vdi not found" means it is safe to create.
+ * Anything else (missing account, failed inode read, ...)
+ * leaves the inode unusable, so stop here.
+ */
+ goto out;
+ }
+
+ sd_debug("read account inode success");
+
+ hval = sd_hash(bucket, strlen(bucket));
+ for (i = 0; i < MAX_BUCKETS; i++) {
+ uint64_t idx = (hval + i) % MAX_BUCKETS;
+ ret = add_bucket(inode, idx, bucket);
+ /* data-object is full */
+ if (ret == BUCKETS_PER_SD_OBJ) {
+ i += BUCKETS_PER_SD_OBJ;
+ continue;
+ } else if (ret < 0) {
+ sd_err("Failed to add bucket");
+ goto out;
+ }
+ /* add bucket success */
+ sd_debug("add bucket success");
+ break;
}
+ if (i >= MAX_BUCKETS) {
+ sd_err("Containers in vdi %s is full!", account);
+ ret = -1;
+ goto out;
+ }
- return 0;
+ /* add_bucket() returned a slot position; report plain success */
+ ret = SD_RES_SUCCESS;
+out:
+ free(inode);
+ return ret;
}
+/* Not implemented yet: always returns -1. */
+int kv_read_bucket(const char *account, const char *bucket)
+{
+ /* TODO: read metadata of the bucket */
+ return -1;
+}
+
+/* Not implemented yet: always returns -1. */
+int kv_update_bucket(const char *account, const char *bucket)
+{
+ /* TODO: update metadata of the bucket */
+ return -1;
+}
+
+/*
+ * Delete bucket @bucket from account @account.
+ * Returns SD_RES_SUCCESS on success and SD_RES_NO_VDI if the bucket does not
+ * exist; any other non-zero value (-1 or an SD_RES_* code) is a failure.
+ */
+int kv_delete_bucket(const char *account, const char *bucket)
+{
+ struct sd_inode *inode;
+ uint64_t hval, i;
+ int ret;
+
+ /*
+ * A whole struct sd_inode is read here; it presumably embeds the full
+ * data-object index and is several MB (confirm in sheepdog_proto.h),
+ * so keep it on the heap rather than on the worker thread's stack.
+ */
+ inode = xzalloc(sizeof(*inode));
+
+ ret = kv_get_bucket(inode, account, bucket);
+ if (ret) {
+ sd_err("Failed to get bucket");
+ goto out;
+ }
+
+ /* probe the same slot sequence that kv_create_bucket() uses */
+ hval = sd_hash(bucket, strlen(bucket));
+ for (i = 0; i < MAX_BUCKETS; i++) {
+ uint64_t idx = (hval + i) % MAX_BUCKETS;
+ ret = delete_bucket(inode, idx, bucket);
+ if (ret == BUCKETS_PER_SD_OBJ) {
+ i += BUCKETS_PER_SD_OBJ;
+ continue;
+ } else if (ret < 0) {
+ sd_err("Failed to delete bucket %d", ret);
+ goto out;
+ }
+ /* delete bucket success */
+ sd_debug("delete bucket success");
+ break;
+ }
+
+ if (i >= MAX_BUCKETS) {
+ sd_err("Can't find bucket %s", bucket);
+ ret = SD_RES_NO_VDI;
+ goto out;
+ }
+ /* delete_bucket() returned a slot position; report plain success */
+ ret = SD_RES_SUCCESS;
+out:
+ free(inode);
+ return ret;
+}
+
+/*
+ * Call @cb once for every bucket stored in an account.
+ * @opaque is the account name (a C string); it is also what @cb receives as
+ * its opaque argument. Returns SD_RES_SUCCESS, or the result code of the
+ * lookup or inode read that failed.
+ */
+int kv_list_buckets(struct http_request *req, list_cb cb, void *opaque)
+{
+ const char *account = (const char *)opaque;
+ struct list_buckets_arg arg = {req, account, cb, 0};
+ struct sd_inode *account_inode;
+ uint32_t account_vid;
+ uint64_t oid;
+ int ret;
+
+ ret = lookup_vdi(account, &account_vid);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to find account %s", account);
+ return ret;
+ }
+
+ /*
+ * A whole struct sd_inode is read here; it presumably embeds the full
+ * data-object index and is several MB (confirm in sheepdog_proto.h),
+ * so keep it on the heap rather than on the worker thread's stack.
+ */
+ account_inode = xzalloc(sizeof(*account_inode));
+
+ /* read account vdi out */
+ oid = vid_to_vdi_oid(account_vid);
+ ret = sd_read_object(oid, (char *)account_inode,
+ sizeof(*account_inode), 0);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to read account inode header %lx", oid);
+ goto out;
+ }
+
+ traverse_btree(sheep_bnode_reader, account_inode,
+ list_buckets_cb, &arg);
+out:
+ free(account_inode);
+ return ret;
+}
+
/* Object operations */
/* 4 KB header of kv object index node */
diff --git a/sheep/http/kv.h b/sheep/http/kv.h
index f0b09fe..1543a16 100644
--- a/sheep/http/kv.h
+++ b/sheep/http/kv.h
@@ -14,14 +14,24 @@
#include "http.h"
-#define SD_MAX_BUCKET_NAME 1024
+#define SD_MAX_BUCKET_NAME 64
#define SD_MAX_OBJECT_NAME 1024
+/* Account operations */
+int kv_create_account(const char *account);
+int kv_read_account(const char *account, uint32_t *nr_buckets);
+int kv_update_account(const char *account);
+int kv_delete_account(const char *account);
+int kv_list_accounts(struct http_request *req,
+ void (*cb)(struct http_request *req, const char *account,
+ void *opaque),
+ void *opaque);
+
/* Bucket operations */
-int kv_create_bucket(struct http_request *req, const char *bucket);
-int kv_read_bucket(struct http_request *req, const char *bucket);
-int kv_update_bucket(struct http_request *req, const char *bucket);
-int kv_delete_bucket(struct http_request *req, const char *bucket);
+int kv_create_bucket(const char *account, const char *bucket);
+int kv_read_bucket(const char *account, const char *bucket);
+int kv_update_bucket(const char *account, const char *bucket);
+int kv_delete_bucket(const char *account, const char *bucket);
int kv_list_buckets(struct http_request *req,
void (*cb)(struct http_request *req, const char *bucket,
void *opaque),
diff --git a/sheep/http/s3.c b/sheep/http/s3.c
index ca2efe3..1fff9d8 100644
--- a/sheep/http/s3.c
+++ b/sheep/http/s3.c
@@ -125,7 +125,7 @@ static void s3_get_bucket(struct http_request *req, const char *bucket)
static void s3_put_bucket(struct http_request *req, const char *bucket)
{
- kv_create_bucket(req, bucket);
+ kv_create_bucket("s3", bucket);
if (req->status == ACCEPTED)
s3_write_err_response(req, "BucketAlreadyExists",
@@ -139,7 +139,7 @@ static void s3_post_bucket(struct http_request *req, const char *bucket)
static void s3_delete_bucket(struct http_request *req, const char *bucket)
{
- kv_delete_bucket(req, bucket);
+ kv_delete_bucket("s3", bucket);
switch (req->status) {
case NOT_FOUND:
diff --git a/sheep/http/swift.c b/sheep/http/swift.c
index 7b3354a..d2e1e18 100644
--- a/sheep/http/swift.c
+++ b/sheep/http/swift.c
@@ -12,6 +12,10 @@
#include "http.h"
#include "kv.h"
+#define HTTP_REMOVE_ACCOUNT "HTTP_X_REMOVE_ACCOUNT_META_BOOK"
+
+static void swift_delete_account(struct http_request *req, const char *account);
+
static void make_bucket_path(char *bucket, size_t size, const char *account,
const char *container)
{
@@ -24,25 +28,35 @@ static void make_bucket_path(char *bucket, size_t size, const char *account,
+/*
+ * HEAD on an account: report how many containers it holds.
+ * Any kv_read_account() failure is answered with 401; otherwise an
+ * X-Account-Container-Count line is written, followed by the 204 header.
+ */
static void swift_head_account(struct http_request *req, const char *account)
{
- http_response_header(req, NOT_IMPLEMENTED);
+ uint32_t nr_buckets;
+ int ret;
+
+ ret = kv_read_account(account, &nr_buckets);
+ if (ret)
+ http_response_header(req, UNAUTHORIZED);
+ else {
+ http_request_writef(req, "X-Account-Container-Count: %u\n",
+ nr_buckets);
+ http_response_header(req, NO_CONTENT);
+ }
}
+/*
+ * Listing callback: writes one container name per line to the response.
+ * @opaque is the account name; it is only logged and checked for NULL.
+ */
static void swift_get_account_cb(struct http_request *req, const char *bucket,
void *opaque)
{
const char *account = opaque;
- char *args[2] = {};
-
- split_path(bucket, ARRAY_SIZE(args), args);
- if (args[1] != NULL && strcmp(args[0], account) == 0) {
- http_request_writes(req, args[1]);
+ sd_debug("account: %s bucket: %s", account, bucket);
+ if (account && bucket) {
+ http_request_writes(req, bucket);
http_request_writes(req, "\n");
}
}
+/*
+ * GET on an account: send the 200 header, then list its containers.
+ * NOTE(review): the header is sent before the lookup and the result of
+ * kv_list_buckets() is ignored, so a missing account still yields 200.
+ */
static void swift_get_account(struct http_request *req, const char *account)
{
+ http_response_header(req, OK);
+ http_request_writes(req, "\n");
kv_list_buckets(req, swift_get_account_cb, (void *)account);
}
@@ -53,24 +67,50 @@ static void swift_put_account(struct http_request *req, const char *account)
+/*
+ * POST on an account.
+ * If any FastCGI environment entry starts with HTTP_REMOVE_ACCOUNT, the
+ * request is handled as an account deletion. Otherwise the account is
+ * created: 201 on success, 202 if it already exists, 500 on any other error.
+ */
static void swift_post_account(struct http_request *req, const char *account)
{
- http_response_header(req, NOT_IMPLEMENTED);
+ char *p;
+ int ret;
+
+ for (int i = 0; (p = req->fcgx.envp[i]); ++i) {
+ /* delete account */
+ if (!strncmp(p, HTTP_REMOVE_ACCOUNT,
+ strlen(HTTP_REMOVE_ACCOUNT))) {
+ swift_delete_account(req, account);
+ return;
+ }
+ }
+ /* create account */
+ ret = kv_create_account(account);
+ if (ret == SD_RES_SUCCESS)
+ http_response_header(req, CREATED);
+ else if (ret == SD_RES_VDI_EXIST)
+ http_response_header(req, ACCEPTED);
+ else
+ http_response_header(req, INTERNAL_SERVER_ERROR);
}
-static void swift_delete_account_cb(struct http_request *req,
- const char *bucket, void *opaque)
+/*
+ * Delete an account, but only when it holds no containers.
+ * Responds 500 if the container count cannot be read or the delete fails,
+ * 409 if the account still has containers, and 200 on success.
+ */
+static void swift_delete_account(struct http_request *req, const char *account)
{
- const char *account = opaque;
- char *args[2] = {};
+ uint32_t nr_buckets;
+ int ret;
- split_path(bucket, ARRAY_SIZE(args), args);
+ ret = kv_read_account(account, &nr_buckets);
+ if (ret) {
+ http_response_header(req, INTERNAL_SERVER_ERROR);
+ return;
+ }
- if (args[1] != NULL && strcmp(args[0], account) == 0)
- kv_delete_bucket(req, bucket);
-}
+ if (nr_buckets) {
+ /* return HTTP_CONFLICT when the account is not empty */
+ http_response_header(req, CONFLICT);
+ return;
+ }
-static void swift_delete_account(struct http_request *req, const char *account)
-{
- kv_list_buckets(req, swift_delete_account_cb, (void *)account);
+ ret = kv_delete_account(account);
+ if (ret) {
+ http_response_header(req, INTERNAL_SERVER_ERROR);
+ return;
+ }
+ http_response_header(req, OK);
}
/* Operations on Containers */
@@ -100,10 +140,14 @@ static void swift_get_container(struct http_request *req, const char *account,
+/*
+ * PUT on a container: create it inside the account.
+ * Responds 201 on success, 202 if the container already exists, and 500 on
+ * any other result of kv_create_bucket().
+ */
static void swift_put_container(struct http_request *req, const char *account,
const char *container)
{
- char bucket[SD_MAX_BUCKET_NAME];
-
- make_bucket_path(bucket, sizeof(bucket), account, container);
- kv_create_bucket(req, bucket);
+ int ret;
+ ret = kv_create_bucket(account, container);
+ if (ret == SD_RES_SUCCESS)
+ http_response_header(req, CREATED);
+ else if (ret == SD_RES_VDI_EXIST)
+ http_response_header(req, ACCEPTED);
+ else
+ http_response_header(req, INTERNAL_SERVER_ERROR);
}
static void swift_post_container(struct http_request *req, const char *account,
@@ -115,10 +159,12 @@ static void swift_post_container(struct http_request *req, const char *account,
+/*
+ * DELETE on a container.
+ * Responds 204 only when kv_delete_bucket() reports success, 404 when the
+ * container does not exist, and 500 for every other failure, so that an
+ * error is never presented to the client as a successful delete.
+ */
static void swift_delete_container(struct http_request *req,
const char *account, const char *container)
{
- char bucket[SD_MAX_BUCKET_NAME];
-
- make_bucket_path(bucket, sizeof(bucket), account, container);
- kv_delete_bucket(req, bucket);
+ int ret;
+ ret = kv_delete_bucket(account, container);
+ if (ret == SD_RES_SUCCESS)
+ http_response_header(req, NO_CONTENT);
+ else if (ret == SD_RES_NO_VDI)
+ http_response_header(req, NOT_FOUND);
+ else
+ http_response_header(req, INTERNAL_SERVER_ERROR);
}
/* Operations on Objects */
--
1.7.12.4
More information about the sheepdog
mailing list