[sheepdog] [PATCH v4 1/3] sheep/http: store accounts and containers into hyper volume for object-storage
Robin Dong
robin.k.dong at gmail.com
Thu Dec 12 11:15:55 CET 2013
From: Robin Dong <sanbai at taobao.com>
Use a hyper volume (up to 16PB in size) to store a large number of accounts
and containers.
Signed-off-by: Robin Dong <sanbai at taobao.com>
---
sheep/http/http.c | 5 +
sheep/http/http.h | 1 +
sheep/http/kv.c | 646 +++++++++++++++++++++++++++++++++++++++++++++++------
sheep/http/kv.h | 22 +-
sheep/http/s3.c | 6 +-
sheep/http/swift.c | 108 ++++++---
6 files changed, 676 insertions(+), 112 deletions(-)
diff --git a/sheep/http/http.c b/sheep/http/http.c
index 04ef364..0081707 100644
--- a/sheep/http/http.c
+++ b/sheep/http/http.c
@@ -52,6 +52,7 @@ static inline const char *strstatus(enum http_status status)
[NO_CONTENT] = "204 No Content",
[PARTIAL_CONTENT] = "206 Partial Content",
[BAD_REQUEST] = "400 Bad Request",
+ [UNAUTHORIZED] = "401 Unauthorized",
[NOT_FOUND] = "404 Not Found",
[METHOD_NOT_ALLOWED] = "405 Method Not Allowed",
[CONFLICT] = "409 Conflict",
@@ -192,6 +193,9 @@ void http_response_header(struct http_request *req, enum http_status status)
req->status = status;
http_request_writef(req, "Status: %s\r\n", strstatus(status));
+ if (req->opcode == HTTP_GET && req->data_length > 0)
+ http_request_writef(req, "Content-Length: %lu\r\n",
+ req->data_length);
http_request_writes(req, "Content-type: text/plain;\r\n\r\n");
}
@@ -233,6 +237,7 @@ static void http_run_request(struct work *work)
if (method != NULL) {
method(req);
+ sd_debug("req->status %d", req->status);
if (req->status != UNKNOWN)
goto out;
}
diff --git a/sheep/http/http.h b/sheep/http/http.h
index 046d412..a8527d1 100644
--- a/sheep/http/http.h
+++ b/sheep/http/http.h
@@ -32,6 +32,7 @@ enum http_status {
NO_CONTENT, /* 204 */
PARTIAL_CONTENT, /* 206 */
BAD_REQUEST, /* 400 */
+ UNAUTHORIZED, /* 401 */
NOT_FOUND, /* 404 */
METHOD_NOT_ALLOWED, /* 405 */
CONFLICT, /* 409 */
diff --git a/sheep/http/kv.c b/sheep/http/kv.c
index 8113389..8d33e37 100644
--- a/sheep/http/kv.c
+++ b/sheep/http/kv.c
@@ -16,14 +16,30 @@
#define FOR_EACH_VDI(nr, vdis) FOR_EACH_BIT(nr, vdis, SD_NR_VDIS)
-static int lookup_bucket(struct http_request *req, const char *bucket,
- uint32_t *vid)
+struct bucket_inode_hdr {
+ char bucket_name[SD_MAX_BUCKET_NAME];
+ uint64_t obj_count;
+ uint64_t bytes_used;
+ uint32_t onode_vid;
+};
+
+struct bucket_inode {
+ union {
+ struct bucket_inode_hdr hdr;
+ uint8_t data[SD_MAX_BUCKET_NAME << 1];
+ };
+};
+
+#define MAX_BUCKETS (SD_MAX_VDI_SIZE / sizeof(struct bucket_inode))
+#define BUCKETS_PER_SD_OBJ (SD_DATA_OBJ_SIZE / sizeof(struct bucket_inode))
+
+static int lookup_vdi(const char *name, uint32_t *vid)
{
int ret;
struct vdi_info info = {};
struct vdi_iocb iocb = {
- .name = bucket,
- .data_len = strlen(bucket),
+ .name = name,
+ .data_len = strlen(name),
};
ret = vdi_lookup(&iocb, &info);
@@ -32,27 +48,23 @@ static int lookup_bucket(struct http_request *req, const char *bucket,
*vid = info.vid;
break;
case SD_RES_NO_VDI:
- sd_info("no such bucket %s", bucket);
- http_response_header(req, NOT_FOUND);
- return -1;
+ sd_info("no such vdi %s", name);
+ break;
default:
- sd_err("%s: bucket %s", sd_strerror(ret), bucket);
- http_response_header(req, INTERNAL_SERVER_ERROR);
- return -1;
+ sd_err("Failed to find vdi %s %s", name, sd_strerror(ret));
}
- return 0;
+ return ret;
}
-/* Bucket operations */
-
-int kv_create_bucket(struct http_request *req, const char *bucket)
+static int kv_create_hyper_volume(const char *name, uint32_t *vdi_id)
{
struct sd_req hdr;
+ struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
int ret;
char buf[SD_MAX_VDI_LEN] = {0};
- pstrcpy(buf, SD_MAX_VDI_LEN, bucket);
+ pstrcpy(buf, SD_MAX_VDI_LEN, name);
sd_init_req(&hdr, SD_OP_NEW_VDI);
hdr.flags = SD_FLAG_CMD_WRITE;
@@ -64,104 +76,592 @@ int kv_create_bucket(struct http_request *req, const char *bucket)
hdr.vdi.store_policy = 1;
ret = exec_local_req(&hdr, buf);
+ if (rsp->result != SD_RES_SUCCESS)
+ sd_err("Failed to create VDI %s: %s", name,
+ sd_strerror(rsp->result));
+
+ if (vdi_id)
+ *vdi_id = rsp->vdi.vdi_id;
+
+ return ret;
+}
+
+static int discard_data_obj(uint64_t oid)
+{
+ int ret;
+ struct sd_req hdr;
+
+ sd_init_req(&hdr, SD_OP_DISCARD_OBJ);
+ hdr.obj.oid = oid;
+
+ ret = exec_local_req(&hdr, NULL);
+ if (ret != SD_RES_SUCCESS)
+ sd_err("Failed to discard data obj %lu %s", oid,
+ sd_strerror(ret));
+
+ return ret;
+}
+
+static int kv_delete_vdi(const char *name)
+{
+ int ret;
+ struct sd_req hdr;
+ char data[SD_MAX_VDI_LEN] = {0};
+ uint32_t vid;
+
+ ret = lookup_vdi(name, &vid);
+ if (ret != SD_RES_SUCCESS)
+ return ret;
+
+ sd_init_req(&hdr, SD_OP_DEL_VDI);
+ hdr.flags = SD_FLAG_CMD_WRITE;
+ hdr.data_length = sizeof(data);
+ pstrcpy(data, SD_MAX_VDI_LEN, name);
+
+ ret = exec_local_req(&hdr, data);
+ if (ret != SD_RES_SUCCESS)
+ sd_err("Failed to delete vdi %s %s", name, sd_strerror(ret));
+
+ return ret;
+}
+
+/*
+ * An account is actually a hyper volume vdi (up to 16PB).
+ * All the buckets (or containers, identified by 'struct bucket_inode') are
+ * stored in this hyper vdi using a hashing algorithm.
+ * Each bucket also has a hyper vdi named "account/bucket" which stores
+ * 'struct kv_onodes'.
+ *
+ * For example: account "coly" has two buckets "jetta" and "volvo"
+ *
+ *
+ * account vdi
+ * +-----------+---+--------------------------+---+--------------------------+--
+ * |name: coly |...|bucket_inode (name: jetta)|...|bucket_inode (name: volvo)|..
+ * +-----------+---+--------------------------+---+--------------------------+--
+ * | |
+ * / |
+ * bucket vdi / |
+ * +-----------------+-------+ <-- |
+ * |name: coly/jetta |.......| |
+ * +-----------------+-------+ /
+ * bucket vdi /
+ * +-----------------+------+ <----
+ * | name: coly/volvo|......|
+ * +-----------------+------+
+ */
+
+/* Account operations */
+
+int kv_create_account(const char *account)
+{
+ uint32_t vdi_id;
+ return kv_create_hyper_volume(account, &vdi_id);
+}
+
+typedef void (*list_cb)(struct http_request *req, const char *bucket,
+ void *opaque);
+
+struct list_buckets_arg {
+ struct http_request *req;
+ void *opaque;
+ list_cb cb;
+ uint32_t bucket_counter;
+};
+
+static void list_buckets_cb(void *data, enum btree_node_type type, void *arg)
+{
+ struct sd_extent *ext;
+ struct list_buckets_arg *lbarg = arg;
+ struct bucket_inode *bnode;
+ uint64_t oid;
+ char *buf = NULL;
+ int ret;
+
+ if (type == BTREE_EXT) {
+ ext = (struct sd_extent *)data;
+ if (!ext->vdi_id)
+ return;
+
+ buf = xzalloc(SD_DATA_OBJ_SIZE);
+
+ oid = vid_to_data_oid(ext->vdi_id, ext->idx);
+ ret = sd_read_object(oid, buf, SD_DATA_OBJ_SIZE, 0);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to read data object %lx", oid);
+ goto out;
+ }
+ /* loop all bucket_inodes in this data-object */
+ for (int i = 0; i < BUCKETS_PER_SD_OBJ; i++) {
+ bnode = (struct bucket_inode *)
+ (buf + i * sizeof(struct bucket_inode));
+ if (bnode->hdr.onode_vid == 0)
+ continue;
+ if (lbarg->cb)
+ lbarg->cb(lbarg->req, bnode->hdr.bucket_name,
+ (void *)lbarg->opaque);
+ lbarg->bucket_counter++;
+ }
+ }
+out:
+ free(buf);
+}
+
+/* get number of buckets in this account */
+static int kv_get_account(const char *account, uint32_t *nr_buckets)
+{
+ struct sd_inode inode;
+ uint64_t oid;
+ uint32_t account_vid;
+ int ret;
+
+ ret = lookup_vdi(account, &account_vid);
+ if (ret != SD_RES_SUCCESS)
+ return ret;
+
+ /* read account vdi out */
+ oid = vid_to_vdi_oid(account_vid);
+ ret = sd_read_object(oid, (char *)&inode, sizeof(struct sd_inode), 0);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to read inode header %lx", oid);
+ return ret;
+ }
+
+ struct list_buckets_arg arg = {NULL, NULL, NULL, 0};
+ traverse_btree(sheep_bnode_reader, &inode, list_buckets_cb, &arg);
+ if (nr_buckets)
+ *nr_buckets = arg.bucket_counter;
+
+ return SD_RES_SUCCESS;
+}
+
+int kv_read_account(const char *account, uint32_t *nr_buckets)
+{
+ int ret;
+
+ ret = kv_get_account(account, nr_buckets);
+ if (ret != SD_RES_SUCCESS)
+ sd_err("Failed to get number of buckets in %s", account);
+ return ret;
+}
+
+int kv_update_account(const char *account)
+{
+ /* TODO: update metadata of the account */
+ return -1;
+}
+
+int kv_delete_account(const char *account)
+{
+ int ret;
+
+ ret = kv_delete_vdi(account);
+ if (ret != SD_RES_SUCCESS)
+ sd_err("Failed to delete vdi %s", account);
+
+ return ret;
+}
+
+/* Bucket operations */
+
+static int lookup_bucket(struct http_request *req, const char *bucket,
+ uint32_t *vid)
+{
+ int ret;
+ struct vdi_info info = {};
+ struct vdi_iocb iocb = {
+ .name = bucket,
+ .data_len = strlen(bucket),
+ };
+
+ ret = vdi_lookup(&iocb, &info);
switch (ret) {
case SD_RES_SUCCESS:
- http_response_header(req, CREATED);
+ *vid = info.vid;
break;
- case SD_RES_VDI_EXIST:
- http_response_header(req, ACCEPTED);
+ case SD_RES_NO_VDI:
+ sd_info("no such bucket %s", bucket);
+ http_response_header(req, NOT_FOUND);
break;
default:
- sd_err("%s: bucket %s", sd_strerror(ret), bucket);
+ sd_err("Failed to find bucket %s %s", bucket, sd_strerror(ret));
http_response_header(req, INTERNAL_SERVER_ERROR);
- return -1;
}
- return 0;
+ return ret;
}
-int kv_read_bucket(struct http_request *req, const char *bucket)
+/*
+ * Delete a bucket (container) inode from the account vdi.
+ * idx: the target hash position of the bucket
+ * Return the position of bucket_inode in sd-data-object on success
+ * Return BUCKETS_PER_SD_OBJ if bucket_inode is not found
+ * Return -1 if an error happened
+ */
+static int delete_bucket(struct sd_inode *account_inode, uint64_t idx,
+ const char *bucket)
{
- /* TODO: read metadata of the bucket */
- return -1;
+ struct bucket_inode *bnode;
+ char *buf = NULL;
+ uint32_t vdi_id;
+ uint64_t oid;
+ uint64_t data_index = idx / BUCKETS_PER_SD_OBJ;
+ int offset = idx % BUCKETS_PER_SD_OBJ;
+ int ret, i, empty_buckets = 0, found = 0;
+
+ vdi_id = INODE_GET_VID(account_inode, data_index);
+ if (!vdi_id) {
+ sd_err("the %lu in vdi %s is not exists", data_index,
+ account_inode->name);
+ ret = -1;
+ goto out;
+ }
+
+ oid = vid_to_data_oid(account_inode->vdi_id, data_index);
+ buf = xzalloc(SD_DATA_OBJ_SIZE);
+ ret = sd_read_object(oid, buf, SD_DATA_OBJ_SIZE, 0);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to read inode header %lx", oid);
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < BUCKETS_PER_SD_OBJ; i++) {
+ char vdi_name[SD_MAX_VDI_LEN];
+ bnode = (struct bucket_inode *)
+ (buf + i * sizeof(struct bucket_inode));
+ /* count all empty buckets in this sd-data-obj */
+ if (bnode->hdr.onode_vid == 0) {
+ empty_buckets++;
+ continue;
+ }
+ if (strncmp(bnode->hdr.bucket_name, bucket, SD_MAX_BUCKET_NAME))
+ continue;
+
+ if (i < offset)
+ panic("postion of bucket inode %d is smaller than %d",
+ i, offset);
+
+ found = i;
+ /* find the bnode */
+ bnode->hdr.onode_vid = 0;
+ snprintf(vdi_name, SD_MAX_VDI_LEN, "%s/%s",
+ account_inode->name, bucket);
+
+ ret = kv_delete_vdi(vdi_name);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to delete vdi %s", vdi_name);
+ ret = -1;
+ goto out;
+ }
+ sd_debug("delete vdi %s success", vdi_name);
+ }
+
+ if (!found) {
+ ret = BUCKETS_PER_SD_OBJ;
+ goto out;
+ }
+
+ /*
+ * if only this bucket_inode is in the sd-data-obj,
+ * then delete this sd-data-obj
+ */
+ if (empty_buckets == BUCKETS_PER_SD_OBJ - 1) {
+ ret = discard_data_obj(oid);
+ if (ret != SD_RES_SUCCESS) {
+ ret = -1;
+ goto out;
+ }
+ INODE_SET_VID(account_inode, data_index, 0);
+ ret = sd_inode_write_vid(sheep_bnode_writer, account_inode,
+ data_index, vdi_id, vdi_id, 0, false,
+ false);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to write inode %x", vdi_id);
+ ret = -1;
+ goto out;
+ }
+ sd_debug("discard obj %lx and update vdi %x success",
+ oid, vdi_id);
+ } else {
+ ret = sd_write_object(oid, buf, sizeof(struct bucket_inode),
+ i * sizeof(struct bucket_inode), false);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to write object %lx", oid);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ sd_debug("write object oid %lx success", oid);
+ ret = found;
+out:
+ free(buf);
+ return ret;
}
-int kv_update_bucket(struct http_request *req, const char *bucket)
+/*
+ * Add a bucket (container) inode to the account vdi.
+ * idx: the target hash position of the bucket
+ * Return the position of bucket_inode in sd-data-object on success
+ * Return BUCKETS_PER_SD_OBJ if the data-object is full of bucket_inode
+ * Return -1 if an error happened
+ */
+static int add_bucket(struct sd_inode *account_inode, uint64_t idx,
+ const char *bucket)
{
- /* TODO: update metadata of the bucket */
- return -1;
+ struct bucket_inode *bnode;
+ char *buf = NULL;
+ uint32_t vdi_id;
+ uint64_t oid;
+ uint64_t data_index = idx / BUCKETS_PER_SD_OBJ;
+ int offset = idx % BUCKETS_PER_SD_OBJ;
+ int ret, i;
+ bool create = false;
+
+ buf = xzalloc(SD_DATA_OBJ_SIZE);
+
+ vdi_id = INODE_GET_VID(account_inode, data_index);
+ oid = vid_to_data_oid(account_inode->vdi_id, data_index);
+ sd_debug("oid %x %lx %lx", account_inode->vdi_id, data_index, oid);
+ /* the data object is exists */
+ if (vdi_id) {
+ ret = sd_read_object(oid, buf, SD_DATA_OBJ_SIZE, 0);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to read inode header %lx", oid);
+ ret = -1;
+ goto out;
+ }
+ } else
+ create = true;
+
+ sd_debug("bucket_inode offset %d %lu", offset, BUCKETS_PER_SD_OBJ);
+ for (i = offset; i < BUCKETS_PER_SD_OBJ; i++) {
+ char vdi_name[SD_MAX_VDI_LEN];
+ bnode = (struct bucket_inode *)
+ (buf + i * sizeof(struct bucket_inode));
+ if (bnode->hdr.onode_vid != 0)
+ continue;
+
+ /* the bnode not used */
+ strncpy(bnode->hdr.bucket_name, bucket, SD_MAX_BUCKET_NAME);
+ bnode->hdr.obj_count = 0;
+ bnode->hdr.bytes_used = 0;
+ snprintf(vdi_name, SD_MAX_VDI_LEN, "%s/%s",
+ account_inode->name, bucket);
+ ret = kv_create_hyper_volume(vdi_name, &(bnode->hdr.onode_vid));
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to create hyper volume %d", ret);
+ ret = -1;
+ goto out;
+ }
+ sd_debug("create hyper volume %s success", vdi_name);
+ break;
+ }
+
+ if (i >= BUCKETS_PER_SD_OBJ) {
+ ret = BUCKETS_PER_SD_OBJ;
+ goto out;
+ }
+
+ /* write bnode back to account-vdi */
+ if (create)
+ ret = sd_write_object(oid, buf, SD_DATA_OBJ_SIZE, 0, create);
+ else
+ ret = sd_write_object(oid, buf, sizeof(struct bucket_inode),
+ i * sizeof(struct bucket_inode), create);
+
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to write object %lx", oid);
+ ret = -1;
+ goto out;
+ }
+
+ sd_debug("write object oid %lx success", oid);
+
+ /* update index of vdi */
+ if (create) {
+ vdi_id = account_inode->vdi_id;
+ INODE_SET_VID(account_inode, data_index, vdi_id);
+ ret = sd_inode_write_vid(sheep_bnode_writer, account_inode,
+ data_index, vdi_id, vdi_id, 0, false,
+ false);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to write inode %x", vdi_id);
+ ret = -1;
+ goto out;
+ }
+ sd_debug("write account inode success");
+ }
+
+ ret = i;
+out:
+ free(buf);
+ return ret;
}
-/* TODO: return HTTP_CONFLICT when the bucket is not empty */
-int kv_delete_bucket(struct http_request *req, const char *bucket)
+static int kv_get_bucket(struct sd_inode *account_inode, uint32_t account_vid,
+ const char *account, const char *bucket)
{
+ char vdi_name[SD_MAX_VDI_LEN];
+ uint64_t oid;
+ uint32_t bucket_vid;
int ret;
- struct sd_req hdr;
- char data[SD_MAX_VDI_LEN] = {0};
- uint32_t vid;
- ret = lookup_bucket(req, bucket, &vid);
- if (ret < 0)
- return ret;
+ /* read account vdi out */
+ oid = vid_to_vdi_oid(account_vid);
+ ret = sd_read_object(oid, (char *)account_inode,
+ sizeof(struct sd_inode), 0);
+ if (ret != SD_RES_SUCCESS)
+ goto out;
- sd_init_req(&hdr, SD_OP_DELETE_CACHE);
- hdr.obj.oid = vid_to_vdi_oid(vid);
+ /* find bucket vdi */
+ snprintf(vdi_name, SD_MAX_VDI_LEN, "%s/%s",
+ account_inode->name, bucket);
- ret = exec_local_req(&hdr, NULL);
+ ret = lookup_vdi(vdi_name, &bucket_vid);
+out:
+ return ret;
+}
+
+int kv_create_bucket(const char *account, const char *bucket)
+{
+ struct sd_inode inode;
+ uint64_t hval, i;
+ uint32_t account_vid;
+ int ret;
+
+ ret = lookup_vdi(account, &account_vid);
if (ret != SD_RES_SUCCESS) {
- sd_err("failed to execute request");
- http_response_header(req, INTERNAL_SERVER_ERROR);
- return -1;
+ sd_err("Failed to find account %s", account);
+ return ret;
}
- sd_init_req(&hdr, SD_OP_DEL_VDI);
- hdr.flags = SD_FLAG_CMD_WRITE;
- hdr.data_length = sizeof(data);
- pstrcpy(data, SD_MAX_VDI_LEN, bucket);
-
- ret = exec_local_req(&hdr, data);
+ ret = kv_get_bucket(&inode, account_vid, account, bucket);
+ /*
+ * if lookup bucket success, kv_get_bucket will return SD_RES_SUCCESS,
+ * which means the bucket is already exists.
+ */
if (ret == SD_RES_SUCCESS) {
- http_response_header(req, NO_CONTENT);
- return 0;
- } else {
- sd_err("%s: bucket %s", sd_strerror(ret), bucket);
- http_response_header(req, INTERNAL_SERVER_ERROR);
+ sd_err("bucket %s is exists.", bucket);
+ ret = SD_RES_VDI_EXIST;
+ return ret;
+ } else if (ret != SD_RES_NO_VDI)
+ return ret;
+
+ /*
+ * if kv_get_bucket() return SD_RES_NO_VDI, it means we can
+ * create bucket normally now.
+ */
+
+ sd_debug("read account inode success");
+
+ hval = sd_hash(bucket, strlen(bucket));
+ for (i = 0; i < MAX_BUCKETS; i++) {
+ uint64_t idx = (hval + i) % MAX_BUCKETS;
+ ret = add_bucket(&inode, idx, bucket);
+ /* data-object is full */
+ if (ret == BUCKETS_PER_SD_OBJ) {
+ i += BUCKETS_PER_SD_OBJ;
+ continue;
+ } else if (ret < 0) {
+ sd_err("Failed to add bucket");
+ return ret;
+ }
+ /* add bucket success */
+ sd_debug("add bucket success");
+ break;
+ }
+
+ if (i >= MAX_BUCKETS) {
+ sd_err("Containers in vdi %s is full!", account);
return -1;
}
+ return 0;
}
-int kv_list_buckets(struct http_request *req,
- void (*cb)(struct http_request *req, const char *bucket,
- void *opaque),
- void *opaque)
+int kv_read_bucket(const char *account, const char *bucket)
{
- char buf[SD_INODE_HEADER_SIZE];
- struct sd_inode *inode = (struct sd_inode *)buf;
- unsigned long nr;
+ /* TODO: read metadata of the bucket */
+ return -1;
+}
- http_response_header(req, OK);
+int kv_update_bucket(const char *account, const char *bucket)
+{
+ /* TODO: update metadata of the bucket */
+ return -1;
+}
- FOR_EACH_VDI(nr, sys->vdi_inuse) {
- uint64_t oid;
- int ret;
+/* return SD_RES_NO_VDI if the bucket does not exist */
+int kv_delete_bucket(const char *account, const char *bucket)
+{
+ struct sd_inode inode;
+ uint64_t hval, i;
+ uint32_t account_vid;
+ int ret;
- oid = vid_to_vdi_oid(nr);
+ ret = lookup_vdi(account, &account_vid);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to find account %s", account);
+ return ret;
+ }
- ret = sd_read_object(oid, (char *)inode, SD_INODE_HEADER_SIZE,
- 0);
- if (ret != SD_RES_SUCCESS) {
- sd_err("Failed to read inode header");
+ ret = kv_get_bucket(&inode, account_vid, account, bucket);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to get bucket");
+ return ret;
+ }
+
+ hval = sd_hash(bucket, strlen(bucket));
+ for (i = 0; i < MAX_BUCKETS; i++) {
+ uint64_t idx = (hval + i) % MAX_BUCKETS;
+ ret = delete_bucket(&inode, idx, bucket);
+ if (ret == BUCKETS_PER_SD_OBJ) {
+ i += BUCKETS_PER_SD_OBJ;
continue;
+ } else if (ret < 0) {
+ sd_err("Failed to delete bucket %d", ret);
+ return ret;
}
+ /* delete bucket success */
+ sd_debug("delete bucket success");
+ break;
+ }
- if (inode->name[0] == '\0') /* this VDI has been deleted */
- continue;
+ if (i >= MAX_BUCKETS) {
+ sd_err("Can't find bucket %s", bucket);
+ return SD_RES_NO_VDI;
+ }
+ return SD_RES_SUCCESS;
+}
+
+int kv_list_buckets(struct http_request *req, const char *account, list_cb cb,
+ void *opaque)
+{
+ struct sd_inode account_inode;
+ uint32_t account_vid;
+ uint64_t oid;
+ int ret;
- if (!vdi_is_snapshot(inode))
- cb(req, inode->name, opaque);
+ ret = lookup_vdi(account, &account_vid);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to find account %s", account);
+ return ret;
}
- return 0;
+ /* read account vdi out */
+ oid = vid_to_vdi_oid(account_vid);
+ ret = sd_read_object(oid, (char *)&account_inode,
+ sizeof(struct sd_inode), 0);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("Failed to read account inode header %lx", oid);
+ return ret;
+ }
+
+ struct list_buckets_arg arg = {req, opaque, cb, 0};
+ traverse_btree(sheep_bnode_reader, &account_inode,
+ list_buckets_cb, &arg);
+ return SD_RES_SUCCESS;
}
/* Object operations */
diff --git a/sheep/http/kv.h b/sheep/http/kv.h
index f0b09fe..1774a36 100644
--- a/sheep/http/kv.h
+++ b/sheep/http/kv.h
@@ -14,15 +14,25 @@
#include "http.h"
-#define SD_MAX_BUCKET_NAME 1024
+#define SD_MAX_BUCKET_NAME 64
#define SD_MAX_OBJECT_NAME 1024
+/* Account operations */
+int kv_create_account(const char *account);
+int kv_read_account(const char *account, uint32_t *nr_buckets);
+int kv_update_account(const char *account);
+int kv_delete_account(const char *account);
+int kv_list_accounts(struct http_request *req,
+ void (*cb)(struct http_request *req, const char *account,
+ void *opaque),
+ void *opaque);
+
/* Bucket operations */
-int kv_create_bucket(struct http_request *req, const char *bucket);
-int kv_read_bucket(struct http_request *req, const char *bucket);
-int kv_update_bucket(struct http_request *req, const char *bucket);
-int kv_delete_bucket(struct http_request *req, const char *bucket);
-int kv_list_buckets(struct http_request *req,
+int kv_create_bucket(const char *account, const char *bucket);
+int kv_read_bucket(const char *account, const char *bucket);
+int kv_update_bucket(const char *account, const char *bucket);
+int kv_delete_bucket(const char *account, const char *bucket);
+int kv_list_buckets(struct http_request *req, const char *account,
void (*cb)(struct http_request *req, const char *bucket,
void *opaque),
void *opaque);
diff --git a/sheep/http/s3.c b/sheep/http/s3.c
index ca2efe3..8142bb5 100644
--- a/sheep/http/s3.c
+++ b/sheep/http/s3.c
@@ -58,7 +58,7 @@ static void s3_get_service(struct http_request *req)
{
bool print_header = true;
- kv_list_buckets(req, s3_get_service_cb, &print_header);
+ kv_list_buckets(req, "s3", s3_get_service_cb, &print_header);
http_request_writes(req, "</Buckets></ListAllMyBucketsResult>\r\n");
}
@@ -125,7 +125,7 @@ static void s3_get_bucket(struct http_request *req, const char *bucket)
static void s3_put_bucket(struct http_request *req, const char *bucket)
{
- kv_create_bucket(req, bucket);
+ kv_create_bucket("s3", bucket);
if (req->status == ACCEPTED)
s3_write_err_response(req, "BucketAlreadyExists",
@@ -139,7 +139,7 @@ static void s3_post_bucket(struct http_request *req, const char *bucket)
static void s3_delete_bucket(struct http_request *req, const char *bucket)
{
- kv_delete_bucket(req, bucket);
+ kv_delete_bucket("s3", bucket);
switch (req->status) {
case NOT_FOUND:
diff --git a/sheep/http/swift.c b/sheep/http/swift.c
index 7b3354a..3812cf1 100644
--- a/sheep/http/swift.c
+++ b/sheep/http/swift.c
@@ -9,9 +9,14 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include "strbuf.h"
#include "http.h"
#include "kv.h"
+#define HTTP_REMOVE_ACCOUNT "HTTP_X_REMOVE_ACCOUNT_META_BOOK"
+
+static void swift_delete_account(struct http_request *req, const char *account);
+
static void make_bucket_path(char *bucket, size_t size, const char *account,
const char *container)
{
@@ -24,26 +29,37 @@ static void make_bucket_path(char *bucket, size_t size, const char *account,
static void swift_head_account(struct http_request *req, const char *account)
{
- http_response_header(req, NOT_IMPLEMENTED);
+ uint32_t nr_buckets;
+ int ret;
+
+ ret = kv_read_account(account, &nr_buckets);
+ if (ret)
+ http_response_header(req, UNAUTHORIZED);
+ else {
+ http_request_writef(req, "X-Account-Container-Count: %u\n",
+ nr_buckets);
+ http_response_header(req, NO_CONTENT);
+ }
}
static void swift_get_account_cb(struct http_request *req, const char *bucket,
void *opaque)
{
- const char *account = opaque;
- char *args[2] = {};
-
- split_path(bucket, ARRAY_SIZE(args), args);
+ struct strbuf *buf = (struct strbuf *)opaque;
- if (args[1] != NULL && strcmp(args[0], account) == 0) {
- http_request_writes(req, args[1]);
- http_request_writes(req, "\n");
- }
+ if (bucket)
+ strbuf_addf(buf, "%s\n", bucket);
}
static void swift_get_account(struct http_request *req, const char *account)
{
- kv_list_buckets(req, swift_get_account_cb, (void *)account);
+ struct strbuf buf = STRBUF_INIT;
+
+ kv_list_buckets(req, account, swift_get_account_cb, (void *)&buf);
+ req->data_length = buf.len;
+ http_response_header(req, OK);
+ http_request_write(req, buf.buf, buf.len);
+ strbuf_release(&buf);
}
static void swift_put_account(struct http_request *req, const char *account)
@@ -53,24 +69,50 @@ static void swift_put_account(struct http_request *req, const char *account)
static void swift_post_account(struct http_request *req, const char *account)
{
- http_response_header(req, NOT_IMPLEMENTED);
+ char *p;
+ int ret;
+
+ for (int i = 0; (p = req->fcgx.envp[i]); ++i) {
+ /* delete account */
+ if (!strncmp(p, HTTP_REMOVE_ACCOUNT,
+ strlen(HTTP_REMOVE_ACCOUNT))) {
+ swift_delete_account(req, account);
+ return;
+ }
+ }
+ /* create account */
+ ret = kv_create_account(account);
+ if (ret == SD_RES_SUCCESS)
+ http_response_header(req, CREATED);
+ else if (ret == SD_RES_VDI_EXIST)
+ http_response_header(req, ACCEPTED);
+ else
+ http_response_header(req, INTERNAL_SERVER_ERROR);
}
-static void swift_delete_account_cb(struct http_request *req,
- const char *bucket, void *opaque)
+static void swift_delete_account(struct http_request *req, const char *account)
{
- const char *account = opaque;
- char *args[2] = {};
+ uint32_t nr_buckets;
+ int ret;
- split_path(bucket, ARRAY_SIZE(args), args);
+ ret = kv_read_account(account, &nr_buckets);
+ if (ret) {
+ http_response_header(req, INTERNAL_SERVER_ERROR);
+ return;
+ }
- if (args[1] != NULL && strcmp(args[0], account) == 0)
- kv_delete_bucket(req, bucket);
-}
+ if (nr_buckets) {
+ /* return HTTP_CONFLICT when the account is not empty */
+ http_response_header(req, CONFLICT);
+ return;
+ }
-static void swift_delete_account(struct http_request *req, const char *account)
-{
- kv_list_buckets(req, swift_delete_account_cb, (void *)account);
+ ret = kv_delete_account(account);
+ if (ret) {
+ http_response_header(req, INTERNAL_SERVER_ERROR);
+ return;
+ }
+ http_response_header(req, OK);
}
/* Operations on Containers */
@@ -100,10 +142,14 @@ static void swift_get_container(struct http_request *req, const char *account,
static void swift_put_container(struct http_request *req, const char *account,
const char *container)
{
- char bucket[SD_MAX_BUCKET_NAME];
-
- make_bucket_path(bucket, sizeof(bucket), account, container);
- kv_create_bucket(req, bucket);
+ int ret;
+ ret = kv_create_bucket(account, container);
+ if (ret == SD_RES_SUCCESS)
+ http_response_header(req, CREATED);
+ else if (ret == SD_RES_VDI_EXIST)
+ http_response_header(req, ACCEPTED);
+ else
+ http_response_header(req, INTERNAL_SERVER_ERROR);
}
static void swift_post_container(struct http_request *req, const char *account,
@@ -115,10 +161,12 @@ static void swift_post_container(struct http_request *req, const char *account,
static void swift_delete_container(struct http_request *req,
const char *account, const char *container)
{
- char bucket[SD_MAX_BUCKET_NAME];
-
- make_bucket_path(bucket, sizeof(bucket), account, container);
- kv_delete_bucket(req, bucket);
+ int ret;
+ ret = kv_delete_bucket(account, container);
+ if (ret == SD_RES_NO_VDI)
+ http_response_header(req, NOT_FOUND);
+ else
+ http_response_header(req, NO_CONTENT);
}
/* Operations on Objects */
--
1.7.12.4
More information about the sheepdog
mailing list