[sheepdog] [PATCH v2 1/3] sheep/http: store accounts and containers into hyper volume for object-storage

Robin Dong robin.k.dong at gmail.com
Tue Dec 10 11:41:04 CET 2013


From: Robin Dong <sanbai at taobao.com>

Use a hyper volume (size up to 16PB) to store a large number of accounts
and containers.

Signed-off-by: Robin Dong <sanbai at taobao.com>
---
 sheep/http/http.c  |   5 +
 sheep/http/http.h  |   1 +
 sheep/http/kv.c    | 627 ++++++++++++++++++++++++++++++++++++++++++++++-------
 sheep/http/kv.h    |  22 +-
 sheep/http/s3.c    |   6 +-
 sheep/http/swift.c | 107 ++++++---
 6 files changed, 656 insertions(+), 112 deletions(-)

diff --git a/sheep/http/http.c b/sheep/http/http.c
index b3bbb79..577b163 100644
--- a/sheep/http/http.c
+++ b/sheep/http/http.c
@@ -52,6 +52,7 @@ static inline const char *strstatus(enum http_status status)
 		[NO_CONTENT] = "204 No Content",
 		[PARTIAL_CONTENT] = "206 Partial Content",
 		[BAD_REQUEST] = "400 Bad Request",
+		[UNAUTHORIZED] = "401 Unauthorized",
 		[NOT_FOUND] = "404 Not Found",
 		[METHOD_NOT_ALLOWED] = "405 Method Not Allowed",
 		[CONFLICT] = "409 Conflict",
@@ -192,6 +193,9 @@ void http_response_header(struct http_request *req, enum http_status status)
 
 	req->status = status;
 	http_request_writef(req, "Status: %s\r\n", strstatus(status));
+	if (req->opcode == HTTP_GET && req->data_length > 0)
+		http_request_writef(req, "Content-Length: %lu\r\n",
+				    req->data_length);
 	http_request_writes(req, "Content-type: text/plain;\r\n\r\n");
 }
 
@@ -233,6 +237,7 @@ static void http_run_request(struct work *work)
 
 		if (method != NULL) {
 			method(req);
+			sd_debug("req->status %d", req->status);
 			if (req->status != UNKNOWN)
 				goto out;
 		}
diff --git a/sheep/http/http.h b/sheep/http/http.h
index 046d412..a8527d1 100644
--- a/sheep/http/http.h
+++ b/sheep/http/http.h
@@ -32,6 +32,7 @@ enum http_status {
 	NO_CONTENT,                     /* 204 */
 	PARTIAL_CONTENT,                /* 206 */
 	BAD_REQUEST,                    /* 400 */
+	UNAUTHORIZED,			/* 401 */
 	NOT_FOUND,                      /* 404 */
 	METHOD_NOT_ALLOWED,             /* 405 */
 	CONFLICT,                       /* 409 */
diff --git a/sheep/http/kv.c b/sheep/http/kv.c
index 8113389..7d002b0 100644
--- a/sheep/http/kv.c
+++ b/sheep/http/kv.c
@@ -16,14 +16,30 @@
 
 #define FOR_EACH_VDI(nr, vdis) FOR_EACH_BIT(nr, vdis, SD_NR_VDIS)
 
-static int lookup_bucket(struct http_request *req, const char *bucket,
-			 uint32_t *vid)
+struct bucket_inode_hdr {
+	char bucket_name[SD_MAX_BUCKET_NAME];
+	uint64_t obj_count;
+	uint64_t bytes_used;
+	uint32_t onode_vid;
+};
+
+struct bucket_inode {
+	union {
+		struct bucket_inode_hdr hdr;
+		uint8_t data[SD_MAX_BUCKET_NAME << 1];
+	};
+};
+
+#define MAX_BUCKETS (SD_MAX_VDI_SIZE / sizeof(struct bucket_inode))
+#define BUCKETS_PER_SD_OBJ (SD_DATA_OBJ_SIZE / sizeof(struct bucket_inode))
+
+static int lookup_vdi(const char *name, uint32_t *vid)
 {
 	int ret;
 	struct vdi_info info = {};
 	struct vdi_iocb iocb = {
-		.name = bucket,
-		.data_len = strlen(bucket),
+		.name = name,
+		.data_len = strlen(name),
 	};
 
 	ret = vdi_lookup(&iocb, &info);
@@ -32,27 +48,23 @@ static int lookup_bucket(struct http_request *req, const char *bucket,
 		*vid = info.vid;
 		break;
 	case SD_RES_NO_VDI:
-		sd_info("no such bucket %s", bucket);
-		http_response_header(req, NOT_FOUND);
-		return -1;
+		sd_info("no such vdi %s", name);
+		break;
 	default:
-		sd_err("%s: bucket %s", sd_strerror(ret), bucket);
-		http_response_header(req, INTERNAL_SERVER_ERROR);
-		return -1;
+		sd_err("Failed to find vdi %s %s", name, sd_strerror(ret));
 	}
 
-	return 0;
+	return ret;
 }
 
-/* Bucket operations */
-
-int kv_create_bucket(struct http_request *req, const char *bucket)
+static int kv_create_hyper_volume(const char *name, uint32_t *vdi_id)
 {
 	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
 	int ret;
 	char buf[SD_MAX_VDI_LEN] = {0};
 
-	pstrcpy(buf, SD_MAX_VDI_LEN, bucket);
+	pstrcpy(buf, SD_MAX_VDI_LEN, name);
 
 	sd_init_req(&hdr, SD_OP_NEW_VDI);
 	hdr.flags = SD_FLAG_CMD_WRITE;
@@ -64,104 +76,573 @@ int kv_create_bucket(struct http_request *req, const char *bucket)
 	hdr.vdi.store_policy = 1;
 
 	ret = exec_local_req(&hdr, buf);
+	if (rsp->result != SD_RES_SUCCESS)
+		sd_err("Failed to create VDI %s: %s", name,
+		       sd_strerror(rsp->result));
+
+	if (vdi_id)
+		*vdi_id = rsp->vdi.vdi_id;
+
+	return ret;
+}
+
+static int discard_data_obj(uint64_t oid)
+{
+	int ret;
+	struct sd_req hdr;
+
+	sd_init_req(&hdr, SD_OP_DISCARD_OBJ);
+	hdr.obj.oid = oid;
+
+	ret = exec_local_req(&hdr, NULL);
+	if (ret != SD_RES_SUCCESS)
+		sd_err("Failed to discard data obj %lu %s", oid,
+		       sd_strerror(ret));
+
+	return ret;
+}
+
+static int kv_delete_vdi(const char *name)
+{
+	int ret;
+	struct sd_req hdr;
+	char data[SD_MAX_VDI_LEN] = {0};
+	uint32_t vid;
+
+	ret = lookup_vdi(name, &vid);
+	if (ret != SD_RES_SUCCESS)
+		return ret;
+
+	sd_init_req(&hdr, SD_OP_DEL_VDI);
+	hdr.flags = SD_FLAG_CMD_WRITE;
+	hdr.data_length = sizeof(data);
+	pstrcpy(data, SD_MAX_VDI_LEN, name);
+
+	ret = exec_local_req(&hdr, data);
+	if (ret != SD_RES_SUCCESS)
+		sd_err("Failed to delete vdi %s %s", name, sd_strerror(ret));
+
+	return ret;
+}
+
+/*
+ * An account is actually a hyper volume vdi (up to 16PB);
+ * all the buckets (or containers, identified by 'struct bucket_inode') are
+ * stored in this hyper vdi using a hashing algorithm.
+ * Each bucket also has a hyper vdi named "account/bucket" which stores
+ * 'struct kv_onodes'.
+ *
+ * For example: account "coly" has two buckets "jetta" and "volvo"
+ *
+ *
+ * account vdi
+ * +-----------+---+--------------------------+---+--------------------------+--
+ * |name: coly |...|bucket_inode (name: jetta)|...|bucket_inode (name: volvo)|..
+ * +-----------+---+--------------------------+---+--------------------------+--
+ *                                  |                             |
+ *                                 /                              |
+ * bucket vdi                     /                               |
+ * +-----------------+-------+ <--                                |
+ * |name: coly/jetta |.......|                                    |
+ * +-----------------+-------+                                   /
+ *                              bucket vdi                      /
+ *                              +-----------------+------+ <----
+ *                              | name: coly/volvo|......|
+ *                              +-----------------+------+
+ */
+
+/* Account operations */
+
+int kv_create_account(const char *account)
+{
+	uint32_t vdi_id;
+	return kv_create_hyper_volume(account, &vdi_id);
+}
+
+typedef void (*list_cb)(struct http_request *req, const char *bucket,
+			void *opaque);
+
+struct list_buckets_arg {
+	struct http_request *req;
+	void *opaque;
+	list_cb cb;
+	uint32_t bucket_counter;
+};
+
+static void list_buckets_cb(void *data, enum btree_node_type type, void *arg)
+{
+	struct sd_extent *ext;
+	struct list_buckets_arg *lbarg = arg;
+	struct bucket_inode *bnode;
+	uint64_t oid;
+	char *buf = NULL;
+	int ret;
+
+	if (type == BTREE_EXT) {
+		ext = (struct sd_extent *)data;
+		if (!ext->vdi_id)
+			return;
+
+		buf = xzalloc(SD_DATA_OBJ_SIZE);
+
+		oid = vid_to_data_oid(ext->vdi_id, ext->idx);
+		ret = sd_read_object(oid, buf, SD_DATA_OBJ_SIZE, 0);
+		if (ret != SD_RES_SUCCESS) {
+			sd_err("Failed to read data object %lx", oid);
+			goto out;
+		}
+		/* loop all bucket_inodes in this data-object */
+		for (int i = 0; i < BUCKETS_PER_SD_OBJ; i++) {
+			bnode = (struct bucket_inode *)
+				(buf + i * sizeof(struct bucket_inode));
+			if (bnode->hdr.onode_vid == 0)
+				continue;
+			if (lbarg->cb)
+				lbarg->cb(lbarg->req, bnode->hdr.bucket_name,
+					  (void *)lbarg->opaque);
+			lbarg->bucket_counter++;
+		}
+	}
+out:
+	free(buf);
+}
+
+/* get number of buckets in this account */
+static int kv_get_account(const char *account, uint32_t *nr_buckets)
+{
+	struct sd_inode inode;
+	uint64_t oid;
+	uint32_t account_vid;
+	int ret;
+
+	ret = lookup_vdi(account, &account_vid);
+	if (ret != SD_RES_SUCCESS)
+		return ret;
+
+	/* read account vdi out */
+	oid = vid_to_vdi_oid(account_vid);
+	ret = sd_read_object(oid, (char *)&inode, sizeof(struct sd_inode), 0);
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("Failed to read inode header %lx", oid);
+		return ret;
+	}
+
+	struct list_buckets_arg arg = {NULL, NULL, NULL, 0};
+	traverse_btree(sheep_bnode_reader, &inode, list_buckets_cb, &arg);
+	if (nr_buckets)
+		*nr_buckets = arg.bucket_counter;
+
+	return SD_RES_SUCCESS;
+}
+
+int kv_read_account(const char *account, uint32_t *nr_buckets)
+{
+	int ret;
+
+	ret = kv_get_account(account, nr_buckets);
+	if (ret != SD_RES_SUCCESS)
+		sd_err("Failed to get number of buckets in %s", account);
+	return ret;
+}
+
+int kv_update_account(const char *account)
+{
+	/* TODO: update metadata of the account */
+	return -1;
+}
+
+int kv_delete_account(const char *account)
+{
+	int ret;
+
+	ret = kv_delete_vdi(account);
+	if (ret != SD_RES_SUCCESS)
+		sd_err("Failed to delete vdi %s", account);
+
+	return ret;
+}
+
+/* Bucket operations */
+
+static int lookup_bucket(struct http_request *req, const char *bucket,
+			 uint32_t *vid)
+{
+	int ret;
+	struct vdi_info info = {};
+	struct vdi_iocb iocb = {
+		.name = bucket,
+		.data_len = strlen(bucket),
+	};
+
+	ret = vdi_lookup(&iocb, &info);
 	switch (ret) {
 	case SD_RES_SUCCESS:
-		http_response_header(req, CREATED);
+		*vid = info.vid;
 		break;
-	case SD_RES_VDI_EXIST:
-		http_response_header(req, ACCEPTED);
+	case SD_RES_NO_VDI:
+		sd_info("no such bucket %s", bucket);
+		http_response_header(req, NOT_FOUND);
 		break;
 	default:
-		sd_err("%s: bucket %s", sd_strerror(ret), bucket);
+		sd_err("Failed to find bucket %s %s", bucket, sd_strerror(ret));
 		http_response_header(req, INTERNAL_SERVER_ERROR);
-		return -1;
 	}
 
-	return 0;
+	return ret;
 }
 
-int kv_read_bucket(struct http_request *req, const char *bucket)
+/*
+ * Delete the bucket (container) inode named 'bucket' from the account vdi.
+ * idx: the target hash position of the bucket
+ * Return the position of bucket_inode in sd-data-object on success
+ * Return BUCKETS_PER_SD_OBJ if bucket_inode is not found
+ * Return -1 if an error happened
+ */
+static int delete_bucket(struct sd_inode *account_inode, uint64_t idx,
+			 const char *bucket)
 {
-	/* TODO: read metadata of the bucket */
-	return -1;
+	struct bucket_inode *bnode, *victim = NULL;
+	char *buf = NULL;	/* 'out' frees it even if we bail out early */
+	uint32_t vdi_id;
+	uint64_t oid;
+	uint64_t data_index = idx / BUCKETS_PER_SD_OBJ;
+	int offset = idx % BUCKETS_PER_SD_OBJ;
+	int ret, i, empty_buckets = 0, found = -1;	/* slot 0 is valid */
+
+	vdi_id = INODE_GET_VID(account_inode, data_index);
+	if (!vdi_id) {
+		sd_err("the %lu in vdi %s is not exists", data_index,
+		       account_inode->name);
+		ret = -1;
+		goto out;
+	}
+
+	oid = vid_to_data_oid(account_inode->vdi_id, data_index);
+	buf = xzalloc(SD_DATA_OBJ_SIZE);
+	ret = sd_read_object(oid, buf, SD_DATA_OBJ_SIZE, 0);
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("Failed to read inode header %lx", oid);
+		ret = -1;
+		goto out;
+	}
+
+	for (i = 0; i < BUCKETS_PER_SD_OBJ; i++) {
+		char vdi_name[SD_MAX_VDI_LEN];
+		bnode = (struct bucket_inode *)
+			(buf + i * sizeof(struct bucket_inode));
+		/* count all empty buckets in this sd-data-obj */
+		if (bnode->hdr.onode_vid == 0) {
+			empty_buckets++;
+			continue;
+		}
+		if (strncmp(bnode->hdr.bucket_name, bucket, SD_MAX_BUCKET_NAME))
+			continue;
+
+		if (i < offset)
+			panic("postion of bucket inode %d is smaller than %d",
+			      i, offset);
+
+		found = i;
+		victim = bnode;	/* remembered for the write-back below */
+		victim->hdr.onode_vid = 0;
+		snprintf(vdi_name, SD_MAX_VDI_LEN, "%s/%s",
+			 account_inode->name, bucket);
+
+		ret = kv_delete_vdi(vdi_name);
+		if (ret != SD_RES_SUCCESS) {
+			sd_err("Failed to delete vdi %s", vdi_name);
+			ret = -1;
+			goto out;
+		}
+		sd_debug("delete vdi %s success", vdi_name);
+	}
+
+	if (found < 0) {
+		ret = BUCKETS_PER_SD_OBJ;
+		goto out;
+	}
+
+	/*
+	 * if only this bucket_inode is in the sd-data-obj,
+	 * then delete this sd-data-obj
+	 */
+	if (empty_buckets == BUCKETS_PER_SD_OBJ - 1) {
+		ret = discard_data_obj(oid);
+		if (ret != SD_RES_SUCCESS) {
+			ret = -1;
+			goto out;
+		}
+		INODE_SET_VID(account_inode, data_index, 0);
+		ret = sd_inode_write_vid(sheep_bnode_writer, account_inode,
+					 data_index, vdi_id, vdi_id, 0, false,
+					 false);
+		if (ret != SD_RES_SUCCESS) {
+			sd_err("Failed to write inode %x", vdi_id);
+			ret = -1;
+			goto out;
+		}
+		sd_debug("discard obj %lx and update vdi %x success",
+			 oid, vdi_id);
+	} else {
+		ret = sd_write_object(oid, (char *)victim, sizeof(*victim),
+				   found * sizeof(*victim), false);
+		if (ret != SD_RES_SUCCESS) {
+			sd_err("Failed to write object %lx", oid);
+			ret = -1;
+			goto out;
+		}
+	}
+
+	sd_debug("write object oid %lx success", oid);
+	ret = found;
+out:
+	free(buf);
+	return ret;
 }
 
-int kv_update_bucket(struct http_request *req, const char *bucket)
+/*
+ * Add bucket(container) inode into account vdi.
+ * idx: the target hash position of bucket
+ * Return the position of bucket_inode in sd-data-object if success
+ * Return BUCKETS_PER_SD_OBJ if the data-object is full of bucket_inode
+ * Return -1 if some error happened
+ */
+static int add_bucket(struct sd_inode *account_inode, uint64_t idx,
+		      const char *bucket)
 {
-	/* TODO: update metadata of the bucket */
-	return -1;
+	struct bucket_inode *bnode;
+	char *buf;
+	uint32_t vdi_id;
+	uint64_t oid;
+	uint64_t data_index = idx / BUCKETS_PER_SD_OBJ;
+	int offset = idx % BUCKETS_PER_SD_OBJ;
+	int ret, i;
+	bool create = false;
+
+	buf = xzalloc(SD_DATA_OBJ_SIZE);
+
+	vdi_id = INODE_GET_VID(account_inode, data_index);
+	oid = vid_to_data_oid(account_inode->vdi_id, data_index);
+	sd_debug("oid %x %lx %lx", account_inode->vdi_id, data_index, oid);
+	/* the data object is exists */
+	if (vdi_id) {
+		ret = sd_read_object(oid, buf, SD_DATA_OBJ_SIZE, 0);
+		if (ret != SD_RES_SUCCESS) {
+			sd_err("Failed to read inode header %lx", oid);
+			ret = -1;
+			goto out;
+		}
+	} else
+		create = true;
+
+	sd_debug("bucket_inode offset %d %lu", offset, BUCKETS_PER_SD_OBJ);
+	for (i = offset; i < BUCKETS_PER_SD_OBJ; i++) {
+		char vdi_name[SD_MAX_VDI_LEN];
+		bnode = (struct bucket_inode *)
+			(buf + i * sizeof(struct bucket_inode));
+		if (bnode->hdr.onode_vid != 0)
+			continue;
+
+		/* the bnode not used */
+		strncpy(bnode->hdr.bucket_name, bucket, SD_MAX_BUCKET_NAME);
+		bnode->hdr.obj_count = 0;
+		bnode->hdr.bytes_used = 0;
+		snprintf(vdi_name, SD_MAX_VDI_LEN, "%s/%s",
+			 account_inode->name, bucket);
+		ret = kv_create_hyper_volume(vdi_name, &(bnode->hdr.onode_vid));
+		if (ret != SD_RES_SUCCESS) {
+			sd_err("Failed to create hyper volume %d", ret);
+			ret = -1;
+			goto out;
+		}
+		sd_debug("create hyper volume %s success", vdi_name);
+		break;
+	}
+
+	if (i >= BUCKETS_PER_SD_OBJ) {
+		ret = BUCKETS_PER_SD_OBJ;
+		goto out;
+	}
+
+	/* write bnode back to account-vdi */
+	if (create)
+		ret = sd_write_object(oid, buf, SD_DATA_OBJ_SIZE, 0, create);
+	else
+		ret = sd_write_object(oid, buf, sizeof(struct bucket_inode),
+				   i * sizeof(struct bucket_inode), create);
+
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("Failed to write object %lx", oid);
+		ret = -1;
+		goto out;
+	}
+
+	sd_debug("write object oid %lx success", oid);
+
+	/* update index of vdi */
+	if (create) {
+		vdi_id = account_inode->vdi_id;
+		INODE_SET_VID(account_inode, data_index, vdi_id);
+		ret = sd_inode_write_vid(sheep_bnode_writer, account_inode,
+					 data_index, vdi_id, vdi_id, 0, false,
+					 false);
+		if (ret != SD_RES_SUCCESS) {
+			sd_err("Failed to write inode %x", vdi_id);
+			ret = -1;
+			goto out;
+		}
+		sd_debug("write account inode success");
+	}
+
+	ret = i;
+out:
+	free(buf);
+	return ret;
 }
 
-/* TODO: return HTTP_CONFLICT when the bucket is not empty */
-int kv_delete_bucket(struct http_request *req, const char *bucket)
+static int kv_get_bucket(struct sd_inode *account_inode, const char *account,
+			 const char *bucket)
 {
+	char vdi_name[SD_MAX_VDI_LEN];
+	uint64_t oid;
+	uint32_t account_vid, bucket_vid;
 	int ret;
-	struct sd_req hdr;
-	char data[SD_MAX_VDI_LEN] = {0};
-	uint32_t vid;
 
-	ret = lookup_bucket(req, bucket, &vid);
-	if (ret < 0)
+	ret = lookup_vdi(account, &account_vid);
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("Failed to find account %s", account);
+		return -1;
+	}
+
+	/* read account vdi out */
+	oid = vid_to_vdi_oid(account_vid);
+	ret = sd_read_object(oid, (char *)account_inode,
+			  sizeof(struct sd_inode), 0);
+	if (ret != SD_RES_SUCCESS)
 		return ret;
 
-	sd_init_req(&hdr, SD_OP_DELETE_CACHE);
-	hdr.obj.oid = vid_to_vdi_oid(vid);
+	/* find bucket vdi */
+	snprintf(vdi_name, SD_MAX_VDI_LEN, "%s/%s",
+		 account_inode->name, bucket);
 
-	ret = exec_local_req(&hdr, NULL);
-	if (ret != SD_RES_SUCCESS) {
-		sd_err("failed to execute request");
-		http_response_header(req, INTERNAL_SERVER_ERROR);
+	return lookup_vdi(vdi_name, &bucket_vid);
+}
+
+int kv_create_bucket(const char *account, const char *bucket)
+{
+	struct sd_inode inode;
+	uint64_t hval, i;
+	int ret;
+
+	ret = kv_get_bucket(&inode, account, bucket);
+	/* 0: bucket exists; only SD_RES_NO_VDI means it is safe to create */
+	if (!ret) {
+		sd_err("bucket %s is exists.", bucket);
+		return SD_RES_VDI_EXIST;
+	} else if (ret != SD_RES_NO_VDI) /* inode may not have been read */
 		return -1;
-	}
 
-	sd_init_req(&hdr, SD_OP_DEL_VDI);
-	hdr.flags = SD_FLAG_CMD_WRITE;
-	hdr.data_length = sizeof(data);
-	pstrcpy(data, SD_MAX_VDI_LEN, bucket);
+	sd_debug("read account inode success");
 
-	ret = exec_local_req(&hdr, data);
-	if (ret == SD_RES_SUCCESS) {
-		http_response_header(req, NO_CONTENT);
-		return 0;
-	} else {
-		sd_err("%s: bucket %s", sd_strerror(ret), bucket);
-		http_response_header(req, INTERNAL_SERVER_ERROR);
+	hval = sd_hash(bucket, strlen(bucket));
+	for (i = 0; i < MAX_BUCKETS; i++) {
+		uint64_t idx = (hval + i) % MAX_BUCKETS;
+		ret = add_bucket(&inode, idx, bucket);
+		/* data-object is full */
+		if (ret == BUCKETS_PER_SD_OBJ) {
+			i += BUCKETS_PER_SD_OBJ;
+			continue;
+		} else if (ret < 0) {
+			sd_err("Failed to add bucket");
+			return ret;
+		}
+		/* add bucket success */
+		sd_debug("add bucket success");
+		break;
+	}
+
+	if (i >= MAX_BUCKETS) {
+		sd_err("Containers in vdi %s is full!", account);
 		return -1;
 	}
+	return 0;
 }
 
-int kv_list_buckets(struct http_request *req,
-		    void (*cb)(struct http_request *req, const char *bucket,
-			       void *opaque),
-		    void *opaque)
+int kv_read_bucket(const char *account, const char *bucket)
 {
-	char buf[SD_INODE_HEADER_SIZE];
-	struct sd_inode *inode = (struct sd_inode *)buf;
-	unsigned long nr;
+	/* TODO: read metadata of the bucket */
+	return -1;
+}
 
-	http_response_header(req, OK);
+int kv_update_bucket(const char *account, const char *bucket)
+{
+	/* TODO: update metadata of the bucket */
+	return -1;
+}
 
-	FOR_EACH_VDI(nr, sys->vdi_inuse) {
-		uint64_t oid;
-		int ret;
+/* return SD_RES_NO_VDI if the bucket does not exist */
+int kv_delete_bucket(const char *account, const char *bucket)
+{
+	struct sd_inode inode;
+	uint64_t hval, i;
+	int ret;
 
-		oid = vid_to_vdi_oid(nr);
+	ret = kv_get_bucket(&inode, account, bucket);
+	if (ret) {
+		sd_err("Failed to get bucket");
+		return ret;
+	}
 
-		ret = sd_read_object(oid, (char *)inode, SD_INODE_HEADER_SIZE,
-				     0);
-		if (ret != SD_RES_SUCCESS) {
-			sd_err("Failed to read inode header");
+	hval = sd_hash(bucket, strlen(bucket));
+	for (i = 0; i < MAX_BUCKETS; i++) {
+		uint64_t idx = (hval + i) % MAX_BUCKETS;
+		ret = delete_bucket(&inode, idx, bucket);
+		if (ret == BUCKETS_PER_SD_OBJ) {
+			i += BUCKETS_PER_SD_OBJ;
 			continue;
+		} else if (ret < 0) {
+			sd_err("Failed to delete bucket %d", ret);
+			return ret;
 		}
+		/* delete bucket success */
+		sd_debug("delete bucket success");
+		break;
+	}
 
-		if (inode->name[0] == '\0') /* this VDI has been deleted */
-			continue;
+	if (i >= MAX_BUCKETS) {
+		sd_err("Can't find bucket %s", bucket);
+		return SD_RES_NO_VDI;
+	}
+	return SD_RES_SUCCESS;
+}
+
+int kv_list_buckets(struct http_request *req, const char *account, list_cb cb,
+		    void *opaque)
+{
+	struct sd_inode account_inode;
+	uint32_t account_vid;
+	uint64_t oid;
+	int ret;
 
-		if (!vdi_is_snapshot(inode))
-			cb(req, inode->name, opaque);
+	ret = lookup_vdi(account, &account_vid);
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("Failed to find account %s", account);
+		return ret;
 	}
 
-	return 0;
+	/* read account vdi out */
+	oid = vid_to_vdi_oid(account_vid);
+	ret = sd_read_object(oid, (char *)&account_inode,
+			  sizeof(struct sd_inode), 0);
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("Failed to read account inode header %lx", oid);
+		return ret;
+	}
+
+	struct list_buckets_arg arg = {req, opaque, cb, 0};
+	traverse_btree(sheep_bnode_reader, &account_inode,
+		       list_buckets_cb, &arg);
+	return SD_RES_SUCCESS;
 }
 
 /* Object operations */
diff --git a/sheep/http/kv.h b/sheep/http/kv.h
index f0b09fe..1774a36 100644
--- a/sheep/http/kv.h
+++ b/sheep/http/kv.h
@@ -14,15 +14,25 @@
 
 #include "http.h"
 
-#define SD_MAX_BUCKET_NAME 1024
+#define SD_MAX_BUCKET_NAME 64
 #define SD_MAX_OBJECT_NAME 1024
 
+/* Account operations */
+int kv_create_account(const char *account);
+int kv_read_account(const char *account, uint32_t *nr_buckets);
+int kv_update_account(const char *account);
+int kv_delete_account(const char *account);
+int kv_list_accounts(struct http_request *req,
+		    void (*cb)(struct http_request *req, const char *account,
+			       void *opaque),
+		    void *opaque);
+
 /* Bucket operations */
-int kv_create_bucket(struct http_request *req, const char *bucket);
-int kv_read_bucket(struct http_request *req, const char *bucket);
-int kv_update_bucket(struct http_request *req, const char *bucket);
-int kv_delete_bucket(struct http_request *req, const char *bucket);
-int kv_list_buckets(struct http_request *req,
+int kv_create_bucket(const char *account, const char *bucket);
+int kv_read_bucket(const char *account, const char *bucket);
+int kv_update_bucket(const char *account, const char *bucket);
+int kv_delete_bucket(const char *account, const char *bucket);
+int kv_list_buckets(struct http_request *req, const char *account,
 		    void (*cb)(struct http_request *req, const char *bucket,
 			       void *opaque),
 		    void *opaque);
diff --git a/sheep/http/s3.c b/sheep/http/s3.c
index ca2efe3..8142bb5 100644
--- a/sheep/http/s3.c
+++ b/sheep/http/s3.c
@@ -58,7 +58,7 @@ static void s3_get_service(struct http_request *req)
 {
 	bool print_header = true;
 
-	kv_list_buckets(req, s3_get_service_cb, &print_header);
+	kv_list_buckets(req, "s3", s3_get_service_cb, &print_header);
 
 	http_request_writes(req, "</Buckets></ListAllMyBucketsResult>\r\n");
 }
@@ -125,7 +125,7 @@ static void s3_get_bucket(struct http_request *req, const char *bucket)
 
 static void s3_put_bucket(struct http_request *req, const char *bucket)
 {
-	kv_create_bucket(req, bucket);
+	kv_create_bucket("s3", bucket);
 
 	if (req->status == ACCEPTED)
 		s3_write_err_response(req, "BucketAlreadyExists",
@@ -139,7 +139,7 @@ static void s3_post_bucket(struct http_request *req, const char *bucket)
 
 static void s3_delete_bucket(struct http_request *req, const char *bucket)
 {
-	kv_delete_bucket(req, bucket);
+	kv_delete_bucket("s3", bucket);
 
 	switch (req->status) {
 	case NOT_FOUND:
diff --git a/sheep/http/swift.c b/sheep/http/swift.c
index 14f5ae5..8b5024d 100644
--- a/sheep/http/swift.c
+++ b/sheep/http/swift.c
@@ -21,6 +21,10 @@ static int dfd;
 static char content[4096];
 static char tmp[4096];
 
+#define HTTP_REMOVE_ACCOUNT "HTTP_X_REMOVE_ACCOUNT_META_BOOK"
+
+static void swift_delete_account(struct http_request *req, const char *account);
+
 static void make_bucket_path(char *bucket, size_t size, const char *account,
 			     const char *container)
 {
@@ -33,26 +37,37 @@ static void make_bucket_path(char *bucket, size_t size, const char *account,
 
 static void swift_head_account(struct http_request *req, const char *account)
 {
-	http_response_header(req, NOT_IMPLEMENTED);
+	uint32_t nr_buckets;
+	int ret;
+
+	ret = kv_read_account(account, &nr_buckets);
+	if (ret)
+		http_response_header(req, UNAUTHORIZED);
+	else {
+		http_request_writef(req, "X-Account-Container-Count: %u\n",
+				    nr_buckets);
+		http_response_header(req, NO_CONTENT);
+	}
 }
 
 static void swift_get_account_cb(struct http_request *req, const char *bucket,
 				 void *opaque)
 {
-	const char *account = opaque;
-	char *args[2] = {};
+	struct strbuf *buf = (struct strbuf *)opaque;
 
-	split_path(bucket, ARRAY_SIZE(args), args);
-
-	if (args[1] != NULL && strcmp(args[0], account) == 0) {
-		http_request_writes(req, args[1]);
-		http_request_writes(req, "\n");
-	}
+	if (bucket)
+		strbuf_addf(buf, "%s\n", bucket);
 }
 
 static void swift_get_account(struct http_request *req, const char *account)
 {
-	kv_list_buckets(req, swift_get_account_cb, (void *)account);
+	struct strbuf buf = STRBUF_INIT;
+
+	kv_list_buckets(req, account, swift_get_account_cb, (void *)&buf);
+	req->data_length = buf.len;
+	http_response_header(req, OK);
+	http_request_write(req, buf.buf, buf.len);
+	strbuf_release(&buf);
 }
 
 static void swift_put_account(struct http_request *req, const char *account)
@@ -62,24 +77,50 @@ static void swift_put_account(struct http_request *req, const char *account)
 
 static void swift_post_account(struct http_request *req, const char *account)
 {
-	http_response_header(req, NOT_IMPLEMENTED);
+	char *p;
+	int ret;
+
+	for (int i = 0; (p = req->fcgx.envp[i]); ++i) {
+		/* delete account */
+		if (!strncmp(p, HTTP_REMOVE_ACCOUNT,
+			     strlen(HTTP_REMOVE_ACCOUNT))) {
+			swift_delete_account(req, account);
+			return;
+		}
+	}
+	/* create account */
+	ret = kv_create_account(account);
+	if (ret == SD_RES_SUCCESS)
+		http_response_header(req, CREATED);
+	else if (ret == SD_RES_VDI_EXIST)
+		http_response_header(req, ACCEPTED);
+	else
+		http_response_header(req, INTERNAL_SERVER_ERROR);
 }
 
-static void swift_delete_account_cb(struct http_request *req,
-				    const char *bucket, void *opaque)
+static void swift_delete_account(struct http_request *req, const char *account)
 {
-	const char *account = opaque;
-	char *args[2] = {};
+	uint32_t nr_buckets;
+	int ret;
 
-	split_path(bucket, ARRAY_SIZE(args), args);
+	ret = kv_read_account(account, &nr_buckets);
+	if (ret) {
+		http_response_header(req, INTERNAL_SERVER_ERROR);
+		return;
+	}
 
-	if (args[1] != NULL && strcmp(args[0], account) == 0)
-		kv_delete_bucket(req, bucket);
-}
+	if (nr_buckets) {
+		/* return HTTP_CONFLICT when the account is not empty */
+		http_response_header(req, CONFLICT);
+		return;
+	}
 
-static void swift_delete_account(struct http_request *req, const char *account)
-{
-	kv_list_buckets(req, swift_delete_account_cb, (void *)account);
+	ret = kv_delete_account(account);
+	if (ret) {
+		http_response_header(req, INTERNAL_SERVER_ERROR);
+		return;
+	}
+	http_response_header(req, OK);
 }
 
 /* Operations on Containers */
@@ -109,10 +150,14 @@ static void swift_get_container(struct http_request *req, const char *account,
 static void swift_put_container(struct http_request *req, const char *account,
 				const char *container)
 {
-	char bucket[SD_MAX_BUCKET_NAME];
-
-	make_bucket_path(bucket, sizeof(bucket), account, container);
-	kv_create_bucket(req, bucket);
+	int ret;
+	ret = kv_create_bucket(account, container);
+	if (ret == SD_RES_SUCCESS)
+		http_response_header(req, CREATED);
+	else if (ret == SD_RES_VDI_EXIST)
+		http_response_header(req, ACCEPTED);
+	else
+		http_response_header(req, INTERNAL_SERVER_ERROR);
 }
 
 static void swift_post_container(struct http_request *req, const char *account,
@@ -124,10 +169,12 @@ static void swift_post_container(struct http_request *req, const char *account,
 static void swift_delete_container(struct http_request *req,
 				   const char *account, const char *container)
 {
-	char bucket[SD_MAX_BUCKET_NAME];
-
-	make_bucket_path(bucket, sizeof(bucket), account, container);
-	kv_delete_bucket(req, bucket);
+	int ret = kv_delete_bucket(account, container);
+	if (ret == SD_RES_SUCCESS)
+		http_response_header(req, NO_CONTENT);
+	else	/* only a missing bucket is a 404; other failures are 500 */
+		http_response_header(req, ret == SD_RES_NO_VDI ?
+				     NOT_FOUND : INTERNAL_SERVER_ERROR);
 }
 
 /* Operations on Objects */
-- 
1.7.12.4




More information about the sheepdog mailing list