<div dir="ltr">Reviewed-by: Robin Dong <<a href="mailto:sanbai@taobao.com">sanbai@taobao.com</a>></div><div class="gmail_extra"><br><br><div class="gmail_quote">2013/11/27 Liu Yuan <span dir="ltr"><<a href="mailto:namei.unix@gmail.com" target="_blank">namei.unix@gmail.com</a>></span><br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">To support objects larger than 4M, we have to introduce an extent-like structure<br>
to map the data indexes to data holders (sheepdog objects). So basically, we use<br>
one object index node (onode) to hold the metadata and extents.<br>
<br>
user object -> onode[metadata, extent1, ............., extentN]<br>
| |<br>
V |<br>
+-----------------------------------+ |<br>
| obj1 | obj2 | .............| objN | |<br>
+-----------------------------------+ |<br>
V<br>
+-----------------------------------+<br>
| obj1` | obj2` |...........| objN` |<br>
+-----------------------------------+<br>
<br>
For a user object smaller than 4M, we just inline its data into the onode<br>
<br>
user object -> onode[metadata, user-data].<br>
<br>
kv_create_extented_object() is left for a later patch set.<br>
<br>
For the object name to onode mapping, we make use of the old hash approach, that is<br>
<br>
- hash(object_name) --> vdi[hash_value] -> onode<br>
<br>
Signed-off-by: Liu Yuan <<a href="mailto:namei.unix@gmail.com">namei.unix@gmail.com</a>><br>
---<br>
include/internal_proto.h | 1 +<br>
sheep/http/kv.c | 183 ++++++++++++++++++++++++++++------------------<br>
2 files changed, 113 insertions(+), 71 deletions(-)<br>
<br>
diff --git a/include/internal_proto.h b/include/internal_proto.h<br>
index e5e0f05..70a7b5d 100644<br>
--- a/include/internal_proto.h<br>
+++ b/include/internal_proto.h<br>
@@ -124,6 +124,7 @@<br>
#define SD_RES_AGAIN 0x8F /* Ask to try again */<br>
#define SD_RES_STALE_OBJ 0x90 /* Object may be stale */<br>
#define SD_RES_CLUSTER_ERROR 0x91 /* Cluster driver error */<br>
+#define SD_RES_OBJ_TAKEN 0x92 /* Object ID is taken up */<br>
<br>
enum sd_status {<br>
SD_STATUS_OK = 1,<br>
diff --git a/sheep/http/kv.c b/sheep/http/kv.c<br>
index d30a6a1..c04e629 100644<br>
--- a/sheep/http/kv.c<br>
+++ b/sheep/http/kv.c<br>
@@ -165,43 +165,100 @@ int kv_list_buckets(struct http_request *req,<br>
<br>
/* Object operations */<br>
<br>
-/* 4 KB header of kv object */<br>
-struct kv_object_hdr {<br>
+/* 4 KB header of kv object index node */<br>
+struct kv_onode_hdr {<br>
union {<br>
struct {<br>
char name[SD_MAX_OBJECT_NAME];<br>
+ /* a hash value for etag */<br>
+ uint8_t sha1[round_up(SHA1_DIGEST_SIZE, 8)];<br>
uint64_t size;<br>
uint64_t ctime;<br>
uint64_t mtime;<br>
-<br>
- /* the index of the multi parted object */<br>
- uint64_t segment;<br>
-<br>
- /* a hash value for etag */<br>
- uint8_t sha1[round_up(SHA1_DIGEST_SIZE, 8)];<br>
+ uint32_t data_vid;<br>
+ uint32_t nr_extent;<br>
+ uint8_t inlined;<br>
+ uint8_t pad[5];<br>
};<br>
<br>
uint8_t __pad[BLOCK_SIZE];<br>
};<br>
};<br>
<br>
-struct kv_object {<br>
- struct kv_object_hdr hdr;<br>
- uint8_t data[SD_DATA_OBJ_SIZE - sizeof(struct kv_object_hdr)];<br>
+struct onode_extent {<br>
+ uint32_t vdi;<br>
+ uint32_t pad;<br>
+ uint64_t start;<br>
+ uint64_t count;<br>
};<br>
<br>
+struct kv_onode {<br>
+ struct kv_onode_hdr hdr;<br>
+ union {<br>
+ uint8_t data[SD_DATA_OBJ_SIZE - sizeof(struct kv_onode_hdr)];<br>
+ struct onode_extent *o_extent;<br>
+ };<br>
+};<br>
+<br>
+#define KV_ONODE_INLINE_SIZE (SD_DATA_OBJ_SIZE - sizeof(struct kv_onode_hdr))<br>
+<br>
+static int kv_create_inlined_object(struct sd_inode *inode,<br>
+ struct kv_onode *onode,<br>
+ uint32_t vid, uint32_t idx,<br>
+ bool overwrite)<br>
+{<br>
+ uint64_t oid = vid_to_data_oid(vid, idx);<br>
+ int ret;<br>
+<br>
+ if (overwrite) {<br>
+ sd_info("overwrite object %s", onode-><a href="http://hdr.name" target="_blank">hdr.name</a>);<br>
+ ret = write_object(oid, (char *)onode,<br>
+ sizeof(onode->hdr) + onode->hdr.size,<br>
+ 0, false);<br>
+ if (ret != SD_RES_SUCCESS) {<br>
+ sd_err("failed to write object, %" PRIx64, oid);<br>
+ goto out;<br>
+ }<br>
+ } else {<br>
+ ret = write_object(oid, (char *)onode,<br>
+ sizeof(onode->hdr) + onode->hdr.size,<br>
+ 0, true);<br>
+ if (ret != SD_RES_SUCCESS) {<br>
+ sd_err("failed to create object, %" PRIx64, oid);<br>
+ goto out;<br>
+ }<br>
+ INODE_SET_VID(inode, idx, vid);<br>
+ ret = sd_inode_write_vid(sheep_bnode_writer, inode, idx,<br>
+ vid, vid, 0, false, false);<br>
+ if (ret != SD_RES_SUCCESS) {<br>
+ sd_err("failed to update inode, %" PRIx64,<br>
+ vid_to_vdi_oid(vid));<br>
+ goto out;<br>
+ }<br>
+ }<br>
+out:<br>
+ return ret;<br>
+}<br>
+<br>
+static int kv_create_extented_object(struct sd_inode *inode,<br>
+ struct kv_onode *onode,<br>
+ uint32_t vid, uint32_t idx)<br>
+{<br>
+ return SD_RES_SUCCESS;<br>
+}<br>
+<br>
/*<br>
* Create the object if the index isn't taken. Overwrite the object if it exists<br>
- * Return 0 if the index is taken by other object.<br>
+ * Return SD_RES_OBJ_TAKEN if the index is taken by other object.<br>
*/<br>
-static int do_kv_create_object(struct http_request *req, const char *obj_name,<br>
- struct kv_object *obj, uint32_t vid,<br>
- uint32_t idx)<br>
+static int do_kv_create_object(struct http_request *req,<br>
+ struct kv_onode *onode,<br>
+ uint32_t vid, uint32_t idx)<br>
{<br>
+ struct sd_inode *inode = xmalloc(sizeof(struct sd_inode));<br>
uint64_t oid = vid_to_data_oid(vid, idx);<br>
+ struct kv_onode_hdr hdr;<br>
uint32_t tmp_vid;<br>
- struct kv_object_hdr hdr;<br>
- struct sd_inode *inode = xmalloc(sizeof(struct sd_inode));<br>
int ret;<br>
<br>
ret = read_object(vid_to_vdi_oid(vid), (char *)inode,<br>
@@ -209,60 +266,37 @@ static int do_kv_create_object(struct http_request *req, const char *obj_name,<br>
if (ret != SD_RES_SUCCESS) {<br>
sd_err("failed to read inode, %" PRIx64,<br>
vid_to_vdi_oid(vid));<br>
- goto err;<br>
+ goto out;<br>
}<br>
tmp_vid = INODE_GET_VID(inode, idx);<br>
if (tmp_vid) {<br>
ret = read_object(oid, (char *)&hdr, sizeof(hdr), 0);<br>
if (ret != SD_RES_SUCCESS) {<br>
sd_err("failed to read object, %" PRIx64, oid);<br>
- goto err;<br>
+ goto out;<br>
}<br>
<br>
if (<a href="http://hdr.name" target="_blank">hdr.name</a>[0] != '\0' &&<br>
- strcmp(<a href="http://hdr.name" target="_blank">hdr.name</a>, obj-><a href="http://hdr.name" target="_blank">hdr.name</a>) != 0){<br>
+ strcmp(<a href="http://hdr.name" target="_blank">hdr.name</a>, onode-><a href="http://hdr.name" target="_blank">hdr.name</a>) != 0) {<br>
sd_debug("index %d is already used", idx);<br>
+ ret = SD_RES_OBJ_TAKEN;<br>
goto out;<br>
}<br>
- sd_info("overwrite object %s", obj_name);<br>
- ret = write_object(oid, (char *)obj,<br>
- sizeof(obj->hdr) + obj->hdr.size,<br>
- 0, false);<br>
- if (ret != SD_RES_SUCCESS) {<br>
- sd_err("failed to write object, %" PRIx64, oid);<br>
- goto err;<br>
- }<br>
- } else {<br>
- ret = write_object(oid, (char *)obj,<br>
- sizeof(obj->hdr) + obj->hdr.size,<br>
- 0, true);<br>
- if (ret != SD_RES_SUCCESS) {<br>
- sd_err("failed to create object, %" PRIx64, oid);<br>
- goto err;<br>
- }<br>
- INODE_SET_VID(inode, idx, vid);<br>
- ret = sd_inode_write_vid(sheep_bnode_writer, inode, idx,<br>
- vid, vid, 0, false, false);<br>
- if (ret != SD_RES_SUCCESS) {<br>
- sd_err("failed to update inode, %" PRIx64,<br>
- vid_to_vdi_oid(vid));<br>
- goto err;<br>
- }<br>
}<br>
- http_response_header(req, CREATED);<br>
+ if (onode->hdr.inlined)<br>
+ ret = kv_create_inlined_object(inode, onode, vid, idx,<br>
+ !!tmp_vid);<br>
+ else<br>
+ ret = kv_create_extented_object(inode, onode, vid, idx);<br>
out:<br>
free(inode);<br>
- return 0;<br>
-err:<br>
- http_response_header(req, INTERNAL_SERVER_ERROR);<br>
- free(inode);<br>
- return -1;<br>
+ return ret;<br>
}<br>
<br>
int kv_create_object(struct http_request *req, const char *bucket,<br>
- const char *object)<br>
+ const char *name)<br>
{<br>
- struct kv_object *obj;<br>
+ struct kv_onode *onode;<br>
ssize_t size;<br>
int ret;<br>
uint64_t hval;<br>
@@ -273,44 +307,51 @@ int kv_create_object(struct http_request *req, const char *bucket,<br>
if (ret < 0)<br>
return ret;<br>
<br>
- obj = xzalloc(sizeof(*obj));<br>
+ onode = xzalloc(sizeof(*onode));<br>
<br>
gettimeofday(&tv, NULL);<br>
- pstrcpy(obj-><a href="http://hdr.name" target="_blank">hdr.name</a>, sizeof(obj-><a href="http://hdr.name" target="_blank">hdr.name</a>), object);<br>
- obj->hdr.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000;<br>
- obj->hdr.mtime = obj->hdr.ctime;<br>
+ pstrcpy(onode-><a href="http://hdr.name" target="_blank">hdr.name</a>, sizeof(onode-><a href="http://hdr.name" target="_blank">hdr.name</a>), name);<br>
+ onode->hdr.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000;<br>
+ onode->hdr.mtime = onode->hdr.ctime;<br>
<br>
- /* TODO: support multi parted object for large object */<br>
- size = http_request_read(req, obj->data, sizeof(obj->data));<br>
+ size = http_request_read(req, onode->data, sizeof(onode->data));<br>
if (size < 0) {<br>
sd_err("%s: bucket %s, object %s", sd_strerror(ret),<br>
- bucket, object);<br>
+ bucket, name);<br>
http_response_header(req, INTERNAL_SERVER_ERROR);<br>
return -1;<br>
}<br>
<br>
- obj->hdr.size = size;<br>
-<br>
- hval = sd_hash(object, strlen(object));<br>
+ onode->hdr.size = size;<br>
+ if (size <= KV_ONODE_INLINE_SIZE)<br>
+ onode->hdr.inlined = 1;<br>
+ hval = sd_hash(name, strlen(name));<br>
for (int i = 0; i < MAX_DATA_OBJS; i++) {<br>
uint32_t idx = (hval + i) % MAX_DATA_OBJS;<br>
<br>
- do_kv_create_object(req, object, obj, vid, idx);<br>
- if (req->status != UNKNOWN) {<br>
- free(obj);<br>
+ ret = do_kv_create_object(req, onode, vid, idx);<br>
+ switch (ret) {<br>
+ case SD_RES_SUCCESS:<br>
+ http_response_header(req, CREATED);<br>
+ free(onode);<br>
return 0;<br>
+ case SD_RES_OBJ_TAKEN:<br>
+ break;<br>
+ default:<br>
+ http_response_header(req, INTERNAL_SERVER_ERROR);<br>
+ free(onode);<br>
+ return -1;<br>
}<br>
}<br>
<br>
- free(obj);<br>
-<br>
/* no free space to create a object */<br>
http_response_header(req, SERVICE_UNAVAILABLE);<br>
+ free(onode);<br>
return -1;<br>
}<br>
<br>
static int do_kv_read_object(struct http_request *req, const char *obj_name,<br>
- struct kv_object *obj, uint32_t vid, uint32_t idx)<br>
+ struct kv_onode *obj, uint32_t vid, uint32_t idx)<br>
{<br>
uint64_t oid = vid_to_data_oid(vid, idx);<br>
int ret;<br>
@@ -342,7 +383,7 @@ static int do_kv_read_object(struct http_request *req, const char *obj_name,<br>
int kv_read_object(struct http_request *req, const char *bucket,<br>
const char *object)<br>
{<br>
- struct kv_object *obj;<br>
+ struct kv_onode *obj;<br>
int ret;<br>
uint64_t hval;<br>
uint32_t vid;<br>
@@ -371,7 +412,7 @@ int kv_read_object(struct http_request *req, const char *bucket,<br>
}<br>
<br>
static int do_kv_update_object(struct http_request *req, const char *obj_name,<br>
- struct kv_object *obj, uint32_t vid,<br>
+ struct kv_onode *obj, uint32_t vid,<br>
uint32_t idx, size_t size)<br>
{<br>
uint64_t oid = vid_to_data_oid(vid, idx);<br>
@@ -415,7 +456,7 @@ static int do_kv_update_object(struct http_request *req, const char *obj_name,<br>
int kv_update_object(struct http_request *req, const char *bucket,<br>
const char *object)<br>
{<br>
- struct kv_object *obj;<br>
+ struct kv_onode *obj;<br>
int ret;<br>
uint64_t hval;<br>
uint32_t vid;<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.7.9.5<br>
<br>
--<br>
sheepdog mailing list<br>
<a href="mailto:sheepdog@lists.wpkg.org">sheepdog@lists.wpkg.org</a><br>
<a href="http://lists.wpkg.org/mailman/listinfo/sheepdog" target="_blank">http://lists.wpkg.org/mailman/listinfo/sheepdog</a><br>
</font></span></blockquote></div><br><br clear="all"><div><br></div>-- <br>--<br>Best Regards<br>Robin Dong
</div>