[sheepdog] [PATCH] sheep/http: unify onode lookup/find operation

Liu Yuan namei.unix at gmail.com
Wed Dec 25 06:21:51 CET 2013


On Wed, Dec 25, 2013 at 01:16:05PM +0800, Liu Yuan wrote:
> On Wed, Dec 25, 2013 at 01:12:26PM +0800, Liu Yuan wrote:
> > On Wed, Dec 25, 2013 at 01:11:13PM +0800, Robin Dong wrote:
> > > 2013/12/24 Liu Yuan <namei.unix at gmail.com>
> > > 
> > > > Previously we discard the onode object, so can't check if object exists by
> > > > loop
> > > > around the adjacent objects one by one becaue there is might be hole
> > > > between
> > > > collisioned hash chains. For this reason, we have to iterate all the
> > > > objects
> > > > in a container to check if some one is in. This is obviously:
> > > >
> > > >  - time consuming because CREATE/DELETE/GET operations will firstly call
> > > >    onode_find() to iterate the container.
> > > >  - can't scale at all. Every object operation will read the whole onodes
> > > > in the
> > > >    bucket.
> > > >
> > > > This patch reworks the onode lookup operation:
> > > >
> > > > - We check adjacent objects one by one once we get a start index by hashing
> > > >   name. Unallocated slot marks the end of the check window.
> > > >
> > > > For e.g, if we are going to check if fish in the following bucket, assume
> > > > fish hashes to 'sheep', so we compare the name one by one from 'sheep' to
> > > > 'fish'. '\0' indicates that object was deleted before checking.
> > > >
> > > > [ sheep, dog, wolve, '\0', fish, {unallocated}, tiger, ]
> > > >
> > > > With this lookup mechanism, we can safely remove onode_find(). With this
> > > > patch
> > > > we can observe a huge time reduction even in tests/func/081, 082:
> > > >
> > > > before:
> > > > 081 Last Used:20s.  Test http service with a single server
> > > > 082 Last Used:21s.  Test http service with multiple servers
> > > >
> > > > after:
> > > > 081 Last Used:11s.  Test http service with a single server
> > > > 082 Last Used:12s.  Test http service with multiple servers
> > > >
> > > > Signed-off-by: Liu Yuan <namei.unix at gmail.com>
> > > > ---
> > > >  sheep/http/kv.c          | 274
> > > > +++++++++++++++++++++++++++--------------------
> > > >  tests/functional/081.out |   4 +-
> > > >  tests/functional/082.out |   4 +-
> > > >  3 files changed, 160 insertions(+), 122 deletions(-)
> > > >
> > > > diff --git a/sheep/http/kv.c b/sheep/http/kv.c
> > > > index 5843bb3..c0b3c4f 100644
> > > > --- a/sheep/http/kv.c
> > > > +++ b/sheep/http/kv.c
> > > > @@ -43,7 +43,7 @@ struct kv_onode {
> > > >                         uint8_t inlined;
> > > >                 };
> > > >
> > > > -               uint8_t __pad[BLOCK_SIZE];
> > > > +               uint8_t pad[BLOCK_SIZE];
> > > >         };
> > > >         union {
> > > >                 uint8_t data[SD_DATA_OBJ_SIZE - BLOCK_SIZE];
> > > > @@ -88,69 +88,6 @@ static int kv_create_hyper_volume(const char *name,
> > > > uint32_t *vdi_id)
> > > >         return ret;
> > > >  }
> > > >
> > > > -/*
> > > > - * Find an free object index by hash of name in the vid and create an
> > > > object
> > > > - * that holds the kv node{kv_bnode, kv_onode}.
> > > > - */
> > > > -#define kv_generic_object_create(node, vid, node_do_create)            \
> > > > -({                                                                     \
> > > > -       struct sd_inode *__inode = xmalloc(sizeof(struct sd_inode));    \
> > > > -       uint32_t __tmp_vid, __idx, __i;                                 \
> > > > -       uint64_t __hval;                                                \
> > > > -       int __ret;                                                      \
> > > > -                                                                       \
> > > > -       __ret = sd_read_object(vid_to_vdi_oid(vid), (char *)__inode,    \
> > > > -                              sizeof(*__inode), 0);                    \
> > > > -       if (__ret != SD_RES_SUCCESS) {                                  \
> > > > -               sd_err("failed to read %" PRIx32 " %s", vid,            \
> > > > -                      sd_strerror(__ret));                             \
> > > > -               goto out;                                               \
> > > > -       }                                                               \
> > > > -                                                                       \
> > > > -       __hval = sd_hash(node->name, strlen(node->name));               \
> > > > -       for (__i = 0; __i < MAX_DATA_OBJS; __i++) {                     \
> > > > -               __idx = (__hval + __i) % MAX_DATA_OBJS;                 \
> > > > -               __tmp_vid = INODE_GET_VID(__inode, __idx);              \
> > > > -               if (__tmp_vid)                                          \
> > > > -                       continue;                                       \
> > > > -               else                                                    \
> > > > -                       break;                                          \
> > > > -       }                                                               \
> > > > -       if (__i == MAX_DATA_OBJS) {                                     \
> > > > -               __ret = SD_RES_NO_SPACE;                                \
> > > > -               goto out;                                               \
> > > > -       }                                                               \
> > > > -       __ret = node_do_create(node, __inode, __idx);                   \
> > > > -out:                                                                   \
> > > > -       free(__inode);                                                  \
> > > > -       __ret;                                                          \
> > > > -})
> > > > -
> > > > -/* Find the object in the vid which holds the 'node' that matches 'name'
> > > > */
> > > > -#define kv_generic_object_lookup(node, vid, name)                      \
> > > > -({                                                                     \
> > > > -       uint64_t __hval;                                                \
> > > > -       uint32_t __i;                                                   \
> > > > -       int __ret;                                                      \
> > > > -                                                                       \
> > > > -       __hval = sd_hash(name, strlen(name));                           \
> > > > -       for (__i = 0; __i < MAX_DATA_OBJS; __i++) {                     \
> > > > -               uint32_t __idx = (__hval + __i) % MAX_DATA_OBJS;        \
> > > > -               uint64_t __oid = vid_to_data_oid(vid, __idx);           \
> > > > -                                                                       \
> > > > -               __ret = sd_read_object(__oid, (char *)node, sizeof(*node),
> > > > 0); \
> > > > -               if (__ret != SD_RES_SUCCESS)                            \
> > > > -                       goto out;                                       \
> > > > -               if (strcmp(node->name, name) == 0)                      \
> > > > -                       break;                                          \
> > > > -       }                                                               \
> > > > -                                                                       \
> > > > -       if (__i == MAX_DATA_OBJS)                                       \
> > > > -               __ret = SD_RES_NO_OBJ;                                  \
> > > > -out:                                                                   \
> > > > -       __ret;                                                          \
> > > > -})
> > > > -
> > > >  /* Account operations */
> > > >
> > > >  /*
> > > > @@ -320,7 +257,36 @@ out:
> > > >
> > > >  static int bnode_create(struct kv_bnode *bnode, uint32_t account_vid)
> > > >  {
> > > > -       return kv_generic_object_create(bnode, account_vid,
> > > > bnode_do_create);
> > > > +       struct sd_inode *inode = xmalloc(sizeof(struct sd_inode));
> > > > +       uint32_t tmp_vid, idx, i;
> > > > +       uint64_t hval;
> > > > +       int ret;
> > > > +
> > > > +       ret = sd_read_object(vid_to_vdi_oid(account_vid), (char *)inode,
> > > > +                              sizeof(*inode), 0);
> > > > +       if (ret != SD_RES_SUCCESS) {
> > > > +               sd_err("failed to read %" PRIx32 " %s", account_vid,
> > > > +                      sd_strerror(ret));
> > > > +               goto out;
> > > > +       }
> > > > +
> > > > +       hval = sd_hash(bnode->name, strlen(bnode->name));
> > > > +       for (i = 0; i < MAX_DATA_OBJS; i++) {
> > > >
> > > 
> > > MAX_DATA_OBJS is (2**32) and 'i' is type of uint32_t, so 'i' will always be
> > > smaller than MAX_DATA_OBJS.
> > > If all the oid in this vdi is not empty, we will fall into dead-loop.
> > > So I think type of 'i' should be 'uint64_t'.
> > 
> > Oops, good catch. I'll rework it on patch v2
> 
> For a second thought, MAX_DATA_OBJS might be max of uint32_t, so your comment
> doesn't hold true, no?
> 

Okay, you are right :)

Thanks
Yuan



More information about the sheepdog mailing list