[sheepdog] [PATCH 5/6] sheep: refactor vdi lookup, create, delete operation
Liu Yuan
namei.unix at gmail.com
Mon Apr 22 17:34:14 CEST 2013
From: Liu Yuan <tailai.ly at taobao.com>
This patch set only refactor vdi code of the sheep side, so try to maintain
the same behavior as old code and (should) passes all the tests regarding
VDI operation.
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
sheep/ops.c | 75 ++++++++-----
sheep/sheep_priv.h | 15 ++-
sheep/vdi.c | 308 +++++++++++++++++++++++++++-------------------------
3 files changed, 216 insertions(+), 182 deletions(-)
diff --git a/sheep/ops.c b/sheep/ops.c
index 3e0a9d3..5526ca8 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -92,21 +92,21 @@ static int cluster_new_vdi(struct request *req)
{
const struct sd_req *hdr = &req->rq;
struct sd_rsp *rsp = &req->rp;
- uint32_t vid = 0;
- struct vdi_iocb iocb;
+ uint32_t vid;
+ struct vdi_iocb iocb = {};
int ret;
+ if (hdr->data_length != SD_MAX_VDI_LEN)
+ return SD_RES_INVALID_PARMS;
+
iocb.name = req->data;
iocb.data_len = hdr->data_length;
iocb.size = hdr->vdi.vdi_size;
iocb.base_vid = hdr->vdi.base_vdi_id;
iocb.create_snapshot = !!hdr->vdi.snapid;
- iocb.nr_copies = hdr->vdi.copies;
+ iocb.nr_copies = hdr->vdi.copies ? hdr->vdi.copies : sys->nr_copies;
- if (!iocb.nr_copies)
- iocb.nr_copies = sys->nr_copies;
-
- ret = add_vdi(&iocb, &vid);
+ ret = vdi_create(&iocb, &vid);
rsp->vdi.vdi_id = vid;
rsp->vdi.copies = iocb.nr_copies;
@@ -120,23 +120,39 @@ static int post_cluster_new_vdi(const struct sd_req *req, struct sd_rsp *rsp,
unsigned long nr = rsp->vdi.vdi_id;
int ret = rsp->result;
- sd_dprintf("done %d %ld", ret, nr);
+ sd_dprintf("done %d %lx", ret, nr);
if (ret == SD_RES_SUCCESS)
set_bit(nr, sys->vdi_inuse);
return ret;
}
+static int vdi_init_tag(const char **tag, const char *buf, uint32_t len)
+{
+ if (len == SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN)
+ *tag = buf + SD_MAX_VDI_LEN;
+ else if (len == SD_MAX_VDI_LEN)
+ *tag = NULL;
+ else
+ return -1;
+
+ return 0;
+}
+
static int cluster_del_vdi(struct request *req)
{
struct sd_req *hdr = &req->rq;
- struct vdi_iocb iocb;
+ uint32_t data_len = hdr->data_length;
+ struct vdi_iocb iocb = {};
+
+ if (vdi_init_tag(&iocb.tag, req->data, data_len) < 0)
+ return SD_RES_INVALID_PARMS;
iocb.name = req->data;
- iocb.data_len = hdr->data_length;
+ iocb.data_len = data_len;
iocb.snapid = hdr->vdi.snapid;
- return del_vdi(&iocb, req);
+ return vdi_delete(&iocb, req);
}
struct cache_deletion_work {
@@ -184,24 +200,23 @@ static int local_get_vdi_info(struct request *req)
{
const struct sd_req *hdr = &req->rq;
struct sd_rsp *rsp = &req->rp;
- uint32_t vid;
- void *tag;
+ uint32_t data_len = hdr->data_length;
+ struct vdi_iocb iocb = {};
+ struct vdi_info info = {};
int ret;
- if (hdr->data_length == SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN)
- tag = (char *)req->data + SD_MAX_VDI_LEN;
- else if (hdr->data_length == SD_MAX_VDI_LEN)
- tag = NULL;
- else
+ if (vdi_init_tag(&iocb.tag, req->data, data_len) < 0)
return SD_RES_INVALID_PARMS;
- ret = lookup_vdi(req->data, tag, &vid,
- hdr->vdi.snapid, NULL);
+ iocb.name = req->data;
+ iocb.data_len = data_len;
+ iocb.snapid = hdr->vdi.snapid;
+ ret = vdi_lookup(&iocb, &info);
if (ret != SD_RES_SUCCESS)
return ret;
- rsp->vdi.vdi_id = vid;
- rsp->vdi.copies = get_vdi_copy_number(vid);
+ rsp->vdi.vdi_id = info.vid;
+ rsp->vdi.copies = get_vdi_copy_number(info.vid);
return ret;
}
@@ -304,17 +319,19 @@ static int cluster_get_vdi_attr(struct request *req)
{
const struct sd_req *hdr = &req->rq;
struct sd_rsp *rsp = &req->rp;
- uint32_t vid = 0, attrid = 0;
- uint64_t created_time = 0;
- int ret;
+ uint32_t vid, attrid = 0;
struct sheepdog_vdi_attr *vattr;
+ struct vdi_iocb iocb = {};
+ struct vdi_info info = {};
+ int ret;
vattr = req->data;
- ret = lookup_vdi(vattr->name, vattr->tag, &vid,
- hdr->vdi.snapid, &created_time);
+ iocb.name = vattr->name;
+ iocb.tag = vattr->tag;
+ iocb.snapid = hdr->vdi.snapid;
+ ret = vdi_lookup(&iocb, &info);
if (ret != SD_RES_SUCCESS)
return ret;
-
/*
* the current VDI id can change if we take a snapshot,
* so we use the hash value of the VDI name as the VDI id
@@ -322,7 +339,7 @@ static int cluster_get_vdi_attr(struct request *req)
vid = fnv_64a_buf(vattr->name, strlen(vattr->name), FNV1A_64_INIT);
vid &= SD_NR_VDIS - 1;
ret = get_vdi_attr(req->data, hdr->data_length,
- vid, &attrid, created_time,
+ vid, &attrid, info.create_time,
!!(hdr->flags & SD_FLAG_CMD_CREAT),
!!(hdr->flags & SD_FLAG_CMD_EXCL),
!!(hdr->flags & SD_FLAG_CMD_DEL));
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 85339df..b9d6f97 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -134,6 +134,7 @@ struct siocb {
struct vdi_iocb {
const char *name;
+ const char *tag;
uint32_t data_len;
uint64_t size;
uint32_t base_vid;
@@ -142,6 +143,12 @@ struct vdi_iocb {
int nr_copies;
};
+struct vdi_info {
+ uint32_t vid;
+ uint32_t free_bit;
+ uint64_t create_time;
+};
+
struct store_driver {
struct list_head list;
const char *name;
@@ -251,11 +258,9 @@ int get_max_copy_number(void);
int get_req_copy_number(struct request *req);
int add_vdi_copy_number(uint32_t vid, int nr_copies);
int vdi_exist(uint32_t vid);
-int add_vdi(struct vdi_iocb *iocb, uint32_t *new_vid);
-int del_vdi(struct vdi_iocb *iocb, struct request *req);
-
-int lookup_vdi(const char *name, const char *tag, uint32_t *vid,
- uint32_t snapid, uint64_t *ctime);
+int vdi_create(struct vdi_iocb *iocb, uint32_t *new_vid);
+int vdi_delete(struct vdi_iocb *iocb, struct request *req);
+int vdi_lookup(struct vdi_iocb *iocb, struct vdi_info *info);
int read_vdis(char *data, int len, unsigned int *rsp_len);
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 33a5943..8ec7b9b 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -165,6 +165,13 @@ int fill_vdi_copy_list(void *data)
return nr * sizeof(*vc);
}
+static inline bool is_deleted(struct sheepdog_inode *inode)
+{
+ if (*inode->name == '\0')
+ return true;
+ return false;
+}
+
int vdi_exist(uint32_t vid)
{
struct sheepdog_inode *inode;
@@ -182,7 +189,7 @@ int vdi_exist(uint32_t vid)
goto out;
}
- if (*inode->name == '\0') {
+ if (is_deleted(inode)) {
ret = 0;
goto out;
}
@@ -299,131 +306,159 @@ out:
return ret;
}
-static int find_first_vdi(unsigned long start, unsigned long end,
- const char *name, const char *tag, uint32_t snapid,
- uint32_t *vid, unsigned long *deleted_nr,
- uint32_t *next_snap, uint64_t *create_time)
+/*
+ * Return SUCCESS (range of bits set):
+ * Iff we get a bitmap range [left, right) that VDI might be set between. if
+ * right < start, this means a wrap around case where we should examine the
+ * two split ranges, [left, SD_NR_VDIS - 1] and [0, right). 'Right' is the free
+ * bit that might be used by newly created VDI.
+ *
+ * Otherwise:
+ * Return NO_VDI (bit not set) or FULL_VDI (bitmap fully set)
+ */
+static int get_vdi_bitmap_range(const char *name, unsigned long *left,
+ unsigned long *right)
{
- struct sheepdog_inode *inode = NULL;
- unsigned long i;
- int ret = SD_RES_NO_MEM;
+ *left = fnv_64a_buf(name, strlen(name), FNV1A_64_INIT) &
+ (SD_NR_VDIS - 1);
+ *right = find_next_zero_bit(sys->vdi_inuse, SD_NR_VDIS, *left);
+ if (*left == *right)
+ return SD_RES_NO_VDI;
+
+ if (*right == SD_NR_VDIS) {
+ /* Wrap around */
+ *right = find_next_zero_bit(sys->vdi_inuse, SD_NR_VDIS, 0);
+ if (*right == SD_NR_VDIS)
+ return SD_RES_FULL_VDI;
+ }
+ return SD_RES_SUCCESS;
+}
+
+static inline bool is_snapshot(struct sheepdog_inode *inode)
+{
+ return !!inode->snap_ctime;
+}
+
+static inline bool has_tag(struct vdi_iocb *iocb)
+{
+ if ((iocb->tag && iocb->tag[0]) || iocb->snapid)
+ return true;
+ return false;
+}
+
+static inline bool tag_match(struct vdi_iocb *iocb,
+ struct sheepdog_inode *inode)
+{
+ const char *tag = iocb->tag;
+
+ if (inode->tag[0] && !strncmp(inode->tag, tag, sizeof(inode->tag)))
+ return true;
+ if (iocb->snapid == inode->snap_id)
+ return true;
+ return false;
+}
+
+static int fill_vdi_info_range(uint32_t left, uint32_t right,
+ struct vdi_iocb *iocb, struct vdi_info *info)
+{
+ struct sheepdog_inode *inode;
bool vdi_found = false;
- int nr_copies;
+ int nr_copies, ret;
+ uint32_t i;
+ const char *name = iocb->name;
inode = xmalloc(SD_INODE_HEADER_SIZE);
- for (i = start; i >= end; i--) {
+ for (i = right - 1; i >= left; i--) {
nr_copies = get_vdi_copy_number(i);
ret = read_object(vid_to_vdi_oid(i), (char *)inode,
SD_INODE_HEADER_SIZE, 0, nr_copies);
- if (ret != SD_RES_SUCCESS) {
- ret = SD_RES_EIO;
- goto out_free_inode;
- }
+ if (ret != SD_RES_SUCCESS)
+ goto out;
- if (inode->name[0] == '\0') {
- *deleted_nr = i;
- continue; /* deleted */
+ if (is_deleted(inode)) {
+ /* Recycle the deleted inode for fresh vdi create */
+ if (!iocb->create_snapshot)
+ info->free_bit = i;
+ continue;
}
if (!strncmp(inode->name, name, strlen(inode->name))) {
- *next_snap = inode->snap_id + 1;
-
- if (!(tag && tag[0]) && !snapid && inode->snap_ctime)
- break;
-
- vdi_found = true;
- if (tag && tag[0] &&
- strncmp(inode->tag, tag, sizeof(inode->tag)) != 0)
- continue;
- if (snapid) {
- if (snapid != inode->snap_id)
+ sd_dprintf("%s = %s, %u = %u", iocb->tag, inode->tag,
+ iocb->snapid, inode->snap_id);
+ if (has_tag(iocb)) {
+ /* Read, delete, clone on snapshots */
+ if (!is_snapshot(inode)) {
+ vdi_found = true;
continue;
-
- if (inode->snap_ctime == 0) {
- sd_dprintf("vdi %" PRIx32 " is not a "
- "snapshot\n", inode->vdi_id);
- ret = SD_RES_INVALID_PARMS;
- goto out_free_inode;
}
+ if (!tag_match(iocb, inode))
+ continue;
+ } else {
+ /*
+ * Rollback & snap create, read, delete on
+ * current working VDI
+ */
+ iocb->snapid = inode->snap_id + 1;
+ if (is_snapshot(inode))
+ /* return NO_VDI for rollback create */
+ break;
}
-
- *vid = inode->vdi_id;
- if (create_time)
- *create_time = inode->create_time;
- ret = SD_RES_SUCCESS;
- goto out_free_inode;
+ info->create_time = inode->create_time;
+ info->vid = inode->vdi_id;
+ goto out;
}
}
-
- if (vdi_found)
- ret = SD_RES_NO_TAG;
- else
- ret = SD_RES_NO_VDI;
-
-out_free_inode:
+ ret = vdi_found ? SD_RES_NO_TAG : SD_RES_NO_VDI;
+out:
free(inode);
return ret;
}
-static int do_lookup_vdi(const char *name, int namelen, uint32_t *vid,
- const char *tag, uint32_t snapid,
- uint32_t *next_snapid, unsigned long *right_nr,
- unsigned long *deleted_nr, uint64_t *create_time)
+/* Fill the VDI information from right to left in the bitmap */
+static int fill_vdi_info(unsigned long left, unsigned long right,
+ struct vdi_iocb *iocb, struct vdi_info *info)
{
int ret;
- unsigned long nr, start_nr;
-
- start_nr = fnv_64a_buf(name, namelen, FNV1A_64_INIT) & (SD_NR_VDIS - 1);
-
- sd_dprintf("looking for %s (%lx)", name, start_nr);
-
- /* bitmap search from the hash point */
- nr = find_next_zero_bit(sys->vdi_inuse, SD_NR_VDIS, start_nr);
- *right_nr = nr;
- if (nr == start_nr) {
- return SD_RES_NO_VDI;
- } else if (nr < SD_NR_VDIS) {
-right_side:
- /* look up on the right side of the hash point */
- ret = find_first_vdi(nr - 1, start_nr, name, tag, snapid, vid,
- deleted_nr, next_snapid, create_time);
- return ret;
- } else {
- /* round up... bitmap search from the head of the bitmap */
- nr = find_next_zero_bit(sys->vdi_inuse, SD_NR_VDIS, 0);
- *right_nr = nr;
- if (nr >= SD_NR_VDIS)
- return SD_RES_FULL_VDI;
- else if (nr) {
- /* look up on the left side of the hash point */
- ret = find_first_vdi(nr - 1, 0, name, tag, snapid, vid,
- deleted_nr, next_snapid,
- create_time);
- if (ret == SD_RES_NO_VDI)
- ; /* we need to go to the right side */
- else
- return ret;
- }
- nr = SD_NR_VDIS;
- goto right_side;
+ if (left < right)
+ return fill_vdi_info_range(left, right, iocb, info);
+
+ ret = fill_vdi_info_range(0, right, iocb, info);
+ switch (ret) {
+ case SD_RES_NO_VDI:
+ case SD_RES_NO_TAG:
+ ret = fill_vdi_info_range(left, SD_NR_VDIS - 1, iocb, info);
+ break;
+ default:
+ break;
}
+ return ret;
}
-int lookup_vdi(const char *name, const char *tag, uint32_t *vid,
- uint32_t snapid, uint64_t *create_time)
+/* Return SUCCESS if we find targeted VDI specified by iocb and fill info */
+int vdi_lookup(struct vdi_iocb *iocb, struct vdi_info *info)
{
- uint32_t dummy0;
- unsigned long dummy1, dummy2;
+ unsigned long left, right;
+ int ret;
+
- return do_lookup_vdi(name, strlen(name), vid, tag, snapid, &dummy0,
- &dummy1, &dummy2, create_time);
+ ret = get_vdi_bitmap_range(iocb->name, &left, &right);
+ info->free_bit = right;
+ sd_dprintf("%s left %lx right %lx, %x", iocb->name, left, right, ret);
+ switch (ret) {
+ case SD_RES_NO_VDI:
+ case SD_RES_FULL_VDI:
+ return ret;
+ case SD_RES_SUCCESS:
+ break;
+ }
+ return fill_vdi_info(left, right, iocb, info);
}
static int notify_vdi_add(uint32_t vdi_id, uint32_t nr_copies)
{
- struct sd_req hdr;
int ret = SD_RES_SUCCESS;
+ struct sd_req hdr;
char *buf;
sd_init_req(&hdr, SD_OP_NOTIFY_VDI_ADD);
@@ -444,83 +479,60 @@ static int notify_vdi_add(uint32_t vdi_id, uint32_t nr_copies)
return ret;
}
-int add_vdi(struct vdi_iocb *iocb, uint32_t *new_vid)
+/*
+ * There are 3 create operation in SD:
+ * 1. fresh create (expect NO_VDI returned from vdi_lookup)
+ * 2. snapshot create (expect SUCCESS)
+ * 3. rollback create (expect NO_VDI)
+ *
+ * Fresh create started with id = 1. Both rollback & snap create started with
+ * current working VDI's snap_id + 1. Working VDI always has the highest snapid.
+ */
+int vdi_create(struct vdi_iocb *iocb, uint32_t *new_vid)
{
- uint32_t cur_vid = 0;
- unsigned long nr, deleted_nr = SD_NR_VDIS, right_nr = SD_NR_VDIS;
+ const char *name = iocb->name;
+ struct vdi_info info = {};
int ret;
- const char *name;
- iocb->snapid = 1;
-
- if (iocb->data_len != SD_MAX_VDI_LEN)
- return SD_RES_INVALID_PARMS;
-
- name = iocb->name;
-
- ret = do_lookup_vdi(name, strlen(name), &cur_vid,
- NULL, 0, &iocb->snapid, &right_nr, &deleted_nr,
- NULL);
+ ret = vdi_lookup(iocb, &info);
if (iocb->create_snapshot) {
- if (ret != SD_RES_SUCCESS) {
- if (ret == SD_RES_NO_VDI)
- sd_printf(SDOG_CRIT, "VDI %s does not exist",
- name);
+ /* We should have base vdi, otherwise error happens */
+ if (ret != SD_RES_SUCCESS)
return ret;
- }
- nr = right_nr;
} else {
- /* we already have the same VDI or met other errors. */
+ /* If we already have the same VDI or met other errors. */
if (ret != SD_RES_NO_VDI) {
if (ret == SD_RES_SUCCESS)
ret = SD_RES_VDI_EXIST;
return ret;
}
-
- if (deleted_nr == SD_NR_VDIS)
- nr = right_nr;
- else
- nr = deleted_nr; /* we can recycle a deleted VDI */
}
+ if (!iocb->snapid)
+ iocb->snapid = 1;
+ *new_vid = info.free_bit;
+ notify_vdi_add(*new_vid, iocb->nr_copies);
- *new_vid = nr;
-
- notify_vdi_add(nr, iocb->nr_copies);
-
- sd_iprintf("creating new %s %s: size %" PRIu64 ", vid %"
- PRIx32 ", base %" PRIx32 ", cur %" PRIx32 ", copies %d",
+ sd_dprintf("%s %s: size %" PRIu64 ", vid %" PRIx32 ", base %" PRIx32
+ ", cur %" PRIx32 ", copies %d, snapid %"PRIu32,
iocb->create_snapshot ? "snapshot" : "vdi", name, iocb->size,
- *new_vid, iocb->base_vid, cur_vid, iocb->nr_copies);
+ *new_vid, iocb->base_vid, info.vid, iocb->nr_copies,
+ iocb->snapid);
- return create_vdi_obj(iocb, *new_vid, cur_vid);
+ return create_vdi_obj(iocb, *new_vid, info.vid);
}
static int start_deletion(struct request *req, uint32_t vid);
-int del_vdi(struct vdi_iocb *iocb, struct request *req)
+int vdi_delete(struct vdi_iocb *iocb, struct request *req)
{
- const char *name = iocb->name, *tag;
- uint32_t dummy0;
- unsigned long dummy1, dummy2;
+ struct vdi_info info;
int ret;
- uint32_t vid, data_len = iocb->data_len;
-
- if (data_len == SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN) {
- tag = (char *)req->data + SD_MAX_VDI_LEN;
- } else if (data_len == SD_MAX_VDI_LEN) {
- tag = NULL;
- } else {
- ret = SD_RES_INVALID_PARMS;
- goto out;
- }
- ret = do_lookup_vdi(name, strlen(name), &vid, tag,
- iocb->snapid, &dummy0, &dummy1, &dummy2,
- NULL);
+ ret = vdi_lookup(iocb, &info);
if (ret != SD_RES_SUCCESS)
goto out;
- ret = start_deletion(req, vid);
+ ret = start_deletion(req, info.vid);
out:
return ret;
}
@@ -616,7 +628,7 @@ static void delete_one(struct work *work)
goto out;
}
- if (inode->vdi_size == 0 && inode->name[0] == '\0')
+ if (inode->vdi_size == 0 && is_deleted(inode))
goto out;
for (nr_deleted = 0, i = 0; i < MAX_DATA_OBJS; i++) {
@@ -644,7 +656,7 @@ static void delete_one(struct work *work)
if (nr_deleted)
notify_vdi_deletion(vdi_id);
- if (*(inode->name) == '\0')
+ if (is_deleted(inode))
goto out;
inode->vdi_size = 0;
@@ -703,7 +715,7 @@ again:
goto err;
}
- if (inode->name[0] != '\0' && vid != dw->vid)
+ if (!is_deleted(inode) && vid != dw->vid)
goto out;
for (i = 0; i < ARRAY_SIZE(inode->child_vdi_id); i++) {
--
1.7.9.5
More information about the sheepdog
mailing list