[sheepdog] [PATCH v2 3/6] sheep: store snapshot vid information in memory
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Wed May 1 10:41:46 CEST 2013
This renames vdi_copy to vdi_state and adds a new field 'snapshot' to
it so that we can know whether the vdi is a snapshot or not.
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
include/sheep.h | 5 ---
include/sheepdog_proto.h | 5 +++
sheep/group.c | 14 ++++----
sheep/ops.c | 11 +++---
sheep/plain_store.c | 7 ++--
sheep/sheep_priv.h | 11 ++++--
sheep/vdi.c | 91 ++++++++++++++++++++++++------------------------
7 files changed, 77 insertions(+), 67 deletions(-)
diff --git a/include/sheep.h b/include/sheep.h
index 26b9639..1c3d847 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -36,11 +36,6 @@ struct vnode_info {
int refcnt;
};
-struct vdi_copy {
- uint32_t vid;
- uint32_t nr_copies;
-};
-
#define TRACE_GRAPH_ENTRY 0x01
#define TRACE_GRAPH_RETURN 0x02
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 886b535..aef97dd 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -138,6 +138,11 @@ struct sd_req {
uint32_t copies;
uint32_t tag;
} cluster;
+ struct {
+ uint32_t old_vid;
+ uint32_t new_vid;
+ uint32_t copies;
+ } vdi_state;
uint32_t __pad[8];
};
diff --git a/sheep/group.c b/sheep/group.c
index 4044d12..bb5d703 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -647,7 +647,7 @@ static int get_vdis_from(struct sd_node *node)
{
struct sd_req hdr;
struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
- struct vdi_copy *vc = NULL;
+ struct vdi_state *vs = NULL;
int i, ret = SD_RES_SUCCESS;
unsigned int rlen;
int count;
@@ -656,21 +656,21 @@ static int get_vdis_from(struct sd_node *node)
goto out;
rlen = SD_DATA_OBJ_SIZE; /* FIXME */
- vc = xzalloc(rlen);
+ vs = xzalloc(rlen);
sd_init_req(&hdr, SD_OP_GET_VDI_COPIES);
hdr.data_length = rlen;
hdr.epoch = sys_epoch();
- ret = sheep_exec_req(&node->nid, &hdr, (char *)vc);
+ ret = sheep_exec_req(&node->nid, &hdr, (char *)vs);
if (ret != SD_RES_SUCCESS)
goto out;
- count = rsp->data_length / sizeof(*vc);
+ count = rsp->data_length / sizeof(*vs);
for (i = 0; i < count; i++) {
- set_bit(vc[i].vid, sys->vdi_inuse);
- add_vdi_copy_number(vc[i].vid, vc[i].nr_copies);
+ set_bit(vs[i].vid, sys->vdi_inuse);
+ add_vdi_state(vs[i].vid, vs[i].nr_copies, vs[i].snapshot);
}
out:
- free(vc);
+ free(vs);
return ret;
}
diff --git a/sheep/ops.c b/sheep/ops.c
index 48dd08c..7f27bc3 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -390,7 +390,7 @@ static int local_read_vdis(const struct sd_req *req, struct sd_rsp *rsp,
static int local_get_vdi_copies(const struct sd_req *req, struct sd_rsp *rsp,
void *data)
{
- rsp->data_length = fill_vdi_copy_list(data);
+ rsp->data_length = fill_vdi_state_list(data);
return SD_RES_SUCCESS;
}
@@ -618,10 +618,13 @@ static int cluster_cleanup(const struct sd_req *req, struct sd_rsp *rsp,
static int cluster_notify_vdi_add(const struct sd_req *req, struct sd_rsp *rsp,
void *data)
{
- uint32_t vid = *(uint32_t *)data;
- uint32_t nr_copies = *(uint32_t *)((char *)data + sizeof(vid));
+ if (req->vdi_state.old_vid)
+ /* make the previous working vdi a snapshot */
+ add_vdi_state(req->vdi_state.old_vid,
+ get_vdi_copy_number(req->vdi_state.old_vid),
+ true);
- add_vdi_copy_number(vid, nr_copies);
+ add_vdi_state(req->vdi_state.new_vid, req->vdi_state.copies, false);
return SD_RES_SUCCESS;
}
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index b539df1..640f769 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -168,7 +168,7 @@ int default_cleanup(void)
return SD_RES_SUCCESS;
}
-static int init_vdi_copy_number(uint64_t oid, char *wd)
+static int init_vdi_state(uint64_t oid, char *wd)
{
char path[PATH_MAX];
int fd, flags = get_open_flags(oid, false, 0), ret;
@@ -190,7 +190,8 @@ static int init_vdi_copy_number(uint64_t oid, char *wd)
goto out;
}
- add_vdi_copy_number(oid_to_vid(oid), inode->nr_copies);
+ add_vdi_state(oid_to_vid(oid), inode->nr_copies,
+ vdi_is_snapshot(inode));
ret = SD_RES_SUCCESS;
out:
@@ -206,7 +207,7 @@ static int init_objlist_and_vdi_bitmap(uint64_t oid, char *wd, void *arg)
if (is_vdi_obj(oid)) {
sd_dprintf("found the VDI object %" PRIx64, oid);
set_bit(oid_to_vid(oid), sys->vdi_inuse);
- ret = init_vdi_copy_number(oid, wd);
+ ret = init_vdi_state(oid, wd);
if (ret != SD_RES_SUCCESS)
return ret;
}
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 1fc291c..99b44a2 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -149,6 +149,13 @@ struct vdi_info {
uint64_t create_time;
};
+struct vdi_state {
+ uint32_t vid;
+ uint8_t nr_copies;
+ uint8_t snapshot;
+ uint16_t _pad;
+};
+
struct store_driver {
struct list_head list;
const char *name;
@@ -252,12 +259,12 @@ int init_base_path(const char *dir);
int init_disk_space(const char *d);
int lock_base_dir(const char *d);
-int fill_vdi_copy_list(void *data);
+int fill_vdi_state_list(void *data);
int get_vdi_copy_number(uint32_t vid);
int get_obj_copy_number(uint64_t oid, int nr_zones);
int get_max_copy_number(void);
int get_req_copy_number(struct request *req);
-int add_vdi_copy_number(uint32_t vid, int nr_copies);
+int add_vdi_state(uint32_t vid, int nr_copies, bool snapshot);
int vdi_exist(uint32_t vid);
int vdi_create(struct vdi_iocb *iocb, uint32_t *new_vid);
int vdi_delete(struct vdi_iocb *iocb, struct request *req);
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 057dd5a..3085148 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -16,24 +16,25 @@
#include "sheepdog_proto.h"
#include "sheep_priv.h"
-struct vdi_copy_entry {
+struct vdi_state_entry {
uint32_t vid;
unsigned int nr_copies;
+ bool snapshot;
struct rb_node node;
};
static uint32_t max_copies;
-static struct rb_root vdi_copy_root = RB_ROOT;
-static pthread_rwlock_t vdi_copy_lock = PTHREAD_RWLOCK_INITIALIZER;
+static struct rb_root vdi_state_root = RB_ROOT;
+static pthread_rwlock_t vdi_state_lock = PTHREAD_RWLOCK_INITIALIZER;
-static struct vdi_copy_entry *vdi_copy_search(struct rb_root *root,
- uint32_t vid)
+static struct vdi_state_entry *vdi_state_search(struct rb_root *root,
+ uint32_t vid)
{
struct rb_node *n = root->rb_node;
- struct vdi_copy_entry *t;
+ struct vdi_state_entry *t;
while (n) {
- t = rb_entry(n, struct vdi_copy_entry, node);
+ t = rb_entry(n, struct vdi_state_entry, node);
if (vid < t->vid)
n = n->rb_left;
@@ -46,16 +47,16 @@ static struct vdi_copy_entry *vdi_copy_search(struct rb_root *root,
return NULL;
}
-static struct vdi_copy_entry *vdi_copy_insert(struct rb_root *root,
- struct vdi_copy_entry *new)
+static struct vdi_state_entry *vdi_state_insert(struct rb_root *root,
+ struct vdi_state_entry *new)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
- struct vdi_copy_entry *entry;
+ struct vdi_state_entry *entry;
while (*p) {
parent = *p;
- entry = rb_entry(parent, struct vdi_copy_entry, node);
+ entry = rb_entry(parent, struct vdi_state_entry, node);
if (new->vid < entry->vid)
p = &(*p)->rb_left;
@@ -72,11 +73,11 @@ static struct vdi_copy_entry *vdi_copy_insert(struct rb_root *root,
int get_vdi_copy_number(uint32_t vid)
{
- struct vdi_copy_entry *entry;
+ struct vdi_state_entry *entry;
- pthread_rwlock_rdlock(&vdi_copy_lock);
- entry = vdi_copy_search(&vdi_copy_root, vid);
- pthread_rwlock_unlock(&vdi_copy_lock);
+ pthread_rwlock_rdlock(&vdi_state_lock);
+ entry = vdi_state_search(&vdi_state_root, vid);
+ pthread_rwlock_unlock(&vdi_state_lock);
if (!entry) {
sd_eprintf("No VDI copy entry for %" PRIx32 " found", vid);
@@ -113,50 +114,54 @@ int get_max_copy_number(void)
return nr_copies;
}
-int add_vdi_copy_number(uint32_t vid, int nr_copies)
+int add_vdi_state(uint32_t vid, int nr_copies, bool snapshot)
{
- struct vdi_copy_entry *entry, *old;
+ struct vdi_state_entry *entry, *old;
entry = xzalloc(sizeof(*entry));
entry->vid = vid;
entry->nr_copies = nr_copies;
+ entry->snapshot = snapshot;
sd_dprintf("%" PRIx32 ", %d", vid, nr_copies);
- pthread_rwlock_wrlock(&vdi_copy_lock);
- old = vdi_copy_insert(&vdi_copy_root, entry);
+ pthread_rwlock_wrlock(&vdi_state_lock);
+ old = vdi_state_insert(&vdi_state_root, entry);
if (old) {
free(entry);
entry = old;
entry->nr_copies = nr_copies;
+ entry->snapshot = snapshot;
}
if (uatomic_read(&max_copies) == 0 ||
nr_copies > uatomic_read(&max_copies))
uatomic_set(&max_copies, nr_copies);
- pthread_rwlock_unlock(&vdi_copy_lock);
+ pthread_rwlock_unlock(&vdi_state_lock);
return SD_RES_SUCCESS;
}
-int fill_vdi_copy_list(void *data)
+int fill_vdi_state_list(void *data)
{
int nr = 0;
struct rb_node *n;
- struct vdi_copy *vc = data;
- struct vdi_copy_entry *entry;
-
- pthread_rwlock_rdlock(&vdi_copy_lock);
- for (n = rb_first(&vdi_copy_root); n; n = rb_next(n)) {
- entry = rb_entry(n, struct vdi_copy_entry, node);
- vc->vid = entry->vid;
- vc->nr_copies = entry->nr_copies;
- vc++;
+ struct vdi_state *vs = data;
+ struct vdi_state_entry *entry;
+
+ pthread_rwlock_rdlock(&vdi_state_lock);
+ for (n = rb_first(&vdi_state_root); n; n = rb_next(n)) {
+ entry = rb_entry(n, struct vdi_state_entry, node);
+ memset(vs, 0, sizeof(*vs));
+ vs->vid = entry->vid;
+ vs->nr_copies = entry->nr_copies;
+ vs->snapshot = entry->snapshot;
+ vs++;
nr++;
}
- pthread_rwlock_unlock(&vdi_copy_lock);
+ pthread_rwlock_unlock(&vdi_state_lock);
- return nr * sizeof(*vc);
+ return nr * sizeof(*vs);
}
static inline bool vdi_is_deleted(struct sd_inode *inode)
@@ -443,26 +448,20 @@ int vdi_lookup(struct vdi_iocb *iocb, struct vdi_info *info)
return fill_vdi_info(left, right, iocb, info);
}
-static int notify_vdi_add(uint32_t vdi_id, uint32_t nr_copies)
+static int notify_vdi_add(uint32_t vdi_id, uint32_t nr_copies, uint32_t old_vid)
{
int ret = SD_RES_SUCCESS;
struct sd_req hdr;
- char *buf;
sd_init_req(&hdr, SD_OP_NOTIFY_VDI_ADD);
- hdr.flags = SD_FLAG_CMD_WRITE;
- hdr.data_length = sizeof(vdi_id) + sizeof(nr_copies);
-
- buf = xmalloc(sizeof(vdi_id) + sizeof(nr_copies));
- memcpy(buf, &vdi_id, sizeof(vdi_id));
- memcpy(buf + sizeof(vdi_id), &nr_copies, sizeof(nr_copies));
+ hdr.vdi_state.old_vid = old_vid;
+ hdr.vdi_state.new_vid = vdi_id;
+ hdr.vdi_state.copies = nr_copies;
- ret = exec_local_req(&hdr, buf);
+ ret = exec_local_req(&hdr, NULL);
if (ret != SD_RES_SUCCESS)
- sd_eprintf("fail to notify vdi add event(%" PRIx32 ", %d)",
- vdi_id, nr_copies);
-
- free(buf);
+ sd_eprintf("fail to notify vdi add event(%" PRIx32 ", %d, %"
+ PRIx32 ")", vdi_id, nr_copies, old_vid);
return ret;
}
@@ -499,7 +498,7 @@ int vdi_create(struct vdi_iocb *iocb, uint32_t *new_vid)
if (!iocb->snapid)
iocb->snapid = 1;
*new_vid = info.free_bit;
- notify_vdi_add(*new_vid, iocb->nr_copies);
+ notify_vdi_add(*new_vid, iocb->nr_copies, info.vid);
sd_dprintf("%s %s: size %" PRIu64 ", vid %" PRIx32 ", base %" PRIx32
", cur %" PRIx32 ", copies %d, snapid %"PRIu32,
--
1.8.1.3.566.gaa39828
More information about the sheepdog
mailing list