Signed-off-by: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp> --- collie/collie.h | 14 ++- collie/group.c | 17 ++- collie/net.c | 2 +- collie/vdi.c | 278 ++++++++++++++++++++++++++++++++-------------- include/bitops.h | 132 ++++++++++++++++++++++ include/meta.h | 65 ++++++----- include/sheepdog_proto.h | 8 +- include/util.h | 3 +- shepherd/shepherd.c | 80 +++++++------- 9 files changed, 428 insertions(+), 171 deletions(-) create mode 100644 include/bitops.h diff --git a/collie/collie.h b/collie/collie.h index 5cd2383..8829079 100644 --- a/collie/collie.h +++ b/collie/collie.h @@ -19,6 +19,7 @@ #include "logger.h" #include "work.h" #include "net.h" +#include "meta.h" #define SD_MSG_JOIN 0x01 #define SD_MSG_VDI_OP 0x02 @@ -70,6 +71,8 @@ struct cluster_info { struct list_head vm_list; struct list_head pending_list; + DECLARE_BITMAP(vdi_inuse, SD_NR_VDIS); + int nr_sobjs; }; @@ -79,12 +82,13 @@ int create_listen_port(int port, void *data); int init_store(char *dir); -int add_vdi(char *buf, int len, uint64_t size, - uint64_t *added_oid, uint64_t base_oid, uint32_t tag, int copies, - uint16_t flags); +int add_vdi(char *data, int data_len, uint64_t size, + uint64_t *new_oid, uint64_t base_oid, uint32_t copies, + int is_snapshot); + +int lookup_vdi(char *data, int data_len, uint64_t *oid, uint32_t snapid); -int lookup_vdi(char *filename, uint64_t * oid, - uint32_t tag, int do_lock, int *current); +int read_vdis(char *data, int len, unsigned int *rsp_len); int make_super_object(struct sd_vdi_req *hdr); diff --git a/collie/group.c b/collie/group.c index a49c1be..4a2397b 100644 --- a/collie/group.c +++ b/collie/group.c @@ -199,6 +199,9 @@ void cluster_queue_request(struct work *work, int idx) rsp->result = SD_RES_SUCCESS; break; + case SD_OP_READ_VDIS: + rsp->result = read_vdis(req->data, hdr->data_length, &rsp->data_length); + break; default: /* forward request to group */ goto forward; @@ -506,21 +509,20 @@ static void vdi_op(struct vdi_op_message *msg) const struct 
sd_vdi_req *hdr = &msg->req; struct sd_vdi_rsp *rsp = &msg->rsp; void *data = msg->data; - int ret = SD_RES_SUCCESS, is_current; + int ret = SD_RES_SUCCESS; uint64_t oid = 0; switch (hdr->opcode) { case SD_OP_NEW_VDI: ret = add_vdi(data, hdr->data_length, hdr->vdi_size, &oid, - hdr->base_oid, hdr->tag, hdr->copies, hdr->flags); + hdr->base_oid, hdr->copies, + hdr->snapid); break; case SD_OP_LOCK_VDI: case SD_OP_GET_VDI_INFO: - ret = lookup_vdi(data, &oid, hdr->tag, 1, &is_current); + ret = lookup_vdi(data, hdr->data_length, &oid, hdr->snapid); if (ret != SD_RES_SUCCESS) break; - if (is_current) - rsp->flags = SD_VDI_RSP_FLAG_CURRENT; break; case SD_OP_RELEASE_VDI: break; @@ -556,7 +558,12 @@ static void vdi_op_done(struct vdi_op_message *msg) switch (hdr->opcode) { case SD_OP_NEW_VDI: + { + unsigned long nr = (rsp->oid & ~VDI_BIT) >> VDI_SPACE_SHIFT; + vprintf(SDOG_INFO "done %d %ld %" PRIx64 "\n", ret, nr, rsp->oid); + set_bit(nr, sys->vdi_inuse); break; + } case SD_OP_LOCK_VDI: if (lookup_vm(&sys->vm_list, (char *)data)) { ret = SD_RES_VDI_LOCKED; diff --git a/collie/net.c b/collie/net.c index 04f9547..749c33d 100644 --- a/collie/net.c +++ b/collie/net.c @@ -89,10 +89,10 @@ static void queue_request(struct request *req) case SD_OP_MAKE_FS: case SD_OP_SHUTDOWN: case SD_OP_STAT_CLUSTER: + case SD_OP_READ_VDIS: req->work.fn = cluster_queue_request; break; case SD_OP_SO: - case SD_OP_SO_NEW_VDI: case SD_OP_SO_LOOKUP_VDI: case SD_OP_SO_READ_VDIS: case SD_OP_SO_STAT: diff --git a/collie/vdi.c b/collie/vdi.c index 5904488..25cc83c 100644 --- a/collie/vdi.c +++ b/collie/vdi.c @@ -16,17 +16,23 @@ #include "meta.h" #include "collie.h" + /* TODO: should be performed atomically */ -static int create_inode_obj(struct sheepdog_node_list_entry *entries, - int nr_nodes, uint64_t epoch, int copies, - uint64_t oid, uint64_t size, uint64_t base_oid) +static int create_vdi_obj(char *name, uint64_t new_oid, uint64_t size, + uint64_t base_oid, uint64_t cur_oid, uint32_t copies, + 
uint32_t snapid, int is_snapshot) { - struct sheepdog_inode inode, base; + struct sheepdog_node_list_entry entries[SD_MAX_NODES]; + /* we are not called concurrently */ + static struct sheepdog_inode new, base, cur; struct timeval tv; - int ret; + int ret, nr_nodes; + unsigned long block_size = SD_DATA_OBJ_SIZE; + + nr_nodes = build_node_list(&sys->sd_node_list, entries); if (base_oid) { - ret = read_object(entries, nr_nodes, epoch, + ret = read_object(entries, nr_nodes, sys->epoch, base_oid, (char *)&base, sizeof(base), 0, copies); if (ret < 0) @@ -35,26 +41,45 @@ static int create_inode_obj(struct sheepdog_node_list_entry *entries, gettimeofday(&tv, NULL); - memset(&inode, 0, sizeof(inode)); + if (is_snapshot) { + if (cur_oid != base_oid) { + vprintf(SDOG_INFO "tree snapshot %s %" PRIx64 " %" PRIx64 "\n", + name, cur_oid, base_oid); + + ret = read_object(entries, nr_nodes, sys->epoch, + cur_oid, (char *)&cur, sizeof(cur), 0, + copies); + if (ret < 0) { + vprintf(SDOG_ERR "failed\n"); + return SD_RES_BASE_VDI_READ; + } + + cur.snap_ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000; + } else + base.snap_ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000; + } + + memset(&new, 0, sizeof(new)); - inode.oid = oid; - inode.vdi_size = size; - inode.block_size = SD_DATA_OBJ_SIZE; - inode.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000; - inode.nr_copies = copies; + strncpy(new.name, name, sizeof(new.name)); + new.oid = new_oid; + new.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000; + new.vdi_size = size; + new.copy_policy = 0; + new.nr_copies = copies; + new.block_size_shift = find_next_bit(&block_size, BITS_PER_LONG, 0); + new.snap_id = snapid; if (base_oid) { int i; - eprintf("%zd %zd\n", sizeof(inode.data_oid), - ARRAY_SIZE(base.child_oid)); - inode.parent_oid = base_oid; - memcpy(inode.data_oid, base.data_oid, + new.parent_oid = base_oid; + memcpy(new.data_oid, base.data_oid, MAX_DATA_OBJS * sizeof(uint64_t)); for (i = 0; i < 
ARRAY_SIZE(base.child_oid); i++) { if (!base.child_oid[i]) { - base.child_oid[i] = oid; + base.child_oid[i] = new_oid; break; } } @@ -62,120 +87,203 @@ static int create_inode_obj(struct sheepdog_node_list_entry *entries, if (i == ARRAY_SIZE(base.child_oid)) return SD_RES_NO_BASE_VDI; + } + + if (is_snapshot && cur_oid != base_oid) { + ret = write_object(entries, nr_nodes, sys->epoch, + cur_oid, (char *)&cur, sizeof(cur), 0, + copies, 0); + if (ret < 0) { + vprintf(SDOG_ERR "failed\n"); + return SD_RES_BASE_VDI_READ; + } + } + + if (base_oid) { ret = write_object(entries, nr_nodes, - epoch, base_oid, (char *)&base, + sys->epoch, base_oid, (char *)&base, sizeof(base), 0, copies, 0); - if (ret < 0) + if (ret < 0) { + vprintf(SDOG_ERR "failed\n"); return SD_RES_BASE_VDI_WRITE; + } } - ret = write_object(entries, nr_nodes, epoch, - oid, (char *)&inode, sizeof(inode), 0, copies, 1); + ret = write_object(entries, nr_nodes, sys->epoch, + new_oid, (char *)&new, sizeof(new), 0, copies, 1); if (ret < 0) return SD_RES_VDI_WRITE; return ret; } -/* - * TODO: handle larger buffer - */ -int add_vdi(char *name, int len, uint64_t size, - uint64_t *added_oid, uint64_t base_oid, uint32_t tag, int copies, - uint16_t flags) +static int find_first_vdi(unsigned long start, unsigned long end, + char *name, int namelen, uint32_t snapid, uint64_t *oid, + unsigned long *deleted_nr, uint32_t *next_snap) { struct sheepdog_node_list_entry entries[SD_MAX_NODES]; + static struct sheepdog_inode inode; + unsigned long i; int nr_nodes, nr_reqs; - uint64_t oid = 0; int ret; - struct sd_so_req req; - struct sd_so_rsp *rsp = (struct sd_so_rsp *)&req; - - memset(&req, 0, sizeof(req)); nr_nodes = build_node_list(&sys->sd_node_list, entries); - dprintf("%s (%d) %" PRIu64 ", base: %" PRIu64 "\n", name, len, size, - base_oid); - nr_reqs = sys->nr_sobjs; if (nr_reqs > nr_nodes) nr_reqs = nr_nodes; - memset(&req, 0, sizeof(req)); - - eprintf("%d %d\n", copies, sys->nr_sobjs); - /* qemu doesn't specify the 
copies, then we use the default. */ - if (!copies) - copies = sys->nr_sobjs; - - req.opcode = SD_OP_SO_NEW_VDI; - req.copies = copies; - req.tag = tag; - req.flags |= flags; + for (i = start; i >= end; i--) { + ret = read_object(entries, nr_nodes, sys->epoch, + bit_to_oid(i), (char *)&inode, sizeof(inode), 0, + nr_reqs); + if (ret < 0) + return SD_RES_EIO; - ret = exec_reqs(entries, nr_nodes, sys->epoch, - SD_DIR_OID, (struct sd_req *)&req, name, len, 0, - nr_reqs, nr_reqs); + if (inode.name[0] == '\0') { + *deleted_nr = i; + continue; /* deleted */ + } - if (ret < 0) - return rsp->result; + if (!strncmp(inode.name, name, strlen(inode.name))) { + if (snapid && snapid != inode.snap_id) + continue; - oid = rsp->oid; - *added_oid = oid; + *next_snap = inode.snap_id + 1; + *oid = inode.oid; + return SD_RES_SUCCESS; + } + } + return SD_RES_NO_VDI; +} - dprintf("%s (%d) %" PRIu64 ", base: %" PRIu64 "\n", name, len, size, - oid); - ret = create_inode_obj(entries, nr_nodes, sys->epoch, copies, - oid, size, base_oid); +static int do_lookup_vdi(char *name, int namelen, uint64_t *oid, uint32_t snapid, + uint32_t *next_snapid, + unsigned long *right_nr, unsigned long *deleted_nr) +{ + int ret; + unsigned long nr, start_nr; + + start_nr = fnv_64a_buf(name, namelen, FNV1A_64_INIT) & (SD_NR_VDIS - 1); + + vprintf(SDOG_INFO "looking for %s %d, %lx\n", name, namelen, start_nr); + + /* bitmap search from the hash point */ + nr = find_next_zero_bit(sys->vdi_inuse, SD_NR_VDIS, start_nr); + *right_nr = nr; + if (nr == start_nr) { + return SD_RES_NO_VDI; + } else if (nr < SD_NR_VDIS) { + right_side: + /* look up on the right side of the hash point */ + ret = find_first_vdi(nr - 1, start_nr, name, namelen, snapid, oid, + deleted_nr, next_snapid); + return ret; + } else { + /* round up... 
bitmap search from the head of the bitmap */ + nr = find_next_zero_bit(sys->vdi_inuse, SD_NR_VDIS, 0); + *right_nr = nr; + if (nr >= SD_NR_VDIS) + return SD_RES_FULL_VDI; + else if (nr) { + /* look up on the left side of the hash point */ + ret = find_first_vdi(nr - 1, 0, name, namelen, snapid, oid, + deleted_nr, next_snapid); + if (ret == SD_RES_NO_VDI) + ; /* we need to go to the right side */ + else + return ret; + } - return ret; + nr = SD_NR_VDIS; + goto right_side; + } } -int del_vdi(char *name, int len) +int lookup_vdi(char *data, int data_len, uint64_t *oid, uint32_t snapid) { - return 0; + char *name = data; + uint32_t dummy0; + unsigned long dummy1, dummy2; + + if (data_len != SD_MAX_VDI_LEN) + return SD_RES_INVALID_PARMS; + + return do_lookup_vdi(name, strlen(name), oid, snapid, + &dummy0, &dummy1, &dummy2); } -int lookup_vdi(char *filename, uint64_t * oid, uint32_t tag, int do_lock, - int *current) +int add_vdi(char *data, int data_len, uint64_t size, + uint64_t *new_oid, uint64_t base_oid, uint32_t copies, int is_snapshot) { - struct sheepdog_node_list_entry entries[SD_MAX_NODES]; - int nr_nodes, nr_reqs; + uint64_t cur_oid; + uint32_t next_snapid; + unsigned long nr, deleted_nr = SD_NR_VDIS, right_nr = SD_NR_VDIS; int ret; - struct sd_so_req req; - struct sd_so_rsp *rsp = (struct sd_so_rsp *)&req; + char *name; - memset(&req, 0, sizeof(req)); + if (data_len != SD_MAX_VDI_LEN) + return SD_RES_INVALID_PARMS; - nr_nodes = build_node_list(&sys->sd_node_list, entries); + name = data; - *current = 0; + ret = do_lookup_vdi(name, strlen(name), &cur_oid, 0, &next_snapid, + &right_nr, &deleted_nr); - dprintf("looking for %s %zd\n", filename, strlen(filename)); + if (is_snapshot) { + if (ret != SD_RES_SUCCESS) { + if (ret == SD_RES_NO_VDI) + vprintf(SDOG_CRIT "we dont's have %s\n", name); + return ret; + } + nr = right_nr; + } else { + /* we already have the same vdi or met other errors. 
*/ + if (ret != SD_RES_NO_VDI) { + if (ret == SD_RES_SUCCESS) + ret = SD_RES_VDI_EXIST; + return ret; + } - nr_reqs = sys->nr_sobjs; - if (nr_reqs > nr_nodes) - nr_reqs = nr_nodes; + if (deleted_nr == SD_NR_VDIS) + nr = right_nr; + else + nr = deleted_nr; /* we can recycle a deleted vdi */ - memset(&req, 0, sizeof(req)); + next_snapid = 1; + } - req.opcode = SD_OP_SO_LOOKUP_VDI; - req.tag = tag; + *new_oid = bit_to_oid(nr); - ret = exec_reqs(entries, nr_nodes, sys->epoch, - SD_DIR_OID, (struct sd_req *)&req, filename, strlen(filename), 0, - nr_reqs, 1); + vprintf(SDOG_INFO "we create a new vdi, %d %s (%zd) %" PRIu64 ", oid: %" + PRIx64 ", base %" PRIx64 ", cur %" PRIx64 " \n", + is_snapshot, name, strlen(name), size, *new_oid, base_oid, cur_oid); - *oid = rsp->oid; - if (rsp->flags & SD_VDI_RSP_FLAG_CURRENT) - *current = 1; + if (!copies) { + vprintf(SDOG_WARNING "qemu doesn't specify the copies... %d\n", + sys->nr_sobjs); + copies = sys->nr_sobjs; + } - dprintf("looking for %s %lx\n", filename, *oid); + ret = create_vdi_obj(name, *new_oid, size, base_oid, cur_oid, copies, + next_snapid, is_snapshot); - if (ret < 0) - return rsp->result; + return ret; +} + +int del_vdi(char *name, int len) +{ + return 0; +} + +int read_vdis(char *data, int len, unsigned int *rsp_len) +{ + if (len != sizeof(sys->vdi_inuse)) + return SD_RES_INVALID_PARMS; + + memcpy(data, sys->vdi_inuse, sizeof(sys->vdi_inuse)); + *rsp_len = sizeof(sys->vdi_inuse); return SD_RES_SUCCESS; } diff --git a/include/bitops.h b/include/bitops.h new file mode 100644 index 0000000..e3191dd --- /dev/null +++ b/include/bitops.h @@ -0,0 +1,132 @@ +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) +#define BITS_PER_BYTE 8 +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) +#define DECLARE_BITMAP(name,bits) \ + unsigned long name[BITS_TO_LONGS(bits)] + +#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long)) + +#define ffz(x) __ffs(~(x)) + +static inline unsigned long __ffs(unsigned long word) 
+{ + int num = 0; + + if (BITS_PER_LONG == 64) { + if ((word & 0xffffffff) == 0) { + num += 32; + word >>= 32; + } + } + + if ((word & 0xffff) == 0) { + num += 16; + word >>= 16; + } + if ((word & 0xff) == 0) { + num += 8; + word >>= 8; + } + if ((word & 0xf) == 0) { + num += 4; + word >>= 4; + } + if ((word & 0x3) == 0) { + num += 2; + word >>= 2; + } + if ((word & 0x1) == 0) + num += 1; + return num; +} + +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) + +static inline unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp |= ~0UL >> (BITS_PER_LONG - offset); + if (size < BITS_PER_LONG) + goto found_first; + if (~tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if (~(tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp |= ~0UL << size; + if (tmp == ~0UL) /* Are any bits zero? */ + return result + size; /* Nope. 
*/ +found_middle: + return result + ffz(tmp); +} + +static inline unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp &= (~0UL << offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp &= (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + __ffs(tmp); +} + +static inline void set_bit(int nr, unsigned long *addr) +{ + addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); +} + +static inline int test_bit(unsigned int nr, const unsigned long *addr) +{ + return ((1UL << (nr % BITS_PER_LONG)) & + (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; +} diff --git a/include/meta.h b/include/meta.h index 67d2b11..5b296b2 100644 --- a/include/meta.h +++ b/include/meta.h @@ -21,33 +21,22 @@ /* * Object ID rules * - * 0 - 17 (18 bits): data object - * 17 - 55 (37 bits): inode object - * 56 - 63 ( 8 bits): PGID - * - * each VDI can use 2^18 data objects. 
+ * 0 - 19 (20 bits): data object space + * 20 - 31 (12 bits): reserved data object space + * 32 - 55 (24 bits): vdi object space + * 56 - 62 ( 7 bits): reserved vdi object space + * 63 - 63 ( 1 bit ): set if vdi */ -#define DATA_SPACE_SHIFT 18 - +#define VDI_SPACE 24 +#define VDI_SPACE_SHIFT 32 +#define VDI_BIT (UINT64_C(1) << 63) #define DEAFAULT_NR_COPIES 1 +#define SD_MAX_VDI_LEN 256 +#define MAX_DATA_OBJS (1ULL << 20) +#define MAX_CHILDREN 1024 -static inline uint64_t oid_to_ino(uint64_t inode_oid) -{ - return (inode_oid >> DATA_SPACE_SHIFT) & ((UINT64_C(1) << 37) - 1); -} - -static inline int is_data_obj_writeable(uint64_t inode_oid, uint64_t data_oid) -{ - return oid_to_ino(inode_oid) == oid_to_ino(data_oid); -} - -static inline int is_data_obj(uint64_t oid) -{ - return oid & ((UINT64_C(1) << DATA_SPACE_SHIFT) - 1); -} - -#define SHEEPDOG_SUPER_OBJ_SIZE (UINT64_C(1) << 12) +#define SD_NR_VDIS (1U << 24) #define FLAG_CURRENT 1 @@ -63,19 +52,37 @@ struct sheepdog_vdi_info { char tag[SD_MAX_VDI_LEN]; }; -#define MAX_DATA_OBJS (1 << 18) -#define MAX_CHILDREN 1024 - struct sheepdog_inode { + char name[SD_MAX_VDI_LEN]; uint64_t oid; uint64_t ctime; + uint64_t snap_ctime; uint64_t vdi_size; - uint64_t block_size; - uint32_t copy_policy; - uint32_t nr_copies; + uint16_t copy_policy; + uint8_t nr_copies; + uint8_t block_size_shift; + uint32_t snap_id; uint64_t parent_oid; uint64_t child_oid[MAX_CHILDREN]; uint64_t data_oid[MAX_DATA_OBJS]; }; +static inline int is_data_obj_writeable(struct sheepdog_inode *inode, int idx) +{ + return (inode->oid >> VDI_SPACE_SHIFT) == + (inode->data_oid[idx] >> VDI_SPACE_SHIFT); +} + +static inline int is_data_obj(uint64_t oid) +{ + return !(VDI_BIT & oid); +} + +#define NR_VDIS (1U << DATA_SPECE_SHIFT) + +static inline uint64_t bit_to_oid(unsigned long nr) +{ + return ((unsigned long long)nr << VDI_SPACE_SHIFT) | VDI_BIT; +} + #endif diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h index 9863aa3..b6afbe1 100644 ---
a/include/sheepdog_proto.h +++ b/include/sheepdog_proto.h @@ -20,8 +20,6 @@ #define SD_MAX_NODES 1024 #define SD_MAX_VMS 4096 -#define SD_MAX_VDI_LEN 256 - /* -> vmon */ #define SD_OP_NEW_VDI 0x11 @@ -36,6 +34,7 @@ #define SD_OP_GET_EPOCH 0x23 #define SD_OP_SHUTDOWN 0x24 #define SD_OP_READ_EPOCH 0x25 +#define SD_OP_READ_VDIS 0x26 #define SD_OP_DEBUG_INC_NVER 0xA0 #define SD_OP_DEBUG_SET_NODE 0xA1 @@ -96,6 +95,7 @@ #define SD_RES_SHUTDOWN 0x18 /* Sheepdog is shutting down */ #define SD_RES_NO_MEM 0x19 /* Cannot allocate memory */ #define SD_RES_INCONSISTENT_EPOCHS 0x1A /* There is inconsistency between epochs */ +#define SD_RES_FULL_VDI 0x1B /* we already have the maximum vdis */ #define SD_VDI_RSP_FLAG_CURRENT 0x01 @@ -206,10 +206,10 @@ struct sd_vdi_req { uint32_t id; uint32_t data_length; uint64_t base_oid; - uint64_t tag; uint64_t vdi_size; uint32_t copies; - uint32_t pad[1]; + uint32_t snapid; + uint32_t pad[2]; }; struct sd_vdi_rsp { diff --git a/include/util.h b/include/util.h index 4c10670..b107e30 100644 --- a/include/util.h +++ b/include/util.h @@ -3,9 +3,10 @@ #include <string.h> +#include "bitops.h" + #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) -#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #if __BYTE_ORDER == __LITTLE_ENDIAN #define __cpu_to_be16(x) bswap_16(x) diff --git a/shepherd/shepherd.c b/shepherd/shepherd.c index 5d89710..0d7cecb 100644 --- a/shepherd/shepherd.c +++ b/shepherd/shepherd.c @@ -100,6 +100,11 @@ static struct sheepdog_node_list_entry *node_list_entries; static int nr_nodes; static unsigned master_idx; +static int is_current(struct sheepdog_inode *i) +{ + return !i->snap_ctime; +} + static char *size_to_str(uint64_t size, char *str, int str_size) { char *units[] = {"MB", "GB", "TB", "PB", "EB", "ZB", "YB"}; @@ -407,53 +412,46 @@ static int shutdown_sheepdog(void) typedef void (*vdi_parser_func_t)(uint64_t oid, char *name, uint32_t tag, uint32_t flags, struct 
sheepdog_inode *i, void *data); -/* - * TODO: handle larger buffer - */ + + int parse_vdi(vdi_parser_func_t func, void *data) { - struct sheepdog_vdi_info *ent; - char *buf; - int rest, ret; - struct sheepdog_inode i; - struct sd_so_req req; + int ret, fd; + unsigned long nr; + static struct sheepdog_inode i; + struct sd_req req; + static DECLARE_BITMAP(vdi_inuse, SD_NR_VDIS); + unsigned int rlen, wlen = 0; - memset(&req, 0, sizeof(req)); + fd = connect_to("localhost", sdport); + if (fd < 0) + return fd; - buf = zalloc(DIR_BUF_LEN); - if (!buf) - return 1; + memset(&req, 0, sizeof(req)); - req.opcode = SD_OP_SO_READ_VDIS; + req.opcode = SD_OP_READ_VDIS; + req.data_length = sizeof(vdi_inuse); + req.epoch = node_list_version; - ret = exec_reqs(node_list_entries, nr_nodes, node_list_version, - SD_DIR_OID, (struct sd_req *)&req, buf, 0, DIR_BUF_LEN, - nr_nodes, 1); + rlen = sizeof(vdi_inuse); + ret = exec_req(fd, &req, vdi_inuse, &wlen, &rlen); + close(fd); - if (ret < 0) { - ret = 1; - goto out; - } + if (ret < 0) + return ret; - ent = (struct sheepdog_vdi_info *)buf; - rest = ret; - while (rest > 0) { - if (!ent->name_len) - break; + for (nr = 0; nr < SD_NR_VDIS; nr++) { + if (!test_bit(nr, vdi_inuse)) + continue; ret = read_object(node_list_entries, nr_nodes, node_list_version, - ent->oid, (void *)&i, sizeof(i), 0, nr_nodes); + bit_to_oid(nr), (void *)&i, sizeof(i), 0, nr_nodes); if (ret == sizeof(i)) - func(ent->oid, ent->name, ent->id, ent->flags, &i, data); + func(i.oid, i.name, i.snap_id, 0, &i, data); - ent++; - rest -= sizeof(*ent); } -out: - free(buf); - return 0; } @@ -499,7 +497,7 @@ static void print_graph_tree(uint64_t oid, char *name, uint32_t tag, "time: %8s", name, tag, size_str, date, time); - if (info->highlight && (flags & FLAG_CURRENT)) + if (info->highlight && is_current(i)) printf("\", color=\"red\"];\n"); else printf("\"];\n"); @@ -548,9 +546,9 @@ static void print_vdi_tree(uint64_t oid, char *name, uint32_t tag, if (info->name && strcmp(name, 
info->name)) return; - if (flags & FLAG_CURRENT) { + if (is_current(i)) strcpy(buf, "(You Are Here)"); - } else { + else { ti = i->ctime >> 32; localtime_r(&ti, &tm); @@ -559,7 +557,7 @@ static void print_vdi_tree(uint64_t oid, char *name, uint32_t tag, } add_vdi_tree(name, buf, oid, i->parent_oid, - info->highlight && (flags & FLAG_CURRENT)); + info->highlight && is_current(i)); } static int treeview_vdi(char *vdiname, int highlight) @@ -599,7 +597,7 @@ static void print_vdi_list(uint64_t oid, char *name, uint32_t tag, for (idx = 0; idx < MAX_DATA_OBJS; idx++) { if (!i->data_oid[idx]) continue; - if (is_data_obj_writeable(i->data_oid[idx], oid)) + if (is_data_obj_writeable(i, idx)) my_objs++; else cow_objs++; @@ -611,7 +609,7 @@ static void print_vdi_list(uint64_t oid, char *name, uint32_t tag, if (!data || strcmp(name, data) == 0) { printf("%c %-8s %5d %7s %7s %7s %s %9" PRIx64 "\n", - flags & FLAG_CURRENT ? ' ' : 's', name, tag, + is_current(i) ? ' ' : 's', name, tag, vdi_size_str, my_objs_str, cow_objs_str, dbuf, oid); } } @@ -630,7 +628,7 @@ static void print_vm_list(uint64_t oid, char *name, uint32_t tag, struct vm_list_info *vli = (struct vm_list_info *)data; char vdi_size_str[8], my_objs_str[8], cow_objs_str[8]; - if (!(flags & FLAG_CURRENT)) + if (!is_current(inode)) return; for (i = 0; i < vli->nr_vms; i++) { @@ -643,7 +641,7 @@ static void print_vm_list(uint64_t oid, char *name, uint32_t tag, for (j = 0; j < MAX_DATA_OBJS; j++) { if (!inode->data_oid[j]) continue; - if (is_data_obj_writeable(inode->data_oid[j], oid)) + if (is_data_obj_writeable(inode, j)) my_objs++; else cow_objs++; @@ -676,7 +674,7 @@ static void cal_total_vdi_size(uint64_t oid, char *name, uint32_t tag, { uint64_t *size = data; - if (flags & FLAG_CURRENT) + if (is_current(i)) *size += i->vdi_size; } -- 1.7.0 |