[Sheepdog] [PATCH] sheepdog: changes for the superblock removal

FUJITA Tomonori fujita.tomonori at lab.ntt.co.jp
Fri Mar 26 02:17:50 CET 2010


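Adapt the sheepdog block driver to the superblock removal:

- VDIs are looked up by name plus a 32-bit snapshot id (snapid)
  instead of a 64-bit tag; names are sent to the server in fixed
  SD_MAX_VDI_LEN-byte buffers.
- SD_FLAG_CMD_SNAPSHOT and SD_VDI_RSP_FLAG_CURRENT are dropped;
  sd_open() now treats snapid == 0 as the current VDI.
- struct sd_inode gains name, snap_ctime and snap_id, and replaces
  block_size with block_size_shift.
- Object ids use the new layout (VDI_BIT, VDI_SPACE_SHIFT), so
  is_data_obj() and is_data_obj_writeable() are updated accordingly.
- sd_snapshot_list() reads the in-use VDI bitmap with SD_OP_READ_VDIS
  and probes from the hash of the VDI name instead of reading a list
  of sheepdog_vdi_info entries.

Signed-off-by: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp>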
---
 block/sheepdog.c |  210 ++++++++++++++++++++++++++---------------------------
 1 files changed, 103 insertions(+), 107 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 6a45cfa..e049463 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -36,6 +36,7 @@
 #define SD_OP_GET_NODE_LIST  0x19
 #define SD_OP_GET_VM_LIST    0x20
 #define SD_OP_MAKE_FS        0x21
+#define SD_OP_READ_VDIS      0x26
 
 #define SD_OP_DEBUG_INC_NVER 0xA0
 #define SD_OP_DEBUG_SET_NODE 0xA1
@@ -49,8 +50,6 @@
 #define SD_FLAG_CMD_WRITE    0x01
 #define SD_FLAG_CMD_COW      0x02
 
-#define SD_FLAG_CMD_SNAPSHOT (1U << 8)
-
 #define SD_RES_SUCCESS       0x00 /* Success */
 #define SD_RES_UNKNOWN       0x01 /* Unknown error */
 #define SD_RES_NO_OBJ        0x02 /* No object found */
@@ -77,21 +76,21 @@
 #define SD_RES_VDI_NOT_LOCKED   0x17 /* Vdi is not locked */
 #define SD_RES_SHUTDOWN      0x18 /* Sheepdog is shutting down */
 
-#define MAX_DATA_OBJS (1ULL << 18)
-#define MAX_CHILDREN 1024
-#define MAX_AIO_REQS 4096
-
 /* should be configurable? */
 #define MAX_RETRIES 6
 
+#define SD_NR_VDIS   (1U << 24)
+#define VDI_SPACE_SHIFT   32
+#define VDI_BIT (UINT64_C(1) << 63)
+#define DEAFAULT_NR_COPIES 1
+#define SD_MAX_VDI_LEN 256
+#define MAX_DATA_OBJS (1ULL << 20)
+#define MAX_CHILDREN 1024
 #define SD_DATA_OBJ_SIZE (1UL << 22)
 
 #define SD_INODE_SIZE (sizeof(struct sd_inode))
-
 #define CURRENT_VDI_ID 0
 
-#define SD_MAX_VDI_LEN 256
-
 #undef eprintf
 #define eprintf(fmt, args...)						\
 do {									\
@@ -162,13 +161,12 @@ struct sd_vdi_req {
 	uint32_t        id;
 	uint32_t        data_length;
 	uint64_t        base_oid;
-	uint64_t        tag;
 	uint64_t	vdi_size;
+	uint32_t        copies;
+	uint32_t        snapid;
 	uint32_t        pad[2];
 };
 
-#define SD_VDI_RSP_FLAG_CURRENT 0x01;
-
 struct sd_vdi_rsp {
 	uint8_t		proto_ver;
 	uint8_t		opcode;
@@ -207,12 +205,15 @@ struct sd_node_rsp {
 };
 
 struct sd_inode {
+	char name[SD_MAX_VDI_LEN];
 	uint64_t oid;
 	uint64_t ctime;
+	uint64_t snap_ctime;
 	uint64_t vdi_size;
-	uint64_t block_size;
-	uint32_t copy_policy;
-	uint32_t nr_copies;
+	uint16_t copy_policy;
+	uint8_t  nr_copies;
+	uint8_t  block_size_shift;
+	uint32_t snap_id;
 	uint64_t parent_oid;
 	uint64_t child_oid[MAX_CHILDREN];
 	uint64_t data_oid[MAX_DATA_OBJS];
@@ -252,6 +253,8 @@ struct sd_aiocb {
 	QLIST_HEAD(aioreq_head, aio_req) aioreq_head;
 };
 
+#define MAX_AIO_REQS 4096
+
 struct sd_aiostate {
 	struct bdrv_sd_state *s;
 	int fd;
@@ -322,19 +325,15 @@ static inline int after(uint32_t seq1, uint32_t seq2)
 	return (int32_t)(seq2 - seq1) < 0;
 }
 
-static inline uint64_t oid_to_ino(uint64_t inode_oid)
-{
-	return (inode_oid >> 18) & ((1ULL << 37) - 1);
-}
-
-static inline int is_data_obj_writeable(uint64_t inode_oid, uint64_t data_oid)
+static inline int is_data_obj_writeable(struct sd_inode *inode, unsigned int idx)
 {
-	return oid_to_ino(inode_oid) == oid_to_ino(data_oid);
+	return (inode->oid >> VDI_SPACE_SHIFT) ==
+		(inode->data_oid[idx] >> VDI_SPACE_SHIFT);
 }
 
 static inline int is_data_obj(uint64_t oid)
 {
-	return oid & ((1ULL << 18) - 1);
+	return !(VDI_BIT & oid);
 }
 
 /*
@@ -872,7 +871,7 @@ static int get_sheep_fd(struct bdrv_sd_state *s)
 }
 
 static int parse_vdiname(const char *filename, char *vdi, int vdi_len,
-			 uint64_t *tag)
+			 uint32_t *snapid)
 {
 	char *p, *q;
 
@@ -888,35 +887,37 @@ static int parse_vdiname(const char *filename, char *vdi, int vdi_len,
 	p = strchr(vdi, ':');
 	if (p) {
 		*p++ = '\0';
-		*tag = strtol(p, NULL, 16);
+		*snapid = strtol(p, NULL, 16);
 	} else
-		*tag = CURRENT_VDI_ID; /* search current vdi */
+		*snapid = CURRENT_VDI_ID; /* search current vdi */
 
 	free(q);
 
 	return 0;
 }
 
-static int find_vdi_name(struct bdrv_sd_state *s, char *filename, uint64_t tag,
-			 uint64_t *oid, int for_snapshot, int *current)
+static int find_vdi_name(struct bdrv_sd_state *s, char *filename, uint32_t snapid,
+			 uint64_t *oid)
 {
 	int ret, fd;
 	struct sd_vdi_req hdr;
 	struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr;
 	unsigned int wlen, rlen = 0;
+	char buf[SD_MAX_VDI_LEN];
 
 	fd = connect_to_vost();
 	if (fd < 0)
 		return -1;
 
 	memset(&hdr, 0, sizeof(hdr));
+	snprintf(buf, sizeof(buf), "%s", filename);
 	hdr.opcode = SD_OP_GET_VDI_INFO;
-	wlen = strlen(filename) + 1;
-	hdr.data_length = wlen;
-	hdr.tag = tag;
+	wlen = SD_MAX_VDI_LEN;
+	hdr.data_length = SD_MAX_VDI_LEN;
+	hdr.snapid = snapid;
 	hdr.flags = SD_FLAG_CMD_WRITE;
 
-	ret = do_req(fd, (struct sd_req *)&hdr, filename, &wlen, &rlen);
+	ret = do_req(fd, (struct sd_req *)&hdr, buf, &wlen, &rlen);
 	if (ret) {
 		ret = -1;
 		goto out;
@@ -927,9 +928,8 @@ static int find_vdi_name(struct bdrv_sd_state *s, char *filename, uint64_t tag,
 		ret = -1;
 		goto out;
 	}
-
 	*oid = rsp->oid;
-	s->is_current = rsp->flags & SD_VDI_RSP_FLAG_CURRENT;
+
 	ret = 0;
 out:
 	close(fd);
@@ -1045,7 +1045,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
 	uint64_t oid = 0;
 	struct bdrv_sd_state *s = bs->opaque;
 	char vdi[256];
-	uint64_t tag;
+	uint32_t snapid;
 	int for_snapshot = 0, dummy;
 	char *buf;
 
@@ -1070,15 +1070,17 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
 		for_snapshot = 1;
 
 	memset(vdi, 0, sizeof(vdi));
-	if (parse_vdiname(filename, vdi, sizeof(vdi), &tag) < 0)
+	if (parse_vdiname(filename, vdi, sizeof(vdi), &snapid) < 0)
 		goto out;
 
-	ret = find_vdi_name(s, vdi, tag, &oid, for_snapshot, &s->is_current);
+	ret = find_vdi_name(s, vdi, snapid, &oid);
 	if (ret)
 		goto out;
 
-	if (!s->is_current)
+	if (snapid)
 		eprintf("%" PRIx64 " non current inode was open.\n", oid);
+	else
+		s->is_current = 1;
 
 	ret = read_vdi_obj(buf, oid, &dummy);
 	if (ret)
@@ -1105,28 +1107,23 @@ static int do_sd_create(char *filename, char *tag, int64_t total_sectors,
 	struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr;
 	int fd, ret;
 	unsigned int wlen, rlen = 0;
-	char buf[SD_MAX_VDI_LEN * 2];
+	char buf[SD_MAX_VDI_LEN];
 
 	fd = connect_to_vost();
 	if (fd < 0)
 		return -1;
 
-	memset(buf, 0, sizeof(buf));
 	strncpy(buf, filename, SD_MAX_VDI_LEN);
-	if (tag)
-		strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_LEN);
 
 	memset(&hdr, 0, sizeof(hdr));
 	hdr.opcode = SD_OP_NEW_VDI;
 	hdr.base_oid = base_oid;
 
 	wlen = SD_MAX_VDI_LEN;
-	if (tag)
-		wlen += SD_MAX_VDI_LEN;
 
 	hdr.flags = SD_FLAG_CMD_WRITE;
-	if (snapshot)
-		hdr.flags |= SD_FLAG_CMD_SNAPSHOT;
+	hdr.snapid = snapshot;
+
 	hdr.data_length = wlen;
 	hdr.vdi_size = total_sectors * 512;
 
@@ -1166,8 +1163,8 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
 
 	if (backing_file) {
 		BlockDriverState bs;
-		char vdi[256];
-		uint64_t tag;
+		char vdi[SD_MAX_VDI_LEN];
+		uint32_t snapid;
 
 		memset(&bs, 0, sizeof(bs));
 
@@ -1179,16 +1176,15 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
 		if (ret < 0)
 			return -1;
 
-		if (parse_vdiname(backing_file, vdi, sizeof(vdi), &tag) < 0)
+		if (parse_vdiname(backing_file, vdi, sizeof(vdi), &snapid) < 0)
 			return -1;
 
 		/* cannot clone from a current inode */
-		if (tag == CURRENT_VDI_ID)
+		if (snapid == CURRENT_VDI_ID)
 			return -1;
 
-		ret = find_vdi_name(bs.opaque, vdi, tag, &oid, 1, NULL);
-		struct bdrv_sd_state *s = bs.opaque;
-		if (ret || s->is_current)
+		ret = find_vdi_name(bs.opaque, vdi, snapid, &oid);
+		if (ret)
 			return -1;
 	}
 
@@ -1199,8 +1195,6 @@ static void sd_close(BlockDriverState *bs)
 {
 	struct bdrv_sd_state *s = bs->opaque;
 
-	eprintf("%s\n", s->name);
-
 	free(s->name);
 }
 
@@ -1211,6 +1205,7 @@ static int sd_claim(BlockDriverState *bs)
 	struct sd_vdi_req hdr;
 	struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr;
 	unsigned int wlen, rlen = 0;
+	char buf[SD_MAX_VDI_LEN];
 
 	eprintf("%s\n", s->name);
 
@@ -1218,14 +1213,16 @@ static int sd_claim(BlockDriverState *bs)
 	if (fd < 0)
 		return -1;
 
+	memset(buf, 0, sizeof(buf));
+	strncpy(buf, s->name, SD_MAX_VDI_LEN);
 	memset(&hdr, 0, sizeof(hdr));
 	hdr.opcode = SD_OP_LOCK_VDI;
-	wlen = strlen(s->name) + 1;
-	hdr.data_length = wlen;
-	hdr.tag = CURRENT_VDI_ID;
+	wlen = SD_MAX_VDI_LEN;
+	hdr.data_length = SD_MAX_VDI_LEN;
+	hdr.snapid = CURRENT_VDI_ID;
 	hdr.flags = SD_FLAG_CMD_WRITE;
 
-	ret = do_req(fd, (struct sd_req *)&hdr, s->name, &wlen, &rlen);
+	ret = do_req(fd, (struct sd_req *)&hdr, buf, &wlen, &rlen);
 	if (ret) {
 		ret = -1;
 		goto out;
@@ -1407,19 +1404,19 @@ static void sd_write_bh_cb(void *p)
 
 		len = min_t(unsigned long, total - done, CHUNK_SIZE - offset);
 
-		if (!oid || !is_data_obj_writeable(inode->oid, oid)) {
+		if (!oid || !is_data_obj_writeable(inode, idx)) {
 			if (!acb->write)
 				goto done;
 
 			create = 1;
 			dprintf("update ino (%" PRIu64") %"
 				PRIu64 " %" PRIu64 " %" PRIu64 "\n",
-				inode->oid, oid, inode->oid + (idx + 1), idx);
-			if (oid && !is_data_obj_writeable(inode->oid, oid)) {
+				inode->oid, oid, inode->oid + idx, idx);
+			if (oid && !is_data_obj_writeable(inode, idx)) {
 				old_oid = oid;
 				flags = SD_FLAG_CMD_COW;
 			}
-			oid = inode->oid + (idx + 1);
+			oid = inode->oid + idx;
 			acb->oid[i] = oid;
 			dprintf("new oid %lx\n", acb->oid[i]);
 		}
@@ -1619,96 +1616,95 @@ struct sd_so_rsp {
 	uint32_t	opcode_specific[2];
 };
 
-struct sheepdog_vdi_info {
-	uint64_t oid;
-	uint16_t id;
-	uint16_t name_len;
-	uint16_t tag_len;
-	uint8_t type;
-	uint8_t flags;
-	uint32_t epoch;
-	char name[SD_MAX_VDI_LEN];
-	char tag[SD_MAX_VDI_LEN];
-};
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+#define BITS_PER_BYTE		8
+#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
+#define DECLARE_BITMAP(name,bits) \
+	unsigned long name[BITS_TO_LONGS(bits)]
+
+#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))
+
+static inline int test_bit(unsigned int nr, const unsigned long *addr)
+{
+	return ((1UL << (nr % BITS_PER_LONG)) &
+		(((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
+}
 
-#define SD_OP_SO_READ_VDIS   0x64
+static inline uint64_t bit_to_oid(unsigned long nr)
+{
+	return ((unsigned long long)nr << VDI_SPACE_SHIFT) | VDI_BIT;
+}
 
 static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
 {
 	struct bdrv_sd_state *s = bs->opaque;
-	struct sd_so_req req;
-	struct sd_rsp *rsp;
-	struct sheepdog_vdi_info *vi;
-	int i, fd, nr = 0, ret, max = 1024; /* FIXME */
-	char name[SD_MAX_VDI_LEN];
+	struct sd_req req;
+	int i, fd, nr = 1024, ret, max = BITS_TO_LONGS(SD_NR_VDIS) * sizeof(long);
 	QEMUSnapshotInfo *sn_tab = NULL;
 	unsigned wlen, rlen;
 	int found = 0;
-	struct sd_inode inode;
+	static struct sd_inode inode;
+	unsigned long *vdi_inuse;
+	unsigned int start_nr;
 
-	vi = malloc(max * sizeof(*vi));
-	if (!vi)
+	vdi_inuse = malloc(max);
+	if (!vdi_inuse)
 		return 0;
 
-	memset(name, 0, sizeof(name));
-	snprintf(name, sizeof(name), "%s", s->name);
-
 	fd = connect_to_vost();
 	if (fd < 0)
 		goto out;
 
+	rlen = max;
 	wlen = 0;
-	rlen = max * sizeof(*vi);
 
 	memset(&req, 0, sizeof(req));
 
-	req.opcode = SD_OP_SO_READ_VDIS;
-	req.data_length = rlen;
+	req.opcode = SD_OP_READ_VDIS;
+	req.data_length = max;
 
-	ret = do_req(fd, (struct sd_req *)&req, vi, &wlen, &rlen);
+	ret = do_req(fd, (struct sd_req *)&req, vdi_inuse, &wlen, &rlen);
 
 	close(fd);
 	if (ret)
 		goto out;
 
-	rsp = (struct sd_rsp *)&req;
-	if (rsp->result != SD_RES_SUCCESS)
-		goto out;
-
-	nr = rsp->data_length / sizeof(*vi);
 	sn_tab = malloc(nr * sizeof(*sn_tab));
 	if (!sn_tab)
 		goto out;
 
 	memset(sn_tab, 0, nr * sizeof(*sn_tab));
 
-	for (i = 0; i < nr; i++) {
+	start_nr = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT) & (SD_NR_VDIS - 1);
+
+	/* TODO: round up */
+	for (i = start_nr; i < SD_NR_VDIS && found < nr; i++) {
 		int copies;
 
-		if (strcmp(vi[i].name, s->name) || !vi[i].id)
-			continue;
+		if (!test_bit(i, vdi_inuse))
+			break;
 
-		ret = read_vdi_obj((char *)&inode, vi[i].oid, &copies);
+		ret = read_vdi_obj((char *)&inode, bit_to_oid(i), &copies);
 		if (ret)
 			continue;
 
-		sn_tab[found].date_sec = inode.ctime >> 32;
-		sn_tab[found].date_nsec = inode.ctime & 0xffffffff;
+		if (!strcmp(inode.name, s->name) && inode.snap_ctime) {
+			sn_tab[found].date_sec = inode.snap_ctime >> 32;
+			sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff;
 
-		snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u",
-			vi[i].id);
-		strncpy(sn_tab[found].name, vi[i].tag,  sizeof(sn_tab[found].name));
-		found++;
+			snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u",
+				 inode.snap_id);
+			found++;
+		}
 	}
 out:
 	*psn_tab = sn_tab;
 
-	free(vi);
+	free(vdi_inuse);
 
 	return found;
 }
 
-
 static QEMUOptionParameter sd_create_options[] = {
 	{
 		.name = BLOCK_OPT_SIZE,
-- 
1.7.0




More information about the sheepdog mailing list