[Sheepdog] [PATCH] sheepdog: support running VMs outside the sheepdog cluster

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Fri Apr 30 12:17:29 CEST 2010


This patch enables us to run VMs on servers that are not part of the
sheepdog cluster.

usage:
  qemu-system-x86_64 --drive format=sheepdog,file=[hostname]:[port]:[name]

    hostname - Specify the hostname of one of the sheepdog servers.
               Qemu creates a connection to that server.
    port     - Specify the port number of the sheepdog server.
               The default port collie listens on is 7000.
    name     - Specify the vdi name you want to open.

NOTE: If VMs fail unexpectedly outside the sheepdog cluster, the sheepdog
servers cannot know whether the VMs are alive or not, so they cannot
release the VDI locks.  In this case, you need to release the lock
manually with the following command.

  $ shepherd debug -o release_vdi [vdiname]

This restriction will be removed in the future.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 block/sheepdog.c |  102 +++++++++++++++++++++++++++++++++---------------------
 1 files changed, 62 insertions(+), 40 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index f7ea853..b57256a 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -12,7 +12,7 @@
 #include "qemu_socket.h"
 #include "block_int.h"
 
-#define DOG_PORT 7000
+#define SD_DEFAULT_ADDR "localhost:7000"
 
 #define SD_OP_NEW_VDI        0x11
 #define SD_OP_DEL_VDI        0x12
@@ -230,6 +230,7 @@ struct bdrv_sd_state {
 	char name[SD_MAX_VDI_LEN];
 	int is_current;
 
+	char *addr;
 	int fd;
 
 	struct aio_req aio_req_list[MAX_AIO_REQS];
@@ -443,16 +444,6 @@ static int sd_schedule_bh(QEMUBHFunc *cb, struct sd_aiocb *acb)
 	return 0;
 }
 
-static int connect_to_vost(void)
-{
-	char buf[1024];
-	char name[] = "localhost";
-	int port = DOG_PORT;
-
-	snprintf(buf, sizeof(buf), "%s:%d", name, port);
-	return inet_connect(buf, SOCK_STREAM);
-}
-
 static int do_send_recv(int sockfd, struct iovec *iov, int len, int offset,
 			int write)
 {
@@ -497,6 +488,14 @@ static int do_send_recv(int sockfd, struct iovec *iov, int len, int offset,
 	return ret;
 }
 
+static int connect_to_sdog(const char *addr)
+{
+	if (!addr)
+		addr = SD_DEFAULT_ADDR;
+
+	return inet_connect(addr, SOCK_STREAM);
+}
+
 static int do_readv_writev(int sockfd, struct iovec *iov, int len,
 			   int iov_offset, int write)
 {
@@ -707,7 +706,7 @@ static int get_sheep_fd(struct bdrv_sd_state *s)
 {
 	int ret, fd;
 
-	fd = connect_to_vost();
+	fd = connect_to_sdog(s->addr);
 	if (fd < 0) {
 		eprintf("%m\n");
 		return -1;
@@ -729,17 +728,32 @@ static int get_sheep_fd(struct bdrv_sd_state *s)
 	return fd;
 }
 
-static int parse_vdiname(const char *filename, char *vdi, int vdi_len,
-			 uint32_t *snapid)
+static int parse_vdiname(struct bdrv_sd_state *s, const char *filename,
+			 char *vdi, int vdi_len, uint32_t *snapid)
 {
 	char *p, *q;
+	int nr_sep;
+
+	strstart(filename, "sheepdog:", (const char **)&filename);
 
 	p = q = strdup(filename);
 
 	if (!p)
 		return 1;
 
-	strstart(p, "sheepdog:", (const char **)&p);
+	nr_sep = 0;
+	while (*p) {
+		if (*p == ':')
+			nr_sep++;
+		if (nr_sep == 2)
+			break;
+		p++;
+	}
+
+	if (nr_sep == 2)
+		*p++ = '\0';
+	else
+		p = q;
 
 	strncpy(vdi, p, vdi_len);
 
@@ -750,7 +764,12 @@ static int parse_vdiname(const char *filename, char *vdi, int vdi_len,
 	} else
 		*snapid = CURRENT_VDI_ID; /* search current vdi */
 
-	qemu_free(q);
+	if (nr_sep == 2)
+		s->addr = q;
+	else {
+		free(q);
+		s->addr = NULL;
+	}
 
 	return 0;
 }
@@ -764,7 +783,7 @@ static int find_vdi_name(struct bdrv_sd_state *s, char *filename, uint32_t snapi
 	unsigned int wlen, rlen = 0;
 	char buf[SD_MAX_VDI_LEN];
 
-	fd = connect_to_vost();
+	fd = connect_to_sdog(s->addr);
 	if (fd < 0)
 		return -1;
 
@@ -859,7 +878,7 @@ static int add_aio_request(struct bdrv_sd_state *s, struct aio_req *aio_req,
 	return 0;
 }
 
-static int read_vdi_obj(char *buf, uint32_t vid, int *copies)
+static int read_vdi_obj(char *addr, char *buf, uint32_t vid, int *copies)
 {
 	struct sd_obj_req hdr;
 	struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
@@ -875,7 +894,7 @@ static int read_vdi_obj(char *buf, uint32_t vid, int *copies)
 	hdr.oid = vid_to_vdi_oid(vid);
 	hdr.data_length = rlen;
 
-	fd = connect_to_vost();
+	fd = connect_to_sdog(addr);
 	if (fd < 0) {
 		eprintf("failed to connect to a sheep, %d\n", i);
 		return -1;
@@ -921,17 +940,17 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
 		s->aio_req_free[i] = &s->aio_req_list[i];
 		s->aio_req_list[i].aiocb = NULL;
 	}
-	s->fd = get_sheep_fd(s);
-	if (s->fd < 0)
-		return -1;
 	s->nr_aio_req_free = MAX_AIO_REQS;
 
 	if (strstart(filename, "sheepdog:", NULL))
 		for_snapshot = 1;
 
 	memset(vdi, 0, sizeof(vdi));
-	if (parse_vdiname(filename, vdi, sizeof(vdi), &snapid) < 0)
+	if (parse_vdiname(s, filename, vdi, sizeof(vdi), &snapid) < 0)
 		goto out;
+	s->fd = get_sheep_fd(s);
+	if (s->fd < 0)
+		return -1;
 
 	ret = find_vdi_name(s, vdi, snapid, &vid);
 	if (ret)
@@ -942,7 +961,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
 	else
 		s->is_current = 1;
 
-	ret = read_vdi_obj(buf, vid, &dummy);
+	ret = read_vdi_obj(s->addr, buf, vid, &dummy);
 	if (ret)
 		goto out;
 
@@ -960,8 +979,9 @@ out:
 	return -1;
 }
 
-static int do_sd_create(char *filename, char *tag, int64_t total_sectors,
-			uint32_t base_vid, uint32_t *vdi_id, int snapshot)
+static int do_sd_create(const char *addr, char *filename, char *tag,
+			int64_t total_sectors, uint32_t base_vid,
+			uint32_t *vdi_id, int snapshot)
 {
 	struct sd_vdi_req hdr;
 	struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr;
@@ -969,7 +989,7 @@ static int do_sd_create(char *filename, char *tag, int64_t total_sectors,
 	unsigned int wlen, rlen = 0;
 	char buf[SD_MAX_VDI_LEN];
 
-	fd = connect_to_vost();
+	fd = connect_to_sdog(addr);
 	if (fd < 0)
 		return -1;
 
@@ -1036,7 +1056,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
 		if (ret < 0)
 			return -1;
 
-		if (parse_vdiname(backing_file, vdi, sizeof(vdi), &snapid) < 0)
+		if (parse_vdiname(bs.opaque, backing_file, vdi, sizeof(vdi), &snapid) < 0)
 			return -1;
 
 		/* cannot clone from a current inode */
@@ -1048,7 +1068,8 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
 			return -1;
 	}
 
-	return do_sd_create((char *)filename, NULL, total_sectors, vid, NULL, 0);
+	return do_sd_create(NULL, (char *)filename, NULL, total_sectors, vid,
+			    NULL, 0);
 }
 
 static void sd_close(BlockDriverState *bs)
@@ -1056,6 +1077,7 @@ static void sd_close(BlockDriverState *bs)
 	struct bdrv_sd_state *s = bs->opaque;
 
 	close(s->fd);
+	free(s->addr);
 }
 
 static int sd_claim(BlockDriverState *bs)
@@ -1069,7 +1091,7 @@ static int sd_claim(BlockDriverState *bs)
 
 	eprintf("%s\n", s->name);
 
-	fd = connect_to_vost();
+	fd = connect_to_sdog(s->addr);
 	if (fd < 0)
 		return -1;
 
@@ -1110,7 +1132,7 @@ static void sd_release(BlockDriverState *bs)
 
 	eprintf("%s\n", s->name);
 
-	fd = connect_to_vost();
+	fd = connect_to_sdog(s->addr);
 	if (fd < 0)
 		return;
 
@@ -1186,7 +1208,7 @@ static int sd_create_branch(struct bdrv_sd_state *s)
 	if (!buf)
 		return -1;
 
-	ret = do_sd_create(s->name, NULL, s->inode.vdi_size >> 9,
+	ret = do_sd_create(s->addr, s->name, NULL, s->inode.vdi_size >> 9,
 			   s->inode.vdi_id, &vid, 1);
 	if (ret)
 		goto out;
@@ -1195,7 +1217,7 @@ static int sd_create_branch(struct bdrv_sd_state *s)
 
 	copies = s->inode.nr_copies;
 
-	ret = read_vdi_obj(buf, vid, &copies);
+	ret = read_vdi_obj(s->addr, buf, vid, &copies);
 	if (ret < 0)
 		goto out;
 
@@ -1373,7 +1395,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 	s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
 
 	/* refresh inode. */
-	fd = connect_to_vost();
+	fd = connect_to_sdog(s->addr);
 	if (fd < 0) {
 		ret = -EIO;
 		goto cleanup;
@@ -1403,7 +1425,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 		goto cleanup;
 	}
 
-	ret = do_sd_create(s->name, NULL, s->inode.vdi_size >> 9,
+	ret = do_sd_create(s->addr, s->name, NULL, s->inode.vdi_size >> 9,
 			   s->inode.vdi_id, &new_vid, 1);
 	if (ret < 0) {
 		eprintf("failed to create inode for snapshot. %m\n");
@@ -1417,7 +1439,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 		goto cleanup;
 	}
 
-	if (read_vdi_obj((char *)inode, new_vid, &dummy) < 0) {
+	if (read_vdi_obj(s->addr, (char *)inode, new_vid, &dummy) < 0) {
 		eprintf("failed to read new inode info. %m\n");
 		ret = -EIO;
 		goto cleanup;
@@ -1471,7 +1493,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
 		goto out;
 	}
 
-	ret = read_vdi_obj(buf, vid, &dummy);
+	ret = read_vdi_obj(s->addr, buf, vid, &dummy);
 	if (ret) {
 		ret = -ENOENT;
 		goto out;
@@ -1538,7 +1560,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
 	if (!vdi_inuse)
 		return 0;
 
-	fd = connect_to_vost();
+	fd = connect_to_sdog(s->addr);
 	if (fd < 0)
 		goto out;
 
@@ -1569,7 +1591,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
 		if (!test_bit(i, vdi_inuse))
 			break;
 
-		ret = read_vdi_obj((char *)&inode, i, &copies);
+		ret = read_vdi_obj(s->addr, (char *)&inode, i, &copies);
 		if (ret)
 			continue;
 
@@ -1603,7 +1625,7 @@ static int do_load_save_vmstate(struct bdrv_sd_state *s, uint8_t *data,
 	uint32_t vdi_index;
 	uint64_t offset;
 
-	fd = connect_to_vost();
+	fd = connect_to_sdog(s->addr);
 	if (fd < 0) {
 		ret = -EIO;
 		goto cleanup;
-- 
1.5.6.5




More information about the sheepdog mailing list