[Sheepdog] [PATCH v4] sheepdog: implement SD_OP_FLUSH_VDI operation
Liu Yuan
namei.unix at gmail.com
Tue Apr 3 11:58:19 CEST 2012
From: Liu Yuan <tailai.ly at taobao.com>
Flush operation is supposed to flush the write-back cache of
sheepdog cluster.
By issuing flush operation, we can assure the Guest of data
reaching the sheepdog cluster storage.
Cc: Kevin Wolf <kwolf at redhat.com>
Cc: Michael Tokarev <mjt at tls.msk.ru>
Cc: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
Cc: Stefan Hajnoczi <stefanha at gmail.com>
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
v3 -> v4:
address Kazutaka's comments. Thanks !
- use a dedicated fd for flush operation
v2 -> v3:
address Stefan Hajnoczi comments. Thanks !
- use qemu_co_send/recv to send/revc msg
v1 -> v2:
address Michael Tokarev comments. Thanks !
- use per-device flag
- use bs->fd instead of 'connect_to_sdog()'
block/sheepdog.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++++-----
1 files changed, 128 insertions(+), 14 deletions(-)
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 00276f6f..62dfa48 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -32,9 +32,11 @@
#define SD_OP_RELEASE_VDI 0x13
#define SD_OP_GET_VDI_INFO 0x14
#define SD_OP_READ_VDIS 0x15
+#define SD_OP_FLUSH_VDI 0x16
#define SD_FLAG_CMD_WRITE 0x01
#define SD_FLAG_CMD_COW 0x02
+#define SD_FLAG_CMD_CACHE 0x04
#define SD_RES_SUCCESS 0x00 /* Success */
#define SD_RES_UNKNOWN 0x01 /* Unknown error */
@@ -293,10 +295,12 @@ typedef struct BDRVSheepdogState {
char name[SD_MAX_VDI_LEN];
int is_snapshot;
+ uint8_t cache_enabled;
char *addr;
char *port;
int fd;
+ int flush_fd;
CoMutex lock;
Coroutine *co_send;
@@ -516,6 +520,23 @@ static int send_req(int sockfd, SheepdogReq *hdr, void *data,
return ret;
}
+static int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
+ unsigned int *wlen)
+{
+ int ret;
+
+ ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
+ if (ret < sizeof(*hdr)) {
+ error_report("failed to send a req, %s", strerror(errno));
+ }
+
+ ret = qemu_co_send(sockfd, data, *wlen);
+ if (ret < *wlen) {
+ error_report("failed to send a req, %s", strerror(errno));
+ }
+
+ return ret;
+}
static int do_req(int sockfd, SheepdogReq *hdr, void *data,
unsigned int *wlen, unsigned int *rlen)
{
@@ -550,6 +571,40 @@ out:
return ret;
}
+static int do_co_req(int sockfd, SheepdogReq *hdr, void *data,
+ unsigned int *wlen, unsigned int *rlen)
+{
+ int ret;
+
+ socket_set_block(sockfd);
+ ret = send_co_req(sockfd, hdr, data, wlen);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
+ if (ret < sizeof(*hdr)) {
+ error_report("failed to get a rsp, %s", strerror(errno));
+ goto out;
+ }
+
+ if (*rlen > hdr->data_length) {
+ *rlen = hdr->data_length;
+ }
+
+ if (*rlen) {
+ ret = qemu_co_recv(sockfd, data, *rlen);
+ if (ret < *rlen) {
+ error_report("failed to get the data, %s", strerror(errno));
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ socket_set_nonblock(sockfd);
+ return ret;
+}
+
static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
struct iovec *iov, int niov, int create,
enum AIOCBState aiocb_type);
@@ -900,6 +955,10 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
hdr.flags = SD_FLAG_CMD_WRITE | flags;
}
+ if (s->cache_enabled) {
+ hdr.flags |= SD_FLAG_CMD_CACHE;
+ }
+
hdr.oid = oid;
hdr.cow_oid = old_oid;
hdr.copies = s->inode.nr_copies;
@@ -942,7 +1001,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
unsigned int datalen, uint64_t offset,
- int write, int create)
+ int write, int create, uint8_t cache)
{
SheepdogObjReq hdr;
SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
@@ -965,6 +1024,11 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
rlen = datalen;
hdr.opcode = SD_OP_READ_OBJ;
}
+
+ if (cache) {
+ hdr.flags |= SD_FLAG_CMD_CACHE;
+ }
+
hdr.oid = oid;
hdr.data_length = datalen;
hdr.offset = offset;
@@ -986,15 +1050,18 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
}
static int read_object(int fd, char *buf, uint64_t oid, int copies,
- unsigned int datalen, uint64_t offset)
+ unsigned int datalen, uint64_t offset, uint8_t cache)
{
- return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0);
+ return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0,
+ cache);
}
static int write_object(int fd, char *buf, uint64_t oid, int copies,
- unsigned int datalen, uint64_t offset, int create)
+ unsigned int datalen, uint64_t offset, int create,
+ uint8_t cache)
{
- return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create);
+ return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create,
+ cache);
}
static int sd_open(BlockDriverState *bs, const char *filename, int flags)
@@ -1011,6 +1078,15 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
QLIST_INIT(&s->outstanding_aio_head);
s->fd = -1;
+ if (flags & BDRV_O_CACHE_WB) {
+ s->cache_enabled = 1;
+ s->flush_fd = connect_to_sdog(s->addr, s->port);
+ if (s->flush_fd < 0) {
+ error_report("failed to connect");
+ goto out;
+ }
+ }
+
memset(vdi, 0, sizeof(vdi));
memset(tag, 0, sizeof(tag));
if (parse_vdiname(s, filename, vdi, &snapid, tag) < 0) {
@@ -1038,7 +1114,8 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
}
buf = g_malloc(SD_INODE_SIZE);
- ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0);
+ ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0,
+ s->cache_enabled);
closesocket(fd);
@@ -1272,6 +1349,9 @@ static void sd_close(BlockDriverState *bs)
qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL, NULL);
closesocket(s->fd);
+ if (s->cache_enabled) {
+ closesocket(s->flush_fd);
+ }
g_free(s->addr);
}
@@ -1305,7 +1385,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
s->inode.vdi_size = offset;
ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
- s->inode.nr_copies, datalen, 0, 0);
+ s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
close(fd);
if (ret < 0) {
@@ -1387,7 +1467,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
}
ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
- SD_INODE_SIZE, 0);
+ SD_INODE_SIZE, 0, s->cache_enabled);
closesocket(fd);
@@ -1575,6 +1655,36 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
return acb->ret;
}
+static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
+{
+ BDRVSheepdogState *s = bs->opaque;
+ SheepdogObjReq hdr = { 0 };
+ SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
+ SheepdogInode *inode = &s->inode;
+ int ret;
+ unsigned int wlen = 0, rlen = 0;
+
+ if (!s->cache_enabled) {
+ return 0;
+ }
+
+ hdr.opcode = SD_OP_FLUSH_VDI;
+ hdr.oid = vid_to_vdi_oid(inode->vdi_id);
+
+ ret = do_co_req(s->fd, (SheepdogReq *)&hdr, NULL, &wlen, &rlen);
+ if (ret) {
+ error_report("failed to send a request to the sheep");
+ return ret;
+ }
+
+ if (rsp->result != SD_RES_SUCCESS) {
+ error_report("%s", sd_strerror(rsp->result));
+ return -EIO;
+ }
+
+ return 0;
+}
+
static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
{
BDRVSheepdogState *s = bs->opaque;
@@ -1610,7 +1720,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
}
ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
- s->inode.nr_copies, datalen, 0, 0);
+ s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
if (ret < 0) {
error_report("failed to write snapshot's inode.");
ret = -EIO;
@@ -1629,7 +1739,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
inode = (SheepdogInode *)g_malloc(datalen);
ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid),
- s->inode.nr_copies, datalen, 0);
+ s->inode.nr_copies, datalen, 0, s->cache_enabled);
if (ret < 0) {
error_report("failed to read new inode info. %s", strerror(errno));
@@ -1684,7 +1794,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
buf = g_malloc(SD_INODE_SIZE);
ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
- SD_INODE_SIZE, 0);
+ SD_INODE_SIZE, 0, s->cache_enabled);
closesocket(fd);
@@ -1779,7 +1889,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
/* we don't need to read entire object */
ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid),
- 0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0);
+ 0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0,
+ s->cache_enabled);
if (ret) {
continue;
@@ -1835,10 +1946,12 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
create = (offset == 0);
if (load) {
ret = read_object(fd, (char *)data, vmstate_oid,
- s->inode.nr_copies, data_len, offset);
+ s->inode.nr_copies, data_len, offset,
+ s->cache_enabled);
} else {
ret = write_object(fd, (char *)data, vmstate_oid,
- s->inode.nr_copies, data_len, offset, create);
+ s->inode.nr_copies, data_len, offset, create,
+ s->cache_enabled);
}
if (ret < 0) {
@@ -1904,6 +2017,7 @@ BlockDriver bdrv_sheepdog = {
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
+ .bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_snapshot_create = sd_snapshot_create,
.bdrv_snapshot_goto = sd_snapshot_goto,
--
1.7.8.2
More information about the sheepdog
mailing list